Removed old haddock reader code. Add dependency on haddock-library.

This also removes the dependency on alex and happy.
2014-06-18 11:33:09 -07:00 · 2014-06-18 11:33:09 -07:00 · ab390a10ec
commit ab390a10ec
parent b371e83d73
5 changed files with 25 additions and 370 deletions
--- a/6
+++ b/6
@ -12,11 +12,7 @@ Quick install
 -------------
 1.  Install the [Haskell platform].  This will give you [GHC] and
-    the [cabal-install] build tool, as well as `alex` and `happy`.
+    the [cabal-install] build tool.
    If you do not use the Haskell platform, you'll need to install
    `alex` and `happy` separately:
        cabal install alex happy
 2.  Update your package database:
--- a/pandoc.cabal
+++ b/pandoc.cabal
@ -255,8 +255,8 @@ Library
                 vector >= 0.10 && < 0.11,
                 hslua >= 0.3 && < 0.4,
                 binary >= 0.5 && < 0.8,
-                 SHA >= 1.6 && < 1.7
+                 SHA >= 1.6 && < 1.7,
-  Build-Tools:   alex, happy
+                 haddock-library >= 1.0 && < 1.1
  if flag(https)
     Build-Depends: http-client >= 0.3.2 && < 0.4,
                    http-client-tls >= 0.2 && < 0.3,
@ -322,9 +322,7 @@ Library
                   Text.Pandoc.XML,
                   Text.Pandoc.SelfContained,
                   Text.Pandoc.Process
-  Other-Modules:   Text.Pandoc.Readers.Haddock.Lex,
+  Other-Modules:   Text.Pandoc.Readers.Docx.Lists,
                   Text.Pandoc.Readers.Haddock.Parse,
                   Text.Pandoc.Readers.Docx.Lists,
                   Text.Pandoc.Readers.Docx.Parse,
                   Text.Pandoc.Writers.Shared,
                   Text.Pandoc.Asciify,
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@ -3,7 +3,8 @@
   Copyright   : Copyright (C) 2013 David Lazar
   License     : GNU GPL, version 2 or above
-   Maintainer  : David Lazar <lazar6@illinois.edu>
+   Maintainer  : David Lazar <lazar6@illinois.edu>,
                 John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
 Conversion of Haddock markup to 'Pandoc' document.
@ -12,22 +13,31 @@ module Text.Pandoc.Readers.Haddock
    ( readHaddock
    ) where
-import Text.Pandoc.Builder
+import Text.Pandoc.Builder (Blocks, Inlines)
 import qualified Text.Pandoc.Builder as B
 import Data.Monoid
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
-import Text.Pandoc.Readers.Haddock.Lex
+import Documentation.Haddock.Parser (parseParas, Identifier)
-import Text.Pandoc.Readers.Haddock.Parse
+import Documentation.Haddock.Types
 -- | Parse Haddock markup and return a 'Pandoc' document.
 readHaddock :: ReaderOptions -- ^ Reader options
            -> String        -- ^ String to parse
            -> Pandoc
-readHaddock _ s = Pandoc nullMeta blocks
+readHaddock _ = B.doc . docHToBlocks . parseParas
-  where
+
-    blocks = case parseParas (tokenise s (0,0)) of
+docHToBlocks :: DocH mod Identifier -> Blocks
-        Left [] -> error "parse failure"
+docHToBlocks d =
-        Left (tok:_) -> error $ "parse failure " ++ pos (tokenPos tok)
+  case d of
-          where pos (l, c) = "(line " ++ show l ++ ", column " ++ show c ++ ")"
+    DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2)
-        Right x -> mergeLists (toList x)
+    DocParagraph ils -> B.para $ docHToInlines ils
 docHToInlines :: DocH mod Identifier -> Inlines
 docHToInlines d =
  case d of
    DocAppend d1 d2 -> mappend (docHToInlines d1) (docHToInlines d2)
    DocString s -> B.text s
 -- similar to 'docAppend' in Haddock.Doc
 mergeLists :: [Block] -> [Block]
--- a/src/Text/Pandoc/Readers/Haddock/Lex.x
+++ b/src/Text/Pandoc/Readers/Haddock/Lex.x
@ -1,171 +0,0 @@
 --
 -- Haddock - A Haskell Documentation Tool
 --
 -- (c) Simon Marlow 2002
 --
 -- This file was modified and integrated into GHC by David Waern 2006.
 -- Then moved back into Haddock by Isaac Dupree in 2009 :-)
 -- Then copied into Pandoc by David Lazar in 2013 :-D
 {
 {-# LANGUAGE BangPatterns #-}   -- Generated by Alex
 {-# OPTIONS -Wwarn -w #-}
 -- The above warning supression flag is a temporary kludge.
 -- While working on this module you are encouraged to remove it and fix
 -- any warnings in the module. See
 --     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
 -- for details
 module Text.Pandoc.Readers.Haddock.Lex (
    Token(..),
    LToken,
    tokenise,
    tokenPos
 ) where
 import Data.Char
 import Numeric (readHex)
 }
 %wrapper "posn"
 $ws       = $white # \n
 $digit    = [0-9]
 $hexdigit = [0-9a-fA-F]
 $special  = [\"\@]
 $alphanum = [A-Za-z0-9]
 $ident    = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~\:]
 :-
 -- beginning of a paragraph
 <0,para> {
  $ws* \n               ;
  $ws* \>               { begin birdtrack }
  $ws* prop \> .* \n    { strtoken TokProperty `andBegin` property}
  $ws* \>\>\>           { strtoken TokExamplePrompt `andBegin` exampleexpr }
  $ws* [\*\-]           { token TokBullet `andBegin` string }
  $ws* \[               { token TokDefStart `andBegin` def }
  $ws* \( $digit+ \)    { token TokNumber `andBegin` string }
  $ws* $digit+ \.       { token TokNumber `andBegin` string }
  $ws*                  { begin string }
 }
 -- beginning of a line
 <line> {
  $ws* \>               { begin birdtrack }
  $ws* \>\>\>           { strtoken TokExamplePrompt `andBegin` exampleexpr }
  $ws* \n               { token TokPara `andBegin` para }
  -- ^ Here, we really want to be able to say
  -- $ws* (\n | <eof>)  { token TokPara `andBegin` para}
  -- because otherwise a trailing line of whitespace will result in
  -- a spurious TokString at the end of a docstring.  We don't have <eof>,
  -- though (NOW I realise what it was for :-).  To get around this, we always
  -- append \n to the end of a docstring.
  ()                    { begin string }
 }
 <birdtrack> .*  \n?     { strtokenNL TokBirdTrack `andBegin` line }
 <property> ()           { token TokPara `andBegin` para }
 <example> {
  $ws*  \n              { token TokPara `andBegin` para }
  $ws* \>\>\>           { strtoken TokExamplePrompt `andBegin` exampleexpr }
  ()                    { begin exampleresult }
 }
 <exampleexpr> .* \n     { strtokenNL TokExampleExpression `andBegin` example }
 <exampleresult> .* \n   { strtokenNL TokExampleResult `andBegin` example }
 <string,def> {
  $special              { strtoken $ \s -> TokSpecial (head s) }
  \<\< [^\>]* \>\>      { strtoken $ \s -> TokPic (init $ init $ tail $ tail s) }
  \< [^\>]* \>          { strtoken $ \s -> TokURL (init (tail s)) }
  \# [^\#]* \#          { strtoken $ \s -> TokAName (init (tail s)) }
  \/ [^\/]* \/          { strtoken $ \s -> TokEmphasis (init (tail s)) }
  [\'\`] $ident+ [\'\`] { strtoken $ \s -> TokIdent (init (tail s)) }
  \\ .                  { strtoken (TokString . tail) }
  "&#" $digit+ \;       { strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] }
  "&#" [xX] $hexdigit+ \;
        { strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] }
  -- allow special characters through if they don't fit one of the previous
  -- patterns.
  [\/\'\`\<\#\&\\]      { strtoken TokString }
  [^ $special \/ \< \# \n \'\` \& \\ \]]* \n { strtokenNL TokString `andBegin` line }
  [^ $special \/ \< \# \n \'\` \& \\ \]]+    { strtoken TokString }
 }
 <def> {
  \]                    { token TokDefEnd `andBegin` string }
 }
 -- ']' doesn't have any special meaning outside of the [...] at the beginning
 -- of a definition paragraph.
 <string> {
  \]                    { strtoken TokString }
 }
 {
 -- | A located token
 type LToken = (Token, AlexPosn)
 data Token
  = TokPara
  | TokNumber
  | TokBullet
  | TokDefStart
  | TokDefEnd
  | TokSpecial Char
  | TokIdent String
  | TokString String
  | TokURL String
  | TokPic String
  | TokEmphasis String
  | TokAName String
  | TokBirdTrack String
  | TokProperty String
  | TokExamplePrompt String
  | TokExampleExpression String
  | TokExampleResult String
  deriving Show
 tokenPos :: LToken -> (Int, Int)
 tokenPos t = let AlexPn _ line col = snd t in (line, col)
 type StartCode = Int
 type Action = AlexPosn -> String -> StartCode -> (StartCode -> [LToken]) -> [LToken]
 tokenise :: String -> (Int, Int) -> [LToken]
 tokenise str (line, col) = go (posn,'\n',[],eofHack str) para
  where posn = AlexPn 0 line col
        go inp@(pos,_,_,str) sc =
          case alexScan inp sc of
                AlexEOF -> []
                AlexError _ -> []
                AlexSkip  inp' len     -> go inp' sc
                AlexToken inp' len act -> act pos (take len str) sc (\sc -> go inp' sc)
 -- NB. we add a final \n to the string, (see comment in the beginning of line
 -- production above).
 eofHack str = str++"\n"
 andBegin :: Action -> StartCode -> Action
 andBegin act new_sc = \pos str _ cont -> act pos str new_sc cont
 token :: Token -> Action
 token t = \pos _ sc cont -> (t, pos) : cont sc
 strtoken, strtokenNL :: (String -> Token) -> Action
 strtoken t = \pos str sc cont -> (t str, pos) : cont sc
 strtokenNL t = \pos str sc cont -> (t (filter (/= '\r') str), pos) : cont sc
 -- ^ We only want LF line endings in our internal doc string format, so we
 -- filter out all CRs.
 begin :: StartCode -> Action
 begin sc = \_ _ _ cont -> cont sc
 }
--- a/src/Text/Pandoc/Readers/Haddock/Parse.y
+++ b/src/Text/Pandoc/Readers/Haddock/Parse.y
@ -1,178 +0,0 @@
 -- This code was copied from the 'haddock' package, modified, and integrated
 -- into Pandoc by David Lazar.
 {
 {-# LANGUAGE BangPatterns #-} -- required for versions of Happy before 1.18.6
 -- The above warning supression flag is a temporary kludge.
 -- While working on this module you are encouraged to remove it and fix
 -- any warnings in the module. See
 --     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
 -- for details
 module Text.Pandoc.Readers.Haddock.Parse (parseString, parseParas) where
 import Text.Pandoc.Readers.Haddock.Lex
 import Text.Pandoc.Builder
 import Text.Pandoc.Shared (trim, trimr)
 import Data.Generics (everywhere, mkT)
 import Data.Char  (isSpace)
 import Data.Maybe (fromMaybe)
 import Data.List  (stripPrefix, intersperse)
 import Data.Monoid (mempty, mconcat)
 }
 %expect 0
 %tokentype  { LToken }
 %token
    '/'     { (TokSpecial '/',_) }
    '@'     { (TokSpecial '@',_) }
    '['     { (TokDefStart,_) }
    ']'     { (TokDefEnd,_) }
    DQUO    { (TokSpecial '\"',_) }
    URL     { (TokURL $$,_) }
    PIC     { (TokPic $$,_) }
    ANAME   { (TokAName $$,_) }
    '/../'  { (TokEmphasis $$,_) }
    '-'     { (TokBullet,_) }
    '(n)'   { (TokNumber,_) }
    '>..'   { (TokBirdTrack $$,_) }
    PROP    { (TokProperty $$,_) }
    PROMPT  { (TokExamplePrompt $$,_) }
    RESULT  { (TokExampleResult $$,_) }
    EXP     { (TokExampleExpression $$,_) }
    IDENT   { (TokIdent $$,_) }
    PARA    { (TokPara,_) }
    STRING  { (TokString $$,_) }
 %monad { Either [LToken] }
 %name parseParas doc
 %name parseString seq
 %%
 doc :: { Blocks }
    : apara PARA doc    { $1 <> $3 }
    | PARA doc          { $2 }
    | apara             { $1 }
    | {- empty -}       { mempty }
 apara :: { Blocks }
    : ulpara            { bulletList [$1] }
    | olpara            { orderedList [$1] }
    | defpara           { definitionList [$1] }
    | para              { $1 }
 ulpara :: { Blocks }
    : '-' para          { $2 }
 olpara  :: { Blocks }
    : '(n)' para        { $2 }
 defpara :: { (Inlines, [Blocks]) }
    : '[' seq ']' seq   { (trimInlines $2, [plain $ trimInlines $4]) }
 para :: { Blocks }
    : seq               { para' $1 }
    | codepara          { codeBlockWith ([], ["haskell"], []) $1 }
    | property          { $1 }
    | examples          { $1 }
 codepara :: { String }
    : '>..' codepara    { $1 ++ $2 }
    | '>..'             { $1 }
 property :: { Blocks }
    : PROP              { makeProperty $1 }
 examples :: { Blocks }
    : example examples  { $1 <> $2 }
    | example           { $1 }
 example :: { Blocks }
    : PROMPT EXP result { makeExample $1 $2 (lines $3) }
    | PROMPT EXP        { makeExample $1 $2 [] }
 result :: { String }
    : RESULT result     { $1 ++ $2 }
    | RESULT            { $1 }
 seq :: { Inlines }
    : elem seq          { $1 <> $2 }
    | elem              { $1 }
 elem :: { Inlines }
    : elem1             { $1 }
    | '@' seq1 '@'      { monospace $2 }
 seq1 :: { Inlines }
    : PARA seq1         { linebreak <> $2 }
    | elem1 seq1        { $1 <> $2 }
    | elem1             { $1 }
 elem1 :: { Inlines }
    : STRING            { text $1 }
    | '/../'            { emph (str $1) }
    | URL               { makeHyperlink $1 }
    | PIC               { image $1 $1 mempty }
    | ANAME             { mempty } -- TODO
    | IDENT             { codeWith ([], ["haskell"], []) $1 }
    | DQUO strings DQUO { codeWith ([], ["haskell"], []) $2 }
 strings :: { String }
    : STRING            { $1 }
    | STRING strings    { $1 ++ $2 }
 {
 happyError :: [LToken] -> Either [LToken] a
 happyError toks = Left toks
 para' :: Inlines -> Blocks
 para' = para . trimInlines
 monospace :: Inlines -> Inlines
 monospace = everywhere (mkT go)
  where
    go (Str s) = Code nullAttr s
    go x = x
 -- | Create a `Hyperlink` from given string.
 --
 -- A hyperlink consists of a URL and an optional label.  The label is separated
 -- from the url by one or more whitespace characters.
 makeHyperlink :: String -> Inlines
 makeHyperlink input = case break isSpace $ trim input of
  (url, "") -> link url url (str url)
  (url, lb) -> link url url (trimInlines $ text lb)
 makeProperty :: String -> Blocks
 makeProperty s = case trim s of
  'p':'r':'o':'p':'>':xs ->
    codeBlockWith ([], ["property"], []) (dropWhile isSpace xs)
  xs ->
    error $ "makeProperty: invalid input " ++ show xs
 -- | Create an 'Example', stripping superfluous characters as appropriate
 makeExample :: String -> String -> [String] -> Blocks
 makeExample prompt expression result =
    para $ codeWith ([], ["haskell","expr"], []) (trim expression)
        <> linebreak
        <> (mconcat $ intersperse linebreak $ map coder result')
  where
    -- 1. drop trailing whitespace from the prompt, remember the prefix
    prefix = takeWhile isSpace prompt
    -- 2. drop, if possible, the exact same sequence of whitespace
    -- characters from each result line
    --
    -- 3. interpret lines that only contain the string "<BLANKLINE>" as an
    -- empty line
    result' = map (substituteBlankLine . tryStripPrefix prefix) result
      where
        tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys
        substituteBlankLine "<BLANKLINE>" = ""
        substituteBlankLine line          = line
    coder = codeWith ([], ["result"], [])
 }