Removed old haddock reader code. Add dependency on haddock-library.

This also removes the dependency on alex and happy.
This commit is contained in:
John MacFarlane 2014-06-18 11:33:09 -07:00
parent b371e83d73
commit ab390a10ec
5 changed files with 25 additions and 370 deletions

View file

@ -12,11 +12,7 @@ Quick install
-------------
1. Install the [Haskell platform]. This will give you [GHC] and
the [cabal-install] build tool, as well as `alex` and `happy`.
If you do not use the Haskell platform, you'll need to install
`alex` and `happy` separately:
cabal install alex happy
the [cabal-install] build tool.
2. Update your package database:

View file

@ -255,8 +255,8 @@ Library
vector >= 0.10 && < 0.11,
hslua >= 0.3 && < 0.4,
binary >= 0.5 && < 0.8,
SHA >= 1.6 && < 1.7
Build-Tools: alex, happy
SHA >= 1.6 && < 1.7,
haddock-library >= 1.0 && < 1.1
if flag(https)
Build-Depends: http-client >= 0.3.2 && < 0.4,
http-client-tls >= 0.2 && < 0.3,
@ -322,9 +322,7 @@ Library
Text.Pandoc.XML,
Text.Pandoc.SelfContained,
Text.Pandoc.Process
Other-Modules: Text.Pandoc.Readers.Haddock.Lex,
Text.Pandoc.Readers.Haddock.Parse,
Text.Pandoc.Readers.Docx.Lists,
Other-Modules: Text.Pandoc.Readers.Docx.Lists,
Text.Pandoc.Readers.Docx.Parse,
Text.Pandoc.Writers.Shared,
Text.Pandoc.Asciify,

View file

@ -3,7 +3,8 @@
Copyright : Copyright (C) 2013 David Lazar
License : GNU GPL, version 2 or above
Maintainer : David Lazar <lazar6@illinois.edu>
Maintainer : David Lazar <lazar6@illinois.edu>,
John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Conversion of Haddock markup to 'Pandoc' document.
@ -12,22 +13,31 @@ module Text.Pandoc.Readers.Haddock
( readHaddock
) where
import Text.Pandoc.Builder
import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
import Data.Monoid
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Readers.Haddock.Lex
import Text.Pandoc.Readers.Haddock.Parse
import Documentation.Haddock.Parser (parseParas, Identifier)
import Documentation.Haddock.Types
-- | Parse Haddock markup and return a 'Pandoc' document.
readHaddock :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse
-> Pandoc
readHaddock _ s = Pandoc nullMeta blocks
where
blocks = case parseParas (tokenise s (0,0)) of
Left [] -> error "parse failure"
Left (tok:_) -> error $ "parse failure " ++ pos (tokenPos tok)
where pos (l, c) = "(line " ++ show l ++ ", column " ++ show c ++ ")"
Right x -> mergeLists (toList x)
readHaddock _ = B.doc . docHToBlocks . parseParas
docHToBlocks :: DocH mod Identifier -> Blocks
docHToBlocks d =
case d of
DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2)
DocParagraph ils -> B.para $ docHToInlines ils
docHToInlines :: DocH mod Identifier -> Inlines
docHToInlines d =
case d of
DocAppend d1 d2 -> mappend (docHToInlines d1) (docHToInlines d2)
DocString s -> B.text s
-- similar to 'docAppend' in Haddock.Doc
mergeLists :: [Block] -> [Block]

View file

@ -1,171 +0,0 @@
--
-- Haddock - A Haskell Documentation Tool
--
-- (c) Simon Marlow 2002
--
-- This file was modified and integrated into GHC by David Waern 2006.
-- Then moved back into Haddock by Isaac Dupree in 2009 :-)
-- Then copied into Pandoc by David Lazar in 2013 :-D
{
{-# LANGUAGE BangPatterns #-} -- Generated by Alex
{-# OPTIONS -Wwarn -w #-}
-- The above warning supression flag is a temporary kludge.
-- While working on this module you are encouraged to remove it and fix
-- any warnings in the module. See
-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
-- for details
module Text.Pandoc.Readers.Haddock.Lex (
Token(..),
LToken,
tokenise,
tokenPos
) where
import Data.Char
import Numeric (readHex)
}
%wrapper "posn"
$ws = $white # \n
$digit = [0-9]
$hexdigit = [0-9a-fA-F]
$special = [\"\@]
$alphanum = [A-Za-z0-9]
$ident = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~\:]
:-
-- beginning of a paragraph
<0,para> {
$ws* \n ;
$ws* \> { begin birdtrack }
$ws* prop \> .* \n { strtoken TokProperty `andBegin` property}
$ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr }
$ws* [\*\-] { token TokBullet `andBegin` string }
$ws* \[ { token TokDefStart `andBegin` def }
$ws* \( $digit+ \) { token TokNumber `andBegin` string }
$ws* $digit+ \. { token TokNumber `andBegin` string }
$ws* { begin string }
}
-- beginning of a line
<line> {
$ws* \> { begin birdtrack }
$ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr }
$ws* \n { token TokPara `andBegin` para }
-- ^ Here, we really want to be able to say
-- $ws* (\n | <eof>) { token TokPara `andBegin` para}
-- because otherwise a trailing line of whitespace will result in
-- a spurious TokString at the end of a docstring. We don't have <eof>,
-- though (NOW I realise what it was for :-). To get around this, we always
-- append \n to the end of a docstring.
() { begin string }
}
<birdtrack> .* \n? { strtokenNL TokBirdTrack `andBegin` line }
<property> () { token TokPara `andBegin` para }
<example> {
$ws* \n { token TokPara `andBegin` para }
$ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr }
() { begin exampleresult }
}
<exampleexpr> .* \n { strtokenNL TokExampleExpression `andBegin` example }
<exampleresult> .* \n { strtokenNL TokExampleResult `andBegin` example }
<string,def> {
$special { strtoken $ \s -> TokSpecial (head s) }
\<\< [^\>]* \>\> { strtoken $ \s -> TokPic (init $ init $ tail $ tail s) }
\< [^\>]* \> { strtoken $ \s -> TokURL (init (tail s)) }
\# [^\#]* \# { strtoken $ \s -> TokAName (init (tail s)) }
\/ [^\/]* \/ { strtoken $ \s -> TokEmphasis (init (tail s)) }
[\'\`] $ident+ [\'\`] { strtoken $ \s -> TokIdent (init (tail s)) }
\\ . { strtoken (TokString . tail) }
"&#" $digit+ \; { strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] }
"&#" [xX] $hexdigit+ \;
{ strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] }
-- allow special characters through if they don't fit one of the previous
-- patterns.
[\/\'\`\<\#\&\\] { strtoken TokString }
[^ $special \/ \< \# \n \'\` \& \\ \]]* \n { strtokenNL TokString `andBegin` line }
[^ $special \/ \< \# \n \'\` \& \\ \]]+ { strtoken TokString }
}
<def> {
\] { token TokDefEnd `andBegin` string }
}
-- ']' doesn't have any special meaning outside of the [...] at the beginning
-- of a definition paragraph.
<string> {
\] { strtoken TokString }
}
{
-- | A located token
type LToken = (Token, AlexPosn)
data Token
= TokPara
| TokNumber
| TokBullet
| TokDefStart
| TokDefEnd
| TokSpecial Char
| TokIdent String
| TokString String
| TokURL String
| TokPic String
| TokEmphasis String
| TokAName String
| TokBirdTrack String
| TokProperty String
| TokExamplePrompt String
| TokExampleExpression String
| TokExampleResult String
deriving Show
tokenPos :: LToken -> (Int, Int)
tokenPos t = let AlexPn _ line col = snd t in (line, col)
type StartCode = Int
type Action = AlexPosn -> String -> StartCode -> (StartCode -> [LToken]) -> [LToken]
tokenise :: String -> (Int, Int) -> [LToken]
tokenise str (line, col) = go (posn,'\n',[],eofHack str) para
where posn = AlexPn 0 line col
go inp@(pos,_,_,str) sc =
case alexScan inp sc of
AlexEOF -> []
AlexError _ -> []
AlexSkip inp' len -> go inp' sc
AlexToken inp' len act -> act pos (take len str) sc (\sc -> go inp' sc)
-- NB. we add a final \n to the string, (see comment in the beginning of line
-- production above).
eofHack str = str++"\n"
andBegin :: Action -> StartCode -> Action
andBegin act new_sc = \pos str _ cont -> act pos str new_sc cont
token :: Token -> Action
token t = \pos _ sc cont -> (t, pos) : cont sc
strtoken, strtokenNL :: (String -> Token) -> Action
strtoken t = \pos str sc cont -> (t str, pos) : cont sc
strtokenNL t = \pos str sc cont -> (t (filter (/= '\r') str), pos) : cont sc
-- ^ We only want LF line endings in our internal doc string format, so we
-- filter out all CRs.
begin :: StartCode -> Action
begin sc = \_ _ _ cont -> cont sc
}

View file

@ -1,178 +0,0 @@
-- This code was copied from the 'haddock' package, modified, and integrated
-- into Pandoc by David Lazar.
{
{-# LANGUAGE BangPatterns #-} -- required for versions of Happy before 1.18.6
-- The above warning supression flag is a temporary kludge.
-- While working on this module you are encouraged to remove it and fix
-- any warnings in the module. See
-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
-- for details
module Text.Pandoc.Readers.Haddock.Parse (parseString, parseParas) where
import Text.Pandoc.Readers.Haddock.Lex
import Text.Pandoc.Builder
import Text.Pandoc.Shared (trim, trimr)
import Data.Generics (everywhere, mkT)
import Data.Char (isSpace)
import Data.Maybe (fromMaybe)
import Data.List (stripPrefix, intersperse)
import Data.Monoid (mempty, mconcat)
}
%expect 0
%tokentype { LToken }
%token
'/' { (TokSpecial '/',_) }
'@' { (TokSpecial '@',_) }
'[' { (TokDefStart,_) }
']' { (TokDefEnd,_) }
DQUO { (TokSpecial '\"',_) }
URL { (TokURL $$,_) }
PIC { (TokPic $$,_) }
ANAME { (TokAName $$,_) }
'/../' { (TokEmphasis $$,_) }
'-' { (TokBullet,_) }
'(n)' { (TokNumber,_) }
'>..' { (TokBirdTrack $$,_) }
PROP { (TokProperty $$,_) }
PROMPT { (TokExamplePrompt $$,_) }
RESULT { (TokExampleResult $$,_) }
EXP { (TokExampleExpression $$,_) }
IDENT { (TokIdent $$,_) }
PARA { (TokPara,_) }
STRING { (TokString $$,_) }
%monad { Either [LToken] }
%name parseParas doc
%name parseString seq
%%
doc :: { Blocks }
: apara PARA doc { $1 <> $3 }
| PARA doc { $2 }
| apara { $1 }
| {- empty -} { mempty }
apara :: { Blocks }
: ulpara { bulletList [$1] }
| olpara { orderedList [$1] }
| defpara { definitionList [$1] }
| para { $1 }
ulpara :: { Blocks }
: '-' para { $2 }
olpara :: { Blocks }
: '(n)' para { $2 }
defpara :: { (Inlines, [Blocks]) }
: '[' seq ']' seq { (trimInlines $2, [plain $ trimInlines $4]) }
para :: { Blocks }
: seq { para' $1 }
| codepara { codeBlockWith ([], ["haskell"], []) $1 }
| property { $1 }
| examples { $1 }
codepara :: { String }
: '>..' codepara { $1 ++ $2 }
| '>..' { $1 }
property :: { Blocks }
: PROP { makeProperty $1 }
examples :: { Blocks }
: example examples { $1 <> $2 }
| example { $1 }
example :: { Blocks }
: PROMPT EXP result { makeExample $1 $2 (lines $3) }
| PROMPT EXP { makeExample $1 $2 [] }
result :: { String }
: RESULT result { $1 ++ $2 }
| RESULT { $1 }
seq :: { Inlines }
: elem seq { $1 <> $2 }
| elem { $1 }
elem :: { Inlines }
: elem1 { $1 }
| '@' seq1 '@' { monospace $2 }
seq1 :: { Inlines }
: PARA seq1 { linebreak <> $2 }
| elem1 seq1 { $1 <> $2 }
| elem1 { $1 }
elem1 :: { Inlines }
: STRING { text $1 }
| '/../' { emph (str $1) }
| URL { makeHyperlink $1 }
| PIC { image $1 $1 mempty }
| ANAME { mempty } -- TODO
| IDENT { codeWith ([], ["haskell"], []) $1 }
| DQUO strings DQUO { codeWith ([], ["haskell"], []) $2 }
strings :: { String }
: STRING { $1 }
| STRING strings { $1 ++ $2 }
{
happyError :: [LToken] -> Either [LToken] a
happyError toks = Left toks
para' :: Inlines -> Blocks
para' = para . trimInlines
monospace :: Inlines -> Inlines
monospace = everywhere (mkT go)
where
go (Str s) = Code nullAttr s
go x = x
-- | Create a `Hyperlink` from given string.
--
-- A hyperlink consists of a URL and an optional label. The label is separated
-- from the url by one or more whitespace characters.
makeHyperlink :: String -> Inlines
makeHyperlink input = case break isSpace $ trim input of
(url, "") -> link url url (str url)
(url, lb) -> link url url (trimInlines $ text lb)
makeProperty :: String -> Blocks
makeProperty s = case trim s of
'p':'r':'o':'p':'>':xs ->
codeBlockWith ([], ["property"], []) (dropWhile isSpace xs)
xs ->
error $ "makeProperty: invalid input " ++ show xs
-- | Create an 'Example', stripping superfluous characters as appropriate
makeExample :: String -> String -> [String] -> Blocks
makeExample prompt expression result =
para $ codeWith ([], ["haskell","expr"], []) (trim expression)
<> linebreak
<> (mconcat $ intersperse linebreak $ map coder result')
where
-- 1. drop trailing whitespace from the prompt, remember the prefix
prefix = takeWhile isSpace prompt
-- 2. drop, if possible, the exact same sequence of whitespace
-- characters from each result line
--
-- 3. interpret lines that only contain the string "<BLANKLINE>" as an
-- empty line
result' = map (substituteBlankLine . tryStripPrefix prefix) result
where
tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys
substituteBlankLine "<BLANKLINE>" = ""
substituteBlankLine line = line
coder = codeWith ([], ["result"], [])
}