Removed old haddock reader code. Add dependency on haddock-library.
This also removes the dependency on alex and happy.
This commit is contained in:
parent
b371e83d73
commit
ab390a10ec
5 changed files with 25 additions and 370 deletions
6
INSTALL
6
INSTALL
|
@ -12,11 +12,7 @@ Quick install
|
||||||
-------------
|
-------------
|
||||||
|
|
||||||
1. Install the [Haskell platform]. This will give you [GHC] and
|
1. Install the [Haskell platform]. This will give you [GHC] and
|
||||||
the [cabal-install] build tool, as well as `alex` and `happy`.
|
the [cabal-install] build tool.
|
||||||
If you do not use the Haskell platform, you'll need to install
|
|
||||||
`alex` and `happy` separately:
|
|
||||||
|
|
||||||
cabal install alex happy
|
|
||||||
|
|
||||||
2. Update your package database:
|
2. Update your package database:
|
||||||
|
|
||||||
|
|
|
@ -255,8 +255,8 @@ Library
|
||||||
vector >= 0.10 && < 0.11,
|
vector >= 0.10 && < 0.11,
|
||||||
hslua >= 0.3 && < 0.4,
|
hslua >= 0.3 && < 0.4,
|
||||||
binary >= 0.5 && < 0.8,
|
binary >= 0.5 && < 0.8,
|
||||||
SHA >= 1.6 && < 1.7
|
SHA >= 1.6 && < 1.7,
|
||||||
Build-Tools: alex, happy
|
haddock-library >= 1.0 && < 1.1
|
||||||
if flag(https)
|
if flag(https)
|
||||||
Build-Depends: http-client >= 0.3.2 && < 0.4,
|
Build-Depends: http-client >= 0.3.2 && < 0.4,
|
||||||
http-client-tls >= 0.2 && < 0.3,
|
http-client-tls >= 0.2 && < 0.3,
|
||||||
|
@ -322,9 +322,7 @@ Library
|
||||||
Text.Pandoc.XML,
|
Text.Pandoc.XML,
|
||||||
Text.Pandoc.SelfContained,
|
Text.Pandoc.SelfContained,
|
||||||
Text.Pandoc.Process
|
Text.Pandoc.Process
|
||||||
Other-Modules: Text.Pandoc.Readers.Haddock.Lex,
|
Other-Modules: Text.Pandoc.Readers.Docx.Lists,
|
||||||
Text.Pandoc.Readers.Haddock.Parse,
|
|
||||||
Text.Pandoc.Readers.Docx.Lists,
|
|
||||||
Text.Pandoc.Readers.Docx.Parse,
|
Text.Pandoc.Readers.Docx.Parse,
|
||||||
Text.Pandoc.Writers.Shared,
|
Text.Pandoc.Writers.Shared,
|
||||||
Text.Pandoc.Asciify,
|
Text.Pandoc.Asciify,
|
||||||
|
|
|
@ -3,7 +3,8 @@
|
||||||
Copyright : Copyright (C) 2013 David Lazar
|
Copyright : Copyright (C) 2013 David Lazar
|
||||||
License : GNU GPL, version 2 or above
|
License : GNU GPL, version 2 or above
|
||||||
|
|
||||||
Maintainer : David Lazar <lazar6@illinois.edu>
|
Maintainer : David Lazar <lazar6@illinois.edu>,
|
||||||
|
John MacFarlane <jgm@berkeley.edu>
|
||||||
Stability : alpha
|
Stability : alpha
|
||||||
|
|
||||||
Conversion of Haddock markup to 'Pandoc' document.
|
Conversion of Haddock markup to 'Pandoc' document.
|
||||||
|
@ -12,22 +13,31 @@ module Text.Pandoc.Readers.Haddock
|
||||||
( readHaddock
|
( readHaddock
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Text.Pandoc.Builder
|
import Text.Pandoc.Builder (Blocks, Inlines)
|
||||||
|
import qualified Text.Pandoc.Builder as B
|
||||||
|
import Data.Monoid
|
||||||
|
import Text.Pandoc.Definition
|
||||||
import Text.Pandoc.Options
|
import Text.Pandoc.Options
|
||||||
import Text.Pandoc.Readers.Haddock.Lex
|
import Documentation.Haddock.Parser (parseParas, Identifier)
|
||||||
import Text.Pandoc.Readers.Haddock.Parse
|
import Documentation.Haddock.Types
|
||||||
|
|
||||||
-- | Parse Haddock markup and return a 'Pandoc' document.
|
-- | Parse Haddock markup and return a 'Pandoc' document.
|
||||||
readHaddock :: ReaderOptions -- ^ Reader options
|
readHaddock :: ReaderOptions -- ^ Reader options
|
||||||
-> String -- ^ String to parse
|
-> String -- ^ String to parse
|
||||||
-> Pandoc
|
-> Pandoc
|
||||||
readHaddock _ s = Pandoc nullMeta blocks
|
readHaddock _ = B.doc . docHToBlocks . parseParas
|
||||||
where
|
|
||||||
blocks = case parseParas (tokenise s (0,0)) of
|
docHToBlocks :: DocH mod Identifier -> Blocks
|
||||||
Left [] -> error "parse failure"
|
docHToBlocks d =
|
||||||
Left (tok:_) -> error $ "parse failure " ++ pos (tokenPos tok)
|
case d of
|
||||||
where pos (l, c) = "(line " ++ show l ++ ", column " ++ show c ++ ")"
|
DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2)
|
||||||
Right x -> mergeLists (toList x)
|
DocParagraph ils -> B.para $ docHToInlines ils
|
||||||
|
|
||||||
|
docHToInlines :: DocH mod Identifier -> Inlines
|
||||||
|
docHToInlines d =
|
||||||
|
case d of
|
||||||
|
DocAppend d1 d2 -> mappend (docHToInlines d1) (docHToInlines d2)
|
||||||
|
DocString s -> B.text s
|
||||||
|
|
||||||
-- similar to 'docAppend' in Haddock.Doc
|
-- similar to 'docAppend' in Haddock.Doc
|
||||||
mergeLists :: [Block] -> [Block]
|
mergeLists :: [Block] -> [Block]
|
||||||
|
|
|
@ -1,171 +0,0 @@
|
||||||
--
|
|
||||||
-- Haddock - A Haskell Documentation Tool
|
|
||||||
--
|
|
||||||
-- (c) Simon Marlow 2002
|
|
||||||
--
|
|
||||||
-- This file was modified and integrated into GHC by David Waern 2006.
|
|
||||||
-- Then moved back into Haddock by Isaac Dupree in 2009 :-)
|
|
||||||
-- Then copied into Pandoc by David Lazar in 2013 :-D
|
|
||||||
|
|
||||||
{
|
|
||||||
{-# LANGUAGE BangPatterns #-} -- Generated by Alex
|
|
||||||
{-# OPTIONS -Wwarn -w #-}
|
|
||||||
-- The above warning supression flag is a temporary kludge.
|
|
||||||
-- While working on this module you are encouraged to remove it and fix
|
|
||||||
-- any warnings in the module. See
|
|
||||||
-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
|
|
||||||
-- for details
|
|
||||||
|
|
||||||
module Text.Pandoc.Readers.Haddock.Lex (
|
|
||||||
Token(..),
|
|
||||||
LToken,
|
|
||||||
tokenise,
|
|
||||||
tokenPos
|
|
||||||
) where
|
|
||||||
|
|
||||||
import Data.Char
|
|
||||||
import Numeric (readHex)
|
|
||||||
}
|
|
||||||
|
|
||||||
%wrapper "posn"
|
|
||||||
|
|
||||||
$ws = $white # \n
|
|
||||||
$digit = [0-9]
|
|
||||||
$hexdigit = [0-9a-fA-F]
|
|
||||||
$special = [\"\@]
|
|
||||||
$alphanum = [A-Za-z0-9]
|
|
||||||
$ident = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~\:]
|
|
||||||
|
|
||||||
:-
|
|
||||||
|
|
||||||
-- beginning of a paragraph
|
|
||||||
<0,para> {
|
|
||||||
$ws* \n ;
|
|
||||||
$ws* \> { begin birdtrack }
|
|
||||||
$ws* prop \> .* \n { strtoken TokProperty `andBegin` property}
|
|
||||||
$ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr }
|
|
||||||
$ws* [\*\-] { token TokBullet `andBegin` string }
|
|
||||||
$ws* \[ { token TokDefStart `andBegin` def }
|
|
||||||
$ws* \( $digit+ \) { token TokNumber `andBegin` string }
|
|
||||||
$ws* $digit+ \. { token TokNumber `andBegin` string }
|
|
||||||
$ws* { begin string }
|
|
||||||
}
|
|
||||||
|
|
||||||
-- beginning of a line
|
|
||||||
<line> {
|
|
||||||
$ws* \> { begin birdtrack }
|
|
||||||
$ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr }
|
|
||||||
|
|
||||||
$ws* \n { token TokPara `andBegin` para }
|
|
||||||
-- ^ Here, we really want to be able to say
|
|
||||||
-- $ws* (\n | <eof>) { token TokPara `andBegin` para}
|
|
||||||
-- because otherwise a trailing line of whitespace will result in
|
|
||||||
-- a spurious TokString at the end of a docstring. We don't have <eof>,
|
|
||||||
-- though (NOW I realise what it was for :-). To get around this, we always
|
|
||||||
-- append \n to the end of a docstring.
|
|
||||||
|
|
||||||
() { begin string }
|
|
||||||
}
|
|
||||||
|
|
||||||
<birdtrack> .* \n? { strtokenNL TokBirdTrack `andBegin` line }
|
|
||||||
|
|
||||||
<property> () { token TokPara `andBegin` para }
|
|
||||||
|
|
||||||
<example> {
|
|
||||||
$ws* \n { token TokPara `andBegin` para }
|
|
||||||
$ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr }
|
|
||||||
() { begin exampleresult }
|
|
||||||
}
|
|
||||||
|
|
||||||
<exampleexpr> .* \n { strtokenNL TokExampleExpression `andBegin` example }
|
|
||||||
|
|
||||||
<exampleresult> .* \n { strtokenNL TokExampleResult `andBegin` example }
|
|
||||||
|
|
||||||
<string,def> {
|
|
||||||
$special { strtoken $ \s -> TokSpecial (head s) }
|
|
||||||
\<\< [^\>]* \>\> { strtoken $ \s -> TokPic (init $ init $ tail $ tail s) }
|
|
||||||
\< [^\>]* \> { strtoken $ \s -> TokURL (init (tail s)) }
|
|
||||||
\# [^\#]* \# { strtoken $ \s -> TokAName (init (tail s)) }
|
|
||||||
\/ [^\/]* \/ { strtoken $ \s -> TokEmphasis (init (tail s)) }
|
|
||||||
[\'\`] $ident+ [\'\`] { strtoken $ \s -> TokIdent (init (tail s)) }
|
|
||||||
\\ . { strtoken (TokString . tail) }
|
|
||||||
"&#" $digit+ \; { strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] }
|
|
||||||
"&#" [xX] $hexdigit+ \;
|
|
||||||
{ strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] }
|
|
||||||
-- allow special characters through if they don't fit one of the previous
|
|
||||||
-- patterns.
|
|
||||||
[\/\'\`\<\#\&\\] { strtoken TokString }
|
|
||||||
[^ $special \/ \< \# \n \'\` \& \\ \]]* \n { strtokenNL TokString `andBegin` line }
|
|
||||||
[^ $special \/ \< \# \n \'\` \& \\ \]]+ { strtoken TokString }
|
|
||||||
}
|
|
||||||
|
|
||||||
<def> {
|
|
||||||
\] { token TokDefEnd `andBegin` string }
|
|
||||||
}
|
|
||||||
|
|
||||||
-- ']' doesn't have any special meaning outside of the [...] at the beginning
|
|
||||||
-- of a definition paragraph.
|
|
||||||
<string> {
|
|
||||||
\] { strtoken TokString }
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
-- | A located token
|
|
||||||
type LToken = (Token, AlexPosn)
|
|
||||||
|
|
||||||
data Token
|
|
||||||
= TokPara
|
|
||||||
| TokNumber
|
|
||||||
| TokBullet
|
|
||||||
| TokDefStart
|
|
||||||
| TokDefEnd
|
|
||||||
| TokSpecial Char
|
|
||||||
| TokIdent String
|
|
||||||
| TokString String
|
|
||||||
| TokURL String
|
|
||||||
| TokPic String
|
|
||||||
| TokEmphasis String
|
|
||||||
| TokAName String
|
|
||||||
| TokBirdTrack String
|
|
||||||
| TokProperty String
|
|
||||||
| TokExamplePrompt String
|
|
||||||
| TokExampleExpression String
|
|
||||||
| TokExampleResult String
|
|
||||||
deriving Show
|
|
||||||
|
|
||||||
tokenPos :: LToken -> (Int, Int)
|
|
||||||
tokenPos t = let AlexPn _ line col = snd t in (line, col)
|
|
||||||
|
|
||||||
type StartCode = Int
|
|
||||||
type Action = AlexPosn -> String -> StartCode -> (StartCode -> [LToken]) -> [LToken]
|
|
||||||
|
|
||||||
tokenise :: String -> (Int, Int) -> [LToken]
|
|
||||||
tokenise str (line, col) = go (posn,'\n',[],eofHack str) para
|
|
||||||
where posn = AlexPn 0 line col
|
|
||||||
go inp@(pos,_,_,str) sc =
|
|
||||||
case alexScan inp sc of
|
|
||||||
AlexEOF -> []
|
|
||||||
AlexError _ -> []
|
|
||||||
AlexSkip inp' len -> go inp' sc
|
|
||||||
AlexToken inp' len act -> act pos (take len str) sc (\sc -> go inp' sc)
|
|
||||||
|
|
||||||
-- NB. we add a final \n to the string, (see comment in the beginning of line
|
|
||||||
-- production above).
|
|
||||||
eofHack str = str++"\n"
|
|
||||||
|
|
||||||
andBegin :: Action -> StartCode -> Action
|
|
||||||
andBegin act new_sc = \pos str _ cont -> act pos str new_sc cont
|
|
||||||
|
|
||||||
token :: Token -> Action
|
|
||||||
token t = \pos _ sc cont -> (t, pos) : cont sc
|
|
||||||
|
|
||||||
strtoken, strtokenNL :: (String -> Token) -> Action
|
|
||||||
strtoken t = \pos str sc cont -> (t str, pos) : cont sc
|
|
||||||
strtokenNL t = \pos str sc cont -> (t (filter (/= '\r') str), pos) : cont sc
|
|
||||||
-- ^ We only want LF line endings in our internal doc string format, so we
|
|
||||||
-- filter out all CRs.
|
|
||||||
|
|
||||||
begin :: StartCode -> Action
|
|
||||||
begin sc = \_ _ _ cont -> cont sc
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,178 +0,0 @@
|
||||||
-- This code was copied from the 'haddock' package, modified, and integrated
|
|
||||||
-- into Pandoc by David Lazar.
|
|
||||||
{
|
|
||||||
{-# LANGUAGE BangPatterns #-} -- required for versions of Happy before 1.18.6
|
|
||||||
-- The above warning supression flag is a temporary kludge.
|
|
||||||
-- While working on this module you are encouraged to remove it and fix
|
|
||||||
-- any warnings in the module. See
|
|
||||||
-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
|
|
||||||
-- for details
|
|
||||||
|
|
||||||
module Text.Pandoc.Readers.Haddock.Parse (parseString, parseParas) where
|
|
||||||
|
|
||||||
import Text.Pandoc.Readers.Haddock.Lex
|
|
||||||
import Text.Pandoc.Builder
|
|
||||||
import Text.Pandoc.Shared (trim, trimr)
|
|
||||||
import Data.Generics (everywhere, mkT)
|
|
||||||
import Data.Char (isSpace)
|
|
||||||
import Data.Maybe (fromMaybe)
|
|
||||||
import Data.List (stripPrefix, intersperse)
|
|
||||||
import Data.Monoid (mempty, mconcat)
|
|
||||||
}
|
|
||||||
|
|
||||||
%expect 0
|
|
||||||
|
|
||||||
%tokentype { LToken }
|
|
||||||
|
|
||||||
%token
|
|
||||||
'/' { (TokSpecial '/',_) }
|
|
||||||
'@' { (TokSpecial '@',_) }
|
|
||||||
'[' { (TokDefStart,_) }
|
|
||||||
']' { (TokDefEnd,_) }
|
|
||||||
DQUO { (TokSpecial '\"',_) }
|
|
||||||
URL { (TokURL $$,_) }
|
|
||||||
PIC { (TokPic $$,_) }
|
|
||||||
ANAME { (TokAName $$,_) }
|
|
||||||
'/../' { (TokEmphasis $$,_) }
|
|
||||||
'-' { (TokBullet,_) }
|
|
||||||
'(n)' { (TokNumber,_) }
|
|
||||||
'>..' { (TokBirdTrack $$,_) }
|
|
||||||
PROP { (TokProperty $$,_) }
|
|
||||||
PROMPT { (TokExamplePrompt $$,_) }
|
|
||||||
RESULT { (TokExampleResult $$,_) }
|
|
||||||
EXP { (TokExampleExpression $$,_) }
|
|
||||||
IDENT { (TokIdent $$,_) }
|
|
||||||
PARA { (TokPara,_) }
|
|
||||||
STRING { (TokString $$,_) }
|
|
||||||
|
|
||||||
%monad { Either [LToken] }
|
|
||||||
|
|
||||||
%name parseParas doc
|
|
||||||
%name parseString seq
|
|
||||||
|
|
||||||
%%
|
|
||||||
|
|
||||||
doc :: { Blocks }
|
|
||||||
: apara PARA doc { $1 <> $3 }
|
|
||||||
| PARA doc { $2 }
|
|
||||||
| apara { $1 }
|
|
||||||
| {- empty -} { mempty }
|
|
||||||
|
|
||||||
apara :: { Blocks }
|
|
||||||
: ulpara { bulletList [$1] }
|
|
||||||
| olpara { orderedList [$1] }
|
|
||||||
| defpara { definitionList [$1] }
|
|
||||||
| para { $1 }
|
|
||||||
|
|
||||||
ulpara :: { Blocks }
|
|
||||||
: '-' para { $2 }
|
|
||||||
|
|
||||||
olpara :: { Blocks }
|
|
||||||
: '(n)' para { $2 }
|
|
||||||
|
|
||||||
defpara :: { (Inlines, [Blocks]) }
|
|
||||||
: '[' seq ']' seq { (trimInlines $2, [plain $ trimInlines $4]) }
|
|
||||||
|
|
||||||
para :: { Blocks }
|
|
||||||
: seq { para' $1 }
|
|
||||||
| codepara { codeBlockWith ([], ["haskell"], []) $1 }
|
|
||||||
| property { $1 }
|
|
||||||
| examples { $1 }
|
|
||||||
|
|
||||||
codepara :: { String }
|
|
||||||
: '>..' codepara { $1 ++ $2 }
|
|
||||||
| '>..' { $1 }
|
|
||||||
|
|
||||||
property :: { Blocks }
|
|
||||||
: PROP { makeProperty $1 }
|
|
||||||
|
|
||||||
examples :: { Blocks }
|
|
||||||
: example examples { $1 <> $2 }
|
|
||||||
| example { $1 }
|
|
||||||
|
|
||||||
example :: { Blocks }
|
|
||||||
: PROMPT EXP result { makeExample $1 $2 (lines $3) }
|
|
||||||
| PROMPT EXP { makeExample $1 $2 [] }
|
|
||||||
|
|
||||||
result :: { String }
|
|
||||||
: RESULT result { $1 ++ $2 }
|
|
||||||
| RESULT { $1 }
|
|
||||||
|
|
||||||
seq :: { Inlines }
|
|
||||||
: elem seq { $1 <> $2 }
|
|
||||||
| elem { $1 }
|
|
||||||
|
|
||||||
elem :: { Inlines }
|
|
||||||
: elem1 { $1 }
|
|
||||||
| '@' seq1 '@' { monospace $2 }
|
|
||||||
|
|
||||||
seq1 :: { Inlines }
|
|
||||||
: PARA seq1 { linebreak <> $2 }
|
|
||||||
| elem1 seq1 { $1 <> $2 }
|
|
||||||
| elem1 { $1 }
|
|
||||||
|
|
||||||
elem1 :: { Inlines }
|
|
||||||
: STRING { text $1 }
|
|
||||||
| '/../' { emph (str $1) }
|
|
||||||
| URL { makeHyperlink $1 }
|
|
||||||
| PIC { image $1 $1 mempty }
|
|
||||||
| ANAME { mempty } -- TODO
|
|
||||||
| IDENT { codeWith ([], ["haskell"], []) $1 }
|
|
||||||
| DQUO strings DQUO { codeWith ([], ["haskell"], []) $2 }
|
|
||||||
|
|
||||||
strings :: { String }
|
|
||||||
: STRING { $1 }
|
|
||||||
| STRING strings { $1 ++ $2 }
|
|
||||||
|
|
||||||
{
|
|
||||||
happyError :: [LToken] -> Either [LToken] a
|
|
||||||
happyError toks = Left toks
|
|
||||||
|
|
||||||
para' :: Inlines -> Blocks
|
|
||||||
para' = para . trimInlines
|
|
||||||
|
|
||||||
monospace :: Inlines -> Inlines
|
|
||||||
monospace = everywhere (mkT go)
|
|
||||||
where
|
|
||||||
go (Str s) = Code nullAttr s
|
|
||||||
go x = x
|
|
||||||
|
|
||||||
-- | Create a `Hyperlink` from given string.
|
|
||||||
--
|
|
||||||
-- A hyperlink consists of a URL and an optional label. The label is separated
|
|
||||||
-- from the url by one or more whitespace characters.
|
|
||||||
makeHyperlink :: String -> Inlines
|
|
||||||
makeHyperlink input = case break isSpace $ trim input of
|
|
||||||
(url, "") -> link url url (str url)
|
|
||||||
(url, lb) -> link url url (trimInlines $ text lb)
|
|
||||||
|
|
||||||
makeProperty :: String -> Blocks
|
|
||||||
makeProperty s = case trim s of
|
|
||||||
'p':'r':'o':'p':'>':xs ->
|
|
||||||
codeBlockWith ([], ["property"], []) (dropWhile isSpace xs)
|
|
||||||
xs ->
|
|
||||||
error $ "makeProperty: invalid input " ++ show xs
|
|
||||||
|
|
||||||
-- | Create an 'Example', stripping superfluous characters as appropriate
|
|
||||||
makeExample :: String -> String -> [String] -> Blocks
|
|
||||||
makeExample prompt expression result =
|
|
||||||
para $ codeWith ([], ["haskell","expr"], []) (trim expression)
|
|
||||||
<> linebreak
|
|
||||||
<> (mconcat $ intersperse linebreak $ map coder result')
|
|
||||||
where
|
|
||||||
-- 1. drop trailing whitespace from the prompt, remember the prefix
|
|
||||||
prefix = takeWhile isSpace prompt
|
|
||||||
|
|
||||||
-- 2. drop, if possible, the exact same sequence of whitespace
|
|
||||||
-- characters from each result line
|
|
||||||
--
|
|
||||||
-- 3. interpret lines that only contain the string "<BLANKLINE>" as an
|
|
||||||
-- empty line
|
|
||||||
result' = map (substituteBlankLine . tryStripPrefix prefix) result
|
|
||||||
where
|
|
||||||
tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys
|
|
||||||
|
|
||||||
substituteBlankLine "<BLANKLINE>" = ""
|
|
||||||
substituteBlankLine line = line
|
|
||||||
coder = codeWith ([], ["result"], [])
|
|
||||||
}
|
|
Loading…
Add table
Reference in a new issue