Add reader for Haddock markup based on Haddock's own lexer/parser.
parent ee0fc19bc5
commit 18459b95ba

5 changed files with 395 additions and 1 deletion
pandoc.cabal

@@ -223,6 +223,7 @@ Library
    Build-Depends: base >= 4.2 && <5,
                   syb >= 0.1 && < 0.5,
                   containers >= 0.1 && < 0.6,
                   array >= 0.3 && < 0.5,
                   parsec >= 3.1 && < 3.2,
                   mtl >= 1.1 && < 2.2,
                   network >= 2 && < 2.5,
@@ -287,6 +288,7 @@ Library
                   Text.Pandoc.Readers.TeXMath,
                   Text.Pandoc.Readers.Textile,
                   Text.Pandoc.Readers.Native,
                   Text.Pandoc.Readers.Haddock,
                   Text.Pandoc.Writers.Native,
                   Text.Pandoc.Writers.Docbook,
                   Text.Pandoc.Writers.OPML,
@@ -313,7 +315,9 @@ Library
                   Text.Pandoc.XML,
                   Text.Pandoc.Biblio,
                   Text.Pandoc.SelfContained
    Other-Modules: Text.Pandoc.MIME,
    Other-Modules: Text.Pandoc.Readers.Haddock.Lex,
                   Text.Pandoc.MIME,
                   Text.Pandoc.Readers.Haddock.Parse,
                   Text.Pandoc.Parsing,
                   Text.Pandoc.UUID,
                   Text.Pandoc.ImageSize,
src/Text/Pandoc.hs

@@ -73,6 +73,7 @@ module Text.Pandoc
               , readTextile
               , readDocBook
               , readOPML
               , readHaddock
               , readNative
               -- * Writers: converting /from/ Pandoc format
               , Writer (..)
@@ -120,6 +121,7 @@ import Text.Pandoc.Readers.LaTeX
import Text.Pandoc.Readers.HTML
import Text.Pandoc.Readers.Textile
import Text.Pandoc.Readers.Native
import Text.Pandoc.Readers.Haddock
import Text.Pandoc.Writers.Native
import Text.Pandoc.Writers.Markdown
import Text.Pandoc.Writers.RST
@@ -200,6 +202,7 @@ readers = [("native" , \_ s -> return $ readNative s)
          ,("textile" , \o s -> return $ readTextile o s) -- TODO : textile+lhs
          ,("html"    , \o s -> return $ readHtml o s)
          ,("latex"   , \o s -> return $ readLaTeX o s)
          ,("haddock" , \o s -> return $ readHaddock o s)
          ]

data Writer = PureStringWriter (WriterOptions -> Pandoc -> String)
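Not part of the commit: a minimal sketch of reaching the new "haddock" entry through this association list, assuming the readers list is exported from Text.Pandoc and that def (the default ReaderOptions, via data-default) is available.

import Text.Pandoc
import Data.Default (def)

-- Illustrative only: look a reader up by name and run it with default options.
readByName :: String -> String -> IO Pandoc
readByName name input =
    case lookup name readers of
      Just r  -> r def input
      Nothing -> error ("no reader named " ++ name)

-- e.g. readByName "haddock" "Some /Haddock/ markup" >>= print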
src/Text/Pandoc/Readers/Haddock.hs (new file, 39 lines)
@@ -0,0 +1,39 @@
{- |
   Module      : Text.Pandoc.Readers.Haddock
   Copyright   : Copyright (C) 2013 David Lazar
   License     : GNU GPL, version 2 or above

   Maintainer  : David Lazar <lazar6@illinois.edu>
   Stability   : alpha

Conversion of Haddock markup to 'Pandoc' document.
-}
module Text.Pandoc.Readers.Haddock
    ( readHaddock
    ) where

import Text.Pandoc.Builder
import Text.Pandoc.Options
import Text.Pandoc.Readers.Haddock.Lex
import Text.Pandoc.Readers.Haddock.Parse

-- | Parse Haddock markup and return a 'Pandoc' document.
readHaddock :: ReaderOptions -- ^ Reader options
            -> String        -- ^ String to parse
            -> Pandoc
readHaddock _ s = Pandoc (Meta [] [] []) blocks
  where
    blocks = case parseParas (tokenise s (0,0)) of
        Nothing -> []
        Just x  -> mergeLists (toList x)

-- similar to 'docAppend' in Haddock.Doc
mergeLists :: [Block] -> [Block]
mergeLists (BulletList xs : BulletList ys : blocks)
    = mergeLists (BulletList (xs ++ ys) : blocks)
mergeLists (OrderedList _ xs : OrderedList a ys : blocks)
    = mergeLists (OrderedList a (xs ++ ys) : blocks)
mergeLists (DefinitionList xs : DefinitionList ys : blocks)
    = mergeLists (DefinitionList (xs ++ ys) : blocks)
mergeLists (x : blocks) = x : mergeLists blocks
mergeLists [] = []
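A usage sketch, not part of the commit: converting the body of a Haddock comment to Markdown, assuming the Data.Default instances for ReaderOptions and WriterOptions are in scope.

import Text.Pandoc.Readers.Haddock (readHaddock)
import Text.Pandoc.Writers.Markdown (writeMarkdown)
import Data.Default (def)

-- Haddock comment body in, Markdown text out.
haddockToMarkdown :: String -> String
haddockToMarkdown = writeMarkdown def . readHaddock def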
src/Text/Pandoc/Readers/Haddock/Lex.x (new file, 169 lines)
@@ -0,0 +1,169 @@
--
-- Haddock - A Haskell Documentation Tool
--
-- (c) Simon Marlow 2002
--
-- This file was modified and integrated into GHC by David Waern 2006.
-- Then moved back into Haddock by Isaac Dupree in 2009 :-)
-- Then copied into Pandoc by David Lazar in 2013 :-D

{
{-# LANGUAGE BangPatterns #-} -- Generated by Alex
{-# OPTIONS -Wwarn -w #-}
-- The above warning suppression flag is a temporary kludge.
-- While working on this module you are encouraged to remove it and fix
-- any warnings in the module. See
--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
-- for details

module Text.Pandoc.Readers.Haddock.Lex (
    Token(..),
    LToken,
    tokenise
    ) where

import Data.Char
import Numeric (readHex)
}

%wrapper "posn"

$ws       = $white # \n
$digit    = [0-9]
$hexdigit = [0-9a-fA-F]
$special  = [\"\@]
$alphanum = [A-Za-z0-9]
$ident    = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~\:]

:-

-- beginning of a paragraph
<0,para> {
  $ws* \n                ;
  $ws* \>                { begin birdtrack }
  $ws* prop \> .* \n     { strtoken TokProperty `andBegin` property}
  $ws* \>\>\>            { strtoken TokExamplePrompt `andBegin` exampleexpr }
  $ws* [\*\-]            { token TokBullet `andBegin` string }
  $ws* \[                { token TokDefStart `andBegin` def }
  $ws* \( $digit+ \)     { token TokNumber `andBegin` string }
  $ws* $digit+ \.        { token TokNumber `andBegin` string }
  $ws*                   { begin string }
}

-- beginning of a line
<line> {
  $ws* \>                { begin birdtrack }
  $ws* \>\>\>            { strtoken TokExamplePrompt `andBegin` exampleexpr }
  $ws* \n                { token TokPara `andBegin` para }

  -- Here, we really want to be able to say
  -- $ws* (\n | <eof>)   { token TokPara `andBegin` para}
  -- because otherwise a trailing line of whitespace will result in
  -- a spurious TokString at the end of a docstring. We don't have <eof>,
  -- though (NOW I realise what it was for :-). To get around this, we always
  -- append \n to the end of a docstring.
  ()                     { begin string }
}

<birdtrack> .* \n?       { strtokenNL TokBirdTrack `andBegin` line }

<property> ()            { token TokPara `andBegin` para }

<example> {
  $ws* \n                { token TokPara `andBegin` para }
  $ws* \>\>\>            { strtoken TokExamplePrompt `andBegin` exampleexpr }
  ()                     { begin exampleresult }
}

<exampleexpr> .* \n      { strtokenNL TokExampleExpression `andBegin` example }

<exampleresult> .* \n    { strtokenNL TokExampleResult `andBegin` example }

<string,def> {
  $special               { strtoken $ \s -> TokSpecial (head s) }
  \<\< [^\>]* \>\>       { strtoken $ \s -> TokPic (init $ init $ tail $ tail s) }
  \< [^\>]* \>           { strtoken $ \s -> TokURL (init (tail s)) }
  \# [^\#]* \#           { strtoken $ \s -> TokAName (init (tail s)) }
  \/ [^\/]* \/           { strtoken $ \s -> TokEmphasis (init (tail s)) }
  [\'\`] $ident+ [\'\`]  { strtoken $ \s -> TokIdent (init (tail s)) }
  \\ .                   { strtoken (TokString . tail) }
  "&#" $digit+ \;        { strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] }
  "&#" [xX] $hexdigit+ \;
        { strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] }
  -- allow special characters through if they don't fit one of the previous
  -- patterns.
  [\/\'\`\<\#\&\\]                           { strtoken TokString }
  [^ $special \/ \< \# \n \'\` \& \\ \]]* \n { strtokenNL TokString `andBegin` line }
  [^ $special \/ \< \# \n \'\` \& \\ \]]+    { strtoken TokString }
}

<def> {
  \]                     { token TokDefEnd `andBegin` string }
}

-- ']' doesn't have any special meaning outside of the [...] at the beginning
-- of a definition paragraph.
<string> {
  \]                     { strtoken TokString }
}

{
-- | A located token
type LToken = (Token, AlexPosn)

data Token
  = TokPara
  | TokNumber
  | TokBullet
  | TokDefStart
  | TokDefEnd
  | TokSpecial Char
  | TokIdent String
  | TokString String
  | TokURL String
  | TokPic String
  | TokEmphasis String
  | TokAName String
  | TokBirdTrack String
  | TokProperty String
  | TokExamplePrompt String
  | TokExampleExpression String
  | TokExampleResult String
--  deriving Show

tokenPos :: LToken -> (Int, Int)
tokenPos t = let AlexPn _ line col = snd t in (line, col)

type StartCode = Int
type Action = AlexPosn -> String -> StartCode -> (StartCode -> [LToken]) -> [LToken]

tokenise :: String -> (Int, Int) -> [LToken]
tokenise str (line, col) = go (posn,'\n',[],eofHack str) para
  where posn = AlexPn 0 line col
        go inp@(pos,_,_,str) sc =
          case alexScan inp sc of
                AlexEOF -> []
                AlexError _ -> []
                AlexSkip  inp' len     -> go inp' sc
                AlexToken inp' len act -> act pos (take len str) sc (\sc -> go inp' sc)

-- NB. we add a final \n to the string, (see comment in the beginning of line
-- production above).
eofHack str = str++"\n"

andBegin :: Action -> StartCode -> Action
andBegin act new_sc = \pos str _ cont -> act pos str new_sc cont

token :: Token -> Action
token t = \pos _ sc cont -> (t, pos) : cont sc

strtoken, strtokenNL :: (String -> Token) -> Action
strtoken t = \pos str sc cont -> (t str, pos) : cont sc
strtokenNL t = \pos str sc cont -> (t (filter (/= '\r') str), pos) : cont sc
-- ^ We only want LF line endings in our internal doc string format, so we
-- filter out all CRs.

begin :: StartCode -> Action
begin sc = \_ _ _ cont -> cont sc
}
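For orientation, a small sketch (not in the commit) that drives the lexer directly. tokenise starts in the para start code at the given (line, column) origin and appends a trailing newline itself (eofHack), so callers need not add one.

import Text.Pandoc.Readers.Haddock.Lex (tokenise, Token(..))

-- Collect the targets of <...> style hyperlinks found by the lexer,
-- ignoring source positions.
linkTargets :: String -> [String]
linkTargets s = [url | (TokURL url, _) <- tokenise s (0, 0)]

-- e.g. linkTargets "See <http://example.org/>." yields ["http://example.org/"]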
src/Text/Pandoc/Readers/Haddock/Parse.y (new file, 179 lines)
@@ -0,0 +1,179 @@
-- This code was copied from the 'haddock' package, modified, and integrated
-- into Pandoc by David Lazar.
{
{-# LANGUAGE BangPatterns #-} -- required for versions of Happy before 1.18.6
{-# OPTIONS -Wwarn -w #-}
-- The above warning suppression flag is a temporary kludge.
-- While working on this module you are encouraged to remove it and fix
-- any warnings in the module. See
--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
-- for details

module Text.Pandoc.Readers.Haddock.Parse (parseString, parseParas) where

import Text.Pandoc.Readers.Haddock.Lex
import Text.Pandoc.Builder
import Data.Generics (everywhere, mkT)
import Data.Char (isSpace)
import Data.Maybe (fromMaybe)
import Data.List (stripPrefix)
import Data.Monoid (mempty)
}

%expect 0

%tokentype { LToken }

%token
    '/'     { (TokSpecial '/',_) }
    '@'     { (TokSpecial '@',_) }
    '['     { (TokDefStart,_) }
    ']'     { (TokDefEnd,_) }
    DQUO    { (TokSpecial '\"',_) }
    URL     { (TokURL $$,_) }
    PIC     { (TokPic $$,_) }
    ANAME   { (TokAName $$,_) }
    '/../'  { (TokEmphasis $$,_) }
    '-'     { (TokBullet,_) }
    '(n)'   { (TokNumber,_) }
    '>..'   { (TokBirdTrack $$,_) }
    PROP    { (TokProperty $$,_) }
    PROMPT  { (TokExamplePrompt $$,_) }
    RESULT  { (TokExampleResult $$,_) }
    EXP     { (TokExampleExpression $$,_) }
    IDENT   { (TokIdent $$,_) }
    PARA    { (TokPara,_) }
    STRING  { (TokString $$,_) }

%monad { Maybe }

%name parseParas  doc
%name parseString seq

%%

doc :: { Blocks }
    : apara PARA doc    { $1 <> $3 }
    | PARA doc          { $2 }
    | apara             { $1 }
    | {- empty -}       { mempty }

apara :: { Blocks }
    : ulpara            { bulletList [$1] }
    | olpara            { orderedList [$1] }
    | defpara           { definitionList [$1] }
    | para              { $1 }

ulpara :: { Blocks }
    : '-' para          { $2 }

olpara :: { Blocks }
    : '(n)' para        { $2 }

defpara :: { (Inlines, [Blocks]) }
    : '[' seq ']' seq   { ($2, [plain $4]) }

para :: { Blocks }
    : seq               { para $1 }
    | codepara          { codeBlock $1 }
    | property          { $1 }
    | examples          { $1 }

codepara :: { String }
    : '>..' codepara    { $1 ++ $2 }
    | '>..'             { $1 }

property :: { Blocks }
    : PROP              { makeProperty $1 }

examples :: { Blocks }
    : example examples  { $1 <> $2 }
    | example           { $1 }

example :: { Blocks }
    : PROMPT EXP result { makeExample $1 $2 (lines $3) }
    | PROMPT EXP        { makeExample $1 $2 [] }

result :: { String }
    : RESULT result     { $1 ++ $2 }
    | RESULT            { $1 }

seq :: { Inlines }
    : elem seq          { $1 <> $2 }
    | elem              { $1 }

elem :: { Inlines }
    : elem1             { $1 }
    | '@' seq1 '@'      { monospace $2 }

seq1 :: { Inlines }
    : PARA seq1         { linebreak <> $2 }
    | elem1 seq1        { $1 <> $2 }
    | elem1             { $1 }

elem1 :: { Inlines }
    : STRING            { str $1 }
    | '/../'            { emph (str $1) }
    | URL               { makeHyperlink $1 }
    | PIC               { image $1 $1 mempty }
    | ANAME             { mempty } -- TODO
    | IDENT             { code $1 }
    | DQUO strings DQUO { code $2 }

strings :: { String }
    : STRING            { $1 }
    | STRING strings    { $1 ++ $2 }

{
happyError :: [LToken] -> Maybe a
happyError toks = Nothing

monospace :: Inlines -> Inlines
monospace = everywhere (mkT go)
  where
    go (Str s) = Code nullAttr s
    go Space   = Code nullAttr " "
    go x       = x

-- | Create a `Hyperlink` from the given string.
--
-- A hyperlink consists of a URL and an optional label. The label is separated
-- from the url by one or more whitespace characters.
makeHyperlink :: String -> Inlines
makeHyperlink input = case break isSpace $ strip input of
  (url, "") -> link url url (str url)
  (url, lb) -> link url url (str label)
    where label = dropWhile isSpace lb

makeProperty :: String -> Blocks
makeProperty s = case strip s of
  'p':'r':'o':'p':'>':xs ->
    codeBlockWith ([], ["property"], []) (dropWhile isSpace xs)
  xs ->
    error $ "makeProperty: invalid input " ++ show xs

-- | Create an 'Example', stripping superfluous characters as appropriate
makeExample :: String -> String -> [String] -> Blocks
makeExample prompt expression result =
    para $ codeWith ([], ["expr"], []) (strip expression ++ "\n")
        <> codeWith ([], ["result"], []) (unlines result')
  where
    -- 1. drop trailing whitespace from the prompt, remember the prefix
    (prefix, _) = span isSpace prompt

    -- 2. drop, if possible, the exact same sequence of whitespace
    --    characters from each result line
    --
    -- 3. interpret lines that only contain the string "<BLANKLINE>" as an
    --    empty line
    result' = map (substituteBlankLine . tryStripPrefix prefix) result
      where
        tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys

        substituteBlankLine "<BLANKLINE>" = ""
        substituteBlankLine line          = line

-- | Remove all leading and trailing whitespace
strip :: String -> String
strip = dropWhile isSpace . reverse . dropWhile isSpace . reverse
}
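An end-to-end sketch, not part of the commit, composing the lexer and parser the same way readHaddock does; the sample markup and the expected shape of the output are illustrative.

import Text.Pandoc.Builder (toList)
import Text.Pandoc.Readers.Haddock.Lex (tokenise)
import Text.Pandoc.Readers.Haddock.Parse (parseParas)

sample :: String
sample = unlines
    [ "Reversing a list, /illustrated/:"
    , ""
    , ">>> reverse [1,2,3]"
    , "[3,2,1]"
    ]

main :: IO ()
main = case parseParas (tokenise sample (0, 0)) of
    Nothing     -> putStrLn "parse failed"
    -- roughly: a Para of inlines, then the example rendered as code
    Just blocks -> mapM_ print (toList blocks)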