RST reader: Improved field lists.

Field lists now work properly with block content.
(Thanks to Lachlan Musicman for pointing out the bug.)

In addition, definition list items are now always Para instead
of Plain -- which matches behavior of rst2xml.py.

Finally, in image blocks, the alt attribute is parsed properly
and used for the alt, not also the title.
This commit is contained in:
John MacFarlane 2011-01-26 17:18:02 -08:00
parent 80f5a89a0b
commit 703c421c9e
3 changed files with 80 additions and 82 deletions

View file

@ -34,10 +34,11 @@ import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.ParserCombinators.Parsec
import Control.Monad ( when, unless )
import Control.Monad ( when )
import Data.List ( findIndex, intercalate, transpose, sort, deleteFirstsBy )
import qualified Data.Map as M
import Text.Printf ( printf )
import Data.Maybe ( catMaybes )
-- | Parse reStructuredText string and return Pandoc document.
readRST :: ParserState -- ^ Parser state, including options for parser
@ -121,10 +122,9 @@ parseBlocks = manyTill block eof
block :: GenParser Char ParserState Block
block = choice [ codeBlock
, rawHtmlBlock
, rawLaTeXBlock
, fieldList
, rawBlock
, blockQuote
, fieldList
, imageBlock
, customCodeBlock
, unknownDirective
@ -142,48 +142,54 @@ block = choice [ codeBlock
-- field list
--
fieldListItem :: String -> GenParser Char st ([Char], [Char])
fieldListItem indent = try $ do
rawFieldListItem :: String -> GenParser Char ParserState (String, String)
rawFieldListItem indent = try $ do
string indent
char ':'
name <- many1 $ alphaNum <|> spaceChar
string ": "
skipSpaces
first <- manyTill anyChar newline
rest <- option "" $ try $ lookAhead (string indent >> spaceChar) >>
indentedBlock
return (name, first ++ if null rest
then ""
else ("\n" ++ rest))
rest <- option "" $ try $ do lookAhead (string indent >> spaceChar)
indentedBlock
let raw = first ++ "\n" ++ rest ++ "\n"
return (name, raw)
fieldListItem :: String
-> GenParser Char ParserState (Maybe ([Inline], [[Block]]))
fieldListItem indent = try $ do
(name, raw) <- rawFieldListItem indent
let term = [Str name]
contents <- parseFromString (many block) raw
case (name, contents) of
("Author", x) -> do
updateState $ \st ->
st{ stateAuthors = stateAuthors st ++ [extractContents x] }
return Nothing
("Authors", [BulletList auths]) -> do
updateState $ \st -> st{ stateAuthors = map extractContents auths }
return Nothing
("Date", x) -> do
updateState $ \st -> st{ stateDate = extractContents x }
return Nothing
("Title", x) -> do
updateState $ \st -> st{ stateTitle = extractContents x }
return Nothing
_ -> return $ Just (term, [contents])
extractContents :: [Block] -> [Inline]
extractContents [Plain auth] = auth
extractContents [Para auth] = auth
extractContents _ = []
fieldList :: GenParser Char ParserState Block
fieldList = try $ do
indent <- lookAhead $ many spaceChar
items <- many1 $ fieldListItem indent
blanklines
let authors = case lookup "Authors" items of
Just auth -> [auth]
Nothing -> map snd (filter (\(x,_) -> x == "Author") items)
unless (null authors) $ do
authors' <- mapM (parseFromString (many inline)) authors
updateState $ \st -> st {stateAuthors = map normalizeSpaces authors'}
case (lookup "Date" items) of
Just dat -> do
dat' <- parseFromString (many inline) dat
updateState $ \st -> st{ stateDate = normalizeSpaces dat' }
Nothing -> return ()
case (lookup "Title" items) of
Just tit -> parseFromString (many inline) tit >>=
\t -> updateState $ \st -> st {stateTitle = t}
Nothing -> return ()
let remaining = filter (\(x,_) -> (x /= "Authors") && (x /= "Author") &&
(x /= "Date") && (x /= "Title")) items
if null remaining
then return Null
else do terms <- mapM (return . (:[]) . Str . fst) remaining
defs <- mapM (parseFromString (many block) . snd)
remaining
return $ DefinitionList $ zip terms $ map (:[]) defs
if null items
then return Null
else return $ DefinitionList $ catMaybes items
--
-- line block
@ -237,15 +243,16 @@ plain = many1 inline >>= return . Plain . normalizeSpaces
-- image block
--
imageBlock :: GenParser Char st Block
imageBlock :: GenParser Char ParserState Block
imageBlock = try $ do
string ".. image:: "
src <- manyTill anyChar newline
fields <- option [] $ do indent <- lookAhead $ many (oneOf " /t")
many1 $ fieldListItem indent
fields <- try $ do indent <- lookAhead $ many (oneOf " /t")
many $ rawFieldListItem indent
optional blanklines
case lookup "alt" fields of
Just alt -> return $ Plain [Image [Str alt] (src, alt)]
Just alt -> return $ Plain [Image [Str $ removeTrailingSpace alt]
(src, "")]
Nothing -> return $ Plain [Image [Str "image"] (src, "")]
--
-- header blocks
@ -320,20 +327,19 @@ hrule = try $ do
indentedLine :: String -> GenParser Char st [Char]
indentedLine indents = try $ do
string indents
result <- manyTill anyChar newline
return $ result ++ "\n"
manyTill anyChar newline
-- two or more indented lines, possibly separated by blank lines.
-- any amount of indentation will work.
indentedBlock :: GenParser Char st [Char]
indentedBlock = do
indentedBlock = try $ do
indents <- lookAhead $ many1 spaceChar
lns <- many $ choice $ [ indentedLine indents,
try $ do b <- blanklines
l <- indentedLine indents
return (b ++ l) ]
optional blanklines
return $ concat lns
optional blanklines
return $ unlines lns
codeBlock :: GenParser Char st Block
codeBlock = try $ do
@ -371,25 +377,16 @@ birdTrackLine = do
manyTill anyChar newline
--
-- raw html
-- raw html/latex/etc
--
rawHtmlBlock :: GenParser Char st Block
rawHtmlBlock = try $ do
string ".. raw:: html"
blanklines
indentedBlock >>= return . RawBlock "html"
--
-- raw latex
--
rawLaTeXBlock :: GenParser Char st Block
rawLaTeXBlock = try $ do
string ".. raw:: latex"
rawBlock :: GenParser Char st Block
rawBlock = try $ do
string ".. raw:: "
lang <- many1 (letter <|> digit)
blanklines
result <- indentedBlock
return $ RawBlock "latex" result
return $ RawBlock lang result
--
-- block quotes
@ -416,7 +413,7 @@ definitionListItem = try $ do
term <- many1Till inline endline
raw <- indentedBlock
-- parse the extracted block, which may contain various block elements:
contents <- parseFromString parseBlocks $ raw ++ "\n\n"
contents <- parseFromString parseBlocks $ raw ++ "\n"
return (normalizeSpaces term, [contents])
definitionList :: GenParser Char ParserState Block

View file

@ -27,20 +27,20 @@ tests = [ "field list" =:
:Authors: - Me
- Myself
- I
:Indentation: Since the field marker may be quite long, the second
and subsequent lines of the field body do not have to line up
with the first line, but they must be indented relative to the
field name marker, and they must line up with each other.
:Parameter i: integer
:Indentation: Since the field marker may be quite long, the second
and subsequent lines of the field body do not have to line up
with the first line, but they must be indented relative to the
field name marker, and they must line up with each other.
:Parameter i: integer
|] =?> ( setAuthors ["Me","Myself","I"]
$ setDate "2001-08-16"
$ doc
$ definitionList [ (str "Hostname", [plain "media08"])
, (str "IP address", [plain "10.0.0.19"])
, (str "Size", [plain "3ru"])
, (str "Version", [plain "1"])
, (str "Indentation", [para "Since the field marker may be quite long, the second. and subsequent lines of the field body do not have to line up. with the first line, but they must be indented relative to the. field name marker, and they must line up with each other."])
, (str "Parameter i", [plain "integer"])
$ definitionList [ (str "Hostname", [para "media08"])
, (str "IP address", [para "10.0.0.19"])
, (str "Size", [para "3ru"])
, (str "Version", [para "1"])
, (str "Indentation", [para "Since the field marker may be quite long, the second and subsequent lines of the field body do not have to line up with the first line, but they must be indented relative to the field name marker, and they must line up with each other."])
, (str "Parameter i", [para "integer"])
])
]

View file

@ -1,7 +1,7 @@
Pandoc (Meta {docTitle = [Str "Pandoc",Space,Str "Test",Space,Str "Suite",Str ":",Space,Str "Subtitle"], docAuthors = [[Str "John",Space,Str "MacFarlane"],[Str "Anonymous"]], docDate = [Str "July",Space,Str "17,",Space,Str "2006"]})
[DefinitionList
[([Str "Revision"],
[[Plain [Str "3"]]])]
[[Para [Str "3"]]])]
,Header 1 [Str "Level",Space,Str "one",Space,Str "header"]
,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Str ".",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber",Apostrophe,Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."]
,Header 2 [Str "Level",Space,Str "two",Space,Str "header"]
@ -158,20 +158,21 @@ Pandoc (Meta {docTitle = [Str "Pandoc",Space,Str "Test",Space,Str "Suite",Str ":
,([Str "term",Space,Str "with",Space,Emph [Str "emphasis"]],
[[Para [Str "Definition",Space,Str "3",Str "."]]])]
,Header 1 [Str "Field",Space,Str "Lists"]
,BlockQuote
[DefinitionList
[([Str "address"],
[[Para [Str "61",Space,Str "Main",Space,Str "St",Str "."]]])
,([Str "city"],
[[Para [Emph [Str "Nowhere"],Str ",",Space,Str "MA,",Space,Str "USA"]]])
,([Str "phone"],
[[Para [Str "123",EnDash,Str "4567"]]])]]
,DefinitionList
[([Str "address"],
[[Plain [Str "61",Space,Str "Main",Space,Str "St",Str "."]]])
[[Para [Str "61",Space,Str "Main",Space,Str "St",Str "."]]])
,([Str "city"],
[[Plain [Emph [Str "Nowhere"],Str ",",Space,Str "MA,",Space,Str "USA"]]])
[[Para [Emph [Str "Nowhere"],Str ",",Space,Str "MA,",Space,Str "USA"]]])
,([Str "phone"],
[[Plain [Str "123",EnDash,Str "4567"]]])]
,DefinitionList
[([Str "address"],
[[Plain [Str "61",Space,Str "Main",Space,Str "St",Str "."]]])
,([Str "city"],
[[Plain [Emph [Str "Nowhere"],Str ",",Space,Str "MA,",Space,Str "USA"]]])
,([Str "phone"],
[[Plain [Str "123",EnDash,Str "4567"]]])]
[[Para [Str "123",EnDash,Str "4567"]]])]
,Header 1 [Str "HTML",Space,Str "Blocks"]
,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line",Str ":"]
,RawBlock "html" "<div>foo</div>\n"
@ -223,7 +224,7 @@ Pandoc (Meta {docTitle = [Str "Pandoc",Space,Str "Test",Space,Str "Suite",Str ":
,Header 1 [Str "Images"]
,Para [Str "From",Space,Quoted DoubleQuote [Str "Voyage",Space,Str "dans",Space,Str "la",Space,Str "Lune"],Space,Str "by",Space,Str "Georges",Space,Str "Melies",Space,Str "(1902)",Str ":"]
,Plain [Image [Str "image"] ("lalune.jpg","")]
,Plain [Image [Str "Voyage dans la Lune"] ("lalune.jpg","Voyage dans la Lune")]
,Plain [Image [Str "Voyage dans la Lune"] ("lalune.jpg","")]
,Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "movie",Space,Image [Str "movie"] ("movie.jpg",""),Space,Str "icon",Str "."]
,Header 1 [Str "Comments"]
,Para [Str "First",Space,Str "paragraph"]