LaTeX reader: improvements in raw LaTeX parsing.

+ "loose punctuation" (like {}) parsed as Space
+ Para elements must contain more than Str "" and Space elements
+ Added parser for "\ignore" command used in literate haskell.
+ Reworked unknownCommand and rawLaTeXInline: when not in "parse raw"
  mode, these parsers simply strip off the command part and allow
  the arguments to be parsed normally.  So, for example,
  \blorg{\emph{hi}} will be parsed as Emph "hi" rather than
  Str "{\\emph{hi}}".


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1420 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
fiddlosopher 2008-09-06 18:05:18 +00:00
parent ee2dee238d
commit fc24c79db6

View file

@ -159,6 +159,7 @@ block = choice [ hrule
, specialEnvironment , specialEnvironment
, itemBlock , itemBlock
, unknownEnvironment , unknownEnvironment
, ignore
, unknownCommand ] <?> "block" , unknownCommand ] <?> "block"
-- --
@ -283,7 +284,12 @@ definitionList = try $ do
-- --
para :: GenParser Char ParserState Block para :: GenParser Char ParserState Block
para = many1 inline >>~ spaces >>= return . Para . normalizeSpaces para = do
res <- many1 inline
spaces
return $ if null (filter (`notElem` [Str "", Space]) res)
then Null
else Para $ normalizeSpaces res
-- --
-- title authors date -- title authors date
@ -331,7 +337,7 @@ itemBlock :: GenParser Char ParserState Block
itemBlock = try $ do itemBlock = try $ do
("item", _, args) <- command ("item", _, args) <- command
state <- getState state <- getState
if (stateParserContext state == ListItemState) if stateParserContext state == ListItemState
then fail "item should be handled by list block" then fail "item should be handled by list block"
else if null args else if null args
then return Null then return Null
@ -381,20 +387,33 @@ unknownEnvironment = try $ do
else anyEnvironment -- otherwise just the contents else anyEnvironment -- otherwise just the contents
return result return result
-- \ignore{} is used conventionally in literate haskell for definitions
-- that are to be processed by the compiler but not printed.
ignore :: GenParser Char ParserState Block
ignore = try $ do
("ignore", _, _) <- command
spaces
return Null
unknownCommand :: GenParser Char ParserState Block unknownCommand :: GenParser Char ParserState Block
unknownCommand = try $ do unknownCommand = try $ do
notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description", notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description",
"document"] "document"]
state <- getState
if stateParseRaw state
then do
(name, star, args) <- command (name, star, args) <- command
spaces spaces
let argStr = concat args if name == "item" && stateParserContext state == ListItemState
state <- getState
if name == "item" && (stateParserContext state) == ListItemState
then fail "should not be parsed as raw" then fail "should not be parsed as raw"
else return "" else return ""
if stateParseRaw state return $ Plain [TeX ("\\" ++ name ++ star ++ concat args)]
then return $ Plain [TeX ("\\" ++ name ++ star ++ argStr)] else do -- skip unknown command, leaving arguments to be parsed
else return $ Plain [Str (joinWithSep " " args)] char '\\'
letter
many (letter <|> digit)
spaces
return Null
-- latex comment -- latex comment
comment :: GenParser Char st Block comment :: GenParser Char st Block
@ -523,9 +542,9 @@ escapedChar = do
result <- escaped (oneOf " $%&_#{}\n") result <- escaped (oneOf " $%&_#{}\n")
return $ if result == Str "\n" then Str " " else result return $ if result == Str "\n" then Str " " else result
-- ignore standalone, nonescaped special characters -- treat nonescaped special characters as spaces
unescapedChar :: GenParser Char st Inline unescapedChar :: GenParser Char st Inline
unescapedChar = oneOf "`$^&_#{}|<>" >> return (Str "") unescapedChar = oneOf "`$^&_#{}|<>" >> return Space
specialChar :: GenParser Char st Inline specialChar :: GenParser Char st Inline
specialChar = choice [ backslash, tilde, caret, bar, lt, gt, doubleQuote ] specialChar = choice [ backslash, tilde, caret, bar, lt, gt, doubleQuote ]
@ -727,12 +746,14 @@ footnote = try $ do
-- | Parse any LaTeX command and return it in a raw TeX inline element. -- | Parse any LaTeX command and return it in a raw TeX inline element.
rawLaTeXInline :: GenParser Char ParserState Inline rawLaTeXInline :: GenParser Char ParserState Inline
rawLaTeXInline = try $ do rawLaTeXInline = try $ do
(name, star, args) <- command notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore"]
state <- getState state <- getState
if ((name == "begin") || (name == "end") || (name == "item"))
then fail "not an inline command"
else string ""
if stateParseRaw state if stateParseRaw state
then return $ TeX ("\\" ++ name ++ star ++ concat args) then do
else return $ Str (joinWithSep " " args) (name, star, args) <- command
return $ TeX ("\\" ++ name ++ star ++ concat args)
else do -- skip unknown command, leaving arguments to be parsed
char '\\'
letter
many (letter <|> digit)
return $ Str ""