LaTeX reader: improvements in raw LaTeX parsing.
+ "loose punctuation" (like {}) parsed as Space + Para elements must contain more than Str "" and Space elements + Added parser for "\ignore" command used in literate haskell. + Reworked unknownCommand and rawLaTeXInline: when not in "parse raw" mode, these parsers simply strip off the command part and allow the arguments to be parsed normally. So, for example, \blorg{\emph{hi}} will be parsed as Emph "hi" rather than Str "{\\emph{hi}}". git-svn-id: https://pandoc.googlecode.com/svn/trunk@1420 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
parent
ee2dee238d
commit
fc24c79db6
1 changed files with 41 additions and 20 deletions
|
@ -159,6 +159,7 @@ block = choice [ hrule
|
||||||
, specialEnvironment
|
, specialEnvironment
|
||||||
, itemBlock
|
, itemBlock
|
||||||
, unknownEnvironment
|
, unknownEnvironment
|
||||||
|
, ignore
|
||||||
, unknownCommand ] <?> "block"
|
, unknownCommand ] <?> "block"
|
||||||
|
|
||||||
--
|
--
|
||||||
|
@ -283,7 +284,12 @@ definitionList = try $ do
|
||||||
--
|
--
|
||||||
|
|
||||||
para :: GenParser Char ParserState Block
|
para :: GenParser Char ParserState Block
|
||||||
para = many1 inline >>~ spaces >>= return . Para . normalizeSpaces
|
para = do
|
||||||
|
res <- many1 inline
|
||||||
|
spaces
|
||||||
|
return $ if null (filter (`notElem` [Str "", Space]) res)
|
||||||
|
then Null
|
||||||
|
else Para $ normalizeSpaces res
|
||||||
|
|
||||||
--
|
--
|
||||||
-- title authors date
|
-- title authors date
|
||||||
|
@ -331,7 +337,7 @@ itemBlock :: GenParser Char ParserState Block
|
||||||
itemBlock = try $ do
|
itemBlock = try $ do
|
||||||
("item", _, args) <- command
|
("item", _, args) <- command
|
||||||
state <- getState
|
state <- getState
|
||||||
if (stateParserContext state == ListItemState)
|
if stateParserContext state == ListItemState
|
||||||
then fail "item should be handled by list block"
|
then fail "item should be handled by list block"
|
||||||
else if null args
|
else if null args
|
||||||
then return Null
|
then return Null
|
||||||
|
@ -381,20 +387,33 @@ unknownEnvironment = try $ do
|
||||||
else anyEnvironment -- otherwise just the contents
|
else anyEnvironment -- otherwise just the contents
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
-- \ignore{} is used conventionally in literate haskell for definitions
|
||||||
|
-- that are to be processed by the compiler but not printed.
|
||||||
|
ignore :: GenParser Char ParserState Block
|
||||||
|
ignore = try $ do
|
||||||
|
("ignore", _, _) <- command
|
||||||
|
spaces
|
||||||
|
return Null
|
||||||
|
|
||||||
unknownCommand :: GenParser Char ParserState Block
|
unknownCommand :: GenParser Char ParserState Block
|
||||||
unknownCommand = try $ do
|
unknownCommand = try $ do
|
||||||
notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description",
|
notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description",
|
||||||
"document"]
|
"document"]
|
||||||
|
state <- getState
|
||||||
|
if stateParseRaw state
|
||||||
|
then do
|
||||||
(name, star, args) <- command
|
(name, star, args) <- command
|
||||||
spaces
|
spaces
|
||||||
let argStr = concat args
|
if name == "item" && stateParserContext state == ListItemState
|
||||||
state <- getState
|
|
||||||
if name == "item" && (stateParserContext state) == ListItemState
|
|
||||||
then fail "should not be parsed as raw"
|
then fail "should not be parsed as raw"
|
||||||
else return ""
|
else return ""
|
||||||
if stateParseRaw state
|
return $ Plain [TeX ("\\" ++ name ++ star ++ concat args)]
|
||||||
then return $ Plain [TeX ("\\" ++ name ++ star ++ argStr)]
|
else do -- skip unknown command, leaving arguments to be parsed
|
||||||
else return $ Plain [Str (joinWithSep " " args)]
|
char '\\'
|
||||||
|
letter
|
||||||
|
many (letter <|> digit)
|
||||||
|
spaces
|
||||||
|
return Null
|
||||||
|
|
||||||
-- latex comment
|
-- latex comment
|
||||||
comment :: GenParser Char st Block
|
comment :: GenParser Char st Block
|
||||||
|
@ -523,9 +542,9 @@ escapedChar = do
|
||||||
result <- escaped (oneOf " $%&_#{}\n")
|
result <- escaped (oneOf " $%&_#{}\n")
|
||||||
return $ if result == Str "\n" then Str " " else result
|
return $ if result == Str "\n" then Str " " else result
|
||||||
|
|
||||||
-- ignore standalone, nonescaped special characters
|
-- treat nonescaped special characters as spaces
|
||||||
unescapedChar :: GenParser Char st Inline
|
unescapedChar :: GenParser Char st Inline
|
||||||
unescapedChar = oneOf "`$^&_#{}|<>" >> return (Str "")
|
unescapedChar = oneOf "`$^&_#{}|<>" >> return Space
|
||||||
|
|
||||||
specialChar :: GenParser Char st Inline
|
specialChar :: GenParser Char st Inline
|
||||||
specialChar = choice [ backslash, tilde, caret, bar, lt, gt, doubleQuote ]
|
specialChar = choice [ backslash, tilde, caret, bar, lt, gt, doubleQuote ]
|
||||||
|
@ -727,12 +746,14 @@ footnote = try $ do
|
||||||
-- | Parse any LaTeX command and return it in a raw TeX inline element.
|
-- | Parse any LaTeX command and return it in a raw TeX inline element.
|
||||||
rawLaTeXInline :: GenParser Char ParserState Inline
|
rawLaTeXInline :: GenParser Char ParserState Inline
|
||||||
rawLaTeXInline = try $ do
|
rawLaTeXInline = try $ do
|
||||||
(name, star, args) <- command
|
notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore"]
|
||||||
state <- getState
|
state <- getState
|
||||||
if ((name == "begin") || (name == "end") || (name == "item"))
|
|
||||||
then fail "not an inline command"
|
|
||||||
else string ""
|
|
||||||
if stateParseRaw state
|
if stateParseRaw state
|
||||||
then return $ TeX ("\\" ++ name ++ star ++ concat args)
|
then do
|
||||||
else return $ Str (joinWithSep " " args)
|
(name, star, args) <- command
|
||||||
|
return $ TeX ("\\" ++ name ++ star ++ concat args)
|
||||||
|
else do -- skip unknown command, leaving arguments to be parsed
|
||||||
|
char '\\'
|
||||||
|
letter
|
||||||
|
many (letter <|> digit)
|
||||||
|
return $ Str ""
|
||||||
|
|
Loading…
Reference in a new issue