pandoc/src/Text/Pandoc/Readers/LaTeX.hs

1050 lines
33 KiB
Haskell
Raw Normal View History

{-
2010-03-23 13:31:09 -07:00
Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Readers.LaTeX
2010-03-23 13:31:09 -07:00
Copyright : Copyright (C) 2006-2010 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Conversion of LaTeX to 'Pandoc' document.
-}
module Text.Pandoc.Readers.LaTeX (
readLaTeX,
rawLaTeXInline,
rawLaTeXEnvironment'
) where
import Text.ParserCombinators.Parsec
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Data.Maybe ( fromMaybe )
import Data.Char ( chr, toUpper )
import Data.List ( intercalate, isPrefixOf, isSuffixOf )
import Control.Monad
-- | Parse LaTeX from string and return 'Pandoc' document.
readLaTeX :: ParserState -- ^ Parser state, including options for parser
-> String -- ^ String to parse (assumes @'\n'@ line endings)
-> Pandoc
readLaTeX = readWith parseLaTeX
-- characters with special meaning
specialChars :: [Char]
specialChars = "\\`$%^&_~#{}[]\n \t|<>'\"-"
--
-- utility functions
--
-- | Returns text between brackets and its matching pair.
bracketedText :: Char -> Char -> GenParser Char st [Char]
bracketedText openB closeB = do
2011-12-05 20:55:23 -08:00
result <- charsInBalanced openB closeB anyChar
return $ [openB] ++ result ++ [closeB]
-- | Returns an option or argument of a LaTeX command.
optOrArg :: GenParser Char st [Char]
optOrArg = try $ spaces >> (bracketedText '{' '}' <|> bracketedText '[' ']')
-- | True if the string begins with '{'.
isArg :: [Char] -> Bool
isArg ('{':_) = True
isArg _ = False
-- | Returns list of options and arguments of a LaTeX command.
commandArgs :: GenParser Char st [[Char]]
commandArgs = many optOrArg
-- | Parses LaTeX command, returns (name, star, list of options or arguments).
command :: GenParser Char st ([Char], [Char], [[Char]])
command = do
char '\\'
name <- many1 letter
star <- option "" (string "*") -- some commands have starred versions
args <- commandArgs
return (name, star, args)
begin :: [Char] -> GenParser Char st [Char]
begin name = try $ do
string "\\begin"
spaces
char '{'
string name
char '}'
optional commandArgs
spaces
return name
end :: [Char] -> GenParser Char st [Char]
end name = try $ do
string "\\end"
spaces
char '{'
string name
char '}'
return name
-- | Returns a list of block elements containing the contents of an
-- environment.
environment :: [Char] -> GenParser Char ParserState [Block]
environment name = try $ begin name >> spaces >> manyTill block (end name) >>~ spaces
anyEnvironment :: GenParser Char ParserState Block
anyEnvironment = try $ do
string "\\begin"
spaces
char '{'
name <- many letter
star <- option "" (string "*") -- some environments have starred variants
char '}'
optional commandArgs
spaces
contents <- manyTill block (end (name ++ star))
spaces
return $ BlockQuote contents
--
-- parsing documents
--
-- | Process LaTeX preamble, extracting metadata.
processLaTeXPreamble :: GenParser Char ParserState ()
processLaTeXPreamble = do
try $ string "\\documentclass"
skipMany $ bibliographic <|> macro <|> commentBlock <|> skipChar
-- | Parse LaTeX and return 'Pandoc'.
parseLaTeX :: GenParser Char ParserState Pandoc
parseLaTeX = do
spaces
skipMany $ comment >> spaces
blocks <- try (processLaTeXPreamble >> environment "document")
<|> (many block >>~ (spaces >> eof))
state <- getState
let blocks' = filter (/= Null) blocks
let title' = stateTitle state
let authors' = stateAuthors state
let date' = stateDate state
return $ Pandoc (Meta title' authors' date') blocks'
--
-- parsing blocks
--
parseBlocks :: GenParser Char ParserState [Block]
parseBlocks = spaces >> many block
block :: GenParser Char ParserState Block
block = choice [ hrule
, codeBlock
, header
, list
, blockQuote
2011-01-07 10:15:48 -08:00
, simpleTable
, commentBlock
, macro
, bibliographic
, para
, itemBlock
, unknownEnvironment
, ignore
, unknownCommand
] <?> "block"
--
-- header blocks
--
header :: GenParser Char ParserState Block
header = section <|> chapter
chapter :: GenParser Char ParserState Block
chapter = try $ do
string "\\chapter"
result <- headerWithLevel 1
updateState $ \s -> s{ stateHasChapters = True }
return result
section :: GenParser Char ParserState Block
section = try $ do
char '\\'
subs <- many (try (string "sub"))
base <- try (string "section" >> return 1) <|> (string "paragraph" >> return 4)
st <- getState
let lev = if stateHasChapters st
then length subs + base + 1
else length subs + base
headerWithLevel lev
headerWithLevel :: Int -> GenParser Char ParserState Block
headerWithLevel lev = try $ do
spaces
optional (char '*')
spaces
optional $ bracketedText '[' ']' -- alt title
spaces
char '{'
title' <- manyTill inline (char '}')
spaces
return $ Header lev (normalizeSpaces title')
--
-- hrule block
--
hrule :: GenParser Char st Block
hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n",
"\\newpage" ] >> spaces >> return HorizontalRule
2011-01-07 10:15:48 -08:00
-- tables
simpleTable :: GenParser Char ParserState Block
simpleTable = try $ do
string "\\begin"
spaces
string "{tabular}"
spaces
aligns <- parseAligns
let cols = length aligns
optional hline
header' <- option [] $ parseTableHeader cols
rows <- many (parseTableRow cols >>~ optional hline)
spaces
end "tabular"
spaces
let header'' = if null header'
then replicate cols []
else header'
return $ Table [] aligns (replicate cols 0) header'' rows
hline :: GenParser Char st ()
hline = try $ spaces >> string "\\hline" >> return ()
parseAligns :: GenParser Char ParserState [Alignment]
parseAligns = try $ do
char '{'
optional $ char '|'
let cAlign = char 'c' >> return AlignCenter
let lAlign = char 'l' >> return AlignLeft
let rAlign = char 'r' >> return AlignRight
let alignChar = cAlign <|> lAlign <|> rAlign
aligns' <- sepEndBy alignChar (optional $ char '|')
char '}'
spaces
return aligns'
parseTableHeader :: Int -- ^ number of columns
-> GenParser Char ParserState [TableCell]
parseTableHeader cols = try $ do
cells' <- parseTableRow cols
hline
return cells'
parseTableRow :: Int -- ^ number of columns
-> GenParser Char ParserState [TableCell]
parseTableRow cols = try $ do
let tableCellInline = notFollowedBy (char '&' <|>
(try $ char '\\' >> char '\\')) >> inline
cells' <- sepBy (spaces >> liftM ((:[]) . Plain . normalizeSpaces)
(many tableCellInline)) (char '&')
guard $ length cells' == cols
spaces
(try $ string "\\\\" >> spaces) <|>
(lookAhead (end "tabular") >> return ())
return cells'
--
-- code blocks
--
codeBlock :: GenParser Char ParserState Block
codeBlock = codeBlockWith "verbatim" <|> codeBlockWith "Verbatim" <|> codeBlockWith "lstlisting" <|> lhsCodeBlock
-- Note: Verbatim is from fancyvrb.
codeBlockWith :: String -> GenParser Char st Block
codeBlockWith env = try $ do
string "\\begin"
spaces -- don't use begin function because it
string $ "{" ++ env ++ "}" -- gobbles whitespace; we want to gobble
optional blanklines -- blank lines, but not leading space
contents <- manyTill anyChar (try (string $ "\\end{" ++ env ++ "}"))
spaces
let classes = if env == "code" then ["haskell"] else []
return $ CodeBlock ("",classes,[]) (stripTrailingNewlines contents)
lhsCodeBlock :: GenParser Char ParserState Block
lhsCodeBlock = do
failUnlessLHS
(CodeBlock (_,_,_) cont) <- codeBlockWith "code"
return $ CodeBlock ("", ["sourceCode","literate","haskell"], []) cont
--
-- block quotes
--
blockQuote :: GenParser Char ParserState Block
blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>=
return . BlockQuote
--
-- list blocks
--
list :: GenParser Char ParserState Block
list = bulletList <|> orderedList <|> definitionList <?> "list"
listItem :: GenParser Char ParserState ([Inline], [Block])
listItem = try $ do
("item", _, args) <- command
spaces
state <- getState
let oldParserContext = stateParserContext state
updateState (\s -> s {stateParserContext = ListItemState})
blocks <- many block
updateState (\s -> s {stateParserContext = oldParserContext})
opt <- case args of
([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x ->
parseFromString (many inline) $ tail $ init x
_ -> return []
return (opt, blocks)
orderedList :: GenParser Char ParserState Block
orderedList = try $ do
string "\\begin"
spaces
string "{enumerate}"
spaces
(_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
try $ do failIfStrict
char '['
res <- anyOrderedListMarker
char ']'
return res
spaces
option "" $ try $ do string "\\setlength{\\itemindent}"
char '{'
manyTill anyChar (char '}')
spaces
start <- option 1 $ try $ do failIfStrict
string "\\setcounter{enum"
many1 (oneOf "iv")
string "}{"
num <- many1 digit
char '}'
spaces
return $ (read num) + 1
items <- many listItem
end "enumerate"
spaces
return $ OrderedList (start, style, delim) $ map snd items
bulletList :: GenParser Char ParserState Block
bulletList = try $ do
begin "itemize"
items <- many listItem
end "itemize"
spaces
return (BulletList $ map snd items)
definitionList :: GenParser Char ParserState Block
definitionList = try $ do
begin "description"
items <- many listItem
end "description"
spaces
return $ DefinitionList $ map (\(t,d) -> (t,[d])) items
--
-- paragraph block
--
para :: GenParser Char ParserState Block
para = do
res <- many1 inline
spaces
return $ if null (filter (`notElem` [Str "", Space]) res)
then Null
else Para $ normalizeSpaces res
--
-- title authors date
--
bibliographic :: GenParser Char ParserState Block
bibliographic = choice [ maketitle, title, subtitle, authors, date ]
maketitle :: GenParser Char st Block
maketitle = try (string "\\maketitle") >> spaces >> return Null
title :: GenParser Char ParserState Block
title = try $ do
string "\\title{"
tit <- manyTill inline (char '}')
spaces
updateState (\state -> state { stateTitle = tit })
return Null
subtitle :: GenParser Char ParserState Block
subtitle = try $ do
string "\\subtitle{"
tit <- manyTill inline (char '}')
spaces
updateState (\state -> state { stateTitle = stateTitle state ++
Str ":" : LineBreak : tit })
return Null
authors :: GenParser Char ParserState Block
authors = try $ do
string "\\author{"
let andsep = try $ string "\\and" >> notFollowedBy letter >>
spaces >> return '&'
raw <- sepBy (many $ notFollowedBy (char '}' <|> andsep) >> inline) andsep
let authors' = map normalizeSpaces raw
char '}'
spaces
updateState (\s -> s { stateAuthors = authors' })
return Null
date :: GenParser Char ParserState Block
date = try $ do
string "\\date{"
date' <- manyTill inline (char '}')
spaces
updateState (\state -> state { stateDate = normalizeSpaces date' })
return Null
--
-- item block
-- for use in unknown environments that aren't being parsed as raw latex
--
-- this forces items to be parsed in different blocks
itemBlock :: GenParser Char ParserState Block
itemBlock = try $ do
("item", _, args) <- command
state <- getState
if stateParserContext state == ListItemState
then fail "item should be handled by list block"
else if null args
then return Null
else return $ Plain [Str (stripFirstAndLast (head args))]
--
-- raw LaTeX
--
-- | Parse any LaTeX environment and return a Para block containing
-- the whole literal environment as raw TeX.
rawLaTeXEnvironment :: GenParser Char st Block
rawLaTeXEnvironment = do
contents <- rawLaTeXEnvironment'
spaces
return $ RawBlock "latex" contents
-- | Parse any LaTeX environment and return a string containing
-- the whole literal environment as raw TeX.
rawLaTeXEnvironment' :: GenParser Char st String
rawLaTeXEnvironment' = try $ do
string "\\begin"
spaces
char '{'
name <- many1 letter
star <- option "" (string "*") -- for starred variants
let name' = name ++ star
char '}'
args <- option [] commandArgs
let argStr = concat args
contents <- manyTill (choice [ (many1 (noneOf "\\")),
rawLaTeXEnvironment',
string "\\" ])
(end name')
return $ "\\begin{" ++ name' ++ "}" ++ argStr ++
concat contents ++ "\\end{" ++ name' ++ "}"
unknownEnvironment :: GenParser Char ParserState Block
unknownEnvironment = try $ do
state <- getState
result <- if stateParseRaw state -- check whether we should include raw TeX
then rawLaTeXEnvironment -- if so, get whole raw environment
else anyEnvironment -- otherwise just the contents
return result
-- \ignore{} is used conventionally in literate haskell for definitions
-- that are to be processed by the compiler but not printed.
ignore :: GenParser Char ParserState Block
ignore = try $ do
("ignore", _, _) <- command
spaces
return Null
demacro :: (String, String, [String]) -> GenParser Char ParserState Inline
demacro (n,st,args) = try $ do
let raw = "\\" ++ n ++ st ++ concat args
s' <- applyMacros' raw
if raw == s'
then return $ RawInline "latex" raw
else do
inp <- getInput
setInput $ s' ++ inp
return $ Str ""
unknownCommand :: GenParser Char ParserState Block
unknownCommand = try $ do
spaces
notFollowedBy' $ oneOfStrings ["\\begin","\\end","\\item"] >>
notFollowedBy letter
state <- getState
when (stateParserContext state == ListItemState) $
notFollowedBy' (string "\\item")
if stateParseRaw state
then command >>= demacro >>= return . Plain . (:[])
else do
(name, _, args) <- command
spaces
unless (name `elem` commandsToIgnore) $ do
-- put arguments back in input to be parsed
inp <- getInput
setInput $ intercalate " " args ++ inp
return Null
commandsToIgnore :: [String]
commandsToIgnore = ["special","pdfannot","pdfstringdef", "index","bibliography"]
skipChar :: GenParser Char ParserState Block
skipChar = do
satisfy (/='\\') <|>
(notFollowedBy' (try $
string "\\begin" >> spaces >> string "{document}") >>
anyChar)
spaces
return Null
commentBlock :: GenParser Char st Block
commentBlock = many1 (comment >> spaces) >> return Null
--
-- inline
--
inline :: GenParser Char ParserState Inline
inline = choice [ str
, endline
, whitespace
, quoted
, apostrophe
, strong
, math
, ellipses
, emDash
, enDash
, hyphen
, emph
, strikeout
, superscript
, subscript
, code
, url
, link
, image
, footnote
, linebreak
, accentedChar
, nonbreakingSpace
, cite
, specialChar
, ensureMath
, rawLaTeXInline'
, escapedChar
, emptyGroup
, unescapedChar
, comment
] <?> "inline"
-- latex comment
comment :: GenParser Char st Inline
comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return (Str "")
accentedChar :: GenParser Char st Inline
accentedChar = normalAccentedChar <|> specialAccentedChar
normalAccentedChar :: GenParser Char st Inline
normalAccentedChar = try $ do
char '\\'
accent <- oneOf "'`^\"~"
character <- (try $ char '{' >> letter >>~ char '}') <|> letter
let table = fromMaybe [] $ lookup character accentTable
let result = case lookup accent table of
Just num -> chr num
Nothing -> '?'
return $ Str [result]
-- an association list of letters and association list of accents
-- and decimal character numbers.
accentTable :: [(Char, [(Char, Int)])]
accentTable =
[ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]),
('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]),
('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]),
('N', [('~', 209)]),
('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]),
('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]),
('a', [('`', 224), ('\'', 225), ('^', 227), ('"', 228)]),
('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]),
('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]),
('n', [('~', 241)]),
('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]),
('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
specialAccentedChar :: GenParser Char st Inline
2011-01-05 14:57:06 -08:00
specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, lslash,
oslash, pound, euro, copyright, sect ]
ccedil :: GenParser Char st Inline
ccedil = try $ do
char '\\'
letter' <- oneOfStrings ["cc", "cC"]
notFollowedBy letter
let num = if letter' == "cc" then 231 else 199
return $ Str [chr num]
aring :: GenParser Char st Inline
aring = try $ do
char '\\'
letter' <- oneOfStrings ["aa", "AA"]
notFollowedBy letter
let num = if letter' == "aa" then 229 else 197
return $ Str [chr num]
iuml :: GenParser Char st Inline
iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >>
return (Str [chr 239])
szlig :: GenParser Char st Inline
szlig = try (string "\\ss") >> notFollowedBy letter >> return (Str [chr 223])
oslash :: GenParser Char st Inline
oslash = try $ do
char '\\'
letter' <- choice [char 'o', char 'O']
notFollowedBy letter
let num = if letter' == 'o' then 248 else 216
return $ Str [chr num]
2011-01-05 14:57:06 -08:00
lslash :: GenParser Char st Inline
lslash = try $ do
cmd <- oneOfStrings ["{\\L}","{\\l}"]
<|> (oneOfStrings ["\\L ","\\l "] >>~ notFollowedBy letter)
2011-01-05 14:57:06 -08:00
return $ if 'l' `elem` cmd
then Str "\x142"
else Str "\x141"
aelig :: GenParser Char st Inline
aelig = try $ do
char '\\'
letter' <- oneOfStrings ["ae", "AE"]
notFollowedBy letter
let num = if letter' == "ae" then 230 else 198
return $ Str [chr num]
pound :: GenParser Char st Inline
pound = try (string "\\pounds" >> notFollowedBy letter) >> return (Str [chr 163])
euro :: GenParser Char st Inline
euro = try (string "\\euro" >> notFollowedBy letter) >> return (Str [chr 8364])
copyright :: GenParser Char st Inline
copyright = try (string "\\copyright" >> notFollowedBy letter) >> return (Str [chr 169])
sect :: GenParser Char st Inline
sect = try (string "\\S" >> notFollowedBy letter) >> return (Str [chr 167])
escapedChar :: GenParser Char st Inline
escapedChar = do
result <- escaped (oneOf specialChars)
return $ if result == '\n' then Str " " else Str [result]
emptyGroup :: GenParser Char st Inline
emptyGroup = try $ do
char '{'
spaces
char '}'
return $ Str ""
-- nonescaped special characters
unescapedChar :: GenParser Char st Inline
unescapedChar = oneOf "`$^&_#{}[]|<>" >>= return . (\c -> Str [c])
specialChar :: GenParser Char st Inline
2012-01-26 11:52:25 -08:00
specialChar = choice [ spacer, interwordSpace, sentenceEnd,
backslash, tilde, caret,
bar, lt, gt, doubleQuote ]
spacer :: GenParser Char st Inline
spacer = try (string "\\,") >> return (Str "")
2012-01-26 11:52:25 -08:00
sentenceEnd :: GenParser Char st Inline
sentenceEnd = try (string "\\@") >> return (Str "")
interwordSpace :: GenParser Char st Inline
interwordSpace = try (string "\\ ") >> return (Str "\160")
backslash :: GenParser Char st Inline
backslash = try (string "\\textbackslash") >> optional (try $ string "{}") >> return (Str "\\")
tilde :: GenParser Char st Inline
tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~")
caret :: GenParser Char st Inline
caret = try (string "\\^{}") >> return (Str "^")
bar :: GenParser Char st Inline
bar = try (string "\\textbar") >> optional (try $ string "{}") >> return (Str "\\")
lt :: GenParser Char st Inline
lt = try (string "\\textless") >> optional (try $ string "{}") >> return (Str "<")
gt :: GenParser Char st Inline
gt = try (string "\\textgreater") >> optional (try $ string "{}") >> return (Str ">")
doubleQuote :: GenParser Char st Inline
doubleQuote = char '"' >> return (Str "\"")
code :: GenParser Char ParserState Inline
code = code1 <|> code2 <|> code3 <|> lhsInlineCode
code1 :: GenParser Char st Inline
code1 = try $ do
string "\\verb"
marker <- anyChar
result <- manyTill anyChar (char marker)
return $ Code nullAttr $ removeLeadingTrailingSpace result
code2 :: GenParser Char st Inline
code2 = try $ do
string "\\texttt{"
result <- manyTill (noneOf "\\\n~$%^&{}") (char '}')
return $ Code nullAttr result
code3 :: GenParser Char st Inline
code3 = try $ do
string "\\lstinline"
marker <- anyChar
result <- manyTill anyChar (char marker)
return $ Code nullAttr $ removeLeadingTrailingSpace result
lhsInlineCode :: GenParser Char ParserState Inline
lhsInlineCode = try $ do
failUnlessLHS
char '|'
result <- manyTill (noneOf "|\n") (char '|')
return $ Code ("",["haskell"],[]) result
emph :: GenParser Char ParserState Inline
emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >>
manyTill inline (char '}') >>= return . Emph
strikeout :: GenParser Char ParserState Inline
strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>=
return . Strikeout
superscript :: GenParser Char ParserState Inline
superscript = try $ string "\\textsuperscript{" >>
manyTill inline (char '}') >>= return . Superscript
-- note: \textsubscript isn't a standard latex command, but we use
-- a defined version in pandoc.
subscript :: GenParser Char ParserState Inline
subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>=
return . Subscript
apostrophe :: GenParser Char ParserState Inline
apostrophe = char '\'' >> return (Str "\x2019")
quoted :: GenParser Char ParserState Inline
quoted = doubleQuoted <|> singleQuoted
singleQuoted :: GenParser Char ParserState Inline
singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>=
return . Quoted SingleQuote . normalizeSpaces
doubleQuoted :: GenParser Char ParserState Inline
doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>=
return . Quoted DoubleQuote . normalizeSpaces
singleQuoteStart :: GenParser Char st Char
singleQuoteStart = char '`'
singleQuoteEnd :: GenParser Char st ()
singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum
doubleQuoteStart :: CharParser st String
doubleQuoteStart = string "``"
doubleQuoteEnd :: CharParser st String
doubleQuoteEnd = try $ string "''"
ellipses :: GenParser Char st Inline
ellipses = try $ do
char '\\'
optional $ char 'l'
string "dots"
optional $ try $ string "{}"
return (Str "")
enDash :: GenParser Char st Inline
enDash = try (string "--") >> return (Str "-")
emDash :: GenParser Char st Inline
emDash = try (string "---") >> return (Str "")
hyphen :: GenParser Char st Inline
hyphen = char '-' >> return (Str "-")
strong :: GenParser Char ParserState Inline
strong = try (string "\\textbf{") >> manyTill inline (char '}') >>=
return . Strong
whitespace :: GenParser Char st Inline
whitespace = many1 (oneOf " \t") >> return Space
nonbreakingSpace :: GenParser Char st Inline
nonbreakingSpace = char '~' >> return (Str "\160")
-- hard line break
linebreak :: GenParser Char st Inline
linebreak = try $ do
string "\\\\"
optional $ bracketedText '[' ']' -- e.g. \\[10pt]
spaces
return LineBreak
str :: GenParser Char st Inline
str = many1 (noneOf specialChars) >>= return . Str
-- endline internal to paragraph
endline :: GenParser Char st Inline
endline = try $ newline >> notFollowedBy blankline >> return Space
-- math
math :: GenParser Char ParserState Inline
math = (math3 >>= applyMacros' >>= return . Math DisplayMath)
<|> (math1 >>= applyMacros' >>= return . Math InlineMath)
<|> (math2 >>= applyMacros' >>= return . Math InlineMath)
<|> (math4 >>= applyMacros' >>= return . Math DisplayMath)
<|> (math5 >>= applyMacros' >>= return . Math DisplayMath)
<|> (math6 >>= applyMacros' >>= return . Math DisplayMath)
<?> "math"
math1 :: GenParser Char st String
math1 = try $ char '$' >> manyTill anyChar (char '$')
math2 :: GenParser Char st String
math2 = try $ string "\\(" >> manyTill anyChar (try $ string "\\)")
math3 :: GenParser Char st String
math3 = try $ char '$' >> math1 >>~ char '$'
math4 :: GenParser Char st String
math4 = try $ do
name <- begin "displaymath" <|> begin "equation" <|> begin "equation*" <|>
begin "gather" <|> begin "gather*" <|> begin "gathered" <|>
begin "multline" <|> begin "multline*"
manyTill anyChar (end name)
math5 :: GenParser Char st String
math5 = try $ (string "\\[") >> spaces >> manyTill anyChar (try $ string "\\]")
math6 :: GenParser Char st String
math6 = try $ do
name <- begin "eqnarray" <|> begin "eqnarray*" <|> begin "align" <|>
begin "align*" <|> begin "alignat" <|> begin "alignat*" <|>
begin "split" <|> begin "aligned" <|> begin "alignedat"
res <- manyTill anyChar (end name)
return $ filter (/= '&') res -- remove alignment codes
ensureMath :: GenParser Char st Inline
ensureMath = try $ do
(n, _, args) <- command
guard $ n == "ensuremath" && not (null args)
return $ Math InlineMath $ tail $ init $ head args
--
-- links and images
--
url :: GenParser Char ParserState Inline
url = try $ do
string "\\url"
2011-12-05 20:55:23 -08:00
url' <- charsInBalanced '{' '}' anyChar
return $ Link [Code ("",["url"],[]) url'] (escapeURI url', "")
link :: GenParser Char ParserState Inline
link = try $ do
string "\\href{"
url' <- manyTill anyChar (char '}')
char '{'
label' <- manyTill inline (char '}')
2010-03-23 15:07:48 -07:00
return $ Link (normalizeSpaces label') (escapeURI url', "")
image :: GenParser Char ParserState Inline
image = try $ do
("includegraphics", _, args) <- command
let args' = filter isArg args -- filter out options
2010-03-23 15:07:48 -07:00
let (src,tit) = case args' of
[] -> ("", "")
(x:_) -> (stripFirstAndLast x, "")
return $ Image [Str "image"] (escapeURI src, tit)
footnote :: GenParser Char ParserState Inline
footnote = try $ do
(name, _, (contents:[])) <- command
if ((name == "footnote") || (name == "thanks"))
then string ""
else fail "not a footnote or thanks command"
let contents' = stripFirstAndLast contents
-- parse the extracted block, which may contain various block elements:
rest <- getInput
setInput $ contents'
blocks <- parseBlocks
setInput rest
return $ Note blocks
-- | citations
cite :: GenParser Char ParserState Inline
cite = simpleCite <|> complexNatbibCites
simpleCiteArgs :: GenParser Char ParserState [Citation]
simpleCiteArgs = try $ do
first <- optionMaybe $ (char '[') >> manyTill inline (char ']')
second <- optionMaybe $ (char '[') >> manyTill inline (char ']')
char '{'
keys <- many1Till citationLabel (char '}')
let (pre, suf) = case (first , second ) of
(Just s , Nothing) -> ([], s )
(Just s , Just t ) -> (s , t )
_ -> ([], [])
conv k = Citation { citationId = k
, citationPrefix = []
, citationSuffix = []
, citationMode = NormalCitation
, citationHash = 0
, citationNoteNum = 0
}
return $ addPrefix pre $ addSuffix suf $ map conv keys
simpleCite :: GenParser Char ParserState Inline
simpleCite = try $ do
char '\\'
let biblatex = [a ++ "cite" | a <- ["auto", "foot", "paren", "super", ""]]
++ ["footcitetext"]
normal = ["cite" ++ a ++ b | a <- ["al", ""], b <- ["p", "p*", ""]]
++ biblatex
supress = ["citeyearpar", "citeyear", "autocite*", "cite*", "parencite*"]
intext = ["textcite"] ++ ["cite" ++ a ++ b | a <- ["al", ""], b <- ["t", "t*"]]
mintext = ["textcites"]
mnormal = map (++ "s") biblatex
cmdend = notFollowedBy (letter <|> char '*')
capit [] = []
capit (x:xs) = toUpper x : xs
addUpper xs = xs ++ map capit xs
toparser l t = try $ oneOfStrings (addUpper l) >> cmdend >> return t
(mode, multi) <- toparser normal (NormalCitation, False)
<|> toparser supress (SuppressAuthor, False)
<|> toparser intext (AuthorInText , False)
<|> toparser mnormal (NormalCitation, True )
<|> toparser mintext (AuthorInText , True )
cits <- if multi then
many1 simpleCiteArgs
else
simpleCiteArgs >>= \c -> return [c]
let (c:cs) = concat cits
cits' = case mode of
AuthorInText -> c {citationMode = mode} : cs
_ -> map (\a -> a {citationMode = mode}) (c:cs)
return $ Cite cits' []
complexNatbibCites :: GenParser Char ParserState Inline
complexNatbibCites = complexNatbibTextual <|> complexNatbibParenthetical
complexNatbibTextual :: GenParser Char ParserState Inline
complexNatbibTextual = try $ do
string "\\citeauthor{"
manyTill (noneOf "}") (char '}')
skipSpaces
Cite (c:cs) _ <- complexNatbibParenthetical
return $ Cite (c {citationMode = AuthorInText} : cs) []
complexNatbibParenthetical :: GenParser Char ParserState Inline
complexNatbibParenthetical = try $ do
string "\\citetext{"
cits <- many1Till parseOne (char '}')
return $ Cite (concat cits) []
where
parseOne = do
skipSpaces
pref <- many (notFollowedBy (oneOf "\\}") >> inline)
(Cite cites _) <- simpleCite
suff <- many (notFollowedBy (oneOf "\\};") >> inline)
skipSpaces
optional $ char ';'
return $ addPrefix pref $ addSuffix suff $ cites
addPrefix :: [Inline] -> [Citation] -> [Citation]
addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
addPrefix _ _ = []
addSuffix :: [Inline] -> [Citation] -> [Citation]
addSuffix s ks@(_:_) = let k = last ks
in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
addSuffix _ _ = []
citationLabel :: GenParser Char ParserState String
citationLabel = do
res <- many1 $ noneOf ",}"
optional $ char ','
return $ removeLeadingTrailingSpace res
-- | Parse any LaTeX inline command and return it in a raw TeX inline element.
rawLaTeXInline' :: GenParser Char ParserState Inline
rawLaTeXInline' = do
notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore",
"\\section"]
rawLaTeXInline
-- | Parse any LaTeX command and return it in a raw TeX inline element.
rawLaTeXInline :: GenParser Char ParserState Inline
rawLaTeXInline = try $ do
state <- getState
if stateParseRaw state
then command >>= demacro
else do
(name,st,args) <- command
x <- demacro (name,st,args)
unless (x == Str "" || name `elem` commandsToIgnore) $ do
inp <- getInput
setInput $ intercalate " " args ++ inp
return $ Str ""