pandoc/src/Text/Pandoc/Readers/LaTeX.hs

1400 lines
48 KiB
Haskell
Raw Normal View History

2013-08-10 17:23:51 -07:00
{-# LANGUAGE ScopedTypeVariables, OverloadedStrings #-}
{-
Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Readers.LaTeX
Copyright : Copyright (C) 2006-2015 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Conversion of LaTeX to 'Pandoc' document.
-}
module Text.Pandoc.Readers.LaTeX ( readLaTeX,
rawLaTeXInline,
rawLaTeXBlock,
inlineCommand,
) where
import Text.Pandoc.Definition
import Text.Pandoc.Walk
import Text.Pandoc.Shared
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding ((<|>), many, optional, space,
mathDisplay, mathInline)
import qualified Text.Pandoc.UTF8 as UTF8
2015-04-12 09:47:13 +03:00
import Data.Char ( chr, ord, isLetter, isAlphaNum )
import Control.Monad.Trans (lift)
import Control.Monad
import Text.Pandoc.Builder
import Control.Applicative ((<|>), many, optional)
2015-04-12 09:47:13 +03:00
import Data.Maybe (fromMaybe, maybeToList)
import System.FilePath (replaceExtension, (</>), takeExtension, addExtension)
import Data.List (intercalate)
import qualified Data.Map as M
import Text.Pandoc.Highlighting (fromListingsLanguage)
import Text.Pandoc.ImageSize (numUnit, showFl)
2015-02-18 13:03:45 +00:00
import Text.Pandoc.Error
import Control.Monad.Except (throwError, catchError)
import Text.Pandoc.Class (PandocMonad, PandocPure, lookupEnv, readFileLazy,
warning, warningWithPos)
-- | Parse LaTeX from string and return 'Pandoc' document.
2016-11-28 17:13:46 -05:00
readLaTeX :: PandocMonad m
=> ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
2016-11-28 17:13:46 -05:00
-> m Pandoc
readLaTeX opts ltx = do
parsed <- readWithM parseLaTeX def{ stateOptions = opts } ltx
case parsed of
Right result -> return result
Left e -> throwError e
2016-11-28 17:13:46 -05:00
parseLaTeX :: PandocMonad m => LP m Pandoc
parseLaTeX = do
bs <- blocks
eof
st <- getState
let meta = stateMeta st
let (Pandoc _ bs') = doc bs
return $ Pandoc meta bs'
2016-11-28 17:13:46 -05:00
type LP m = ParserT String ParserState m
2016-11-28 17:13:46 -05:00
anyControlSeq :: PandocMonad m => LP m String
anyControlSeq = do
char '\\'
next <- option '\n' anyChar
2015-04-12 09:47:13 +03:00
case next of
'\n' -> return ""
c | isLetter c -> (c:) <$> (many letter <* optional sp)
| otherwise -> return [c]
2016-11-28 17:13:46 -05:00
controlSeq :: PandocMonad m => String -> LP m String
controlSeq name = try $ do
char '\\'
case name of
"" -> mzero
[c] | not (isLetter c) -> string [c]
cs -> string cs <* notFollowedBy letter <* optional sp
return name
2016-11-28 17:13:46 -05:00
dimenarg :: PandocMonad m => LP m String
dimenarg = try $ do
ch <- option "" $ string "="
num <- many1 digit
dim <- oneOfStrings ["pt","pc","in","bp","cm","mm","dd","cc","sp"]
return $ ch ++ num ++ dim
2016-11-28 17:13:46 -05:00
sp :: PandocMonad m => LP m ()
sp = whitespace <|> endline
2016-11-28 17:13:46 -05:00
whitespace :: PandocMonad m => LP m ()
whitespace = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t')
2016-11-28 17:13:46 -05:00
endline :: PandocMonad m => LP m ()
endline = try (newline >> lookAhead anyChar >> notFollowedBy blankline)
isLowerHex :: Char -> Bool
isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
2016-11-28 17:13:46 -05:00
tildeEscape :: PandocMonad m => LP m Char
tildeEscape = try $ do
string "^^"
c <- satisfy (\x -> x >= '\0' && x <= '\128')
d <- if isLowerHex c
then option "" $ count 1 (satisfy isLowerHex)
else return ""
if null d
then case ord c of
x | x >= 64 && x <= 127 -> return $ chr (x - 64)
| otherwise -> return $ chr (x + 64)
else return $ chr $ read ('0':'x':c:d)
2016-11-28 17:13:46 -05:00
comment :: PandocMonad m => LP m ()
comment = do
char '%'
skipMany (satisfy (/='\n'))
optional newline
return ()
2016-11-28 17:13:46 -05:00
bgroup :: PandocMonad m => LP m ()
bgroup = try $ do
skipMany (spaceChar <|> try (newline <* notFollowedBy blankline))
() <$ char '{'
<|> () <$ controlSeq "bgroup"
<|> () <$ controlSeq "begingroup"
2016-11-28 17:13:46 -05:00
egroup :: PandocMonad m => LP m ()
egroup = () <$ char '}'
<|> () <$ controlSeq "egroup"
<|> () <$ controlSeq "endgroup"
2016-11-28 17:13:46 -05:00
grouped :: PandocMonad m => Monoid a => LP m a -> LP m a
grouped parser = try $ bgroup *> (mconcat <$> manyTill parser egroup)
2016-11-28 17:13:46 -05:00
braced :: PandocMonad m => LP m String
braced = bgroup *> (concat <$> manyTill
( many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{'))
<|> try (string "\\}")
<|> try (string "\\{")
<|> try (string "\\\\")
<|> ((\x -> "{" ++ x ++ "}") <$> braced)
<|> count 1 anyChar
) egroup)
2016-11-28 17:13:46 -05:00
bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a
bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']'))
2016-11-28 17:13:46 -05:00
mathDisplay :: PandocMonad m => LP m String -> LP m Inlines
mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim)
2016-11-28 17:13:46 -05:00
mathInline :: PandocMonad m => LP m String -> LP m Inlines
mathInline p = math <$> (try p >>= applyMacros')
2016-11-28 17:13:46 -05:00
mathChars :: PandocMonad m => LP m String
mathChars =
concat <$> many (escapedChar
<|> (snd <$> withRaw braced)
<|> many1 (satisfy isOrdChar))
where escapedChar = try $ do char '\\'
c <- anyChar
return ['\\',c]
isOrdChar '$' = False
isOrdChar '{' = False
isOrdChar '}' = False
isOrdChar '\\' = False
isOrdChar _ = True
2016-11-28 17:13:46 -05:00
quoted' :: PandocMonad m => (Inlines -> Inlines) -> LP m String -> LP m () -> LP m Inlines
quoted' f starter ender = do
startchs <- starter
smart <- extensionEnabled Ext_smart <$> getOption readerExtensions
if smart
then do
ils <- many (notFollowedBy ender >> inline)
(ender >> return (f (mconcat ils))) <|>
(<> mconcat ils) <$>
lit (case startchs of
"``" -> ""
"`" -> ""
_ -> startchs)
else lit startchs
2016-11-28 17:13:46 -05:00
doubleQuote :: PandocMonad m => LP m Inlines
doubleQuote = do
quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
<|> quoted' doubleQuoted (string "") (void $ char '”')
-- the following is used by babel for localized quotes:
<|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'")
<|> quoted' doubleQuoted (string "\"") (void $ char '"')
2016-11-28 17:13:46 -05:00
singleQuote :: PandocMonad m => LP m Inlines
singleQuote = do
smart <- extensionEnabled Ext_smart <$> getOption readerExtensions
if smart
then quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter)
<|> quoted' singleQuoted (string "") (try $ char '' >> notFollowedBy letter)
else str <$> many1 (oneOf "`\'")
2016-11-28 17:13:46 -05:00
inline :: PandocMonad m => LP m Inlines
inline = (mempty <$ comment)
<|> (space <$ whitespace)
<|> (softbreak <$ endline)
<|> inlineText
<|> inlineCommand
<|> inlineEnvironment
<|> inlineGroup
<|> (char '-' *> option (str "-")
2015-04-12 09:47:13 +03:00
(char '-' *> option (str "") (str "" <$ char '-')))
<|> doubleQuote
<|> singleQuote
<|> (str "" <$ try (string "''"))
<|> (str "" <$ char '”')
<|> (str "" <$ char '\'')
<|> (str "" <$ char '')
<|> (str "\160" <$ char '~')
2015-04-12 09:47:13 +03:00
<|> mathDisplay (string "$$" *> mathChars <* string "$$")
<|> mathInline (char '$' *> mathChars <* char '$')
<|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb)
<|> (str . (:[]) <$> tildeEscape)
<|> (do res <- oneOf "#&~^'`\"[]"
pos <- getPosition
warningWithPos pos ("Parsing unescaped '" ++ [res] ++ "'")
return $ str [res])
2016-11-28 17:13:46 -05:00
inlines :: PandocMonad m => LP m Inlines
inlines = mconcat <$> many (notFollowedBy (char '}') *> inline)
2016-11-28 17:13:46 -05:00
inlineGroup :: PandocMonad m => LP m Inlines
inlineGroup = do
ils <- grouped inline
if isNull ils
then return mempty
else return $ spanWith nullAttr ils
-- we need the span so we can detitlecase bibtex entries;
-- we need to know when something is {C}apitalized
2016-11-28 17:13:46 -05:00
block :: PandocMonad m => LP m Blocks
block = (mempty <$ comment)
<|> (mempty <$ ((spaceChar <|> newline) *> spaces))
<|> environment
<|> include
<|> macro
<|> blockCommand
<|> paragraph
<|> grouped block
2012-02-04 12:51:27 -08:00
<|> (mempty <$ char '&') -- loose & in table environment
2016-11-28 17:13:46 -05:00
blocks :: PandocMonad m => LP m Blocks
blocks = mconcat <$> many block
2016-11-28 17:13:46 -05:00
getRawCommand :: PandocMonad m => String -> LP m String
getRawCommand name' = do
rawargs <- withRaw (many (try (optional sp *> opt)) *>
option "" (try (optional sp *> dimenarg)) *>
many braced)
return $ '\\' : name' ++ snd rawargs
2015-04-12 09:47:13 +03:00
lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v
lookupListDefault d = (fromMaybe d .) . lookupList
where
lookupList l m = msum $ map (`M.lookup` m) l
2016-11-28 17:13:46 -05:00
blockCommand :: PandocMonad m => LP m Blocks
blockCommand = try $ do
name <- anyControlSeq
guard $ name /= "begin" && name /= "end"
star <- option "" (string "*" <* optional sp)
let name' = name ++ star
let raw = do
rawcommand <- getRawCommand name'
transformed <- applyMacros' rawcommand
guard $ transformed /= rawcommand
notFollowedBy $ parseFromString inlines transformed
parseFromString blocks transformed
2015-04-12 09:47:13 +03:00
lookupListDefault raw [name',name] blockCommands
inBrackets :: Inlines -> Inlines
2015-04-12 09:47:13 +03:00
inBrackets x = str "[" <> x <> str "]"
-- eat an optional argument and one or more arguments in braces
2016-11-28 17:13:46 -05:00
ignoreInlines :: PandocMonad m => String -> (String, LP m Inlines)
ignoreInlines name = (name, doraw <|> (mempty <$ optargs))
where optargs = skipopts *> skipMany (try $ optional sp *> braced)
contseq = '\\':name
doraw = (rawInline "latex" . (contseq ++) . snd) <$>
2015-04-12 09:47:13 +03:00
(getOption readerParseRaw >>= guard >> withRaw optargs)
2016-11-28 17:13:46 -05:00
ignoreBlocks :: PandocMonad m => String -> (String, LP m Blocks)
ignoreBlocks name = (name, doraw <|> (mempty <$ optargs))
where optargs = skipopts *> skipMany (try $ optional sp *> braced)
contseq = '\\':name
doraw = (rawBlock "latex" . (contseq ++) . snd) <$>
2015-04-12 09:47:13 +03:00
(getOption readerParseRaw >>= guard >> withRaw optargs)
2016-11-28 17:13:46 -05:00
blockCommands :: PandocMonad m => M.Map String (LP m Blocks)
blockCommands = M.fromList $
[ ("par", mempty <$ skipopts)
, ("title", mempty <$ (skipopts *>
(grouped inline >>= addMeta "title")
<|> (grouped block >>= addMeta "title")))
, ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle"))
, ("author", mempty <$ (skipopts *> authors))
-- -- in letter class, temp. store address & sig as title, author
, ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
, ("signature", mempty <$ (skipopts *> authors))
, ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
-- sectioning
, ("chapter", updateState (\s -> s{ stateHasChapters = True })
*> section nullAttr 0)
, ("chapter*", updateState (\s -> s{ stateHasChapters = True })
*> section ("",["unnumbered"],[]) 0)
, ("section", section nullAttr 1)
, ("section*", section ("",["unnumbered"],[]) 1)
, ("subsection", section nullAttr 2)
, ("subsection*", section ("",["unnumbered"],[]) 2)
, ("subsubsection", section nullAttr 3)
, ("subsubsection*", section ("",["unnumbered"],[]) 3)
, ("paragraph", section nullAttr 4)
, ("paragraph*", section ("",["unnumbered"],[]) 4)
, ("subparagraph", section nullAttr 5)
, ("subparagraph*", section ("",["unnumbered"],[]) 5)
2012-02-05 09:28:56 -08:00
-- beamer slides
, ("frametitle", section nullAttr 3)
, ("framesubtitle", section nullAttr 4)
2012-02-05 09:28:56 -08:00
-- letters
, ("opening", (para . trimInlines) <$> (skipopts *> tok))
, ("closing", skipopts *> closing)
2012-02-05 09:28:56 -08:00
--
, ("hrule", pure horizontalRule)
, ("strut", pure mempty)
, ("rule", skipopts *> tok *> tok *> pure horizontalRule)
2015-04-12 09:47:13 +03:00
, ("item", skipopts *> looseItem)
, ("documentclass", skipopts *> braced *> preamble)
2012-05-22 15:38:11 -07:00
, ("centerline", (para . trimInlines) <$> (skipopts *> tok))
, ("caption", skipopts *> setCaption)
, ("bibliography", mempty <$ (skipopts *> braced >>=
addMeta "bibliography" . splitBibs))
, ("addbibresource", mempty <$ (skipopts *> braced >>=
addMeta "bibliography" . splitBibs))
] ++ map ignoreBlocks
-- these commands will be ignored unless --parse-raw is specified,
-- in which case they will appear as raw latex blocks
[ "newcommand", "renewcommand", "newenvironment", "renewenvironment"
-- newcommand, etc. should be parsed by macro, but we need this
-- here so these aren't parsed as inline commands to ignore
, "special", "pdfannot", "pdfstringdef"
, "bibliographystyle"
, "maketitle", "makeindex", "makeglossary"
, "addcontentsline", "addtocontents", "addtocounter"
-- \ignore{} is used conventionally in literate haskell for definitions
-- that are to be processed by the compiler but not printed.
, "ignore"
, "hyperdef"
, "markboth", "markright", "markleft"
, "newpage"
]
2016-11-28 17:13:46 -05:00
addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m ()
addMeta field val = updateState $ \st ->
st{ stateMeta = addMetaField field val $ stateMeta st }
splitBibs :: String -> [Inlines]
splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',')
2016-11-28 17:13:46 -05:00
setCaption :: PandocMonad m => LP m Blocks
setCaption = do
ils <- tok
mblabel <- option Nothing $
try $ spaces' >> controlSeq "label" >> (Just <$> tok)
let ils' = case mblabel of
Just lab -> ils <> spanWith
("",[],[("data-label", stringify lab)]) mempty
Nothing -> ils
updateState $ \st -> st{ stateCaption = Just ils' }
return mempty
2016-11-28 17:13:46 -05:00
resetCaption :: PandocMonad m => LP m ()
resetCaption = updateState $ \st -> st{ stateCaption = Nothing }
2016-11-28 17:13:46 -05:00
authors :: PandocMonad m => LP m ()
authors = try $ do
char '{'
2012-02-05 09:06:34 -08:00
let oneAuthor = mconcat <$>
many1 (notFollowedBy' (controlSeq "and") >>
(inline <|> mempty <$ blockCommand))
-- skip e.g. \vspace{10pt}
auths <- sepBy oneAuthor (controlSeq "and")
2012-02-05 09:06:34 -08:00
char '}'
addMeta "author" (map trimInlines auths)
2016-11-28 17:13:46 -05:00
section :: PandocMonad m => Attr -> Int -> LP m Blocks
section (ident, classes, kvs) lvl = do
hasChapters <- stateHasChapters `fmap` getState
let lvl' = if hasChapters then lvl + 1 else lvl
skipopts
contents <- grouped inline
lab <- option ident $ try (spaces' >> controlSeq "label" >> spaces' >> braced)
attr' <- registerHeader (lab, classes, kvs) contents
return $ headerWith attr' lvl' contents
2016-11-28 17:13:46 -05:00
inlineCommand :: PandocMonad m => LP m Inlines
inlineCommand = try $ do
name <- anyControlSeq
guard $ name /= "begin" && name /= "end"
guard $ not $ isBlockCommand name
parseRaw <- getOption readerParseRaw
star <- option "" (string "*")
let name' = name ++ star
let raw = do
rawargs <- withRaw
(skipangles *> skipopts *> option "" dimenarg *> many braced)
let rawcommand = '\\' : name ++ star ++ snd rawargs
transformed <- applyMacros' rawcommand
if transformed /= rawcommand
then parseFromString inlines transformed
else if parseRaw
then return $ rawInline "latex" rawcommand
else return mempty
(lookupListDefault mzero [name',name] inlineCommands <*
optional (try (string "{}")))
2015-04-12 09:47:13 +03:00
<|> raw
2016-11-28 17:13:46 -05:00
unlessParseRaw :: PandocMonad m => LP m ()
unlessParseRaw = getOption readerParseRaw >>= guard . not
isBlockCommand :: String -> Bool
2016-11-28 17:13:46 -05:00
isBlockCommand s = s `M.member` (blockCommands :: M.Map String (LP PandocPure Blocks))
2016-11-28 17:13:46 -05:00
inlineEnvironments :: PandocMonad m => M.Map String (LP m Inlines)
inlineEnvironments = M.fromList
[ ("displaymath", mathEnv id Nothing "displaymath")
, ("math", math <$> verbEnv "math")
, ("equation", mathEnv id Nothing "equation")
, ("equation*", mathEnv id Nothing "equation*")
, ("gather", mathEnv id (Just "gathered") "gather")
, ("gather*", mathEnv id (Just "gathered") "gather*")
, ("multline", mathEnv id (Just "gathered") "multline")
, ("multline*", mathEnv id (Just "gathered") "multline*")
, ("eqnarray", mathEnv id (Just "aligned") "eqnarray")
, ("eqnarray*", mathEnv id (Just "aligned") "eqnarray*")
, ("align", mathEnv id (Just "aligned") "align")
, ("align*", mathEnv id (Just "aligned") "align*")
, ("alignat", mathEnv id (Just "aligned") "alignat")
, ("alignat*", mathEnv id (Just "aligned") "alignat*")
]
2016-11-28 17:13:46 -05:00
inlineCommands :: PandocMonad m => M.Map String (LP m Inlines)
inlineCommands = M.fromList $
[ ("emph", extractSpaces emph <$> tok)
, ("textit", extractSpaces emph <$> tok)
, ("textsl", extractSpaces emph <$> tok)
, ("textsc", extractSpaces smallcaps <$> tok)
, ("sout", extractSpaces strikeout <$> tok)
, ("textsuperscript", extractSpaces superscript <$> tok)
, ("textsubscript", extractSpaces subscript <$> tok)
, ("textbackslash", lit "\\")
, ("backslash", lit "\\")
, ("slash", lit "/")
, ("textbf", extractSpaces strong <$> tok)
, ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
, ("ldots", lit "")
, ("dots", lit "")
, ("mdots", lit "")
, ("sim", lit "~")
, ("label", unlessParseRaw >> (inBrackets <$> tok))
, ("ref", unlessParseRaw >> (inBrackets <$> tok))
, ("noindent", unlessParseRaw >> return mempty)
, ("textgreek", tok)
2014-10-03 11:33:02 +10:00
, ("sep", lit ",")
, ("cref", unlessParseRaw >> (inBrackets <$> tok)) -- from cleveref.sty
, ("(", mathInline $ manyTill anyChar (try $ string "\\)"))
, ("[", mathDisplay $ manyTill anyChar (try $ string "\\]"))
2015-04-12 09:47:13 +03:00
, ("ensuremath", mathInline braced)
, ("texorpdfstring", (\_ x -> x) <$> tok <*> tok)
, ("P", lit "")
, ("S", lit "§")
, ("$", lit "$")
, ("%", lit "%")
, ("&", lit "&")
, ("#", lit "#")
, ("_", lit "_")
, ("{", lit "{")
, ("}", lit "}")
-- old TeX commands
, ("em", extractSpaces emph <$> inlines)
, ("it", extractSpaces emph <$> inlines)
, ("sl", extractSpaces emph <$> inlines)
, ("bf", extractSpaces strong <$> inlines)
, ("rm", inlines)
, ("itshape", extractSpaces emph <$> inlines)
, ("slshape", extractSpaces emph <$> inlines)
, ("scshape", extractSpaces smallcaps <$> inlines)
, ("bfseries", extractSpaces strong <$> inlines)
, ("/", pure mempty) -- italic correction
, ("aa", lit "å")
, ("AA", lit "Å")
, ("ss", lit "ß")
, ("o", lit "ø")
, ("O", lit "Ø")
, ("L", lit "Ł")
, ("l", lit "ł")
, ("ae", lit "æ")
, ("AE", lit "Æ")
, ("oe", lit "œ")
, ("OE", lit "Œ")
, ("pounds", lit "£")
, ("euro", lit "")
, ("copyright", lit "©")
, ("textasciicircum", lit "^")
, ("textasciitilde", lit "~")
, ("H", try $ tok >>= accent hungarumlaut)
2012-02-25 09:55:38 -08:00
, ("`", option (str "`") $ try $ tok >>= accent grave)
, ("'", option (str "'") $ try $ tok >>= accent acute)
, ("^", option (str "^") $ try $ tok >>= accent circ)
, ("~", option (str "~") $ try $ tok >>= accent tilde)
, ("\"", option (str "\"") $ try $ tok >>= accent umlaut)
, (".", option (str ".") $ try $ tok >>= accent dot)
, ("=", option (str "=") $ try $ tok >>= accent macron)
, ("c", option (str "c") $ try $ tok >>= accent cedilla)
, ("v", option (str "v") $ try $ tok >>= accent hacek)
, ("u", option (str "u") $ try $ tok >>= accent breve)
, ("i", lit "i")
, ("\\", linebreak <$ (optional (bracketed inline) *> spaces'))
, (",", pure mempty)
, ("@", pure mempty)
, (" ", lit "\160")
, ("ps", pure $ str "PS." <> space)
2012-02-05 08:50:25 -08:00
, ("TeX", lit "TeX")
, ("LaTeX", lit "LaTeX")
, ("bar", lit "|")
, ("textless", lit "<")
, ("textgreater", lit ">")
, ("thanks", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
, ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
, ("verb", doverb)
, ("lstinline", skipopts *> doverb)
, ("Verb", doverb)
, ("texttt", (code . stringify . toList) <$> tok)
, ("url", (unescapeURL <$> braced) >>= \url ->
pure (link url "" (str url)))
, ("href", (unescapeURL <$> braced <* optional sp) >>= \url ->
tok >>= \lab ->
pure (link url "" lab))
, ("includegraphics", do options <- option [] keyvals
src <- unescapeURL . removeDoubleQuotes <$> braced
mkImage options src)
2012-10-15 20:15:34 -07:00
, ("enquote", enquote)
, ("cite", citation "cite" NormalCitation False)
, ("Cite", citation "Cite" NormalCitation False)
, ("citep", citation "citep" NormalCitation False)
, ("citep*", citation "citep*" NormalCitation False)
, ("citeal", citation "citeal" NormalCitation False)
, ("citealp", citation "citealp" NormalCitation False)
, ("citealp*", citation "citealp*" NormalCitation False)
, ("autocite", citation "autocite" NormalCitation False)
, ("smartcite", citation "smartcite" NormalCitation False)
, ("footcite", inNote <$> citation "footcite" NormalCitation False)
, ("parencite", citation "parencite" NormalCitation False)
, ("supercite", citation "supercite" NormalCitation False)
, ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
, ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
, ("citeyear", citation "citeyear" SuppressAuthor False)
, ("autocite*", citation "autocite*" SuppressAuthor False)
, ("cite*", citation "cite*" SuppressAuthor False)
, ("parencite*", citation "parencite*" SuppressAuthor False)
, ("textcite", citation "textcite" AuthorInText False)
, ("citet", citation "citet" AuthorInText False)
, ("citet*", citation "citet*" AuthorInText False)
, ("citealt", citation "citealt" AuthorInText False)
, ("citealt*", citation "citealt*" AuthorInText False)
, ("textcites", citation "textcites" AuthorInText True)
, ("cites", citation "cites" NormalCitation True)
, ("autocites", citation "autocites" NormalCitation True)
, ("footcites", inNote <$> citation "footcites" NormalCitation True)
, ("parencites", citation "parencites" NormalCitation True)
, ("supercites", citation "supercites" NormalCitation True)
, ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
, ("Autocite", citation "Autocite" NormalCitation False)
, ("Smartcite", citation "Smartcite" NormalCitation False)
, ("Footcite", citation "Footcite" NormalCitation False)
, ("Parencite", citation "Parencite" NormalCitation False)
, ("Supercite", citation "Supercite" NormalCitation False)
, ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
, ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
, ("Citeyear", citation "Citeyear" SuppressAuthor False)
, ("Autocite*", citation "Autocite*" SuppressAuthor False)
, ("Cite*", citation "Cite*" SuppressAuthor False)
, ("Parencite*", citation "Parencite*" SuppressAuthor False)
, ("Textcite", citation "Textcite" AuthorInText False)
, ("Textcites", citation "Textcites" AuthorInText True)
, ("Cites", citation "Cites" NormalCitation True)
, ("Autocites", citation "Autocites" NormalCitation True)
, ("Footcites", citation "Footcites" NormalCitation True)
, ("Parencites", citation "Parencites" NormalCitation True)
, ("Supercites", citation "Supercites" NormalCitation True)
, ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
, ("citetext", complexNatbibCitation NormalCitation)
, ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *>
complexNatbibCitation AuthorInText)
<|> citation "citeauthor" AuthorInText False)
, ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
addMeta "nocite"))
] ++ map ignoreInlines
-- these commands will be ignored unless --parse-raw is specified,
-- in which case they will appear as raw latex blocks:
[ "index" ]
2016-11-28 17:13:46 -05:00
mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines
mkImage options src = do
let replaceTextwidth (k,v) = case numUnit v of
Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%")
_ -> (k, v)
let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options
let attr = ("",[], kvs)
let alt = str "image"
case takeExtension src of
"" -> do
defaultExt <- getOption readerDefaultImageExtension
return $ imageWith attr (addExtension src defaultExt) "" alt
_ -> return $ imageWith attr src "" alt
inNote :: Inlines -> Inlines
inNote ils =
note $ para $ ils <> str "."
unescapeURL :: String -> String
unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String)
unescapeURL (x:xs) = x:unescapeURL xs
unescapeURL [] = ""
2016-11-28 17:13:46 -05:00
enquote :: PandocMonad m => LP m Inlines
2012-10-15 20:15:34 -07:00
enquote = do
skipopts
context <- stateQuoteContext <$> getState
if context == InDoubleQuote
then singleQuoted <$> withQuoteContext InSingleQuote tok
else doubleQuoted <$> withQuoteContext InDoubleQuote tok
2016-11-28 17:13:46 -05:00
doverb :: PandocMonad m => LP m Inlines
doverb = do
marker <- anyChar
code <$> manyTill (satisfy (/='\n')) (char marker)
2016-11-28 17:13:46 -05:00
doLHSverb :: PandocMonad m => LP m Inlines
doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|')
2016-11-28 17:13:46 -05:00
lit :: String -> LP m Inlines
lit = pure . str
2016-11-28 17:13:46 -05:00
accent :: (Char -> String) -> Inlines -> LP m Inlines
accent f ils =
case toList ils of
2015-04-12 09:47:13 +03:00
(Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys)
[] -> mzero
_ -> return ils
grave :: Char -> String
grave 'A' = "À"
grave 'E' = "È"
grave 'I' = "Ì"
grave 'O' = "Ò"
grave 'U' = "Ù"
grave 'a' = "à"
grave 'e' = "è"
grave 'i' = "ì"
grave 'o' = "ò"
grave 'u' = "ù"
grave c = [c]
acute :: Char -> String
acute 'A' = "Á"
acute 'E' = "É"
acute 'I' = "Í"
acute 'O' = "Ó"
acute 'U' = "Ú"
acute 'Y' = "Ý"
acute 'a' = "á"
acute 'e' = "é"
acute 'i' = "í"
acute 'o' = "ó"
acute 'u' = "ú"
acute 'y' = "ý"
acute 'C' = "Ć"
acute 'c' = "ć"
acute 'L' = "Ĺ"
acute 'l' = "ĺ"
acute 'N' = "Ń"
acute 'n' = "ń"
acute 'R' = "Ŕ"
acute 'r' = "ŕ"
acute 'S' = "Ś"
acute 's' = "ś"
acute 'Z' = "Ź"
acute 'z' = "ź"
acute c = [c]
circ :: Char -> String
circ 'A' = "Â"
circ 'E' = "Ê"
circ 'I' = "Î"
circ 'O' = "Ô"
circ 'U' = "Û"
circ 'a' = "â"
circ 'e' = "ê"
circ 'i' = "î"
circ 'o' = "ô"
circ 'u' = "û"
circ 'C' = "Ĉ"
circ 'c' = "ĉ"
circ 'G' = "Ĝ"
circ 'g' = "ĝ"
circ 'H' = "Ĥ"
circ 'h' = "ĥ"
circ 'J' = "Ĵ"
circ 'j' = "ĵ"
circ 'S' = "Ŝ"
circ 's' = "ŝ"
circ 'W' = "Ŵ"
circ 'w' = "ŵ"
circ 'Y' = "Ŷ"
circ 'y' = "ŷ"
circ c = [c]
tilde :: Char -> String
tilde 'A' = "Ã"
tilde 'a' = "ã"
tilde 'O' = "Õ"
tilde 'o' = "õ"
tilde 'I' = "Ĩ"
tilde 'i' = "ĩ"
tilde 'U' = "Ũ"
tilde 'u' = "ũ"
tilde 'N' = "Ñ"
tilde 'n' = "ñ"
tilde c = [c]
umlaut :: Char -> String
umlaut 'A' = "Ä"
umlaut 'E' = "Ë"
umlaut 'I' = "Ï"
umlaut 'O' = "Ö"
umlaut 'U' = "Ü"
umlaut 'a' = "ä"
umlaut 'e' = "ë"
umlaut 'i' = "ï"
umlaut 'o' = "ö"
umlaut 'u' = "ü"
umlaut c = [c]
hungarumlaut :: Char -> String
hungarumlaut 'A' = ""
hungarumlaut 'E' = ""
hungarumlaut 'I' = ""
hungarumlaut 'O' = "Ő"
hungarumlaut 'U' = "Ű"
hungarumlaut 'Y' = "ӳ"
hungarumlaut 'a' = ""
hungarumlaut 'e' = ""
hungarumlaut 'i' = ""
hungarumlaut 'o' = "ő"
hungarumlaut 'u' = "ű"
hungarumlaut 'y' = "ӳ"
hungarumlaut c = [c]
dot :: Char -> String
dot 'C' = "Ċ"
dot 'c' = "ċ"
dot 'E' = "Ė"
dot 'e' = "ė"
dot 'G' = "Ġ"
dot 'g' = "ġ"
dot 'I' = "İ"
dot 'Z' = "Ż"
dot 'z' = "ż"
dot c = [c]
macron :: Char -> String
macron 'A' = "Ā"
macron 'E' = "Ē"
macron 'I' = "Ī"
macron 'O' = "Ō"
macron 'U' = "Ū"
macron 'a' = "ā"
macron 'e' = "ē"
macron 'i' = "ī"
macron 'o' = "ō"
macron 'u' = "ū"
macron c = [c]
cedilla :: Char -> String
cedilla 'c' = "ç"
cedilla 'C' = "Ç"
cedilla 's' = "ş"
cedilla 'S' = "Ş"
cedilla 't' = "ţ"
cedilla 'T' = "Ţ"
cedilla 'e' = "ȩ"
cedilla 'E' = "Ȩ"
cedilla 'h' = ""
cedilla 'H' = ""
cedilla 'o' = ""
cedilla 'O' = ""
cedilla c = [c]
hacek :: Char -> String
hacek 'A' = "Ǎ"
hacek 'a' = "ǎ"
hacek 'C' = "Č"
hacek 'c' = "č"
hacek 'D' = "Ď"
hacek 'd' = "ď"
hacek 'E' = "Ě"
hacek 'e' = "ě"
hacek 'G' = "Ǧ"
hacek 'g' = "ǧ"
hacek 'H' = "Ȟ"
hacek 'h' = "ȟ"
hacek 'I' = "Ǐ"
hacek 'i' = "ǐ"
hacek 'j' = "ǰ"
hacek 'K' = "Ǩ"
hacek 'k' = "ǩ"
hacek 'L' = "Ľ"
hacek 'l' = "ľ"
hacek 'N' = "Ň"
hacek 'n' = "ň"
hacek 'O' = "Ǒ"
hacek 'o' = "ǒ"
hacek 'R' = "Ř"
hacek 'r' = "ř"
hacek 'S' = "Š"
hacek 's' = "š"
hacek 'T' = "Ť"
hacek 't' = "ť"
hacek 'U' = "Ǔ"
hacek 'u' = "ǔ"
hacek 'Z' = "Ž"
hacek 'z' = "ž"
hacek c = [c]
breve :: Char -> String
breve 'A' = "Ă"
breve 'a' = "ă"
breve 'E' = "Ĕ"
breve 'e' = "ĕ"
breve 'G' = "Ğ"
breve 'g' = "ğ"
breve 'I' = "Ĭ"
breve 'i' = "ĭ"
breve 'O' = "Ŏ"
breve 'o' = "ŏ"
breve 'U' = "Ŭ"
breve 'u' = "ŭ"
breve c = [c]
2016-11-28 17:13:46 -05:00
tok :: PandocMonad m => LP m Inlines
2015-04-12 09:47:13 +03:00
tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar
2016-11-28 17:13:46 -05:00
opt :: PandocMonad m => LP m Inlines
opt = bracketed inline
2016-11-28 17:13:46 -05:00
rawopt :: PandocMonad m => LP m String
rawopt = do
contents <- bracketed (many1 (noneOf "[]") <|> try (string "\\]") <|>
try (string "\\[") <|> rawopt)
optional sp
return $ "[" ++ contents ++ "]"
2016-11-28 17:13:46 -05:00
skipopts :: PandocMonad m => LP m ()
skipopts = skipMany rawopt
-- opts in angle brackets are used in beamer
2016-11-28 17:13:46 -05:00
rawangle :: PandocMonad m => LP m ()
rawangle = try $ do
char '<'
skipMany (noneOf ">")
char '>'
return ()
2016-11-28 17:13:46 -05:00
skipangles :: PandocMonad m => LP m ()
skipangles = skipMany rawangle
2016-11-28 17:13:46 -05:00
inlineText :: PandocMonad m => LP m Inlines
inlineText = str <$> many1 inlineChar
2016-11-28 17:13:46 -05:00
inlineChar :: PandocMonad m => LP m Char
inlineChar = noneOf "\\$%&~#{}^'`\"‘’“”-[] \t\n"
2016-11-28 17:13:46 -05:00
environment :: PandocMonad m => LP m Blocks
environment = do
controlSeq "begin"
name <- braced
2015-04-12 09:47:13 +03:00
M.findWithDefault mzero name environments
<|> rawEnv name
2012-02-04 12:27:24 -08:00
2016-11-28 17:13:46 -05:00
inlineEnvironment :: PandocMonad m => LP m Inlines
inlineEnvironment = try $ do
controlSeq "begin"
name <- braced
2015-04-12 09:47:13 +03:00
M.findWithDefault mzero name inlineEnvironments
2016-11-28 17:13:46 -05:00
rawEnv :: PandocMonad m => String -> LP m Blocks
2012-02-04 12:27:24 -08:00
rawEnv name = do
parseRaw <- getOption readerParseRaw
rawOptions <- mconcat <$> many rawopt
let addBegin x = "\\begin{" ++ name ++ "}" ++ rawOptions ++ x
2012-02-04 12:27:24 -08:00
if parseRaw
then (rawBlock "latex" . addBegin) <$>
(withRaw (env name blocks) >>= applyMacros' . snd)
else env name blocks
----
braced' :: PandocMonad m => LP m String
braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}')
maybeAddExtension :: String -> FilePath -> FilePath
maybeAddExtension ext fp =
if null (takeExtension fp)
then addExtension fp ext
else fp
include :: PandocMonad m => LP m Blocks
include = do
fs' <- try $ do
char '\\'
name <- try (string "include")
<|> try (string "input")
<|> string "usepackage"
-- skip options
2015-04-12 09:47:13 +03:00
skipMany $ try $ char '[' *> manyTill anyChar (char ']')
fs <- (map trim . splitBy (==',')) <$> braced'
return $ if name == "usepackage"
then map (maybeAddExtension ".sty") fs
else map (maybeAddExtension ".tex") fs
oldPos <- getPosition
oldInput <- getInput
-- now process each include file in order...
mconcat <$> forM fs' (\f -> do
containers <- stateContainers <$> getState
when (f `elem` containers) $
throwError $ PandocParseError $ "Include file loop at " ++ show oldPos
updateState $ \s -> s{ stateContainers = f : stateContainers s }
contents <- lift $ readTeXFile f
setPosition $ newPos f 1 1
setInput contents
bs <- blocks
setInput oldInput
setPosition oldPos
updateState $ \s -> s{ stateContainers = tail $ stateContainers s }
return bs)
readTeXFile :: PandocMonad m => FilePath -> m String
readTeXFile f = do
texinputs <- fromMaybe "." <$> lookupEnv "TEXINPUTS"
readFileFromDirs (splitBy (==':') texinputs) f
readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m String
readFileFromDirs [] f = do
warning $ "Could not load include file " ++ f ++ ", skipping."
return ""
readFileFromDirs (d:ds) f = do
res <- readFileLazy' (d </> f)
case res of
Right s -> return s
Left _ -> readFileFromDirs ds f
readFileLazy' :: PandocMonad m => FilePath -> m (Either PandocError String)
readFileLazy' f = catchError ((Right . UTF8.toStringLazy) <$> readFileLazy f) $
\(e :: PandocError) -> return (Left e)
----
2016-11-28 17:13:46 -05:00
keyval :: PandocMonad m => LP m (String, String)
keyval = try $ do
key <- many1 alphaNum
val <- option "" $ char '=' >> many1 (alphaNum <|> char '.' <|> char '\\')
skipMany spaceChar
optional (char ',')
skipMany spaceChar
return (key, val)
2016-11-28 17:13:46 -05:00
keyvals :: PandocMonad m => LP m [(String, String)]
keyvals = try $ char '[' *> manyTill keyval (char ']')
2016-11-28 17:13:46 -05:00
alltt :: PandocMonad m => String -> LP m Blocks
alltt t = walk strToCode <$> parseFromString blocks
(substitute " " "\\ " $ substitute "%" "\\%" $
2015-04-12 09:47:13 +03:00
intercalate "\\\\\n" $ lines t)
where strToCode (Str s) = Code nullAttr s
strToCode x = x
2016-11-28 17:13:46 -05:00
rawLaTeXBlock :: PandocMonad m => LP m String
rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand))
2016-11-28 17:13:46 -05:00
rawLaTeXInline :: PandocMonad m => LP m Inline
rawLaTeXInline = do
raw <- (snd <$> withRaw inlineCommand) <|> (snd <$> withRaw blockCommand)
RawInline "latex" <$> applyMacros' raw
2016-11-28 17:13:46 -05:00
addImageCaption :: PandocMonad m => Blocks -> LP m Blocks
addImageCaption = walkM go
where go (Image attr alt (src,tit)) = do
mbcapt <- stateCaption <$> getState
2015-04-12 09:47:13 +03:00
return $ case mbcapt of
Just ils -> Image attr (toList ils) (src, "fig:")
Nothing -> Image attr alt (src,tit)
go x = return x
2016-11-28 17:13:46 -05:00
addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
addTableCaption = walkM go
where go (Table c als ws hs rs) = do
mbcapt <- stateCaption <$> getState
2015-04-12 09:47:13 +03:00
return $ case mbcapt of
Just ils -> Table (toList ils) als ws hs rs
Nothing -> Table c als ws hs rs
go x = return x
2016-11-28 17:13:46 -05:00
environments :: PandocMonad m => M.Map String (LP m Blocks)
environments = M.fromList
[ ("document", env "document" blocks <* skipMany anyChar)
, ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
2015-04-12 09:47:13 +03:00
, ("letter", env "letter" letterContents)
, ("minipage", env "minipage" $
skipopts *> spaces' *> optional braced *> spaces' *> blocks)
, ("figure", env "figure" $
resetCaption *> skipopts *> blocks >>= addImageCaption)
, ("center", env "center" blocks)
2015-10-15 23:15:40 -07:00
, ("longtable", env "longtable" $
resetCaption *> simpTable False >>= addTableCaption)
, ("table", env "table" $
resetCaption *> skipopts *> blocks >>= addTableCaption)
, ("tabular*", env "tabular" $ simpTable True)
, ("tabular", env "tabular" $ simpTable False)
, ("quote", blockQuote <$> env "quote" blocks)
, ("quotation", blockQuote <$> env "quotation" blocks)
, ("verse", blockQuote <$> env "verse" blocks)
, ("itemize", bulletList <$> listenv "itemize" (many item))
, ("description", definitionList <$> listenv "description" (many descItem))
2015-04-12 09:47:13 +03:00
, ("enumerate", orderedList')
, ("alltt", alltt =<< verbEnv "alltt")
, ("code", guardEnabled Ext_literate_haskell *>
2012-02-04 12:27:37 -08:00
(codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
verbEnv "code"))
, ("comment", mempty <$ verbEnv "comment")
2015-04-12 09:47:13 +03:00
, ("verbatim", codeBlock <$> verbEnv "verbatim")
, ("Verbatim", fancyverbEnv "Verbatim")
, ("BVerbatim", fancyverbEnv "BVerbatim")
, ("lstlisting", do options <- option [] keyvals
let kvs = [ (if k == "firstnumber"
then "startFrom"
else k, v) | (k,v) <- options ]
let classes = [ "numberLines" |
lookup "numbers" options == Just "left" ]
2015-04-12 09:47:13 +03:00
++ maybeToList (lookup "language" options
>>= fromListingsLanguage)
let attr = (fromMaybe "" (lookup "label" options),classes,kvs)
2015-04-12 09:47:13 +03:00
codeBlockWith attr <$> verbEnv "lstlisting")
, ("minted", do options <- option [] keyvals
lang <- grouped (many1 $ satisfy (/='}'))
let kvs = [ (if k == "firstnumber"
then "startFrom"
else k, v) | (k,v) <- options ]
let classes = [ lang | not (null lang) ] ++
[ "numberLines" |
lookup "linenos" options == Just "true" ]
let attr = ("",classes,kvs)
2015-04-12 09:47:13 +03:00
codeBlockWith attr <$> verbEnv "minted")
, ("obeylines", parseFromString
(para . trimInlines . mconcat <$> many inline) =<<
intercalate "\\\\\n" . lines <$> verbEnv "obeylines")
, ("displaymath", mathEnv para Nothing "displaymath")
, ("equation", mathEnv para Nothing "equation")
, ("equation*", mathEnv para Nothing "equation*")
, ("gather", mathEnv para (Just "gathered") "gather")
, ("gather*", mathEnv para (Just "gathered") "gather*")
, ("multline", mathEnv para (Just "gathered") "multline")
, ("multline*", mathEnv para (Just "gathered") "multline*")
, ("eqnarray", mathEnv para (Just "aligned") "eqnarray")
, ("eqnarray*", mathEnv para (Just "aligned") "eqnarray*")
, ("align", mathEnv para (Just "aligned") "align")
, ("align*", mathEnv para (Just "aligned") "align*")
, ("alignat", mathEnv para (Just "aligned") "alignat")
, ("alignat*", mathEnv para (Just "aligned") "alignat*")
]
2016-11-28 17:13:46 -05:00
letterContents :: PandocMonad m => LP m Blocks
2015-04-12 09:47:13 +03:00
letterContents = do
bs <- blocks
st <- getState
-- add signature (author) and address (title)
let addr = case lookupMeta "address" (stateMeta st) of
Just (MetaBlocks [Plain xs]) ->
para $ trimInlines $ fromList xs
_ -> mempty
return $ addr <> bs -- sig added by \closing
2016-11-28 17:13:46 -05:00
closing :: PandocMonad m => LP m Blocks
closing = do
contents <- tok
st <- getState
let extractInlines (MetaBlocks [Plain ys]) = ys
extractInlines (MetaBlocks [Para ys ]) = ys
extractInlines _ = []
let sigs = case lookupMeta "author" (stateMeta st) of
Just (MetaList xs) ->
para $ trimInlines $ fromList $
intercalate [LineBreak] $ map extractInlines xs
_ -> mempty
return $ para (trimInlines contents) <> sigs
2016-11-28 17:13:46 -05:00
item :: PandocMonad m => LP m Blocks
item = blocks *> controlSeq "item" *> skipopts *> blocks
2016-11-28 17:13:46 -05:00
looseItem :: PandocMonad m => LP m Blocks
2015-04-12 09:47:13 +03:00
looseItem = do
ctx <- stateParserContext `fmap` getState
if ctx == ListItemState
then mzero
else return mempty
2016-11-28 17:13:46 -05:00
descItem :: PandocMonad m => LP m (Inlines, [Blocks])
descItem = do
blocks -- skip blocks before item
controlSeq "item"
optional sp
ils <- opt
bs <- blocks
return (ils, [bs])
2016-11-28 17:13:46 -05:00
env :: PandocMonad m => String -> LP m a -> LP m a
env name p = p <*
(try (controlSeq "end" *> braced >>= guard . (== name))
<?> ("\\end{" ++ name ++ "}"))
2016-11-28 17:13:46 -05:00
listenv :: PandocMonad m => String -> LP m a -> LP m a
listenv name p = try $ do
oldCtx <- stateParserContext `fmap` getState
updateState $ \st -> st{ stateParserContext = ListItemState }
res <- env name p
updateState $ \st -> st{ stateParserContext = oldCtx }
return res
2016-11-28 17:13:46 -05:00
mathEnv :: PandocMonad m => (Inlines -> a) -> Maybe String -> String -> LP m a
mathEnv f innerEnv name = f <$> mathDisplay (inner <$> verbEnv name)
where inner x = case innerEnv of
Nothing -> x
Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++
"\\end{" ++ y ++ "}"
2016-11-28 17:13:46 -05:00
verbEnv :: PandocMonad m => String -> LP m String
verbEnv name = do
skipopts
optional blankline
let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name)
res <- manyTill anyChar endEnv
return $ stripTrailingNewlines res
2016-11-28 17:13:46 -05:00
fancyverbEnv :: PandocMonad m => String -> LP m Blocks
fancyverbEnv name = do
options <- option [] keyvals
let kvs = [ (if k == "firstnumber"
then "startFrom"
else k, v) | (k,v) <- options ]
let classes = [ "numberLines" |
lookup "numbers" options == Just "left" ]
let attr = ("",classes,kvs)
codeBlockWith attr <$> verbEnv name
2016-11-28 17:13:46 -05:00
orderedList' :: PandocMonad m => LP m Blocks
2015-04-12 09:47:13 +03:00
orderedList' = do
optional sp
(_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
try $ char '[' *> anyOrderedListMarker <* char ']'
spaces
optional $ try $ controlSeq "setlength" *> grouped (controlSeq "itemindent") *> braced
spaces
start <- option 1 $ try $ do controlSeq "setcounter"
grouped (string "enum" *> many1 (oneOf "iv"))
optional sp
num <- grouped (many1 digit)
spaces
2015-04-12 09:47:13 +03:00
return (read num + 1 :: Int)
bs <- listenv "enumerate" (many item)
return $ orderedListWith (start, style, delim) bs
2016-11-28 17:13:46 -05:00
paragraph :: PandocMonad m => LP m Blocks
paragraph = do
x <- trimInlines . mconcat <$> many1 inline
if x == mempty
then return mempty
else return $ para x
2016-11-28 17:13:46 -05:00
preamble :: PandocMonad m => LP m Blocks
preamble = mempty <$> manyTill preambleBlock beginDoc
where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}"
2015-04-12 09:47:13 +03:00
preambleBlock = void comment
<|> void sp
<|> void blanklines
<|> void include
2015-04-12 09:47:13 +03:00
<|> void macro
<|> void blockCommand
<|> void anyControlSeq
<|> void braced
<|> void anyChar
-------
-- citations
addPrefix :: [Inline] -> [Citation] -> [Citation]
addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
addPrefix _ _ = []
addSuffix :: [Inline] -> [Citation] -> [Citation]
addSuffix s ks@(_:_) =
let k = last ks
in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
addSuffix _ _ = []
2016-11-28 17:13:46 -05:00
simpleCiteArgs :: PandocMonad m => LP m [Citation]
simpleCiteArgs = try $ do
first <- optionMaybe $ toList <$> opt
second <- optionMaybe $ toList <$> opt
char '{'
optional sp
keys <- manyTill citationLabel (char '}')
let (pre, suf) = case (first , second ) of
(Just s , Nothing) -> (mempty, s )
(Just s , Just t ) -> (s , t )
_ -> (mempty, mempty)
conv k = Citation { citationId = k
, citationPrefix = []
, citationSuffix = []
, citationMode = NormalCitation
, citationHash = 0
, citationNoteNum = 0
}
return $ addPrefix pre $ addSuffix suf $ map conv keys
2016-11-28 17:13:46 -05:00
citationLabel :: PandocMonad m => LP m String
citationLabel = optional sp *>
(many1 (satisfy isBibtexKeyChar)
<* optional sp
<* optional (char ',')
<* optional sp)
where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]" :: String)
2016-11-28 17:13:46 -05:00
cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
cites mode multi = try $ do
cits <- if multi
then many1 simpleCiteArgs
else count 1 simpleCiteArgs
let cs = concat cits
return $ case mode of
AuthorInText -> case cs of
(c:rest) -> c {citationMode = mode} : rest
[] -> []
_ -> map (\a -> a {citationMode = mode}) cs
2011-01-07 10:15:48 -08:00
2016-11-28 17:13:46 -05:00
citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines
citation name mode multi = do
(c,raw) <- withRaw $ cites mode multi
return $ cite c (rawInline "latex" $ "\\" ++ name ++ raw)
2016-11-28 17:13:46 -05:00
complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines
complexNatbibCitation mode = try $ do
let ils = (toList . trimInlines . mconcat) <$>
many (notFollowedBy (oneOf "\\};") >> inline)
let parseOne = try $ do
skipSpaces
pref <- ils
cit' <- inline -- expect a citation
let citlist = toList cit'
cits' <- case citlist of
[Cite cs _] -> return cs
_ -> mzero
suff <- ils
skipSpaces
optional $ char ';'
2015-04-12 09:47:13 +03:00
return $ addPrefix pref $ addSuffix suff cits'
(c:cits, raw) <- withRaw $ grouped parseOne
return $ cite (c{ citationMode = mode }:cits)
(rawInline "latex" $ "\\citetext" ++ raw)
2011-01-07 10:15:48 -08:00
-- tables
2011-01-07 10:15:48 -08:00
2016-11-28 17:13:46 -05:00
parseAligns :: PandocMonad m => LP m [Alignment]
2011-01-07 10:15:48 -08:00
parseAligns = try $ do
char '{'
let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced)
maybeBar
let cAlign = AlignCenter <$ char 'c'
let lAlign = AlignLeft <$ char 'l'
let rAlign = AlignRight <$ char 'r'
let parAlign = AlignLeft <$ (char 'p' >> braced)
let alignChar = cAlign <|> lAlign <|> rAlign <|> parAlign
aligns' <- sepEndBy alignChar maybeBar
spaces
2011-01-07 10:15:48 -08:00
char '}'
spaces
return aligns'
2016-11-28 17:13:46 -05:00
hline :: PandocMonad m => LP m ()
hline = try $ do
spaces'
controlSeq "hline" <|>
-- booktabs rules:
controlSeq "toprule" <|>
controlSeq "bottomrule" <|>
controlSeq "midrule" <|>
controlSeq "endhead" <|>
controlSeq "endfirsthead"
spaces'
optional $ bracketed (many1 (satisfy (/=']')))
return ()
2011-01-07 10:15:48 -08:00
2016-11-28 17:13:46 -05:00
lbreak :: PandocMonad m => LP m ()
lbreak = () <$ try (spaces' *>
(controlSeq "\\" <|> controlSeq "tabularnewline") <*
spaces')
2016-11-28 17:13:46 -05:00
amp :: PandocMonad m => LP m ()
amp = () <$ try (spaces' *> char '&' <* spaces')
2016-11-28 17:13:46 -05:00
parseTableRow :: PandocMonad m => Int -- ^ number of columns
-> LP m [Blocks]
2011-01-07 10:15:48 -08:00
parseTableRow cols = try $ do
let tableCellInline = notFollowedBy (amp <|> lbreak) >> inline
let minipage = try $ controlSeq "begin" *> string "{minipage}" *>
env "minipage"
(skipopts *> spaces' *> optional braced *> spaces' *> blocks)
let tableCell = minipage <|>
((plain . trimInlines . mconcat) <$> many tableCellInline)
cells' <- sepBy1 tableCell amp
let numcells = length cells'
guard $ numcells <= cols && numcells >= 1
guard $ cells' /= [mempty]
-- note: a & b in a three-column table leaves an empty 3rd cell:
let cells'' = cells' ++ replicate (cols - numcells) mempty
spaces'
return cells''
2011-01-07 10:15:48 -08:00
2016-11-28 17:13:46 -05:00
spaces' :: PandocMonad m => LP m ()
spaces' = spaces *> skipMany (comment *> spaces)
2016-11-28 17:13:46 -05:00
simpTable :: PandocMonad m => Bool -> LP m Blocks
simpTable hasWidthParameter = try $ do
when hasWidthParameter $ () <$ (spaces' >> tok)
skipopts
aligns <- parseAligns
let cols = length aligns
optional $ controlSeq "caption" *> skipopts *> setCaption
optional lbreak
spaces'
skipMany hline
spaces'
header' <- option [] $ try (parseTableRow cols <* lbreak <* many1 hline)
spaces'
rows <- sepEndBy (parseTableRow cols) (lbreak <* optional (skipMany hline))
spaces'
optional $ controlSeq "caption" *> skipopts *> setCaption
optional lbreak
spaces'
let header'' = if null header'
then replicate cols mempty
else header'
2012-02-04 12:51:27 -08:00
lookAhead $ controlSeq "end" -- make sure we're at end
return $ table mempty (zip aligns (repeat 0)) header'' rows
removeDoubleQuotes :: String -> String
removeDoubleQuotes ('"':xs) =
case reverse xs of
'"':ys -> reverse ys
_ -> '"':xs
removeDoubleQuotes xs = xs