pandoc/src/Text/Pandoc/Readers/LaTeX.hs
2014-07-22 16:37:41 +01:00

1294 lines
42 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{-# LANGUAGE ScopedTypeVariables, OverloadedStrings #-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Readers.LaTeX
Copyright : Copyright (C) 2006-2014 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Conversion of LaTeX to 'Pandoc' document.
-}
module Text.Pandoc.Readers.LaTeX ( readLaTeX,
rawLaTeXInline,
rawLaTeXBlock,
inlineCommand,
handleIncludes
) where
import Text.Pandoc.Definition
import Text.Pandoc.Walk
import Text.Pandoc.Shared
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding ((<|>), many, optional, space,
mathDisplay, mathInline)
import qualified Text.Pandoc.UTF8 as UTF8
import Data.Char ( chr, ord )
import Control.Monad.Trans (lift)
import Control.Monad
import Text.Pandoc.Builder
import Data.Char (isLetter, isAlphaNum)
import Control.Applicative
import Data.Monoid
import Data.Maybe (fromMaybe)
import System.Environment (getEnv)
import System.FilePath (replaceExtension, (</>))
import Data.List (intercalate, intersperse)
import qualified Data.Map as M
import qualified Control.Exception as E
import System.FilePath (takeExtension, addExtension)
import Text.Pandoc.Highlighting (fromListingsLanguage)
-- | Parse LaTeX from string and return 'Pandoc' document.
readLaTeX :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
-> Pandoc
readLaTeX opts = readWith parseLaTeX def{ stateOptions = opts }
parseLaTeX :: LP Pandoc
parseLaTeX = do
bs <- blocks
eof
st <- getState
let meta = stateMeta st
let (Pandoc _ bs') = doc bs
return $ Pandoc meta bs'
type LP = Parser [Char] ParserState
anyControlSeq :: LP String
anyControlSeq = do
char '\\'
next <- option '\n' anyChar
name <- case next of
'\n' -> return ""
c | isLetter c -> (c:) <$> (many letter <* optional sp)
| otherwise -> return [c]
return name
controlSeq :: String -> LP String
controlSeq name = try $ do
char '\\'
case name of
"" -> mzero
[c] | not (isLetter c) -> string [c]
cs -> string cs <* notFollowedBy letter <* optional sp
return name
dimenarg :: LP String
dimenarg = try $ do
ch <- option "" $ string "="
num <- many1 digit
dim <- oneOfStrings ["pt","pc","in","bp","cm","mm","dd","cc","sp"]
return $ ch ++ num ++ dim
sp :: LP ()
sp = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t')
<|> (try $ newline <* lookAhead anyChar <* notFollowedBy blankline)
isLowerHex :: Char -> Bool
isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
tildeEscape :: LP Char
tildeEscape = try $ do
string "^^"
c <- satisfy (\x -> x >= '\0' && x <= '\128')
d <- if isLowerHex c
then option "" $ count 1 (satisfy isLowerHex)
else return ""
if null d
then case ord c of
x | x >= 64 && x <= 127 -> return $ chr (x - 64)
| otherwise -> return $ chr (x + 64)
else return $ chr $ read ('0':'x':c:d)
comment :: LP ()
comment = do
char '%'
skipMany (satisfy (/='\n'))
optional newline
return ()
bgroup :: LP ()
bgroup = () <$ char '{'
<|> () <$ controlSeq "bgroup"
<|> () <$ controlSeq "begingroup"
egroup :: LP ()
egroup = () <$ char '}'
<|> () <$ controlSeq "egroup"
<|> () <$ controlSeq "endgroup"
grouped :: Monoid a => LP a -> LP a
grouped parser = try $ bgroup *> (mconcat <$> manyTill parser egroup)
braced :: LP String
braced = bgroup *> (concat <$> manyTill
( many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{'))
<|> try (string "\\}")
<|> try (string "\\{")
<|> try (string "\\\\")
<|> ((\x -> "{" ++ x ++ "}") <$> braced)
<|> count 1 anyChar
) egroup)
bracketed :: Monoid a => LP a -> LP a
bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']'))
mathDisplay :: LP String -> LP Inlines
mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim)
mathInline :: LP String -> LP Inlines
mathInline p = math <$> (try p >>= applyMacros')
mathChars :: LP String
mathChars = concat <$>
many ( many1 (satisfy (\c -> c /= '$' && c /='\\'))
<|> (\c -> ['\\',c]) <$> (try $ char '\\' *> anyChar)
)
quoted' :: (Inlines -> Inlines) -> LP String -> LP () -> LP Inlines
quoted' f starter ender = do
startchs <- starter
try ((f . mconcat) <$> manyTill inline ender) <|> lit startchs
double_quote :: LP Inlines
double_quote =
( quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
<|> quoted' doubleQuoted (string "") (void $ char '”')
-- the following is used by babel for localized quotes:
<|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'")
<|> quoted' doubleQuoted (string "\"") (void $ char '"')
)
single_quote :: LP Inlines
single_quote =
( quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter)
<|> quoted' singleQuoted (string "") (try $ char '' >> notFollowedBy letter)
)
inline :: LP Inlines
inline = (mempty <$ comment)
<|> (space <$ sp)
<|> inlineText
<|> inlineCommand
<|> inlineGroup
<|> (char '-' *> option (str "-")
((char '-') *> option (str "") (str "" <$ char '-')))
<|> double_quote
<|> single_quote
<|> (str "" <$ try (string "''"))
<|> (str "" <$ char '”')
<|> (str "" <$ char '\'')
<|> (str "" <$ char '')
<|> (str "\160" <$ char '~')
<|> (mathDisplay $ string "$$" *> mathChars <* string "$$")
<|> (mathInline $ char '$' *> mathChars <* char '$')
<|> (superscript <$> (char '^' *> tok))
<|> (subscript <$> (char '_' *> tok))
<|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb)
<|> (str . (:[]) <$> tildeEscape)
<|> (str . (:[]) <$> oneOf "[]")
<|> (str . (:[]) <$> oneOf "#&") -- TODO print warning?
-- <|> (str <$> count 1 (satisfy (\c -> c /= '\\' && c /='\n' && c /='}' && c /='{'))) -- eat random leftover characters
inlines :: LP Inlines
inlines = mconcat <$> many (notFollowedBy (char '}') *> inline)
inlineGroup :: LP Inlines
inlineGroup = do
ils <- grouped inline
if isNull ils
then return mempty
else return $ spanWith nullAttr ils
-- we need the span so we can detitlecase bibtex entries;
-- we need to know when something is {C}apitalized
block :: LP Blocks
block = (mempty <$ comment)
<|> (mempty <$ ((spaceChar <|> newline) *> spaces))
<|> environment
<|> macro
<|> blockCommand
<|> paragraph
<|> grouped block
<|> (mempty <$ char '&') -- loose & in table environment
blocks :: LP Blocks
blocks = mconcat <$> many block
blockCommand :: LP Blocks
blockCommand = try $ do
name <- anyControlSeq
guard $ name /= "begin" && name /= "end"
star <- option "" (string "*" <* optional sp)
let name' = name ++ star
case M.lookup name' blockCommands of
Just p -> p
Nothing -> case M.lookup name blockCommands of
Just p -> p
Nothing -> mzero
inBrackets :: Inlines -> Inlines
inBrackets x = (str "[") <> x <> (str "]")
-- eat an optional argument and one or more arguments in braces
ignoreInlines :: String -> (String, LP Inlines)
ignoreInlines name = (name, doraw <|> (mempty <$ optargs))
where optargs = skipopts *> skipMany (try $ optional sp *> braced)
contseq = '\\':name
doraw = (rawInline "latex" . (contseq ++) . snd) <$>
(getOption readerParseRaw >>= guard >> (withRaw optargs))
ignoreBlocks :: String -> (String, LP Blocks)
ignoreBlocks name = (name, doraw <|> (mempty <$ optargs))
where optargs = skipopts *> skipMany (try $ optional sp *> braced)
contseq = '\\':name
doraw = (rawBlock "latex" . (contseq ++) . snd) <$>
(getOption readerParseRaw >>= guard >> (withRaw optargs))
blockCommands :: M.Map String (LP Blocks)
blockCommands = M.fromList $
[ ("par", mempty <$ skipopts)
, ("title", mempty <$ (skipopts *> tok >>= addMeta "title"))
, ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle"))
, ("author", mempty <$ (skipopts *> authors))
-- -- in letter class, temp. store address & sig as title, author
, ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
, ("signature", mempty <$ (skipopts *> authors))
, ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
-- sectioning
, ("chapter", updateState (\s -> s{ stateHasChapters = True })
*> section nullAttr 0)
, ("chapter*", updateState (\s -> s{ stateHasChapters = True })
*> section ("",["unnumbered"],[]) 0)
, ("section", section nullAttr 1)
, ("section*", section ("",["unnumbered"],[]) 1)
, ("subsection", section nullAttr 2)
, ("subsection*", section ("",["unnumbered"],[]) 2)
, ("subsubsection", section nullAttr 3)
, ("subsubsection*", section ("",["unnumbered"],[]) 3)
, ("paragraph", section nullAttr 4)
, ("paragraph*", section ("",["unnumbered"],[]) 4)
, ("subparagraph", section nullAttr 5)
, ("subparagraph*", section ("",["unnumbered"],[]) 5)
-- beamer slides
, ("frametitle", section nullAttr 3)
, ("framesubtitle", section nullAttr 4)
-- letters
, ("opening", (para . trimInlines) <$> (skipopts *> tok))
, ("closing", skipopts *> closing)
--
, ("hrule", pure horizontalRule)
, ("rule", skipopts *> tok *> tok *> pure horizontalRule)
, ("item", skipopts *> loose_item)
, ("documentclass", skipopts *> braced *> preamble)
, ("centerline", (para . trimInlines) <$> (skipopts *> tok))
, ("caption", skipopts *> tok >>= setCaption)
, ("PandocStartInclude", startInclude)
, ("PandocEndInclude", endInclude)
, ("bibliography", mempty <$ (skipopts *> braced >>=
addMeta "bibliography" . splitBibs))
, ("addbibresource", mempty <$ (skipopts *> braced >>=
addMeta "bibliography" . splitBibs))
] ++ map ignoreBlocks
-- these commands will be ignored unless --parse-raw is specified,
-- in which case they will appear as raw latex blocks
[ "newcommand", "renewcommand", "newenvironment", "renewenvironment"
-- newcommand, etc. should be parsed by macro, but we need this
-- here so these aren't parsed as inline commands to ignore
, "special", "pdfannot", "pdfstringdef"
, "bibliographystyle"
, "maketitle", "makeindex", "makeglossary"
, "addcontentsline", "addtocontents", "addtocounter"
-- \ignore{} is used conventionally in literate haskell for definitions
-- that are to be processed by the compiler but not printed.
, "ignore"
, "hyperdef"
, "markboth", "markright", "markleft"
, "hspace", "vspace"
]
addMeta :: ToMetaValue a => String -> a -> LP ()
addMeta field val = updateState $ \st ->
st{ stateMeta = addMetaField field val $ stateMeta st }
splitBibs :: String -> [Inlines]
splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',')
setCaption :: Inlines -> LP Blocks
setCaption ils = do
updateState $ \st -> st{ stateCaption = Just ils }
return mempty
resetCaption :: LP ()
resetCaption = updateState $ \st -> st{ stateCaption = Nothing }
authors :: LP ()
authors = try $ do
char '{'
let oneAuthor = mconcat <$>
many1 (notFollowedBy' (controlSeq "and") >>
(inline <|> mempty <$ blockCommand))
-- skip e.g. \vspace{10pt}
auths <- sepBy oneAuthor (controlSeq "and")
char '}'
addMeta "author" (map trimInlines auths)
section :: Attr -> Int -> LP Blocks
section (ident, classes, kvs) lvl = do
hasChapters <- stateHasChapters `fmap` getState
let lvl' = if hasChapters then lvl + 1 else lvl
skipopts
contents <- grouped inline
lab <- option ident $ try (spaces >> controlSeq "label" >> spaces >> braced)
attr' <- registerHeader (lab, classes, kvs) contents
return $ headerWith attr' lvl' contents
inlineCommand :: LP Inlines
inlineCommand = try $ do
name <- anyControlSeq
guard $ name /= "begin" && name /= "end"
guard $ not $ isBlockCommand name
parseRaw <- getOption readerParseRaw
star <- option "" (string "*")
let name' = name ++ star
let raw = do
rawargs <- withRaw (skipopts *> option "" dimenarg *> many braced)
let rawcommand = '\\' : name ++ star ++ snd rawargs
transformed <- applyMacros' rawcommand
if transformed /= rawcommand
then parseFromString inlines transformed
else if parseRaw
then return $ rawInline "latex" rawcommand
else return mempty
case M.lookup name' inlineCommands of
Just p -> p <|> raw
Nothing -> case M.lookup name inlineCommands of
Just p -> p <|> raw
Nothing -> raw
unlessParseRaw :: LP ()
unlessParseRaw = getOption readerParseRaw >>= guard . not
isBlockCommand :: String -> Bool
isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands
inlineCommands :: M.Map String (LP Inlines)
inlineCommands = M.fromList $
[ ("emph", extractSpaces emph <$> tok)
, ("textit", extractSpaces emph <$> tok)
, ("textsl", extractSpaces emph <$> tok)
, ("textsc", extractSpaces smallcaps <$> tok)
, ("sout", extractSpaces strikeout <$> tok)
, ("textsuperscript", extractSpaces superscript <$> tok)
, ("textsubscript", extractSpaces subscript <$> tok)
, ("textbackslash", lit "\\")
, ("backslash", lit "\\")
, ("slash", lit "/")
, ("textbf", extractSpaces strong <$> tok)
, ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
, ("ldots", lit "")
, ("dots", lit "")
, ("mdots", lit "")
, ("sim", lit "~")
, ("label", unlessParseRaw >> (inBrackets <$> tok))
, ("ref", unlessParseRaw >> (inBrackets <$> tok))
, ("(", mathInline $ manyTill anyChar (try $ string "\\)"))
, ("[", mathDisplay $ manyTill anyChar (try $ string "\\]"))
, ("ensuremath", mathInline $ braced)
, ("P", lit "")
, ("S", lit "§")
, ("$", lit "$")
, ("%", lit "%")
, ("&", lit "&")
, ("#", lit "#")
, ("_", lit "_")
, ("{", lit "{")
, ("}", lit "}")
-- old TeX commands
, ("em", extractSpaces emph <$> inlines)
, ("it", extractSpaces emph <$> inlines)
, ("sl", extractSpaces emph <$> inlines)
, ("bf", extractSpaces strong <$> inlines)
, ("rm", inlines)
, ("itshape", extractSpaces emph <$> inlines)
, ("slshape", extractSpaces emph <$> inlines)
, ("scshape", extractSpaces smallcaps <$> inlines)
, ("bfseries", extractSpaces strong <$> inlines)
, ("/", pure mempty) -- italic correction
, ("aa", lit "å")
, ("AA", lit "Å")
, ("ss", lit "ß")
, ("o", lit "ø")
, ("O", lit "Ø")
, ("L", lit "Ł")
, ("l", lit "ł")
, ("ae", lit "æ")
, ("AE", lit "Æ")
, ("oe", lit "œ")
, ("OE", lit "Œ")
, ("pounds", lit "£")
, ("euro", lit "")
, ("copyright", lit "©")
, ("textasciicircum", lit "^")
, ("textasciitilde", lit "~")
, ("`", option (str "`") $ try $ tok >>= accent grave)
, ("'", option (str "'") $ try $ tok >>= accent acute)
, ("^", option (str "^") $ try $ tok >>= accent circ)
, ("~", option (str "~") $ try $ tok >>= accent tilde)
, ("\"", option (str "\"") $ try $ tok >>= accent umlaut)
, (".", option (str ".") $ try $ tok >>= accent dot)
, ("=", option (str "=") $ try $ tok >>= accent macron)
, ("c", option (str "c") $ try $ tok >>= accent cedilla)
, ("v", option (str "v") $ try $ tok >>= accent hacek)
, ("u", option (str "u") $ try $ tok >>= accent breve)
, ("i", lit "i")
, ("\\", linebreak <$ (optional (bracketed inline) *> optional sp))
, (",", pure mempty)
, ("@", pure mempty)
, (" ", lit "\160")
, ("ps", pure $ str "PS." <> space)
, ("TeX", lit "TeX")
, ("LaTeX", lit "LaTeX")
, ("bar", lit "|")
, ("textless", lit "<")
, ("textgreater", lit ">")
, ("thanks", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
, ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
, ("verb", doverb)
, ("lstinline", doverb)
, ("Verb", doverb)
, ("texttt", (code . stringify . toList) <$> tok)
, ("url", (unescapeURL <$> braced) >>= \url ->
pure (link url "" (str url)))
, ("href", (unescapeURL <$> braced <* optional sp) >>= \url ->
tok >>= \lab ->
pure (link url "" lab))
, ("includegraphics", skipopts *> (unescapeURL <$> braced) >>= mkImage)
, ("enquote", enquote)
, ("cite", citation "cite" AuthorInText False)
, ("citep", citation "citep" NormalCitation False)
, ("citep*", citation "citep*" NormalCitation False)
, ("citeal", citation "citeal" NormalCitation False)
, ("citealp", citation "citealp" NormalCitation False)
, ("citealp*", citation "citealp*" NormalCitation False)
, ("autocite", citation "autocite" NormalCitation False)
, ("footcite", inNote <$> citation "footcite" NormalCitation False)
, ("parencite", citation "parencite" NormalCitation False)
, ("supercite", citation "supercite" NormalCitation False)
, ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
, ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
, ("citeyear", citation "citeyear" SuppressAuthor False)
, ("autocite*", citation "autocite*" SuppressAuthor False)
, ("cite*", citation "cite*" SuppressAuthor False)
, ("parencite*", citation "parencite*" SuppressAuthor False)
, ("textcite", citation "textcite" AuthorInText False)
, ("citet", citation "citet" AuthorInText False)
, ("citet*", citation "citet*" AuthorInText False)
, ("citealt", citation "citealt" AuthorInText False)
, ("citealt*", citation "citealt*" AuthorInText False)
, ("textcites", citation "textcites" AuthorInText True)
, ("cites", citation "cites" NormalCitation True)
, ("autocites", citation "autocites" NormalCitation True)
, ("footcites", inNote <$> citation "footcites" NormalCitation True)
, ("parencites", citation "parencites" NormalCitation True)
, ("supercites", citation "supercites" NormalCitation True)
, ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
, ("Autocite", citation "Autocite" NormalCitation False)
, ("Footcite", citation "Footcite" NormalCitation False)
, ("Parencite", citation "Parencite" NormalCitation False)
, ("Supercite", citation "Supercite" NormalCitation False)
, ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
, ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
, ("Citeyear", citation "Citeyear" SuppressAuthor False)
, ("Autocite*", citation "Autocite*" SuppressAuthor False)
, ("Cite*", citation "Cite*" SuppressAuthor False)
, ("Parencite*", citation "Parencite*" SuppressAuthor False)
, ("Textcite", citation "Textcite" AuthorInText False)
, ("Textcites", citation "Textcites" AuthorInText True)
, ("Cites", citation "Cites" NormalCitation True)
, ("Autocites", citation "Autocites" NormalCitation True)
, ("Footcites", citation "Footcites" NormalCitation True)
, ("Parencites", citation "Parencites" NormalCitation True)
, ("Supercites", citation "Supercites" NormalCitation True)
, ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
, ("citetext", complexNatbibCitation NormalCitation)
, ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *>
complexNatbibCitation AuthorInText)
<|> citation "citeauthor" AuthorInText False)
, ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
addMeta "nocite"))
] ++ map ignoreInlines
-- these commands will be ignored unless --parse-raw is specified,
-- in which case they will appear as raw latex blocks:
[ "noindent", "index" ]
mkImage :: String -> LP Inlines
mkImage src = do
let alt = str "image"
case takeExtension src of
"" -> do
defaultExt <- getOption readerDefaultImageExtension
return $ image (addExtension src defaultExt) "" alt
_ -> return $ image src "" alt
inNote :: Inlines -> Inlines
inNote ils =
note $ para $ ils <> str "."
unescapeURL :: String -> String
unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
where isEscapable c = c `elem` "#$%&~_^\\{}"
unescapeURL (x:xs) = x:unescapeURL xs
unescapeURL [] = ""
enquote :: LP Inlines
enquote = do
skipopts
context <- stateQuoteContext <$> getState
if context == InDoubleQuote
then singleQuoted <$> withQuoteContext InSingleQuote tok
else doubleQuoted <$> withQuoteContext InDoubleQuote tok
doverb :: LP Inlines
doverb = do
marker <- anyChar
code <$> manyTill (satisfy (/='\n')) (char marker)
doLHSverb :: LP Inlines
doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|')
lit :: String -> LP Inlines
lit = pure . str
accent :: (Char -> String) -> Inlines -> LP Inlines
accent f ils =
case toList ils of
(Str (x:xs) : ys) -> return $ fromList $ (Str (f x ++ xs) : ys)
[] -> mzero
_ -> return ils
grave :: Char -> String
grave 'A' = "À"
grave 'E' = "È"
grave 'I' = "Ì"
grave 'O' = "Ò"
grave 'U' = "Ù"
grave 'a' = "à"
grave 'e' = "è"
grave 'i' = "ì"
grave 'o' = "ò"
grave 'u' = "ù"
grave c = [c]
acute :: Char -> String
acute 'A' = "Á"
acute 'E' = "É"
acute 'I' = "Í"
acute 'O' = "Ó"
acute 'U' = "Ú"
acute 'Y' = "Ý"
acute 'a' = "á"
acute 'e' = "é"
acute 'i' = "í"
acute 'o' = "ó"
acute 'u' = "ú"
acute 'y' = "ý"
acute 'C' = "Ć"
acute 'c' = "ć"
acute 'L' = "Ĺ"
acute 'l' = "ĺ"
acute 'N' = "Ń"
acute 'n' = "ń"
acute 'R' = "Ŕ"
acute 'r' = "ŕ"
acute 'S' = "Ś"
acute 's' = "ś"
acute 'Z' = "Ź"
acute 'z' = "ź"
acute c = [c]
circ :: Char -> String
circ 'A' = "Â"
circ 'E' = "Ê"
circ 'I' = "Î"
circ 'O' = "Ô"
circ 'U' = "Û"
circ 'a' = "â"
circ 'e' = "ê"
circ 'i' = "î"
circ 'o' = "ô"
circ 'u' = "û"
circ 'C' = "Ĉ"
circ 'c' = "ĉ"
circ 'G' = "Ĝ"
circ 'g' = "ĝ"
circ 'H' = "Ĥ"
circ 'h' = "ĥ"
circ 'J' = "Ĵ"
circ 'j' = "ĵ"
circ 'S' = "Ŝ"
circ 's' = "ŝ"
circ 'W' = "Ŵ"
circ 'w' = "ŵ"
circ 'Y' = "Ŷ"
circ 'y' = "ŷ"
circ c = [c]
tilde :: Char -> String
tilde 'A' = "Ã"
tilde 'a' = "ã"
tilde 'O' = "Õ"
tilde 'o' = "õ"
tilde 'I' = "Ĩ"
tilde 'i' = "ĩ"
tilde 'U' = "Ũ"
tilde 'u' = "ũ"
tilde 'N' = "Ñ"
tilde 'n' = "ñ"
tilde c = [c]
umlaut :: Char -> String
umlaut 'A' = "Ä"
umlaut 'E' = "Ë"
umlaut 'I' = "Ï"
umlaut 'O' = "Ö"
umlaut 'U' = "Ü"
umlaut 'a' = "ä"
umlaut 'e' = "ë"
umlaut 'i' = "ï"
umlaut 'o' = "ö"
umlaut 'u' = "ü"
umlaut c = [c]
dot :: Char -> String
dot 'C' = "Ċ"
dot 'c' = "ċ"
dot 'E' = "Ė"
dot 'e' = "ė"
dot 'G' = "Ġ"
dot 'g' = "ġ"
dot 'I' = "İ"
dot 'Z' = "Ż"
dot 'z' = "ż"
dot c = [c]
macron :: Char -> String
macron 'A' = "Ā"
macron 'E' = "Ē"
macron 'I' = "Ī"
macron 'O' = "Ō"
macron 'U' = "Ū"
macron 'a' = "ā"
macron 'e' = "ē"
macron 'i' = "ī"
macron 'o' = "ō"
macron 'u' = "ū"
macron c = [c]
cedilla :: Char -> String
cedilla 'c' = "ç"
cedilla 'C' = "Ç"
cedilla 's' = "ş"
cedilla 'S' = "Ş"
cedilla 't' = "ţ"
cedilla 'T' = "Ţ"
cedilla 'e' = "ȩ"
cedilla 'E' = "Ȩ"
cedilla 'h' = ""
cedilla 'H' = ""
cedilla 'o' = ""
cedilla 'O' = ""
cedilla c = [c]
hacek :: Char -> String
hacek 'A' = "Ǎ"
hacek 'a' = "ǎ"
hacek 'C' = "Č"
hacek 'c' = "č"
hacek 'D' = "Ď"
hacek 'd' = "ď"
hacek 'E' = "Ě"
hacek 'e' = "ě"
hacek 'G' = "Ǧ"
hacek 'g' = "ǧ"
hacek 'H' = "Ȟ"
hacek 'h' = "ȟ"
hacek 'I' = "Ǐ"
hacek 'i' = "ǐ"
hacek 'j' = "ǰ"
hacek 'K' = "Ǩ"
hacek 'k' = "ǩ"
hacek 'L' = "Ľ"
hacek 'l' = "ľ"
hacek 'N' = "Ň"
hacek 'n' = "ň"
hacek 'O' = "Ǒ"
hacek 'o' = "ǒ"
hacek 'R' = "Ř"
hacek 'r' = "ř"
hacek 'S' = "Š"
hacek 's' = "š"
hacek 'T' = "Ť"
hacek 't' = "ť"
hacek 'U' = "Ǔ"
hacek 'u' = "ǔ"
hacek 'Z' = "Ž"
hacek 'z' = "ž"
hacek c = [c]
breve :: Char -> String
breve 'A' = "Ă"
breve 'a' = "ă"
breve 'E' = "Ĕ"
breve 'e' = "ĕ"
breve 'G' = "Ğ"
breve 'g' = "ğ"
breve 'I' = "Ĭ"
breve 'i' = "ĭ"
breve 'O' = "Ŏ"
breve 'o' = "ŏ"
breve 'U' = "Ŭ"
breve 'u' = "ŭ"
breve c = [c]
tok :: LP Inlines
tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar)
opt :: LP Inlines
opt = bracketed inline <* optional sp
skipopts :: LP ()
skipopts = skipMany opt
inlineText :: LP Inlines
inlineText = str <$> many1 inlineChar
inlineChar :: LP Char
inlineChar = noneOf "\\$%^_&~#{}^'`\"‘’“”-[] \t\n"
environment :: LP Blocks
environment = do
controlSeq "begin"
name <- braced
case M.lookup name environments of
Just p -> p <|> rawEnv name
Nothing -> rawEnv name
rawEnv :: String -> LP Blocks
rawEnv name = do
let addBegin x = "\\begin{" ++ name ++ "}" ++ x
parseRaw <- getOption readerParseRaw
if parseRaw
then (rawBlock "latex" . addBegin) <$>
(withRaw (env name blocks) >>= applyMacros' . snd)
else env name blocks
----
type IncludeParser = ParserT [Char] [String] IO String
-- | Replace "include" commands with file contents.
handleIncludes :: String -> IO String
handleIncludes s = do
res <- runParserT includeParser' [] "input" s
case res of
Right s' -> return s'
Left e -> error $ show e
includeParser' :: IncludeParser
includeParser' =
concat <$> many (comment' <|> escaped' <|> blob' <|> include'
<|> startMarker' <|> endMarker'
<|> verbCmd' <|> verbatimEnv' <|> backslash')
comment' :: IncludeParser
comment' = do
char '%'
xs <- manyTill anyChar newline
return ('%':xs ++ "\n")
escaped' :: IncludeParser
escaped' = try $ string "\\%" <|> string "\\\\"
verbCmd' :: IncludeParser
verbCmd' = fmap snd <$>
withRaw $ try $ do
string "\\verb"
c <- anyChar
manyTill anyChar (char c)
verbatimEnv' :: IncludeParser
verbatimEnv' = fmap snd <$>
withRaw $ try $ do
string "\\begin"
name <- braced'
guard $ name `elem` ["verbatim", "Verbatim", "lstlisting",
"minted", "alltt"]
manyTill anyChar (try $ string $ "\\end{" ++ name ++ "}")
blob' :: IncludeParser
blob' = try $ many1 (noneOf "\\%")
backslash' :: IncludeParser
backslash' = string "\\"
braced' :: IncludeParser
braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}')
include' :: IncludeParser
include' = do
fs' <- try $ do
char '\\'
name <- try (string "include")
<|> try (string "input")
<|> string "usepackage"
-- skip options
skipMany $ try $ char '[' *> (manyTill anyChar (char ']'))
fs <- (map trim . splitBy (==',')) <$> braced'
return $ if name == "usepackage"
then map (flip replaceExtension ".sty") fs
else map (flip replaceExtension ".tex") fs
pos <- getPosition
containers <- getState
let fn = case containers of
(f':_) -> f'
[] -> "input"
-- now process each include file in order...
rest <- getInput
results' <- forM fs' (\f -> do
when (f `elem` containers) $
fail "Include file loop!"
contents <- lift $ readTeXFile f
return $ "\\PandocStartInclude{" ++ f ++ "}" ++
contents ++ "\\PandocEndInclude{" ++
fn ++ "}{" ++ show (sourceLine pos) ++ "}{"
++ show (sourceColumn pos) ++ "}")
setInput $ concat results' ++ rest
return ""
startMarker' :: IncludeParser
startMarker' = try $ do
string "\\PandocStartInclude"
fn <- braced'
updateState (fn:)
setPosition $ newPos fn 1 1
return $ "\\PandocStartInclude{" ++ fn ++ "}"
endMarker' :: IncludeParser
endMarker' = try $ do
string "\\PandocEndInclude"
fn <- braced'
ln <- braced'
co <- braced'
updateState tail
setPosition $ newPos fn (fromMaybe 1 $ safeRead ln) (fromMaybe 1 $ safeRead co)
return $ "\\PandocEndInclude{" ++ fn ++ "}{" ++ ln ++ "}{" ++
co ++ "}"
readTeXFile :: FilePath -> IO String
readTeXFile f = do
texinputs <- E.catch (getEnv "TEXINPUTS") $ \(_ :: E.SomeException) ->
return "."
let ds = splitBy (==':') texinputs
readFileFromDirs ds f
readFileFromDirs :: [FilePath] -> FilePath -> IO String
readFileFromDirs [] _ = return ""
readFileFromDirs (d:ds) f =
E.catch (UTF8.readFile $ d </> f) $ \(_ :: E.SomeException) ->
readFileFromDirs ds f
----
keyval :: LP (String, String)
keyval = try $ do
key <- many1 alphaNum
val <- option "" $ char '=' >> many1 alphaNum
skipMany spaceChar
optional (char ',')
skipMany spaceChar
return (key, val)
keyvals :: LP [(String, String)]
keyvals = try $ char '[' *> manyTill keyval (char ']')
alltt :: String -> LP Blocks
alltt t = walk strToCode <$> parseFromString blocks
(substitute " " "\\ " $ substitute "%" "\\%" $
concat $ intersperse "\\\\\n" $ lines t)
where strToCode (Str s) = Code nullAttr s
strToCode x = x
rawLaTeXBlock :: Parser [Char] ParserState String
rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand))
rawLaTeXInline :: Parser [Char] ParserState Inline
rawLaTeXInline = do
raw <- (snd <$> withRaw inlineCommand) <|> (snd <$> withRaw blockCommand)
RawInline "latex" <$> applyMacros' raw
addImageCaption :: Blocks -> LP Blocks
addImageCaption = walkM go
where go (Image alt (src,tit)) = do
mbcapt <- stateCaption <$> getState
case mbcapt of
Just ils -> return (Image (toList ils) (src, "fig:"))
Nothing -> return (Image alt (src,tit))
go x = return x
addTableCaption :: Blocks -> LP Blocks
addTableCaption = walkM go
where go (Table c als ws hs rs) = do
mbcapt <- stateCaption <$> getState
case mbcapt of
Just ils -> return (Table (toList ils) als ws hs rs)
Nothing -> return (Table c als ws hs rs)
go x = return x
environments :: M.Map String (LP Blocks)
environments = M.fromList
[ ("document", env "document" blocks <* skipMany anyChar)
, ("letter", env "letter" letter_contents)
, ("figure", env "figure" $
resetCaption *> skipopts *> blocks >>= addImageCaption)
, ("center", env "center" blocks)
, ("table", env "table" $
resetCaption *> skipopts *> blocks >>= addTableCaption)
, ("tabular", env "tabular" simpTable)
, ("quote", blockQuote <$> env "quote" blocks)
, ("quotation", blockQuote <$> env "quotation" blocks)
, ("verse", blockQuote <$> env "verse" blocks)
, ("itemize", bulletList <$> listenv "itemize" (many item))
, ("description", definitionList <$> listenv "description" (many descItem))
, ("enumerate", ordered_list)
, ("alltt", alltt =<< verbEnv "alltt")
, ("code", guardEnabled Ext_literate_haskell *>
(codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
verbEnv "code"))
, ("verbatim", codeBlock <$> (verbEnv "verbatim"))
, ("Verbatim", do options <- option [] keyvals
let kvs = [ (if k == "firstnumber"
then "startFrom"
else k, v) | (k,v) <- options ]
let classes = [ "numberLines" |
lookup "numbers" options == Just "left" ]
let attr = ("",classes,kvs)
codeBlockWith attr <$> (verbEnv "Verbatim"))
, ("lstlisting", do options <- option [] keyvals
let kvs = [ (if k == "firstnumber"
then "startFrom"
else k, v) | (k,v) <- options ]
let classes = [ "numberLines" |
lookup "numbers" options == Just "left" ]
++ maybe [] (:[]) (lookup "language" options
>>= fromListingsLanguage)
let attr = (fromMaybe "" (lookup "label" options),classes,kvs)
codeBlockWith attr <$> (verbEnv "lstlisting"))
, ("minted", do options <- option [] keyvals
lang <- grouped (many1 $ satisfy (/='}'))
let kvs = [ (if k == "firstnumber"
then "startFrom"
else k, v) | (k,v) <- options ]
let classes = [ lang | not (null lang) ] ++
[ "numberLines" |
lookup "linenos" options == Just "true" ]
let attr = ("",classes,kvs)
codeBlockWith attr <$> (verbEnv "minted"))
, ("obeylines", parseFromString
(para . trimInlines . mconcat <$> many inline) =<<
intercalate "\\\\\n" . lines <$> verbEnv "obeylines")
, ("displaymath", mathEnv Nothing "displaymath")
, ("equation", mathEnv Nothing "equation")
, ("equation*", mathEnv Nothing "equation*")
, ("gather", mathEnv (Just "gathered") "gather")
, ("gather*", mathEnv (Just "gathered") "gather*")
, ("multline", mathEnv (Just "gathered") "multline")
, ("multline*", mathEnv (Just "gathered") "multline*")
, ("eqnarray", mathEnv (Just "aligned") "eqnarray")
, ("eqnarray*", mathEnv (Just "aligned") "eqnarray*")
, ("align", mathEnv (Just "aligned") "align")
, ("align*", mathEnv (Just "aligned") "align*")
, ("alignat", mathEnv (Just "aligned") "alignat")
, ("alignat*", mathEnv (Just "aligned") "alignat*")
]
letter_contents :: LP Blocks
letter_contents = do
bs <- blocks
st <- getState
-- add signature (author) and address (title)
let addr = case lookupMeta "address" (stateMeta st) of
Just (MetaBlocks [Plain xs]) ->
para $ trimInlines $ fromList xs
_ -> mempty
return $ addr <> bs -- sig added by \closing
closing :: LP Blocks
closing = do
contents <- tok
st <- getState
let extractInlines (MetaBlocks [Plain ys]) = ys
extractInlines (MetaBlocks [Para ys ]) = ys
extractInlines _ = []
let sigs = case lookupMeta "author" (stateMeta st) of
Just (MetaList xs) ->
para $ trimInlines $ fromList $
intercalate [LineBreak] $ map extractInlines xs
_ -> mempty
return $ para (trimInlines contents) <> sigs
item :: LP Blocks
item = blocks *> controlSeq "item" *> skipopts *> blocks
loose_item :: LP Blocks
loose_item = do
ctx <- stateParserContext `fmap` getState
if ctx == ListItemState
then mzero
else return mempty
descItem :: LP (Inlines, [Blocks])
descItem = do
blocks -- skip blocks before item
controlSeq "item"
optional sp
ils <- opt
bs <- blocks
return (ils, [bs])
env :: String -> LP a -> LP a
env name p = p <*
(try (controlSeq "end" *> braced >>= guard . (== name))
<?> ("\\end{" ++ name ++ "}"))
listenv :: String -> LP a -> LP a
listenv name p = try $ do
oldCtx <- stateParserContext `fmap` getState
updateState $ \st -> st{ stateParserContext = ListItemState }
res <- env name p
updateState $ \st -> st{ stateParserContext = oldCtx }
return res
mathEnv :: Maybe String -> String -> LP Blocks
mathEnv innerEnv name = para <$> mathDisplay (inner <$> verbEnv name)
where inner x = case innerEnv of
Nothing -> x
Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++
"\\end{" ++ y ++ "}"
verbEnv :: String -> LP String
verbEnv name = do
skipopts
optional blankline
let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name)
res <- manyTill anyChar endEnv
return $ stripTrailingNewlines res
ordered_list :: LP Blocks
ordered_list = do
optional sp
(_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
try $ char '[' *> anyOrderedListMarker <* char ']'
spaces
optional $ try $ controlSeq "setlength" *> grouped (controlSeq "itemindent") *> braced
spaces
start <- option 1 $ try $ do controlSeq "setcounter"
grouped (string "enum" *> many1 (oneOf "iv"))
optional sp
num <- grouped (many1 digit)
spaces
return $ (read num + 1 :: Int)
bs <- listenv "enumerate" (many item)
return $ orderedListWith (start, style, delim) bs
paragraph :: LP Blocks
paragraph = do
x <- trimInlines . mconcat <$> many1 inline
if x == mempty
then return mempty
else return $ para x
preamble :: LP Blocks
preamble = mempty <$> manyTill preambleBlock beginDoc
where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}"
preambleBlock = (void comment)
<|> (void sp)
<|> (void blanklines)
<|> (void macro)
<|> (void blockCommand)
<|> (void anyControlSeq)
<|> (void braced)
<|> (void anyChar)
-------
-- citations
addPrefix :: [Inline] -> [Citation] -> [Citation]
addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
addPrefix _ _ = []
addSuffix :: [Inline] -> [Citation] -> [Citation]
addSuffix s ks@(_:_) =
let k = last ks
in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
addSuffix _ _ = []
simpleCiteArgs :: LP [Citation]
simpleCiteArgs = try $ do
first <- optionMaybe $ toList <$> opt
second <- optionMaybe $ toList <$> opt
char '{'
optional sp
keys <- manyTill citationLabel (char '}')
let (pre, suf) = case (first , second ) of
(Just s , Nothing) -> (mempty, s )
(Just s , Just t ) -> (s , t )
_ -> (mempty, mempty)
conv k = Citation { citationId = k
, citationPrefix = []
, citationSuffix = []
, citationMode = NormalCitation
, citationHash = 0
, citationNoteNum = 0
}
return $ addPrefix pre $ addSuffix suf $ map conv keys
citationLabel :: LP String
citationLabel = optional sp *>
(many1 (satisfy isBibtexKeyChar)
<* optional sp
<* optional (char ',')
<* optional sp)
where isBibtexKeyChar c = isAlphaNum c || c `elem` ".:;?!`'()/*@_+=-[]*"
cites :: CitationMode -> Bool -> LP [Citation]
cites mode multi = try $ do
cits <- if multi
then many1 simpleCiteArgs
else count 1 simpleCiteArgs
let cs = concat cits
return $ case mode of
AuthorInText -> case cs of
(c:rest) -> c {citationMode = mode} : rest
[] -> []
_ -> map (\a -> a {citationMode = mode}) cs
citation :: String -> CitationMode -> Bool -> LP Inlines
citation name mode multi = do
(c,raw) <- withRaw $ cites mode multi
return $ cite c (rawInline "latex" $ "\\" ++ name ++ raw)
complexNatbibCitation :: CitationMode -> LP Inlines
complexNatbibCitation mode = try $ do
let ils = (toList . trimInlines . mconcat) <$>
many (notFollowedBy (oneOf "\\};") >> inline)
let parseOne = try $ do
skipSpaces
pref <- ils
cit' <- inline -- expect a citation
let citlist = toList cit'
cits' <- case citlist of
[Cite cs _] -> return cs
_ -> mzero
suff <- ils
skipSpaces
optional $ char ';'
return $ addPrefix pref $ addSuffix suff $ cits'
(c:cits, raw) <- withRaw $ grouped parseOne
return $ cite (c{ citationMode = mode }:cits)
(rawInline "latex" $ "\\citetext" ++ raw)
-- tables
parseAligns :: LP [Alignment]
parseAligns = try $ do
char '{'
let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ try (string "@{}")
maybeBar
let cAlign = AlignCenter <$ char 'c'
let lAlign = AlignLeft <$ char 'l'
let rAlign = AlignRight <$ char 'r'
let parAlign = AlignLeft <$ (char 'p' >> braced)
let alignChar = cAlign <|> lAlign <|> rAlign <|> parAlign
aligns' <- sepEndBy alignChar maybeBar
spaces
char '}'
spaces
return aligns'
hline :: LP ()
hline = () <$ (try $ spaces >> controlSeq "hline")
lbreak :: LP ()
lbreak = () <$ (try $ spaces *> controlSeq "\\")
amp :: LP ()
amp = () <$ (try $ spaces *> char '&')
parseTableRow :: Int -- ^ number of columns
-> LP [Blocks]
parseTableRow cols = try $ do
let tableCellInline = notFollowedBy (amp <|> lbreak) >> inline
let tableCell = (plain . trimInlines . mconcat) <$> many tableCellInline
cells' <- sepBy1 tableCell amp
let numcells = length cells'
guard $ numcells <= cols && numcells >= 1
guard $ cells' /= [mempty]
-- note: a & b in a three-column table leaves an empty 3rd cell:
let cells'' = cells' ++ replicate (cols - numcells) mempty
spaces
return cells''
simpTable :: LP Blocks
simpTable = try $ do
spaces
aligns <- parseAligns
let cols = length aligns
optional hline
header' <- option [] $ try (parseTableRow cols <* lbreak <* hline)
rows <- sepEndBy (parseTableRow cols) (lbreak <* optional hline)
spaces
skipMany (comment *> spaces)
let header'' = if null header'
then replicate cols mempty
else header'
lookAhead $ controlSeq "end" -- make sure we're at end
return $ table mempty (zip aligns (repeat 0)) header'' rows
startInclude :: LP Blocks
startInclude = do
fn <- braced
setPosition $ newPos fn 1 1
return mempty
endInclude :: LP Blocks
endInclude = do
fn <- braced
ln <- braced
co <- braced
setPosition $ newPos fn (fromMaybe 1 $ safeRead ln) (fromMaybe 1 $ safeRead co)
return mempty