LaTeX reader: Proper include file processing.

* Removed handleIncludes from LaTeX reader [API change].
* Now the ordinary LaTeX reader handles includes in a way
  that is appropriate to the monad it is run in.
This commit is contained in:
John MacFarlane 2016-12-03 21:55:31 +01:00
parent dc1bbaf58d
commit 1a0d93a1d3
3 changed files with 36 additions and 122 deletions

View file

@ -34,7 +34,6 @@ import Text.Pandoc
import Text.Pandoc.Builder (setMeta) import Text.Pandoc.Builder (setMeta)
import Text.Pandoc.PDF (makePDF) import Text.Pandoc.PDF (makePDF)
import Text.Pandoc.Walk (walk) import Text.Pandoc.Walk (walk)
import Text.Pandoc.Readers.LaTeX (handleIncludes)
import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, readDataFile, import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, readDataFile,
safeRead, headerShift, normalize, err, warn, safeRead, headerShift, normalize, err, warn,
openURL ) openURL )
@ -1410,11 +1409,6 @@ convertWithOpts opts args = do
then 0 then 0
else tabStop) else tabStop)
let handleIncludes' :: String -> IO (Either PandocError String)
handleIncludes' = if readerName' `elem` ["latex", "latex+lhs"]
then handleIncludes
else return . Right
let runIO' = runIOorExplode . let runIO' = runIOorExplode .
(if quiet (if quiet
then id then id
@ -1424,12 +1418,8 @@ convertWithOpts opts args = do
sourceToDoc sources' = sourceToDoc sources' =
case reader of case reader of
StringReader r-> do StringReader r-> do
srcs <- convertTabs . intercalate "\n" <$> readSources sources' doc <- convertTabs . intercalate "\n" <$> readSources sources'
doc <- handleIncludes' srcs runIO' $ withMediaBag $ r readerOpts doc
case doc of
Right doc' -> runIO' $ withMediaBag
$ r readerOpts doc'
Left e -> error $ show e
ByteStringReader r -> readFiles sources' >>= ByteStringReader r -> readFiles sources' >>=
(\bs -> runIO' $ withMediaBag (\bs -> runIO' $ withMediaBag
$ r readerOpts bs) $ r readerOpts bs)

View file

@ -929,6 +929,7 @@ data ParserState = ParserState
-- roles), 3) Additional classes (rest of Attr is unused)). -- roles), 3) Additional classes (rest of Attr is unused)).
stateCaption :: Maybe Inlines, -- ^ Caption in current environment stateCaption :: Maybe Inlines, -- ^ Caption in current environment
stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed
stateContainers :: [String], -- ^ parent include files
stateMarkdownAttribute :: Bool -- ^ True if in markdown=1 context stateMarkdownAttribute :: Bool -- ^ True if in markdown=1 context
} }
@ -1024,6 +1025,7 @@ defaultParserState =
stateRstCustomRoles = M.empty, stateRstCustomRoles = M.empty,
stateCaption = Nothing, stateCaption = Nothing,
stateInHtmlBlock = Nothing, stateInHtmlBlock = Nothing,
stateContainers = [],
stateMarkdownAttribute = False stateMarkdownAttribute = False
} }

View file

@ -32,7 +32,6 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX,
rawLaTeXInline, rawLaTeXInline,
rawLaTeXBlock, rawLaTeXBlock,
inlineCommand, inlineCommand,
handleIncludes
) where ) where
import Text.Pandoc.Definition import Text.Pandoc.Definition
@ -48,16 +47,15 @@ import Control.Monad
import Text.Pandoc.Builder import Text.Pandoc.Builder
import Control.Applicative ((<|>), many, optional) import Control.Applicative ((<|>), many, optional)
import Data.Maybe (fromMaybe, maybeToList) import Data.Maybe (fromMaybe, maybeToList)
import System.Environment (getEnv)
import System.FilePath (replaceExtension, (</>), takeExtension, addExtension) import System.FilePath (replaceExtension, (</>), takeExtension, addExtension)
import Data.List (intercalate) import Data.List (intercalate)
import qualified Data.Map as M import qualified Data.Map as M
import qualified Control.Exception as E
import Text.Pandoc.Highlighting (fromListingsLanguage) import Text.Pandoc.Highlighting (fromListingsLanguage)
import Text.Pandoc.ImageSize (numUnit, showFl) import Text.Pandoc.ImageSize (numUnit, showFl)
import Text.Pandoc.Error import Text.Pandoc.Error
import Control.Monad.Except (throwError) import Control.Monad.Except (throwError, catchError)
import Text.Pandoc.Class (PandocMonad, PandocPure) import Text.Pandoc.Class (PandocMonad, PandocPure, lookupEnv, readFileLazy,
warning)
-- | Parse LaTeX from string and return 'Pandoc' document. -- | Parse LaTeX from string and return 'Pandoc' document.
readLaTeX :: PandocMonad m readLaTeX :: PandocMonad m
@ -258,6 +256,7 @@ block :: PandocMonad m => LP m Blocks
block = (mempty <$ comment) block = (mempty <$ comment)
<|> (mempty <$ ((spaceChar <|> newline) *> spaces)) <|> (mempty <$ ((spaceChar <|> newline) *> spaces))
<|> environment <|> environment
<|> include
<|> macro <|> macro
<|> blockCommand <|> blockCommand
<|> paragraph <|> paragraph
@ -353,8 +352,6 @@ blockCommands = M.fromList $
, ("documentclass", skipopts *> braced *> preamble) , ("documentclass", skipopts *> braced *> preamble)
, ("centerline", (para . trimInlines) <$> (skipopts *> tok)) , ("centerline", (para . trimInlines) <$> (skipopts *> tok))
, ("caption", skipopts *> setCaption) , ("caption", skipopts *> setCaption)
, ("PandocStartInclude", startInclude)
, ("PandocEndInclude", endInclude)
, ("bibliography", mempty <$ (skipopts *> braced >>= , ("bibliography", mempty <$ (skipopts *> braced >>=
addMeta "bibliography" . splitBibs)) addMeta "bibliography" . splitBibs))
, ("addbibresource", mempty <$ (skipopts *> braced >>= , ("addbibresource", mempty <$ (skipopts *> braced >>=
@ -935,50 +932,7 @@ rawEnv name = do
---- ----
type IncludeParser = ParserT String [String] IO String braced' :: PandocMonad m => LP m String
-- | Replace "include" commands with file contents.
handleIncludes :: String -> IO (Either PandocError String)
handleIncludes s = mapLeft (PandocParsecError s) <$> runParserT includeParser' [] "input" s
includeParser' :: IncludeParser
includeParser' =
concat <$> many (comment' <|> escaped' <|> blob' <|> include'
<|> startMarker' <|> endMarker'
<|> verbCmd' <|> verbatimEnv' <|> backslash')
comment' :: IncludeParser
comment' = do
char '%'
xs <- manyTill anyChar newline
return ('%':xs ++ "\n")
escaped' :: IncludeParser
escaped' = try $ string "\\%" <|> string "\\\\"
verbCmd' :: IncludeParser
verbCmd' = fmap snd <$>
withRaw $ try $ do
string "\\verb"
c <- anyChar
manyTill anyChar (char c)
verbatimEnv' :: IncludeParser
verbatimEnv' = fmap snd <$>
withRaw $ try $ do
string "\\begin"
name <- braced'
guard $ name `elem` ["verbatim", "Verbatim", "BVerbatim",
"lstlisting", "minted", "alltt", "comment"]
manyTill anyChar (try $ string $ "\\end{" ++ name ++ "}")
blob' :: IncludeParser
blob' = try $ many1 (noneOf "\\%")
backslash' :: IncludeParser
backslash' = string "\\"
braced' :: IncludeParser
braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}') braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}')
maybeAddExtension :: String -> FilePath -> FilePath maybeAddExtension :: String -> FilePath -> FilePath
@ -987,8 +941,8 @@ maybeAddExtension ext fp =
then addExtension fp ext then addExtension fp ext
else fp else fp
include' :: IncludeParser include :: PandocMonad m => LP m Blocks
include' = do include = do
fs' <- try $ do fs' <- try $ do
char '\\' char '\\'
name <- try (string "include") name <- try (string "include")
@ -1000,55 +954,37 @@ include' = do
return $ if name == "usepackage" return $ if name == "usepackage"
then map (maybeAddExtension ".sty") fs then map (maybeAddExtension ".sty") fs
else map (maybeAddExtension ".tex") fs else map (maybeAddExtension ".tex") fs
pos <- getPosition oldPos <- getPosition
containers <- getState oldInput <- getInput
let fn = case containers of
(f':_) -> f'
[] -> "input"
-- now process each include file in order... -- now process each include file in order...
rest <- getInput mconcat <$> forM fs' (\f -> do
results' <- forM fs' (\f -> do containers <- stateContainers <$> getState
when (f `elem` containers) $ when (f `elem` containers) $
fail "Include file loop!" throwError $ PandocParseError $ "Include file loop in " ++ f
updateState $ \s -> s{ stateContainers = f : stateContainers s }
contents <- lift $ readTeXFile f contents <- lift $ readTeXFile f
return $ "\\PandocStartInclude{" ++ f ++ "}" ++ setPosition $ newPos f 1 1
contents ++ "\\PandocEndInclude{" ++ setInput contents
fn ++ "}{" ++ show (sourceLine pos) ++ "}{" bs <- blocks
++ show (sourceColumn pos) ++ "}") setInput oldInput
setInput $ concat results' ++ rest setPosition oldPos
return "" updateState $ \s -> s{ stateContainers = tail $ stateContainers s }
return bs)
startMarker' :: IncludeParser readTeXFile :: PandocMonad m => FilePath -> m String
startMarker' = try $ do
string "\\PandocStartInclude"
fn <- braced'
updateState (fn:)
setPosition $ newPos fn 1 1
return $ "\\PandocStartInclude{" ++ fn ++ "}"
endMarker' :: IncludeParser
endMarker' = try $ do
string "\\PandocEndInclude"
fn <- braced'
ln <- braced'
co <- braced'
updateState tail
setPosition $ newPos fn (fromMaybe 1 $ safeRead ln) (fromMaybe 1 $ safeRead co)
return $ "\\PandocEndInclude{" ++ fn ++ "}{" ++ ln ++ "}{" ++
co ++ "}"
readTeXFile :: FilePath -> IO String
readTeXFile f = do readTeXFile f = do
texinputs <- E.catch (getEnv "TEXINPUTS") $ \(_ :: E.SomeException) -> texinputs <- fromMaybe "." <$> lookupEnv "TEXINPUTS"
return "." readFileFromDirs (splitBy (==':') texinputs) f
let ds = splitBy (==':') texinputs
readFileFromDirs ds f
readFileFromDirs :: [FilePath] -> FilePath -> IO String readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m String
readFileFromDirs [] _ = return "" readFileFromDirs ds f =
readFileFromDirs (d:ds) f = mconcat <$> mapM (\d -> readFileLazy' (d </> f)) ds
E.catch (UTF8.readFile $ d </> f) $ \(_ :: E.SomeException) ->
readFileFromDirs ds f readFileLazy' :: PandocMonad m => FilePath -> m String
readFileLazy' f = catchError (UTF8.toStringLazy <$> readFileLazy f) $
\(e :: PandocError) -> do
warning $ "Could not load include file " ++ f ++ ", skipping.\n" ++ show e
return ""
---- ----
@ -1449,20 +1385,6 @@ simpTable hasWidthParameter = try $ do
lookAhead $ controlSeq "end" -- make sure we're at end lookAhead $ controlSeq "end" -- make sure we're at end
return $ table mempty (zip aligns (repeat 0)) header'' rows return $ table mempty (zip aligns (repeat 0)) header'' rows
startInclude :: PandocMonad m => LP m Blocks
startInclude = do
fn <- braced
setPosition $ newPos fn 1 1
return mempty
endInclude :: PandocMonad m => LP m Blocks
endInclude = do
fn <- braced
ln <- braced
co <- braced
setPosition $ newPos fn (fromMaybe 1 $ safeRead ln) (fromMaybe 1 $ safeRead co)
return mempty
removeDoubleQuotes :: String -> String removeDoubleQuotes :: String -> String
removeDoubleQuotes ('"':xs) = removeDoubleQuotes ('"':xs) =
case reverse xs of case reverse xs of