LaTeX reader: Proper include file processing.

* Removed handleIncludes from LaTeX reader [API change]. * Now the ordinary LaTeX reader handles includes in a way that is appropriate to the monad it is run in.
2016-12-03 21:55:31 +01:00 · 2016-12-03 21:55:31 +01:00 · 1a0d93a1d3
commit 1a0d93a1d3
parent dc1bbaf58d
3 changed files with 36 additions and 122 deletions
--- a/pandoc.hs
+++ b/pandoc.hs
@ -34,7 +34,6 @@ import Text.Pandoc
 import Text.Pandoc.Builder (setMeta)
 import Text.Pandoc.PDF (makePDF)
 import Text.Pandoc.Walk (walk)
 import Text.Pandoc.Readers.LaTeX (handleIncludes)
 import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, readDataFile,
                            safeRead, headerShift, normalize, err, warn,
                            openURL )
@ -1410,11 +1409,6 @@ convertWithOpts opts args = do
                                 then 0
                                 else tabStop)
  let handleIncludes' :: String -> IO (Either PandocError String)
      handleIncludes' = if readerName' `elem`  ["latex", "latex+lhs"]
                               then handleIncludes
                               else return . Right
  let runIO' = runIOorExplode .
               (if quiet
                   then id
@ -1424,12 +1418,8 @@ convertWithOpts opts args = do
      sourceToDoc sources' = 
        case reader of
          StringReader r-> do
-            srcs <- convertTabs . intercalate "\n" <$> readSources sources'
+            doc <- convertTabs . intercalate "\n" <$> readSources sources'
-            doc <- handleIncludes' srcs
+            runIO' $ withMediaBag $ r readerOpts doc
            case doc of
              Right doc' -> runIO' $ withMediaBag
                                   $ r readerOpts doc'
              Left e -> error $ show e
          ByteStringReader r -> readFiles sources' >>=
                                (\bs -> runIO' $ withMediaBag
                                               $ r readerOpts bs)
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@ -929,6 +929,7 @@ data ParserState = ParserState
      -- roles), 3) Additional classes (rest of Attr is unused)).
      stateCaption         :: Maybe Inlines, -- ^ Caption in current environment
      stateInHtmlBlock     :: Maybe String,  -- ^ Tag type of HTML block being parsed
      stateContainers      :: [String],      -- ^ parent include files
      stateMarkdownAttribute :: Bool         -- ^ True if in markdown=1 context
    }
@ -1024,6 +1025,7 @@ defaultParserState =
                  stateRstCustomRoles  = M.empty,
                  stateCaption         = Nothing,
                  stateInHtmlBlock     = Nothing,
                  stateContainers      = [],
                  stateMarkdownAttribute = False
                  }
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@ -32,7 +32,6 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX,
                                   rawLaTeXInline,
                                   rawLaTeXBlock,
                                   inlineCommand,
                                   handleIncludes
                                 ) where
 import Text.Pandoc.Definition
@ -48,16 +47,15 @@ import Control.Monad
 import Text.Pandoc.Builder
 import Control.Applicative ((<|>), many, optional)
 import Data.Maybe (fromMaybe, maybeToList)
 import System.Environment (getEnv)
 import System.FilePath (replaceExtension, (</>), takeExtension, addExtension)
 import Data.List (intercalate)
 import qualified Data.Map as M
 import qualified Control.Exception as E
 import Text.Pandoc.Highlighting (fromListingsLanguage)
 import Text.Pandoc.ImageSize (numUnit, showFl)
 import Text.Pandoc.Error
-import Control.Monad.Except (throwError)
+import Control.Monad.Except (throwError, catchError)
-import Text.Pandoc.Class (PandocMonad, PandocPure)
+import Text.Pandoc.Class (PandocMonad, PandocPure, lookupEnv, readFileLazy,
                          warning)
 -- | Parse LaTeX from string and return 'Pandoc' document.
 readLaTeX :: PandocMonad m
@ -258,6 +256,7 @@ block :: PandocMonad m => LP m Blocks
 block = (mempty <$ comment)
    <|> (mempty <$ ((spaceChar <|> newline) *> spaces))
    <|> environment
    <|> include
    <|> macro
    <|> blockCommand
    <|> paragraph
@ -353,8 +352,6 @@ blockCommands = M.fromList $
  , ("documentclass", skipopts *> braced *> preamble)
  , ("centerline", (para . trimInlines) <$> (skipopts *> tok))
  , ("caption", skipopts *> setCaption)
  , ("PandocStartInclude", startInclude)
  , ("PandocEndInclude", endInclude)
  , ("bibliography", mempty <$ (skipopts *> braced >>=
                                addMeta "bibliography" . splitBibs))
  , ("addbibresource", mempty <$ (skipopts *> braced >>=
@ -935,50 +932,7 @@ rawEnv name = do
 ----
-type IncludeParser = ParserT String [String] IO String
+braced' :: PandocMonad m => LP m String
 -- | Replace "include" commands with file contents.
 handleIncludes :: String -> IO (Either PandocError String)
 handleIncludes s =  mapLeft (PandocParsecError s) <$> runParserT includeParser' [] "input" s
 includeParser' :: IncludeParser
 includeParser' =
  concat <$> many (comment' <|> escaped' <|> blob' <|> include'
                   <|> startMarker' <|> endMarker'
                   <|> verbCmd' <|> verbatimEnv' <|> backslash')
 comment' :: IncludeParser
 comment' = do
  char '%'
  xs <- manyTill anyChar newline
  return ('%':xs ++ "\n")
 escaped' :: IncludeParser
 escaped' = try $ string "\\%" <|> string "\\\\"
 verbCmd' :: IncludeParser
 verbCmd' = fmap snd <$>
  withRaw $ try $ do
             string "\\verb"
             c <- anyChar
             manyTill anyChar (char c)
 verbatimEnv' :: IncludeParser
 verbatimEnv' = fmap snd <$>
  withRaw $ try $ do
             string "\\begin"
             name <- braced'
             guard $ name `elem` ["verbatim", "Verbatim", "BVerbatim",
                                  "lstlisting", "minted", "alltt", "comment"]
             manyTill anyChar (try $ string $ "\\end{" ++ name ++ "}")
 blob' :: IncludeParser
 blob' = try $ many1 (noneOf "\\%")
 backslash' :: IncludeParser
 backslash' = string "\\"
 braced' :: IncludeParser
 braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}')
 maybeAddExtension :: String -> FilePath -> FilePath
@ -987,8 +941,8 @@ maybeAddExtension ext fp =
     then addExtension fp ext
     else fp
-include' :: IncludeParser
+include :: PandocMonad m => LP m Blocks
-include' = do
+include = do
  fs' <- try $ do
              char '\\'
              name <- try (string "include")
@ -1000,55 +954,37 @@ include' = do
              return $ if name == "usepackage"
                          then map (maybeAddExtension ".sty") fs
                          else map (maybeAddExtension ".tex") fs
-  pos <- getPosition
+  oldPos <- getPosition
-  containers <- getState
+  oldInput <- getInput
  let fn = case containers of
                (f':_) -> f'
                []     -> "input"
  -- now process each include file in order...
-  rest <- getInput
+  mconcat <$> forM fs' (\f -> do
-  results' <- forM fs' (\f -> do
+    containers <- stateContainers <$> getState
    when (f `elem` containers) $
-      fail "Include file loop!"
+      throwError $ PandocParseError $ "Include file loop in " ++ f
    updateState $ \s -> s{ stateContainers = f : stateContainers s }
    contents <- lift $ readTeXFile f
-    return $ "\\PandocStartInclude{" ++ f ++ "}" ++
+    setPosition $ newPos f 1 1
-             contents ++ "\\PandocEndInclude{" ++
+    setInput contents
-             fn ++ "}{" ++ show (sourceLine pos) ++ "}{"
+    bs <- blocks
-             ++ show (sourceColumn pos) ++ "}")
+    setInput oldInput
-  setInput $ concat results' ++ rest
+    setPosition oldPos
-  return ""
+    updateState $ \s -> s{ stateContainers = tail $ stateContainers s }
    return bs)
-startMarker' :: IncludeParser
+readTeXFile :: PandocMonad m => FilePath -> m String
 startMarker' = try $ do
  string "\\PandocStartInclude"
  fn <- braced'
  updateState (fn:)
  setPosition $ newPos fn 1 1
  return $ "\\PandocStartInclude{" ++ fn ++ "}"
 endMarker' :: IncludeParser
 endMarker' = try $ do
  string "\\PandocEndInclude"
  fn <- braced'
  ln <- braced'
  co <- braced'
  updateState tail
  setPosition $ newPos fn (fromMaybe 1 $ safeRead ln) (fromMaybe 1 $ safeRead co)
  return $ "\\PandocEndInclude{" ++ fn ++ "}{" ++ ln ++ "}{" ++
               co ++ "}"
 readTeXFile :: FilePath -> IO String
 readTeXFile f = do
-  texinputs <- E.catch (getEnv "TEXINPUTS") $ \(_ :: E.SomeException) ->
+  texinputs <- fromMaybe "." <$> lookupEnv "TEXINPUTS"
-                   return "."
+  readFileFromDirs (splitBy (==':') texinputs) f
  let ds = splitBy (==':') texinputs
  readFileFromDirs ds f
-readFileFromDirs :: [FilePath] -> FilePath -> IO String
+readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m String
-readFileFromDirs [] _ = return ""
+readFileFromDirs ds f =
-readFileFromDirs (d:ds) f =
+  mconcat <$> mapM (\d -> readFileLazy' (d </> f)) ds
-  E.catch (UTF8.readFile $ d </> f) $ \(_ :: E.SomeException) ->
+
-    readFileFromDirs ds f
+readFileLazy' :: PandocMonad m => FilePath -> m String
 readFileLazy' f = catchError (UTF8.toStringLazy <$> readFileLazy f) $
  \(e :: PandocError) -> do
    warning $ "Could not load include file " ++ f ++ ", skipping.\n" ++ show e
    return ""
 ----
@ -1449,20 +1385,6 @@ simpTable hasWidthParameter = try $ do
  lookAhead $ controlSeq "end" -- make sure we're at end
  return $ table mempty (zip aligns (repeat 0)) header'' rows
 startInclude :: PandocMonad m => LP m Blocks
 startInclude = do
  fn <- braced
  setPosition $ newPos fn 1 1
  return mempty
 endInclude :: PandocMonad m => LP m Blocks
 endInclude = do
  fn <- braced
  ln <- braced
  co <- braced
  setPosition $ newPos fn (fromMaybe 1 $ safeRead ln) (fromMaybe 1 $ safeRead co)
  return mempty
 removeDoubleQuotes :: String -> String
 removeDoubleQuotes ('"':xs) =
  case reverse xs of