Improved handling of include files in LaTeX reader.
Previously `\include` wouldn't work if the included file contained, e.g., a begin without a matching end. We've changed the Tok type so that it stores a full SourcePos, rather than just a line and column. So tokens keeep track of the file they came from. This allows us to use a simpler method for includes, which doesn't require parsing the included document as a whole. Closes #3971.
This commit is contained in:
parent
9cf9a64923
commit
c40857b389
5 changed files with 93 additions and 61 deletions
|
@ -148,6 +148,7 @@ extra-source-files:
|
|||
test/command/3510-subdoc.org
|
||||
test/command/3510-export.latex
|
||||
test/command/3510-src.hs
|
||||
test/command/3971b.tex
|
||||
test/docbook-reader.docbook
|
||||
test/docbook-xref.docbook
|
||||
test/html-reader.html
|
||||
|
|
|
@ -71,7 +71,9 @@ import Text.Pandoc.Shared
|
|||
import Text.Pandoc.Readers.LaTeX.Types (Macro(..), ExpansionPoint(..), Tok(..),
|
||||
TokType(..))
|
||||
import Text.Pandoc.Walk
|
||||
import Text.Pandoc.Error (PandocError(PandocParsecError, PandocMacroLoop))
|
||||
import Text.Pandoc.Error
|
||||
(PandocError(PandocParsecError, PandocParseError, PandocMacroLoop))
|
||||
import Text.Parsec.Pos
|
||||
|
||||
-- for debugging:
|
||||
-- import Text.Pandoc.Extensions (getDefaultExtensions)
|
||||
|
@ -85,7 +87,7 @@ readLaTeX :: PandocMonad m
|
|||
-> m Pandoc
|
||||
readLaTeX opts ltx = do
|
||||
parsed <- runParserT parseLaTeX def{ sOptions = opts } "source"
|
||||
(tokenize (crFilter ltx))
|
||||
(tokenize "source" (crFilter ltx))
|
||||
case parsed of
|
||||
Right result -> return result
|
||||
Left e -> throwError $ PandocParsecError (T.unpack ltx) e
|
||||
|
@ -127,7 +129,7 @@ resolveRefs _ x = x
|
|||
-- res <- runIOorExplode (runParserT p defaultLaTeXState{
|
||||
-- sOptions = def{ readerExtensions =
|
||||
-- enableExtension Ext_raw_tex $
|
||||
-- getDefaultExtensions "latex" }} "source" (tokenize t))
|
||||
-- getDefaultExtensions "latex" }} "source" (tokenize "source" t))
|
||||
-- case res of
|
||||
-- Left e -> error (show e)
|
||||
-- Right r -> return r
|
||||
|
@ -238,7 +240,7 @@ rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
|
|||
=> LP m a -> ParserT String s m String
|
||||
rawLaTeXParser parser = do
|
||||
inp <- getInput
|
||||
let toks = tokenize $ T.pack inp
|
||||
let toks = tokenize "source" $ T.pack inp
|
||||
pstate <- getState
|
||||
let lstate = def{ sOptions = extractReaderOptions pstate }
|
||||
res <- lift $ runParserT ((,) <$> try (snd <$> withRaw parser) <*> getState)
|
||||
|
@ -257,7 +259,7 @@ applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
|
|||
pstate <- getState
|
||||
let lstate = def{ sOptions = extractReaderOptions pstate
|
||||
, sMacros = extractMacros pstate }
|
||||
res <- runParserT retokenize lstate "math" (tokenize (T.pack s))
|
||||
res <- runParserT retokenize lstate "math" (tokenize "math" (T.pack s))
|
||||
case res of
|
||||
Left e -> fail (show e)
|
||||
Right s' -> return s'
|
||||
|
@ -278,7 +280,7 @@ inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines
|
|||
inlineCommand = do
|
||||
lookAhead (try (char '\\' >> letter) <|> char '$')
|
||||
inp <- getInput
|
||||
let toks = tokenize $ T.pack inp
|
||||
let toks = tokenize "chunk" $ T.pack inp
|
||||
let rawinline = do
|
||||
(il, raw) <- try $ withRaw (inlineEnvironment <|> inlineCommand')
|
||||
st <- getState
|
||||
|
@ -294,32 +296,33 @@ inlineCommand = do
|
|||
takeP (T.length (untokenize raw))
|
||||
return il
|
||||
|
||||
tokenize :: Text -> [Tok]
|
||||
tokenize = totoks (1, 1)
|
||||
tokenize :: SourceName -> Text -> [Tok]
|
||||
tokenize sourcename = totoks (initialPos sourcename)
|
||||
|
||||
totoks :: (Line, Column) -> Text -> [Tok]
|
||||
totoks (lin,col) t =
|
||||
totoks :: SourcePos -> Text -> [Tok]
|
||||
totoks pos t =
|
||||
case T.uncons t of
|
||||
Nothing -> []
|
||||
Just (c, rest)
|
||||
| c == '\n' ->
|
||||
Tok (lin, col) Newline "\n"
|
||||
: totoks (lin + 1,1) rest
|
||||
Tok pos Newline "\n"
|
||||
: totoks (setSourceColumn (incSourceLine pos 1) 1) rest
|
||||
| isSpaceOrTab c ->
|
||||
let (sps, rest') = T.span isSpaceOrTab t
|
||||
in Tok (lin, col) Spaces sps
|
||||
: totoks (lin, col + T.length sps) rest'
|
||||
in Tok pos Spaces sps
|
||||
: totoks (incSourceColumn pos (T.length sps))
|
||||
rest'
|
||||
| isAlphaNum c ->
|
||||
let (ws, rest') = T.span isAlphaNum t
|
||||
in Tok (lin, col) Word ws
|
||||
: totoks (lin, col + T.length ws) rest'
|
||||
in Tok pos Word ws
|
||||
: totoks (incSourceColumn pos (T.length ws)) rest'
|
||||
| c == '%' ->
|
||||
let (cs, rest') = T.break (== '\n') rest
|
||||
in Tok (lin, col) Comment ("%" <> cs)
|
||||
: totoks (lin, col + 1 + T.length cs) rest'
|
||||
in Tok pos Comment ("%" <> cs)
|
||||
: totoks (incSourceColumn pos (1 + T.length cs)) rest'
|
||||
| c == '\\' ->
|
||||
case T.uncons rest of
|
||||
Nothing -> [Tok (lin, col) Symbol (T.singleton c)]
|
||||
Nothing -> [Tok pos Symbol (T.singleton c)]
|
||||
Just (d, rest')
|
||||
| isLetterOrAt d ->
|
||||
-- \makeatletter is common in macro defs;
|
||||
|
@ -328,24 +331,24 @@ totoks (lin,col) t =
|
|||
-- probably best for now
|
||||
let (ws, rest'') = T.span isLetterOrAt rest
|
||||
(ss, rest''') = T.span isSpaceOrTab rest''
|
||||
in Tok (lin, col) (CtrlSeq ws) ("\\" <> ws <> ss)
|
||||
: totoks (lin,
|
||||
col + 1 + T.length ws + T.length ss) rest'''
|
||||
in Tok pos (CtrlSeq ws) ("\\" <> ws <> ss)
|
||||
: totoks (incSourceColumn pos
|
||||
(1 + T.length ws + T.length ss)) rest'''
|
||||
| d == '\t' || d == '\n' ->
|
||||
Tok (lin, col) Symbol ("\\")
|
||||
: totoks (lin, col + 1) rest
|
||||
Tok pos Symbol ("\\")
|
||||
: totoks (incSourceColumn pos 1) rest
|
||||
| otherwise ->
|
||||
Tok (lin, col) (CtrlSeq (T.singleton d)) (T.pack [c,d])
|
||||
: totoks (lin, col + 2) rest'
|
||||
Tok pos (CtrlSeq (T.singleton d)) (T.pack [c,d])
|
||||
: totoks (incSourceColumn pos 2) rest'
|
||||
| c == '#' ->
|
||||
let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest
|
||||
in case safeRead (T.unpack t1) of
|
||||
Just i ->
|
||||
Tok (lin, col) (Arg i) ("#" <> t1)
|
||||
: totoks (lin, col + 1 + T.length t1) t2
|
||||
Tok pos (Arg i) ("#" <> t1)
|
||||
: totoks (incSourceColumn pos (1 + T.length t1)) t2
|
||||
Nothing ->
|
||||
Tok (lin, col) Symbol ("#")
|
||||
: totoks (lin, col + 1) t2
|
||||
Tok pos Symbol ("#")
|
||||
: totoks (incSourceColumn pos 1) t2
|
||||
| c == '^' ->
|
||||
case T.uncons rest of
|
||||
Just ('^', rest') ->
|
||||
|
@ -354,20 +357,20 @@ totoks (lin,col) t =
|
|||
| isLowerHex d ->
|
||||
case T.uncons rest'' of
|
||||
Just (e, rest''') | isLowerHex e ->
|
||||
Tok (lin, col) Esc2 (T.pack ['^','^',d,e])
|
||||
: totoks (lin, col + 4) rest'''
|
||||
Tok pos Esc2 (T.pack ['^','^',d,e])
|
||||
: totoks (incSourceColumn pos 4) rest'''
|
||||
_ ->
|
||||
Tok (lin, col) Esc1 (T.pack ['^','^',d])
|
||||
: totoks (lin, col + 3) rest''
|
||||
Tok pos Esc1 (T.pack ['^','^',d])
|
||||
: totoks (incSourceColumn pos 3) rest''
|
||||
| d < '\128' ->
|
||||
Tok (lin, col) Esc1 (T.pack ['^','^',d])
|
||||
: totoks (lin, col + 3) rest''
|
||||
_ -> [Tok (lin, col) Symbol ("^"),
|
||||
Tok (lin, col + 1) Symbol ("^")]
|
||||
_ -> Tok (lin, col) Symbol ("^")
|
||||
: totoks (lin, col + 1) rest
|
||||
Tok pos Esc1 (T.pack ['^','^',d])
|
||||
: totoks (incSourceColumn pos 3) rest''
|
||||
_ -> [Tok pos Symbol ("^"),
|
||||
Tok (incSourceColumn pos 1) Symbol ("^")]
|
||||
_ -> Tok pos Symbol ("^")
|
||||
: totoks (incSourceColumn pos 1) rest
|
||||
| otherwise ->
|
||||
Tok (lin, col) Symbol (T.singleton c) : totoks (lin, col + 1) rest
|
||||
Tok pos Symbol (T.singleton c) : totoks (incSourceColumn pos 1) rest
|
||||
|
||||
where isSpaceOrTab ' ' = True
|
||||
isSpaceOrTab '\t' = True
|
||||
|
@ -393,8 +396,7 @@ satisfyTok f =
|
|||
where matcher t | f t = Just t
|
||||
| otherwise = Nothing
|
||||
updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
|
||||
updatePos spos _ (Tok (lin,col) _ _ : _) =
|
||||
setSourceColumn (setSourceLine spos lin) col
|
||||
updatePos _spos _ (Tok pos _ _ : _) = pos
|
||||
updatePos spos _ [] = spos
|
||||
|
||||
doMacros :: PandocMonad m => Int -> LP m ()
|
||||
|
@ -437,7 +439,7 @@ doMacros n = do
|
|||
else doMacros (n + 1)
|
||||
ExpandWhenDefined -> return ()
|
||||
|
||||
setpos :: (Line, Column) -> Tok -> Tok
|
||||
setpos :: SourcePos -> Tok -> Tok
|
||||
setpos spos (Tok _ tt txt) = Tok spos tt txt
|
||||
|
||||
anyControlSeq :: PandocMonad m => LP m Tok
|
||||
|
@ -728,15 +730,15 @@ doverb = do
|
|||
|
||||
verbTok :: PandocMonad m => Char -> LP m Tok
|
||||
verbTok stopchar = do
|
||||
t@(Tok (lin, col) toktype txt) <- satisfyTok (not . isNewlineTok)
|
||||
t@(Tok pos toktype txt) <- satisfyTok (not . isNewlineTok)
|
||||
case T.findIndex (== stopchar) txt of
|
||||
Nothing -> return t
|
||||
Just i -> do
|
||||
let (t1, t2) = T.splitAt i txt
|
||||
inp <- getInput
|
||||
setInput $ Tok (lin, col + i) Symbol (T.singleton stopchar)
|
||||
: (totoks (lin, col + i + 1) (T.drop 1 t2)) ++ inp
|
||||
return $ Tok (lin, col) toktype t1
|
||||
setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar)
|
||||
: (totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2)) ++ inp
|
||||
return $ Tok pos toktype t1
|
||||
|
||||
dolstinline :: PandocMonad m => LP m Inlines
|
||||
dolstinline = do
|
||||
|
@ -1117,16 +1119,16 @@ tok = grouped inline <|> inlineCommand' <|> singleChar'
|
|||
|
||||
singleChar :: PandocMonad m => LP m Tok
|
||||
singleChar = try $ do
|
||||
Tok (lin,col) toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
|
||||
Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
|
||||
guard $ not $ toktype == Symbol &&
|
||||
T.any (`Set.member` specialChars) t
|
||||
if T.length t > 1
|
||||
then do
|
||||
let (t1, t2) = (T.take 1 t, T.drop 1 t)
|
||||
inp <- getInput
|
||||
setInput $ (Tok (lin, col + 1) toktype t2) : inp
|
||||
return $ Tok (lin,col) toktype t1
|
||||
else return $ Tok (lin,col) toktype t
|
||||
setInput $ (Tok (incSourceColumn pos 1) toktype t2) : inp
|
||||
return $ Tok pos toktype t1
|
||||
else return $ Tok pos toktype t
|
||||
|
||||
opt :: PandocMonad m => LP m Inlines
|
||||
opt = bracketed inline
|
||||
|
@ -1159,7 +1161,7 @@ withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
|
|||
withRaw parser = do
|
||||
inp <- getInput
|
||||
result <- parser
|
||||
nxt <- option (Tok (0,0) Word "") (lookAhead anyTok)
|
||||
nxt <- option (Tok (initialPos "source") Word "") (lookAhead anyTok)
|
||||
let raw = takeWhile (/= nxt) inp
|
||||
return (result, raw)
|
||||
|
||||
|
@ -1739,7 +1741,27 @@ include = do
|
|||
then map (maybeAddExtension ".sty") fs
|
||||
else map (maybeAddExtension ".tex") fs
|
||||
dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
|
||||
mconcat <$> mapM (insertIncludedFile blocks (tokenize . T.pack) dirs) fs'
|
||||
mapM_ (insertIncluded dirs) fs'
|
||||
return mempty
|
||||
|
||||
insertIncluded :: PandocMonad m
|
||||
=> [FilePath]
|
||||
-> FilePath
|
||||
-> LP m ()
|
||||
insertIncluded dirs f = do
|
||||
pos <- getPosition
|
||||
containers <- getIncludeFiles <$> getState
|
||||
when (f `elem` containers) $ do
|
||||
throwError $ PandocParseError $ "Include file loop at " ++ show pos
|
||||
updateState $ addIncludeFile f
|
||||
mbcontents <- readFileFromDirs dirs f
|
||||
contents <- case mbcontents of
|
||||
Just s -> return s
|
||||
Nothing -> do
|
||||
report $ CouldNotLoadIncludeFile f pos
|
||||
return ""
|
||||
getInput >>= setInput . (tokenize f (T.pack contents) ++)
|
||||
updateState dropLatestIncludeFile
|
||||
|
||||
maybeAddExtension :: String -> FilePath -> FilePath
|
||||
maybeAddExtension ext fp =
|
||||
|
@ -2394,9 +2416,7 @@ parseTableRow envname prefsufs = do
|
|||
>> anyTok)
|
||||
suffpos <- getPosition
|
||||
option [] (count 1 amp)
|
||||
return $ map (setpos (sourceLine prefpos, sourceColumn prefpos)) pref
|
||||
++ contents ++
|
||||
map (setpos (sourceLine suffpos, sourceColumn suffpos)) suff
|
||||
return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
|
||||
rawcells <- sequence (map celltoks prefsufs)
|
||||
oldInput <- getInput
|
||||
cells <- sequence $ map (\ts -> setInput ts >> parseTableCell) rawcells
|
||||
|
|
|
@ -31,17 +31,17 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
|
|||
, TokType(..)
|
||||
, Macro(..)
|
||||
, ExpansionPoint(..)
|
||||
, Line
|
||||
, Column )
|
||||
, SourcePos
|
||||
)
|
||||
where
|
||||
import Data.Text (Text)
|
||||
import Text.Parsec.Pos (Line, Column)
|
||||
import Text.Parsec.Pos (SourcePos)
|
||||
|
||||
data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
|
||||
Esc1 | Esc2 | Arg Int
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
data Tok = Tok (Line, Column) TokType Text
|
||||
data Tok = Tok SourcePos TokType Text
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed
|
||||
|
|
9
test/command/3971.md
Normal file
9
test/command/3971.md
Normal file
|
@ -0,0 +1,9 @@
|
|||
```
|
||||
% TEXINPUTS=command pandoc -f latex -t native
|
||||
\documentclass{article}
|
||||
\include{3971b}
|
||||
\code{f}
|
||||
\end{document}
|
||||
^D
|
||||
[Para [Code ("",[],[]) "f"]]
|
||||
```
|
2
test/command/3971b.tex
Normal file
2
test/command/3971b.tex
Normal file
|
@ -0,0 +1,2 @@
|
|||
\newcommand{\code}[1]{\texttt{#1}}
|
||||
\begin{document}
|
Loading…
Add table
Reference in a new issue