Roff reader: improve lexing of conditionals.

Partially addreses #5039.
This commit is contained in:
John MacFarlane 2018-11-02 17:23:11 -07:00
parent e01ca77313
commit 9e369e9016

View file

@ -48,7 +48,7 @@ where
import Prelude import Prelude
import Safe (lastDef) import Safe (lastDef)
import Control.Monad (void, mzero, guard, when, mplus) import Control.Monad (void, mzero, mplus)
import Control.Monad.Except (throwError) import Control.Monad.Except (throwError)
import Text.Pandoc.Class import Text.Pandoc.Class
(getResourcePath, readFileFromDirs, PandocMonad(..), report) (getResourcePath, readFileFromDirs, PandocMonad(..), report)
@ -56,7 +56,7 @@ import Data.Char (isLower, toLower, toUpper, chr,
isAscii, isAlphaNum, isSpace) isAscii, isAlphaNum, isSpace)
import Data.Default (Default) import Data.Default (Default)
import qualified Data.Map as M import qualified Data.Map as M
import Data.List (intercalate, isSuffixOf) import Data.List (intercalate)
import qualified Data.Text as T import qualified Data.Text as T
import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options import Text.Pandoc.Options
@ -122,6 +122,7 @@ data RoffState = RoffState { customMacros :: M.Map String RoffTokens
, currentFont :: FontSpec , currentFont :: FontSpec
, tableTabChar :: Char , tableTabChar :: Char
, roffMode :: RoffMode , roffMode :: RoffMode
, lastExpression :: Maybe Bool
} deriving Show } deriving Show
instance Default RoffState where instance Default RoffState where
@ -137,6 +138,7 @@ instance Default RoffState where
, currentFont = defaultFontSpec , currentFont = defaultFontSpec
, tableTabChar = '\t' , tableTabChar = '\t'
, roffMode = NormalMode , roffMode = NormalMode
, lastExpression = Nothing
} }
type RoffLexer m = ParserT [Char] RoffState m type RoffLexer m = ParserT [Char] RoffState m
@ -349,15 +351,16 @@ lexComment = do
lexMacro :: PandocMonad m => RoffLexer m RoffTokens lexMacro :: PandocMonad m => RoffLexer m RoffTokens
lexMacro = do lexMacro = do
pos <- getPosition pos <- getPosition
guard $ sourceColumn pos == 1 -- we don't want this because of '.ie .B foo':
-- guard $ sourceColumn pos == 1
char '.' <|> char '\'' char '.' <|> char '\''
skipMany spacetab skipMany spacetab
macroName <- many (satisfy (not . isSpace)) macroName <- many (satisfy (not . isSpace))
case macroName of case macroName of
"nop" -> return mempty "nop" -> return mempty
"ie" -> lexConditional "ie" -> lexConditional "ie"
"if" -> lexConditional "if" -> lexConditional "if"
"el" -> skipConditional "el" -> lexConditional "el"
"TS" -> lexTable pos "TS" -> lexTable pos
_ -> do _ -> do
@ -484,18 +487,43 @@ tableColFormat = do
-- We don't fully handle the conditional. But we do -- We don't fully handle the conditional. But we do
-- include everything under '.ie n', which occurs commonly -- include everything under '.ie n', which occurs commonly
-- in man pages. We always skip the '.el' part. -- in man pages.
lexConditional :: PandocMonad m => RoffLexer m RoffTokens lexConditional :: PandocMonad m => String -> RoffLexer m RoffTokens
lexConditional = do lexConditional mname = do
pos <- getPosition
skipMany spacetab skipMany spacetab
lexNCond <|> skipConditional mbtest <- if mname == "el"
then fmap not . lastExpression <$> getState
else expression
skipMany spacetab
st <- getState -- save state, so we can reset it
ifPart <- lexGroup
<|> (char '\\' >> newline >> manToken)
<|> manToken
case mbtest of
Nothing -> do
putState st -- reset state, so we don't record macros in skipped section
report $ SkippedContent ('.':mname) pos
return mempty
Just True -> return ifPart
Just False -> do
putState st
return mempty
-- n means nroff mode expression :: PandocMonad m => RoffLexer m (Maybe Bool)
lexNCond :: PandocMonad m => RoffLexer m RoffTokens expression = do
lexNCond = do raw <- charsInBalanced '(' ')' (satisfy (/= '\n'))
newline <|> many1 nonspaceChar
many1 spacetab returnValue $
lexGroup <|> manToken case raw of
"1" -> Just True
"n" -> Just True -- nroff mode
"t" -> Just False -- troff mode
_ -> Nothing
where
returnValue v = do
modifyState $ \st -> st{ lastExpression = v }
return v
lexGroup :: PandocMonad m => RoffLexer m RoffTokens lexGroup :: PandocMonad m => RoffLexer m RoffTokens
lexGroup = do lexGroup = do
@ -505,13 +533,6 @@ lexGroup = do
groupstart = try $ string "\\{\\" >> newline groupstart = try $ string "\\{\\" >> newline
groupend = try $ string "\\}" >> eofline groupend = try $ string "\\}" >> eofline
skipConditional :: PandocMonad m => RoffLexer m RoffTokens
skipConditional = do
rest <- anyLine
when ("\\{\\" `isSuffixOf` rest) $
void $ manyTill anyChar (try (string "\\}"))
return mempty
lexIncludeFile :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens lexIncludeFile :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens
lexIncludeFile args = do lexIncludeFile args = do
pos <- getPosition pos <- getPosition