From e4cca4cf67af31ce574ef2871d08f0719d4a50a5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 4 Sep 2019 09:24:42 -0700 Subject: [PATCH] Roff readers: better parsing of groups. We now allow groups where the closing `\\}` isn't at the beginning of a line. Closes #5410. --- src/Text/Pandoc/Readers/Roff.hs | 14 +++++--------- test/Tests/Readers/Man.hs | 2 +- test/command/5410.md | 7 +++++++ 3 files changed, 13 insertions(+), 10 deletions(-) create mode 100644 test/command/5410.md diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs index 3ed7f05a6..18535353e 100644 --- a/src/Text/Pandoc/Readers/Roff.hs +++ b/src/Text/Pandoc/Readers/Roff.hs @@ -33,7 +33,7 @@ import Control.Monad.Except (throwError) import Text.Pandoc.Class (getResourcePath, readFileFromDirs, PandocMonad(..), report) import Data.Char (isLower, toLower, toUpper, chr, - isAscii, isAlphaNum, isSpace) + isAscii, isAlphaNum) import Data.Default (Default) import qualified Data.Map as M import Data.List (intercalate) @@ -130,7 +130,7 @@ type RoffLexer m = ParserT [Char] RoffState m -- eofline :: Stream s m Char => ParsecT s u m () -eofline = void newline <|> eof +eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}") spacetab :: Stream s m Char => ParsecT s u m Char spacetab = char ' ' <|> char '\t' @@ -144,7 +144,7 @@ combiningAccentsMap = M.fromList $ map (\(x,y) -> (y,x)) combiningAccents escape :: PandocMonad m => RoffLexer m [LinePart] -escape = do +escape = try $ do backslash escapeGlyph <|> escapeNormal @@ -193,7 +193,7 @@ readUnicodeChar _ = Nothing escapeNormal :: PandocMonad m => RoffLexer m [LinePart] escapeNormal = do - c <- anyChar + c <- noneOf "{}" optional expandString case c of ' ' -> return [RoffStr " "] @@ -256,9 +256,7 @@ escapeNormal = do 'w' -> escIgnore 'w' [quoteArg] 'x' -> escIgnore 'x' [quoteArg] 'z' -> escIgnore 'z' [count 1 anyChar] - '{' -> return mempty '|' -> return [RoffStr "\x2006"] --1/6 em space - '}' -> return mempty '~' -> return [RoffStr "\160"] -- nonbreaking space '\\' -> do mode <- roffMode <$> getState @@ -350,7 +348,7 @@ lexMacro = do guard $ sourceColumn pos == 1 || afterConditional st char '.' <|> char '\'' skipMany spacetab - macroName <- many (satisfy (not . isSpace)) + macroName <- many (satisfy isAlphaNum) case macroName of "nop" -> return mempty "ie" -> lexConditional "ie" @@ -361,8 +359,6 @@ lexMacro = do args <- lexArgs case macroName of "" -> return mempty - "\\\"" -> return mempty - "\\#" -> return mempty "TS" -> lexTable pos "de" -> lexMacroDef args "de1" -> lexMacroDef args diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs index 8c5d428e5..c3d2582ca 100644 --- a/test/Tests/Readers/Man.hs +++ b/test/Tests/Readers/Man.hs @@ -74,7 +74,7 @@ tests = [ =?> para (text "hi " <> emph (text "there " <> strong (text "bold")) <> text " ok") , "skip" =: - "a\\%\\{\\}\\\n\\:b\\0" + "a\\%\\\n\\:b\\0" =?> (para $ str "ab\8199") , "replace" =: "\\-\\ \\\\\\[lq]\\[rq]\\[em]\\[en]\\*(lq\\*(rq" diff --git a/test/command/5410.md b/test/command/5410.md new file mode 100644 index 000000000..fee90a1a4 --- /dev/null +++ b/test/command/5410.md @@ -0,0 +1,7 @@ +``` +% pandoc -f man -t native +.ie n \{\ +'br\} +^D +[Para [LineBreak]] +```