Revert "Roff reader: use LineParts abstraction."

This reverts commit 42ba3c0a0b.
This commit is contained in:
John MacFarlane 2018-10-27 12:29:54 -07:00
parent 42ba3c0a0b
commit cd93faddbf
3 changed files with 65 additions and 75 deletions

View file

@ -645,7 +645,7 @@ test-suite test-pandoc
tasty-quickcheck >= 0.8 && < 0.11, tasty-quickcheck >= 0.8 && < 0.11,
tasty-golden >= 2.3 && < 2.4, tasty-golden >= 2.3 && < 2.4,
QuickCheck >= 2.4 && < 2.13, QuickCheck >= 2.4 && < 2.13,
containers >= 0.5.8 && < 0.7, containers >= 0.4.2.1 && < 0.7,
executable-path >= 0.0 && < 0.1, executable-path >= 0.0 && < 0.1,
zip-archive >= 0.2.3.4 && < 0.4, zip-archive >= 0.2.3.4 && < 0.4,
xml >= 1.3.12 && < 1.4, xml >= 1.3.12 && < 1.4,

View file

@ -53,7 +53,6 @@ import Text.Pandoc.Readers.Roff -- TODO explicit imports
import Text.Parsec hiding (tokenPrim) import Text.Parsec hiding (tokenPrim)
import qualified Text.Parsec as Parsec import qualified Text.Parsec as Parsec
import Text.Parsec.Pos (updatePosString, initialPos) import Text.Parsec.Pos (updatePosString, initialPos)
import qualified Data.Sequence as Seq
import qualified Data.Foldable as Foldable import qualified Data.Foldable as Foldable
data ManState = ManState { readerOptions :: ReaderOptions data ManState = ManState { readerOptions :: ReaderOptions
@ -150,9 +149,8 @@ parseTable = do
isHrule ([cellfmt], _) = columnType cellfmt `elem` ['_','-','='] isHrule ([cellfmt], _) = columnType cellfmt `elem` ['_','-','=']
isHrule (_, [RoffTokens ss]) = isHrule (_, [RoffTokens ss]) =
case Foldable.toList ss of case Foldable.toList ss of
[MLine (LineParts (RoffStr [c] Seq.:<| Seq.Empty))] [MLine [RoffStr [c]]] -> c `elem` ['_','-','=']
-> c `elem` ['_','-','='] _ -> False
_ -> False
isHrule _ = False isHrule _ = False
fallback pos = do fallback pos = do
@ -231,8 +229,8 @@ parseTitle = do
modifyState $ \st -> st{ metadata = adjustMeta $ metadata st } modifyState $ \st -> st{ metadata = adjustMeta $ metadata st }
return mempty return mempty
linePartsToInlines :: LineParts -> Inlines linePartsToInlines :: [LinePart] -> Inlines
linePartsToInlines = go False . Foldable.toList . unLineParts linePartsToInlines = go False
where where
go :: Bool -> [LinePart] -> Inlines go :: Bool -> [LinePart] -> Inlines
@ -368,10 +366,10 @@ parseCodeBlock = try $ do
where where
extractText :: RoffToken -> Maybe String extractText :: RoffToken -> Maybe String
extractText (MLine (LineParts ss)) extractText (MLine ss)
| not (Seq.null ss) | not (null ss)
, all isFontToken ss = Nothing , all isFontToken ss = Nothing
| otherwise = Just $ linePartsToString (LineParts ss) | otherwise = Just $ linePartsToString ss
where isFontToken (FontSize{}) = True where isFontToken (FontSize{}) = True
isFontToken (Font{}) = True isFontToken (Font{}) = True
isFontToken _ = False isFontToken _ = False

View file

@ -36,7 +36,6 @@ module Text.Pandoc.Readers.Roff
, FontSpec(..) , FontSpec(..)
, defaultFontSpec , defaultFontSpec
, LinePart(..) , LinePart(..)
, LineParts(..)
, Arg , Arg
, TableOption , TableOption
, CellFormat(..) , CellFormat(..)
@ -57,7 +56,7 @@ import Text.Pandoc.Class
import Data.Char (isHexDigit, chr, ord, isAscii, isAlphaNum, isSpace) import Data.Char (isHexDigit, chr, ord, isAscii, isAlphaNum, isSpace)
import Data.Default (Default) import Data.Default (Default)
import qualified Data.Map as M import qualified Data.Map as M
import Data.List (intersperse, isSuffixOf) import Data.List (intercalate, isSuffixOf)
import qualified Data.Text as T import qualified Data.Text as T
import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options import Text.Pandoc.Options
@ -92,13 +91,7 @@ data LinePart = RoffStr String
| MacroArg Int | MacroArg Int
deriving Show deriving Show
newtype LineParts = LineParts { unLineParts :: Seq LinePart } type Arg = [LinePart]
deriving (Show, Semigroup, Monoid)
singleLinePart :: LinePart -> LineParts
singleLinePart t = LineParts (Seq.singleton t)
type Arg = LineParts
type TableOption = (String, String) type TableOption = (String, String)
@ -112,7 +105,7 @@ data CellFormat =
type TableRow = ([CellFormat], [RoffTokens]) type TableRow = ([CellFormat], [RoffTokens])
data RoffToken = MLine LineParts data RoffToken = MLine [LinePart]
| MEmptyLine | MEmptyLine
| MMacro MacroKind [Arg] SourcePos | MMacro MacroKind [Arg] SourcePos
| MTable [TableOption] [TableRow] SourcePos | MTable [TableOption] [TableRow] SourcePos
@ -134,7 +127,7 @@ instance Default RoffState where
def = RoffState { customMacros = M.fromList def = RoffState { customMacros = M.fromList
$ map (\(n, s) -> $ map (\(n, s) ->
(n, singleTok (n, singleTok
(MLine $ singleLinePart $ RoffStr s))) (MLine [RoffStr s])))
[ ("Tm", "\x2122") [ ("Tm", "\x2122")
, ("lq", "\x201C") , ("lq", "\x201C")
, ("rq", "\x201D") , ("rq", "\x201D")
@ -164,7 +157,7 @@ combiningAccentsMap :: M.Map String Char
combiningAccentsMap = combiningAccentsMap =
M.fromList $ map (\(x,y) -> (y,x)) combiningAccents M.fromList $ map (\(x,y) -> (y,x)) combiningAccents
escape :: PandocMonad m => RoffLexer m (LineParts) escape :: PandocMonad m => RoffLexer m (Seq LinePart)
escape = do escape = do
char '\\' char '\\'
c <- anyChar c <- anyChar
@ -184,18 +177,18 @@ escape = do
':' -> return mempty ':' -> return mempty
'0' -> return mempty '0' -> return mempty
'c' -> return mempty 'c' -> return mempty
'-' -> return $ singleLinePart $ RoffStr "-" '-' -> return $ Seq.singleton $ RoffStr "-"
'_' -> return $ singleLinePart $ RoffStr "_" '_' -> return $ Seq.singleton $ RoffStr "_"
' ' -> return $ singleLinePart $ RoffStr " " ' ' -> return $ Seq.singleton $ RoffStr " "
'\\' -> return $ singleLinePart $ RoffStr "\\" '\\' -> return $ Seq.singleton $ RoffStr "\\"
't' -> return $ singleLinePart $ RoffStr "\t" 't' -> return $ Seq.singleton $ RoffStr "\t"
'e' -> return $ singleLinePart $ RoffStr "\\" 'e' -> return $ Seq.singleton $ RoffStr "\\"
'`' -> return $ singleLinePart $ RoffStr "`" '`' -> return $ Seq.singleton $ RoffStr "`"
'^' -> return $ singleLinePart $ RoffStr " " '^' -> return $ Seq.singleton $ RoffStr " "
'|' -> return $ singleLinePart $ RoffStr " " '|' -> return $ Seq.singleton $ RoffStr " "
'\'' -> return $ singleLinePart $ RoffStr "`" '\'' -> return $ Seq.singleton $ RoffStr "`"
'.' -> return $ singleLinePart $ RoffStr "`" '.' -> return $ Seq.singleton $ RoffStr "`"
'~' -> return $ singleLinePart $ RoffStr "\160" -- nonbreaking space '~' -> return $ Seq.singleton $ RoffStr "\160" -- nonbreaking space
_ -> escUnknown ['\\',c] _ -> escUnknown ['\\',c]
where where
@ -203,7 +196,7 @@ escape = do
twoCharGlyph = do twoCharGlyph = do
cs <- count 2 anyChar cs <- count 2 anyChar
case M.lookup cs characterCodeMap of case M.lookup cs characterCodeMap of
Just c -> return $ singleLinePart $ RoffStr [c] Just c -> return $ Seq.singleton $ RoffStr [c]
Nothing -> escUnknown ('\\':'(':cs) Nothing -> escUnknown ('\\':'(':cs)
bracketedGlyph = unicodeGlyph <|> charGlyph bracketedGlyph = unicodeGlyph <|> charGlyph
@ -214,7 +207,7 @@ escape = do
[] -> mzero [] -> mzero
[s] -> case M.lookup s characterCodeMap of [s] -> case M.lookup s characterCodeMap of
Nothing -> mzero Nothing -> mzero
Just c -> return $ singleLinePart $ RoffStr [c] Just c -> return $ Seq.singleton $ RoffStr [c]
(s:ss) -> do (s:ss) -> do
basechar <- case M.lookup cs characterCodeMap of basechar <- case M.lookup cs characterCodeMap of
Nothing -> Nothing ->
@ -231,12 +224,12 @@ escape = do
Just x -> addAccents as (x:xs) Just x -> addAccents as (x:xs)
Nothing -> mzero Nothing -> mzero
addAccents ss [basechar] >>= \xs -> addAccents ss [basechar] >>= \xs ->
return (singleLinePart $ RoffStr xs)) return (Seq.singleton $ RoffStr xs))
<|> escUnknown ("\\[" ++ cs ++ "]") <|> escUnknown ("\\[" ++ cs ++ "]")
unicodeGlyph = try $ do unicodeGlyph = try $ do
xs <- ucharCode `sepBy1` (char '_') <* char ']' xs <- ucharCode `sepBy1` (char '_') <* char ']'
return $ singleLinePart $ RoffStr xs return $ Seq.singleton $ RoffStr xs
ucharCode = try $ do ucharCode = try $ do
char 'u' char 'u'
@ -247,20 +240,20 @@ escape = do
Nothing -> mzero Nothing -> mzero
Just c -> return c Just c -> return c
escUnknown :: PandocMonad m => String -> RoffLexer m (LineParts) escUnknown :: PandocMonad m => String -> RoffLexer m (Seq LinePart)
escUnknown s = do escUnknown s = do
pos <- getPosition pos <- getPosition
report $ SkippedContent ("Unknown escape sequence " ++ s) pos report $ SkippedContent ("Unknown escape sequence " ++ s) pos
return $ singleLinePart $ RoffStr "\xFFFD" return $ Seq.singleton $ RoffStr "\xFFFD"
-- \s-1 \s0 -- \s-1 \s0
escFontSize :: PandocMonad m => RoffLexer m (LineParts) escFontSize :: PandocMonad m => RoffLexer m (Seq LinePart)
escFontSize = do escFontSize = do
let sign = option "" $ ("-" <$ char '-' <|> "" <$ char '+') let sign = option "" $ ("-" <$ char '-' <|> "" <$ char '+')
let toFontSize xs = let toFontSize xs =
case safeRead xs of case safeRead xs of
Nothing -> mzero Nothing -> mzero
Just n -> return $ singleLinePart $ FontSize n Just n -> return $ Seq.singleton $ FontSize n
choice choice
[ do char '(' [ do char '('
s <- sign s <- sign
@ -276,7 +269,7 @@ escFontSize = do
toFontSize (s ++ ds) toFontSize (s ++ ds)
] ]
escFont :: PandocMonad m => RoffLexer m (LineParts) escFont :: PandocMonad m => RoffLexer m (Seq LinePart)
escFont = do escFont = do
font <- choice font <- choice
[ digit >> return defaultFontSpec [ digit >> return defaultFontSpec
@ -286,7 +279,7 @@ escFont = do
] ]
modifyState $ \st -> st{ prevFont = currentFont st modifyState $ \st -> st{ prevFont = currentFont st
, currentFont = font } , currentFont = font }
return $ singleLinePart $ Font font return $ Seq.singleton $ Font font
lettersFont :: PandocMonad m => RoffLexer m FontSpec lettersFont :: PandocMonad m => RoffLexer m FontSpec
lettersFont = try $ do lettersFont = try $ do
@ -503,10 +496,10 @@ resolveMacro macroName args pos = do
let fillLP (MacroArg i) zs = let fillLP (MacroArg i) zs =
case drop (i - 1) args of case drop (i - 1) args of
[] -> zs [] -> zs
(LineParts ys:_) -> ys <> zs (ys:_) -> ys ++ zs
fillLP z zs = z Seq.<| zs fillLP z zs = z : zs
let fillMacroArg (MLine (LineParts lineparts)) = let fillMacroArg (MLine lineparts) =
MLine (LineParts (foldr fillLP mempty lineparts)) MLine (foldr fillLP [] lineparts)
fillMacroArg x = x fillMacroArg x = x
return $ RoffTokens . fmap fillMacroArg . unRoffTokens $ ts return $ RoffTokens . fmap fillMacroArg . unRoffTokens $ ts
@ -515,8 +508,7 @@ lexStringDef args = do -- string definition
case args of case args of
[] -> fail "No argument to .ds" [] -> fail "No argument to .ds"
(x:ys) -> do (x:ys) -> do
let ts = singleTok $ MLine $ mconcat let ts = singleTok $ MLine (intercalate [RoffStr " " ] ys)
$ intersperse (singleLinePart $ RoffStr " " ) ys
let stringName = linePartsToString x let stringName = linePartsToString x
modifyState $ \st -> modifyState $ \st ->
st{ customMacros = M.insert stringName ts (customMacros st) } st{ customMacros = M.insert stringName ts (customMacros st) }
@ -546,24 +538,24 @@ lexArgs = do
args <- many $ try oneArg args <- many $ try oneArg
skipMany spacetab skipMany spacetab
eofline eofline
return args return $ map Foldable.toList args
where where
oneArg :: PandocMonad m => RoffLexer m (LineParts) oneArg :: PandocMonad m => RoffLexer m (Seq LinePart)
oneArg = do oneArg = do
skipMany $ try $ string "\\\n" -- continuation line skipMany $ try $ string "\\\n" -- continuation line
try quotedArg <|> plainArg try quotedArg <|> plainArg
-- try, because there are some erroneous files, e.g. linux/bpf.2 -- try, because there are some erroneous files, e.g. linux/bpf.2
plainArg :: PandocMonad m => RoffLexer m (LineParts) plainArg :: PandocMonad m => RoffLexer m (Seq LinePart)
plainArg = do plainArg = do
skipMany spacetab skipMany spacetab
mconcat <$> many1 (macroArg <|> escape <|> regularText <|> unescapedQuote) mconcat <$> many1 (macroArg <|> escape <|> regularText <|> unescapedQuote)
where where
unescapedQuote = char '"' >> return (singleLinePart $ RoffStr "\"") unescapedQuote = char '"' >> return (Seq.singleton $ RoffStr "\"")
quotedArg :: PandocMonad m => RoffLexer m (LineParts) quotedArg :: PandocMonad m => RoffLexer m (Seq LinePart)
quotedArg = do quotedArg = do
skipMany spacetab skipMany spacetab
char '"' char '"'
@ -576,9 +568,9 @@ lexArgs = do
escapedQuote = try $ do escapedQuote = try $ do
char '"' char '"'
char '"' char '"'
return $ singleLinePart $ RoffStr "\"" return $ Seq.singleton $ RoffStr "\""
escStar :: PandocMonad m => RoffLexer m (LineParts) escStar :: PandocMonad m => RoffLexer m (Seq LinePart)
escStar = try $ do escStar = try $ do
pos <- getPosition pos <- getPosition
c <- anyChar c <- anyChar
@ -599,46 +591,46 @@ escStar = try $ do
resolveString stringname pos = do resolveString stringname pos = do
RoffTokens ts <- resolveMacro stringname [] pos RoffTokens ts <- resolveMacro stringname [] pos
case Foldable.toList ts of case Foldable.toList ts of
[MLine xs] -> return xs [MLine xs] -> return $ Seq.fromList xs
_ -> do _ -> do
report $ SkippedContent ("unknown string " ++ stringname) pos report $ SkippedContent ("unknown string " ++ stringname) pos
return mempty return mempty
lexLine :: PandocMonad m => RoffLexer m RoffTokens lexLine :: PandocMonad m => RoffLexer m RoffTokens
lexLine = do lexLine = do
lnparts <- mconcat <$> many1 linePart lnparts <- Foldable.toList . mconcat <$> many1 linePart
eofline eofline
go (unLineParts lnparts) go lnparts
where -- return mempty if we only have empty strings; where -- return empty line if we only have empty strings;
-- this can happen if the line just contains \f[C], for example. -- this can happen if the line just contains \f[C], for example.
go Seq.Empty = return mempty go [] = return mempty
go ((RoffStr "") Seq.:<| xs) = go xs go (RoffStr "" : xs) = go xs
go xs = return $ singleTok $ MLine (LineParts xs) go xs = return $ singleTok $ MLine xs
linePart :: PandocMonad m => RoffLexer m (LineParts) linePart :: PandocMonad m => RoffLexer m (Seq LinePart)
linePart = macroArg <|> escape <|> linePart = macroArg <|> escape <|>
regularText <|> quoteChar <|> spaceTabChar regularText <|> quoteChar <|> spaceTabChar
macroArg :: PandocMonad m => RoffLexer m (LineParts) macroArg :: PandocMonad m => RoffLexer m (Seq LinePart)
macroArg = try $ do macroArg = try $ do
string "\\\\$" string "\\\\$"
x <- digit x <- digit
return $ singleLinePart $ MacroArg $ ord x - ord '0' return $ Seq.singleton $ MacroArg $ ord x - ord '0'
regularText :: PandocMonad m => RoffLexer m (LineParts) regularText :: PandocMonad m => RoffLexer m (Seq LinePart)
regularText = do regularText = do
s <- many1 $ noneOf "\n\r\t \\\"" s <- many1 $ noneOf "\n\r\t \\\""
return $ singleLinePart $ RoffStr s return $ Seq.singleton $ RoffStr s
quoteChar :: PandocMonad m => RoffLexer m (LineParts) quoteChar :: PandocMonad m => RoffLexer m (Seq LinePart)
quoteChar = do quoteChar = do
char '"' char '"'
return $ singleLinePart $ RoffStr "\"" return $ Seq.singleton $ RoffStr "\""
spaceTabChar :: PandocMonad m => RoffLexer m (LineParts) spaceTabChar :: PandocMonad m => RoffLexer m (Seq LinePart)
spaceTabChar = do spaceTabChar = do
c <- spacetab c <- spacetab
return $ singleLinePart $ RoffStr [c] return $ Seq.singleton $ RoffStr [c]
lexEmptyLine :: PandocMonad m => RoffLexer m RoffTokens lexEmptyLine :: PandocMonad m => RoffLexer m RoffTokens
lexEmptyLine = newline >> return (singleTok MEmptyLine) lexEmptyLine = newline >> return (singleTok MEmptyLine)
@ -646,8 +638,8 @@ lexEmptyLine = newline >> return (singleTok MEmptyLine)
manToken :: PandocMonad m => RoffLexer m RoffTokens manToken :: PandocMonad m => RoffLexer m RoffTokens
manToken = lexComment <|> lexMacro <|> lexLine <|> lexEmptyLine manToken = lexComment <|> lexMacro <|> lexLine <|> lexEmptyLine
linePartsToString :: LineParts -> String linePartsToString :: [LinePart] -> String
linePartsToString = Foldable.foldMap go . unLineParts linePartsToString = mconcat . map go
where where
go (RoffStr s) = s go (RoffStr s) = s
go _ = mempty go _ = mempty