Revert "Roff reader: use LineParts abstraction."

This reverts commit 42ba3c0a0b.
This commit is contained in:
John MacFarlane 2018-10-27 12:29:54 -07:00
parent 42ba3c0a0b
commit cd93faddbf
3 changed files with 65 additions and 75 deletions

View file

@ -645,7 +645,7 @@ test-suite test-pandoc
tasty-quickcheck >= 0.8 && < 0.11,
tasty-golden >= 2.3 && < 2.4,
QuickCheck >= 2.4 && < 2.13,
containers >= 0.5.8 && < 0.7,
containers >= 0.4.2.1 && < 0.7,
executable-path >= 0.0 && < 0.1,
zip-archive >= 0.2.3.4 && < 0.4,
xml >= 1.3.12 && < 1.4,

View file

@ -53,7 +53,6 @@ import Text.Pandoc.Readers.Roff -- TODO explicit imports
import Text.Parsec hiding (tokenPrim)
import qualified Text.Parsec as Parsec
import Text.Parsec.Pos (updatePosString, initialPos)
import qualified Data.Sequence as Seq
import qualified Data.Foldable as Foldable
data ManState = ManState { readerOptions :: ReaderOptions
@ -150,8 +149,7 @@ parseTable = do
isHrule ([cellfmt], _) = columnType cellfmt `elem` ['_','-','=']
isHrule (_, [RoffTokens ss]) =
case Foldable.toList ss of
[MLine (LineParts (RoffStr [c] Seq.:<| Seq.Empty))]
-> c `elem` ['_','-','=']
[MLine [RoffStr [c]]] -> c `elem` ['_','-','=']
_ -> False
isHrule _ = False
@ -231,8 +229,8 @@ parseTitle = do
modifyState $ \st -> st{ metadata = adjustMeta $ metadata st }
return mempty
linePartsToInlines :: LineParts -> Inlines
linePartsToInlines = go False . Foldable.toList . unLineParts
linePartsToInlines :: [LinePart] -> Inlines
linePartsToInlines = go False
where
go :: Bool -> [LinePart] -> Inlines
@ -368,10 +366,10 @@ parseCodeBlock = try $ do
where
extractText :: RoffToken -> Maybe String
extractText (MLine (LineParts ss))
| not (Seq.null ss)
extractText (MLine ss)
| not (null ss)
, all isFontToken ss = Nothing
| otherwise = Just $ linePartsToString (LineParts ss)
| otherwise = Just $ linePartsToString ss
where isFontToken (FontSize{}) = True
isFontToken (Font{}) = True
isFontToken _ = False

View file

@ -36,7 +36,6 @@ module Text.Pandoc.Readers.Roff
, FontSpec(..)
, defaultFontSpec
, LinePart(..)
, LineParts(..)
, Arg
, TableOption
, CellFormat(..)
@ -57,7 +56,7 @@ import Text.Pandoc.Class
import Data.Char (isHexDigit, chr, ord, isAscii, isAlphaNum, isSpace)
import Data.Default (Default)
import qualified Data.Map as M
import Data.List (intersperse, isSuffixOf)
import Data.List (intercalate, isSuffixOf)
import qualified Data.Text as T
import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options
@ -92,13 +91,7 @@ data LinePart = RoffStr String
| MacroArg Int
deriving Show
newtype LineParts = LineParts { unLineParts :: Seq LinePart }
deriving (Show, Semigroup, Monoid)
singleLinePart :: LinePart -> LineParts
singleLinePart t = LineParts (Seq.singleton t)
type Arg = LineParts
type Arg = [LinePart]
type TableOption = (String, String)
@ -112,7 +105,7 @@ data CellFormat =
type TableRow = ([CellFormat], [RoffTokens])
data RoffToken = MLine LineParts
data RoffToken = MLine [LinePart]
| MEmptyLine
| MMacro MacroKind [Arg] SourcePos
| MTable [TableOption] [TableRow] SourcePos
@ -134,7 +127,7 @@ instance Default RoffState where
def = RoffState { customMacros = M.fromList
$ map (\(n, s) ->
(n, singleTok
(MLine $ singleLinePart $ RoffStr s)))
(MLine [RoffStr s])))
[ ("Tm", "\x2122")
, ("lq", "\x201C")
, ("rq", "\x201D")
@ -164,7 +157,7 @@ combiningAccentsMap :: M.Map String Char
combiningAccentsMap =
M.fromList $ map (\(x,y) -> (y,x)) combiningAccents
escape :: PandocMonad m => RoffLexer m (LineParts)
escape :: PandocMonad m => RoffLexer m (Seq LinePart)
escape = do
char '\\'
c <- anyChar
@ -184,18 +177,18 @@ escape = do
':' -> return mempty
'0' -> return mempty
'c' -> return mempty
'-' -> return $ singleLinePart $ RoffStr "-"
'_' -> return $ singleLinePart $ RoffStr "_"
' ' -> return $ singleLinePart $ RoffStr " "
'\\' -> return $ singleLinePart $ RoffStr "\\"
't' -> return $ singleLinePart $ RoffStr "\t"
'e' -> return $ singleLinePart $ RoffStr "\\"
'`' -> return $ singleLinePart $ RoffStr "`"
'^' -> return $ singleLinePart $ RoffStr " "
'|' -> return $ singleLinePart $ RoffStr " "
'\'' -> return $ singleLinePart $ RoffStr "`"
'.' -> return $ singleLinePart $ RoffStr "`"
'~' -> return $ singleLinePart $ RoffStr "\160" -- nonbreaking space
'-' -> return $ Seq.singleton $ RoffStr "-"
'_' -> return $ Seq.singleton $ RoffStr "_"
' ' -> return $ Seq.singleton $ RoffStr " "
'\\' -> return $ Seq.singleton $ RoffStr "\\"
't' -> return $ Seq.singleton $ RoffStr "\t"
'e' -> return $ Seq.singleton $ RoffStr "\\"
'`' -> return $ Seq.singleton $ RoffStr "`"
'^' -> return $ Seq.singleton $ RoffStr " "
'|' -> return $ Seq.singleton $ RoffStr " "
'\'' -> return $ Seq.singleton $ RoffStr "`"
'.' -> return $ Seq.singleton $ RoffStr "`"
'~' -> return $ Seq.singleton $ RoffStr "\160" -- nonbreaking space
_ -> escUnknown ['\\',c]
where
@ -203,7 +196,7 @@ escape = do
twoCharGlyph = do
cs <- count 2 anyChar
case M.lookup cs characterCodeMap of
Just c -> return $ singleLinePart $ RoffStr [c]
Just c -> return $ Seq.singleton $ RoffStr [c]
Nothing -> escUnknown ('\\':'(':cs)
bracketedGlyph = unicodeGlyph <|> charGlyph
@ -214,7 +207,7 @@ escape = do
[] -> mzero
[s] -> case M.lookup s characterCodeMap of
Nothing -> mzero
Just c -> return $ singleLinePart $ RoffStr [c]
Just c -> return $ Seq.singleton $ RoffStr [c]
(s:ss) -> do
basechar <- case M.lookup cs characterCodeMap of
Nothing ->
@ -231,12 +224,12 @@ escape = do
Just x -> addAccents as (x:xs)
Nothing -> mzero
addAccents ss [basechar] >>= \xs ->
return (singleLinePart $ RoffStr xs))
return (Seq.singleton $ RoffStr xs))
<|> escUnknown ("\\[" ++ cs ++ "]")
unicodeGlyph = try $ do
xs <- ucharCode `sepBy1` (char '_') <* char ']'
return $ singleLinePart $ RoffStr xs
return $ Seq.singleton $ RoffStr xs
ucharCode = try $ do
char 'u'
@ -247,20 +240,20 @@ escape = do
Nothing -> mzero
Just c -> return c
escUnknown :: PandocMonad m => String -> RoffLexer m (LineParts)
escUnknown :: PandocMonad m => String -> RoffLexer m (Seq LinePart)
escUnknown s = do
pos <- getPosition
report $ SkippedContent ("Unknown escape sequence " ++ s) pos
return $ singleLinePart $ RoffStr "\xFFFD"
return $ Seq.singleton $ RoffStr "\xFFFD"
-- \s-1 \s0
escFontSize :: PandocMonad m => RoffLexer m (LineParts)
escFontSize :: PandocMonad m => RoffLexer m (Seq LinePart)
escFontSize = do
let sign = option "" $ ("-" <$ char '-' <|> "" <$ char '+')
let toFontSize xs =
case safeRead xs of
Nothing -> mzero
Just n -> return $ singleLinePart $ FontSize n
Just n -> return $ Seq.singleton $ FontSize n
choice
[ do char '('
s <- sign
@ -276,7 +269,7 @@ escFontSize = do
toFontSize (s ++ ds)
]
escFont :: PandocMonad m => RoffLexer m (LineParts)
escFont :: PandocMonad m => RoffLexer m (Seq LinePart)
escFont = do
font <- choice
[ digit >> return defaultFontSpec
@ -286,7 +279,7 @@ escFont = do
]
modifyState $ \st -> st{ prevFont = currentFont st
, currentFont = font }
return $ singleLinePart $ Font font
return $ Seq.singleton $ Font font
lettersFont :: PandocMonad m => RoffLexer m FontSpec
lettersFont = try $ do
@ -503,10 +496,10 @@ resolveMacro macroName args pos = do
let fillLP (MacroArg i) zs =
case drop (i - 1) args of
[] -> zs
(LineParts ys:_) -> ys <> zs
fillLP z zs = z Seq.<| zs
let fillMacroArg (MLine (LineParts lineparts)) =
MLine (LineParts (foldr fillLP mempty lineparts))
(ys:_) -> ys ++ zs
fillLP z zs = z : zs
let fillMacroArg (MLine lineparts) =
MLine (foldr fillLP [] lineparts)
fillMacroArg x = x
return $ RoffTokens . fmap fillMacroArg . unRoffTokens $ ts
@ -515,8 +508,7 @@ lexStringDef args = do -- string definition
case args of
[] -> fail "No argument to .ds"
(x:ys) -> do
let ts = singleTok $ MLine $ mconcat
$ intersperse (singleLinePart $ RoffStr " " ) ys
let ts = singleTok $ MLine (intercalate [RoffStr " " ] ys)
let stringName = linePartsToString x
modifyState $ \st ->
st{ customMacros = M.insert stringName ts (customMacros st) }
@ -546,24 +538,24 @@ lexArgs = do
args <- many $ try oneArg
skipMany spacetab
eofline
return args
return $ map Foldable.toList args
where
oneArg :: PandocMonad m => RoffLexer m (LineParts)
oneArg :: PandocMonad m => RoffLexer m (Seq LinePart)
oneArg = do
skipMany $ try $ string "\\\n" -- continuation line
try quotedArg <|> plainArg
-- try, because there are some erroneous files, e.g. linux/bpf.2
plainArg :: PandocMonad m => RoffLexer m (LineParts)
plainArg :: PandocMonad m => RoffLexer m (Seq LinePart)
plainArg = do
skipMany spacetab
mconcat <$> many1 (macroArg <|> escape <|> regularText <|> unescapedQuote)
where
unescapedQuote = char '"' >> return (singleLinePart $ RoffStr "\"")
unescapedQuote = char '"' >> return (Seq.singleton $ RoffStr "\"")
quotedArg :: PandocMonad m => RoffLexer m (LineParts)
quotedArg :: PandocMonad m => RoffLexer m (Seq LinePart)
quotedArg = do
skipMany spacetab
char '"'
@ -576,9 +568,9 @@ lexArgs = do
escapedQuote = try $ do
char '"'
char '"'
return $ singleLinePart $ RoffStr "\""
return $ Seq.singleton $ RoffStr "\""
escStar :: PandocMonad m => RoffLexer m (LineParts)
escStar :: PandocMonad m => RoffLexer m (Seq LinePart)
escStar = try $ do
pos <- getPosition
c <- anyChar
@ -599,46 +591,46 @@ escStar = try $ do
resolveString stringname pos = do
RoffTokens ts <- resolveMacro stringname [] pos
case Foldable.toList ts of
[MLine xs] -> return xs
[MLine xs] -> return $ Seq.fromList xs
_ -> do
report $ SkippedContent ("unknown string " ++ stringname) pos
return mempty
lexLine :: PandocMonad m => RoffLexer m RoffTokens
lexLine = do
lnparts <- mconcat <$> many1 linePart
lnparts <- Foldable.toList . mconcat <$> many1 linePart
eofline
go (unLineParts lnparts)
where -- return mempty if we only have empty strings;
go lnparts
where -- return empty line if we only have empty strings;
-- this can happen if the line just contains \f[C], for example.
go Seq.Empty = return mempty
go ((RoffStr "") Seq.:<| xs) = go xs
go xs = return $ singleTok $ MLine (LineParts xs)
go [] = return mempty
go (RoffStr "" : xs) = go xs
go xs = return $ singleTok $ MLine xs
linePart :: PandocMonad m => RoffLexer m (LineParts)
linePart :: PandocMonad m => RoffLexer m (Seq LinePart)
linePart = macroArg <|> escape <|>
regularText <|> quoteChar <|> spaceTabChar
macroArg :: PandocMonad m => RoffLexer m (LineParts)
macroArg :: PandocMonad m => RoffLexer m (Seq LinePart)
macroArg = try $ do
string "\\\\$"
x <- digit
return $ singleLinePart $ MacroArg $ ord x - ord '0'
return $ Seq.singleton $ MacroArg $ ord x - ord '0'
regularText :: PandocMonad m => RoffLexer m (LineParts)
regularText :: PandocMonad m => RoffLexer m (Seq LinePart)
regularText = do
s <- many1 $ noneOf "\n\r\t \\\""
return $ singleLinePart $ RoffStr s
return $ Seq.singleton $ RoffStr s
quoteChar :: PandocMonad m => RoffLexer m (LineParts)
quoteChar :: PandocMonad m => RoffLexer m (Seq LinePart)
quoteChar = do
char '"'
return $ singleLinePart $ RoffStr "\""
return $ Seq.singleton $ RoffStr "\""
spaceTabChar :: PandocMonad m => RoffLexer m (LineParts)
spaceTabChar :: PandocMonad m => RoffLexer m (Seq LinePart)
spaceTabChar = do
c <- spacetab
return $ singleLinePart $ RoffStr [c]
return $ Seq.singleton $ RoffStr [c]
lexEmptyLine :: PandocMonad m => RoffLexer m RoffTokens
lexEmptyLine = newline >> return (singleTok MEmptyLine)
@ -646,8 +638,8 @@ lexEmptyLine = newline >> return (singleTok MEmptyLine)
manToken :: PandocMonad m => RoffLexer m RoffTokens
manToken = lexComment <|> lexMacro <|> lexLine <|> lexEmptyLine
linePartsToString :: LineParts -> String
linePartsToString = Foldable.foldMap go . unLineParts
linePartsToString :: [LinePart] -> String
linePartsToString = mconcat . map go
where
go (RoffStr s) = s
go _ = mempty