Revert "Roff reader: use LineParts abstraction."
This reverts commit 42ba3c0a0b
.
This commit is contained in:
parent
42ba3c0a0b
commit
cd93faddbf
3 changed files with 65 additions and 75 deletions
|
@ -645,7 +645,7 @@ test-suite test-pandoc
|
|||
tasty-quickcheck >= 0.8 && < 0.11,
|
||||
tasty-golden >= 2.3 && < 2.4,
|
||||
QuickCheck >= 2.4 && < 2.13,
|
||||
containers >= 0.5.8 && < 0.7,
|
||||
containers >= 0.4.2.1 && < 0.7,
|
||||
executable-path >= 0.0 && < 0.1,
|
||||
zip-archive >= 0.2.3.4 && < 0.4,
|
||||
xml >= 1.3.12 && < 1.4,
|
||||
|
|
|
@ -53,7 +53,6 @@ import Text.Pandoc.Readers.Roff -- TODO explicit imports
|
|||
import Text.Parsec hiding (tokenPrim)
|
||||
import qualified Text.Parsec as Parsec
|
||||
import Text.Parsec.Pos (updatePosString, initialPos)
|
||||
import qualified Data.Sequence as Seq
|
||||
import qualified Data.Foldable as Foldable
|
||||
|
||||
data ManState = ManState { readerOptions :: ReaderOptions
|
||||
|
@ -150,8 +149,7 @@ parseTable = do
|
|||
isHrule ([cellfmt], _) = columnType cellfmt `elem` ['_','-','=']
|
||||
isHrule (_, [RoffTokens ss]) =
|
||||
case Foldable.toList ss of
|
||||
[MLine (LineParts (RoffStr [c] Seq.:<| Seq.Empty))]
|
||||
-> c `elem` ['_','-','=']
|
||||
[MLine [RoffStr [c]]] -> c `elem` ['_','-','=']
|
||||
_ -> False
|
||||
isHrule _ = False
|
||||
|
||||
|
@ -231,8 +229,8 @@ parseTitle = do
|
|||
modifyState $ \st -> st{ metadata = adjustMeta $ metadata st }
|
||||
return mempty
|
||||
|
||||
linePartsToInlines :: LineParts -> Inlines
|
||||
linePartsToInlines = go False . Foldable.toList . unLineParts
|
||||
linePartsToInlines :: [LinePart] -> Inlines
|
||||
linePartsToInlines = go False
|
||||
|
||||
where
|
||||
go :: Bool -> [LinePart] -> Inlines
|
||||
|
@ -368,10 +366,10 @@ parseCodeBlock = try $ do
|
|||
where
|
||||
|
||||
extractText :: RoffToken -> Maybe String
|
||||
extractText (MLine (LineParts ss))
|
||||
| not (Seq.null ss)
|
||||
extractText (MLine ss)
|
||||
| not (null ss)
|
||||
, all isFontToken ss = Nothing
|
||||
| otherwise = Just $ linePartsToString (LineParts ss)
|
||||
| otherwise = Just $ linePartsToString ss
|
||||
where isFontToken (FontSize{}) = True
|
||||
isFontToken (Font{}) = True
|
||||
isFontToken _ = False
|
||||
|
|
|
@ -36,7 +36,6 @@ module Text.Pandoc.Readers.Roff
|
|||
, FontSpec(..)
|
||||
, defaultFontSpec
|
||||
, LinePart(..)
|
||||
, LineParts(..)
|
||||
, Arg
|
||||
, TableOption
|
||||
, CellFormat(..)
|
||||
|
@ -57,7 +56,7 @@ import Text.Pandoc.Class
|
|||
import Data.Char (isHexDigit, chr, ord, isAscii, isAlphaNum, isSpace)
|
||||
import Data.Default (Default)
|
||||
import qualified Data.Map as M
|
||||
import Data.List (intersperse, isSuffixOf)
|
||||
import Data.List (intercalate, isSuffixOf)
|
||||
import qualified Data.Text as T
|
||||
import Text.Pandoc.Logging (LogMessage(..))
|
||||
import Text.Pandoc.Options
|
||||
|
@ -92,13 +91,7 @@ data LinePart = RoffStr String
|
|||
| MacroArg Int
|
||||
deriving Show
|
||||
|
||||
newtype LineParts = LineParts { unLineParts :: Seq LinePart }
|
||||
deriving (Show, Semigroup, Monoid)
|
||||
|
||||
singleLinePart :: LinePart -> LineParts
|
||||
singleLinePart t = LineParts (Seq.singleton t)
|
||||
|
||||
type Arg = LineParts
|
||||
type Arg = [LinePart]
|
||||
|
||||
type TableOption = (String, String)
|
||||
|
||||
|
@ -112,7 +105,7 @@ data CellFormat =
|
|||
|
||||
type TableRow = ([CellFormat], [RoffTokens])
|
||||
|
||||
data RoffToken = MLine LineParts
|
||||
data RoffToken = MLine [LinePart]
|
||||
| MEmptyLine
|
||||
| MMacro MacroKind [Arg] SourcePos
|
||||
| MTable [TableOption] [TableRow] SourcePos
|
||||
|
@ -134,7 +127,7 @@ instance Default RoffState where
|
|||
def = RoffState { customMacros = M.fromList
|
||||
$ map (\(n, s) ->
|
||||
(n, singleTok
|
||||
(MLine $ singleLinePart $ RoffStr s)))
|
||||
(MLine [RoffStr s])))
|
||||
[ ("Tm", "\x2122")
|
||||
, ("lq", "\x201C")
|
||||
, ("rq", "\x201D")
|
||||
|
@ -164,7 +157,7 @@ combiningAccentsMap :: M.Map String Char
|
|||
combiningAccentsMap =
|
||||
M.fromList $ map (\(x,y) -> (y,x)) combiningAccents
|
||||
|
||||
escape :: PandocMonad m => RoffLexer m (LineParts)
|
||||
escape :: PandocMonad m => RoffLexer m (Seq LinePart)
|
||||
escape = do
|
||||
char '\\'
|
||||
c <- anyChar
|
||||
|
@ -184,18 +177,18 @@ escape = do
|
|||
':' -> return mempty
|
||||
'0' -> return mempty
|
||||
'c' -> return mempty
|
||||
'-' -> return $ singleLinePart $ RoffStr "-"
|
||||
'_' -> return $ singleLinePart $ RoffStr "_"
|
||||
' ' -> return $ singleLinePart $ RoffStr " "
|
||||
'\\' -> return $ singleLinePart $ RoffStr "\\"
|
||||
't' -> return $ singleLinePart $ RoffStr "\t"
|
||||
'e' -> return $ singleLinePart $ RoffStr "\\"
|
||||
'`' -> return $ singleLinePart $ RoffStr "`"
|
||||
'^' -> return $ singleLinePart $ RoffStr " "
|
||||
'|' -> return $ singleLinePart $ RoffStr " "
|
||||
'\'' -> return $ singleLinePart $ RoffStr "`"
|
||||
'.' -> return $ singleLinePart $ RoffStr "`"
|
||||
'~' -> return $ singleLinePart $ RoffStr "\160" -- nonbreaking space
|
||||
'-' -> return $ Seq.singleton $ RoffStr "-"
|
||||
'_' -> return $ Seq.singleton $ RoffStr "_"
|
||||
' ' -> return $ Seq.singleton $ RoffStr " "
|
||||
'\\' -> return $ Seq.singleton $ RoffStr "\\"
|
||||
't' -> return $ Seq.singleton $ RoffStr "\t"
|
||||
'e' -> return $ Seq.singleton $ RoffStr "\\"
|
||||
'`' -> return $ Seq.singleton $ RoffStr "`"
|
||||
'^' -> return $ Seq.singleton $ RoffStr " "
|
||||
'|' -> return $ Seq.singleton $ RoffStr " "
|
||||
'\'' -> return $ Seq.singleton $ RoffStr "`"
|
||||
'.' -> return $ Seq.singleton $ RoffStr "`"
|
||||
'~' -> return $ Seq.singleton $ RoffStr "\160" -- nonbreaking space
|
||||
_ -> escUnknown ['\\',c]
|
||||
|
||||
where
|
||||
|
@ -203,7 +196,7 @@ escape = do
|
|||
twoCharGlyph = do
|
||||
cs <- count 2 anyChar
|
||||
case M.lookup cs characterCodeMap of
|
||||
Just c -> return $ singleLinePart $ RoffStr [c]
|
||||
Just c -> return $ Seq.singleton $ RoffStr [c]
|
||||
Nothing -> escUnknown ('\\':'(':cs)
|
||||
|
||||
bracketedGlyph = unicodeGlyph <|> charGlyph
|
||||
|
@ -214,7 +207,7 @@ escape = do
|
|||
[] -> mzero
|
||||
[s] -> case M.lookup s characterCodeMap of
|
||||
Nothing -> mzero
|
||||
Just c -> return $ singleLinePart $ RoffStr [c]
|
||||
Just c -> return $ Seq.singleton $ RoffStr [c]
|
||||
(s:ss) -> do
|
||||
basechar <- case M.lookup cs characterCodeMap of
|
||||
Nothing ->
|
||||
|
@ -231,12 +224,12 @@ escape = do
|
|||
Just x -> addAccents as (x:xs)
|
||||
Nothing -> mzero
|
||||
addAccents ss [basechar] >>= \xs ->
|
||||
return (singleLinePart $ RoffStr xs))
|
||||
return (Seq.singleton $ RoffStr xs))
|
||||
<|> escUnknown ("\\[" ++ cs ++ "]")
|
||||
|
||||
unicodeGlyph = try $ do
|
||||
xs <- ucharCode `sepBy1` (char '_') <* char ']'
|
||||
return $ singleLinePart $ RoffStr xs
|
||||
return $ Seq.singleton $ RoffStr xs
|
||||
|
||||
ucharCode = try $ do
|
||||
char 'u'
|
||||
|
@ -247,20 +240,20 @@ escape = do
|
|||
Nothing -> mzero
|
||||
Just c -> return c
|
||||
|
||||
escUnknown :: PandocMonad m => String -> RoffLexer m (LineParts)
|
||||
escUnknown :: PandocMonad m => String -> RoffLexer m (Seq LinePart)
|
||||
escUnknown s = do
|
||||
pos <- getPosition
|
||||
report $ SkippedContent ("Unknown escape sequence " ++ s) pos
|
||||
return $ singleLinePart $ RoffStr "\xFFFD"
|
||||
return $ Seq.singleton $ RoffStr "\xFFFD"
|
||||
|
||||
-- \s-1 \s0
|
||||
escFontSize :: PandocMonad m => RoffLexer m (LineParts)
|
||||
escFontSize :: PandocMonad m => RoffLexer m (Seq LinePart)
|
||||
escFontSize = do
|
||||
let sign = option "" $ ("-" <$ char '-' <|> "" <$ char '+')
|
||||
let toFontSize xs =
|
||||
case safeRead xs of
|
||||
Nothing -> mzero
|
||||
Just n -> return $ singleLinePart $ FontSize n
|
||||
Just n -> return $ Seq.singleton $ FontSize n
|
||||
choice
|
||||
[ do char '('
|
||||
s <- sign
|
||||
|
@ -276,7 +269,7 @@ escFontSize = do
|
|||
toFontSize (s ++ ds)
|
||||
]
|
||||
|
||||
escFont :: PandocMonad m => RoffLexer m (LineParts)
|
||||
escFont :: PandocMonad m => RoffLexer m (Seq LinePart)
|
||||
escFont = do
|
||||
font <- choice
|
||||
[ digit >> return defaultFontSpec
|
||||
|
@ -286,7 +279,7 @@ escFont = do
|
|||
]
|
||||
modifyState $ \st -> st{ prevFont = currentFont st
|
||||
, currentFont = font }
|
||||
return $ singleLinePart $ Font font
|
||||
return $ Seq.singleton $ Font font
|
||||
|
||||
lettersFont :: PandocMonad m => RoffLexer m FontSpec
|
||||
lettersFont = try $ do
|
||||
|
@ -503,10 +496,10 @@ resolveMacro macroName args pos = do
|
|||
let fillLP (MacroArg i) zs =
|
||||
case drop (i - 1) args of
|
||||
[] -> zs
|
||||
(LineParts ys:_) -> ys <> zs
|
||||
fillLP z zs = z Seq.<| zs
|
||||
let fillMacroArg (MLine (LineParts lineparts)) =
|
||||
MLine (LineParts (foldr fillLP mempty lineparts))
|
||||
(ys:_) -> ys ++ zs
|
||||
fillLP z zs = z : zs
|
||||
let fillMacroArg (MLine lineparts) =
|
||||
MLine (foldr fillLP [] lineparts)
|
||||
fillMacroArg x = x
|
||||
return $ RoffTokens . fmap fillMacroArg . unRoffTokens $ ts
|
||||
|
||||
|
@ -515,8 +508,7 @@ lexStringDef args = do -- string definition
|
|||
case args of
|
||||
[] -> fail "No argument to .ds"
|
||||
(x:ys) -> do
|
||||
let ts = singleTok $ MLine $ mconcat
|
||||
$ intersperse (singleLinePart $ RoffStr " " ) ys
|
||||
let ts = singleTok $ MLine (intercalate [RoffStr " " ] ys)
|
||||
let stringName = linePartsToString x
|
||||
modifyState $ \st ->
|
||||
st{ customMacros = M.insert stringName ts (customMacros st) }
|
||||
|
@ -546,24 +538,24 @@ lexArgs = do
|
|||
args <- many $ try oneArg
|
||||
skipMany spacetab
|
||||
eofline
|
||||
return args
|
||||
return $ map Foldable.toList args
|
||||
|
||||
where
|
||||
|
||||
oneArg :: PandocMonad m => RoffLexer m (LineParts)
|
||||
oneArg :: PandocMonad m => RoffLexer m (Seq LinePart)
|
||||
oneArg = do
|
||||
skipMany $ try $ string "\\\n" -- continuation line
|
||||
try quotedArg <|> plainArg
|
||||
-- try, because there are some erroneous files, e.g. linux/bpf.2
|
||||
|
||||
plainArg :: PandocMonad m => RoffLexer m (LineParts)
|
||||
plainArg :: PandocMonad m => RoffLexer m (Seq LinePart)
|
||||
plainArg = do
|
||||
skipMany spacetab
|
||||
mconcat <$> many1 (macroArg <|> escape <|> regularText <|> unescapedQuote)
|
||||
where
|
||||
unescapedQuote = char '"' >> return (singleLinePart $ RoffStr "\"")
|
||||
unescapedQuote = char '"' >> return (Seq.singleton $ RoffStr "\"")
|
||||
|
||||
quotedArg :: PandocMonad m => RoffLexer m (LineParts)
|
||||
quotedArg :: PandocMonad m => RoffLexer m (Seq LinePart)
|
||||
quotedArg = do
|
||||
skipMany spacetab
|
||||
char '"'
|
||||
|
@ -576,9 +568,9 @@ lexArgs = do
|
|||
escapedQuote = try $ do
|
||||
char '"'
|
||||
char '"'
|
||||
return $ singleLinePart $ RoffStr "\""
|
||||
return $ Seq.singleton $ RoffStr "\""
|
||||
|
||||
escStar :: PandocMonad m => RoffLexer m (LineParts)
|
||||
escStar :: PandocMonad m => RoffLexer m (Seq LinePart)
|
||||
escStar = try $ do
|
||||
pos <- getPosition
|
||||
c <- anyChar
|
||||
|
@ -599,46 +591,46 @@ escStar = try $ do
|
|||
resolveString stringname pos = do
|
||||
RoffTokens ts <- resolveMacro stringname [] pos
|
||||
case Foldable.toList ts of
|
||||
[MLine xs] -> return xs
|
||||
[MLine xs] -> return $ Seq.fromList xs
|
||||
_ -> do
|
||||
report $ SkippedContent ("unknown string " ++ stringname) pos
|
||||
return mempty
|
||||
|
||||
lexLine :: PandocMonad m => RoffLexer m RoffTokens
|
||||
lexLine = do
|
||||
lnparts <- mconcat <$> many1 linePart
|
||||
lnparts <- Foldable.toList . mconcat <$> many1 linePart
|
||||
eofline
|
||||
go (unLineParts lnparts)
|
||||
where -- return mempty if we only have empty strings;
|
||||
go lnparts
|
||||
where -- return empty line if we only have empty strings;
|
||||
-- this can happen if the line just contains \f[C], for example.
|
||||
go Seq.Empty = return mempty
|
||||
go ((RoffStr "") Seq.:<| xs) = go xs
|
||||
go xs = return $ singleTok $ MLine (LineParts xs)
|
||||
go [] = return mempty
|
||||
go (RoffStr "" : xs) = go xs
|
||||
go xs = return $ singleTok $ MLine xs
|
||||
|
||||
linePart :: PandocMonad m => RoffLexer m (LineParts)
|
||||
linePart :: PandocMonad m => RoffLexer m (Seq LinePart)
|
||||
linePart = macroArg <|> escape <|>
|
||||
regularText <|> quoteChar <|> spaceTabChar
|
||||
|
||||
macroArg :: PandocMonad m => RoffLexer m (LineParts)
|
||||
macroArg :: PandocMonad m => RoffLexer m (Seq LinePart)
|
||||
macroArg = try $ do
|
||||
string "\\\\$"
|
||||
x <- digit
|
||||
return $ singleLinePart $ MacroArg $ ord x - ord '0'
|
||||
return $ Seq.singleton $ MacroArg $ ord x - ord '0'
|
||||
|
||||
regularText :: PandocMonad m => RoffLexer m (LineParts)
|
||||
regularText :: PandocMonad m => RoffLexer m (Seq LinePart)
|
||||
regularText = do
|
||||
s <- many1 $ noneOf "\n\r\t \\\""
|
||||
return $ singleLinePart $ RoffStr s
|
||||
return $ Seq.singleton $ RoffStr s
|
||||
|
||||
quoteChar :: PandocMonad m => RoffLexer m (LineParts)
|
||||
quoteChar :: PandocMonad m => RoffLexer m (Seq LinePart)
|
||||
quoteChar = do
|
||||
char '"'
|
||||
return $ singleLinePart $ RoffStr "\""
|
||||
return $ Seq.singleton $ RoffStr "\""
|
||||
|
||||
spaceTabChar :: PandocMonad m => RoffLexer m (LineParts)
|
||||
spaceTabChar :: PandocMonad m => RoffLexer m (Seq LinePart)
|
||||
spaceTabChar = do
|
||||
c <- spacetab
|
||||
return $ singleLinePart $ RoffStr [c]
|
||||
return $ Seq.singleton $ RoffStr [c]
|
||||
|
||||
lexEmptyLine :: PandocMonad m => RoffLexer m RoffTokens
|
||||
lexEmptyLine = newline >> return (singleTok MEmptyLine)
|
||||
|
@ -646,8 +638,8 @@ lexEmptyLine = newline >> return (singleTok MEmptyLine)
|
|||
manToken :: PandocMonad m => RoffLexer m RoffTokens
|
||||
manToken = lexComment <|> lexMacro <|> lexLine <|> lexEmptyLine
|
||||
|
||||
linePartsToString :: LineParts -> String
|
||||
linePartsToString = Foldable.foldMap go . unLineParts
|
||||
linePartsToString :: [LinePart] -> String
|
||||
linePartsToString = mconcat . map go
|
||||
where
|
||||
go (RoffStr s) = s
|
||||
go _ = mempty
|
||||
|
|
Loading…
Reference in a new issue