Groff tokenizer: introduce TableRow type, handle .T&.

Closes #5020.
This commit is contained in:
John MacFarlane 2018-10-26 21:22:39 -07:00
parent 7f70aaa5fa
commit 52df18f476
2 changed files with 41 additions and 23 deletions

View file

@ -38,7 +38,8 @@ module Text.Pandoc.Readers.Groff
, LinePart(..) , LinePart(..)
, Arg , Arg
, TableOption , TableOption
, TableFormat(..) , CellFormat(..)
, TableRow
, GroffToken(..) , GroffToken(..)
, GroffTokens(..) , GroffTokens(..)
, linePartsToString , linePartsToString
@ -47,6 +48,7 @@ module Text.Pandoc.Readers.Groff
where where
import Prelude import Prelude
import Safe (lastDef)
import Control.Monad (void, mzero, guard, when) import Control.Monad (void, mzero, guard, when)
import Control.Monad.Except (throwError) import Control.Monad.Except (throwError)
import Text.Pandoc.Class import Text.Pandoc.Class
@ -92,20 +94,21 @@ type Arg = [LinePart]
type TableOption = (String, String) type TableOption = (String, String)
data TableFormat = data CellFormat =
TableFormat CellFormat
{ columnType :: Char { columnType :: Char
, pipePrefix :: Bool , pipePrefix :: Bool
, pipeSuffix :: Bool , pipeSuffix :: Bool
, columnSuffixes :: [String] , columnSuffixes :: [String]
} deriving (Show, Eq, Ord) } deriving (Show, Eq, Ord)
type TableRow = ([CellFormat], [GroffTokens])
-- TODO parse tables (see man tbl) -- TODO parse tables (see man tbl)
data GroffToken = MLine [LinePart] data GroffToken = MLine [LinePart]
| MEmptyLine | MEmptyLine
| MMacro MacroKind [Arg] SourcePos | MMacro MacroKind [Arg] SourcePos
| MTable [TableOption] [[TableFormat]] [[GroffTokens]] SourcePos | MTable [TableOption] [TableRow] SourcePos
deriving Show deriving Show
newtype GroffTokens = GroffTokens { unGroffTokens :: Seq.Seq GroffToken } newtype GroffTokens = GroffTokens { unGroffTokens :: Seq.Seq GroffToken }
@ -346,13 +349,27 @@ lexTable pos = do
spaces spaces
skipMany lexComment skipMany lexComment
spaces spaces
rows <- lexTableRows
morerows <- many $ try $ do
string ".T&"
skipMany spacetab
newline
lexTableRows
string ".TE"
skipMany spacetab
eofline
return $ singleTok $ MTable opts (rows ++ concat morerows) pos
lexTableRows :: PandocMonad m => GroffLexer m [TableRow]
lexTableRows = do
aligns <- tableFormatSpec aligns <- tableFormatSpec
spaces spaces
skipMany lexComment skipMany lexComment
spaces spaces
rows <- manyTill tableRow (try (string ".TE" >> skipMany spacetab >> eofline)) rows <- many (notFollowedBy (try (string ".TE") <|> try (string ".T&")) >>
return $ singleTok $ MTable opts aligns rows pos tableRow)
return $ zip aligns rows
tableCell :: PandocMonad m => GroffLexer m GroffTokens tableCell :: PandocMonad m => GroffLexer m GroffTokens
tableCell = (enclosedCell <|> simpleCell) >>= lexGroff . T.pack tableCell = (enclosedCell <|> simpleCell) >>= lexGroff . T.pack
where where
@ -387,18 +404,18 @@ tableOption = do
skipMany spacetab skipMany spacetab
return (k,v) return (k,v)
tableFormatSpec :: PandocMonad m => GroffLexer m [[TableFormat]] tableFormatSpec :: PandocMonad m => GroffLexer m [[CellFormat]]
tableFormatSpec = do tableFormatSpec = do
speclines <- tableFormatSpecLine `sepBy1` (newline <|> char ',') speclines <- tableFormatSpecLine `sepBy1` (newline <|> char ',')
skipMany spacetab skipMany spacetab
char '.' char '.'
return speclines return $ speclines ++ repeat (lastDef [] speclines) -- last line is default
tableFormatSpecLine :: PandocMonad m => GroffLexer m [TableFormat] tableFormatSpecLine :: PandocMonad m => GroffLexer m [CellFormat]
tableFormatSpecLine = tableFormatSpecLine =
many1 $ try $ skipMany spacetab >> tableColFormat many1 $ try $ skipMany spacetab >> tableColFormat
tableColFormat :: PandocMonad m => GroffLexer m TableFormat tableColFormat :: PandocMonad m => GroffLexer m CellFormat
tableColFormat = do tableColFormat = do
pipePrefix' <- option False pipePrefix' <- option False
$ True <$ (try $ string "|" <* notFollowedBy spacetab) $ True <$ (try $ string "|" <* notFollowedBy spacetab)
@ -416,7 +433,7 @@ tableColFormat = do
else return "" else return ""
return $ x : num return $ x : num
pipeSuffix' <- option False $ True <$ string "|" pipeSuffix' <- option False $ True <$ string "|"
return $ TableFormat return $ CellFormat
{ columnType = c { columnType = c
, pipePrefix = pipePrefix' , pipePrefix = pipePrefix'
, pipeSuffix = pipeSuffix' , pipeSuffix = pipeSuffix'

View file

@ -34,7 +34,6 @@ Conversion of man to 'Pandoc' document.
module Text.Pandoc.Readers.Man (readMan) where module Text.Pandoc.Readers.Man (readMan) where
import Prelude import Prelude
import Safe (lastMay)
import Data.Char (toLower) import Data.Char (toLower)
import Data.Default (Default) import Data.Default (Default)
import Control.Monad (liftM, mzero, guard) import Control.Monad (liftM, mzero, guard)
@ -114,22 +113,22 @@ parseTable :: PandocMonad m => ManParser m Blocks
parseTable = do parseTable = do
let isMTable (MTable{}) = True let isMTable (MTable{}) = True
isMTable _ = False isMTable _ = False
MTable _opts aligns rows pos <- msatisfy isMTable MTable _opts rows pos <- msatisfy isMTable
case lastMay aligns of case rows of
Just as -> try (do ((as,_):_) -> try (do
let as' = map (columnTypeToAlignment . columnType) as let as' = map (columnTypeToAlignment . columnType) as
guard $ all isJust as' guard $ all isJust as'
let alignments = catMaybes as' let alignments = catMaybes as'
let (headerRow', bodyRows') = let (headerRow', bodyRows') =
case rows of case rows of
(h:[x]:bs) (h:x:bs)
| isHrule x -> (h, bs) | isHrule x -> (h, bs)
_ -> ([], rows) _ -> (([],[]), rows)
headerRow <- mapM parseTableCell headerRow' headerRow <- mapM parseTableCell $ snd headerRow'
bodyRows <- mapM (mapM parseTableCell) bodyRows' bodyRows <- mapM (mapM parseTableCell . snd) bodyRows'
return $ B.table mempty (zip alignments (repeat 0.0)) return $ B.table mempty (zip alignments (repeat 0.0))
headerRow bodyRows) <|> fallback pos headerRow bodyRows) <|> fallback pos
Nothing -> fallback pos [] -> fallback pos
where where
@ -146,11 +145,13 @@ parseTable = do
Left e -> throwError e Left e -> throwError e
Right x -> return x Right x -> return x
isHrule :: GroffTokens -> Bool isHrule :: TableRow -> Bool
isHrule (GroffTokens ss) = isHrule ([cellfmt], _) = columnType cellfmt `elem` ['_','-','=']
isHrule (_, [GroffTokens ss]) =
case Foldable.toList ss of case Foldable.toList ss of
[MLine [RoffStr [c]]] -> c `elem` ['_','-','='] [MLine [RoffStr [c]]] -> c `elem` ['_','-','=']
_ -> False _ -> False
isHrule _ = False
fallback pos = do fallback pos = do
report $ SkippedContent "TABLE" pos report $ SkippedContent "TABLE" pos