From 52df18f476b7eb7935c2c01f566fa1adee4a8621 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 26 Oct 2018 21:22:39 -0700 Subject: [PATCH] Groff tokenizer: introduce TableRow type, handle .T&. Closes #5020. --- src/Text/Pandoc/Readers/Groff.hs | 41 ++++++++++++++++++++++---------- src/Text/Pandoc/Readers/Man.hs | 23 +++++++++--------- 2 files changed, 41 insertions(+), 23 deletions(-) diff --git a/src/Text/Pandoc/Readers/Groff.hs b/src/Text/Pandoc/Readers/Groff.hs index a752c6445..ed0b3a1ca 100644 --- a/src/Text/Pandoc/Readers/Groff.hs +++ b/src/Text/Pandoc/Readers/Groff.hs @@ -38,7 +38,8 @@ module Text.Pandoc.Readers.Groff , LinePart(..) , Arg , TableOption - , TableFormat(..) + , CellFormat(..) + , TableRow , GroffToken(..) , GroffTokens(..) , linePartsToString @@ -47,6 +48,7 @@ module Text.Pandoc.Readers.Groff where import Prelude +import Safe (lastDef) import Control.Monad (void, mzero, guard, when) import Control.Monad.Except (throwError) import Text.Pandoc.Class @@ -92,20 +94,21 @@ type Arg = [LinePart] type TableOption = (String, String) -data TableFormat = - TableFormat +data CellFormat = + CellFormat { columnType :: Char , pipePrefix :: Bool , pipeSuffix :: Bool , columnSuffixes :: [String] } deriving (Show, Eq, Ord) +type TableRow = ([CellFormat], [GroffTokens]) -- TODO parse tables (see man tbl) data GroffToken = MLine [LinePart] | MEmptyLine | MMacro MacroKind [Arg] SourcePos - | MTable [TableOption] [[TableFormat]] [[GroffTokens]] SourcePos + | MTable [TableOption] [TableRow] SourcePos deriving Show newtype GroffTokens = GroffTokens { unGroffTokens :: Seq.Seq GroffToken } @@ -346,13 +349,27 @@ lexTable pos = do spaces skipMany lexComment spaces + rows <- lexTableRows + morerows <- many $ try $ do + string ".T&" + skipMany spacetab + newline + lexTableRows + string ".TE" + skipMany spacetab + eofline + return $ singleTok $ MTable opts (rows ++ concat morerows) pos + +lexTableRows :: PandocMonad m => GroffLexer m [TableRow] +lexTableRows = do aligns <- tableFormatSpec spaces skipMany lexComment spaces - rows <- manyTill tableRow (try (string ".TE" >> skipMany spacetab >> eofline)) - return $ singleTok $ MTable opts aligns rows pos - + rows <- many (notFollowedBy (try (string ".TE") <|> try (string ".T&")) >> + tableRow) + return $ zip aligns rows + tableCell :: PandocMonad m => GroffLexer m GroffTokens tableCell = (enclosedCell <|> simpleCell) >>= lexGroff . T.pack where @@ -387,18 +404,18 @@ tableOption = do skipMany spacetab return (k,v) -tableFormatSpec :: PandocMonad m => GroffLexer m [[TableFormat]] +tableFormatSpec :: PandocMonad m => GroffLexer m [[CellFormat]] tableFormatSpec = do speclines <- tableFormatSpecLine `sepBy1` (newline <|> char ',') skipMany spacetab char '.' - return speclines + return $ speclines ++ repeat (lastDef [] speclines) -- last line is default -tableFormatSpecLine :: PandocMonad m => GroffLexer m [TableFormat] +tableFormatSpecLine :: PandocMonad m => GroffLexer m [CellFormat] tableFormatSpecLine = many1 $ try $ skipMany spacetab >> tableColFormat -tableColFormat :: PandocMonad m => GroffLexer m TableFormat +tableColFormat :: PandocMonad m => GroffLexer m CellFormat tableColFormat = do pipePrefix' <- option False $ True <$ (try $ string "|" <* notFollowedBy spacetab) @@ -416,7 +433,7 @@ tableColFormat = do else return "" return $ x : num pipeSuffix' <- option False $ True <$ string "|" - return $ TableFormat + return $ CellFormat { columnType = c , pipePrefix = pipePrefix' , pipeSuffix = pipeSuffix' diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index 5007aaab2..90f266e6d 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -34,7 +34,6 @@ Conversion of man to 'Pandoc' document. module Text.Pandoc.Readers.Man (readMan) where import Prelude -import Safe (lastMay) import Data.Char (toLower) import Data.Default (Default) import Control.Monad (liftM, mzero, guard) @@ -114,22 +113,22 @@ parseTable :: PandocMonad m => ManParser m Blocks parseTable = do let isMTable (MTable{}) = True isMTable _ = False - MTable _opts aligns rows pos <- msatisfy isMTable - case lastMay aligns of - Just as -> try (do + MTable _opts rows pos <- msatisfy isMTable + case rows of + ((as,_):_) -> try (do let as' = map (columnTypeToAlignment . columnType) as guard $ all isJust as' let alignments = catMaybes as' let (headerRow', bodyRows') = case rows of - (h:[x]:bs) + (h:x:bs) | isHrule x -> (h, bs) - _ -> ([], rows) - headerRow <- mapM parseTableCell headerRow' - bodyRows <- mapM (mapM parseTableCell) bodyRows' + _ -> (([],[]), rows) + headerRow <- mapM parseTableCell $ snd headerRow' + bodyRows <- mapM (mapM parseTableCell . snd) bodyRows' return $ B.table mempty (zip alignments (repeat 0.0)) headerRow bodyRows) <|> fallback pos - Nothing -> fallback pos + [] -> fallback pos where @@ -146,11 +145,13 @@ parseTable = do Left e -> throwError e Right x -> return x - isHrule :: GroffTokens -> Bool - isHrule (GroffTokens ss) = + isHrule :: TableRow -> Bool + isHrule ([cellfmt], _) = columnType cellfmt `elem` ['_','-','='] + isHrule (_, [GroffTokens ss]) = case Foldable.toList ss of [MLine [RoffStr [c]]] -> c `elem` ['_','-','='] _ -> False + isHrule _ = False fallback pos = do report $ SkippedContent "TABLE" pos