This commit is contained in:
Yan Pas 2018-10-16 03:12:06 +03:00
parent 1684e918b2
commit ce27bf9a02
2 changed files with 62 additions and 68 deletions

View file

@ -38,22 +38,22 @@ import Control.Monad (liftM, void)
import Control.Monad.Except (throwError) import Control.Monad.Except (throwError)
import Data.Char (isDigit, isUpper, isLower) import Data.Char (isDigit, isUpper, isLower)
import Data.Default (Default) import Data.Default (Default)
import Data.Functor (($>))
import Data.Map (insert) import Data.Map (insert)
import Data.Set (Set, singleton, fromList, toList) import Data.Set (Set, singleton)
import qualified Data.Set as S (fromList, toList)
import Data.Maybe (catMaybes, fromMaybe, isNothing) import Data.Maybe (catMaybes, fromMaybe, isNothing)
import Data.List (intersperse, intercalate) import Data.List (intersperse, intercalate)
import qualified Data.Text as T import qualified Data.Text as T
import Text.Pandoc.Class (PandocMonad(..)) import Text.Pandoc.Class (PandocMonad(..))
import Text.Pandoc.Definition import Text.Pandoc.Builder as B hiding (singleton)
import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Error (PandocError (PandocParsecError))
import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options import Text.Pandoc.Options
import Text.Pandoc.Parsing import Text.Pandoc.Parsing
import Text.Pandoc.Shared (crFilter) import Text.Pandoc.Shared (crFilter)
import Text.Parsec hiding (tokenPrim) import Text.Parsec hiding (tokenPrim, space)
import Text.Parsec.Char () import qualified Text.Parsec as Parsec
import Text.Parsec.Pos (updatePosString) import Text.Parsec.Pos (updatePosString)
-- --
@ -158,14 +158,10 @@ parseMan :: PandocMonad m => ManParser m Pandoc
parseMan = do parseMan = do
let parsers = [ try parseList, parseTitle, parsePara, parseSkippedContent let parsers = [ try parseList, parseTitle, parsePara, parseSkippedContent
, try parseCodeBlock, parseHeader, parseSkipMacro] , try parseCodeBlock, parseHeader, parseSkipMacro]
blocks <- many $ choice parsers bs <- many $ choice parsers
parserst <- getState let (Pandoc _ blocks) = doc $ mconcat bs
return $ Pandoc (stateMeta parserst) (filter (not . isNull) blocks) meta <- stateMeta <$> getState
return $ Pandoc meta blocks
where
isNull Null = True
isNull _ = False
eofline :: Stream s m Char => ParsecT s u m () eofline :: Stream s m Char => ParsecT s u m ()
eofline = void newline <|> eof eofline = void newline <|> eof
@ -216,7 +212,7 @@ escapeLexer = do
fs <- many letterFontKind fs <- many letterFontKind
many letter many letter
char ']' char ']'
return $ fromList fs return $ S.fromList fs
letterFontKind :: PandocMonad m => ManLexer m FontKind letterFontKind :: PandocMonad m => ManLexer m FontKind
letterFontKind = choice [ letterFontKind = choice [
@ -240,7 +236,7 @@ currentFont = fontKind <$> getState
lexComment :: PandocMonad m => ManLexer m ManToken lexComment :: PandocMonad m => ManLexer m ManToken
lexComment = do lexComment = do
try $ string ".\\\"" try $ string ".\\\""
many space many Parsec.space
body <- many $ noneOf "\n" body <- many $ noneOf "\n"
char '\n' char '\n'
return $ MComment body return $ MComment body
@ -265,7 +261,7 @@ lexMacro = do
"fi" -> knownMacro KCodeBlEnd "fi" -> knownMacro KCodeBlEnd
"B" -> MStr (joinedArgs, singleton Bold) "B" -> MStr (joinedArgs, singleton Bold)
"BR" -> MMaybeLink joinedArgs "BR" -> MMaybeLink joinedArgs
x | x `elem` ["BI", "IB"] -> MStr (joinedArgs, fromList [Italic, Bold]) x | x `elem` ["BI", "IB"] -> MStr (joinedArgs, S.fromList [Italic, Bold])
x | x `elem` ["I", "IR", "RI"] -> MStr (joinedArgs, singleton Italic) x | x `elem` ["I", "IR", "RI"] -> MStr (joinedArgs, singleton Italic)
"SH" -> MHeader 2 args "SH" -> MHeader 2 args
"SS" -> MHeader 3 args "SS" -> MHeader 3 args
@ -403,15 +399,15 @@ mcomment = msatisfy isMComment where
-- ManToken -> Block functions -- ManToken -> Block functions
-- --
parseTitle :: PandocMonad m => ManParser m Block parseTitle :: PandocMonad m => ManParser m Blocks
parseTitle = do parseTitle = do
(MMacro _ args) <- mmacro KTitle (MMacro _ args) <- mmacro KTitle
if null args if null args
then return Null then return mempty
else do else do
let mantitle = fst $ head args let mantitle = fst $ head args
modifyState (changeTitle mantitle) modifyState (changeTitle mantitle)
return $ Header 1 nullAttr [Str mantitle] return $ header 1 $ str mantitle
where where
changeTitle title pst = changeTitle title pst =
let meta = stateMeta pst let meta = stateMeta pst
@ -419,11 +415,11 @@ parseTitle = do
in in
pst {stateMeta = metaUp} pst {stateMeta = metaUp}
parseSkippedContent :: PandocMonad m => ManParser m Block parseSkippedContent :: PandocMonad m => ManParser m Blocks
parseSkippedContent = do parseSkippedContent = do
tok <- munknownMacro <|> mcomment <|> memplyLine tok <- munknownMacro <|> mcomment <|> memplyLine
onToken tok onToken tok
return Null return mempty
where where
@ -433,50 +429,50 @@ parseSkippedContent = do
logMessage $ SkippedContent ("Unknown macro: " ++ mname) pos logMessage $ SkippedContent ("Unknown macro: " ++ mname) pos
onToken _ = return () onToken _ = return ()
strToInline :: RoffStr -> Inline strToInlines :: RoffStr -> Inlines
strToInline (s, fonts) = inner $ toList fonts where strToInlines (s, fonts) = inner $ S.toList fonts where
inner :: [FontKind] -> Inline inner :: [FontKind] -> Inlines
inner [] = Str s inner [] = str s
inner (Bold:fs) = Strong [inner fs] inner (Bold:fs) = strong $ inner fs
inner (Italic:fs) = Emph [inner fs] inner (Italic:fs) = emph $ inner fs
-- Monospace goes after Bold and Italic in ordered set -- Monospace goes after Bold and Italic in ordered set
inner (Monospace:_) = Code nullAttr s inner (Monospace:_) = code s
inner (Regular:fs) = inner fs inner (Regular:fs) = inner fs
parsePara :: PandocMonad m => ManParser m Block parsePara :: PandocMonad m => ManParser m Blocks
parsePara = Para <$> parseInlines parsePara = para <$> parseInlines
parseInlines :: PandocMonad m => ManParser m [Inline] parseInlines :: PandocMonad m => ManParser m Inlines
parseInlines = do parseInlines = do
inls <- many1 (strInl <|> lineInl <|> linkInl <|> comment) inls <- many1 (strInl <|> lineInl <|> linkInl <|> comment)
let withspaces = intersperse [Space] inls let withspaces = intersperse B.space inls
return $ concat withspaces return $ mconcat withspaces
where where
strInl :: PandocMonad m => ManParser m [Inline] strInl :: PandocMonad m => ManParser m Inlines
strInl = do strInl = do
(MStr rstr) <- mstr (MStr rstr) <- mstr
return [strToInline rstr] return $ strToInlines rstr
lineInl :: PandocMonad m => ManParser m [Inline] lineInl :: PandocMonad m => ManParser m Inlines
lineInl = do lineInl = do
(MLine fragments) <- mline (MLine fragments) <- mline
return $ strToInline <$> fragments return $ mconcat $ strToInlines <$> fragments
linkInl :: PandocMonad m => ManParser m [Inline] linkInl :: PandocMonad m => ManParser m Inlines
linkInl = do linkInl = do
(MMaybeLink txt) <- mmaybeLink (MMaybeLink txt) <- mmaybeLink
let inls = case runParser linkParser () "" txt of let inls = case runParser linkParser () "" txt of
Right lnk -> lnk Right lnk -> lnk
Left _ -> [Strong [Str txt]] Left _ -> strong $ str txt
return inls return inls
where where
-- assuming man pages are generated from Linux-like repository -- assuming man pages are generated from Linux-like repository
linkParser :: Parsec String () [Inline] linkParser :: Parsec String () Inlines
linkParser = do linkParser = do
mpage <- many1 (alphaNum <|> char '_') mpage <- many1 (alphaNum <|> char '_')
spacetab spacetab
@ -485,21 +481,19 @@ parseInlines = do
char ')' char ')'
other <- many anyChar other <- many anyChar
let manurl pagename section = "../"++section++"/"++pagename++"."++section let manurl pagename section = "../"++section++"/"++pagename++"."++section
return $ [ Link nullAttr [Strong [Str mpage]] (manurl mpage [mansect], mpage) lnkInls = link (manurl mpage [mansect]) mpage (strong $ str mpage)
, Strong [Str $ " ("++[mansect] ++ ")" return $ lnkInls <> strong (str (" ("++[mansect] ++ ")") <> str other)
, Str other]
]
comment :: PandocMonad m => ManParser m [Inline] comment :: PandocMonad m => ManParser m Inlines
comment = mcomment >> return [] comment = mcomment >> return mempty
parseCodeBlock :: PandocMonad m => ManParser m Block parseCodeBlock :: PandocMonad m => ManParser m Blocks
parseCodeBlock = do parseCodeBlock = do
mmacro KCodeBlStart mmacro KCodeBlStart
toks <- many (mstr <|> mline <|> mmaybeLink <|> memplyLine <|> munknownMacro <|> mcomment) toks <- many (mstr <|> mline <|> mmaybeLink <|> memplyLine <|> munknownMacro <|> mcomment)
mmacro KCodeBlEnd mmacro KCodeBlEnd
return $ CodeBlock nullAttr (intercalate "\n" . catMaybes $ extractText <$> toks) return $ codeBlock (intercalate "\n" . catMaybes $ extractText <$> toks)
where where
@ -510,14 +504,14 @@ parseCodeBlock = do
extractText MEmptyLine = Just "" -- string are intercalated with '\n', this prevents double '\n' extractText MEmptyLine = Just "" -- string are intercalated with '\n', this prevents double '\n'
extractText _ = Nothing extractText _ = Nothing
parseHeader :: PandocMonad m => ManParser m Block parseHeader :: PandocMonad m => ManParser m Blocks
parseHeader = do parseHeader = do
(MHeader lvl ss) <- mheader (MHeader lvl ss) <- mheader
return $ Header lvl nullAttr $ intersperse Space $ strToInline <$> ss return $ header lvl (mconcat $ intersperse B.space $ strToInlines <$> ss)
type ListBuilder = [[Block]] -> Block type ListBuilder = [Blocks] -> Blocks
parseList :: PandocMonad m => ManParser m Block parseList :: PandocMonad m => ManParser m Blocks
parseList = do parseList = do
xx <- many1 paras xx <- many1 paras
let bls = map snd xx let bls = map snd xx
@ -526,13 +520,13 @@ parseList = do
where where
macroIPInl :: [RoffStr] -> [Inline] macroIPInl :: [RoffStr] -> Inlines
macroIPInl (x:_:[]) = [strToInline x, Space] macroIPInl (x:_:[]) = strToInlines x <> B.space
macroIPInl _ = [] macroIPInl _ = mempty
listKind :: [RoffStr] -> Maybe ListBuilder listKind :: [RoffStr] -> Maybe ListBuilder
listKind (((c:_), _):_:[]) = listKind (((c:_), _):_:[]) =
let params style = OrderedList (1, style, DefaultDelim) let params style = orderedListWith (1, style, DefaultDelim)
in case c of in case c of
_ | isDigit c -> Just $ params Decimal _ | isDigit c -> Just $ params Decimal
_ | isUpper c -> Just $ params UpperAlpha _ | isUpper c -> Just $ params UpperAlpha
@ -541,18 +535,18 @@ parseList = do
listKind _ = Nothing listKind _ = Nothing
paras :: PandocMonad m => ManParser m (ListBuilder, [Block]) paras :: PandocMonad m => ManParser m (ListBuilder, Blocks)
paras = do paras = do
(MMacro _ args) <- mmacro KTab (MMacro _ args) <- mmacro KTab
let lbuilderOpt = listKind args let lbuilderOpt = listKind args
lbuilder = fromMaybe BulletList lbuilderOpt lbuilder = fromMaybe bulletList lbuilderOpt
macroinl = macroIPInl args macroinl = macroIPInl args
inls <- parseInlines inls <- parseInlines
let parainls = if isNothing lbuilderOpt then macroinl ++ inls else inls let parainls = if isNothing lbuilderOpt then macroinl <> inls else inls
subls <- many sublist subls <- mconcat <$> many sublist
return $ (lbuilder, (Plain parainls) : subls) return $ (lbuilder, plain parainls <> subls)
sublist :: PandocMonad m => ManParser m Block sublist :: PandocMonad m => ManParser m Blocks
sublist = do sublist = do
mmacro KSubTab mmacro KSubTab
bl <- parseList bl <- parseList
@ -560,5 +554,5 @@ parseList = do
return bl return bl
-- In case of weird man file it will be parsed succesfully -- In case of weird man file it will be parsed succesfully
parseSkipMacro :: PandocMonad m => ManParser m Block parseSkipMacro :: PandocMonad m => ManParser m Blocks
parseSkipMacro = mmacroAny >> return Null parseSkipMacro = mmacroAny >> mempty

View file

@ -45,7 +45,7 @@ tests = [
=?> (para $ space <> str "aaa") =?> (para $ space <> str "aaa")
, "link" =: , "link" =:
".BR aa (1)" ".BR aa (1)"
=?> (para $ fromList [Link nullAttr [Strong [Str "aa"]] ("../1/aa.1","aa"), Strong [Str " (1)",Str ""]]) =?> (para $ link "../1/aa.1" "aa" (strong $ str "aa") <> (strong $ str " (1)"))
], ],
testGroup "Escapes" [ testGroup "Escapes" [
"fonts" =: "fonts" =:
@ -53,13 +53,13 @@ tests = [
=?> (para $ str "aa" <> (emph $ str "bb") <> str "cc") =?> (para $ str "aa" <> (emph $ str "bb") <> str "cc")
, "skip" =: , "skip" =:
"a\\%\\{\\}\\\n\\:b\\0" "a\\%\\{\\}\\\n\\:b\\0"
=?> (para $ fromList $ map Str ["a", "b"]) =?> (para $ str "ab")
, "replace" =: , "replace" =:
"\\-\\ \\\\\\[lq]\\[rq]\\[em]\\[en]\\*(lq\\*(rq" "\\-\\ \\\\\\[lq]\\[rq]\\[em]\\[en]\\*(lq\\*(rq"
=?> (para $ fromList $ map Str ["-", " ", "\\", "", "", "", "", "«", "»"]) =?> (para $ str "- \\“”—–«»")
, "replace2" =: , "replace2" =:
"\\t\\e\\`\\^\\|\\'" "\\t\\e\\`\\^\\|\\'"
=?> (para $ fromList $ map Str ["\t", "\\", "`", " ", " ", "`"]) =?> (para $ str "\t\\` `")
], ],
testGroup "Lists" [ testGroup "Lists" [
"bullet" =: "bullet" =: