nested lists
This commit is contained in:
parent
1d7c71189a
commit
9030c5ae46
1 changed files with 82 additions and 50 deletions
|
@ -36,6 +36,7 @@ module Text.Pandoc.Readers.Man (readMan, testFile) where
|
||||||
import Prelude
|
import Prelude
|
||||||
import Control.Monad (liftM)
|
import Control.Monad (liftM)
|
||||||
import Control.Monad.Except (throwError)
|
import Control.Monad.Except (throwError)
|
||||||
|
import Data.Char (isDigit, isUpper, isLower)
|
||||||
import Data.Default (Default)
|
import Data.Default (Default)
|
||||||
import Data.Map (insert)
|
import Data.Map (insert)
|
||||||
import Data.Maybe (catMaybes)
|
import Data.Maybe (catMaybes)
|
||||||
|
@ -64,18 +65,18 @@ data MacroKind = KTitle
|
||||||
| KCodeBlEnd
|
| KCodeBlEnd
|
||||||
| KTab
|
| KTab
|
||||||
| KTabEnd
|
| KTabEnd
|
||||||
|
| KSubTab
|
||||||
deriving (Show, Eq)
|
deriving (Show, Eq)
|
||||||
|
|
||||||
-- TODO header strings
|
type RoffStr = (String, FontKind)
|
||||||
-- TODO remove MStr
|
|
||||||
-- TODO filter skipped content
|
data ManToken = MStr RoffStr
|
||||||
data ManToken = MStr String FontKind
|
| MLine [RoffStr]
|
||||||
| MLine [(String, FontKind)]
|
|
||||||
| MMaybeLink String
|
| MMaybeLink String
|
||||||
| MEmptyLine
|
| MEmptyLine
|
||||||
| MHeader Int String
|
| MHeader Int [RoffStr]
|
||||||
| MMacro MacroKind [String]
|
| MMacro MacroKind [RoffStr]
|
||||||
| MUnknownMacro String [String]
|
| MUnknownMacro String [RoffStr]
|
||||||
| MComment String
|
| MComment String
|
||||||
deriving Show
|
deriving Show
|
||||||
|
|
||||||
|
@ -152,7 +153,7 @@ lexMan = many (lexMacro <|> lexLine <|> lexEmptyLine)
|
||||||
|
|
||||||
parseMan :: PandocMonad m => ManParser m Pandoc
|
parseMan :: PandocMonad m => ManParser m Pandoc
|
||||||
parseMan = do
|
parseMan = do
|
||||||
let parsers = [ parseBulletList, parseTitle, parsePara, parseSkippedContent
|
let parsers = [ try parseBulletList, parseTitle, parsePara, parseSkippedContent
|
||||||
, parseCodeBlock, parseHeader, parseSkipMacro]
|
, parseCodeBlock, parseHeader, parseSkipMacro]
|
||||||
blocks <- many $ choice parsers
|
blocks <- many $ choice parsers
|
||||||
parserst <- getState
|
parserst <- getState
|
||||||
|
@ -212,13 +213,18 @@ escapeLexer = do
|
||||||
, char 'P' >> return Regular
|
, char 'P' >> return Regular
|
||||||
]
|
]
|
||||||
|
|
||||||
|
currentFont :: PandocMonad m => ManLexer m FontKind
|
||||||
|
currentFont = do
|
||||||
|
RoffState {fontKind = fk} <- getState
|
||||||
|
return fk
|
||||||
|
|
||||||
lexMacro :: PandocMonad m => ManLexer m ManToken
|
lexMacro :: PandocMonad m => ManLexer m ManToken
|
||||||
lexMacro = do
|
lexMacro = do
|
||||||
char '.' <|> char '\''
|
char '.' <|> char '\''
|
||||||
many spacetab
|
many spacetab
|
||||||
macroName <- many1 (letter <|> oneOf ['\\', '"'])
|
macroName <- many1 (letter <|> oneOf ['\\', '"'])
|
||||||
args <- lexArgs
|
args <- lexArgs
|
||||||
let joinedArgs = unwords args
|
let joinedArgs = unwords $ fst <$> args
|
||||||
let knownMacro mkind = MMacro mkind args
|
let knownMacro mkind = MMacro mkind args
|
||||||
|
|
||||||
let tok = case macroName of
|
let tok = case macroName of
|
||||||
|
@ -227,21 +233,22 @@ lexMacro = do
|
||||||
"IP" -> knownMacro KTab
|
"IP" -> knownMacro KTab
|
||||||
"TP" -> knownMacro KTab
|
"TP" -> knownMacro KTab
|
||||||
"RE" -> knownMacro KTabEnd
|
"RE" -> knownMacro KTabEnd
|
||||||
|
"RS" -> knownMacro KSubTab
|
||||||
"nf" -> knownMacro KCodeBlStart
|
"nf" -> knownMacro KCodeBlStart
|
||||||
"fi" -> knownMacro KCodeBlEnd
|
"fi" -> knownMacro KCodeBlEnd
|
||||||
"B" -> MStr joinedArgs Bold
|
"B" -> MStr (joinedArgs,Bold)
|
||||||
"BR" -> MMaybeLink joinedArgs
|
"BR" -> MMaybeLink joinedArgs
|
||||||
x | x `elem` ["BI", "IB"] -> MStr joinedArgs ItalicBold
|
x | x `elem` ["BI", "IB"] -> MStr (joinedArgs, ItalicBold)
|
||||||
x | x `elem` ["I", "IR", "RI"] -> MStr joinedArgs Italic
|
x | x `elem` ["I", "IR", "RI"] -> MStr (joinedArgs, Italic)
|
||||||
"SH" -> MHeader 2 joinedArgs
|
"SH" -> MHeader 2 args
|
||||||
"SS" -> MHeader 3 joinedArgs
|
"SS" -> MHeader 3 args
|
||||||
x | x `elem` [ "P", "PP", "LP", "sp"] -> MEmptyLine
|
x | x `elem` [ "P", "PP", "LP", "sp"] -> MEmptyLine
|
||||||
_ -> MUnknownMacro macroName args
|
_ -> MUnknownMacro macroName args
|
||||||
return tok
|
return tok
|
||||||
|
|
||||||
where
|
where
|
||||||
|
|
||||||
lexArgs :: PandocMonad m => ManLexer m [String]
|
lexArgs :: PandocMonad m => ManLexer m [RoffStr]
|
||||||
lexArgs = do
|
lexArgs = do
|
||||||
args <- many oneArg
|
args <- many oneArg
|
||||||
eofline
|
eofline
|
||||||
|
@ -249,20 +256,24 @@ lexMacro = do
|
||||||
|
|
||||||
where
|
where
|
||||||
|
|
||||||
oneArg :: PandocMonad m => ManLexer m String
|
oneArg :: PandocMonad m => ManLexer m RoffStr
|
||||||
oneArg = do
|
oneArg = do
|
||||||
many1 spacetab
|
many1 spacetab
|
||||||
quotedArg <|> plainArg
|
quotedArg <|> plainArg
|
||||||
|
|
||||||
plainArg :: PandocMonad m => ManLexer m String
|
plainArg :: PandocMonad m => ManLexer m RoffStr
|
||||||
plainArg = fmap catMaybes . many1 $ escChar <|> (Just <$> noneOf " \t\n")
|
plainArg = do
|
||||||
|
arg <- many1 $ escChar <|> (Just <$> noneOf " \t\n")
|
||||||
|
f <- currentFont
|
||||||
|
return (catMaybes arg, f)
|
||||||
|
|
||||||
quotedArg :: PandocMonad m => ManLexer m String
|
quotedArg :: PandocMonad m => ManLexer m RoffStr
|
||||||
quotedArg = do
|
quotedArg = do
|
||||||
char '"'
|
char '"'
|
||||||
val <- catMaybes <$> many quotedChar
|
val <- catMaybes <$> many quotedChar
|
||||||
char '"'
|
char '"'
|
||||||
return val
|
f <- currentFont
|
||||||
|
return (val, f)
|
||||||
|
|
||||||
quotedChar :: PandocMonad m => ManLexer m (Maybe Char)
|
quotedChar :: PandocMonad m => ManLexer m (Maybe Char)
|
||||||
quotedChar = escChar <|> (Just <$> noneOf "\"\n") <|> (Just <$> try (string "\"\"" >> return '"'))
|
quotedChar = escChar <|> (Just <$> noneOf "\"\n") <|> (Just <$> try (string "\"\"" >> return '"'))
|
||||||
|
@ -296,11 +307,6 @@ lexLine = do
|
||||||
font <- currentFont
|
font <- currentFont
|
||||||
return $ Just (lnpart, font)
|
return $ Just (lnpart, font)
|
||||||
|
|
||||||
currentFont :: PandocMonad m => ManLexer m FontKind
|
|
||||||
currentFont = do
|
|
||||||
RoffState {fontKind = fk} <- getState
|
|
||||||
return fk
|
|
||||||
|
|
||||||
|
|
||||||
lexEmptyLine :: PandocMonad m => ManLexer m ManToken
|
lexEmptyLine :: PandocMonad m => ManLexer m ManToken
|
||||||
lexEmptyLine = char '\n' >> return MEmptyLine
|
lexEmptyLine = char '\n' >> return MEmptyLine
|
||||||
|
@ -313,11 +319,11 @@ msatisfy :: (Show t, Stream s m t) => (t -> Bool) -> ParserT s st m t
|
||||||
msatisfy predic = tokenPrim show nextPos testTok
|
msatisfy predic = tokenPrim show nextPos testTok
|
||||||
where
|
where
|
||||||
testTok t = if predic t then Just t else Nothing
|
testTok t = if predic t then Just t else Nothing
|
||||||
nextPos pos x _xs = updatePosString pos (show x)
|
nextPos pos x _xs = updatePosString (setSourceLine pos $ sourceLine pos + (if predic x then 1 else 0)) (show x)
|
||||||
|
|
||||||
mstr :: PandocMonad m => ManParser m ManToken
|
mstr :: PandocMonad m => ManParser m ManToken
|
||||||
mstr = msatisfy isMStr where
|
mstr = msatisfy isMStr where
|
||||||
isMStr (MStr _ _) = True
|
isMStr (MStr _) = True
|
||||||
isMStr _ = False
|
isMStr _ = False
|
||||||
|
|
||||||
mline :: PandocMonad m => ManParser m ManToken
|
mline :: PandocMonad m => ManParser m ManToken
|
||||||
|
@ -371,7 +377,7 @@ parseTitle = do
|
||||||
if null args
|
if null args
|
||||||
then return Null
|
then return Null
|
||||||
else do
|
else do
|
||||||
let mantitle = head args
|
let mantitle = fst $ head args
|
||||||
modifyState (changeTitle mantitle)
|
modifyState (changeTitle mantitle)
|
||||||
return $ Header 1 nullAttr [Str mantitle]
|
return $ Header 1 nullAttr [Str mantitle]
|
||||||
where
|
where
|
||||||
|
@ -395,29 +401,32 @@ parseSkippedContent = do
|
||||||
logMessage $ SkippedContent ("Unknown macro: " ++ mname) pos
|
logMessage $ SkippedContent ("Unknown macro: " ++ mname) pos
|
||||||
onToken _ = return ()
|
onToken _ = return ()
|
||||||
|
|
||||||
strToInline :: String -> FontKind -> Inline
|
strToInline :: RoffStr -> Inline
|
||||||
strToInline s Regular = Str s
|
strToInline (s, Regular) = Str s
|
||||||
strToInline s Italic = Emph [Str s]
|
strToInline (s, Italic) = Emph [Str s]
|
||||||
strToInline s Bold = Strong [Str s]
|
strToInline (s, Bold) = Strong [Str s]
|
||||||
strToInline s ItalicBold = Strong [Emph [Str s]]
|
strToInline (s, ItalicBold) = Strong [Emph [Str s]]
|
||||||
|
|
||||||
parsePara :: PandocMonad m => ManParser m Block
|
parsePara :: PandocMonad m => ManParser m Block
|
||||||
parsePara = do
|
parsePara = Para <$> parseInlines
|
||||||
|
|
||||||
|
parseInlines :: PandocMonad m => ManParser m [Inline]
|
||||||
|
parseInlines = do
|
||||||
inls <- many1 (strInl <|> lineInl <|> linkInl <|> comment)
|
inls <- many1 (strInl <|> lineInl <|> linkInl <|> comment)
|
||||||
let withspaces = intersperse [Space] inls
|
let withspaces = intersperse [Space] inls
|
||||||
return $ Para (concat withspaces)
|
return $ concat withspaces
|
||||||
|
|
||||||
where
|
where
|
||||||
|
|
||||||
strInl :: PandocMonad m => ManParser m [Inline]
|
strInl :: PandocMonad m => ManParser m [Inline]
|
||||||
strInl = do
|
strInl = do
|
||||||
(MStr str fk) <- mstr
|
(MStr rstr) <- mstr
|
||||||
return [strToInline str fk]
|
return [strToInline rstr]
|
||||||
|
|
||||||
lineInl :: PandocMonad m => ManParser m [Inline]
|
lineInl :: PandocMonad m => ManParser m [Inline]
|
||||||
lineInl = do
|
lineInl = do
|
||||||
(MLine fragments) <- mline
|
(MLine fragments) <- mline
|
||||||
return $ fmap (\(s,f) -> strToInline s f) fragments
|
return $ strToInline <$> fragments
|
||||||
|
|
||||||
linkInl :: PandocMonad m => ManParser m [Inline]
|
linkInl :: PandocMonad m => ManParser m [Inline]
|
||||||
linkInl = do
|
linkInl = do
|
||||||
|
@ -458,7 +467,7 @@ parseCodeBlock = do
|
||||||
where
|
where
|
||||||
|
|
||||||
extractText :: ManToken -> Maybe String
|
extractText :: ManToken -> Maybe String
|
||||||
extractText (MStr s _) = Just s
|
extractText (MStr (s, _)) = Just s
|
||||||
extractText (MLine ss) = Just . concat $ map fst ss -- TODO maybe unwords?
|
extractText (MLine ss) = Just . concat $ map fst ss -- TODO maybe unwords?
|
||||||
extractText (MMaybeLink s) = Just s
|
extractText (MMaybeLink s) = Just s
|
||||||
extractText MEmptyLine = Just "" -- string are intercalated with '\n', this prevents double '\n'
|
extractText MEmptyLine = Just "" -- string are intercalated with '\n', this prevents double '\n'
|
||||||
|
@ -466,20 +475,43 @@ parseCodeBlock = do
|
||||||
|
|
||||||
parseHeader :: PandocMonad m => ManParser m Block
|
parseHeader :: PandocMonad m => ManParser m Block
|
||||||
parseHeader = do
|
parseHeader = do
|
||||||
(MHeader lvl s) <- mheader
|
(MHeader lvl ss) <- mheader
|
||||||
return $ Header lvl nullAttr [Str s]
|
return $ Header lvl nullAttr $ intersperse Space $ strToInline <$> ss
|
||||||
|
|
||||||
parseBulletList :: PandocMonad m => ManParser m Block
|
parseBulletList :: PandocMonad m => ManParser m Block
|
||||||
parseBulletList = BulletList . map (: []) <$> many1 block
|
parseBulletList = BulletList <$> many1 paras where
|
||||||
|
|
||||||
where
|
macroIPInl :: [RoffStr] -> [Inline]
|
||||||
|
macroIPInl (x:_:[]) = [strToInline x, Space]
|
||||||
|
macroIPInl _ = []
|
||||||
|
|
||||||
block :: PandocMonad m => ManParser m Block
|
listKind :: [RoffStr] -> Maybe ([[Block]] -> Block)
|
||||||
block = do
|
listKind (((c:_), _):_:[]) =
|
||||||
mmacro KTab
|
let params style = OrderedList (1, style, DefaultDelim)
|
||||||
pars <- parsePara
|
in Just $ case c of
|
||||||
many $ mmacro KTabEnd
|
_ | isDigit c -> params Decimal
|
||||||
return pars
|
_ | isUpper c -> params UpperAlpha
|
||||||
|
_ | isLower c -> params LowerAlpha
|
||||||
|
_ -> BulletList
|
||||||
|
|
||||||
|
listKind _ = Nothing
|
||||||
|
|
||||||
|
paras :: PandocMonad m => ManParser m [Block]
|
||||||
|
paras = do
|
||||||
|
(MMacro _ args) <- mmacro KTab
|
||||||
|
let lk = listKind args
|
||||||
|
inls <- parseInlines
|
||||||
|
let macroinl = macroIPInl args
|
||||||
|
let para = Plain $ macroinl ++ inls
|
||||||
|
subls <- many sublist
|
||||||
|
return $ para : subls
|
||||||
|
|
||||||
|
sublist :: PandocMonad m => ManParser m Block
|
||||||
|
sublist = do
|
||||||
|
mmacro KSubTab
|
||||||
|
bl <- parseBulletList
|
||||||
|
mmacro KTabEnd
|
||||||
|
return bl
|
||||||
|
|
||||||
-- In case of weird man file it will be parsed succesfully
|
-- In case of weird man file it will be parsed succesfully
|
||||||
parseSkipMacro :: PandocMonad m => ManParser m Block
|
parseSkipMacro :: PandocMonad m => ManParser m Block
|
||||||
|
|
Loading…
Add table
Reference in a new issue