T.P.Readers.Groff: improve LinePart.
Separate font change and font size change tokens. With this change, emphasis no longer works. This needs to be implemented in the parser, not the lexer.
This commit is contained in:
parent
6c71100fcf
commit
e4726518af
2 changed files with 120 additions and 138 deletions
|
@ -33,7 +33,6 @@ Tokenizer for groff formats (man, ms).
|
||||||
-}
|
-}
|
||||||
module Text.Pandoc.Readers.Groff
|
module Text.Pandoc.Readers.Groff
|
||||||
( FontKind(..)
|
( FontKind(..)
|
||||||
, Font
|
|
||||||
, MacroKind
|
, MacroKind
|
||||||
, LinePart(..)
|
, LinePart(..)
|
||||||
, Arg
|
, Arg
|
||||||
|
@ -54,8 +53,6 @@ import Text.Pandoc.Class
|
||||||
import Data.Char (isHexDigit, chr, ord, isAscii, isAlphaNum, isSpace)
|
import Data.Char (isHexDigit, chr, ord, isAscii, isAlphaNum, isSpace)
|
||||||
import Data.Default (Default)
|
import Data.Default (Default)
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
import Data.Set (Set)
|
|
||||||
import qualified Data.Set as S
|
|
||||||
import Data.List (intercalate, isSuffixOf)
|
import Data.List (intercalate, isSuffixOf)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
import Text.Pandoc.Builder as B
|
import Text.Pandoc.Builder as B
|
||||||
|
@ -79,9 +76,9 @@ data FontKind = Bold | Italic | Monospace | Regular deriving (Show, Eq, Ord)
|
||||||
|
|
||||||
type MacroKind = String
|
type MacroKind = String
|
||||||
|
|
||||||
type Font = Set FontKind
|
data LinePart = RoffStr String
|
||||||
|
| Font [FontKind]
|
||||||
data LinePart = RoffStr (String, Font)
|
| FontSize Int
|
||||||
| MacroArg Int
|
| MacroArg Int
|
||||||
deriving Show
|
deriving Show
|
||||||
|
|
||||||
|
@ -100,20 +97,19 @@ newtype ManTokens = ManTokens { unManTokens :: Seq.Seq ManToken }
|
||||||
singleTok :: ManToken -> ManTokens
|
singleTok :: ManToken -> ManTokens
|
||||||
singleTok t = ManTokens (Seq.singleton t)
|
singleTok t = ManTokens (Seq.singleton t)
|
||||||
|
|
||||||
data RoffState = RoffState { fontKind :: Font
|
data RoffState = RoffState { customMacros :: M.Map String ManTokens
|
||||||
, customMacros :: M.Map String ManTokens
|
|
||||||
} deriving Show
|
} deriving Show
|
||||||
|
|
||||||
instance Default RoffState where
|
instance Default RoffState where
|
||||||
def = RoffState { customMacros = M.fromList
|
def = RoffState { customMacros = M.fromList
|
||||||
$ map (\(n, s) ->
|
$ map (\(n, s) ->
|
||||||
(n, singleTok
|
(n, singleTok
|
||||||
(MLine [RoffStr (s, mempty)])))
|
(MLine [RoffStr s])))
|
||||||
[ ("Tm", "\x2122")
|
[ ("Tm", "\x2122")
|
||||||
, ("lq", "\x201C")
|
, ("lq", "\x201C")
|
||||||
, ("rq", "\x201D")
|
, ("rq", "\x201D")
|
||||||
, ("R", "\x00AE") ]
|
, ("R", "\x00AE") ]
|
||||||
, fontKind = S.singleton Regular }
|
}
|
||||||
|
|
||||||
type ManLexer m = ParserT [Char] RoffState m
|
type ManLexer m = ParserT [Char] RoffState m
|
||||||
|
|
||||||
|
@ -135,15 +131,16 @@ combiningAccentsMap :: M.Map String Char
|
||||||
combiningAccentsMap =
|
combiningAccentsMap =
|
||||||
M.fromList $ map (\(x,y) -> (y,x)) combiningAccents
|
M.fromList $ map (\(x,y) -> (y,x)) combiningAccents
|
||||||
|
|
||||||
escapeLexer :: PandocMonad m => ManLexer m String
|
escape :: PandocMonad m => ManLexer m [LinePart]
|
||||||
escapeLexer = try $ do
|
escape = do
|
||||||
char '\\'
|
char '\\'
|
||||||
c <- noneOf ['*','$'] -- see escStar, macroArg
|
c <- anyChar
|
||||||
case c of
|
case c of
|
||||||
'(' -> twoCharGlyph
|
|
||||||
'[' -> bracketedGlyph
|
|
||||||
'f' -> escFont
|
'f' -> escFont
|
||||||
's' -> escFontSize
|
's' -> escFontSize
|
||||||
|
'*' -> escStar
|
||||||
|
'(' -> twoCharGlyph
|
||||||
|
'[' -> bracketedGlyph
|
||||||
'"' -> mempty <$ skipMany (satisfy (/='\n')) -- line comment
|
'"' -> mempty <$ skipMany (satisfy (/='\n')) -- line comment
|
||||||
'#' -> mempty <$ manyTill anyChar newline
|
'#' -> mempty <$ manyTill anyChar newline
|
||||||
'%' -> return mempty
|
'%' -> return mempty
|
||||||
|
@ -154,27 +151,27 @@ escapeLexer = try $ do
|
||||||
':' -> return mempty
|
':' -> return mempty
|
||||||
'0' -> return mempty
|
'0' -> return mempty
|
||||||
'c' -> return mempty
|
'c' -> return mempty
|
||||||
'-' -> return "-"
|
'-' -> return [RoffStr "-"]
|
||||||
'_' -> return "_"
|
'_' -> return [RoffStr "_"]
|
||||||
' ' -> return " "
|
' ' -> return [RoffStr " "]
|
||||||
'\\' -> return "\\"
|
'\\' -> return [RoffStr "\\"]
|
||||||
't' -> return "\t"
|
't' -> return [RoffStr "\t"]
|
||||||
'e' -> return "\\"
|
'e' -> return [RoffStr "\\"]
|
||||||
'`' -> return "`"
|
'`' -> return [RoffStr "`"]
|
||||||
'^' -> return " "
|
'^' -> return [RoffStr " "]
|
||||||
'|' -> return " "
|
'|' -> return [RoffStr " "]
|
||||||
'\'' -> return "`"
|
'\'' -> return [RoffStr "`"]
|
||||||
'.' -> return "`"
|
'.' -> return [RoffStr "`"]
|
||||||
'~' -> return "\160" -- nonbreaking space
|
'~' -> return [RoffStr "\160"] -- nonbreaking space
|
||||||
_ -> escUnknown ['\\',c] "\xFFFD"
|
_ -> escUnknown ['\\',c]
|
||||||
|
|
||||||
where
|
where
|
||||||
|
|
||||||
twoCharGlyph = do
|
twoCharGlyph = do
|
||||||
cs <- count 2 anyChar
|
cs <- count 2 anyChar
|
||||||
case M.lookup cs characterCodeMap of
|
case M.lookup cs characterCodeMap of
|
||||||
Just c -> return [c]
|
Just c -> return [RoffStr [c]]
|
||||||
Nothing -> escUnknown ('\\':'(':cs) "\xFFFD"
|
Nothing -> escUnknown ('\\':'(':cs)
|
||||||
|
|
||||||
bracketedGlyph = unicodeGlyph <|> charGlyph
|
bracketedGlyph = unicodeGlyph <|> charGlyph
|
||||||
|
|
||||||
|
@ -184,7 +181,7 @@ escapeLexer = try $ do
|
||||||
[] -> mzero
|
[] -> mzero
|
||||||
[s] -> case M.lookup s characterCodeMap of
|
[s] -> case M.lookup s characterCodeMap of
|
||||||
Nothing -> mzero
|
Nothing -> mzero
|
||||||
Just c -> return [c]
|
Just c -> return [RoffStr [c]]
|
||||||
(s:ss) -> do
|
(s:ss) -> do
|
||||||
basechar <- case M.lookup cs characterCodeMap of
|
basechar <- case M.lookup cs characterCodeMap of
|
||||||
Nothing ->
|
Nothing ->
|
||||||
|
@ -200,10 +197,12 @@ escapeLexer = try $ do
|
||||||
case M.lookup a combiningAccentsMap of
|
case M.lookup a combiningAccentsMap of
|
||||||
Just x -> addAccents as (x:xs)
|
Just x -> addAccents as (x:xs)
|
||||||
Nothing -> mzero
|
Nothing -> mzero
|
||||||
addAccents ss [basechar])
|
addAccents ss [basechar] >>= \xs -> return [RoffStr xs])
|
||||||
<|> escUnknown ("\\[" ++ cs ++ "]") "\xFFFD"
|
<|> escUnknown ("\\[" ++ cs ++ "]")
|
||||||
|
|
||||||
unicodeGlyph = try $ ucharCode `sepBy1` (char '_') <* char ']'
|
unicodeGlyph = try $ do
|
||||||
|
xs <- ucharCode `sepBy1` (char '_') <* char ']'
|
||||||
|
return [RoffStr xs]
|
||||||
|
|
||||||
ucharCode = try $ do
|
ucharCode = try $ do
|
||||||
char 'u'
|
char 'u'
|
||||||
|
@ -214,53 +213,66 @@ escapeLexer = try $ do
|
||||||
Nothing -> mzero
|
Nothing -> mzero
|
||||||
Just c -> return c
|
Just c -> return c
|
||||||
|
|
||||||
-- \s-1 \s0 -- we ignore these
|
escUnknown :: PandocMonad m => String -> ManLexer m [LinePart]
|
||||||
escFontSize :: PandocMonad m => ManLexer m String
|
escUnknown s = do
|
||||||
escFontSize = do
|
|
||||||
pos <- getPosition
|
|
||||||
pm <- option "" $ count 1 (oneOf "+-")
|
|
||||||
ds <- many1 digit
|
|
||||||
report $ SkippedContent ("\\s" ++ pm ++ ds) pos
|
|
||||||
return mempty
|
|
||||||
|
|
||||||
escFont :: PandocMonad m => ManLexer m String
|
|
||||||
escFont = do
|
|
||||||
font <- choice
|
|
||||||
[ S.singleton <$> letterFontKind
|
|
||||||
, char '(' >> anyChar >> anyChar >> return (S.singleton Regular)
|
|
||||||
, char 'S' >> return (S.singleton Regular)
|
|
||||||
, try lettersFont
|
|
||||||
, digit >> return (S.singleton Regular)
|
|
||||||
]
|
|
||||||
modifyState (\r -> r {fontKind = font})
|
|
||||||
return mempty
|
|
||||||
|
|
||||||
lettersFont :: PandocMonad m => ManLexer m Font
|
|
||||||
lettersFont = do
|
|
||||||
char '['
|
|
||||||
fs <- many letterFontKind
|
|
||||||
skipMany letter
|
|
||||||
char ']'
|
|
||||||
return $ S.fromList fs
|
|
||||||
|
|
||||||
letterFontKind :: PandocMonad m => ManLexer m FontKind
|
|
||||||
letterFontKind = choice [
|
|
||||||
oneOf ['B','b'] >> return Bold
|
|
||||||
, oneOf ['I','i'] >> return Italic
|
|
||||||
, oneOf ['C','c'] >> return Monospace
|
|
||||||
, oneOf ['P','p','R','r'] >> return Regular
|
|
||||||
]
|
|
||||||
|
|
||||||
escUnknown :: PandocMonad m => String -> a -> ManLexer m a
|
|
||||||
escUnknown s x = do
|
|
||||||
pos <- getPosition
|
pos <- getPosition
|
||||||
report $ SkippedContent ("Unknown escape sequence " ++ s) pos
|
report $ SkippedContent ("Unknown escape sequence " ++ s) pos
|
||||||
return x
|
return [RoffStr "\xFFFD"]
|
||||||
|
|
||||||
currentFont :: PandocMonad m => ManLexer m Font
|
-- \s-1 \s0
|
||||||
currentFont = fontKind <$> getState
|
escFontSize :: PandocMonad m => ManLexer m [LinePart]
|
||||||
|
escFontSize = do
|
||||||
|
let sign = option "" $ count 1 (oneOf "+-")
|
||||||
|
let toFontSize xs =
|
||||||
|
case safeRead xs of
|
||||||
|
Nothing -> mzero
|
||||||
|
Just n -> return [FontSize n]
|
||||||
|
choice
|
||||||
|
[ do char '('
|
||||||
|
s <- sign
|
||||||
|
ds <- count 2 digit
|
||||||
|
toFontSize (s ++ ds)
|
||||||
|
, do char '['
|
||||||
|
s <- sign
|
||||||
|
ds <- many1 digit
|
||||||
|
char ']'
|
||||||
|
toFontSize (s ++ ds)
|
||||||
|
, do s <- sign
|
||||||
|
ds <- count 1 digit
|
||||||
|
toFontSize (s ++ ds)
|
||||||
|
]
|
||||||
|
|
||||||
-- separate function from lexMacro since real man files sometimes do not follow the rules
|
escFont :: PandocMonad m => ManLexer m [LinePart]
|
||||||
|
escFont = do
|
||||||
|
font <- choice
|
||||||
|
[ char 'S' >> return [Regular]
|
||||||
|
, digit >> return [Regular]
|
||||||
|
, (:[]) <$> letterFontKind
|
||||||
|
, char '(' >> anyChar >> anyChar >> return [Regular]
|
||||||
|
, lettersFont
|
||||||
|
, digit >> return [Regular]
|
||||||
|
]
|
||||||
|
return [Font font]
|
||||||
|
|
||||||
|
lettersFont :: PandocMonad m => ManLexer m [FontKind]
|
||||||
|
lettersFont = try $ do
|
||||||
|
char '['
|
||||||
|
fs <- many letterFontKind
|
||||||
|
skipMany letter
|
||||||
|
char ']'
|
||||||
|
return fs
|
||||||
|
|
||||||
|
letterFontKind :: PandocMonad m => ManLexer m FontKind
|
||||||
|
letterFontKind = choice [
|
||||||
|
oneOf ['B','b'] >> return Bold
|
||||||
|
, oneOf ['I','i'] >> return Italic
|
||||||
|
, oneOf ['C','c'] >> return Monospace
|
||||||
|
, oneOf ['P','p','R','r'] >> return Regular
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
-- separate function from lexMacro since real man files sometimes do not
|
||||||
|
-- follow the rules
|
||||||
lexComment :: PandocMonad m => ManLexer m ManTokens
|
lexComment :: PandocMonad m => ManLexer m ManTokens
|
||||||
lexComment = do
|
lexComment = do
|
||||||
try $ string ".\\\""
|
try $ string ".\\\""
|
||||||
|
@ -310,11 +322,11 @@ lexTable = do
|
||||||
lexConditional :: PandocMonad m => ManLexer m ManTokens
|
lexConditional :: PandocMonad m => ManLexer m ManTokens
|
||||||
lexConditional = do
|
lexConditional = do
|
||||||
skipMany spacetab
|
skipMany spacetab
|
||||||
parseNCond <|> skipConditional
|
lexNCond <|> skipConditional
|
||||||
|
|
||||||
-- n means nroff mode
|
-- n means nroff mode
|
||||||
parseNCond :: PandocMonad m => ManLexer m ManTokens
|
lexNCond :: PandocMonad m => ManLexer m ManTokens
|
||||||
parseNCond = do
|
lexNCond = do
|
||||||
char '\n'
|
char '\n'
|
||||||
many1 spacetab
|
many1 spacetab
|
||||||
lexGroup <|> manToken
|
lexGroup <|> manToken
|
||||||
|
@ -355,11 +367,11 @@ resolveMacro macroName args pos = do
|
||||||
case M.lookup macroName macros of
|
case M.lookup macroName macros of
|
||||||
Nothing -> return $ singleTok $ MMacro macroName args pos
|
Nothing -> return $ singleTok $ MMacro macroName args pos
|
||||||
Just ts -> do
|
Just ts -> do
|
||||||
let fillLP (RoffStr (x,y)) zs = RoffStr (x,y) : zs
|
let fillLP (MacroArg i) zs =
|
||||||
fillLP (MacroArg i) zs =
|
|
||||||
case drop (i - 1) args of
|
case drop (i - 1) args of
|
||||||
[] -> zs
|
[] -> zs
|
||||||
(ys:_) -> ys ++ zs
|
(ys:_) -> ys ++ zs
|
||||||
|
fillLP z zs = z : zs
|
||||||
let fillMacroArg (MLine lineparts) =
|
let fillMacroArg (MLine lineparts) =
|
||||||
MLine (foldr fillLP [] lineparts)
|
MLine (foldr fillLP [] lineparts)
|
||||||
fillMacroArg x = x
|
fillMacroArg x = x
|
||||||
|
@ -370,7 +382,7 @@ lexStringDef args = do -- string definition
|
||||||
case args of
|
case args of
|
||||||
[] -> fail "No argument to .ds"
|
[] -> fail "No argument to .ds"
|
||||||
(x:ys) -> do
|
(x:ys) -> do
|
||||||
let ts = singleTok $ MLine (intercalate [RoffStr (" ", mempty)] ys)
|
let ts = singleTok $ MLine (intercalate [RoffStr " " ] ys)
|
||||||
let stringName = linePartsToString x
|
let stringName = linePartsToString x
|
||||||
modifyState $ \st ->
|
modifyState $ \st ->
|
||||||
st{ customMacros = M.insert stringName ts (customMacros st) }
|
st{ customMacros = M.insert stringName ts (customMacros st) }
|
||||||
|
@ -413,21 +425,16 @@ lexArgs = do
|
||||||
plainArg :: PandocMonad m => ManLexer m [LinePart]
|
plainArg :: PandocMonad m => ManLexer m [LinePart]
|
||||||
plainArg = do
|
plainArg = do
|
||||||
skipMany spacetab
|
skipMany spacetab
|
||||||
mconcat <$> many1
|
mconcat <$> many1 (macroArg <|> escape <|> regularText <|> unescapedQuote)
|
||||||
(macroArg <|> esc <|> regularText <|> unescapedQuote <|> escStar)
|
|
||||||
where
|
where
|
||||||
unescapedQuote = do
|
unescapedQuote = char '"' >> return [RoffStr "\""]
|
||||||
char '"'
|
|
||||||
fonts <- currentFont
|
|
||||||
return [RoffStr ("\"", fonts)]
|
|
||||||
|
|
||||||
|
|
||||||
quotedArg :: PandocMonad m => ManLexer m [LinePart]
|
quotedArg :: PandocMonad m => ManLexer m [LinePart]
|
||||||
quotedArg = do
|
quotedArg = do
|
||||||
skipMany spacetab
|
skipMany spacetab
|
||||||
char '"'
|
char '"'
|
||||||
xs <- mconcat <$>
|
xs <- mconcat <$>
|
||||||
many (macroArg <|> esc <|> escStar <|> regularText
|
many (macroArg <|> escape <|> regularText
|
||||||
<|> spaceTabChar <|> escapedQuote)
|
<|> spaceTabChar <|> escapedQuote)
|
||||||
char '"'
|
char '"'
|
||||||
return xs
|
return xs
|
||||||
|
@ -435,14 +442,11 @@ lexArgs = do
|
||||||
escapedQuote = try $ do
|
escapedQuote = try $ do
|
||||||
char '"'
|
char '"'
|
||||||
char '"'
|
char '"'
|
||||||
fonts <- currentFont
|
return [RoffStr "\""]
|
||||||
return [RoffStr ("\"", fonts)]
|
|
||||||
|
|
||||||
escStar :: PandocMonad m => ManLexer m [LinePart]
|
escStar :: PandocMonad m => ManLexer m [LinePart]
|
||||||
escStar = try $ do
|
escStar = try $ do
|
||||||
pos <- getPosition
|
pos <- getPosition
|
||||||
char '\\'
|
|
||||||
char '*'
|
|
||||||
c <- anyChar
|
c <- anyChar
|
||||||
case c of
|
case c of
|
||||||
'(' -> do
|
'(' -> do
|
||||||
|
@ -474,11 +478,11 @@ lexLine = do
|
||||||
where -- return empty line if we only have empty strings;
|
where -- return empty line if we only have empty strings;
|
||||||
-- this can happen if the line just contains \f[C], for example.
|
-- this can happen if the line just contains \f[C], for example.
|
||||||
go [] = return mempty
|
go [] = return mempty
|
||||||
go (RoffStr ("",_):xs) = go xs
|
go (RoffStr "" : xs) = go xs
|
||||||
go xs = return $ singleTok $ MLine xs
|
go xs = return $ singleTok $ MLine xs
|
||||||
|
|
||||||
linePart :: PandocMonad m => ManLexer m [LinePart]
|
linePart :: PandocMonad m => ManLexer m [LinePart]
|
||||||
linePart = macroArg <|> esc <|> escStar <|>
|
linePart = macroArg <|> escape <|>
|
||||||
regularText <|> quoteChar <|> spaceTabChar
|
regularText <|> quoteChar <|> spaceTabChar
|
||||||
|
|
||||||
macroArg :: PandocMonad m => ManLexer m [LinePart]
|
macroArg :: PandocMonad m => ManLexer m [LinePart]
|
||||||
|
@ -487,29 +491,20 @@ macroArg = try $ do
|
||||||
x <- digit
|
x <- digit
|
||||||
return [MacroArg $ ord x - ord '0']
|
return [MacroArg $ ord x - ord '0']
|
||||||
|
|
||||||
esc :: PandocMonad m => ManLexer m [LinePart]
|
|
||||||
esc = do
|
|
||||||
s <- escapeLexer
|
|
||||||
font <- currentFont
|
|
||||||
return [RoffStr (s, font)]
|
|
||||||
|
|
||||||
regularText :: PandocMonad m => ManLexer m [LinePart]
|
regularText :: PandocMonad m => ManLexer m [LinePart]
|
||||||
regularText = do
|
regularText = do
|
||||||
s <- many1 $ noneOf "\n\r\t \\\""
|
s <- many1 $ noneOf "\n\r\t \\\""
|
||||||
font <- currentFont
|
return [RoffStr s]
|
||||||
return [RoffStr (s, font)]
|
|
||||||
|
|
||||||
quoteChar :: PandocMonad m => ManLexer m [LinePart]
|
quoteChar :: PandocMonad m => ManLexer m [LinePart]
|
||||||
quoteChar = do
|
quoteChar = do
|
||||||
char '"'
|
char '"'
|
||||||
font <- currentFont
|
return [RoffStr "\""]
|
||||||
return [RoffStr ("\"", font)]
|
|
||||||
|
|
||||||
spaceTabChar :: PandocMonad m => ManLexer m [LinePart]
|
spaceTabChar :: PandocMonad m => ManLexer m [LinePart]
|
||||||
spaceTabChar = do
|
spaceTabChar = do
|
||||||
c <- spacetab
|
c <- spacetab
|
||||||
font <- currentFont
|
return [RoffStr [c]]
|
||||||
return [RoffStr ([c], font)]
|
|
||||||
|
|
||||||
lexEmptyLine :: PandocMonad m => ManLexer m ManTokens
|
lexEmptyLine :: PandocMonad m => ManLexer m ManTokens
|
||||||
lexEmptyLine = char '\n' >> return (singleTok MEmptyLine)
|
lexEmptyLine = char '\n' >> return (singleTok MEmptyLine)
|
||||||
|
@ -520,5 +515,5 @@ manToken = lexComment <|> lexMacro <|> lexLine <|> lexEmptyLine
|
||||||
linePartsToString :: [LinePart] -> String
|
linePartsToString :: [LinePart] -> String
|
||||||
linePartsToString = mconcat . map go
|
linePartsToString = mconcat . map go
|
||||||
where
|
where
|
||||||
go (RoffStr (s, _)) = s
|
go (RoffStr s) = s
|
||||||
go _ = mempty
|
go _ = mempty
|
||||||
|
|
|
@ -39,7 +39,6 @@ import Control.Monad (liftM, mzero, guard)
|
||||||
import Control.Monad.Except (throwError)
|
import Control.Monad.Except (throwError)
|
||||||
import Text.Pandoc.Class (PandocMonad(..), report)
|
import Text.Pandoc.Class (PandocMonad(..), report)
|
||||||
import Data.Maybe (catMaybes)
|
import Data.Maybe (catMaybes)
|
||||||
import qualified Data.Set as S
|
|
||||||
import Data.List (intersperse, intercalate)
|
import Data.List (intersperse, intercalate)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
import Text.Pandoc.Builder as B
|
import Text.Pandoc.Builder as B
|
||||||
|
@ -174,34 +173,16 @@ parseTitle = do
|
||||||
return mempty
|
return mempty
|
||||||
|
|
||||||
linePartsToInlines :: [LinePart] -> Inlines
|
linePartsToInlines :: [LinePart] -> Inlines
|
||||||
linePartsToInlines = go
|
linePartsToInlines = go []
|
||||||
|
|
||||||
where
|
where
|
||||||
go [] = mempty
|
go :: [[FontKind]] -> [LinePart] -> Inlines
|
||||||
go (MacroArg _:xs) = go xs -- shouldn't happen
|
go _ [] = mempty
|
||||||
go xs@(RoffStr{} : _) =
|
go fs (MacroArg _:xs) = go fs xs -- shouldn't happen
|
||||||
if lb > 0 && lb >= li
|
go fs (RoffStr s : xs) = text s <> go fs xs
|
||||||
then strong (go (removeFont Bold bolds)) <> go (drop lb xs)
|
go (_:fs) (Font [] : xs) = go fs xs -- return to previous font
|
||||||
else if li > 0
|
go fs (Font _newfonts : xs) = go fs xs
|
||||||
then emph (go (removeFont Italic italics)) <> go (drop li xs)
|
go fonts (FontSize _fs : xs) = go fonts xs
|
||||||
else text (linePartsToString regulars) <> go (drop lr xs)
|
|
||||||
|
|
||||||
where
|
|
||||||
(lb, li, lr) = (length bolds, length italics, length regulars)
|
|
||||||
|
|
||||||
removeFont font = map (removeFont' font)
|
|
||||||
removeFont' font (RoffStr (s,f)) = RoffStr (s, S.delete font f)
|
|
||||||
removeFont' _ x = x
|
|
||||||
|
|
||||||
bolds = takeWhile isBold xs
|
|
||||||
italics = takeWhile isItalic xs
|
|
||||||
regulars = takeWhile (\x -> not (isBold x || isItalic x)) xs
|
|
||||||
|
|
||||||
isBold (RoffStr (_,f)) = Bold `S.member` f
|
|
||||||
isBold _ = False
|
|
||||||
|
|
||||||
isItalic (RoffStr (_,f)) = Italic `S.member` f
|
|
||||||
isItalic _ = False
|
|
||||||
|
|
||||||
parsePara :: PandocMonad m => ManParser m Blocks
|
parsePara :: PandocMonad m => ManParser m Blocks
|
||||||
parsePara = para . trimInlines <$> parseInlines
|
parsePara = para . trimInlines <$> parseInlines
|
||||||
|
@ -289,7 +270,13 @@ parseCodeBlock = try $ do
|
||||||
where
|
where
|
||||||
|
|
||||||
extractText :: ManToken -> Maybe String
|
extractText :: ManToken -> Maybe String
|
||||||
extractText (MLine ss) = Just $ linePartsToString ss
|
extractText (MLine ss)
|
||||||
|
| not (null ss)
|
||||||
|
, all isFontToken ss = Nothing
|
||||||
|
| otherwise = Just $ linePartsToString ss
|
||||||
|
where isFontToken (FontSize{}) = True
|
||||||
|
isFontToken (Font{}) = True
|
||||||
|
isFontToken _ = False
|
||||||
extractText MEmptyLine = Just ""
|
extractText MEmptyLine = Just ""
|
||||||
-- string are intercalated with '\n', this prevents double '\n'
|
-- string are intercalated with '\n', this prevents double '\n'
|
||||||
extractText _ = Nothing
|
extractText _ = Nothing
|
||||||
|
|
Loading…
Add table
Reference in a new issue