HTML Reader: Parse <ol> type, class, and inline list-style(-type) CSS

This commit is contained in:
Ophir Lifshitz 2015-07-24 02:53:17 -04:00
parent 8390d935d8
commit 7ef8700734
3 changed files with 67 additions and 18 deletions

View file

@ -358,7 +358,8 @@ Library
Text.Pandoc.Templates,
Text.Pandoc.XML,
Text.Pandoc.SelfContained,
Text.Pandoc.Process
Text.Pandoc.Process,
Text.Pandoc.CSS
Other-Modules: Text.Pandoc.Readers.Docx.Lists,
Text.Pandoc.Readers.Docx.Reducible,
Text.Pandoc.Readers.Docx.Parse,

35
src/Text/Pandoc/CSS.hs Normal file
View file

@ -0,0 +1,35 @@
module Text.Pandoc.CSS ( foldOrElse,
pickStyleAttrProps
)
where
import Text.Pandoc.Shared (trim)
import Text.Parsec
import Text.Parsec.String
import Control.Applicative ((<*))
ruleParser :: Parser (String, String)
ruleParser = do
p <- many1 (noneOf ":") <* char ':'
v <- many1 (noneOf ":;") <* char ';' <* spaces
return (trim p, trim v)
styleAttrParser :: Parser [(String, String)]
styleAttrParser = do
p <- many1 ruleParser
return p
orElse :: Eq a => a -> a -> a -> a
orElse v x y = if v == x then y else x
foldOrElse :: Eq a => a -> [a] -> a
foldOrElse v xs = foldr (orElse v) v xs
eitherToMaybe :: Either a b -> Maybe b
eitherToMaybe (Right x) = Just x
eitherToMaybe _ = Nothing
pickStyleAttrProps :: [String] -> String -> Maybe String
pickStyleAttrProps lookupProps styleAttr = do
styles <- eitherToMaybe $ parse styleAttrParser "" styleAttr
foldOrElse Nothing $ map (flip lookup styles) lookupProps

View file

@ -64,6 +64,7 @@ import Data.Default (Default (..), def)
import Control.Monad.Reader (Reader,ask, asks, local, runReader)
import Network.URI (isURI)
import Text.Pandoc.Error
import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
import Text.Parsec.Error
@ -252,6 +253,22 @@ pListItem nonItem = do
let liDiv = maybe mempty (\x -> B.divWith (x, [], []) mempty) (lookup "id" attr)
(liDiv <>) <$> pInTags "li" block <* skipMany nonItem
parseListStyleType :: String -> ListNumberStyle
parseListStyleType "lower-roman" = LowerRoman
parseListStyleType "upper-roman" = UpperRoman
parseListStyleType "lower-alpha" = LowerAlpha
parseListStyleType "upper-alpha" = UpperAlpha
parseListStyleType "decimal" = Decimal
parseListStyleType _ = DefaultStyle
parseTypeAttr :: String -> ListNumberStyle
parseTypeAttr "i" = LowerRoman
parseTypeAttr "I" = UpperRoman
parseTypeAttr "a" = LowerAlpha
parseTypeAttr "A" = UpperAlpha
parseTypeAttr "1" = Decimal
parseTypeAttr _ = DefaultStyle
pOrderedList :: TagParser Blocks
pOrderedList = try $ do
TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" [])
@ -261,23 +278,19 @@ pOrderedList = try $ do
sta' = if all isDigit sta
then read sta
else 1
sty = fromMaybe (fromMaybe "" $
lookup "style" attribs) $
lookup "class" attribs
sty' = case sty of
"lower-roman" -> LowerRoman
"upper-roman" -> UpperRoman
"lower-alpha" -> LowerAlpha
"upper-alpha" -> UpperAlpha
"decimal" -> Decimal
_ ->
case lookup "type" attribs of
Just "1" -> Decimal
Just "I" -> UpperRoman
Just "i" -> LowerRoman
Just "A" -> UpperAlpha
Just "a" -> LowerAlpha
_ -> DefaultStyle
pickListStyle = pickStyleAttrProps ["list-style-type", "list-style"]
typeAttr = fromMaybe "" $ lookup "type" attribs
classAttr = fromMaybe "" $ lookup "class" attribs
styleAttr = fromMaybe "" $ lookup "style" attribs
listStyle = fromMaybe "" $ pickListStyle styleAttr
sty' = foldOrElse DefaultStyle
[ parseTypeAttr typeAttr
, parseListStyleType classAttr
, parseListStyleType listStyle
]
let nonItem = pSatisfy (\t ->
not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
not (t ~== TagClose "ol"))