Fixed regression in CSS parsing with --self-contained
.
In 1b44acf0c5
we replaced some
hackish CSS parsing with css-text, which I thought was a complete
CSS parser. It turns out that it is very buggy, which results
in lots of things being silently dropped from CSS when
`--self-contained` is used (#2224).
This commit replaces the use of css-text with a small but
more principled css preprocessor, which only removes whitespace
and replaces URLs with base 64 data when possible.
Closes #2224.
This commit is contained in:
parent
2768d1c2d2
commit
ed9a118b54
2 changed files with 44 additions and 37 deletions
|
@ -262,7 +262,6 @@ Library
|
|||
temporary >= 1.1 && < 1.3,
|
||||
blaze-html >= 0.5 && < 0.9,
|
||||
blaze-markup >= 0.5.1 && < 0.8,
|
||||
css-text >= 0.1.2 && < 0.3,
|
||||
yaml >= 0.8.8.2 && < 0.9,
|
||||
scientific >= 0.2 && < 0.4,
|
||||
vector >= 0.10 && < 0.11,
|
||||
|
|
|
@ -40,20 +40,16 @@ import System.FilePath (takeExtension, takeDirectory, (</>))
|
|||
import Data.Char (toLower, isAscii, isAlphaNum)
|
||||
import Codec.Compression.GZip as Gzip
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import Text.Pandoc.Shared (renderTags', err, fetchItem')
|
||||
import Text.Pandoc.Shared (renderTags', err, fetchItem', warn, trim)
|
||||
import Text.Pandoc.MediaBag (MediaBag)
|
||||
import Text.Pandoc.MIME (MimeType)
|
||||
import Text.Pandoc.UTF8 (toString)
|
||||
import Text.Pandoc.Options (WriterOptions(..))
|
||||
import Data.List (isPrefixOf)
|
||||
import Control.Applicative
|
||||
import Text.CSS.Parse (parseNestedBlocks, NestedBlock(..))
|
||||
import Text.CSS.Render (renderNestedBlocks)
|
||||
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
|
||||
import qualified Data.Text as T
|
||||
import Data.Text (Text)
|
||||
import Data.Text.Lazy (toStrict)
|
||||
import Data.Text.Lazy.Builder (toLazyText)
|
||||
import Text.Parsec (runParserT, ParsecT)
|
||||
import qualified Text.Parsec as P
|
||||
import Control.Monad.Trans (lift)
|
||||
|
||||
isOk :: Char -> Bool
|
||||
isOk c = isAscii c && isAlphaNum c
|
||||
|
@ -94,40 +90,52 @@ convertTag media sourceURL t@(TagOpen "link" as) =
|
|||
return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"])
|
||||
convertTag _ _ t = return t
|
||||
|
||||
-- NOTE: This is really crude, it doesn't respect CSS comments.
|
||||
cssURLs :: MediaBag -> Maybe String -> FilePath -> ByteString
|
||||
-> IO ByteString
|
||||
cssURLs media sourceURL d orig = do
|
||||
case parseNestedBlocks (decodeUtf8 orig) of
|
||||
Left _err -> return orig
|
||||
Right bs -> (encodeUtf8 . toStrict . toLazyText . renderNestedBlocks)
|
||||
<$> mapM (handleCSSUrls media sourceURL d) bs
|
||||
res <- runParserT (parseCSSUrls media sourceURL d) () "css" orig
|
||||
case res of
|
||||
Left e -> warn ("Could not parse CSS: " ++ show e) >> return orig
|
||||
Right bs -> return bs
|
||||
|
||||
handleCSSUrls :: MediaBag -> Maybe String -> FilePath -> NestedBlock
|
||||
-> IO NestedBlock
|
||||
handleCSSUrls media sourceURL d (NestedBlock t bs) =
|
||||
NestedBlock t <$> mapM (handleCSSUrls media sourceURL d) bs
|
||||
handleCSSUrls media sourceURL d (LeafBlock (selector, attrs)) = do
|
||||
attrs' <- mapM (handleCSSAttr media sourceURL d) attrs
|
||||
return (LeafBlock (selector, attrs'))
|
||||
parseCSSUrls :: MediaBag -> Maybe String -> FilePath
|
||||
-> ParsecT ByteString () IO ByteString
|
||||
parseCSSUrls media sourceURL d = B.concat <$> P.many
|
||||
(pCSSWhite <|> pCSSComment <|> pCSSUrl media sourceURL d <|> pCSSOther)
|
||||
|
||||
handleCSSAttr :: MediaBag -> Maybe String -> FilePath -> (Text, Text)
|
||||
-> IO (Text, Text)
|
||||
handleCSSAttr media sourceURL d (key, val) =
|
||||
if "url(" `T.isPrefixOf` val
|
||||
then do
|
||||
let url = T.unpack $ dropParens $ T.drop 3 val
|
||||
case url of
|
||||
'#':_ -> return (key, val)
|
||||
'd':'a':'t':'a':':':_ -> return (key, val)
|
||||
_ -> do
|
||||
let url' = if isURI url then url else d </> url
|
||||
enc <- getDataURI media sourceURL "" url'
|
||||
return (key, T.pack enc)
|
||||
else return (key, val)
|
||||
pCSSWhite :: ParsecT ByteString () IO ByteString
|
||||
pCSSWhite = P.space >> P.spaces >> return B.empty
|
||||
|
||||
pCSSComment :: ParsecT ByteString () IO ByteString
|
||||
pCSSComment = P.try $ do
|
||||
P.string "/*"
|
||||
P.manyTill P.anyChar (P.try (P.string "*/"))
|
||||
return B.empty
|
||||
|
||||
pCSSOther :: ParsecT ByteString () IO ByteString
|
||||
pCSSOther = do
|
||||
(B.pack <$> P.many1 (P.noneOf "u/ \n\r\t")) <|>
|
||||
(B.singleton <$> P.char 'u') <|>
|
||||
(B.singleton <$> P.char '/')
|
||||
|
||||
pCSSUrl :: MediaBag -> Maybe String -> FilePath
|
||||
-> ParsecT ByteString () IO ByteString
|
||||
pCSSUrl media sourceURL d = P.try $ do
|
||||
P.string "url("
|
||||
P.spaces
|
||||
quote <- P.option Nothing (Just <$> P.oneOf "\"'")
|
||||
url <- P.manyTill P.anyChar (maybe (P.lookAhead (P.char ')')) P.char quote)
|
||||
P.spaces
|
||||
P.char ')'
|
||||
let fallback = B.pack ("url(" ++ maybe "" (:[]) quote ++ trim url ++
|
||||
maybe "" (:[]) quote ++ ")")
|
||||
case trim url of
|
||||
'#':_ -> return fallback
|
||||
'd':'a':'t':'a':':':_ -> return fallback
|
||||
u -> do let url' = if isURI u then u else d </> u
|
||||
enc <- lift $ getDataURI media sourceURL "" url'
|
||||
return (B.pack enc)
|
||||
|
||||
dropParens :: Text -> Text
|
||||
dropParens = T.dropAround (`elem` ['(',')','"','\'',' ','\t','\n','\r'])
|
||||
|
||||
getDataURI :: MediaBag -> Maybe String -> MimeType -> String
|
||||
-> IO String
|
||||
|
|
Loading…
Add table
Reference in a new issue