HTML reader/writer: Better handling of <q> with cite attribute (#5837)
* HTML reader: Handle cite attribute for quotes. If a `<q>` tag has a `cite` attribute, we interpret it as a Quoted element with an inner Span. Closes #5798 * Refactor url canonicalization into a helper function * Modify HTML writer to handle quote with cite. [0]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/q
This commit is contained in:
parent
91c325c714
commit
45479114e8
5 changed files with 69 additions and 25 deletions
|
@ -684,19 +684,27 @@ pSelfClosing f g = do
|
|||
return open
|
||||
|
||||
pQ :: PandocMonad m => TagParser m Inlines
|
||||
pQ = do
|
||||
context <- asks quoteContext
|
||||
let quoteType = case context of
|
||||
InDoubleQuote -> SingleQuote
|
||||
_ -> DoubleQuote
|
||||
let innerQuoteContext = if quoteType == SingleQuote
|
||||
then InSingleQuote
|
||||
else InDoubleQuote
|
||||
let constructor = case quoteType of
|
||||
SingleQuote -> B.singleQuoted
|
||||
DoubleQuote -> B.doubleQuoted
|
||||
withQuoteContext innerQuoteContext $
|
||||
pInlinesInTags "q" constructor
|
||||
pQ = choice $ map try [citedQuote, normalQuote]
|
||||
where citedQuote = do
|
||||
tag <- pSatisfy $ tagOpenLit "q" (any ((=="cite") . fst))
|
||||
|
||||
url <- canonicalizeUrl $ T.unpack $ fromAttrib "cite" tag
|
||||
let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $
|
||||
maybeFromAttrib "id" tag
|
||||
let cls = words $ T.unpack $ fromAttrib "class" tag
|
||||
|
||||
makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url)])
|
||||
normalQuote = do
|
||||
pSatisfy $ tagOpenLit "q" (const True)
|
||||
makeQuote id
|
||||
makeQuote wrapper = do
|
||||
ctx <- asks quoteContext
|
||||
let (constructor, innerContext) = case ctx of
|
||||
InDoubleQuote -> (B.singleQuoted, InSingleQuote)
|
||||
_ -> (B.doubleQuoted, InDoubleQuote)
|
||||
|
||||
content <- withQuoteContext innerContext (mconcat <$> manyTill inline (pCloses "q"))
|
||||
return $ extractSpaces (constructor . wrapper) content
|
||||
|
||||
pEmph :: PandocMonad m => TagParser m Inlines
|
||||
pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph
|
||||
|
@ -757,21 +765,13 @@ pLink = try $ do
|
|||
Nothing ->
|
||||
return $ extractSpaces (B.spanWith (uid, cls, [])) lab
|
||||
Just url' -> do
|
||||
mbBaseHref <- baseHref <$> getState
|
||||
let url = case (parseURIReference url', mbBaseHref) of
|
||||
(Just rel, Just bs) ->
|
||||
show (rel `nonStrictRelativeTo` bs)
|
||||
_ -> url'
|
||||
url <- canonicalizeUrl url'
|
||||
return $ extractSpaces (B.linkWith (uid, cls, []) (escapeURI url) title) lab
|
||||
|
||||
pImage :: PandocMonad m => TagParser m Inlines
|
||||
pImage = do
|
||||
tag <- pSelfClosing (=="img") (isJust . lookup "src")
|
||||
mbBaseHref <- baseHref <$> getState
|
||||
let url' = T.unpack $ fromAttrib "src" tag
|
||||
let url = case (parseURIReference url', mbBaseHref) of
|
||||
(Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs)
|
||||
_ -> url'
|
||||
url <- canonicalizeUrl $ T.unpack $ fromAttrib "src" tag
|
||||
let title = T.unpack $ fromAttrib "title" tag
|
||||
let alt = T.unpack $ fromAttrib "alt" tag
|
||||
let uid = T.unpack $ fromAttrib "id" tag
|
||||
|
@ -1292,6 +1292,17 @@ isSpace '\n' = True
|
|||
isSpace '\r' = True
|
||||
isSpace _ = False
|
||||
|
||||
-- Utilities
|
||||
|
||||
-- | Adjusts a url according to the document's base URL.
|
||||
canonicalizeUrl :: PandocMonad m => String -> TagParser m String
|
||||
canonicalizeUrl url = do
|
||||
mbBaseHref <- baseHref <$> getState
|
||||
return $ case (parseURIReference url, mbBaseHref) of
|
||||
(Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs)
|
||||
_ -> url
|
||||
|
||||
|
||||
-- Instances
|
||||
|
||||
instance HasMacros HTMLState where
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE LambdaCase #-}
|
||||
{-# LANGUAGE MultiWayIf #-}
|
||||
{-# LANGUAGE NoImplicitPrelude #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
@ -1047,10 +1048,17 @@ inlineToHtml opts inline = do
|
|||
strToHtml "’")
|
||||
DoubleQuote -> (strToHtml "“",
|
||||
strToHtml "”")
|
||||
in if writerHtmlQTags opts
|
||||
|
||||
in if writerHtmlQTags opts
|
||||
then do
|
||||
modify $ \st -> st{ stQuotes = True }
|
||||
H.q `fmap` inlineListToHtml opts lst
|
||||
let (maybeAttr, lst') = case lst of
|
||||
[Span attr@(_, _, kvs) cs]
|
||||
| any ((=="cite") . fst) kvs
|
||||
-> (Just attr, cs)
|
||||
cs -> (Nothing, cs)
|
||||
H.q `fmap` inlineListToHtml opts lst'
|
||||
>>= maybe return (addAttrs opts) maybeAttr
|
||||
else (\x -> leftQuote >> x >> rightQuote)
|
||||
`fmap` inlineListToHtml opts lst
|
||||
(Math t str) -> do
|
||||
|
|
|
@ -13,6 +13,11 @@ import Text.Pandoc.Builder
|
|||
html :: (ToPandoc a) => a -> String
|
||||
html = unpack . purely (writeHtml4String def{ writerWrapText = WrapNone }) . toPandoc
|
||||
|
||||
htmlQTags :: (ToPandoc a) => a -> String
|
||||
htmlQTags = unpack
|
||||
. purely (writeHtml4String def{ writerWrapText = WrapNone, writerHtmlQTags = True })
|
||||
. toPandoc
|
||||
|
||||
{-
|
||||
"my test" =: X =?> Y
|
||||
|
||||
|
@ -48,4 +53,16 @@ tests = [ testGroup "inline code"
|
|||
definitionList [(mempty, [para $ text "foo bar"])]
|
||||
=?> "<dl><dt></dt><dd><p>foo bar</p></dd></dl>"
|
||||
]
|
||||
, testGroup "quotes"
|
||||
[ "quote with cite attribute (without q-tags)" =:
|
||||
doubleQuoted (spanWith ("", [], [("cite", "http://example.org")]) (str "examples"))
|
||||
=?> "“<span cite=\"http://example.org\">examples</span>”"
|
||||
, tQ "quote with cite attribute (with q-tags)" $
|
||||
doubleQuoted (spanWith ("", [], [("cite", "http://example.org")]) (str "examples"))
|
||||
=?> "<q cite=\"http://example.org\">examples</q>"
|
||||
]
|
||||
]
|
||||
where
|
||||
tQ :: (ToString a, ToPandoc a)
|
||||
=> String -> (a, String) -> TestTree
|
||||
tQ = test htmlQTags
|
||||
|
|
|
@ -81,6 +81,10 @@ span.pandocNoteMarker { }
|
|||
</blockquote>
|
||||
<p>And a following paragraph.</p>
|
||||
<hr />
|
||||
<h1>Inline quotes</h1>
|
||||
<p>Normal text but then a <q cite="https://www.imdb.com/title/tt0062622/quotes/qt0396921">inline quote</q>.</p>
|
||||
<p><q>Missing a cite attribute means its just normal text</q></p>
|
||||
<hr />
|
||||
<h1>Code Blocks</h1>
|
||||
<p>Code:</p>
|
||||
<pre><code>---- (should be four hyphens)
|
||||
|
|
|
@ -51,6 +51,10 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl
|
|||
[Para [Str "Don't",Space,Str "quote",Space,Str "me."]]]
|
||||
,Para [Str "And",Space,Str "a",Space,Str "following",Space,Str "paragraph."]
|
||||
,HorizontalRule
|
||||
,Header 1 ("inline-quotes",[],[]) [Str "Inline",Space,Str "quotes"]
|
||||
,Para [Str "Normal",Space,Str "text",Space,Str "but",Space,Str "then",Space,Str "a",Space,Quoted DoubleQuote [Span ("",[],[("cite","https://www.imdb.com/title/tt0062622/quotes/qt0396921")]) [Str "inline",Space,Str "quote"]],Str "."]
|
||||
,Para [Quoted DoubleQuote [Str "Missing",Space,Str "a",Space,Str "cite",Space,Str "attribute",Space,Str "means",Space,Str "its",Space,Str "just",Space,Str "normal",Space,Str "text"]]
|
||||
,HorizontalRule
|
||||
,Header 1 ("code-blocks",[],[]) [Str "Code",Space,Str "Blocks"]
|
||||
,Para [Str "Code:"]
|
||||
,CodeBlock ("",[],[]) "---- (should be four hyphens)\n\nsub status {\n print \"working\";\n}\n\nthis code block is indented by one tab"
|
||||
|
|
Loading…
Add table
Reference in a new issue