HTML reader: Support base tag.
We only support the href attribute, as there's no place for "target" in the Pandoc document model for links. Added HTML reader test module, with tests for this feature. Closes #1751.
This commit is contained in:
parent
75cfa7b462
commit
e06810499e
4 changed files with 58 additions and 7 deletions
|
@ -481,6 +481,7 @@ Test-Suite test-pandoc
|
||||||
Tests.Shared
|
Tests.Shared
|
||||||
Tests.Walk
|
Tests.Walk
|
||||||
Tests.Readers.LaTeX
|
Tests.Readers.LaTeX
|
||||||
|
Tests.Readers.HTML
|
||||||
Tests.Readers.Markdown
|
Tests.Readers.Markdown
|
||||||
Tests.Readers.Org
|
Tests.Readers.Org
|
||||||
Tests.Readers.RST
|
Tests.Readers.RST
|
||||||
|
|
|
@ -62,7 +62,7 @@ import Debug.Trace (trace)
|
||||||
import Text.TeXMath (readMathML, writeTeX)
|
import Text.TeXMath (readMathML, writeTeX)
|
||||||
import Data.Default (Default (..), def)
|
import Data.Default (Default (..), def)
|
||||||
import Control.Monad.Reader (Reader,ask, asks, local, runReader)
|
import Control.Monad.Reader (Reader,ask, asks, local, runReader)
|
||||||
|
import Network.URI (isURI)
|
||||||
import Text.Pandoc.Error
|
import Text.Pandoc.Error
|
||||||
|
|
||||||
import Text.Parsec.Error
|
import Text.Parsec.Error
|
||||||
|
@ -74,7 +74,8 @@ readHtml :: ReaderOptions -- ^ Reader options
|
||||||
-> Either PandocError Pandoc
|
-> Either PandocError Pandoc
|
||||||
readHtml opts inp =
|
readHtml opts inp =
|
||||||
mapLeft (ParseFailure . getError) . flip runReader def $
|
mapLeft (ParseFailure . getError) . flip runReader def $
|
||||||
runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags
|
runParserT parseDoc (HTMLState def{ stateOptions = opts } [] Nothing)
|
||||||
|
"source" tags
|
||||||
where tags = stripPrefixes . canonicalizeTags $
|
where tags = stripPrefixes . canonicalizeTags $
|
||||||
parseTagsOptions parseOptions{ optTagPosition = True } inp
|
parseTagsOptions parseOptions{ optTagPosition = True } inp
|
||||||
parseDoc = do
|
parseDoc = do
|
||||||
|
@ -98,7 +99,8 @@ replaceNotes' x = return x
|
||||||
data HTMLState =
|
data HTMLState =
|
||||||
HTMLState
|
HTMLState
|
||||||
{ parserState :: ParserState,
|
{ parserState :: ParserState,
|
||||||
noteTable :: [(String, Blocks)]
|
noteTable :: [(String, Blocks)],
|
||||||
|
baseHref :: Maybe String
|
||||||
}
|
}
|
||||||
|
|
||||||
data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
|
data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
|
||||||
|
@ -120,7 +122,7 @@ pBody :: TagParser Blocks
|
||||||
pBody = pInTags "body" block
|
pBody = pInTags "body" block
|
||||||
|
|
||||||
pHead :: TagParser Blocks
|
pHead :: TagParser Blocks
|
||||||
pHead = pInTags "head" $ pTitle <|> pMetaTag <|> (mempty <$ pAnyTag)
|
pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag)
|
||||||
where pTitle = pInTags "title" inline >>= setTitle . trimInlines
|
where pTitle = pInTags "title" inline >>= setTitle . trimInlines
|
||||||
setTitle t = mempty <$ (updateState $ B.setMeta "title" t)
|
setTitle t = mempty <$ (updateState $ B.setMeta "title" t)
|
||||||
pMetaTag = do
|
pMetaTag = do
|
||||||
|
@ -132,6 +134,17 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> (mempty <$ pAnyTag)
|
||||||
let content = fromAttrib "content" mt
|
let content = fromAttrib "content" mt
|
||||||
updateState $ B.setMeta name (B.text content)
|
updateState $ B.setMeta name (B.text content)
|
||||||
return mempty
|
return mempty
|
||||||
|
pBaseTag = do
|
||||||
|
bt <- pSatisfy (~== TagOpen "base" [])
|
||||||
|
let baseH = fromAttrib "href" bt
|
||||||
|
if null baseH
|
||||||
|
then return mempty
|
||||||
|
else do
|
||||||
|
let baseH' = case reverse baseH of
|
||||||
|
'/':_ -> baseH
|
||||||
|
_ -> baseH ++ "/"
|
||||||
|
updateState $ \st -> st{ baseHref = Just baseH' }
|
||||||
|
return mempty
|
||||||
|
|
||||||
block :: TagParser Blocks
|
block :: TagParser Blocks
|
||||||
block = do
|
block = do
|
||||||
|
@ -566,7 +579,11 @@ pAnchor = try $ do
|
||||||
pRelLink :: TagParser Inlines
|
pRelLink :: TagParser Inlines
|
||||||
pRelLink = try $ do
|
pRelLink = try $ do
|
||||||
tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "href"))
|
tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "href"))
|
||||||
let url = fromAttrib "href" tag
|
mbBaseHref <- baseHref <$> getState
|
||||||
|
let url' = fromAttrib "href" tag
|
||||||
|
let url = case (isURI url', mbBaseHref) of
|
||||||
|
(False, Just h) -> h ++ url'
|
||||||
|
_ -> url'
|
||||||
let title = fromAttrib "title" tag
|
let title = fromAttrib "title" tag
|
||||||
let uid = fromAttrib "id" tag
|
let uid = fromAttrib "id" tag
|
||||||
let spanC = case uid of
|
let spanC = case uid of
|
||||||
|
@ -578,7 +595,11 @@ pRelLink = try $ do
|
||||||
pImage :: TagParser Inlines
|
pImage :: TagParser Inlines
|
||||||
pImage = do
|
pImage = do
|
||||||
tag <- pSelfClosing (=="img") (isJust . lookup "src")
|
tag <- pSelfClosing (=="img") (isJust . lookup "src")
|
||||||
let url = fromAttrib "src" tag
|
mbBaseHref <- baseHref <$> getState
|
||||||
|
let url' = fromAttrib "src" tag
|
||||||
|
let url = case (isURI url', mbBaseHref) of
|
||||||
|
(False, Just h) -> h ++ url'
|
||||||
|
_ -> url'
|
||||||
let title = fromAttrib "title" tag
|
let title = fromAttrib "title" tag
|
||||||
let alt = fromAttrib "alt" tag
|
let alt = fromAttrib "alt" tag
|
||||||
return $ B.image (escapeURI url) title (B.text alt)
|
return $ B.image (escapeURI url) title (B.text alt)
|
||||||
|
@ -945,7 +966,7 @@ instance HasReaderOptions HTMLState where
|
||||||
extractReaderOptions = extractReaderOptions . parserState
|
extractReaderOptions = extractReaderOptions . parserState
|
||||||
|
|
||||||
instance Default HTMLState where
|
instance Default HTMLState where
|
||||||
def = HTMLState def []
|
def = HTMLState def [] Nothing
|
||||||
|
|
||||||
instance HasMeta HTMLState where
|
instance HasMeta HTMLState where
|
||||||
setMeta s b st = st {parserState = setMeta s b $ parserState st}
|
setMeta s b st = st {parserState = setMeta s b $ parserState st}
|
||||||
|
|
27
tests/Tests/Readers/HTML.hs
Normal file
27
tests/Tests/Readers/HTML.hs
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
module Tests.Readers.HTML (tests) where
|
||||||
|
|
||||||
|
import Text.Pandoc.Definition
|
||||||
|
import Test.Framework
|
||||||
|
import Tests.Helpers
|
||||||
|
import Tests.Arbitrary()
|
||||||
|
import Text.Pandoc.Builder
|
||||||
|
import Text.Pandoc
|
||||||
|
import Text.Pandoc.Error
|
||||||
|
|
||||||
|
html :: String -> Pandoc
|
||||||
|
html = handleError . readHtml def
|
||||||
|
|
||||||
|
tests :: [Test]
|
||||||
|
tests = [ testGroup "base tag"
|
||||||
|
[ test html "simple" $
|
||||||
|
"<head><base href=\"http://www.w3schools.com/images\" ></head><body><img src=\"stickman.gif\" alt=\"Stickman\"></head>" =?>
|
||||||
|
plain (image "http://www.w3schools.com/images/stickman.gif" "" (text "Stickman"))
|
||||||
|
, test html "slash at end of base" $
|
||||||
|
"<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"stickman.gif\" alt=\"Stickman\"></head>" =?>
|
||||||
|
plain (image "http://www.w3schools.com/images/stickman.gif" "" (text "Stickman"))
|
||||||
|
, test html "absolute URL" $
|
||||||
|
"<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"http://example.com/stickman.gif\" alt=\"Stickman\"></head>" =?>
|
||||||
|
plain (image "http://example.com/stickman.gif" "" (text "Stickman"))
|
||||||
|
]
|
||||||
|
]
|
|
@ -8,6 +8,7 @@ import qualified Tests.Old
|
||||||
import qualified Tests.Readers.LaTeX
|
import qualified Tests.Readers.LaTeX
|
||||||
import qualified Tests.Readers.Markdown
|
import qualified Tests.Readers.Markdown
|
||||||
import qualified Tests.Readers.Org
|
import qualified Tests.Readers.Org
|
||||||
|
import qualified Tests.Readers.HTML
|
||||||
import qualified Tests.Readers.RST
|
import qualified Tests.Readers.RST
|
||||||
import qualified Tests.Readers.Docx
|
import qualified Tests.Readers.Docx
|
||||||
import qualified Tests.Readers.Txt2Tags
|
import qualified Tests.Readers.Txt2Tags
|
||||||
|
@ -46,6 +47,7 @@ tests = [ testGroup "Old" Tests.Old.tests
|
||||||
, testGroup "Readers"
|
, testGroup "Readers"
|
||||||
[ testGroup "LaTeX" Tests.Readers.LaTeX.tests
|
[ testGroup "LaTeX" Tests.Readers.LaTeX.tests
|
||||||
, testGroup "Markdown" Tests.Readers.Markdown.tests
|
, testGroup "Markdown" Tests.Readers.Markdown.tests
|
||||||
|
, testGroup "HTML" Tests.Readers.HTML.tests
|
||||||
, testGroup "Org" Tests.Readers.Org.tests
|
, testGroup "Org" Tests.Readers.Org.tests
|
||||||
, testGroup "RST" Tests.Readers.RST.tests
|
, testGroup "RST" Tests.Readers.RST.tests
|
||||||
, testGroup "Docx" Tests.Readers.Docx.tests
|
, testGroup "Docx" Tests.Readers.Docx.tests
|
||||||
|
|
Loading…
Add table
Reference in a new issue