HTML reader: Use the lang value of <html> to set the lang meta value. (#3765)

* HTML reader: Use the lang value of <html> to set the lang meta value.

* Fix for pre-AMP environments.
This commit is contained in:
bucklereed 2017-06-27 09:19:37 +01:00 committed by John MacFarlane
parent 19d9482fc4
commit 460b6c470b
2 changed files with 15 additions and 0 deletions

View file

@ -53,6 +53,7 @@ import Text.Pandoc.Logging
import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Walk
import qualified Data.Map as M
import Data.Foldable ( for_ )
import Data.Maybe ( fromMaybe, isJust)
import Data.List ( intercalate, isPrefixOf )
import Data.Char ( isDigit, isLetter, isAlphaNum )
@ -134,6 +135,13 @@ type HTMLParser m s = ParserT s HTMLState (ReaderT HTMLLocal m)
-- | 'HTMLParser' with its input type parameter fixed to a list of
-- @Tag Text@ values.
type TagParser m = HTMLParser m [Tag Text]
-- | Parse an @\<html\>@ element.
--
-- The next tag is inspected with 'lookAhead', so it is not consumed here.
-- If it is an opening @html@ tag carrying a @lang@ attribute, that value is
-- recorded in the parser state as the @lang@ metadata field. The element is
-- then handed to 'pInTags' together with the 'block' parser.
--
-- NOTE(review): only an attribute literally named @lang@ is consulted; an
-- @xml:lang@ attribute presumably reaches this parser already stripped of
-- its namespace prefix -- confirm against the tag preprocessing code.
pHtml :: PandocMonad m => TagParser m Blocks
pHtml = try $ do
  -- A failed pattern match makes the parser fail; 'try' ensures that such a
  -- failure leaves the input untouched.
  TagOpen "html" attrs <- lookAhead pAnyTag
  case lookup "lang" attrs of
    Just langValue ->
      updateState (B.setMeta "lang" (B.text (T.unpack langValue)))
    Nothing -> return ()
  pInTags "html" block
-- | Parse a @\<body\>@ element by delegating to 'pInTags' with the tag name
-- @"body"@ and the 'block' parser.
pBody :: PandocMonad m => TagParser m Blocks
pBody = pInTags "body" block
@ -175,6 +183,7 @@ block = do
, pList
, pHrule
, pTable
, pHtml
, pHead
, pBody
, pDiv

View file

@ -30,4 +30,10 @@ tests = [ testGroup "base tag"
[ test html "anchor without href" $ "<a name=\"anchor\"/>" =?>
plain (spanWith ("anchor",[],[]) mempty)
]
, testGroup "lang"
[ test html "lang on <html>" $ "<html lang=\"es\">hola" =?>
setMeta "lang" (text "es") (doc (plain (text "hola")))
, test html "xml:lang on <html>" $ "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"es\"><head></head><body>hola</body></html>" =?>
setMeta "lang" (text "es") (doc (plain (text "hola")))
]
]