DocBook reader: handle complete set of entities as specified at <https://www.w3.org/2003/entities/2007doc/byalpha.html>. Closes #7938.
This commit is contained in:
parent
7dea81f992
commit
5375bd1446
3 changed files with 2262 additions and 2 deletions
2237
data/docbook-entities.txt
Normal file
2237
data/docbook-entities.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -93,6 +93,8 @@ data-files:
|
||||||
data/templates/default.markua
|
data/templates/default.markua
|
||||||
-- translations
|
-- translations
|
||||||
data/translations/*.yaml
|
data/translations/*.yaml
|
||||||
|
-- entities
|
||||||
|
data/docbook-entities.txt
|
||||||
-- source files for reference.docx
|
-- source files for reference.docx
|
||||||
data/docx/[Content_Types].xml
|
data/docx/[Content_Types].xml
|
||||||
data/docx/_rels/.rels
|
data/docx/_rels/.rels
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
{-# LANGUAGE TemplateHaskell #-}
|
||||||
{- |
|
{- |
|
||||||
Module : Text.Pandoc.Readers.DocBook
|
Module : Text.Pandoc.Readers.DocBook
|
||||||
Copyright : Copyright (C) 2006-2022 John MacFarlane
|
Copyright : Copyright (C) 2006-2022 John MacFarlane
|
||||||
|
@ -12,7 +13,9 @@ Conversion of DocBook XML to 'Pandoc' document.
|
||||||
-}
|
-}
|
||||||
module Text.Pandoc.Readers.DocBook ( readDocBook ) where
|
module Text.Pandoc.Readers.DocBook ( readDocBook ) where
|
||||||
import Control.Monad.State.Strict
|
import Control.Monad.State.Strict
|
||||||
import Data.Char (isSpace, isLetter)
|
import Data.ByteString (ByteString)
|
||||||
|
import Data.FileEmbed
|
||||||
|
import Data.Char (isSpace, isLetter, chr)
|
||||||
import Data.Default
|
import Data.Default
|
||||||
import Data.Either (rights)
|
import Data.Either (rights)
|
||||||
import Data.Foldable (asum)
|
import Data.Foldable (asum)
|
||||||
|
@ -21,6 +24,8 @@ import Data.List (intersperse,elemIndex)
|
||||||
import Data.List.NonEmpty (nonEmpty)
|
import Data.List.NonEmpty (nonEmpty)
|
||||||
import Data.Maybe (catMaybes,fromMaybe,mapMaybe,maybeToList)
|
import Data.Maybe (catMaybes,fromMaybe,mapMaybe,maybeToList)
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
|
import Data.Text.Read as TR
|
||||||
|
import Data.Text.Encoding (decodeUtf8)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
import qualified Data.Text.Lazy as TL
|
import qualified Data.Text.Lazy as TL
|
||||||
import Control.Monad.Except (throwError)
|
import Control.Monad.Except (throwError)
|
||||||
|
@ -33,6 +38,7 @@ import Text.Pandoc.Logging (LogMessage(..))
|
||||||
import Text.Pandoc.Shared (safeRead, extractSpaces)
|
import Text.Pandoc.Shared (safeRead, extractSpaces)
|
||||||
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
||||||
import Text.TeXMath (readMathML, writeTeX)
|
import Text.TeXMath (readMathML, writeTeX)
|
||||||
|
import qualified Data.Map as M
|
||||||
import Text.Pandoc.XML.Light
|
import Text.Pandoc.XML.Light
|
||||||
|
|
||||||
{-
|
{-
|
||||||
|
@ -548,7 +554,8 @@ readDocBook :: (PandocMonad m, ToSources a)
|
||||||
readDocBook _ inp = do
|
readDocBook _ inp = do
|
||||||
let sources = toSources inp
|
let sources = toSources inp
|
||||||
tree <- either (throwError . PandocXMLError "") return $
|
tree <- either (throwError . PandocXMLError "") return $
|
||||||
parseXMLContents
|
parseXMLContentsWithEntities
|
||||||
|
docbookEntityMap
|
||||||
(TL.fromStrict . handleInstructions . sourcesToText $ sources)
|
(TL.fromStrict . handleInstructions . sourcesToText $ sources)
|
||||||
(bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
|
(bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
|
||||||
return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
|
return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
|
||||||
|
@ -1335,3 +1342,17 @@ paraToPlain :: Block -> Block
|
||||||
paraToPlain (Para ils) = Plain ils
|
paraToPlain (Para ils) = Plain ils
|
||||||
paraToPlain x = x
|
paraToPlain x = x
|
||||||
|
|
||||||
|
-- | Lookup table from entity names to their replacement text, built from
-- the embedded entity list in 'docbookEntities'.
--
-- Each line is split on whitespace: the first word is the entity name and
-- every following word is read as a hexadecimal code point; the resulting
-- characters are concatenated to form the replacement text.  A line with a
-- name but no usable code points maps that name to the empty text.
--
-- Blank lines contribute no entry (so no empty entity name ends up in the
-- map).  Words that do not begin with hexadecimal digits, or whose value is
-- outside the 'Char' range, are dropped.
docbookEntityMap :: M.Map Text Text
docbookEntityMap =
  M.fromList (mapMaybe lineToPair (T.lines (decodeUtf8 docbookEntities)))
  where
    -- Turn one line of the entity list into a (name, replacement) pair.
    lineToPair :: Text -> Maybe (Text, Text)
    lineToPair l =
      case T.words l of
        (name:codes) -> Just (name, T.pack (mapMaybe readHex codes))
        []           -> Nothing  -- blank line: nothing to register

    -- Read a hexadecimal code point.  Any text left over after the leading
    -- hex digits is ignored.
    readHex :: Text -> Maybe Char
    readHex t =
      case TR.hexadecimal t of
        Right (n, _)
          -- 'chr' raises an error outside the valid 'Char' range, so
          -- reject such values here instead of letting them through.
          | n >= 0 && n <= maxCodePoint -> Just (chr n)
        _ -> Nothing

    -- Largest code point representable as a 'Char'.
    maxCodePoint :: Int
    maxCodePoint = fromEnum (maxBound :: Char)
|
||||||
|
|
||||||
|
-- | Raw bytes of @data/docbook-entities.txt@, spliced into the module at
-- compile time by 'embedFile' (hence the @TemplateHaskell@ pragma).
-- NOTE(review): the path is presumably resolved relative to the package
-- root at build time -- confirm against the build setup.
docbookEntities :: ByteString
|
||||||
|
docbookEntities = $(embedFile "data/docbook-entities.txt")
|
||||||
|
|
Loading…
Reference in a new issue