Docx Parse: Improved font recognition when specified in rFonts element
This commit is contained in:
parent
427466f80c
commit
973ed469de
1 changed files with 27 additions and 8 deletions
|
@ -1,4 +1,4 @@
|
||||||
{-# LANGUAGE PatternGuards #-}
|
{-# LANGUAGE PatternGuards, ViewPatterns #-}
|
||||||
|
|
||||||
{-
|
{-
|
||||||
Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
|
Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
|
||||||
|
@ -59,18 +59,19 @@ import Data.Bits ((.|.))
|
||||||
import qualified Data.ByteString.Lazy as B
|
import qualified Data.ByteString.Lazy as B
|
||||||
import qualified Text.Pandoc.UTF8 as UTF8
|
import qualified Text.Pandoc.UTF8 as UTF8
|
||||||
import Control.Monad.Reader
|
import Control.Monad.Reader
|
||||||
import Control.Applicative ((<$>))
|
import Control.Applicative ((<$>), (<|>))
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
import Text.Pandoc.Compat.Except
|
import Text.Pandoc.Compat.Except
|
||||||
import Text.TeXMath.Readers.OMML (readOMML)
|
import Text.TeXMath.Readers.OMML (readOMML)
|
||||||
import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..))
|
import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..))
|
||||||
import Text.TeXMath (Exp)
|
import Text.TeXMath (Exp)
|
||||||
import Data.Char (readLitChar)
|
import Data.Char (readLitChar, ord, chr)
|
||||||
|
|
||||||
data ReaderEnv = ReaderEnv { envNotes :: Notes
|
data ReaderEnv = ReaderEnv { envNotes :: Notes
|
||||||
, envNumbering :: Numbering
|
, envNumbering :: Numbering
|
||||||
, envRelationships :: [Relationship]
|
, envRelationships :: [Relationship]
|
||||||
, envMedia :: Media
|
, envMedia :: Media
|
||||||
|
, envFont :: Maybe Font
|
||||||
}
|
}
|
||||||
deriving Show
|
deriving Show
|
||||||
|
|
||||||
|
@ -234,7 +235,7 @@ archiveToDocx archive = do
|
||||||
numbering = archiveToNumbering archive
|
numbering = archiveToNumbering archive
|
||||||
rels = archiveToRelationships archive
|
rels = archiveToRelationships archive
|
||||||
media = archiveToMedia archive
|
media = archiveToMedia archive
|
||||||
rEnv = ReaderEnv notes numbering rels media
|
rEnv = ReaderEnv notes numbering rels media Nothing
|
||||||
doc <- runD (archiveToDocument archive) rEnv
|
doc <- runD (archiveToDocument archive) rEnv
|
||||||
return $ Docx doc
|
return $ Docx doc
|
||||||
|
|
||||||
|
@ -672,12 +673,21 @@ elemToRunElem :: NameSpaces -> Element -> D RunElem
|
||||||
elemToRunElem ns element
|
elemToRunElem ns element
|
||||||
| isElem ns "w" "t" element
|
| isElem ns "w" "t" element
|
||||||
|| isElem ns "w" "delText" element
|
|| isElem ns "w" "delText" element
|
||||||
|| isElem ns "m" "t" element =
|
|| isElem ns "m" "t" element = do
|
||||||
return $ TextRun $ strContent element
|
let str = strContent element
|
||||||
|
font <- asks envFont
|
||||||
|
case font of
|
||||||
|
Nothing -> return $ TextRun str
|
||||||
|
Just f -> return . TextRun $
|
||||||
|
map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
|
||||||
| isElem ns "w" "br" element = return LnBrk
|
| isElem ns "w" "br" element = return LnBrk
|
||||||
| isElem ns "w" "tab" element = return Tab
|
| isElem ns "w" "tab" element = return Tab
|
||||||
| isElem ns "w" "sym" element = return (getSymChar ns element)
|
| isElem ns "w" "sym" element = return (getSymChar ns element)
|
||||||
| otherwise = throwError WrongElem
|
| otherwise = throwError WrongElem
|
||||||
|
where
|
||||||
|
lowerFromPrivate (ord -> c)
|
||||||
|
| c >= ord '\xF000' = chr $ c - ord '\xF000'
|
||||||
|
| otherwise = chr c
|
||||||
|
|
||||||
-- The char attribute is a hex string
|
-- The char attribute is a hex string
|
||||||
getSymChar :: NameSpaces -> Element -> RunElem
|
getSymChar :: NameSpaces -> Element -> RunElem
|
||||||
|
@ -700,6 +710,15 @@ stringToFont _ = Nothing
|
||||||
elemToRunElems :: NameSpaces -> Element -> D [RunElem]
|
elemToRunElems :: NameSpaces -> Element -> D [RunElem]
|
||||||
elemToRunElems ns element
|
elemToRunElems ns element
|
||||||
| isElem ns "w" "r" element
|
| isElem ns "w" "r" element
|
||||||
|| isElem ns "m" "r" element =
|
|| isElem ns "m" "r" element = do
|
||||||
mapD (elemToRunElem ns) (elChildren element)
|
let qualName = elemName ns "w"
|
||||||
|
let font = do
|
||||||
|
fontElem <- findElement (qualName "rFonts") element
|
||||||
|
stringToFont =<<
|
||||||
|
(foldr (<|>) Nothing $
|
||||||
|
map (flip findAttr fontElem . qualName) ["ascii", "hAnsi"])
|
||||||
|
local (setFont font) (mapD (elemToRunElem ns) (elChildren element))
|
||||||
elemToRunElems _ _ = throwError WrongElem
|
elemToRunElems _ _ = throwError WrongElem
|
||||||
|
|
||||||
|
setFont :: Maybe Font -> ReaderEnv -> ReaderEnv
|
||||||
|
setFont f s = s{envFont = f}
|
||||||
|
|
Loading…
Reference in a new issue