Docx reader: pass code tests.
This commit is contained in:
parent
21295c5ab5
commit
bebea5e936
1 changed files with 47 additions and 33 deletions
|
@ -84,7 +84,7 @@ import Text.Pandoc.Readers.Docx.Parse
|
||||||
import Text.Pandoc.Readers.Docx.Lists
|
import Text.Pandoc.Readers.Docx.Lists
|
||||||
import Text.Pandoc.Readers.Docx.Reducible
|
import Text.Pandoc.Readers.Docx.Reducible
|
||||||
import Data.Maybe (mapMaybe, isJust, fromJust)
|
import Data.Maybe (mapMaybe, isJust, fromJust)
|
||||||
import Data.List (delete, isPrefixOf, (\\), intersect)
|
import Data.List (delete, isPrefixOf, (\\))
|
||||||
import qualified Data.ByteString as BS
|
import qualified Data.ByteString as BS
|
||||||
import qualified Data.ByteString.Lazy as B
|
import qualified Data.ByteString.Lazy as B
|
||||||
import Data.ByteString.Base64 (encode)
|
import Data.ByteString.Base64 (encode)
|
||||||
|
@ -99,7 +99,7 @@ readDocx opts bytes =
|
||||||
Nothing -> error $ "couldn't parse docx file"
|
Nothing -> error $ "couldn't parse docx file"
|
||||||
|
|
||||||
spansToKeep :: [String]
|
spansToKeep :: [String]
|
||||||
spansToKeep = ["list-item", "Definition", "DefinitionTerm"] ++ codeSpans
|
spansToKeep = ["list-item", "Definition", "DefinitionTerm"]
|
||||||
|
|
||||||
|
|
||||||
-- This is empty, but we put it in for future-proofing.
|
-- This is empty, but we put it in for future-proofing.
|
||||||
|
@ -108,20 +108,28 @@ divsToKeep = []
|
||||||
|
|
||||||
runStyleToContainers :: RunStyle -> [Container Inline]
|
runStyleToContainers :: RunStyle -> [Container Inline]
|
||||||
runStyleToContainers rPr =
|
runStyleToContainers rPr =
|
||||||
let formatters = mapMaybe id
|
let spanClassToContainers :: String -> [Container Inline]
|
||||||
|
spanClassToContainers s | s `elem` codeSpans =
|
||||||
|
[Container $ (\ils -> Code ("", [], []) (concatMap ilToCode ils))]
|
||||||
|
spanClassToContainers s | s `elem` spansToKeep =
|
||||||
|
[Container $ Span ("", [s], [])]
|
||||||
|
spanClassToContainers _ = []
|
||||||
|
|
||||||
|
classContainers = case rStyle rPr of
|
||||||
|
Nothing -> []
|
||||||
|
Just s -> spanClassToContainers s
|
||||||
|
|
||||||
|
formatters = map Container $ mapMaybe id
|
||||||
[ if isBold rPr then (Just Strong) else Nothing
|
[ if isBold rPr then (Just Strong) else Nothing
|
||||||
, if isItalic rPr then (Just Emph) else Nothing
|
, if isItalic rPr then (Just Emph) else Nothing
|
||||||
, if isSmallCaps rPr then (Just SmallCaps) else Nothing
|
, if isSmallCaps rPr then (Just SmallCaps) else Nothing
|
||||||
, if isStrike rPr then (Just Strikeout) else Nothing
|
, if isStrike rPr then (Just Strikeout) else Nothing
|
||||||
, if isSuperScript rPr then (Just Superscript) else Nothing
|
, if isSuperScript rPr then (Just Superscript) else Nothing
|
||||||
, if isSubScript rPr then (Just Subscript) else Nothing
|
, if isSubScript rPr then (Just Subscript) else Nothing
|
||||||
, rStyle rPr >>=
|
|
||||||
(\s -> if s `elem` spansToKeep then Just s else Nothing) >>=
|
|
||||||
(\s -> Just $ Span ("", [s], []))
|
|
||||||
, underline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)]))
|
, underline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)]))
|
||||||
]
|
]
|
||||||
in
|
in
|
||||||
map Container formatters
|
classContainers ++ formatters
|
||||||
|
|
||||||
|
|
||||||
divAttrToContainers :: [String] -> [(String, String)] -> [Container Block]
|
divAttrToContainers :: [String] -> [(String, String)] -> [Container Block]
|
||||||
|
@ -132,7 +140,9 @@ divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) =
|
||||||
[(Container $ \blks ->
|
[(Container $ \blks ->
|
||||||
Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))]
|
Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))]
|
||||||
divAttrToContainers (c:_) _ | c `elem` codeDivs =
|
divAttrToContainers (c:_) _ | c `elem` codeDivs =
|
||||||
[Container $ \blks -> CodeBlock ("", [], []) (concatMap blkToCode blks)]
|
-- This is a bit of a cludge. We make the codeblock from the raw
|
||||||
|
-- parparts in bodyPartToBlocks. But we need something to match against.
|
||||||
|
[Container $ \_ -> CodeBlock ("", [], []) ""]
|
||||||
divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs =
|
divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs =
|
||||||
let kvs' = filter (\(k,_) -> k /= "indent") kvs
|
let kvs' = filter (\(k,_) -> k /= "indent") kvs
|
||||||
in
|
in
|
||||||
|
@ -183,18 +193,23 @@ runElemToString (Tab) = ['\t']
|
||||||
runElemsToString :: [RunElem] -> String
|
runElemsToString :: [RunElem] -> String
|
||||||
runElemsToString = concatMap runElemToString
|
runElemsToString = concatMap runElemToString
|
||||||
|
|
||||||
|
runToString :: Run -> String
|
||||||
|
runToString (Run _ runElems) = runElemsToString runElems
|
||||||
|
runToString _ = ""
|
||||||
|
|
||||||
|
parPartToString :: ParPart -> String
|
||||||
|
parPartToString (PlainRun run) = runToString run
|
||||||
|
parPartToString (InternalHyperLink _ runs) = concatMap runToString runs
|
||||||
|
parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs
|
||||||
|
parPartToString _ = ""
|
||||||
|
|
||||||
|
|
||||||
inlineCodeContainer :: Container Inline -> Bool
|
inlineCodeContainer :: Container Inline -> Bool
|
||||||
inlineCodeContainer (Container f) = case f [] of
|
inlineCodeContainer (Container f) = case f [] of
|
||||||
Span (_, classes, _) _ -> (not . null) (classes `intersect` codeSpans)
|
Code _ "" -> True
|
||||||
_ -> False
|
_ -> False
|
||||||
inlineCodeContainer _ = False
|
inlineCodeContainer _ = False
|
||||||
|
|
||||||
-- blockCodeContainer :: Container Block -> Bool
|
|
||||||
-- blockCodeContainer (Container f) = case f [] of
|
|
||||||
-- Div (ident, classes, kvs) _ -> (not . null) (classes `intersect` codeDivs)
|
|
||||||
-- _ -> False
|
|
||||||
-- blockCodeContainer _ = False
|
|
||||||
|
|
||||||
runToInlines :: ReaderOptions -> Docx -> Run -> [Inline]
|
runToInlines :: ReaderOptions -> Docx -> Run -> [Inline]
|
||||||
runToInlines _ _ (Run rs runElems)
|
runToInlines _ _ (Run rs runElems)
|
||||||
|
@ -274,7 +289,16 @@ cellToBlocks opts docx (Cell bps) = concatMap (bodyPartToBlocks opts docx) bps
|
||||||
rowToBlocksList :: ReaderOptions -> Docx -> Row -> [[Block]]
|
rowToBlocksList :: ReaderOptions -> Docx -> Row -> [[Block]]
|
||||||
rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells
|
rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells
|
||||||
|
|
||||||
|
blockCodeContainer :: Container Block -> Bool
|
||||||
|
blockCodeContainer (Container f) = case f [] of
|
||||||
|
CodeBlock _ _ -> True
|
||||||
|
_ -> False
|
||||||
|
blockCodeContainer _ = False
|
||||||
|
|
||||||
bodyPartToBlocks :: ReaderOptions -> Docx -> BodyPart -> [Block]
|
bodyPartToBlocks :: ReaderOptions -> Docx -> BodyPart -> [Block]
|
||||||
|
bodyPartToBlocks _ _ (Paragraph pPr parparts)
|
||||||
|
| any blockCodeContainer (parStyleToContainers pPr) =
|
||||||
|
[CodeBlock ("", [], []) (concatMap parPartToString parparts)]
|
||||||
bodyPartToBlocks opts docx (Paragraph pPr parparts) =
|
bodyPartToBlocks opts docx (Paragraph pPr parparts) =
|
||||||
case parPartsToInlines opts docx parparts of
|
case parPartsToInlines opts docx parparts of
|
||||||
[] ->
|
[] ->
|
||||||
|
@ -358,6 +382,7 @@ docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = bodyToBlocks opts d body
|
||||||
|
|
||||||
ilToCode :: Inline -> String
|
ilToCode :: Inline -> String
|
||||||
ilToCode (Str s) = s
|
ilToCode (Str s) = s
|
||||||
|
ilToCode Space = " "
|
||||||
ilToCode _ = ""
|
ilToCode _ = ""
|
||||||
|
|
||||||
|
|
||||||
|
@ -369,18 +394,7 @@ isHeaderClass s | "Heading" `isPrefixOf` s =
|
||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
isHeaderClass _ = Nothing
|
isHeaderClass _ = Nothing
|
||||||
|
|
||||||
|
|
||||||
blksToInlines :: [Block] -> [Inline]
|
blksToInlines :: [Block] -> [Inline]
|
||||||
blksToInlines (Para ils : _) = ils
|
blksToInlines (Para ils : _) = ils
|
||||||
blksToInlines (Plain ils : _) = ils
|
blksToInlines (Plain ils : _) = ils
|
||||||
blksToInlines _ = []
|
blksToInlines _ = []
|
||||||
|
|
||||||
|
|
||||||
blkToCode :: Block -> String
|
|
||||||
blkToCode (Para []) = ""
|
|
||||||
blkToCode (Para ((Code _ s):ils)) = s ++ (blkToCode (Para ils))
|
|
||||||
blkToCode (Para ((Span (_, classes, _) ils'): ils))
|
|
||||||
| (not . null) (codeSpans `intersect` classes) =
|
|
||||||
(init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils))
|
|
||||||
blkToCode _ = ""
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue