2014-06-14 10:02:35 -04:00
|
|
|
{-
|
|
|
|
Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
-}
|
|
|
|
|
|
|
|
{- |
|
2014-06-16 22:44:40 -07:00
|
|
|
Module : Text.Pandoc.Readers.Docx
|
2014-06-14 10:02:35 -04:00
|
|
|
Copyright : Copyright (C) 2014 Jesse Rosenthal
|
|
|
|
License : GNU GPL, version 2 or above
|
|
|
|
|
|
|
|
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
|
|
|
|
Stability : alpha
|
|
|
|
Portability : portable
|
|
|
|
|
2014-06-16 22:44:40 -07:00
|
|
|
Conversion of Docx type (defined in Text.Pandoc.Readers.Docx.Parse)
|
2014-06-14 10:02:35 -04:00
|
|
|
to 'Pandoc' document. -}
|
|
|
|
|
|
|
|
{-
|
2014-06-16 22:44:40 -07:00
|
|
|
Current state of implementation of Docx entities ([x] means
|
2014-06-14 10:02:35 -04:00
|
|
|
implemented, [-] means partially implemented):
|
|
|
|
|
|
|
|
* Blocks
|
|
|
|
|
|
|
|
- [X] Para
|
|
|
|
- [X] CodeBlock (styled with `SourceCode`)
|
|
|
|
- [X] BlockQuote (styled with `Quote`, `BlockQuote`, or, optionally,
|
|
|
|
indented)
|
|
|
|
- [X] OrderedList
|
|
|
|
- [X] BulletList
|
|
|
|
- [X] DefinitionList (styled with adjacent `DefinitionTerm` and `Definition`)
|
|
|
|
- [X] Header (styled with `Heading#`)
|
|
|
|
- [ ] HorizontalRule
|
|
|
|
- [-] Table (column widths and alignments not yet implemented)
|
|
|
|
|
|
|
|
* Inlines
|
|
|
|
|
|
|
|
- [X] Str
|
|
|
|
- [X] Emph (From italics. `underline` currently read as span. In
|
|
|
|
future, it might optionally be emph as well)
|
|
|
|
- [X] Strong
|
|
|
|
- [X] Strikeout
|
|
|
|
- [X] Superscript
|
|
|
|
- [X] Subscript
|
|
|
|
- [X] SmallCaps
|
|
|
|
- [ ] Quoted
|
|
|
|
- [ ] Cite
|
|
|
|
- [X] Code (styled with `VerbatimChar`)
|
|
|
|
- [X] Space
|
|
|
|
- [X] LineBreak (these are invisible in Word: entered with Shift-Return)
|
|
|
|
- [ ] Math
|
|
|
|
- [X] Link (links to an arbitrary bookmark create a span with the target as
|
|
|
|
id and "anchor" class)
|
|
|
|
- [-] Image (Links to path in archive. Future option for
|
|
|
|
data-encoded URI likely.)
|
|
|
|
- [X] Note (Footnotes and Endnotes are silently combined.)
|
|
|
|
-}
|
|
|
|
|
2014-06-16 22:44:40 -07:00
|
|
|
module Text.Pandoc.Readers.Docx
|
|
|
|
( readDocx
|
2014-06-14 10:02:35 -04:00
|
|
|
) where
|
|
|
|
|
|
|
|
import Codec.Archive.Zip
|
|
|
|
import Text.Pandoc.Definition
|
|
|
|
import Text.Pandoc.Options
|
|
|
|
import Text.Pandoc.Builder (text, toList)
|
|
|
|
import Text.Pandoc.MIME (getMimeType)
|
|
|
|
import Text.Pandoc.UTF8 (toString)
|
2014-06-23 15:27:55 -04:00
|
|
|
import Text.Pandoc.Walk
|
2014-06-16 22:44:40 -07:00
|
|
|
import Text.Pandoc.Readers.Docx.Parse
|
|
|
|
import Text.Pandoc.Readers.Docx.Lists
|
2014-06-23 15:27:55 -04:00
|
|
|
import Text.Pandoc.Readers.Docx.Reducible
|
2014-06-14 10:02:35 -04:00
|
|
|
import Data.Maybe (mapMaybe, isJust, fromJust)
|
2014-06-24 10:34:07 -04:00
|
|
|
import Data.List (delete, isPrefixOf, (\\))
|
2014-06-14 10:02:35 -04:00
|
|
|
import qualified Data.ByteString as BS
|
|
|
|
import qualified Data.ByteString.Lazy as B
|
|
|
|
import Data.ByteString.Base64 (encode)
|
|
|
|
import System.FilePath (combine)
|
|
|
|
|
2014-06-16 22:44:40 -07:00
|
|
|
readDocx :: ReaderOptions
|
2014-06-14 10:02:35 -04:00
|
|
|
-> B.ByteString
|
|
|
|
-> Pandoc
|
2014-06-16 22:44:40 -07:00
|
|
|
readDocx opts bytes =
|
|
|
|
case archiveToDocx (toArchive bytes) of
|
2014-06-14 10:02:35 -04:00
|
|
|
Just docx -> Pandoc nullMeta (docxToBlocks opts docx)
|
|
|
|
Nothing -> error $ "couldn't parse docx file"
|
|
|
|
|
2014-06-23 15:27:55 -04:00
|
|
|
spansToKeep :: [String]
|
2014-06-24 12:10:49 -04:00
|
|
|
spansToKeep = []
|
2014-06-23 15:27:55 -04:00
|
|
|
|
|
|
|
|
|
|
|
-- This is empty, but we put it in for future-proofing.
|
|
|
|
divsToKeep :: [String]
|
2014-06-24 12:10:49 -04:00
|
|
|
divsToKeep = ["list-item", "Definition", "DefinitionTerm"]
|
2014-06-23 15:27:55 -04:00
|
|
|
|
|
|
|
runStyleToContainers :: RunStyle -> [Container Inline]
|
|
|
|
runStyleToContainers rPr =
|
2014-06-24 10:34:07 -04:00
|
|
|
let spanClassToContainers :: String -> [Container Inline]
|
|
|
|
spanClassToContainers s | s `elem` codeSpans =
|
|
|
|
[Container $ (\ils -> Code ("", [], []) (concatMap ilToCode ils))]
|
|
|
|
spanClassToContainers s | s `elem` spansToKeep =
|
|
|
|
[Container $ Span ("", [s], [])]
|
|
|
|
spanClassToContainers _ = []
|
|
|
|
|
|
|
|
classContainers = case rStyle rPr of
|
|
|
|
Nothing -> []
|
|
|
|
Just s -> spanClassToContainers s
|
|
|
|
|
|
|
|
formatters = map Container $ mapMaybe id
|
|
|
|
[ if isBold rPr then (Just Strong) else Nothing
|
|
|
|
, if isItalic rPr then (Just Emph) else Nothing
|
|
|
|
, if isSmallCaps rPr then (Just SmallCaps) else Nothing
|
|
|
|
, if isStrike rPr then (Just Strikeout) else Nothing
|
|
|
|
, if isSuperScript rPr then (Just Superscript) else Nothing
|
|
|
|
, if isSubScript rPr then (Just Subscript) else Nothing
|
|
|
|
, underline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)]))
|
2014-06-23 15:27:55 -04:00
|
|
|
]
|
|
|
|
in
|
2014-06-24 10:34:07 -04:00
|
|
|
classContainers ++ formatters
|
2014-06-23 15:27:55 -04:00
|
|
|
|
|
|
|
|
|
|
|
divAttrToContainers :: [String] -> [(String, String)] -> [Container Block]
|
|
|
|
divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) =
|
|
|
|
let n = fromJust (isHeaderClass c)
|
|
|
|
in
|
|
|
|
[(Container $ \blks ->
|
|
|
|
Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))]
|
2014-06-24 12:10:49 -04:00
|
|
|
divAttrToContainers (c:cs) kvs | c `elem` divsToKeep =
|
|
|
|
(Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs)
|
|
|
|
divAttrToContainers (c:cs) kvs | c `elem` codeDivs =
|
2014-06-24 10:34:07 -04:00
|
|
|
-- This is a bit of a cludge. We make the codeblock from the raw
|
|
|
|
-- parparts in bodyPartToBlocks. But we need something to match against.
|
2014-06-24 12:10:49 -04:00
|
|
|
(Container $ \_ -> CodeBlock ("", [], []) "") : (divAttrToContainers cs kvs)
|
2014-06-23 15:27:55 -04:00
|
|
|
divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs =
|
|
|
|
let kvs' = filter (\(k,_) -> k /= "indent") kvs
|
|
|
|
in
|
|
|
|
(Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs')
|
|
|
|
divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs =
|
|
|
|
(Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs)
|
|
|
|
divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs
|
2014-06-24 14:24:38 -04:00
|
|
|
divAttrToContainers [] kvs | isJust (lookup "indent" kvs) =
|
|
|
|
let kvs' = filter (\(k,_) -> k /= "indent") kvs
|
|
|
|
in
|
|
|
|
case fromJust (lookup "indent" kvs) of
|
|
|
|
"0" -> divAttrToContainers [] kvs'
|
|
|
|
('-' : _) -> divAttrToContainers [] kvs'
|
|
|
|
_ -> (Container BlockQuote) : divAttrToContainers [] kvs'
|
|
|
|
divAttrToContainers _ _ = []
|
2014-06-23 15:27:55 -04:00
|
|
|
|
|
|
|
|
|
|
|
parStyleToContainers :: ParagraphStyle -> [Container Block]
|
|
|
|
parStyleToContainers pPr =
|
|
|
|
let classes = pStyle pPr
|
|
|
|
kvs = case indent pPr of
|
|
|
|
Just n -> [("indent", show n)]
|
|
|
|
Nothing -> []
|
|
|
|
in
|
|
|
|
divAttrToContainers classes kvs
|
|
|
|
|
2014-06-14 10:02:35 -04:00
|
|
|
|
|
|
|
strToInlines :: String -> [Inline]
|
|
|
|
strToInlines = toList . text
|
|
|
|
|
|
|
|
codeSpans :: [String]
|
|
|
|
codeSpans = ["VerbatimChar"]
|
|
|
|
|
|
|
|
blockQuoteDivs :: [String]
|
|
|
|
blockQuoteDivs = ["Quote", "BlockQuote"]
|
|
|
|
|
|
|
|
codeDivs :: [String]
|
|
|
|
codeDivs = ["SourceCode"]
|
|
|
|
|
|
|
|
runElemToInlines :: RunElem -> [Inline]
|
|
|
|
runElemToInlines (TextRun s) = strToInlines s
|
|
|
|
runElemToInlines (LnBrk) = [LineBreak]
|
2014-06-19 17:55:02 -04:00
|
|
|
runElemToInlines (Tab) = [Space]
|
2014-06-14 10:02:35 -04:00
|
|
|
|
|
|
|
runElemToString :: RunElem -> String
|
|
|
|
runElemToString (TextRun s) = s
|
|
|
|
runElemToString (LnBrk) = ['\n']
|
2014-06-19 17:55:02 -04:00
|
|
|
runElemToString (Tab) = ['\t']
|
2014-06-14 10:02:35 -04:00
|
|
|
|
|
|
|
runElemsToString :: [RunElem] -> String
|
|
|
|
runElemsToString = concatMap runElemToString
|
|
|
|
|
2014-06-24 10:34:07 -04:00
|
|
|
runToString :: Run -> String
|
|
|
|
runToString (Run _ runElems) = runElemsToString runElems
|
|
|
|
runToString _ = ""
|
|
|
|
|
|
|
|
parPartToString :: ParPart -> String
|
|
|
|
parPartToString (PlainRun run) = runToString run
|
|
|
|
parPartToString (InternalHyperLink _ runs) = concatMap runToString runs
|
|
|
|
parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs
|
|
|
|
parPartToString _ = ""
|
|
|
|
|
2014-06-23 15:27:55 -04:00
|
|
|
|
|
|
|
inlineCodeContainer :: Container Inline -> Bool
|
|
|
|
inlineCodeContainer (Container f) = case f [] of
|
2014-06-24 10:34:07 -04:00
|
|
|
Code _ "" -> True
|
2014-06-23 15:27:55 -04:00
|
|
|
_ -> False
|
|
|
|
inlineCodeContainer _ = False
|
|
|
|
|
2014-06-19 19:28:55 -04:00
|
|
|
|
2014-06-16 22:44:40 -07:00
|
|
|
runToInlines :: ReaderOptions -> Docx -> Run -> [Inline]
|
|
|
|
runToInlines _ _ (Run rs runElems)
|
2014-06-23 15:27:55 -04:00
|
|
|
| any inlineCodeContainer (runStyleToContainers rs) =
|
|
|
|
rebuild (runStyleToContainers rs) $ [Str $ runElemsToString runElems]
|
|
|
|
| otherwise =
|
|
|
|
rebuild (runStyleToContainers rs) (concatMap runElemToInlines runElems)
|
2014-06-16 22:44:40 -07:00
|
|
|
runToInlines opts docx@(Docx _ notes _ _ _ ) (Footnote fnId) =
|
2014-06-14 10:02:35 -04:00
|
|
|
case (getFootNote fnId notes) of
|
|
|
|
Just bodyParts ->
|
2014-06-23 15:27:55 -04:00
|
|
|
[Note (concatMap (bodyPartToBlocks opts docx) bodyParts)]
|
2014-06-14 10:02:35 -04:00
|
|
|
Nothing ->
|
2014-06-23 15:27:55 -04:00
|
|
|
[Note []]
|
2014-06-16 22:44:40 -07:00
|
|
|
runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) =
|
2014-06-14 10:02:35 -04:00
|
|
|
case (getEndNote fnId notes) of
|
|
|
|
Just bodyParts ->
|
2014-06-23 15:27:55 -04:00
|
|
|
[Note (concatMap (bodyPartToBlocks opts docx) bodyParts)]
|
2014-06-14 10:02:35 -04:00
|
|
|
Nothing ->
|
2014-06-23 15:27:55 -04:00
|
|
|
[Note []]
|
2014-06-14 10:02:35 -04:00
|
|
|
|
2014-06-16 22:44:40 -07:00
|
|
|
parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline]
|
2014-06-14 10:02:35 -04:00
|
|
|
parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r
|
2014-06-25 10:38:01 -04:00
|
|
|
parPartToInlines opts docx (Insertion _ _ _ runs) =
|
|
|
|
concatMap (runToInlines opts docx) runs
|
|
|
|
parPartToInlines _ _ (Deletion _ _ _ _) = []
|
2014-06-23 15:27:55 -04:00
|
|
|
parPartToInlines _ _ (BookMark _ anchor) | anchor `elem` dummyAnchors = []
|
|
|
|
parPartToInlines _ _ (BookMark _ anchor) = [Span (anchor, ["anchor"], []) []]
|
2014-06-16 22:44:40 -07:00
|
|
|
parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) =
|
2014-06-14 10:02:35 -04:00
|
|
|
case lookupRelationship relid rels of
|
|
|
|
Just target -> [Image [] (combine "word" target, "")]
|
|
|
|
Nothing -> [Image [] ("", "")]
|
|
|
|
parPartToInlines opts docx (InternalHyperLink anchor runs) =
|
|
|
|
[Link (concatMap (runToInlines opts docx) runs) ('#' : anchor, "")]
|
2014-06-16 22:44:40 -07:00
|
|
|
parPartToInlines opts docx@(Docx _ _ _ rels _) (ExternalHyperLink relid runs) =
|
2014-06-14 10:02:35 -04:00
|
|
|
case lookupRelationship relid rels of
|
|
|
|
Just target ->
|
|
|
|
[Link (concatMap (runToInlines opts docx) runs) (target, "")]
|
|
|
|
Nothing ->
|
|
|
|
[Link (concatMap (runToInlines opts docx) runs) ("", "")]
|
|
|
|
|
|
|
|
isAnchorSpan :: Inline -> Bool
|
|
|
|
isAnchorSpan (Span (ident, classes, kvs) ils) =
|
|
|
|
(not . null) ident &&
|
|
|
|
classes == ["anchor"] &&
|
|
|
|
null kvs &&
|
|
|
|
null ils
|
|
|
|
isAnchorSpan _ = False
|
|
|
|
|
|
|
|
dummyAnchors :: [String]
|
|
|
|
dummyAnchors = ["_GoBack"]
|
|
|
|
|
|
|
|
makeHeaderAnchors :: Block -> Block
|
|
|
|
makeHeaderAnchors h@(Header n (_, classes, kvs) ils) =
|
|
|
|
case filter isAnchorSpan ils of
|
|
|
|
[] -> h
|
|
|
|
(x@(Span (ident, _, _) _) : xs) ->
|
|
|
|
case ident `elem` dummyAnchors of
|
|
|
|
True -> h
|
|
|
|
False -> Header n (ident, classes, kvs) (ils \\ (x:xs))
|
|
|
|
_ -> h
|
|
|
|
makeHeaderAnchors blk = blk
|
|
|
|
|
2014-06-16 22:44:40 -07:00
|
|
|
parPartsToInlines :: ReaderOptions -> Docx -> [ParPart] -> [Inline]
|
2014-06-14 10:02:35 -04:00
|
|
|
parPartsToInlines opts docx parparts =
|
|
|
|
--
|
|
|
|
-- We're going to skip data-uri's for now. It should be an option,
|
|
|
|
-- not mandatory.
|
|
|
|
--
|
2014-06-16 23:02:20 -07:00
|
|
|
(if False -- TODO depend on option
|
2014-06-23 15:27:55 -04:00
|
|
|
then walk (makeImagesSelfContained docx)
|
2014-06-16 23:02:20 -07:00
|
|
|
else id) $
|
2014-06-23 15:27:55 -04:00
|
|
|
-- bottomUp spanTrim $
|
|
|
|
-- bottomUp spanCorrect $
|
|
|
|
-- bottomUp spanReduce $
|
|
|
|
reduceList $ concatMap (parPartToInlines opts docx) parparts
|
2014-06-14 10:02:35 -04:00
|
|
|
|
2014-06-16 22:44:40 -07:00
|
|
|
cellToBlocks :: ReaderOptions -> Docx -> Cell -> [Block]
|
2014-06-23 15:27:55 -04:00
|
|
|
cellToBlocks opts docx (Cell bps) = concatMap (bodyPartToBlocks opts docx) bps
|
2014-06-14 10:02:35 -04:00
|
|
|
|
2014-06-16 22:44:40 -07:00
|
|
|
rowToBlocksList :: ReaderOptions -> Docx -> Row -> [[Block]]
|
2014-06-14 10:02:35 -04:00
|
|
|
rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells
|
|
|
|
|
2014-06-24 10:34:07 -04:00
|
|
|
blockCodeContainer :: Container Block -> Bool
|
|
|
|
blockCodeContainer (Container f) = case f [] of
|
|
|
|
CodeBlock _ _ -> True
|
|
|
|
_ -> False
|
|
|
|
blockCodeContainer _ = False
|
|
|
|
|
2014-06-23 15:27:55 -04:00
|
|
|
bodyPartToBlocks :: ReaderOptions -> Docx -> BodyPart -> [Block]
|
2014-06-24 10:34:07 -04:00
|
|
|
bodyPartToBlocks _ _ (Paragraph pPr parparts)
|
|
|
|
| any blockCodeContainer (parStyleToContainers pPr) =
|
2014-06-24 12:10:49 -04:00
|
|
|
let
|
|
|
|
otherConts = filter (not . blockCodeContainer) (parStyleToContainers pPr)
|
|
|
|
in
|
|
|
|
rebuild
|
|
|
|
otherConts
|
|
|
|
[CodeBlock ("", [], []) (concatMap parPartToString parparts)]
|
2014-06-23 15:27:55 -04:00
|
|
|
bodyPartToBlocks opts docx (Paragraph pPr parparts) =
|
|
|
|
case parPartsToInlines opts docx parparts of
|
|
|
|
[] ->
|
|
|
|
[]
|
|
|
|
_ ->
|
|
|
|
let parContents = parPartsToInlines opts docx parparts
|
|
|
|
trimmedContents = reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) parContents
|
|
|
|
in
|
|
|
|
rebuild
|
|
|
|
(parStyleToContainers pPr)
|
|
|
|
[Para trimmedContents]
|
|
|
|
bodyPartToBlocks opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parparts) =
|
2014-06-14 10:02:35 -04:00
|
|
|
let
|
|
|
|
kvs = case lookupLevel numId lvl numbering of
|
|
|
|
Just (_, fmt, txt, Just start) -> [ ("level", lvl)
|
|
|
|
, ("num-id", numId)
|
|
|
|
, ("format", fmt)
|
|
|
|
, ("text", txt)
|
|
|
|
, ("start", (show start))
|
|
|
|
]
|
2014-06-16 22:44:40 -07:00
|
|
|
|
2014-06-14 10:02:35 -04:00
|
|
|
Just (_, fmt, txt, Nothing) -> [ ("level", lvl)
|
|
|
|
, ("num-id", numId)
|
|
|
|
, ("format", fmt)
|
|
|
|
, ("text", txt)
|
|
|
|
]
|
|
|
|
Nothing -> []
|
|
|
|
in
|
2014-06-23 15:27:55 -04:00
|
|
|
[Div
|
|
|
|
("", ["list-item"], kvs)
|
|
|
|
(bodyPartToBlocks opts docx (Paragraph pPr parparts))]
|
|
|
|
bodyPartToBlocks _ _ (Tbl _ _ _ []) =
|
|
|
|
[Para []]
|
|
|
|
bodyPartToBlocks opts docx (Tbl cap _ look (r:rs)) =
|
2014-06-14 10:02:35 -04:00
|
|
|
let caption = strToInlines cap
|
|
|
|
(hdr, rows) = case firstRowFormatting look of
|
|
|
|
True -> (Just r, rs)
|
|
|
|
False -> (Nothing, r:rs)
|
|
|
|
hdrCells = case hdr of
|
|
|
|
Just r' -> rowToBlocksList opts docx r'
|
|
|
|
Nothing -> []
|
|
|
|
cells = map (rowToBlocksList opts docx) rows
|
2014-06-16 22:44:40 -07:00
|
|
|
|
2014-06-14 10:02:35 -04:00
|
|
|
size = case null hdrCells of
|
|
|
|
True -> length $ head cells
|
|
|
|
False -> length $ hdrCells
|
|
|
|
--
|
|
|
|
-- The two following variables (horizontal column alignment and
|
|
|
|
-- relative column widths) go to the default at the
|
|
|
|
-- moment. Width information is in the TblGrid field of the Tbl,
|
|
|
|
-- so should be possible. Alignment might be more difficult,
|
|
|
|
-- since there doesn't seem to be a column entity in docx.
|
|
|
|
alignments = take size (repeat AlignDefault)
|
|
|
|
widths = take size (repeat 0) :: [Double]
|
|
|
|
in
|
2014-06-23 15:27:55 -04:00
|
|
|
[Table caption alignments widths hdrCells cells]
|
|
|
|
|
2014-06-14 10:02:35 -04:00
|
|
|
|
2014-06-16 22:44:40 -07:00
|
|
|
makeImagesSelfContained :: Docx -> Inline -> Inline
|
|
|
|
makeImagesSelfContained (Docx _ _ _ _ media) i@(Image alt (uri, title)) =
|
2014-06-14 10:02:35 -04:00
|
|
|
case lookup uri media of
|
|
|
|
Just bs -> case getMimeType uri of
|
|
|
|
Just mime -> let data_uri =
|
|
|
|
"data:" ++ mime ++ ";base64," ++ toString (encode $ BS.concat $ B.toChunks bs)
|
|
|
|
in
|
|
|
|
Image alt (data_uri, title)
|
|
|
|
Nothing -> i
|
|
|
|
Nothing -> i
|
|
|
|
makeImagesSelfContained _ inline = inline
|
|
|
|
|
2014-06-16 22:44:40 -07:00
|
|
|
bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block]
|
2014-06-14 10:02:35 -04:00
|
|
|
bodyToBlocks opts docx (Body bps) =
|
|
|
|
map (makeHeaderAnchors) $
|
2014-06-24 12:10:49 -04:00
|
|
|
blocksToDefinitions $
|
2014-06-14 10:02:35 -04:00
|
|
|
blocksToBullets $
|
2014-06-23 15:27:55 -04:00
|
|
|
concatMap (bodyPartToBlocks opts docx) bps
|
2014-06-14 10:02:35 -04:00
|
|
|
|
2014-06-16 22:44:40 -07:00
|
|
|
docxToBlocks :: ReaderOptions -> Docx -> [Block]
|
|
|
|
docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = bodyToBlocks opts d body
|
2014-06-14 10:02:35 -04:00
|
|
|
|
|
|
|
|
|
|
|
ilToCode :: Inline -> String
|
|
|
|
ilToCode (Str s) = s
|
2014-06-24 10:34:07 -04:00
|
|
|
ilToCode Space = " "
|
|
|
|
ilToCode _ = ""
|
2014-06-14 10:02:35 -04:00
|
|
|
|
|
|
|
|
|
|
|
isHeaderClass :: String -> Maybe Int
|
|
|
|
isHeaderClass s | "Heading" `isPrefixOf` s =
|
|
|
|
case reads (drop (length "Heading") s) :: [(Int, String)] of
|
|
|
|
[] -> Nothing
|
|
|
|
((n, "") : []) -> Just n
|
|
|
|
_ -> Nothing
|
|
|
|
isHeaderClass _ = Nothing
|
|
|
|
|
|
|
|
blksToInlines :: [Block] -> [Inline]
|
|
|
|
blksToInlines (Para ils : _) = ils
|
|
|
|
blksToInlines (Plain ils : _) = ils
|
|
|
|
blksToInlines _ = []
|