Docx reader: change elemToParPart to return [ParPart]
...instead of ParPart. Also remove NullParPart constructor, as it is no longer needed. This will allow us to handle elements that contain multiple ParParts, e.g. w:drawing elements with multiple pic:pic. See #7786.
This commit is contained in:
parent
4ff997bf68
commit
cc30d646ca
2 changed files with 34 additions and 35 deletions
|
@ -450,7 +450,6 @@ parPartToInlines' (Field info children) =
|
||||||
HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url children
|
HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url children
|
||||||
PagerefField fieldAnchor True -> parPartToInlines' $ InternalHyperLink fieldAnchor children
|
PagerefField fieldAnchor True -> parPartToInlines' $ InternalHyperLink fieldAnchor children
|
||||||
_ -> smushInlines <$> mapM parPartToInlines' children
|
_ -> smushInlines <$> mapM parPartToInlines' children
|
||||||
parPartToInlines' NullParPart = return mempty
|
|
||||||
|
|
||||||
isAnchorSpan :: Inline -> Bool
|
isAnchorSpan :: Inline -> Bool
|
||||||
isAnchorSpan (Span (_, ["anchor"], []) _) = True
|
isAnchorSpan (Span (_, ["anchor"], []) _) = True
|
||||||
|
|
|
@ -320,8 +320,6 @@ data ParPart = PlainRun Run
|
||||||
| Diagram -- placeholder for now
|
| Diagram -- placeholder for now
|
||||||
| PlainOMath [Exp]
|
| PlainOMath [Exp]
|
||||||
| Field FieldInfo [ParPart]
|
| Field FieldInfo [ParPart]
|
||||||
| NullParPart -- when we need to return nothing, but
|
|
||||||
-- not because of an error.
|
|
||||||
deriving Show
|
deriving Show
|
||||||
|
|
||||||
data Run = Run RunStyle [RunElem]
|
data Run = Run RunStyle [RunElem]
|
||||||
|
@ -694,13 +692,13 @@ elemToBodyPart ns element
|
||||||
| isElem ns "w" "p" element
|
| isElem ns "w" "p" element
|
||||||
, Just (numId, lvl) <- getNumInfo ns element = do
|
, Just (numId, lvl) <- getNumInfo ns element = do
|
||||||
parstyle <- elemToParagraphStyle ns element <$> asks envParStyles
|
parstyle <- elemToParagraphStyle ns element <$> asks envParStyles
|
||||||
parparts <- mapD (elemToParPart ns) (elChildren element)
|
parparts <- mconcat <$> mapD (elemToParPart ns) (elChildren element)
|
||||||
levelInfo <- lookupLevel numId lvl <$> asks envNumbering
|
levelInfo <- lookupLevel numId lvl <$> asks envNumbering
|
||||||
return $ ListItem parstyle numId lvl levelInfo parparts
|
return $ ListItem parstyle numId lvl levelInfo parparts
|
||||||
elemToBodyPart ns element
|
elemToBodyPart ns element
|
||||||
| isElem ns "w" "p" element = do
|
| isElem ns "w" "p" element = do
|
||||||
parstyle <- elemToParagraphStyle ns element <$> asks envParStyles
|
parstyle <- elemToParagraphStyle ns element <$> asks envParStyles
|
||||||
parparts' <- mapD (elemToParPart ns) (elChildren element)
|
parparts' <- mconcat <$> mapD (elemToParPart ns) (elChildren element)
|
||||||
fldCharState <- gets stateFldCharState
|
fldCharState <- gets stateFldCharState
|
||||||
modify $ \st -> st {stateFldCharState = emptyFldCharContents fldCharState}
|
modify $ \st -> st {stateFldCharState = emptyFldCharContents fldCharState}
|
||||||
-- Word uses list enumeration for numbered headings, so we only
|
-- Word uses list enumeration for numbered headings, so we only
|
||||||
|
@ -792,7 +790,7 @@ getTitleAndAlt ns element =
|
||||||
alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr")
|
alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr")
|
||||||
in (title, alt)
|
in (title, alt)
|
||||||
|
|
||||||
elemToParPart :: NameSpaces -> Element -> D ParPart
|
elemToParPart :: NameSpaces -> Element -> D [ParPart]
|
||||||
{-
|
{-
|
||||||
The next one is a bit complicated. fldChar fields work by first
|
The next one is a bit complicated. fldChar fields work by first
|
||||||
having a <w:fldChar fldCharType="begin"> in a run, then a run with
|
having a <w:fldChar fldCharType="begin"> in a run, then a run with
|
||||||
|
@ -840,21 +838,21 @@ elemToParPart ns element
|
||||||
case fldCharState of
|
case fldCharState of
|
||||||
_ | fldCharType == "begin" -> do
|
_ | fldCharType == "begin" -> do
|
||||||
modify $ \st -> st {stateFldCharState = FldCharOpen : fldCharState}
|
modify $ \st -> st {stateFldCharState = FldCharOpen : fldCharState}
|
||||||
return NullParPart
|
return []
|
||||||
FldCharFieldInfo info : ancestors | fldCharType == "separate" -> do
|
FldCharFieldInfo info : ancestors | fldCharType == "separate" -> do
|
||||||
modify $ \st -> st {stateFldCharState = FldCharContent info [] : ancestors}
|
modify $ \st -> st {stateFldCharState = FldCharContent info [] : ancestors}
|
||||||
return NullParPart
|
return []
|
||||||
-- Some fields have no content, since Pandoc doesn't understand any of those fields, we can just close it.
|
-- Some fields have no content, since Pandoc doesn't understand any of those fields, we can just close it.
|
||||||
FldCharFieldInfo _ : ancestors | fldCharType == "end" -> do
|
FldCharFieldInfo _ : ancestors | fldCharType == "end" -> do
|
||||||
modify $ \st -> st {stateFldCharState = ancestors}
|
modify $ \st -> st {stateFldCharState = ancestors}
|
||||||
return NullParPart
|
return []
|
||||||
[FldCharContent info children] | fldCharType == "end" -> do
|
[FldCharContent info children] | fldCharType == "end" -> do
|
||||||
modify $ \st -> st {stateFldCharState = []}
|
modify $ \st -> st {stateFldCharState = []}
|
||||||
return $ Field info $ reverse children
|
return [Field info $ reverse children]
|
||||||
FldCharContent info children : FldCharContent parentInfo siblings : ancestors | fldCharType == "end" ->
|
FldCharContent info children : FldCharContent parentInfo siblings : ancestors | fldCharType == "end" ->
|
||||||
let parent = FldCharContent parentInfo $ (Field info (reverse children)) : siblings in do
|
let parent = FldCharContent parentInfo $ (Field info (reverse children)) : siblings in do
|
||||||
modify $ \st -> st {stateFldCharState = parent : ancestors}
|
modify $ \st -> st {stateFldCharState = parent : ancestors}
|
||||||
return NullParPart
|
return []
|
||||||
_ -> throwError WrongElem
|
_ -> throwError WrongElem
|
||||||
elemToParPart ns element
|
elemToParPart ns element
|
||||||
| isElem ns "w" "r" element
|
| isElem ns "w" "r" element
|
||||||
|
@ -864,8 +862,8 @@ elemToParPart ns element
|
||||||
FldCharOpen : ancestors -> do
|
FldCharOpen : ancestors -> do
|
||||||
info <- eitherToD $ parseFieldInfo $ strContent instrText
|
info <- eitherToD $ parseFieldInfo $ strContent instrText
|
||||||
modify $ \st -> st {stateFldCharState = FldCharFieldInfo info : ancestors}
|
modify $ \st -> st {stateFldCharState = FldCharFieldInfo info : ancestors}
|
||||||
return NullParPart
|
return []
|
||||||
_ -> return NullParPart
|
_ -> return []
|
||||||
{-
|
{-
|
||||||
There is an open fldchar, so we calculate the element and add it to the
|
There is an open fldchar, so we calculate the element and add it to the
|
||||||
children. For this we need to first change the fldchar state to an empty
|
children. For this we need to first change the fldchar state to an empty
|
||||||
|
@ -878,12 +876,12 @@ elemToParPart ns element = do
|
||||||
case fldCharState of
|
case fldCharState of
|
||||||
FldCharContent info children : ancestors -> do
|
FldCharContent info children : ancestors -> do
|
||||||
modify $ \st -> st {stateFldCharState = []}
|
modify $ \st -> st {stateFldCharState = []}
|
||||||
parPart <- elemToParPart' ns element `catchError` \_ -> return NullParPart
|
parParts <- elemToParPart' ns element `catchError` \_ -> return []
|
||||||
modify $ \st -> st{stateFldCharState = FldCharContent info (parPart : children) : ancestors}
|
modify $ \st -> st{stateFldCharState = FldCharContent info (parParts ++ children) : ancestors}
|
||||||
return NullParPart
|
return []
|
||||||
_ -> elemToParPart' ns element
|
_ -> elemToParPart' ns element
|
||||||
|
|
||||||
elemToParPart' :: NameSpaces -> Element -> D ParPart
|
elemToParPart' :: NameSpaces -> Element -> D [ParPart]
|
||||||
elemToParPart' ns element
|
elemToParPart' ns element
|
||||||
| isElem ns "w" "r" element
|
| isElem ns "w" "r" element
|
||||||
, Just drawingElem <- findChildByName ns "w" "drawing" element
|
, Just drawingElem <- findChildByName ns "w" "drawing" element
|
||||||
|
@ -895,7 +893,8 @@ elemToParPart' ns element
|
||||||
>>= findAttrByName ns "r" "embed"
|
>>= findAttrByName ns "r" "embed"
|
||||||
in
|
in
|
||||||
case drawing of
|
case drawing of
|
||||||
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
|
Just s -> expandDrawingId s >>= \(fp, bs) ->
|
||||||
|
return [Drawing fp title alt bs (elemToExtent drawingElem)]
|
||||||
Nothing -> throwError WrongElem
|
Nothing -> throwError WrongElem
|
||||||
-- The two cases below are an attempt to deal with images in deprecated vml format.
|
-- The two cases below are an attempt to deal with images in deprecated vml format.
|
||||||
-- Todo: check out title and attr for deprecated format.
|
-- Todo: check out title and attr for deprecated format.
|
||||||
|
@ -906,7 +905,7 @@ elemToParPart' ns element
|
||||||
>>= findAttrByName ns "r" "id"
|
>>= findAttrByName ns "r" "id"
|
||||||
in
|
in
|
||||||
case drawing of
|
case drawing of
|
||||||
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
|
Just s -> expandDrawingId s >>= (\(fp, bs) -> return [Drawing fp "" "" bs Nothing])
|
||||||
Nothing -> throwError WrongElem
|
Nothing -> throwError WrongElem
|
||||||
elemToParPart' ns element
|
elemToParPart' ns element
|
||||||
| isElem ns "w" "r" element
|
| isElem ns "w" "r" element
|
||||||
|
@ -914,51 +913,52 @@ elemToParPart' ns element
|
||||||
, Just shapeElem <- findChildByName ns "v" "shape" objectElem
|
, Just shapeElem <- findChildByName ns "v" "shape" objectElem
|
||||||
, Just imagedataElem <- findChildByName ns "v" "imagedata" shapeElem
|
, Just imagedataElem <- findChildByName ns "v" "imagedata" shapeElem
|
||||||
, Just drawingId <- findAttrByName ns "r" "id" imagedataElem
|
, Just drawingId <- findAttrByName ns "r" "id" imagedataElem
|
||||||
= expandDrawingId drawingId >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
|
= expandDrawingId drawingId >>= (\(fp, bs) -> return [Drawing fp "" "" bs Nothing])
|
||||||
-- Diagram
|
-- Diagram
|
||||||
elemToParPart' ns element
|
elemToParPart' ns element
|
||||||
| isElem ns "w" "r" element
|
| isElem ns "w" "r" element
|
||||||
, Just drawingElem <- findChildByName ns "w" "drawing" element
|
, Just drawingElem <- findChildByName ns "w" "drawing" element
|
||||||
, d_ns <- "http://schemas.openxmlformats.org/drawingml/2006/diagram"
|
, d_ns <- "http://schemas.openxmlformats.org/drawingml/2006/diagram"
|
||||||
, Just _ <- findElement (QName "relIds" (Just d_ns) (Just "dgm")) drawingElem
|
, Just _ <- findElement (QName "relIds" (Just d_ns) (Just "dgm")) drawingElem
|
||||||
= return Diagram
|
= return [Diagram]
|
||||||
-- Chart
|
-- Chart
|
||||||
elemToParPart' ns element
|
elemToParPart' ns element
|
||||||
| isElem ns "w" "r" element
|
| isElem ns "w" "r" element
|
||||||
, Just drawingElem <- findChildByName ns "w" "drawing" element
|
, Just drawingElem <- findChildByName ns "w" "drawing" element
|
||||||
, c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart"
|
, c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart"
|
||||||
, Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) drawingElem
|
, Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) drawingElem
|
||||||
= return Chart
|
= return [Chart]
|
||||||
elemToParPart' ns element
|
elemToParPart' ns element
|
||||||
| isElem ns "w" "r" element = do
|
| isElem ns "w" "r" element = do
|
||||||
run <- elemToRun ns element
|
run <- elemToRun ns element
|
||||||
return $ PlainRun run
|
return [PlainRun run]
|
||||||
elemToParPart' ns element
|
elemToParPart' ns element
|
||||||
| Just change <- getTrackedChange ns element = do
|
| Just change <- getTrackedChange ns element = do
|
||||||
runs <- mapD (elemToRun ns) (elChildren element)
|
runs <- mapD (elemToRun ns) (elChildren element)
|
||||||
return $ ChangedRuns change runs
|
return [ChangedRuns change runs]
|
||||||
elemToParPart' ns element
|
elemToParPart' ns element
|
||||||
| isElem ns "w" "bookmarkStart" element
|
| isElem ns "w" "bookmarkStart" element
|
||||||
, Just bmId <- findAttrByName ns "w" "id" element
|
, Just bmId <- findAttrByName ns "w" "id" element
|
||||||
, Just bmName <- findAttrByName ns "w" "name" element =
|
, Just bmName <- findAttrByName ns "w" "name" element =
|
||||||
return $ BookMark bmId bmName
|
return [BookMark bmId bmName]
|
||||||
elemToParPart' ns element
|
elemToParPart' ns element
|
||||||
| isElem ns "w" "hyperlink" element
|
| isElem ns "w" "hyperlink" element
|
||||||
, Just relId <- findAttrByName ns "r" "id" element = do
|
, Just relId <- findAttrByName ns "r" "id" element = do
|
||||||
location <- asks envLocation
|
location <- asks envLocation
|
||||||
children <- mapD (elemToParPart ns) (elChildren element)
|
children <- mconcat <$> mapD (elemToParPart ns) (elChildren element)
|
||||||
rels <- asks envRelationships
|
rels <- asks envRelationships
|
||||||
case lookupRelationship location relId rels of
|
case lookupRelationship location relId rels of
|
||||||
Just target ->
|
Just target ->
|
||||||
case findAttrByName ns "w" "anchor" element of
|
case findAttrByName ns "w" "anchor" element of
|
||||||
Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) children
|
Just anchor -> return
|
||||||
Nothing -> return $ ExternalHyperLink target children
|
[ExternalHyperLink (target <> "#" <> anchor) children]
|
||||||
Nothing -> return $ ExternalHyperLink "" children
|
Nothing -> return [ExternalHyperLink target children]
|
||||||
|
Nothing -> return [ExternalHyperLink "" children]
|
||||||
elemToParPart' ns element
|
elemToParPart' ns element
|
||||||
| isElem ns "w" "hyperlink" element
|
| isElem ns "w" "hyperlink" element
|
||||||
, Just anchor <- findAttrByName ns "w" "anchor" element = do
|
, Just anchor <- findAttrByName ns "w" "anchor" element = do
|
||||||
children <- mapD (elemToParPart ns) (elChildren element)
|
children <- mconcat <$> mapD (elemToParPart ns) (elChildren element)
|
||||||
return $ InternalHyperLink anchor children
|
return [InternalHyperLink anchor children]
|
||||||
elemToParPart' ns element
|
elemToParPart' ns element
|
||||||
| isElem ns "w" "commentRangeStart" element
|
| isElem ns "w" "commentRangeStart" element
|
||||||
, Just cmtId <- findAttrByName ns "w" "id" element = do
|
, Just cmtId <- findAttrByName ns "w" "id" element = do
|
||||||
|
@ -969,20 +969,20 @@ elemToParPart' ns element
|
||||||
elemToParPart' ns element
|
elemToParPart' ns element
|
||||||
| isElem ns "w" "commentRangeEnd" element
|
| isElem ns "w" "commentRangeEnd" element
|
||||||
, Just cmtId <- findAttrByName ns "w" "id" element =
|
, Just cmtId <- findAttrByName ns "w" "id" element =
|
||||||
return $ CommentEnd cmtId
|
return [CommentEnd cmtId]
|
||||||
elemToParPart' ns element
|
elemToParPart' ns element
|
||||||
| isElem ns "m" "oMath" element =
|
| isElem ns "m" "oMath" element =
|
||||||
fmap PlainOMath (eitherToD $ readOMML $ showElement element)
|
fmap (return . PlainOMath) (eitherToD $ readOMML $ showElement element)
|
||||||
elemToParPart' _ _ = throwError WrongElem
|
elemToParPart' _ _ = throwError WrongElem
|
||||||
|
|
||||||
elemToCommentStart :: NameSpaces -> Element -> D ParPart
|
elemToCommentStart :: NameSpaces -> Element -> D [ParPart]
|
||||||
elemToCommentStart ns element
|
elemToCommentStart ns element
|
||||||
| isElem ns "w" "comment" element
|
| isElem ns "w" "comment" element
|
||||||
, Just cmtId <- findAttrByName ns "w" "id" element
|
, Just cmtId <- findAttrByName ns "w" "id" element
|
||||||
, Just cmtAuthor <- findAttrByName ns "w" "author" element
|
, Just cmtAuthor <- findAttrByName ns "w" "author" element
|
||||||
, cmtDate <- findAttrByName ns "w" "date" element = do
|
, cmtDate <- findAttrByName ns "w" "date" element = do
|
||||||
bps <- mapD (elemToBodyPart ns) (elChildren element)
|
bps <- mapD (elemToBodyPart ns) (elChildren element)
|
||||||
return $ CommentStart cmtId cmtAuthor cmtDate bps
|
return [CommentStart cmtId cmtAuthor cmtDate bps]
|
||||||
elemToCommentStart _ _ = throwError WrongElem
|
elemToCommentStart _ _ = throwError WrongElem
|
||||||
|
|
||||||
lookupFootnote :: T.Text -> Notes -> Maybe Element
|
lookupFootnote :: T.Text -> Notes -> Maybe Element
|
||||||
|
|
Loading…
Add table
Reference in a new issue