Docx Reader: Get rid of Modifiable typeclass.
The docx reader used to use a Modifiable typeclass to combine both Blocks and Inlines. But all the work was in the inlines. So most of the generality was wasted, at the expense of making the code harder to understand. This gets rid of the generality, and adds functions for Blocks and Inlines. It should be a bit easier to work with going forward.
This commit is contained in:
parent
38bd4162fe
commit
a7a0b452a5
4 changed files with 170 additions and 199 deletions
|
@ -372,7 +372,7 @@ Library
|
|||
Text.Pandoc.Process,
|
||||
Text.Pandoc.CSS
|
||||
Other-Modules: Text.Pandoc.Readers.Docx.Lists,
|
||||
Text.Pandoc.Readers.Docx.Reducible,
|
||||
Text.Pandoc.Readers.Docx.Combine,
|
||||
Text.Pandoc.Readers.Docx.Parse,
|
||||
Text.Pandoc.Readers.Docx.Fonts,
|
||||
Text.Pandoc.Readers.Docx.Util,
|
||||
|
|
|
@ -81,7 +81,7 @@ import Text.Pandoc.Builder
|
|||
import Text.Pandoc.Walk
|
||||
import Text.Pandoc.Readers.Docx.Parse
|
||||
import Text.Pandoc.Readers.Docx.Lists
|
||||
import Text.Pandoc.Readers.Docx.Reducible
|
||||
import Text.Pandoc.Readers.Docx.Combine
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Pandoc.MediaBag (insertMedia, MediaBag)
|
||||
import Data.List (delete, (\\), intersect)
|
||||
|
@ -167,7 +167,7 @@ bodyPartsToMeta' (bp : bps)
|
|||
| (Paragraph pPr parParts) <- bp
|
||||
, (c : _)<- intersect (pStyle pPr) (M.keys metaStyles)
|
||||
, (Just metaField) <- M.lookup c metaStyles = do
|
||||
inlines <- concatReduce <$> mapM parPartToInlines parParts
|
||||
inlines <- smushInlines <$> mapM parPartToInlines parParts
|
||||
remaining <- bodyPartsToMeta' bps
|
||||
let
|
||||
f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils']
|
||||
|
@ -291,13 +291,13 @@ runToInlines (Run rs runElems)
|
|||
Just SubScrpt -> subscript codeString
|
||||
_ -> codeString
|
||||
| otherwise = do
|
||||
let ils = concatReduce (map runElemToInlines runElems)
|
||||
let ils = smushInlines (map runElemToInlines runElems)
|
||||
return $ (runStyleToTransform $ resolveDependentRunStyle rs) ils
|
||||
runToInlines (Footnote bps) = do
|
||||
blksList <- concatReduce <$> (mapM bodyPartToBlocks bps)
|
||||
blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps)
|
||||
return $ note blksList
|
||||
runToInlines (Endnote bps) = do
|
||||
blksList <- concatReduce <$> (mapM bodyPartToBlocks bps)
|
||||
blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps)
|
||||
return $ note blksList
|
||||
runToInlines (InlineDrawing fp bs ext) = do
|
||||
mediaBag <- gets docxMediaBag
|
||||
|
@ -316,19 +316,19 @@ parPartToInlines (PlainRun r) = runToInlines r
|
|||
parPartToInlines (Insertion _ author date runs) = do
|
||||
opts <- asks docxOptions
|
||||
case readerTrackChanges opts of
|
||||
AcceptChanges -> concatReduce <$> mapM runToInlines runs
|
||||
AcceptChanges -> smushInlines <$> mapM runToInlines runs
|
||||
RejectChanges -> return mempty
|
||||
AllChanges -> do
|
||||
ils <- concatReduce <$> mapM runToInlines runs
|
||||
ils <- smushInlines <$> mapM runToInlines runs
|
||||
let attr = ("", ["insertion"], [("author", author), ("date", date)])
|
||||
return $ spanWith attr ils
|
||||
parPartToInlines (Deletion _ author date runs) = do
|
||||
opts <- asks docxOptions
|
||||
case readerTrackChanges opts of
|
||||
AcceptChanges -> return mempty
|
||||
RejectChanges -> concatReduce <$> mapM runToInlines runs
|
||||
RejectChanges -> smushInlines <$> mapM runToInlines runs
|
||||
AllChanges -> do
|
||||
ils <- concatReduce <$> mapM runToInlines runs
|
||||
ils <- smushInlines <$> mapM runToInlines runs
|
||||
let attr = ("", ["deletion"], [("author", author), ("date", date)])
|
||||
return $ spanWith attr ils
|
||||
parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors =
|
||||
|
@ -361,10 +361,10 @@ parPartToInlines (Drawing fp bs ext) = do
|
|||
modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
|
||||
return $ imageWith (extentToAttr ext) fp "" ""
|
||||
parPartToInlines (InternalHyperLink anchor runs) = do
|
||||
ils <- concatReduce <$> mapM runToInlines runs
|
||||
ils <- smushInlines <$> mapM runToInlines runs
|
||||
return $ link ('#' : anchor) "" ils
|
||||
parPartToInlines (ExternalHyperLink target runs) = do
|
||||
ils <- concatReduce <$> mapM runToInlines runs
|
||||
ils <- smushInlines <$> mapM runToInlines runs
|
||||
return $ link target "" ils
|
||||
parPartToInlines (PlainOMath exps) = do
|
||||
return $ math $ writeTeX exps
|
||||
|
@ -417,7 +417,7 @@ singleParaToPlain blks = blks
|
|||
|
||||
cellToBlocks :: Cell -> DocxContext Blocks
|
||||
cellToBlocks (Cell bps) = do
|
||||
blks <- concatReduce <$> mapM bodyPartToBlocks bps
|
||||
blks <- smushBlocks <$> mapM bodyPartToBlocks bps
|
||||
return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
|
||||
|
||||
rowToBlocksList :: Row -> DocxContext [Blocks]
|
||||
|
@ -479,11 +479,11 @@ bodyPartToBlocks (Paragraph pPr parparts)
|
|||
$ concatMap parPartToString parparts
|
||||
| Just (style, n) <- pHeading pPr = do
|
||||
ils <- local (\s-> s{docxInHeaderBlock=True}) $
|
||||
(concatReduce <$> mapM parPartToInlines parparts)
|
||||
(smushInlines <$> mapM parPartToInlines parparts)
|
||||
makeHeaderAnchor $
|
||||
headerWith ("", delete style (pStyle pPr), []) n ils
|
||||
| otherwise = do
|
||||
ils <- concatReduce <$> mapM parPartToInlines parparts >>=
|
||||
ils <- smushInlines <$> mapM parPartToInlines parparts >>=
|
||||
(return . fromList . trimLineBreaks . normalizeSpaces . toList)
|
||||
dropIls <- gets docxDropCap
|
||||
let ils' = dropIls <> ils
|
||||
|
@ -561,7 +561,7 @@ bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag)
|
|||
bodyToOutput (Body bps) = do
|
||||
let (metabps, blkbps) = sepBodyParts bps
|
||||
meta <- bodyPartsToMeta metabps
|
||||
blks <- concatReduce <$> mapM bodyPartToBlocks blkbps
|
||||
blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps
|
||||
blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
|
||||
mediaBag <- gets docxMediaBag
|
||||
return $ (meta,
|
||||
|
|
154
src/Text/Pandoc/Readers/Docx/Combine.hs
Normal file
154
src/Text/Pandoc/Readers/Docx/Combine.hs
Normal file
|
@ -0,0 +1,154 @@
|
|||
{-# LANGUAGE TypeSynonymInstances, FlexibleInstances,
|
||||
PatternGuards #-}
|
||||
|
||||
module Text.Pandoc.Readers.Docx.Combine ( smushInlines
|
||||
, smushBlocks
|
||||
)
|
||||
where
|
||||
|
||||
import Text.Pandoc.Builder
|
||||
import Data.List
|
||||
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr, (><), (|>))
|
||||
import qualified Data.Sequence as Seq (null)
|
||||
|
||||
data Modifier a = Modifier (a -> a)
|
||||
| AttrModifier (Attr -> a -> a) Attr
|
||||
| NullModifier
|
||||
|
||||
spaceOutInlinesL :: Inlines -> (Inlines, Inlines)
|
||||
spaceOutInlinesL ms = (l, stackInlines fs (m' <> r))
|
||||
where (l, m, r) = spaceOutInlines ms
|
||||
(fs, m') = unstackInlines m
|
||||
|
||||
spaceOutInlinesR :: Inlines -> (Inlines, Inlines)
|
||||
spaceOutInlinesR ms = (stackInlines fs (l <> m'), r)
|
||||
where (l, m, r) = spaceOutInlines ms
|
||||
(fs, m') = unstackInlines m
|
||||
|
||||
spaceOutInlines :: Inlines -> (Inlines, Inlines, Inlines)
|
||||
spaceOutInlines ils =
|
||||
let (fs, ils') = unstackInlines ils
|
||||
contents = unMany ils'
|
||||
left = case viewl contents of
|
||||
(Space :< _) -> space
|
||||
_ -> mempty
|
||||
right = case viewr contents of
|
||||
(_ :> Space) -> space
|
||||
_ -> mempty in
|
||||
(left, (stackInlines fs $ trimInlines . Many $ contents), right)
|
||||
|
||||
stackInlines :: [Modifier Inlines] -> Inlines -> Inlines
|
||||
stackInlines [] ms = ms
|
||||
stackInlines (NullModifier : fs) ms = stackInlines fs ms
|
||||
stackInlines ((Modifier f) : fs) ms =
|
||||
if isEmpty ms
|
||||
then stackInlines fs ms
|
||||
else f $ stackInlines fs ms
|
||||
stackInlines ((AttrModifier f attr) : fs) ms = f attr $ stackInlines fs ms
|
||||
|
||||
unstackInlines :: Inlines -> ([Modifier Inlines], Inlines)
|
||||
unstackInlines ms = case ilModifier ms of
|
||||
NullModifier -> ([], ms)
|
||||
_ -> (f : fs, ms') where
|
||||
f = ilModifier ms
|
||||
(fs, ms') = unstackInlines $ ilInnards ms
|
||||
|
||||
ilModifier :: Inlines -> Modifier Inlines
|
||||
ilModifier ils = case viewl (unMany ils) of
|
||||
(x :< xs) | Seq.null xs -> case x of
|
||||
(Emph _) -> Modifier emph
|
||||
(Strong _) -> Modifier strong
|
||||
(SmallCaps _) -> Modifier smallcaps
|
||||
(Strikeout _) -> Modifier strikeout
|
||||
(Superscript _) -> Modifier superscript
|
||||
(Subscript _) -> Modifier subscript
|
||||
(Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt)
|
||||
(Span attr _) -> AttrModifier spanWith attr
|
||||
_ -> NullModifier
|
||||
_ -> NullModifier
|
||||
|
||||
ilInnards :: Inlines -> Inlines
|
||||
ilInnards ils = case viewl (unMany ils) of
|
||||
(x :< xs) | Seq.null xs -> case x of
|
||||
(Emph lst) -> fromList lst
|
||||
(Strong lst) -> fromList lst
|
||||
(SmallCaps lst) -> fromList lst
|
||||
(Strikeout lst) -> fromList lst
|
||||
(Superscript lst) -> fromList lst
|
||||
(Subscript lst) -> fromList lst
|
||||
(Link _ lst _) -> fromList lst
|
||||
(Span _ lst) -> fromList lst
|
||||
_ -> ils
|
||||
_ -> ils
|
||||
|
||||
inlinesL :: Inlines -> (Inlines, Inlines)
|
||||
inlinesL ils = case viewl $ unMany ils of
|
||||
(s :< sq) -> (singleton s, Many sq)
|
||||
_ -> (mempty, ils)
|
||||
|
||||
inlinesR :: Inlines -> (Inlines, Inlines)
|
||||
inlinesR ils = case viewr $ unMany ils of
|
||||
(sq :> s) -> (Many sq, singleton s)
|
||||
_ -> (ils, mempty)
|
||||
|
||||
combineInlines :: Inlines -> Inlines -> Inlines
|
||||
combineInlines x y =
|
||||
let (xs', x') = inlinesR x
|
||||
(y', ys') = inlinesL y
|
||||
in
|
||||
xs' <> (combineSingletonInlines x' y') <> ys'
|
||||
|
||||
combineSingletonInlines :: Inlines -> Inlines -> Inlines
|
||||
combineSingletonInlines x y =
|
||||
let (xfs, xs) = unstackInlines x
|
||||
(yfs, ys) = unstackInlines y
|
||||
shared = xfs `intersect` yfs
|
||||
x_remaining = xfs \\ shared
|
||||
y_remaining = yfs \\ shared
|
||||
x_rem_attr = filter isAttrModifier x_remaining
|
||||
y_rem_attr = filter isAttrModifier y_remaining
|
||||
in
|
||||
case null shared of
|
||||
True | isEmpty xs && isEmpty ys ->
|
||||
stackInlines (x_rem_attr ++ y_rem_attr) mempty
|
||||
| isEmpty xs ->
|
||||
let (sp, y') = spaceOutInlinesL y in
|
||||
(stackInlines x_rem_attr mempty) <> sp <> y'
|
||||
| isEmpty ys ->
|
||||
let (x', sp) = spaceOutInlinesR x in
|
||||
x' <> sp <> (stackInlines y_rem_attr mempty)
|
||||
| otherwise ->
|
||||
let (x', xsp) = spaceOutInlinesR x
|
||||
(ysp, y') = spaceOutInlinesL y
|
||||
in
|
||||
x' <> xsp <> ysp <> y'
|
||||
False -> stackInlines shared $
|
||||
combineInlines
|
||||
(stackInlines x_remaining xs)
|
||||
(stackInlines y_remaining ys)
|
||||
|
||||
combineBlocks :: Blocks -> Blocks -> Blocks
|
||||
combineBlocks bs cs
|
||||
| bs' :> (BlockQuote bs'') <- viewr (unMany bs)
|
||||
, (BlockQuote cs'') :< cs' <- viewl (unMany cs) =
|
||||
Many $ (bs' |> (BlockQuote (bs'' <> cs''))) >< cs'
|
||||
combineBlocks bs cs = bs <> cs
|
||||
|
||||
instance (Monoid a, Eq a) => Eq (Modifier a) where
|
||||
(Modifier f) == (Modifier g) = (f mempty == g mempty)
|
||||
(AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty)
|
||||
(NullModifier) == (NullModifier) = True
|
||||
_ == _ = False
|
||||
|
||||
isEmpty :: (Monoid a, Eq a) => a -> Bool
|
||||
isEmpty x = x == mempty
|
||||
|
||||
isAttrModifier :: Modifier a -> Bool
|
||||
isAttrModifier (AttrModifier _ _) = True
|
||||
isAttrModifier _ = False
|
||||
|
||||
smushInlines :: [Inlines] -> Inlines
|
||||
smushInlines xs = foldl combineInlines mempty xs
|
||||
|
||||
smushBlocks :: [Blocks] -> Blocks
|
||||
smushBlocks xs = foldl combineBlocks mempty xs
|
|
@ -1,183 +0,0 @@
|
|||
{-# LANGUAGE TypeSynonymInstances, FlexibleInstances,
|
||||
PatternGuards #-}
|
||||
|
||||
module Text.Pandoc.Readers.Docx.Reducible ( concatReduce
|
||||
, (<+>)
|
||||
)
|
||||
where
|
||||
|
||||
|
||||
import Text.Pandoc.Builder
|
||||
import Data.List
|
||||
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
|
||||
import qualified Data.Sequence as Seq (null)
|
||||
|
||||
data Modifier a = Modifier (a -> a)
|
||||
| AttrModifier (Attr -> a -> a) Attr
|
||||
| NullModifier
|
||||
|
||||
class (Eq a) => Modifiable a where
|
||||
modifier :: a -> Modifier a
|
||||
innards :: a -> a
|
||||
getL :: a -> (a, a)
|
||||
getR :: a -> (a, a)
|
||||
spaceOut :: a -> (a, a, a)
|
||||
|
||||
spaceOutL :: (Monoid a, Modifiable a) => a -> (a, a)
|
||||
spaceOutL ms = (l, stack fs (m' <> r))
|
||||
where (l, m, r) = spaceOut ms
|
||||
(fs, m') = unstack m
|
||||
|
||||
spaceOutR :: (Monoid a, Modifiable a) => a -> (a, a)
|
||||
spaceOutR ms = (stack fs (l <> m'), r)
|
||||
where (l, m, r) = spaceOut ms
|
||||
(fs, m') = unstack m
|
||||
|
||||
instance (Monoid a, Show a) => Show (Modifier a) where
|
||||
show (Modifier f) = show $ f mempty
|
||||
show (AttrModifier f attr) = show $ f attr mempty
|
||||
show (NullModifier) = "NullModifier"
|
||||
|
||||
instance (Monoid a, Eq a) => Eq (Modifier a) where
|
||||
(Modifier f) == (Modifier g) = (f mempty == g mempty)
|
||||
(AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty)
|
||||
(NullModifier) == (NullModifier) = True
|
||||
_ == _ = False
|
||||
|
||||
instance Modifiable Inlines where
|
||||
modifier ils = case viewl (unMany ils) of
|
||||
(x :< xs) | Seq.null xs -> case x of
|
||||
(Emph _) -> Modifier emph
|
||||
(Strong _) -> Modifier strong
|
||||
(SmallCaps _) -> Modifier smallcaps
|
||||
(Strikeout _) -> Modifier strikeout
|
||||
(Superscript _) -> Modifier superscript
|
||||
(Subscript _) -> Modifier subscript
|
||||
(Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt)
|
||||
(Span attr _) -> AttrModifier spanWith attr
|
||||
_ -> NullModifier
|
||||
_ -> NullModifier
|
||||
|
||||
innards ils = case viewl (unMany ils) of
|
||||
(x :< xs) | Seq.null xs -> case x of
|
||||
(Emph lst) -> fromList lst
|
||||
(Strong lst) -> fromList lst
|
||||
(SmallCaps lst) -> fromList lst
|
||||
(Strikeout lst) -> fromList lst
|
||||
(Superscript lst) -> fromList lst
|
||||
(Subscript lst) -> fromList lst
|
||||
(Link _ lst _) -> fromList lst
|
||||
(Span _ lst) -> fromList lst
|
||||
_ -> ils
|
||||
_ -> ils
|
||||
|
||||
getL ils = case viewl $ unMany ils of
|
||||
(s :< sq) -> (singleton s, Many sq)
|
||||
_ -> (mempty, ils)
|
||||
|
||||
getR ils = case viewr $ unMany ils of
|
||||
(sq :> s) -> (Many sq, singleton s)
|
||||
_ -> (ils, mempty)
|
||||
|
||||
spaceOut ils =
|
||||
let (fs, ils') = unstack ils
|
||||
contents = unMany ils'
|
||||
left = case viewl contents of
|
||||
(Space :< _) -> space
|
||||
_ -> mempty
|
||||
right = case viewr contents of
|
||||
(_ :> Space) -> space
|
||||
_ -> mempty in
|
||||
(left, (stack fs $ trimInlines .Many $ contents), right)
|
||||
|
||||
instance Modifiable Blocks where
|
||||
modifier blks = case viewl (unMany blks) of
|
||||
(x :< xs) | Seq.null xs -> case x of
|
||||
(BlockQuote _) -> Modifier blockQuote
|
||||
-- (Div attr _) -> AttrModifier divWith attr
|
||||
_ -> NullModifier
|
||||
_ -> NullModifier
|
||||
|
||||
innards blks = case viewl (unMany blks) of
|
||||
(x :< xs) | Seq.null xs -> case x of
|
||||
(BlockQuote lst) -> fromList lst
|
||||
-- (Div attr lst) -> fromList lst
|
||||
_ -> blks
|
||||
_ -> blks
|
||||
|
||||
spaceOut blks = (mempty, blks, mempty)
|
||||
|
||||
getL ils = case viewl $ unMany ils of
|
||||
(s :< sq) -> (singleton s, Many sq)
|
||||
_ -> (mempty, ils)
|
||||
|
||||
getR ils = case viewr $ unMany ils of
|
||||
(sq :> s) -> (Many sq, singleton s)
|
||||
_ -> (ils, mempty)
|
||||
|
||||
|
||||
unstack :: (Modifiable a) => a -> ([Modifier a], a)
|
||||
unstack ms = case modifier ms of
|
||||
NullModifier -> ([], ms)
|
||||
_ -> (f : fs, ms') where
|
||||
f = modifier ms
|
||||
(fs, ms') = unstack $ innards ms
|
||||
|
||||
stack :: (Monoid a, Modifiable a) => [Modifier a] -> a -> a
|
||||
stack [] ms = ms
|
||||
stack (NullModifier : fs) ms = stack fs ms
|
||||
stack ((Modifier f) : fs) ms =
|
||||
if isEmpty ms
|
||||
then stack fs ms
|
||||
else f $ stack fs ms
|
||||
stack ((AttrModifier f attr) : fs) ms = f attr $ stack fs ms
|
||||
|
||||
isEmpty :: (Monoid a, Eq a) => a -> Bool
|
||||
isEmpty x = x == mempty
|
||||
|
||||
|
||||
combine :: (Monoid a, Modifiable a, Eq a) => a -> a -> a
|
||||
combine x y =
|
||||
let (xs', x') = getR x
|
||||
(y', ys') = getL y
|
||||
in
|
||||
xs' <> (combineSingleton x' y') <> ys'
|
||||
|
||||
isAttrModifier :: Modifier a -> Bool
|
||||
isAttrModifier (AttrModifier _ _) = True
|
||||
isAttrModifier _ = False
|
||||
|
||||
combineSingleton :: (Monoid a, Modifiable a, Eq a) => a -> a -> a
|
||||
combineSingleton x y =
|
||||
let (xfs, xs) = unstack x
|
||||
(yfs, ys) = unstack y
|
||||
shared = xfs `intersect` yfs
|
||||
x_remaining = xfs \\ shared
|
||||
y_remaining = yfs \\ shared
|
||||
x_rem_attr = filter isAttrModifier x_remaining
|
||||
y_rem_attr = filter isAttrModifier y_remaining
|
||||
in
|
||||
case null shared of
|
||||
True | isEmpty xs && isEmpty ys ->
|
||||
stack (x_rem_attr ++ y_rem_attr) mempty
|
||||
| isEmpty xs ->
|
||||
let (sp, y') = spaceOutL y in
|
||||
(stack x_rem_attr mempty) <> sp <> y'
|
||||
| isEmpty ys ->
|
||||
let (x', sp) = spaceOutR x in
|
||||
x' <> sp <> (stack y_rem_attr mempty)
|
||||
| otherwise ->
|
||||
let (x', xsp) = spaceOutR x
|
||||
(ysp, y') = spaceOutL y
|
||||
in
|
||||
x' <> xsp <> ysp <> y'
|
||||
False -> stack shared $
|
||||
combine
|
||||
(stack x_remaining xs)
|
||||
(stack y_remaining ys)
|
||||
|
||||
(<+>) :: (Monoid a, Modifiable a, Eq a) => a -> a -> a
|
||||
x <+> y = combine x y
|
||||
|
||||
concatReduce :: (Monoid a, Modifiable a) => [a] -> a
|
||||
concatReduce xs = foldl combine mempty xs
|
Loading…
Reference in a new issue