Docx Reader: Get rid of Modifiable typeclass.

The docx reader used to use a Modifiable typeclass to combine both
Blocks and Inlines. But all the work was in the inlines. So most of the
generality was wasted, at the expense of making the code harder to
understand. This gets rid of the generality, and adds functions for
Blocks and Inlines. It should be a bit easier to work with going forward.
This commit is contained in:
Jesse Rosenthal 2016-02-26 08:47:26 -05:00
parent 38bd4162fe
commit a7a0b452a5
4 changed files with 170 additions and 199 deletions

View file

@ -372,7 +372,7 @@ Library
Text.Pandoc.Process,
Text.Pandoc.CSS
Other-Modules: Text.Pandoc.Readers.Docx.Lists,
Text.Pandoc.Readers.Docx.Reducible,
Text.Pandoc.Readers.Docx.Combine,
Text.Pandoc.Readers.Docx.Parse,
Text.Pandoc.Readers.Docx.Fonts,
Text.Pandoc.Readers.Docx.Util,

View file

@ -81,7 +81,7 @@ import Text.Pandoc.Builder
import Text.Pandoc.Walk
import Text.Pandoc.Readers.Docx.Parse
import Text.Pandoc.Readers.Docx.Lists
import Text.Pandoc.Readers.Docx.Reducible
import Text.Pandoc.Readers.Docx.Combine
import Text.Pandoc.Shared
import Text.Pandoc.MediaBag (insertMedia, MediaBag)
import Data.List (delete, (\\), intersect)
@ -167,7 +167,7 @@ bodyPartsToMeta' (bp : bps)
| (Paragraph pPr parParts) <- bp
, (c : _)<- intersect (pStyle pPr) (M.keys metaStyles)
, (Just metaField) <- M.lookup c metaStyles = do
inlines <- concatReduce <$> mapM parPartToInlines parParts
inlines <- smushInlines <$> mapM parPartToInlines parParts
remaining <- bodyPartsToMeta' bps
let
f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils']
@ -291,13 +291,13 @@ runToInlines (Run rs runElems)
Just SubScrpt -> subscript codeString
_ -> codeString
| otherwise = do
let ils = concatReduce (map runElemToInlines runElems)
let ils = smushInlines (map runElemToInlines runElems)
return $ (runStyleToTransform $ resolveDependentRunStyle rs) ils
runToInlines (Footnote bps) = do
blksList <- concatReduce <$> (mapM bodyPartToBlocks bps)
blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps)
return $ note blksList
runToInlines (Endnote bps) = do
blksList <- concatReduce <$> (mapM bodyPartToBlocks bps)
blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps)
return $ note blksList
runToInlines (InlineDrawing fp bs ext) = do
mediaBag <- gets docxMediaBag
@ -316,19 +316,19 @@ parPartToInlines (PlainRun r) = runToInlines r
parPartToInlines (Insertion _ author date runs) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AcceptChanges -> concatReduce <$> mapM runToInlines runs
AcceptChanges -> smushInlines <$> mapM runToInlines runs
RejectChanges -> return mempty
AllChanges -> do
ils <- concatReduce <$> mapM runToInlines runs
ils <- smushInlines <$> mapM runToInlines runs
let attr = ("", ["insertion"], [("author", author), ("date", date)])
return $ spanWith attr ils
parPartToInlines (Deletion _ author date runs) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AcceptChanges -> return mempty
RejectChanges -> concatReduce <$> mapM runToInlines runs
RejectChanges -> smushInlines <$> mapM runToInlines runs
AllChanges -> do
ils <- concatReduce <$> mapM runToInlines runs
ils <- smushInlines <$> mapM runToInlines runs
let attr = ("", ["deletion"], [("author", author), ("date", date)])
return $ spanWith attr ils
parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors =
@ -361,10 +361,10 @@ parPartToInlines (Drawing fp bs ext) = do
modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
return $ imageWith (extentToAttr ext) fp "" ""
parPartToInlines (InternalHyperLink anchor runs) = do
ils <- concatReduce <$> mapM runToInlines runs
ils <- smushInlines <$> mapM runToInlines runs
return $ link ('#' : anchor) "" ils
parPartToInlines (ExternalHyperLink target runs) = do
ils <- concatReduce <$> mapM runToInlines runs
ils <- smushInlines <$> mapM runToInlines runs
return $ link target "" ils
parPartToInlines (PlainOMath exps) = do
return $ math $ writeTeX exps
@ -417,7 +417,7 @@ singleParaToPlain blks = blks
cellToBlocks :: Cell -> DocxContext Blocks
cellToBlocks (Cell bps) = do
blks <- concatReduce <$> mapM bodyPartToBlocks bps
blks <- smushBlocks <$> mapM bodyPartToBlocks bps
return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
rowToBlocksList :: Row -> DocxContext [Blocks]
@ -479,11 +479,11 @@ bodyPartToBlocks (Paragraph pPr parparts)
$ concatMap parPartToString parparts
| Just (style, n) <- pHeading pPr = do
ils <- local (\s-> s{docxInHeaderBlock=True}) $
(concatReduce <$> mapM parPartToInlines parparts)
(smushInlines <$> mapM parPartToInlines parparts)
makeHeaderAnchor $
headerWith ("", delete style (pStyle pPr), []) n ils
| otherwise = do
ils <- concatReduce <$> mapM parPartToInlines parparts >>=
ils <- smushInlines <$> mapM parPartToInlines parparts >>=
(return . fromList . trimLineBreaks . normalizeSpaces . toList)
dropIls <- gets docxDropCap
let ils' = dropIls <> ils
@ -561,7 +561,7 @@ bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag)
bodyToOutput (Body bps) = do
let (metabps, blkbps) = sepBodyParts bps
meta <- bodyPartsToMeta metabps
blks <- concatReduce <$> mapM bodyPartToBlocks blkbps
blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps
blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
mediaBag <- gets docxMediaBag
return $ (meta,

View file

@ -0,0 +1,154 @@
{-# LANGUAGE TypeSynonymInstances, FlexibleInstances,
PatternGuards #-}
module Text.Pandoc.Readers.Docx.Combine ( smushInlines
, smushBlocks
)
where
import Text.Pandoc.Builder
import Data.List
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr, (><), (|>))
import qualified Data.Sequence as Seq (null)
data Modifier a = Modifier (a -> a)
| AttrModifier (Attr -> a -> a) Attr
| NullModifier
spaceOutInlinesL :: Inlines -> (Inlines, Inlines)
spaceOutInlinesL ms = (l, stackInlines fs (m' <> r))
where (l, m, r) = spaceOutInlines ms
(fs, m') = unstackInlines m
spaceOutInlinesR :: Inlines -> (Inlines, Inlines)
spaceOutInlinesR ms = (stackInlines fs (l <> m'), r)
where (l, m, r) = spaceOutInlines ms
(fs, m') = unstackInlines m
spaceOutInlines :: Inlines -> (Inlines, Inlines, Inlines)
spaceOutInlines ils =
let (fs, ils') = unstackInlines ils
contents = unMany ils'
left = case viewl contents of
(Space :< _) -> space
_ -> mempty
right = case viewr contents of
(_ :> Space) -> space
_ -> mempty in
(left, (stackInlines fs $ trimInlines . Many $ contents), right)
stackInlines :: [Modifier Inlines] -> Inlines -> Inlines
stackInlines [] ms = ms
stackInlines (NullModifier : fs) ms = stackInlines fs ms
stackInlines ((Modifier f) : fs) ms =
if isEmpty ms
then stackInlines fs ms
else f $ stackInlines fs ms
stackInlines ((AttrModifier f attr) : fs) ms = f attr $ stackInlines fs ms
unstackInlines :: Inlines -> ([Modifier Inlines], Inlines)
unstackInlines ms = case ilModifier ms of
NullModifier -> ([], ms)
_ -> (f : fs, ms') where
f = ilModifier ms
(fs, ms') = unstackInlines $ ilInnards ms
ilModifier :: Inlines -> Modifier Inlines
ilModifier ils = case viewl (unMany ils) of
(x :< xs) | Seq.null xs -> case x of
(Emph _) -> Modifier emph
(Strong _) -> Modifier strong
(SmallCaps _) -> Modifier smallcaps
(Strikeout _) -> Modifier strikeout
(Superscript _) -> Modifier superscript
(Subscript _) -> Modifier subscript
(Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt)
(Span attr _) -> AttrModifier spanWith attr
_ -> NullModifier
_ -> NullModifier
ilInnards :: Inlines -> Inlines
ilInnards ils = case viewl (unMany ils) of
(x :< xs) | Seq.null xs -> case x of
(Emph lst) -> fromList lst
(Strong lst) -> fromList lst
(SmallCaps lst) -> fromList lst
(Strikeout lst) -> fromList lst
(Superscript lst) -> fromList lst
(Subscript lst) -> fromList lst
(Link _ lst _) -> fromList lst
(Span _ lst) -> fromList lst
_ -> ils
_ -> ils
inlinesL :: Inlines -> (Inlines, Inlines)
inlinesL ils = case viewl $ unMany ils of
(s :< sq) -> (singleton s, Many sq)
_ -> (mempty, ils)
inlinesR :: Inlines -> (Inlines, Inlines)
inlinesR ils = case viewr $ unMany ils of
(sq :> s) -> (Many sq, singleton s)
_ -> (ils, mempty)
combineInlines :: Inlines -> Inlines -> Inlines
combineInlines x y =
let (xs', x') = inlinesR x
(y', ys') = inlinesL y
in
xs' <> (combineSingletonInlines x' y') <> ys'
combineSingletonInlines :: Inlines -> Inlines -> Inlines
combineSingletonInlines x y =
let (xfs, xs) = unstackInlines x
(yfs, ys) = unstackInlines y
shared = xfs `intersect` yfs
x_remaining = xfs \\ shared
y_remaining = yfs \\ shared
x_rem_attr = filter isAttrModifier x_remaining
y_rem_attr = filter isAttrModifier y_remaining
in
case null shared of
True | isEmpty xs && isEmpty ys ->
stackInlines (x_rem_attr ++ y_rem_attr) mempty
| isEmpty xs ->
let (sp, y') = spaceOutInlinesL y in
(stackInlines x_rem_attr mempty) <> sp <> y'
| isEmpty ys ->
let (x', sp) = spaceOutInlinesR x in
x' <> sp <> (stackInlines y_rem_attr mempty)
| otherwise ->
let (x', xsp) = spaceOutInlinesR x
(ysp, y') = spaceOutInlinesL y
in
x' <> xsp <> ysp <> y'
False -> stackInlines shared $
combineInlines
(stackInlines x_remaining xs)
(stackInlines y_remaining ys)
combineBlocks :: Blocks -> Blocks -> Blocks
combineBlocks bs cs
| bs' :> (BlockQuote bs'') <- viewr (unMany bs)
, (BlockQuote cs'') :< cs' <- viewl (unMany cs) =
Many $ (bs' |> (BlockQuote (bs'' <> cs''))) >< cs'
combineBlocks bs cs = bs <> cs
instance (Monoid a, Eq a) => Eq (Modifier a) where
(Modifier f) == (Modifier g) = (f mempty == g mempty)
(AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty)
(NullModifier) == (NullModifier) = True
_ == _ = False
isEmpty :: (Monoid a, Eq a) => a -> Bool
isEmpty x = x == mempty
isAttrModifier :: Modifier a -> Bool
isAttrModifier (AttrModifier _ _) = True
isAttrModifier _ = False
smushInlines :: [Inlines] -> Inlines
smushInlines xs = foldl combineInlines mempty xs
smushBlocks :: [Blocks] -> Blocks
smushBlocks xs = foldl combineBlocks mempty xs

View file

@ -1,183 +0,0 @@
{-# LANGUAGE TypeSynonymInstances, FlexibleInstances,
PatternGuards #-}
module Text.Pandoc.Readers.Docx.Reducible ( concatReduce
, (<+>)
)
where
import Text.Pandoc.Builder
import Data.List
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
import qualified Data.Sequence as Seq (null)
data Modifier a = Modifier (a -> a)
| AttrModifier (Attr -> a -> a) Attr
| NullModifier
class (Eq a) => Modifiable a where
modifier :: a -> Modifier a
innards :: a -> a
getL :: a -> (a, a)
getR :: a -> (a, a)
spaceOut :: a -> (a, a, a)
spaceOutL :: (Monoid a, Modifiable a) => a -> (a, a)
spaceOutL ms = (l, stack fs (m' <> r))
where (l, m, r) = spaceOut ms
(fs, m') = unstack m
spaceOutR :: (Monoid a, Modifiable a) => a -> (a, a)
spaceOutR ms = (stack fs (l <> m'), r)
where (l, m, r) = spaceOut ms
(fs, m') = unstack m
instance (Monoid a, Show a) => Show (Modifier a) where
show (Modifier f) = show $ f mempty
show (AttrModifier f attr) = show $ f attr mempty
show (NullModifier) = "NullModifier"
instance (Monoid a, Eq a) => Eq (Modifier a) where
(Modifier f) == (Modifier g) = (f mempty == g mempty)
(AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty)
(NullModifier) == (NullModifier) = True
_ == _ = False
instance Modifiable Inlines where
modifier ils = case viewl (unMany ils) of
(x :< xs) | Seq.null xs -> case x of
(Emph _) -> Modifier emph
(Strong _) -> Modifier strong
(SmallCaps _) -> Modifier smallcaps
(Strikeout _) -> Modifier strikeout
(Superscript _) -> Modifier superscript
(Subscript _) -> Modifier subscript
(Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt)
(Span attr _) -> AttrModifier spanWith attr
_ -> NullModifier
_ -> NullModifier
innards ils = case viewl (unMany ils) of
(x :< xs) | Seq.null xs -> case x of
(Emph lst) -> fromList lst
(Strong lst) -> fromList lst
(SmallCaps lst) -> fromList lst
(Strikeout lst) -> fromList lst
(Superscript lst) -> fromList lst
(Subscript lst) -> fromList lst
(Link _ lst _) -> fromList lst
(Span _ lst) -> fromList lst
_ -> ils
_ -> ils
getL ils = case viewl $ unMany ils of
(s :< sq) -> (singleton s, Many sq)
_ -> (mempty, ils)
getR ils = case viewr $ unMany ils of
(sq :> s) -> (Many sq, singleton s)
_ -> (ils, mempty)
spaceOut ils =
let (fs, ils') = unstack ils
contents = unMany ils'
left = case viewl contents of
(Space :< _) -> space
_ -> mempty
right = case viewr contents of
(_ :> Space) -> space
_ -> mempty in
(left, (stack fs $ trimInlines .Many $ contents), right)
instance Modifiable Blocks where
modifier blks = case viewl (unMany blks) of
(x :< xs) | Seq.null xs -> case x of
(BlockQuote _) -> Modifier blockQuote
-- (Div attr _) -> AttrModifier divWith attr
_ -> NullModifier
_ -> NullModifier
innards blks = case viewl (unMany blks) of
(x :< xs) | Seq.null xs -> case x of
(BlockQuote lst) -> fromList lst
-- (Div attr lst) -> fromList lst
_ -> blks
_ -> blks
spaceOut blks = (mempty, blks, mempty)
getL ils = case viewl $ unMany ils of
(s :< sq) -> (singleton s, Many sq)
_ -> (mempty, ils)
getR ils = case viewr $ unMany ils of
(sq :> s) -> (Many sq, singleton s)
_ -> (ils, mempty)
unstack :: (Modifiable a) => a -> ([Modifier a], a)
unstack ms = case modifier ms of
NullModifier -> ([], ms)
_ -> (f : fs, ms') where
f = modifier ms
(fs, ms') = unstack $ innards ms
stack :: (Monoid a, Modifiable a) => [Modifier a] -> a -> a
stack [] ms = ms
stack (NullModifier : fs) ms = stack fs ms
stack ((Modifier f) : fs) ms =
if isEmpty ms
then stack fs ms
else f $ stack fs ms
stack ((AttrModifier f attr) : fs) ms = f attr $ stack fs ms
isEmpty :: (Monoid a, Eq a) => a -> Bool
isEmpty x = x == mempty
combine :: (Monoid a, Modifiable a, Eq a) => a -> a -> a
combine x y =
let (xs', x') = getR x
(y', ys') = getL y
in
xs' <> (combineSingleton x' y') <> ys'
isAttrModifier :: Modifier a -> Bool
isAttrModifier (AttrModifier _ _) = True
isAttrModifier _ = False
combineSingleton :: (Monoid a, Modifiable a, Eq a) => a -> a -> a
combineSingleton x y =
let (xfs, xs) = unstack x
(yfs, ys) = unstack y
shared = xfs `intersect` yfs
x_remaining = xfs \\ shared
y_remaining = yfs \\ shared
x_rem_attr = filter isAttrModifier x_remaining
y_rem_attr = filter isAttrModifier y_remaining
in
case null shared of
True | isEmpty xs && isEmpty ys ->
stack (x_rem_attr ++ y_rem_attr) mempty
| isEmpty xs ->
let (sp, y') = spaceOutL y in
(stack x_rem_attr mempty) <> sp <> y'
| isEmpty ys ->
let (x', sp) = spaceOutR x in
x' <> sp <> (stack y_rem_attr mempty)
| otherwise ->
let (x', xsp) = spaceOutR x
(ysp, y') = spaceOutL y
in
x' <> xsp <> ysp <> y'
False -> stack shared $
combine
(stack x_remaining xs)
(stack y_remaining ys)
(<+>) :: (Monoid a, Modifiable a, Eq a) => a -> a -> a
x <+> y = combine x y
concatReduce :: (Monoid a, Modifiable a) => [a] -> a
concatReduce xs = foldl combine mempty xs