[Docx Reader] Refactor/update smushInlines

This commit is contained in:
Nikolay Yakimov 2020-07-07 09:04:38 +03:00
parent 804e8eeed2
commit 48cef91d18
3 changed files with 32 additions and 44 deletions

View file

@ -4,7 +4,8 @@
{- | {- |
Module : Text.Pandoc.Readers.Docx.Combine Module : Text.Pandoc.Readers.Docx.Combine
Copyright : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>, Copyright : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
2014-2020 John MacFarlane <jgm@berkeley.edu> 2014-2020 John MacFarlane <jgm@berkeley.edu>,
2020 Nikolay Yakimov <root@livid.pp.ru>
License : GNU GPL, version 2 or above License : GNU GPL, version 2 or above
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
@ -59,7 +60,9 @@ module Text.Pandoc.Readers.Docx.Combine ( smushInlines
where where
import Data.List import Data.List
import Data.Sequence (ViewL (..), ViewR (..), viewl, viewr, (><), (|>)) import Data.Bifunctor
import Data.Sequence ( ViewL (..), ViewR (..), viewl, viewr, spanr, spanl
, (><), (|>) )
import qualified Data.Sequence as Seq (null) import qualified Data.Sequence as Seq (null)
import Text.Pandoc.Builder import Text.Pandoc.Builder
@ -80,14 +83,14 @@ spaceOutInlinesR ms = (stackInlines fs (l <> m'), r)
spaceOutInlines :: Inlines -> (Inlines, Inlines, Inlines) spaceOutInlines :: Inlines -> (Inlines, Inlines, Inlines)
spaceOutInlines ils = spaceOutInlines ils =
let (fs, ils') = unstackInlines ils let (fs, ils') = unstackInlines ils
contents = unMany ils' (left, (right, contents')) = second (spanr isSpace) $ spanl isSpace $ unMany ils'
left = case viewl contents of -- NOTE: spanr counterintuitively returns suffix as the FIRST tuple element
(Space :< _) -> space in (Many left, stackInlines fs $ Many contents', Many right)
_ -> mempty
right = case viewr contents of isSpace :: Inline -> Bool
(_ :> Space) -> space isSpace Space = True
_ -> mempty in isSpace SoftBreak = True
(left, stackInlines fs $ trimInlines . Many $ contents, right) isSpace _ = False
stackInlines :: [Modifier Inlines] -> Inlines -> Inlines stackInlines :: [Modifier Inlines] -> Inlines -> Inlines
stackInlines [] ms = ms stackInlines [] ms = ms
@ -99,39 +102,24 @@ stackInlines (Modifier f : fs) ms =
stackInlines (AttrModifier f attr : fs) ms = f attr $ stackInlines fs ms stackInlines (AttrModifier f attr : fs) ms = f attr $ stackInlines fs ms
unstackInlines :: Inlines -> ([Modifier Inlines], Inlines) unstackInlines :: Inlines -> ([Modifier Inlines], Inlines)
unstackInlines ms = case ilModifier ms of unstackInlines ms = case ilModifierAndInnards ms of
NullModifier -> ([], ms) (NullModifier, _) -> ([], ms)
_ -> (f : fs, ms') where (f, innards ) -> first (f :) $ unstackInlines innards
f = ilModifier ms
(fs, ms') = unstackInlines $ ilInnards ms
ilModifier :: Inlines -> Modifier Inlines ilModifierAndInnards :: Inlines -> (Modifier Inlines, Inlines)
ilModifier ils = case viewl (unMany ils) of ilModifierAndInnards ils = case viewl $ unMany ils of
(x :< xs) | Seq.null xs -> case x of x :< xs | Seq.null xs -> case x of
(Emph _) -> Modifier emph Emph lst -> (Modifier emph, fromList lst)
(Strong _) -> Modifier strong Strong lst -> (Modifier strong, fromList lst)
(SmallCaps _) -> Modifier smallcaps SmallCaps lst -> (Modifier smallcaps, fromList lst)
(Strikeout _) -> Modifier strikeout Strikeout lst -> (Modifier strikeout, fromList lst)
(Superscript _) -> Modifier superscript Underline lst -> (Modifier underline, fromList lst)
(Subscript _) -> Modifier subscript Superscript lst -> (Modifier superscript, fromList lst)
(Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt) Subscript lst -> (Modifier subscript, fromList lst)
(Span attr _) -> AttrModifier spanWith attr Link attr lst tgt -> (Modifier $ linkWith attr (fst tgt) (snd tgt), fromList lst)
_ -> NullModifier Span attr lst -> (AttrModifier spanWith attr, fromList lst)
_ -> NullModifier _ -> (NullModifier, ils)
_ -> (NullModifier, ils)
ilInnards :: Inlines -> Inlines
ilInnards ils = case viewl (unMany ils) of
(x :< xs) | Seq.null xs -> case x of
(Emph lst) -> fromList lst
(Strong lst) -> fromList lst
(SmallCaps lst) -> fromList lst
(Strikeout lst) -> fromList lst
(Superscript lst) -> fromList lst
(Subscript lst) -> fromList lst
(Link _ lst _) -> fromList lst
(Span _ lst) -> fromList lst
_ -> ils
_ -> ils
inlinesL :: Inlines -> (Inlines, Inlines) inlinesL :: Inlines -> (Inlines, Inlines)
inlinesL ils = case viewl $ unMany ils of inlinesL ils = case viewl $ unMany ils of
@ -162,7 +150,7 @@ combineSingletonInlines x y =
in in
case null shared of case null shared of
True | isEmpty xs && isEmpty ys -> True | isEmpty xs && isEmpty ys ->
stackInlines (x_rem_attr ++ y_rem_attr) mempty stackInlines (x_rem_attr <> y_rem_attr) mempty
| isEmpty xs -> | isEmpty xs ->
let (sp, y') = spaceOutInlinesL y in let (sp, y') = spaceOutInlinesL y in
stackInlines x_rem_attr mempty <> sp <> y' stackInlines x_rem_attr mempty <> sp <> y'

View file

@ -1,6 +1,6 @@
Pandoc (Meta {unMeta = fromList []}) Pandoc (Meta {unMeta = fromList []})
[Para [Str "Regular",Space,Str "text",Space,Emph [Str "italics"],Space,Strong [Str "bold",Space,Emph [Str "bold",Space,Str "italics"]],Str "."] [Para [Str "Regular",Space,Str "text",Space,Emph [Str "italics"],Space,Strong [Str "bold",Space,Emph [Str "bold",Space,Str "italics"]],Str "."]
,Para [Str "This",Space,Str "is",Space,SmallCaps [Str "Small",Space,Str "Caps"],Str ",",Space,Str "and",Space,Str "this",Space,Str "is",Space,Strikeout [Str "strikethrough"],Str "."] ,Para [Str "This",Space,Str "is",Space,SmallCaps [Str "Small",Space,Str "Caps"],Str ",",Space,Str "and",Space,Str "this",Space,Str "is",Space,Strikeout [Str "strikethrough"],Str "."]
,Para [Str "Some",Space,Str "people",Space,Str "use",Space,Underline [Str "single",Space,Str "underlines",Space,Str "for",Space],Emph [Underline [Str "emphasis"]],Str "."] ,Para [Str "Some",Space,Str "people",Space,Str "use",Space,Underline [Str "single",Space,Str "underlines",Space,Str "for",Space,Emph [Str "emphasis"]],Str "."]
,Para [Str "Above",Space,Str "the",Space,Str "line",Space,Str "is",Space,Superscript [Str "superscript"],Space,Str "and",Space,Str "below",Space,Str "the",Space,Str "line",Space,Str "is",Space,Subscript [Str "subscript"],Str "."] ,Para [Str "Above",Space,Str "the",Space,Str "line",Space,Str "is",Space,Superscript [Str "superscript"],Space,Str "and",Space,Str "below",Space,Str "the",Space,Str "line",Space,Str "is",Space,Subscript [Str "subscript"],Str "."]
,Para [Str "A",Space,Str "line",LineBreak,Str "break."]] ,Para [Str "A",Space,Str "line",LineBreak,Str "break."]]