ICML writer: consolidate adjacent strings, inc. spaces.

This avoids chunking up the output unnecessarily into
separate elements.
This commit is contained in:
John MacFarlane 2018-11-06 22:53:56 -08:00
parent dae3a0e3d2
commit 985db7b0a8
2 changed files with 38 additions and 143 deletions

View file

@ -416,7 +416,7 @@ definitionListItemToICML opts style (term,defs) = do
-- | Convert a list of inline elements to ICML.
inlinesToICML :: PandocMonad m => WriterOptions -> Style -> [Inline] -> WS m Doc
inlinesToICML opts style lst = vcat `fmap` mapM (inlineToICML opts style) (mergeSpaces lst)
inlinesToICML opts style lst = vcat `fmap` mapM (inlineToICML opts style) (mergeStrings opts lst)
-- | Convert an inline element to ICML.
inlineToICML :: PandocMonad m => WriterOptions -> Style -> Inline -> WS m Doc
@ -427,8 +427,10 @@ inlineToICML opts style (Strikeout lst) = inlinesToICML opts (strikeoutName:styl
inlineToICML opts style (Superscript lst) = inlinesToICML opts (superscriptName:style) lst
inlineToICML opts style (Subscript lst) = inlinesToICML opts (subscriptName:style) lst
inlineToICML opts style (SmallCaps lst) = inlinesToICML opts (smallCapsName:style) lst
inlineToICML opts style (Quoted SingleQuote lst) = inlinesToICML opts style $ [Str ""] ++ lst ++ [Str ""]
inlineToICML opts style (Quoted DoubleQuote lst) = inlinesToICML opts style $ [Str ""] ++ lst ++ [Str ""]
inlineToICML opts style (Quoted SingleQuote lst) = inlinesToICML opts style $
mergeStrings opts $ [Str ""] ++ lst ++ [Str ""]
inlineToICML opts style (Quoted DoubleQuote lst) = inlinesToICML opts style $
mergeStrings opts $ [Str ""] ++ lst ++ [Str ""]
inlineToICML opts style (Cite _ lst) = inlinesToICML opts (citeName:style) lst
inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ text $ escapeStringForXML str
inlineToICML _ style Space = charStyle style space
@ -474,18 +476,16 @@ footnoteToICML opts style lst =
$ inTags True "Footnote" [] $ number $$ intersperseBrs contents
-- | Auxiliary function to merge Space elements into the adjacent Strs.
mergeSpaces :: [Inline] -> [Inline]
mergeSpaces (Str s:(x:(Str s':xs))) | isSp x =
mergeSpaces $ Str(s++" "++s') : xs
mergeSpaces (x:(Str s:xs)) | isSp x = mergeSpaces $ Str (" "++s) : xs
mergeSpaces (Str s:(x:xs)) | isSp x = mergeSpaces $ Str (s++" ") : xs
mergeSpaces (x:xs) = x : mergeSpaces xs
mergeSpaces [] = []
isSp :: Inline -> Bool
isSp Space = True
isSp SoftBreak = True
isSp _ = False
mergeStrings :: WriterOptions -> [Inline] -> [Inline]
mergeStrings opts = mergeStrings' . map spaceToStr
where spaceToStr Space = Str " "
spaceToStr SoftBreak = case writerWrapText opts of
WrapPreserve -> Str "\n"
_ -> Str " "
spaceToStr x = x
mergeStrings' (Str x : Str y : zs) = mergeStrings' (Str (x ++ y) : zs)
mergeStrings' (x : xs) = x : mergeStrings' xs
mergeStrings' [] = []
-- | Intersperse line breaks
intersperseBrs :: [Doc] -> Doc

View file

@ -945,10 +945,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{</Content>
<Br />
<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList &gt; subParagraph &gt; Paragraph">
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content> </Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>Item 1. graf two. The quick brown fox jumped over the lazy dogs back.</Content>
<Content> Item 1. graf two. The quick brown fox jumped over the lazy dogs back.</Content>
</CharacterStyleRange>
</ParagraphStyleRange>
<Br />
@ -1122,10 +1119,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{</Content>
<Br />
<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList &gt; subParagraph &gt; Paragraph">
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content> </Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>with a continuation</Content>
<Content> with a continuation</Content>
</CharacterStyleRange>
</ParagraphStyleRange>
<Br />
@ -1897,13 +1891,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{</Content>
<Br />
<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph">
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>“</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>Hello,</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>”</Content>
<Content>“Hello,”</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content> said the spider. </Content>
@ -1912,55 +1900,28 @@ These should not be escaped: \$ \\ \&gt; \[ \{</Content>
<Content>“</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content></Content>
<Content>Shelob</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>Shelob</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content></Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content> is my name.</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>”</Content>
<Content> is my name.”</Content>
</CharacterStyleRange>
</ParagraphStyleRange>
<Br />
<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph">
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content></Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>A</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content></Content>
<Content>A</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>, </Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content></Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>B</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content></Content>
<Content>B</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>, and </Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content></Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>C</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content></Content>
<Content>C</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content> are letters.</Content>
@ -1969,67 +1930,34 @@ These should not be escaped: \$ \\ \&gt; \[ \{</Content>
<Br />
<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph">
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content></Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>Oak,</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content></Content>
<Content>Oak,</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content> </Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content></Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>elm,</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content></Content>
<Content>elm,</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content> and </Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content></Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>beech</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content></Content>
<Content>beech</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content> are names of trees. So is </Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content></Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>pine.</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content></Content>
<Content>pine.</Content>
</CharacterStyleRange>
</ParagraphStyleRange>
<Br />
<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph">
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content></Content>
<Content>He said, </Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>He said, </Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>“</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>I want to go.</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>”</Content>
<Content>“I want to go.”</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content></Content>
@ -2231,13 +2159,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{</Content>
<Content> of money. So is $34,000. (It worked if </Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>“</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>lot</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>”</Content>
<Content>“lot”</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content> is emphasized.)</Content>
@ -2818,13 +2740,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{</Content>
<Content>From </Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>“</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>Voyage dans la Lune</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>”</Content>
<Content>“Voyage dans la Lune”</Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content> by Georges Melies (1902):</Content>
@ -2916,10 +2832,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{</Content>
</ParagraphStyleRange>
<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Footnote &gt; Paragraph">
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content> </Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document.</Content>
<Content> Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document.</Content>
</CharacterStyleRange>
</ParagraphStyleRange>
</Footnote>
@ -2936,19 +2849,13 @@ These should not be escaped: \$ \\ \&gt; \[ \{</Content>
</ParagraphStyleRange>
<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Footnote &gt; Paragraph">
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content> </Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>Heres the long note. This one contains multiple blocks.</Content>
<Content> Heres the long note. This one contains multiple blocks.</Content>
</CharacterStyleRange>
</ParagraphStyleRange>
<Br />
<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Footnote &gt; Paragraph">
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content> </Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>Subsequent blocks are indented to show that they belong to the footnote (as with list items).</Content>
<Content> Subsequent blocks are indented to show that they belong to the footnote (as with list items).</Content>
</CharacterStyleRange>
</ParagraphStyleRange>
<Br />
@ -2960,10 +2867,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{</Content>
<Br />
<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Footnote &gt; Paragraph">
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content> </Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>If you want, you can indent every line, but you can also be lazy and just indent the first line of each block.</Content>
<Content> If you want, you can indent every line, but you can also be lazy and just indent the first line of each block.</Content>
</CharacterStyleRange>
</ParagraphStyleRange>
</Footnote>
@ -2986,10 +2890,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{</Content>
</ParagraphStyleRange>
<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Footnote &gt; Paragraph">
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content> </Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>This is </Content>
<Content> This is </Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Italic">
<Content>easier</Content>
@ -3029,10 +2930,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{</Content>
</ParagraphStyleRange>
<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Footnote &gt; Paragraph">
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content> </Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>In quote.</Content>
<Content> In quote.</Content>
</CharacterStyleRange>
</ParagraphStyleRange>
</Footnote>
@ -3052,10 +2950,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{</Content>
</ParagraphStyleRange>
<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Footnote &gt; Paragraph">
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content> </Content>
</CharacterStyleRange>
<CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
<Content>In list.</Content>
<Content> In list.</Content>
</CharacterStyleRange>
</ParagraphStyleRange>
</Footnote>