Implemented east_asian_line_breaks extension.

Text.Pandoc.Options: Added `Ext_east_asian_line_breaks` constructor to
`Extension` (API change).

This extension is like `ignore_line_breaks`, but smarter -- it
only ignores line breaks between two East Asian wide characters.
This makes it better suited for writing with a mix of East Asian
and non-East Asian scripts.

Closes #2586.
This commit is contained in:
John MacFarlane 2015-12-12 17:28:52 -08:00
parent 60d383e27e
commit 44120ea716
3 changed files with 26 additions and 2 deletions

10
README
View file

@ -3224,7 +3224,15 @@ treated as spaces or as hard line breaks. This option is intended for
use with East Asian languages where spaces are not used between words,
but text is divided into lines for readability.
#### Extension: `emoji` ####
#### Extension: `east_asian_line_breaks` ####
Causes newlines within a paragraph to be ignored, rather than
being treated as spaces or as hard line breaks, when they occur
between two East Asian wide characters. This is a better choice
than `ignore_line_breaks` for texts that include a mix of East
Asian wide characters and other characters.
#### Extension: `emoji` ####
Parses textual emojis like `:smile:` as Unicode emoticons.

View file

@ -106,6 +106,8 @@ data Extension =
| Ext_subscript -- ^ Subscript using ~this~ syntax
| Ext_hard_line_breaks -- ^ All newlines become hard line breaks
| Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored
| Ext_east_asian_line_breaks -- ^ Newlines in paragraphs are ignored between
-- East Asian wide characters
| Ext_literate_haskell -- ^ Enable literate Haskell conventions
| Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions
| Ext_emoji -- ^ Support emoji like :smile:

View file

@ -40,6 +40,7 @@ import Data.Char ( isSpace, isAlphaNum, toLower )
import Data.Maybe
import Text.Pandoc.Definition
import Text.Pandoc.Emoji (emojis)
import Text.Pandoc.Generic (bottomUp)
import qualified Data.Text as T
import Data.Text (Text)
import qualified Data.Yaml as Yaml
@ -51,6 +52,7 @@ import qualified Data.Vector as V
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
import Text.Pandoc.Options
import Text.Pandoc.Shared
import Text.Pandoc.Pretty (charWidth)
import Text.Pandoc.XML (fromEntities)
import Text.Pandoc.Parsing hiding (tableWith)
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
@ -356,7 +358,19 @@ parseMarkdown = do
st <- getState
let meta = runF (stateMeta' st) st
let Pandoc _ bs = B.doc $ runF blocks st
return $ Pandoc meta bs
eastAsianLineBreaks <- option False $
True <$ guardEnabled Ext_east_asian_line_breaks
return $ (if eastAsianLineBreaks
then bottomUp softBreakFilter
else id) $ Pandoc meta bs
-- | Remove a 'SoftBreak' that sits between two wide characters.
--
-- Only the front of the list is examined: when it has the shape
-- @before : SoftBreak : after : rest@, the break is dropped if the
-- stringified @before@ ends with, and the stringified @after@ begins
-- with, a character whose 'charWidth' is 2.  In every other case
-- (including when either neighbour stringifies to the empty string)
-- the list is returned unchanged.
softBreakFilter :: [Inline] -> [Inline]
softBreakFilter inlines@(before : SoftBreak : after : rest)
  | endsWide (stringify before) && startsWide (stringify after) =
      before : after : rest
  | otherwise = inlines
  where
    -- A character counts as wide when it occupies two columns.
    isWide ch = charWidth ch == 2
    -- 'last' is safe here: it is only reached for a non-empty string.
    endsWide s = not (null s) && isWide (last s)
    startsWide (ch : _) = isWide ch
    startsWide []       = False
softBreakFilter inlines = inlines
referenceKey :: MarkdownParser (F Blocks)
referenceKey = try $ do