Org reader: add support for smart punctuation
This commit is contained in:
parent
c7c45918dc
commit
513221f822
2 changed files with 74 additions and 8 deletions
|
@ -1,6 +1,6 @@
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||||
{-# LANGUAGE FlexibleContexts #-}
|
{-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, FlexibleInstances #-}
|
||||||
{-
|
{-
|
||||||
Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de>
|
Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de>
|
||||||
|
|
||||||
|
@ -49,7 +49,7 @@ import Control.Applicative ( Applicative, pure
|
||||||
, (<$>), (<$), (<*>), (<*), (*>) )
|
, (<$>), (<$), (<*>), (<*), (*>) )
|
||||||
import Control.Arrow (first)
|
import Control.Arrow (first)
|
||||||
import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
|
import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
|
||||||
import Control.Monad.Reader (Reader, runReader, ask, asks)
|
import Control.Monad.Reader (Reader, runReader, ask, asks, local)
|
||||||
import Data.Char (isAlphaNum, toLower)
|
import Data.Char (isAlphaNum, toLower)
|
||||||
import Data.Default
|
import Data.Default
|
||||||
import Data.List (intersperse, isPrefixOf, isSuffixOf)
|
import Data.List (intersperse, isPrefixOf, isSuffixOf)
|
||||||
|
@ -62,9 +62,11 @@ import Network.HTTP (urlEncode)
|
||||||
readOrg :: ReaderOptions -- ^ Reader options
|
readOrg :: ReaderOptions -- ^ Reader options
|
||||||
-> String -- ^ String to parse (assuming @'\n'@ line endings)
|
-> String -- ^ String to parse (assuming @'\n'@ line endings)
|
||||||
-> Pandoc
|
-> Pandoc
|
||||||
readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
|
readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
|
||||||
|
|
||||||
type OrgParser = Parser [Char] OrgParserState
|
data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
|
||||||
|
|
||||||
|
type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
|
||||||
|
|
||||||
parseOrg :: OrgParser Pandoc
|
parseOrg :: OrgParser Pandoc
|
||||||
parseOrg = do
|
parseOrg = do
|
||||||
|
@ -125,6 +127,9 @@ data OrgParserState = OrgParserState
|
||||||
, orgStateNotes' :: OrgNoteTable
|
, orgStateNotes' :: OrgNoteTable
|
||||||
}
|
}
|
||||||
|
|
||||||
|
instance Default OrgParserLocal where
|
||||||
|
def = OrgParserLocal NoQuote
|
||||||
|
|
||||||
instance HasReaderOptions OrgParserState where
|
instance HasReaderOptions OrgParserState where
|
||||||
extractReaderOptions = orgStateOptions
|
extractReaderOptions = orgStateOptions
|
||||||
|
|
||||||
|
@ -138,6 +143,10 @@ instance HasLastStrPosition OrgParserState where
|
||||||
getLastStrPos = orgStateLastStrPos
|
getLastStrPos = orgStateLastStrPos
|
||||||
setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
|
setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
|
||||||
|
|
||||||
|
instance HasQuoteContext st (Reader OrgParserLocal) where
|
||||||
|
getQuoteContext = asks orgLocalQuoteContext
|
||||||
|
withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q})
|
||||||
|
|
||||||
instance Default OrgParserState where
|
instance Default OrgParserState where
|
||||||
def = defaultOrgParserState
|
def = defaultOrgParserState
|
||||||
|
|
||||||
|
@ -964,6 +973,7 @@ inline =
|
||||||
, subscript
|
, subscript
|
||||||
, superscript
|
, superscript
|
||||||
, inlineLaTeX
|
, inlineLaTeX
|
||||||
|
, smart
|
||||||
, symbol
|
, symbol
|
||||||
] <* (guard =<< newlinesCountWithinLimits)
|
] <* (guard =<< newlinesCountWithinLimits)
|
||||||
<?> "inline"
|
<?> "inline"
|
||||||
|
@ -1270,13 +1280,17 @@ displayMath :: OrgParser (F Inlines)
|
||||||
displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
|
displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
|
||||||
, rawMathBetween "$$" "$$"
|
, rawMathBetween "$$" "$$"
|
||||||
]
|
]
|
||||||
symbol :: OrgParser (F Inlines)
|
|
||||||
symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
|
updatePositions :: Char
|
||||||
where updatePositions c = do
|
-> OrgParser (Char)
|
||||||
|
updatePositions c = do
|
||||||
when (c `elem` emphasisPreChars) updateLastPreCharPos
|
when (c `elem` emphasisPreChars) updateLastPreCharPos
|
||||||
when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
|
when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
|
||||||
return c
|
return c
|
||||||
|
|
||||||
|
symbol :: OrgParser (F Inlines)
|
||||||
|
symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
|
||||||
|
|
||||||
emphasisBetween :: Char
|
emphasisBetween :: Char
|
||||||
-> OrgParser (F Inlines)
|
-> OrgParser (F Inlines)
|
||||||
emphasisBetween c = try $ do
|
emphasisBetween c = try $ do
|
||||||
|
@ -1486,3 +1500,31 @@ inlineLaTeXCommand = try $ do
|
||||||
count len anyChar
|
count len anyChar
|
||||||
return cs
|
return cs
|
||||||
_ -> mzero
|
_ -> mzero
|
||||||
|
|
||||||
|
smart :: OrgParser (F Inlines)
|
||||||
|
smart = do
|
||||||
|
getOption readerSmart >>= guard
|
||||||
|
doubleQuoted <|> singleQuoted <|>
|
||||||
|
choice (map (return <$>) [orgApostrophe, dash, ellipses])
|
||||||
|
where orgApostrophe =
|
||||||
|
(char '\'' <|> char '\8217') <* updateLastPreCharPos
|
||||||
|
<* updateLastForbiddenCharPos
|
||||||
|
*> return (B.str "\x2019")
|
||||||
|
|
||||||
|
singleQuoted :: OrgParser (F Inlines)
|
||||||
|
singleQuoted = try $ do
|
||||||
|
singleQuoteStart
|
||||||
|
withQuoteContext InSingleQuote $
|
||||||
|
fmap B.singleQuoted . trimInlinesF . mconcat <$>
|
||||||
|
many1Till inline singleQuoteEnd
|
||||||
|
|
||||||
|
-- doubleQuoted will handle regular double-quoted sections, as well
|
||||||
|
-- as dialogues with an open double-quote without a close double-quote
|
||||||
|
-- in the same paragraph.
|
||||||
|
doubleQuoted :: OrgParser (F Inlines)
|
||||||
|
doubleQuoted = try $ do
|
||||||
|
doubleQuoteStart
|
||||||
|
contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
|
||||||
|
(withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
|
||||||
|
(fmap B.doubleQuoted . trimInlinesF $ contents))
|
||||||
|
<|> (return $ return (B.str "\8220") <> contents)
|
||||||
|
|
|
@ -12,6 +12,9 @@ import Data.Monoid (mempty, mappend, mconcat)
|
||||||
org :: String -> Pandoc
|
org :: String -> Pandoc
|
||||||
org = readOrg def
|
org = readOrg def
|
||||||
|
|
||||||
|
orgSmart :: String -> Pandoc
|
||||||
|
orgSmart = readOrg def { readerSmart = True }
|
||||||
|
|
||||||
infix 4 =:
|
infix 4 =:
|
||||||
(=:) :: ToString c
|
(=:) :: ToString c
|
||||||
=> String -> (String, c) -> Test
|
=> String -> (String, c) -> Test
|
||||||
|
@ -1152,4 +1155,25 @@ tests =
|
||||||
]
|
]
|
||||||
in codeBlockWith ( "", classes, params) "code body\n"
|
in codeBlockWith ( "", classes, params) "code body\n"
|
||||||
]
|
]
|
||||||
|
, testGroup "Smart punctuation"
|
||||||
|
[ test orgSmart "quote before ellipses"
|
||||||
|
("'...hi'"
|
||||||
|
=?> para (singleQuoted "…hi"))
|
||||||
|
|
||||||
|
, test orgSmart "apostrophe before emph"
|
||||||
|
("D'oh! A l'/aide/!"
|
||||||
|
=?> para ("D’oh! A l’" <> emph "aide" <> "!"))
|
||||||
|
|
||||||
|
, test orgSmart "apostrophe in French"
|
||||||
|
("À l'arrivée de la guerre, le thème de l'«impossibilité du socialisme»"
|
||||||
|
=?> para "À l’arrivée de la guerre, le thème de l’«impossibilité du socialisme»")
|
||||||
|
|
||||||
|
, test orgSmart "Quotes cannot occur at the end of emphasized text"
|
||||||
|
("/say \"yes\"/" =?>
|
||||||
|
para ("/say" <> space <> doubleQuoted "yes" <> "/"))
|
||||||
|
|
||||||
|
, test orgSmart "Dashes are allowed at the borders of emphasis'"
|
||||||
|
("/foo---/" =?>
|
||||||
|
para (emph "foo—"))
|
||||||
|
]
|
||||||
]
|
]
|
||||||
|
|
Loading…
Reference in a new issue