Org reader: add support for smart punctuation

This commit is contained in:
Craig S. Bosma 2015-03-09 07:11:53 -05:00
parent c7c45918dc
commit 513221f822
2 changed files with 74 additions and 8 deletions

View file

@ -1,6 +1,6 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, FlexibleInstances #-}
{-
Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de>
@ -49,7 +49,7 @@ import Control.Applicative ( Applicative, pure
, (<$>), (<$), (<*>), (<*), (*>) )
import Control.Arrow (first)
import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
import Control.Monad.Reader (Reader, runReader, ask, asks)
import Control.Monad.Reader (Reader, runReader, ask, asks, local)
import Data.Char (isAlphaNum, toLower)
import Data.Default
import Data.List (intersperse, isPrefixOf, isSuffixOf)
@ -62,9 +62,11 @@ import Network.HTTP (urlEncode)
readOrg :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
-> Pandoc
readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
type OrgParser = Parser [Char] OrgParserState
data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
parseOrg :: OrgParser Pandoc
parseOrg = do
@ -125,6 +127,9 @@ data OrgParserState = OrgParserState
, orgStateNotes' :: OrgNoteTable
}
instance Default OrgParserLocal where
def = OrgParserLocal NoQuote
instance HasReaderOptions OrgParserState where
extractReaderOptions = orgStateOptions
@ -138,6 +143,10 @@ instance HasLastStrPosition OrgParserState where
getLastStrPos = orgStateLastStrPos
setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
instance HasQuoteContext st (Reader OrgParserLocal) where
getQuoteContext = asks orgLocalQuoteContext
withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q})
instance Default OrgParserState where
def = defaultOrgParserState
@ -964,6 +973,7 @@ inline =
, subscript
, superscript
, inlineLaTeX
, smart
, symbol
] <* (guard =<< newlinesCountWithinLimits)
<?> "inline"
@ -1270,12 +1280,16 @@ displayMath :: OrgParser (F Inlines)
displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
, rawMathBetween "$$" "$$"
]
updatePositions :: Char
-> OrgParser (Char)
updatePositions c = do
when (c `elem` emphasisPreChars) updateLastPreCharPos
when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
return c
symbol :: OrgParser (F Inlines)
symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
where updatePositions c = do
when (c `elem` emphasisPreChars) updateLastPreCharPos
when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
return c
emphasisBetween :: Char
-> OrgParser (F Inlines)
@ -1486,3 +1500,31 @@ inlineLaTeXCommand = try $ do
count len anyChar
return cs
_ -> mzero
smart :: OrgParser (F Inlines)
smart = do
getOption readerSmart >>= guard
doubleQuoted <|> singleQuoted <|>
choice (map (return <$>) [orgApostrophe, dash, ellipses])
where orgApostrophe =
(char '\'' <|> char '\8217') <* updateLastPreCharPos
<* updateLastForbiddenCharPos
*> return (B.str "\x2019")
singleQuoted :: OrgParser (F Inlines)
singleQuoted = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $
fmap B.singleQuoted . trimInlinesF . mconcat <$>
many1Till inline singleQuoteEnd
-- doubleQuoted will handle regular double-quoted sections, as well
-- as dialogues with an open double-quote without a close double-quote
-- in the same paragraph.
doubleQuoted :: OrgParser (F Inlines)
doubleQuoted = try $ do
doubleQuoteStart
contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
(withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
(fmap B.doubleQuoted . trimInlinesF $ contents))
<|> (return $ return (B.str "\8220") <> contents)

View file

@ -12,6 +12,9 @@ import Data.Monoid (mempty, mappend, mconcat)
org :: String -> Pandoc
org = readOrg def
orgSmart :: String -> Pandoc
orgSmart = readOrg def { readerSmart = True }
infix 4 =:
(=:) :: ToString c
=> String -> (String, c) -> Test
@ -1152,4 +1155,25 @@ tests =
]
in codeBlockWith ( "", classes, params) "code body\n"
]
, testGroup "Smart punctuation"
[ test orgSmart "quote before ellipses"
("'...hi'"
=?> para (singleQuoted "…hi"))
, test orgSmart "apostrophe before emph"
("D'oh! A l'/aide/!"
=?> para ("Doh! A l" <> emph "aide" <> "!"))
, test orgSmart "apostrophe in French"
("À l'arrivée de la guerre, le thème de l'«impossibilité du socialisme»"
=?> para "À larrivée de la guerre, le thème de l«impossibilité du socialisme»")
, test orgSmart "Quotes cannot occur at the end of emphasized text"
("/say \"yes\"/" =?>
para ("/say" <> space <> doubleQuoted "yes" <> "/"))
, test orgSmart "Dashes are allowed at the borders of emphasis'"
("/foo---/" =?>
para (emph "foo—"))
]
]