Org reader: add support for smart punctuation

This commit is contained in:
Craig S. Bosma 2015-03-09 07:11:53 -05:00
parent c7c45918dc
commit 513221f822
2 changed files with 74 additions and 8 deletions

View file

@ -1,6 +1,6 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, FlexibleInstances #-}
{- {-
Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de> Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de>
@ -49,7 +49,7 @@ import Control.Applicative ( Applicative, pure
, (<$>), (<$), (<*>), (<*), (*>) ) , (<$>), (<$), (<*>), (<*), (*>) )
import Control.Arrow (first) import Control.Arrow (first)
import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when) import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
import Control.Monad.Reader (Reader, runReader, ask, asks) import Control.Monad.Reader (Reader, runReader, ask, asks, local)
import Data.Char (isAlphaNum, toLower) import Data.Char (isAlphaNum, toLower)
import Data.Default import Data.Default
import Data.List (intersperse, isPrefixOf, isSuffixOf) import Data.List (intersperse, isPrefixOf, isSuffixOf)
@ -62,9 +62,11 @@ import Network.HTTP (urlEncode)
readOrg :: ReaderOptions -- ^ Reader options readOrg :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings) -> String -- ^ String to parse (assuming @'\n'@ line endings)
-> Pandoc -> Pandoc
readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n") readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
type OrgParser = Parser [Char] OrgParserState data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
parseOrg :: OrgParser Pandoc parseOrg :: OrgParser Pandoc
parseOrg = do parseOrg = do
@ -125,6 +127,9 @@ data OrgParserState = OrgParserState
, orgStateNotes' :: OrgNoteTable , orgStateNotes' :: OrgNoteTable
} }
instance Default OrgParserLocal where
def = OrgParserLocal NoQuote
instance HasReaderOptions OrgParserState where instance HasReaderOptions OrgParserState where
extractReaderOptions = orgStateOptions extractReaderOptions = orgStateOptions
@ -138,6 +143,10 @@ instance HasLastStrPosition OrgParserState where
getLastStrPos = orgStateLastStrPos getLastStrPos = orgStateLastStrPos
setLastStrPos pos st = st{ orgStateLastStrPos = Just pos } setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
instance HasQuoteContext st (Reader OrgParserLocal) where
getQuoteContext = asks orgLocalQuoteContext
withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q})
instance Default OrgParserState where instance Default OrgParserState where
def = defaultOrgParserState def = defaultOrgParserState
@ -964,6 +973,7 @@ inline =
, subscript , subscript
, superscript , superscript
, inlineLaTeX , inlineLaTeX
, smart
, symbol , symbol
] <* (guard =<< newlinesCountWithinLimits) ] <* (guard =<< newlinesCountWithinLimits)
<?> "inline" <?> "inline"
@ -1270,13 +1280,17 @@ displayMath :: OrgParser (F Inlines)
displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
, rawMathBetween "$$" "$$" , rawMathBetween "$$" "$$"
] ]
symbol :: OrgParser (F Inlines)
symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) updatePositions :: Char
where updatePositions c = do -> OrgParser (Char)
updatePositions c = do
when (c `elem` emphasisPreChars) updateLastPreCharPos when (c `elem` emphasisPreChars) updateLastPreCharPos
when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
return c return c
symbol :: OrgParser (F Inlines)
symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
emphasisBetween :: Char emphasisBetween :: Char
-> OrgParser (F Inlines) -> OrgParser (F Inlines)
emphasisBetween c = try $ do emphasisBetween c = try $ do
@ -1486,3 +1500,31 @@ inlineLaTeXCommand = try $ do
count len anyChar count len anyChar
return cs return cs
_ -> mzero _ -> mzero
smart :: OrgParser (F Inlines)
smart = do
getOption readerSmart >>= guard
doubleQuoted <|> singleQuoted <|>
choice (map (return <$>) [orgApostrophe, dash, ellipses])
where orgApostrophe =
(char '\'' <|> char '\8217') <* updateLastPreCharPos
<* updateLastForbiddenCharPos
*> return (B.str "\x2019")
singleQuoted :: OrgParser (F Inlines)
singleQuoted = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $
fmap B.singleQuoted . trimInlinesF . mconcat <$>
many1Till inline singleQuoteEnd
-- doubleQuoted will handle regular double-quoted sections, as well
-- as dialogues with an open double-quote without a close double-quote
-- in the same paragraph.
doubleQuoted :: OrgParser (F Inlines)
doubleQuoted = try $ do
doubleQuoteStart
contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
(withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
(fmap B.doubleQuoted . trimInlinesF $ contents))
<|> (return $ return (B.str "\8220") <> contents)

View file

@ -12,6 +12,9 @@ import Data.Monoid (mempty, mappend, mconcat)
org :: String -> Pandoc org :: String -> Pandoc
org = readOrg def org = readOrg def
orgSmart :: String -> Pandoc
orgSmart = readOrg def { readerSmart = True }
infix 4 =: infix 4 =:
(=:) :: ToString c (=:) :: ToString c
=> String -> (String, c) -> Test => String -> (String, c) -> Test
@ -1152,4 +1155,25 @@ tests =
] ]
in codeBlockWith ( "", classes, params) "code body\n" in codeBlockWith ( "", classes, params) "code body\n"
] ]
, testGroup "Smart punctuation"
[ test orgSmart "quote before ellipses"
("'...hi'"
=?> para (singleQuoted "…hi"))
, test orgSmart "apostrophe before emph"
("D'oh! A l'/aide/!"
=?> para ("Doh! A l" <> emph "aide" <> "!"))
, test orgSmart "apostrophe in French"
("À l'arrivée de la guerre, le thème de l'«impossibilité du socialisme»"
=?> para "À larrivée de la guerre, le thème de l«impossibilité du socialisme»")
, test orgSmart "Quotes cannot occur at the end of emphasized text"
("/say \"yes\"/" =?>
para ("/say" <> space <> doubleQuoted "yes" <> "/"))
, test orgSmart "Dashes are allowed at the borders of emphasis'"
("/foo---/" =?>
para (emph "foo—"))
]
] ]