Org reader: add support for smart punctuation
This commit is contained in:
parent
c7c45918dc
commit
513221f822
2 changed files with 74 additions and 8 deletions
|
@ -1,6 +1,6 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||
{-# LANGUAGE FlexibleContexts #-}
|
||||
{-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, FlexibleInstances #-}
|
||||
{-
|
||||
Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de>
|
||||
|
||||
|
@ -49,7 +49,7 @@ import Control.Applicative ( Applicative, pure
|
|||
, (<$>), (<$), (<*>), (<*), (*>) )
|
||||
import Control.Arrow (first)
|
||||
import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
|
||||
import Control.Monad.Reader (Reader, runReader, ask, asks)
|
||||
import Control.Monad.Reader (Reader, runReader, ask, asks, local)
|
||||
import Data.Char (isAlphaNum, toLower)
|
||||
import Data.Default
|
||||
import Data.List (intersperse, isPrefixOf, isSuffixOf)
|
||||
|
@ -62,9 +62,11 @@ import Network.HTTP (urlEncode)
|
|||
readOrg :: ReaderOptions -- ^ Reader options
|
||||
-> String -- ^ String to parse (assuming @'\n'@ line endings)
|
||||
-> Pandoc
|
||||
readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
|
||||
readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
|
||||
|
||||
type OrgParser = Parser [Char] OrgParserState
|
||||
data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
|
||||
|
||||
type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
|
||||
|
||||
parseOrg :: OrgParser Pandoc
|
||||
parseOrg = do
|
||||
|
@ -125,6 +127,9 @@ data OrgParserState = OrgParserState
|
|||
, orgStateNotes' :: OrgNoteTable
|
||||
}
|
||||
|
||||
instance Default OrgParserLocal where
|
||||
def = OrgParserLocal NoQuote
|
||||
|
||||
instance HasReaderOptions OrgParserState where
|
||||
extractReaderOptions = orgStateOptions
|
||||
|
||||
|
@ -138,6 +143,10 @@ instance HasLastStrPosition OrgParserState where
|
|||
getLastStrPos = orgStateLastStrPos
|
||||
setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
|
||||
|
||||
instance HasQuoteContext st (Reader OrgParserLocal) where
|
||||
getQuoteContext = asks orgLocalQuoteContext
|
||||
withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q})
|
||||
|
||||
instance Default OrgParserState where
|
||||
def = defaultOrgParserState
|
||||
|
||||
|
@ -964,6 +973,7 @@ inline =
|
|||
, subscript
|
||||
, superscript
|
||||
, inlineLaTeX
|
||||
, smart
|
||||
, symbol
|
||||
] <* (guard =<< newlinesCountWithinLimits)
|
||||
<?> "inline"
|
||||
|
@ -1270,12 +1280,16 @@ displayMath :: OrgParser (F Inlines)
|
|||
displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
|
||||
, rawMathBetween "$$" "$$"
|
||||
]
|
||||
|
||||
updatePositions :: Char
|
||||
-> OrgParser (Char)
|
||||
updatePositions c = do
|
||||
when (c `elem` emphasisPreChars) updateLastPreCharPos
|
||||
when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
|
||||
return c
|
||||
|
||||
symbol :: OrgParser (F Inlines)
|
||||
symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
|
||||
where updatePositions c = do
|
||||
when (c `elem` emphasisPreChars) updateLastPreCharPos
|
||||
when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
|
||||
return c
|
||||
|
||||
emphasisBetween :: Char
|
||||
-> OrgParser (F Inlines)
|
||||
|
@ -1486,3 +1500,31 @@ inlineLaTeXCommand = try $ do
|
|||
count len anyChar
|
||||
return cs
|
||||
_ -> mzero
|
||||
|
||||
smart :: OrgParser (F Inlines)
|
||||
smart = do
|
||||
getOption readerSmart >>= guard
|
||||
doubleQuoted <|> singleQuoted <|>
|
||||
choice (map (return <$>) [orgApostrophe, dash, ellipses])
|
||||
where orgApostrophe =
|
||||
(char '\'' <|> char '\8217') <* updateLastPreCharPos
|
||||
<* updateLastForbiddenCharPos
|
||||
*> return (B.str "\x2019")
|
||||
|
||||
singleQuoted :: OrgParser (F Inlines)
|
||||
singleQuoted = try $ do
|
||||
singleQuoteStart
|
||||
withQuoteContext InSingleQuote $
|
||||
fmap B.singleQuoted . trimInlinesF . mconcat <$>
|
||||
many1Till inline singleQuoteEnd
|
||||
|
||||
-- doubleQuoted will handle regular double-quoted sections, as well
|
||||
-- as dialogues with an open double-quote without a close double-quote
|
||||
-- in the same paragraph.
|
||||
doubleQuoted :: OrgParser (F Inlines)
|
||||
doubleQuoted = try $ do
|
||||
doubleQuoteStart
|
||||
contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
|
||||
(withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
|
||||
(fmap B.doubleQuoted . trimInlinesF $ contents))
|
||||
<|> (return $ return (B.str "\8220") <> contents)
|
||||
|
|
|
@ -12,6 +12,9 @@ import Data.Monoid (mempty, mappend, mconcat)
|
|||
org :: String -> Pandoc
|
||||
org = readOrg def
|
||||
|
||||
orgSmart :: String -> Pandoc
|
||||
orgSmart = readOrg def { readerSmart = True }
|
||||
|
||||
infix 4 =:
|
||||
(=:) :: ToString c
|
||||
=> String -> (String, c) -> Test
|
||||
|
@ -1152,4 +1155,25 @@ tests =
|
|||
]
|
||||
in codeBlockWith ( "", classes, params) "code body\n"
|
||||
]
|
||||
, testGroup "Smart punctuation"
|
||||
[ test orgSmart "quote before ellipses"
|
||||
("'...hi'"
|
||||
=?> para (singleQuoted "…hi"))
|
||||
|
||||
, test orgSmart "apostrophe before emph"
|
||||
("D'oh! A l'/aide/!"
|
||||
=?> para ("D’oh! A l’" <> emph "aide" <> "!"))
|
||||
|
||||
, test orgSmart "apostrophe in French"
|
||||
("À l'arrivée de la guerre, le thème de l'«impossibilité du socialisme»"
|
||||
=?> para "À l’arrivée de la guerre, le thème de l’«impossibilité du socialisme»")
|
||||
|
||||
, test orgSmart "Quotes cannot occur at the end of emphasized text"
|
||||
("/say \"yes\"/" =?>
|
||||
para ("/say" <> space <> doubleQuoted "yes" <> "/"))
|
||||
|
||||
, test orgSmart "Dashes are allowed at the borders of emphasis'"
|
||||
("/foo---/" =?>
|
||||
para (emph "foo—"))
|
||||
]
|
||||
]
|
||||
|
|
Loading…
Reference in a new issue