Generalised more in Parsing.hs to enable the use of custom state

This commit is contained in:
Matthew Pickering 2014-07-26 17:34:11 +01:00
parent 18f4490482
commit 5e2d22a27e
2 changed files with 114 additions and 58 deletions

View file

@ -2,6 +2,7 @@
FlexibleContexts
, GeneralizedNewtypeDeriving
, TypeSynonymInstances
, MultiParamTypeClasses
, FlexibleInstances #-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@ -80,6 +81,7 @@ module Text.Pandoc.Parsing ( anyLine,
HeaderType (..),
ParserContext (..),
QuoteContext (..),
HasQuoteContext (..),
NoteTable,
NoteTable',
KeyTable,
@ -88,7 +90,6 @@ module Text.Pandoc.Parsing ( anyLine,
toKey,
registerHeader,
smartPunctuation,
withQuoteContext,
singleQuoteStart,
singleQuoteEnd,
doubleQuoteStart,
@ -106,6 +107,7 @@ module Text.Pandoc.Parsing ( anyLine,
runF,
askF,
asksF,
token,
-- * Re-exports from Text.Pandoc.Parsec
Stream,
runParser,
@ -160,7 +162,6 @@ module Text.Pandoc.Parsing ( anyLine,
setSourceColumn,
setSourceLine,
newPos,
token
)
where
@ -170,7 +171,7 @@ import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..))
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.XML (fromEntities)
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
import Text.Parsec
import Text.Parsec hiding (token)
import Text.Parsec.Pos (newPos)
import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum,
isHexDigit, isSpace )
@ -484,7 +485,8 @@ mathDisplayWith op cl = try $ do
string op
many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ string cl)
mathDisplay :: Stream s m Char => ParserT s ParserState m String
mathDisplay :: (HasReaderOptions st, Stream s m Char)
=> ParserT s st m String
mathDisplay =
(guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
<|> (guardEnabled Ext_tex_math_single_backslash >>
@ -492,7 +494,8 @@ mathDisplay =
<|> (guardEnabled Ext_tex_math_double_backslash >>
mathDisplayWith "\\\\[" "\\\\]")
mathInline :: Stream s m Char => ParserT s ParserState m String
mathInline :: (HasReaderOptions st , Stream s m Char)
=> ParserT s st m String
mathInline =
(guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
<|> (guardEnabled Ext_tex_math_single_backslash >>
@ -909,6 +912,21 @@ class HasReaderOptions st where
-- default
getOption f = (f . extractReaderOptions) <$> getState
class HasQuoteContext st m where
getQuoteContext :: (Stream s m t) => ParsecT s st m QuoteContext
withQuoteContext :: QuoteContext -> ParsecT s st m a -> ParsecT s st m a
instance Monad m => HasQuoteContext ParserState m where
getQuoteContext = stateQuoteContext <$> getState
withQuoteContext context parser = do
oldState <- getState
let oldQuoteContext = stateQuoteContext oldState
setState oldState { stateQuoteContext = context }
result <- parser
newState <- getState
setState newState { stateQuoteContext = oldQuoteContext }
return result
instance HasReaderOptions ParserState where
extractReaderOptions = stateOptions
@ -1051,9 +1069,9 @@ registerHeader (ident,classes,kvs) header' = do
failUnlessSmart :: (Stream s m a, HasReaderOptions st) => ParserT s st m ()
failUnlessSmart = getOption readerSmart >>= guard
smartPunctuation :: Stream s m Char
=> ParserT s ParserState m Inlines
-> ParserT s ParserState m Inlines
smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
=> ParserT s st m Inlines
-> ParserT s st m Inlines
smartPunctuation inlineParser = do
failUnlessSmart
choice [ quoted inlineParser, apostrophe, dash, ellipses ]
@ -1061,46 +1079,33 @@ smartPunctuation inlineParser = do
apostrophe :: Stream s m Char => ParserT s st m Inlines
apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019")
quoted :: Stream s m Char
=> ParserT s ParserState m Inlines
-> ParserT s ParserState m Inlines
quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
=> ParserT s st m Inlines
-> ParserT s st m Inlines
quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
withQuoteContext :: Stream s m t
=> QuoteContext
-> ParserT s ParserState m a
-> ParserT s ParserState m a
withQuoteContext context parser = do
oldState <- getState
let oldQuoteContext = stateQuoteContext oldState
setState oldState { stateQuoteContext = context }
result <- parser
newState <- getState
setState newState { stateQuoteContext = oldQuoteContext }
return result
singleQuoted :: Stream s m Char
=> ParserT s ParserState m Inlines
-> ParserT s ParserState m Inlines
singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
=> ParserT s st m Inlines
-> ParserT s st m Inlines
singleQuoted inlineParser = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>=
return . B.singleQuoted . mconcat
doubleQuoted :: Stream s m Char
=> ParserT s ParserState m Inlines
-> ParserT s ParserState m Inlines
doubleQuoted :: (HasQuoteContext st m, Stream s m Char)
=> ParserT s st m Inlines
-> ParserT s st m Inlines
doubleQuoted inlineParser = try $ do
doubleQuoteStart
withQuoteContext InDoubleQuote $ manyTill inlineParser doubleQuoteEnd >>=
return . B.doubleQuoted . mconcat
failIfInQuoteContext :: Stream s m t
failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t)
=> QuoteContext
-> ParserT s ParserState m ()
-> ParserT s st m ()
failIfInQuoteContext context = do
st <- getState
if stateQuoteContext st == context
context' <- getQuoteContext
if context' == context
then fail "already inside quotes"
else return ()
@ -1110,8 +1115,8 @@ charOrRef cs =
guard (c `elem` cs)
return c)
singleQuoteStart :: Stream s m Char
=> ParserT s ParserState m ()
singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
=> ParserT s st m ()
singleQuoteStart = do
failIfInQuoteContext InSingleQuote
-- single quote start can't be right after str
@ -1124,8 +1129,8 @@ singleQuoteEnd = try $ do
charOrRef "'\8217\146"
notFollowedBy alphaNum
doubleQuoteStart :: Stream s m Char
=> ParserT s ParserState m ()
doubleQuoteStart :: (HasQuoteContext st m, Stream s m Char)
=> ParserT s st m ()
doubleQuoteStart = do
failIfInQuoteContext InDoubleQuote
try $ do charOrRef "\"\8220\147"
@ -1179,6 +1184,14 @@ citeKey = try $ do
let key = firstChar:rest
return (suppress_author, key)
token :: (Stream s m t)
=> (t -> String)
-> (t -> SourcePos)
-> (t -> Maybe a)
-> ParsecT s st m a
token pp pos match = tokenPrim pp (\_ t _ -> pos t) match
--
-- Macros
--
@ -1200,9 +1213,9 @@ macro = do
else return $ rawBlock "latex" def'
-- | Apply current macros to string.
applyMacros' :: Stream [Char] m Char
applyMacros' :: (HasReaderOptions st, HasMacros st, Stream [Char] m Char)
=> String
-> ParserT [Char] ParserState m String
-> ParserT [Char] st m String
applyMacros' target = do
apply <- getOption readerApplyMacros
if apply

View file

@ -1,3 +1,4 @@
{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses #-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@ -40,7 +41,7 @@ import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match
import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
import Text.Pandoc.Builder (HasMeta (..), Blocks, Inlines, trimInlines)
import Text.Pandoc.Shared
import Text.Pandoc.Options
import Text.Pandoc.Parsing
@ -52,6 +53,8 @@ import Control.Applicative ( (<$>), (<$), (<*) )
import Data.Monoid
import Text.Printf (printf)
import Debug.Trace (trace)
import Data.Default (Default (..))
import Control.Monad.Reader (Reader, runReader, asks, local, ask)
isSpace :: Char -> Bool
isSpace ' ' = True
@ -64,17 +67,26 @@ readHtml :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
-> Pandoc
readHtml opts inp =
case runParser parseDoc def{ stateOptions = opts } "source" tags of
case flip runReader def $ runParserT parseDoc (HTMLState def{ stateOptions = opts } ) "source" tags of
Left err' -> error $ "\nError at " ++ show err'
Right result -> result
where tags = canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True } inp
parseDoc = do
blocks <- (fixPlains False) . mconcat <$> manyTill block eof
meta <- stateMeta <$> getState
meta <- stateMeta . parserState <$> getState
return $ Pandoc meta (B.toList blocks)
type TagParser = Parser [Tag String] ParserState
data HTMLState =
HTMLState
{ parserState :: ParserState
}
data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext }
type HTMLParser s = ParserT s HTMLState (Reader HTMLLocal)
type TagParser = HTMLParser [Tag String]
pBody :: TagParser Blocks
pBody = pInTags "body" block
@ -115,7 +127,6 @@ block = do
(take 60 $ show $ B.toList res)) (return ())
return res
pList :: TagParser Blocks
pList = pBulletList <|> pOrderedList <|> pDefinitionList
@ -365,8 +376,8 @@ pSelfClosing f g = do
pQ :: TagParser Inlines
pQ = do
quoteContext <- stateQuoteContext `fmap` getState
let quoteType = case quoteContext of
context <- asks quoteContext
let quoteType = case context of
InDoubleQuote -> SingleQuote
_ -> DoubleQuote
let innerQuoteContext = if quoteType == SingleQuote
@ -477,7 +488,8 @@ pTagText :: TagParser Inlines
pTagText = try $ do
(TagText str) <- pSatisfy isTagText
st <- getState
case runParser (many pTagContents) st "text" str of
qu <- ask
case flip runReader qu $ runParserT (many pTagContents) st "text" str of
Left _ -> fail $ "Could not parse `" ++ str ++ "'"
Right result -> return $ mconcat result
@ -486,7 +498,9 @@ pBlank = try $ do
(TagText str) <- pSatisfy isTagText
guard $ all isSpace str
pTagContents :: Parser [Char] ParserState Inlines
type InlinesParser = HTMLParser String
pTagContents :: InlinesParser Inlines
pTagContents =
B.displayMath <$> mathDisplay
<|> B.math <$> mathInline
@ -496,12 +510,11 @@ pTagContents =
<|> pSymbol
<|> pBad
pStr :: Parser [Char] ParserState Inlines
pStr :: InlinesParser Inlines
pStr = do
result <- many1 $ satisfy $ \c ->
not (isSpace c) && not (isSpecial c) && not (isBad c)
pos <- getPosition
updateState $ \s -> s{ stateLastStrPos = Just pos }
updateLastStrPos
return $ B.str result
isSpecial :: Char -> Bool
@ -516,13 +529,13 @@ isSpecial '\8220' = True
isSpecial '\8221' = True
isSpecial _ = False
pSymbol :: Parser [Char] ParserState Inlines
pSymbol :: InlinesParser Inlines
pSymbol = satisfy isSpecial >>= return . B.str . (:[])
isBad :: Char -> Bool
isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML
pBad :: Parser [Char] ParserState Inlines
pBad :: InlinesParser Inlines
pBad = do
c <- satisfy isBad
let c' = case c of
@ -556,7 +569,7 @@ pBad = do
_ -> '?'
return $ B.str [c']
pSpace :: Parser [Char] ParserState Inlines
pSpace :: InlinesParser Inlines
pSpace = many1 (satisfy isSpace) >> return B.space
--
@ -672,19 +685,23 @@ _ `closes` _ = False
--- parsers for use in markdown, textile readers
-- | Matches a stretch of HTML in balanced tags.
htmlInBalanced :: (Tag String -> Bool) -> Parser [Char] ParserState String
htmlInBalanced :: (Monad m)
=> (Tag String -> Bool)
-> ParserT String st m String
htmlInBalanced f = try $ do
(TagOpen t _, tag) <- htmlTag f
guard $ '/' `notElem` tag -- not a self-closing tag
let stopper = htmlTag (~== TagClose t)
let anytag = liftM snd $ htmlTag (const True)
let anytag = snd <$> htmlTag (const True)
contents <- many $ notFollowedBy' stopper >>
(htmlInBalanced f <|> anytag <|> count 1 anyChar)
endtag <- liftM snd stopper
return $ tag ++ concat contents ++ endtag
-- | Matches a tag meeting a certain condition.
htmlTag :: (Tag String -> Bool) -> Parser [Char] st (Tag String, String)
htmlTag :: Monad m
=> (Tag String -> Bool)
-> ParserT [Char] st m (Tag String, String)
htmlTag f = try $ do
lookAhead $ char '<' >> (oneOf "/!?" <|> letter)
(next : _) <- getInput >>= return . canonicalizeTags . parseTags
@ -707,3 +724,29 @@ mkAttr attr = (attribsId, attribsClasses, attribsKV)
attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
-- Instances
-- This signature should be more general
-- MonadReader HTMLLocal m => HasQuoteContext st m
instance HasQuoteContext st (Reader HTMLLocal) where
getQuoteContext = asks quoteContext
withQuoteContext q = local (\s -> s{quoteContext = q})
instance HasReaderOptions HTMLState where
extractReaderOptions = extractReaderOptions . parserState
instance Default HTMLState where
def = HTMLState def
instance HasMeta HTMLState where
setMeta s b st = st {parserState = setMeta s b $ parserState st}
deleteMeta s st = st {parserState = deleteMeta s $ parserState st}
instance Default HTMLLocal where
def = HTMLLocal NoQuote
instance HasLastStrPosition HTMLState where
setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)}
getLastStrPos = getLastStrPos . parserState