Generalised more in Parsing.hs to enable the use of custom state
This commit is contained in:
parent
18f4490482
commit
5e2d22a27e
2 changed files with 114 additions and 58 deletions
|
@ -2,6 +2,7 @@
|
|||
FlexibleContexts
|
||||
, GeneralizedNewtypeDeriving
|
||||
, TypeSynonymInstances
|
||||
, MultiParamTypeClasses
|
||||
, FlexibleInstances #-}
|
||||
{-
|
||||
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
|
||||
|
@ -80,6 +81,7 @@ module Text.Pandoc.Parsing ( anyLine,
|
|||
HeaderType (..),
|
||||
ParserContext (..),
|
||||
QuoteContext (..),
|
||||
HasQuoteContext (..),
|
||||
NoteTable,
|
||||
NoteTable',
|
||||
KeyTable,
|
||||
|
@ -88,7 +90,6 @@ module Text.Pandoc.Parsing ( anyLine,
|
|||
toKey,
|
||||
registerHeader,
|
||||
smartPunctuation,
|
||||
withQuoteContext,
|
||||
singleQuoteStart,
|
||||
singleQuoteEnd,
|
||||
doubleQuoteStart,
|
||||
|
@ -106,6 +107,7 @@ module Text.Pandoc.Parsing ( anyLine,
|
|||
runF,
|
||||
askF,
|
||||
asksF,
|
||||
token,
|
||||
-- * Re-exports from Text.Pandoc.Parsec
|
||||
Stream,
|
||||
runParser,
|
||||
|
@ -160,7 +162,6 @@ module Text.Pandoc.Parsing ( anyLine,
|
|||
setSourceColumn,
|
||||
setSourceLine,
|
||||
newPos,
|
||||
token
|
||||
)
|
||||
where
|
||||
|
||||
|
@ -170,7 +171,7 @@ import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..))
|
|||
import qualified Text.Pandoc.Builder as B
|
||||
import Text.Pandoc.XML (fromEntities)
|
||||
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
|
||||
import Text.Parsec
|
||||
import Text.Parsec hiding (token)
|
||||
import Text.Parsec.Pos (newPos)
|
||||
import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum,
|
||||
isHexDigit, isSpace )
|
||||
|
@ -484,7 +485,8 @@ mathDisplayWith op cl = try $ do
|
|||
string op
|
||||
many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ string cl)
|
||||
|
||||
mathDisplay :: Stream s m Char => ParserT s ParserState m String
|
||||
mathDisplay :: (HasReaderOptions st, Stream s m Char)
|
||||
=> ParserT s st m String
|
||||
mathDisplay =
|
||||
(guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
|
||||
<|> (guardEnabled Ext_tex_math_single_backslash >>
|
||||
|
@ -492,7 +494,8 @@ mathDisplay =
|
|||
<|> (guardEnabled Ext_tex_math_double_backslash >>
|
||||
mathDisplayWith "\\\\[" "\\\\]")
|
||||
|
||||
mathInline :: Stream s m Char => ParserT s ParserState m String
|
||||
mathInline :: (HasReaderOptions st , Stream s m Char)
|
||||
=> ParserT s st m String
|
||||
mathInline =
|
||||
(guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
|
||||
<|> (guardEnabled Ext_tex_math_single_backslash >>
|
||||
|
@ -909,6 +912,21 @@ class HasReaderOptions st where
|
|||
-- default
|
||||
getOption f = (f . extractReaderOptions) <$> getState
|
||||
|
||||
class HasQuoteContext st m where
|
||||
getQuoteContext :: (Stream s m t) => ParsecT s st m QuoteContext
|
||||
withQuoteContext :: QuoteContext -> ParsecT s st m a -> ParsecT s st m a
|
||||
|
||||
instance Monad m => HasQuoteContext ParserState m where
|
||||
getQuoteContext = stateQuoteContext <$> getState
|
||||
withQuoteContext context parser = do
|
||||
oldState <- getState
|
||||
let oldQuoteContext = stateQuoteContext oldState
|
||||
setState oldState { stateQuoteContext = context }
|
||||
result <- parser
|
||||
newState <- getState
|
||||
setState newState { stateQuoteContext = oldQuoteContext }
|
||||
return result
|
||||
|
||||
instance HasReaderOptions ParserState where
|
||||
extractReaderOptions = stateOptions
|
||||
|
||||
|
@ -1051,9 +1069,9 @@ registerHeader (ident,classes,kvs) header' = do
|
|||
failUnlessSmart :: (Stream s m a, HasReaderOptions st) => ParserT s st m ()
|
||||
failUnlessSmart = getOption readerSmart >>= guard
|
||||
|
||||
smartPunctuation :: Stream s m Char
|
||||
=> ParserT s ParserState m Inlines
|
||||
-> ParserT s ParserState m Inlines
|
||||
smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
|
||||
=> ParserT s st m Inlines
|
||||
-> ParserT s st m Inlines
|
||||
smartPunctuation inlineParser = do
|
||||
failUnlessSmart
|
||||
choice [ quoted inlineParser, apostrophe, dash, ellipses ]
|
||||
|
@ -1061,46 +1079,33 @@ smartPunctuation inlineParser = do
|
|||
apostrophe :: Stream s m Char => ParserT s st m Inlines
|
||||
apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019")
|
||||
|
||||
quoted :: Stream s m Char
|
||||
=> ParserT s ParserState m Inlines
|
||||
-> ParserT s ParserState m Inlines
|
||||
quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
|
||||
=> ParserT s st m Inlines
|
||||
-> ParserT s st m Inlines
|
||||
quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
|
||||
|
||||
withQuoteContext :: Stream s m t
|
||||
=> QuoteContext
|
||||
-> ParserT s ParserState m a
|
||||
-> ParserT s ParserState m a
|
||||
withQuoteContext context parser = do
|
||||
oldState <- getState
|
||||
let oldQuoteContext = stateQuoteContext oldState
|
||||
setState oldState { stateQuoteContext = context }
|
||||
result <- parser
|
||||
newState <- getState
|
||||
setState newState { stateQuoteContext = oldQuoteContext }
|
||||
return result
|
||||
|
||||
singleQuoted :: Stream s m Char
|
||||
=> ParserT s ParserState m Inlines
|
||||
-> ParserT s ParserState m Inlines
|
||||
singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
|
||||
=> ParserT s st m Inlines
|
||||
-> ParserT s st m Inlines
|
||||
singleQuoted inlineParser = try $ do
|
||||
singleQuoteStart
|
||||
withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>=
|
||||
return . B.singleQuoted . mconcat
|
||||
|
||||
doubleQuoted :: Stream s m Char
|
||||
=> ParserT s ParserState m Inlines
|
||||
-> ParserT s ParserState m Inlines
|
||||
doubleQuoted :: (HasQuoteContext st m, Stream s m Char)
|
||||
=> ParserT s st m Inlines
|
||||
-> ParserT s st m Inlines
|
||||
doubleQuoted inlineParser = try $ do
|
||||
doubleQuoteStart
|
||||
withQuoteContext InDoubleQuote $ manyTill inlineParser doubleQuoteEnd >>=
|
||||
return . B.doubleQuoted . mconcat
|
||||
|
||||
failIfInQuoteContext :: Stream s m t
|
||||
failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t)
|
||||
=> QuoteContext
|
||||
-> ParserT s ParserState m ()
|
||||
-> ParserT s st m ()
|
||||
failIfInQuoteContext context = do
|
||||
st <- getState
|
||||
if stateQuoteContext st == context
|
||||
context' <- getQuoteContext
|
||||
if context' == context
|
||||
then fail "already inside quotes"
|
||||
else return ()
|
||||
|
||||
|
@ -1110,8 +1115,8 @@ charOrRef cs =
|
|||
guard (c `elem` cs)
|
||||
return c)
|
||||
|
||||
singleQuoteStart :: Stream s m Char
|
||||
=> ParserT s ParserState m ()
|
||||
singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
|
||||
=> ParserT s st m ()
|
||||
singleQuoteStart = do
|
||||
failIfInQuoteContext InSingleQuote
|
||||
-- single quote start can't be right after str
|
||||
|
@ -1124,8 +1129,8 @@ singleQuoteEnd = try $ do
|
|||
charOrRef "'\8217\146"
|
||||
notFollowedBy alphaNum
|
||||
|
||||
doubleQuoteStart :: Stream s m Char
|
||||
=> ParserT s ParserState m ()
|
||||
doubleQuoteStart :: (HasQuoteContext st m, Stream s m Char)
|
||||
=> ParserT s st m ()
|
||||
doubleQuoteStart = do
|
||||
failIfInQuoteContext InDoubleQuote
|
||||
try $ do charOrRef "\"\8220\147"
|
||||
|
@ -1179,6 +1184,14 @@ citeKey = try $ do
|
|||
let key = firstChar:rest
|
||||
return (suppress_author, key)
|
||||
|
||||
|
||||
token :: (Stream s m t)
|
||||
=> (t -> String)
|
||||
-> (t -> SourcePos)
|
||||
-> (t -> Maybe a)
|
||||
-> ParsecT s st m a
|
||||
token pp pos match = tokenPrim pp (\_ t _ -> pos t) match
|
||||
|
||||
--
|
||||
-- Macros
|
||||
--
|
||||
|
@ -1200,9 +1213,9 @@ macro = do
|
|||
else return $ rawBlock "latex" def'
|
||||
|
||||
-- | Apply current macros to string.
|
||||
applyMacros' :: Stream [Char] m Char
|
||||
applyMacros' :: (HasReaderOptions st, HasMacros st, Stream [Char] m Char)
|
||||
=> String
|
||||
-> ParserT [Char] ParserState m String
|
||||
-> ParserT [Char] st m String
|
||||
applyMacros' target = do
|
||||
apply <- getOption readerApplyMacros
|
||||
if apply
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses #-}
|
||||
{-
|
||||
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
|
||||
|
||||
|
@ -40,7 +41,7 @@ import Text.HTML.TagSoup
|
|||
import Text.HTML.TagSoup.Match
|
||||
import Text.Pandoc.Definition
|
||||
import qualified Text.Pandoc.Builder as B
|
||||
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
|
||||
import Text.Pandoc.Builder (HasMeta (..), Blocks, Inlines, trimInlines)
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Parsing
|
||||
|
@ -52,6 +53,8 @@ import Control.Applicative ( (<$>), (<$), (<*) )
|
|||
import Data.Monoid
|
||||
import Text.Printf (printf)
|
||||
import Debug.Trace (trace)
|
||||
import Data.Default (Default (..))
|
||||
import Control.Monad.Reader (Reader, runReader, asks, local, ask)
|
||||
|
||||
isSpace :: Char -> Bool
|
||||
isSpace ' ' = True
|
||||
|
@ -64,17 +67,26 @@ readHtml :: ReaderOptions -- ^ Reader options
|
|||
-> String -- ^ String to parse (assumes @'\n'@ line endings)
|
||||
-> Pandoc
|
||||
readHtml opts inp =
|
||||
case runParser parseDoc def{ stateOptions = opts } "source" tags of
|
||||
case flip runReader def $ runParserT parseDoc (HTMLState def{ stateOptions = opts } ) "source" tags of
|
||||
Left err' -> error $ "\nError at " ++ show err'
|
||||
Right result -> result
|
||||
where tags = canonicalizeTags $
|
||||
parseTagsOptions parseOptions{ optTagPosition = True } inp
|
||||
parseDoc = do
|
||||
blocks <- (fixPlains False) . mconcat <$> manyTill block eof
|
||||
meta <- stateMeta <$> getState
|
||||
meta <- stateMeta . parserState <$> getState
|
||||
return $ Pandoc meta (B.toList blocks)
|
||||
|
||||
type TagParser = Parser [Tag String] ParserState
|
||||
data HTMLState =
|
||||
HTMLState
|
||||
{ parserState :: ParserState
|
||||
}
|
||||
|
||||
data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext }
|
||||
|
||||
type HTMLParser s = ParserT s HTMLState (Reader HTMLLocal)
|
||||
|
||||
type TagParser = HTMLParser [Tag String]
|
||||
|
||||
pBody :: TagParser Blocks
|
||||
pBody = pInTags "body" block
|
||||
|
@ -115,7 +127,6 @@ block = do
|
|||
(take 60 $ show $ B.toList res)) (return ())
|
||||
return res
|
||||
|
||||
|
||||
pList :: TagParser Blocks
|
||||
pList = pBulletList <|> pOrderedList <|> pDefinitionList
|
||||
|
||||
|
@ -365,8 +376,8 @@ pSelfClosing f g = do
|
|||
|
||||
pQ :: TagParser Inlines
|
||||
pQ = do
|
||||
quoteContext <- stateQuoteContext `fmap` getState
|
||||
let quoteType = case quoteContext of
|
||||
context <- asks quoteContext
|
||||
let quoteType = case context of
|
||||
InDoubleQuote -> SingleQuote
|
||||
_ -> DoubleQuote
|
||||
let innerQuoteContext = if quoteType == SingleQuote
|
||||
|
@ -477,7 +488,8 @@ pTagText :: TagParser Inlines
|
|||
pTagText = try $ do
|
||||
(TagText str) <- pSatisfy isTagText
|
||||
st <- getState
|
||||
case runParser (many pTagContents) st "text" str of
|
||||
qu <- ask
|
||||
case flip runReader qu $ runParserT (many pTagContents) st "text" str of
|
||||
Left _ -> fail $ "Could not parse `" ++ str ++ "'"
|
||||
Right result -> return $ mconcat result
|
||||
|
||||
|
@ -486,7 +498,9 @@ pBlank = try $ do
|
|||
(TagText str) <- pSatisfy isTagText
|
||||
guard $ all isSpace str
|
||||
|
||||
pTagContents :: Parser [Char] ParserState Inlines
|
||||
type InlinesParser = HTMLParser String
|
||||
|
||||
pTagContents :: InlinesParser Inlines
|
||||
pTagContents =
|
||||
B.displayMath <$> mathDisplay
|
||||
<|> B.math <$> mathInline
|
||||
|
@ -496,12 +510,11 @@ pTagContents =
|
|||
<|> pSymbol
|
||||
<|> pBad
|
||||
|
||||
pStr :: Parser [Char] ParserState Inlines
|
||||
pStr :: InlinesParser Inlines
|
||||
pStr = do
|
||||
result <- many1 $ satisfy $ \c ->
|
||||
not (isSpace c) && not (isSpecial c) && not (isBad c)
|
||||
pos <- getPosition
|
||||
updateState $ \s -> s{ stateLastStrPos = Just pos }
|
||||
updateLastStrPos
|
||||
return $ B.str result
|
||||
|
||||
isSpecial :: Char -> Bool
|
||||
|
@ -516,13 +529,13 @@ isSpecial '\8220' = True
|
|||
isSpecial '\8221' = True
|
||||
isSpecial _ = False
|
||||
|
||||
pSymbol :: Parser [Char] ParserState Inlines
|
||||
pSymbol :: InlinesParser Inlines
|
||||
pSymbol = satisfy isSpecial >>= return . B.str . (:[])
|
||||
|
||||
isBad :: Char -> Bool
|
||||
isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML
|
||||
|
||||
pBad :: Parser [Char] ParserState Inlines
|
||||
pBad :: InlinesParser Inlines
|
||||
pBad = do
|
||||
c <- satisfy isBad
|
||||
let c' = case c of
|
||||
|
@ -556,7 +569,7 @@ pBad = do
|
|||
_ -> '?'
|
||||
return $ B.str [c']
|
||||
|
||||
pSpace :: Parser [Char] ParserState Inlines
|
||||
pSpace :: InlinesParser Inlines
|
||||
pSpace = many1 (satisfy isSpace) >> return B.space
|
||||
|
||||
--
|
||||
|
@ -672,19 +685,23 @@ _ `closes` _ = False
|
|||
--- parsers for use in markdown, textile readers
|
||||
|
||||
-- | Matches a stretch of HTML in balanced tags.
|
||||
htmlInBalanced :: (Tag String -> Bool) -> Parser [Char] ParserState String
|
||||
htmlInBalanced :: (Monad m)
|
||||
=> (Tag String -> Bool)
|
||||
-> ParserT String st m String
|
||||
htmlInBalanced f = try $ do
|
||||
(TagOpen t _, tag) <- htmlTag f
|
||||
guard $ '/' `notElem` tag -- not a self-closing tag
|
||||
let stopper = htmlTag (~== TagClose t)
|
||||
let anytag = liftM snd $ htmlTag (const True)
|
||||
let anytag = snd <$> htmlTag (const True)
|
||||
contents <- many $ notFollowedBy' stopper >>
|
||||
(htmlInBalanced f <|> anytag <|> count 1 anyChar)
|
||||
endtag <- liftM snd stopper
|
||||
return $ tag ++ concat contents ++ endtag
|
||||
|
||||
-- | Matches a tag meeting a certain condition.
|
||||
htmlTag :: (Tag String -> Bool) -> Parser [Char] st (Tag String, String)
|
||||
htmlTag :: Monad m
|
||||
=> (Tag String -> Bool)
|
||||
-> ParserT [Char] st m (Tag String, String)
|
||||
htmlTag f = try $ do
|
||||
lookAhead $ char '<' >> (oneOf "/!?" <|> letter)
|
||||
(next : _) <- getInput >>= return . canonicalizeTags . parseTags
|
||||
|
@ -707,3 +724,29 @@ mkAttr attr = (attribsId, attribsClasses, attribsKV)
|
|||
attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
|
||||
|
||||
|
||||
-- Instances
|
||||
|
||||
-- This signature should be more general
|
||||
-- MonadReader HTMLLocal m => HasQuoteContext st m
|
||||
instance HasQuoteContext st (Reader HTMLLocal) where
|
||||
getQuoteContext = asks quoteContext
|
||||
withQuoteContext q = local (\s -> s{quoteContext = q})
|
||||
|
||||
instance HasReaderOptions HTMLState where
|
||||
extractReaderOptions = extractReaderOptions . parserState
|
||||
|
||||
instance Default HTMLState where
|
||||
def = HTMLState def
|
||||
|
||||
instance HasMeta HTMLState where
|
||||
setMeta s b st = st {parserState = setMeta s b $ parserState st}
|
||||
deleteMeta s st = st {parserState = deleteMeta s $ parserState st}
|
||||
|
||||
instance Default HTMLLocal where
|
||||
def = HTMLLocal NoQuote
|
||||
|
||||
instance HasLastStrPosition HTMLState where
|
||||
setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)}
|
||||
getLastStrPos = getLastStrPos . parserState
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue