Change reader types, allowing better tracking of source positions.

Previously, when multiple file arguments were provided, pandoc
simply concatenated them and passed the contents to the readers,
which took a Text argument.

As a result, the readers had no way of knowing which file
was the source of any particular bit of text.  This meant that
we couldn't report accurate source positions on errors or
include accurate source positions as attributes in the AST.
More seriously, it meant that we couldn't resolve resource
paths relative to the files containing them
(see e.g. #5501, #6632, #6384, #3752).

Add Text.Pandoc.Sources (exported module), with a `Sources` type
and a `ToSources` class.  A `Sources` wraps a list of `(SourcePos,
Text)` pairs. [API change] A parsec `Stream` instance is provided for
`Sources`.  The module also exports versions of parsec's `satisfy` and
other Char parsers that track source positions accurately from a
`Sources` stream (or any instance of the new `UpdateSourcePos` class).

Text.Pandoc.Parsing now exports these modified Char parsers instead of
the ones parsec provides.  Modified parsers to use a `Sources` as stream
[API change].

The readers that previously took a `Text` argument have been
modified to take any instance of `ToSources`. So, they may still
be used with a `Text`, but they can also be used with a `Sources`
object.

In Text.Pandoc.Error, modified the constructor PandocParsecError
to take a `Sources` rather than a `Text` as first argument,
so parse error locations can be accurately reported.

T.P.Error: showPos, do not print "-" as source name.
This commit is contained in:
John MacFarlane 2021-05-01 13:17:45 -07:00
parent 295d93e96b
commit 6e45607f99
46 changed files with 1024 additions and 616 deletions

View file

@ -9,6 +9,7 @@
# Ignore some builtin hints
#
- ignore: {name: "Avoid lambda"}
- ignore: {name: "Use bimap"}
- ignore: {name: "Eta reduce"}
- ignore: {name: "Evaluate"}
- ignore: {name: "Reduce duplication"} # TODO: could be more fine-grained

View file

@ -513,6 +513,7 @@ library
Text.Pandoc.Options,
Text.Pandoc.Extensions,
Text.Pandoc.Shared,
Text.Pandoc.Sources,
Text.Pandoc.MediaBag,
Text.Pandoc.Error,
Text.Pandoc.Filter,

View file

@ -160,9 +160,11 @@ convertWithOpts opts = do
else optTabStop opts)
let readSources :: [FilePath] -> PandocIO Text
readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$>
mapM readSource srcs
let readSources :: [FilePath] -> PandocIO [(FilePath, Text)]
readSources srcs =
mapM (\fp -> do
t <- readSource fp
return (if fp == "-" then "" else fp, convertTabs t)) srcs
outputSettings <- optToOutputSettings opts

View file

@ -687,7 +687,7 @@ yamlToMeta (Mapping _ _ m) =
where
pMetaString = pure . MetaString <$> P.manyChar P.anyChar
runEverything p =
runPure (P.readWithM p (def :: P.ParserState) "")
runPure (P.readWithM p (def :: P.ParserState) ("" :: Text))
>>= fmap (Meta . flip P.runF def)
yamlToMeta _ = return mempty

View file

@ -59,10 +59,11 @@ data Variant = Bibtex | Biblatex
deriving (Show, Eq, Ord)
-- | Parse BibTeX or BibLaTeX into a list of 'Reference's.
readBibtexString :: Variant -- ^ bibtex or biblatex
readBibtexString :: ToSources a
=> Variant -- ^ bibtex or biblatex
-> Locale -- ^ Locale
-> (Text -> Bool) -- ^ Filter on citation ids
-> Text -- ^ bibtex/biblatex text
-> a -- ^ bibtex/biblatex text
-> Either ParseError [Reference Inlines]
readBibtexString variant locale idpred contents = do
case runParser (((resolveCrossRefs variant <$> bibEntries) <* eof) >>=
@ -70,7 +71,7 @@ readBibtexString variant locale idpred contents = do
filter (\item -> idpred (identifier item) &&
entryType item /= "xdata"))
(fromMaybe defaultLang $ localeLanguage locale, Map.empty)
"" contents of
"" (toSources contents) of
Left err -> Left err
Right xs -> return xs
@ -339,7 +340,7 @@ defaultLang = Lang "en" Nothing (Just "US") [] [] []
-- a map of bibtex "string" macros
type StringMap = Map.Map Text Text
type BibParser = Parser Text (Lang, StringMap)
type BibParser = Parser Sources (Lang, StringMap)
data Item = Item{ identifier :: Text
, sourcePos :: SourcePos
@ -804,7 +805,7 @@ bibEntries = do
(bibComment <|> bibPreamble <|> bibString))
bibSkip :: BibParser ()
bibSkip = () <$ take1WhileP (/='@')
bibSkip = skipMany1 (satisfy (/='@'))
bibComment :: BibParser ()
bibComment = do
@ -829,6 +830,9 @@ bibString = do
updateState (\(l,m) -> (l, Map.insert k v m))
return ()
take1WhileP :: Monad m => (Char -> Bool) -> ParserT Sources u m Text
take1WhileP f = T.pack <$> many1 (satisfy f)
inBraces :: BibParser Text
inBraces = do
char '{'

View file

@ -23,26 +23,27 @@ import Control.Exception (Exception, displayException)
import Data.Typeable (Typeable)
import Data.Word (Word8)
import Data.Text (Text)
import Data.List (sortOn)
import qualified Data.Text as T
import Data.Ord (Down(..))
import GHC.Generics (Generic)
import Network.HTTP.Client (HttpException)
import System.Exit (ExitCode (..), exitWith)
import System.IO (stderr)
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Sources (Sources(..))
import Text.Printf (printf)
import Text.Parsec.Error
import Text.Parsec.Pos hiding (Line)
import Text.Pandoc.Shared (tshow)
import Citeproc (CiteprocError, prettyCiteprocError)
type Input = Text
data PandocError = PandocIOError Text IOError
| PandocHttpError Text HttpException
| PandocShouldNeverHappenError Text
| PandocSomeError Text
| PandocParseError Text
| PandocParsecError Input ParseError
| PandocParsecError Sources ParseError
| PandocMakePDFError Text
| PandocOptionError Text
| PandocSyntaxMapError Text
@ -81,22 +82,28 @@ renderError e =
"Please report this to pandoc's developers: " <> s
PandocSomeError s -> s
PandocParseError s -> s
PandocParsecError input err' ->
PandocParsecError (Sources inputs) err' ->
let errPos = errorPos err'
errLine = sourceLine errPos
errColumn = sourceColumn errPos
ls = T.lines input <> [""]
errorInFile = if length ls > errLine - 1
then T.concat ["\n", ls !! (errLine - 1)
,"\n", T.replicate (errColumn - 1) " "
,"^"]
else ""
in "\nError at " <> tshow err' <>
-- if error comes from a chunk or included file,
-- then we won't get the right text this way:
if sourceName errPos == "source"
then errorInFile
else ""
errFile = sourceName errPos
errorInFile =
case sortOn (Down . sourceLine . fst)
[ (pos,t)
| (pos,t) <- inputs
, sourceName pos == errFile
, sourceLine pos <= errLine
] of
[] -> ""
((pos,txt):_) ->
let ls = T.lines txt <> [""]
ln = errLine - sourceLine pos
in if length ls > ln - 1
then T.concat ["\n", ls !! (ln - 1)
,"\n", T.replicate (errColumn - 1) " "
,"^"]
else ""
in "\nError at " <> tshow err' <> errorInFile
PandocMakePDFError s -> s
PandocOptionError s -> s
PandocSyntaxMapError s -> s

View file

@ -241,9 +241,11 @@ instance ToJSON LogMessage where
showPos :: SourcePos -> Text
showPos pos = Text.pack $ sn ++ "line " ++
show (sourceLine pos) ++ " column " ++ show (sourceColumn pos)
where sn = if sourceName pos == "source" || sourceName pos == ""
then ""
else sourceName pos ++ " "
where
sn' = sourceName pos
sn = if sn' == "source" || sn' == "" || sn' == "-"
then ""
else sn' ++ " "
encodeLogMessages :: [LogMessage] -> BL.ByteString
encodeLogMessages ms =

View file

@ -5,7 +5,6 @@
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE IncoherentInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Parsing
@ -19,8 +18,7 @@
A utility library with parsers used in pandoc readers.
-}
module Text.Pandoc.Parsing ( take1WhileP,
takeP,
module Text.Pandoc.Parsing ( module Text.Pandoc.Sources,
countChar,
textStr,
anyLine,
@ -134,22 +132,10 @@ module Text.Pandoc.Parsing ( take1WhileP,
getInput,
setInput,
unexpected,
char,
letter,
digit,
alphaNum,
skipMany,
skipMany1,
spaces,
space,
anyChar,
satisfy,
newline,
string,
count,
eof,
noneOf,
oneOf,
lookAhead,
notFollowedBy,
many,
@ -174,6 +160,8 @@ module Text.Pandoc.Parsing ( take1WhileP,
SourcePos,
getPosition,
setPosition,
sourceName,
setSourceName,
sourceColumn,
sourceLine,
setSourceColumn,
@ -189,16 +177,25 @@ module Text.Pandoc.Parsing ( take1WhileP,
where
import Control.Monad.Identity
( guard,
join,
unless,
when,
void,
liftM2,
liftM,
Identity(..),
MonadPlus(mzero) )
import Control.Monad.Reader
( asks, runReader, MonadReader(ask), Reader, ReaderT(ReaderT) )
import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, isAsciiLower,
isPunctuation, isSpace, ord, toLower, toUpper)
import Data.Default
import Data.Default ( Default(..) )
import Data.Functor (($>))
import Data.List (intercalate, transpose)
import qualified Data.Map as M
import Data.Maybe (fromMaybe)
import qualified Data.Set as Set
import Data.String
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup.Entity (lookupEntity)
@ -207,22 +204,108 @@ import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class.PandocMonad (PandocMonad, readFileFromDirs, report)
import Text.Pandoc.Definition
( Target,
nullMeta,
nullAttr,
Meta,
ColWidth(ColWidthDefault, ColWidth),
TableFoot(TableFoot),
TableBody(TableBody),
Attr,
TableHead(TableHead),
Row(..),
Alignment(..),
Inline(Str),
ListNumberDelim(..),
ListAttributes,
ListNumberStyle(..) )
import Text.Pandoc.Logging
( LogMessage(CouldNotLoadIncludeFile, DuplicateIdentifier) )
import Text.Pandoc.Options
( extensionEnabled,
Extension(Ext_old_dashes, Ext_tex_math_dollars,
Ext_tex_math_single_backslash, Ext_tex_math_double_backslash,
Ext_auto_identifiers, Ext_ascii_identifiers, Ext_smart),
ReaderOptions(readerTabStop, readerColumns, readerExtensions) )
import Text.Pandoc.Readers.LaTeX.Types (Macro)
import Text.Pandoc.Shared
( uniqueIdent,
tshow,
mapLeft,
compactify,
trim,
trimr,
splitTextByIndices,
safeRead,
trimMath,
schemes,
escapeURI )
import Text.Pandoc.Sources
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
import Text.Pandoc.XML (fromEntities)
import Text.Parsec hiding (token)
import Text.Parsec.Pos (initialPos, newPos, updatePosString)
import Control.Monad.Except
import Text.Parsec
( between,
setSourceName,
Parsec,
Column,
Line,
incSourceLine,
incSourceColumn,
setSourceLine,
setSourceColumn,
sourceLine,
sourceColumn,
sourceName,
setSourceName,
setPosition,
getPosition,
updateState,
setState,
getState,
optionMaybe,
optional,
option,
endBy1,
endBy,
sepEndBy1,
sepEndBy,
sepBy1,
sepBy,
try,
choice,
(<?>),
(<|>),
manyTill,
many1,
many,
notFollowedBy,
lookAhead,
eof,
count,
skipMany1,
skipMany,
unexpected,
setInput,
getInput,
anyToken,
tokenPrim,
parse,
runParserT,
runParser,
ParseError,
ParsecT,
SourcePos,
Stream(..) )
import Text.Parsec.Pos (initialPos, newPos)
import Control.Monad.Except ( MonadError(throwError) )
import Text.Pandoc.Error
( PandocError(PandocParseError, PandocParsecError) )
type Parser t s = Parsec t s
type ParserT = ParsecT
-- | Reader monad wrapping the parser state. This is used to possibly delay
-- evaluation until all relevant information has been parsed and made available
-- in the parser state.
@ -251,70 +334,48 @@ instance (Semigroup a, Monoid a) => Monoid (Future s a) where
mappend = (<>)
-- | Like @count@, but packs its result
countChar :: (Stream s m Char, Monad m)
countChar :: (Stream s m Char, UpdateSourcePos s Char, Monad m)
=> Int
-> ParsecT s st m Char
-> ParsecT s st m Text
countChar n = fmap T.pack . count n
-- | Like @string@, but uses @Text@.
textStr :: Stream s m Char => Text -> ParsecT s u m Text
textStr :: (Stream s m Char, UpdateSourcePos s Char)
=> Text -> ParsecT s u m Text
textStr t = string (T.unpack t) $> t
-- | Parse characters while a predicate is true.
take1WhileP :: Monad m
=> (Char -> Bool)
-> ParserT Text st m Text
take1WhileP f = do
-- needed to persuade parsec that this won't match an empty string:
c <- satisfy f
inp <- getInput
pos <- getPosition
let (t, rest) = T.span f inp
setInput rest
setPosition $
if f '\t' || f '\n'
then updatePosString pos $ T.unpack t
else incSourceColumn pos (T.length t)
return $ T.singleton c <> t
-- Parse n characters of input (or the rest of the input if
-- there aren't n characters).
takeP :: Monad m => Int -> ParserT Text st m Text
takeP n = do
guard (n > 0)
-- faster than 'count n anyChar'
inp <- getInput
pos <- getPosition
let (xs, rest) = T.splitAt n inp
-- needed to persuade parsec that this won't match an empty string:
anyChar
setInput rest
setPosition $ updatePosString pos $ T.unpack xs
return xs
-- | Parse any line of text
anyLine :: Monad m => ParserT Text st m Text
-- | Parse any line of text, returning the contents without the
-- final newline.
anyLine :: Monad m => ParserT Sources st m Text
anyLine = do
-- This is much faster than:
-- manyTill anyChar newline
inp <- getInput
pos <- getPosition
case T.break (=='\n') inp of
(this, T.uncons -> Just ('\n', rest)) -> do
-- needed to persuade parsec that this won't match an empty string:
anyChar
setInput rest
setPosition $ incSourceLine (setSourceColumn pos 1) 1
return this
_ -> mzero
case inp of
Sources [] -> mzero
Sources ((fp,t):inps) ->
-- we assume that lines don't span different input files
case T.break (=='\n') t of
(this, rest)
| T.null rest
, not (null inps) ->
-- line may span different input files, so do it
-- character by character
T.pack <$> manyTill anyChar newline
| otherwise -> do -- either end of inputs or newline in rest
setInput $ Sources ((fp, rest):inps)
char '\n' -- needed so parsec knows we won't match empty string
-- and so source pos is updated
return this
-- | Parse any line, include the final newline in the output
anyLineNewline :: Monad m => ParserT Text st m Text
anyLineNewline :: Monad m => ParserT Sources st m Text
anyLineNewline = (<> "\n") <$> anyLine
-- | Parse indent by specified number of spaces (or equiv. tabs)
indentWith :: Stream s m Char
indentWith :: (Stream s m Char, UpdateSourcePos s Char)
=> HasReaderOptions st
=> Int -> ParserT s st m Text
indentWith num = do
@ -399,11 +460,13 @@ notFollowedBy' p = try $ join $ do a <- try p
return (return ())
-- (This version due to Andrew Pimlott on the Haskell mailing list.)
oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text
oneOfStrings' :: (Stream s m Char, UpdateSourcePos s Char)
=> (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text
oneOfStrings' f = fmap T.pack . oneOfStrings'' f . fmap T.unpack
-- TODO: This should be re-implemented in a Text-aware way
oneOfStrings'' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
oneOfStrings'' :: (Stream s m Char, UpdateSourcePos s Char)
=> (Char -> Char -> Bool) -> [String] -> ParserT s st m String
oneOfStrings'' _ [] = Prelude.fail "no strings"
oneOfStrings'' matches strs = try $ do
c <- anyChar
@ -418,14 +481,16 @@ oneOfStrings'' matches strs = try $ do
-- | Parses one of a list of strings. If the list contains
-- two strings one of which is a prefix of the other, the longer
-- string will be matched if possible.
oneOfStrings :: Stream s m Char => [Text] -> ParserT s st m Text
oneOfStrings :: (Stream s m Char, UpdateSourcePos s Char)
=> [Text] -> ParserT s st m Text
oneOfStrings = oneOfStrings' (==)
-- | Parses one of a list of strings (tried in order), case insensitive.
-- TODO: This will not be accurate with general Unicode (neither
-- Text.toLower nor Text.toCaseFold can be implemented with a map)
oneOfStringsCI :: Stream s m Char => [Text] -> ParserT s st m Text
oneOfStringsCI :: (Stream s m Char, UpdateSourcePos s Char)
=> [Text] -> ParserT s st m Text
oneOfStringsCI = oneOfStrings' ciMatch
where ciMatch x y = toLower' x == toLower' y
-- this optimizes toLower by checking common ASCII case
@ -436,11 +501,13 @@ oneOfStringsCI = oneOfStrings' ciMatch
| otherwise = toLower c
-- | Parses a space or tab.
spaceChar :: Stream s m Char => ParserT s st m Char
spaceChar :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Char
spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
-- | Parses a nonspace, nonnewline character.
nonspaceChar :: Stream s m Char => ParserT s st m Char
nonspaceChar :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Char
nonspaceChar = satisfy (not . isSpaceChar)
isSpaceChar :: Char -> Bool
@ -451,21 +518,24 @@ isSpaceChar '\r' = True
isSpaceChar _ = False
-- | Skips zero or more spaces or tabs.
skipSpaces :: Stream s m Char => ParserT s st m ()
skipSpaces :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m ()
skipSpaces = skipMany spaceChar
-- | Skips zero or more spaces or tabs, then reads a newline.
blankline :: Stream s m Char => ParserT s st m Char
blankline :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Char
blankline = try $ skipSpaces >> newline
-- | Parses one or more blank lines and returns a string of newlines.
blanklines :: Stream s m Char => ParserT s st m Text
blanklines :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Text
blanklines = T.pack <$> many1 blankline
-- | Gobble n spaces; if tabs are encountered, expand them
-- and gobble some or all of their spaces, leaving the rest.
gobbleSpaces :: (HasReaderOptions st, Monad m)
=> Int -> ParserT Text st m ()
=> Int -> ParserT Sources st m ()
gobbleSpaces 0 = return ()
gobbleSpaces n
| n < 0 = error "gobbleSpaces called with negative number"
@ -473,18 +543,26 @@ gobbleSpaces n
char ' ' <|> eatOneSpaceOfTab
gobbleSpaces (n - 1)
eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Text st m Char
eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Sources st m Char
eatOneSpaceOfTab = do
char '\t'
lookAhead (char '\t')
pos <- getPosition
tabstop <- getOption readerTabStop
-- replace the tab on the input stream with spaces
let numSpaces = tabstop - ((sourceColumn pos - 1) `mod` tabstop)
inp <- getInput
setInput $ T.replicate (tabstop - 1) " " <> inp
return ' '
setInput $
case inp of
Sources [] -> error "eatOneSpaceOfTab - empty Sources list"
Sources ((fp,t):rest) ->
-- drop the tab and add spaces
Sources ((fp, T.replicate numSpaces " " <> T.drop 1 t):rest)
char ' '
-- | Gobble up to n spaces; if tabs are encountered, expand them
-- and gobble some or all of their spaces, leaving the rest.
gobbleAtMostSpaces :: (HasReaderOptions st, Monad m)
=> Int -> ParserT Text st m Int
=> Int -> ParserT Sources st m Int
gobbleAtMostSpaces 0 = return 0
gobbleAtMostSpaces n
| n < 0 = error "gobbleAtMostSpaces called with negative number"
@ -493,7 +571,8 @@ gobbleAtMostSpaces n
(+ 1) <$> gobbleAtMostSpaces (n - 1)
-- | Parses material enclosed between start and end parsers.
enclosed :: (Show end, Stream s m Char) => ParserT s st m t -- ^ start parser
enclosed :: (Show end, Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m t -- ^ start parser
-> ParserT s st m end -- ^ end parser
-> ParserT s st m a -- ^ content parser (to be used repeatedly)
-> ParserT s st m [a]
@ -501,39 +580,41 @@ enclosed start end parser = try $
start >> notFollowedBy space >> many1Till parser end
-- | Parse string, case insensitive.
stringAnyCase :: Stream s m Char => Text -> ParserT s st m Text
stringAnyCase :: (Stream s m Char, UpdateSourcePos s Char)
=> Text -> ParserT s st m Text
stringAnyCase = fmap T.pack . stringAnyCase' . T.unpack
stringAnyCase' :: Stream s m Char => String -> ParserT s st m String
stringAnyCase' :: (Stream s m Char, UpdateSourcePos s Char)
=> String -> ParserT s st m String
stringAnyCase' [] = string ""
stringAnyCase' (x:xs) = do
firstChar <- char (toUpper x) <|> char (toLower x)
rest <- stringAnyCase' xs
return (firstChar:rest)
-- TODO rewrite by just adding to Sources stream?
-- | Parse contents of 'str' using 'parser' and return result.
parseFromString :: (Stream s m Char, IsString s)
=> ParserT s st m r
parseFromString :: Monad m
=> ParserT Sources st m r
-> Text
-> ParserT s st m r
-> ParserT Sources st m r
parseFromString parser str = do
oldPos <- getPosition
setPosition $ initialPos " chunk"
setPosition $ initialPos "chunk"
oldInput <- getInput
setInput $ fromString $ T.unpack str
setInput $ toSources str
result <- parser
spaces
eof
setInput oldInput
setPosition oldPos
return result
-- | Like 'parseFromString' but specialized for 'ParserState'.
-- This resets 'stateLastStrPos', which is almost always what we want.
parseFromString' :: (Stream s m Char, IsString s, HasLastStrPosition u)
=> ParserT s u m a
parseFromString' :: (Monad m, HasLastStrPosition u)
=> ParserT Sources u m a
-> Text
-> ParserT s u m a
-> ParserT Sources u m a
parseFromString' parser str = do
oldLastStrPos <- getLastStrPos <$> getState
updateState $ setLastStrPos Nothing
@ -542,7 +623,7 @@ parseFromString' parser str = do
return res
-- | Parse raw line block up to and including blank lines.
lineClump :: Monad m => ParserT Text st m Text
lineClump :: Monad m => ParserT Sources st m Text
lineClump = blanklines
<|> (T.unlines <$> many1 (notFollowedBy blankline >> anyLine))
@ -551,7 +632,7 @@ lineClump = blanklines
-- pairs of open and close, which must be different. For example,
-- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))"
-- and return "hello (there)".
charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char
charsInBalanced :: (Stream s m Char, UpdateSourcePos s Char) => Char -> Char -> ParserT s st m Char
-> ParserT s st m Text
charsInBalanced open close parser = try $ do
char open
@ -570,7 +651,7 @@ charsInBalanced open close parser = try $ do
-- Auxiliary functions for romanNumeral:
-- | Parses a roman numeral (uppercase or lowercase), returns number.
romanNumeral :: Stream s m Char => Bool -- ^ Uppercase if true
romanNumeral :: (Stream s m Char, UpdateSourcePos s Char) => Bool -- ^ Uppercase if true
-> ParserT s st m Int
romanNumeral upperCase = do
let rchar uc = char $ if upperCase then uc else toLower uc
@ -606,7 +687,7 @@ romanNumeral upperCase = do
-- | Parses an email address; returns original and corresponding
-- escaped mailto: URI.
emailAddress :: Stream s m Char => ParserT s st m (Text, Text)
emailAddress :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text)
emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
where toResult mbox dom = let full = fromEntities $ T.pack $ mbox ++ '@':dom
in (full, escapeURI $ "mailto:" <> full)
@ -630,11 +711,11 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
isEmailPunct c = T.any (== c) "!\"#$%&'*+-/=?^_{|}~;"
uriScheme :: Stream s m Char => ParserT s st m Text
uriScheme :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Text
uriScheme = oneOfStringsCI (Set.toList schemes)
-- | Parses a URI. Returns pair of original and URI-escaped version.
uri :: Stream s m Char => ParserT s st m (Text, Text)
uri :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text)
uri = try $ do
scheme <- uriScheme
char ':'
@ -677,7 +758,7 @@ uri = try $ do
uriChunkBetween l r = try $ do chunk <- between (char l) (char r) uriChunk
return (T.pack $ [l] ++ chunk ++ [r])
mathInlineWith :: Stream s m Char => Text -> Text -> ParserT s st m Text
mathInlineWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParserT s st m Text
mathInlineWith op cl = try $ do
textStr op
when (op == "$") $ notFollowedBy space
@ -698,10 +779,10 @@ mathInlineWith op cl = try $ do
notFollowedBy digit -- to prevent capture of $5
return $ trimMath $ T.concat words'
where
inBalancedBraces :: Stream s m Char => Int -> Text -> ParserT s st m Text
inBalancedBraces :: (Stream s m Char, UpdateSourcePos s Char) => Int -> Text -> ParserT s st m Text
inBalancedBraces n = fmap T.pack . inBalancedBraces' n . T.unpack
inBalancedBraces' :: Stream s m Char => Int -> String -> ParserT s st m String
inBalancedBraces' :: (Stream s m Char, UpdateSourcePos s Char) => Int -> String -> ParserT s st m String
inBalancedBraces' 0 "" = do
c <- anyChar
if c == '{'
@ -718,13 +799,13 @@ mathInlineWith op cl = try $ do
'{' -> inBalancedBraces' (numOpen + 1) (c:xs)
_ -> inBalancedBraces' numOpen (c:xs)
mathDisplayWith :: Stream s m Char => Text -> Text -> ParserT s st m Text
mathDisplayWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParserT s st m Text
mathDisplayWith op cl = try $ fmap T.pack $ do
textStr op
many1Till (satisfy (/= '\n') <|> (newline <* notFollowedBy' blankline))
(try $ textStr cl)
mathDisplay :: (HasReaderOptions st, Stream s m Char)
mathDisplay :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Text
mathDisplay =
(guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
@ -733,7 +814,7 @@ mathDisplay =
<|> (guardEnabled Ext_tex_math_double_backslash >>
mathDisplayWith "\\\\[" "\\\\]")
mathInline :: (HasReaderOptions st , Stream s m Char)
mathInline :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Text
mathInline =
(guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
@ -746,7 +827,7 @@ mathInline =
-- displacement (the difference between the source column at the end
-- and the source column at the beginning). Vertical displacement
-- (source row) is ignored.
withHorizDisplacement :: Stream s m Char
withHorizDisplacement :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m a -- ^ Parser to apply
-> ParserT s st m (a, Int) -- ^ (result, displacement)
withHorizDisplacement parser = do
@ -758,30 +839,37 @@ withHorizDisplacement parser = do
-- | Applies a parser and returns the raw string that was parsed,
-- along with the value produced by the parser.
withRaw :: Monad m
=> ParsecT Text st m a
-> ParsecT Text st m (a, Text)
=> ParsecT Sources st m a
-> ParsecT Sources st m (a, Text)
withRaw parser = do
pos1 <- getPosition
inp <- getInput
inps1 <- getInput
result <- parser
pos2 <- getPosition
let (l1,c1) = (sourceLine pos1, sourceColumn pos1)
let (l2,c2) = (sourceLine pos2, sourceColumn pos2)
let inplines = take ((l2 - l1) + 1) $ T.lines inp
let raw = case inplines of
[] -> ""
[l] -> T.take (c2 - c1) l
ls -> T.unlines (init ls) <> T.take (c2 - 1) (last ls)
return (result, raw)
inps2 <- getInput
-- 'raw' is the difference between inps1 and inps2
return (result, sourcesDifference inps1 inps2)
sourcesDifference :: Sources -> Sources -> Text
sourcesDifference (Sources is1) (Sources is2) = go is1 is2
where
go inps1 inps2 =
case (inps1, inps2) of
([], _) -> mempty
(_, []) -> mconcat $ map snd inps1
((p1,t1):rest1, (p2, t2):rest2)
| p1 == p2
, t1 == t2 -> go rest1 rest2
| p1 == p2
, t1 /= t2 -> fromMaybe mempty $ T.stripSuffix t2 t1
| otherwise -> t1 <> go rest1 inps2
-- | Parses backslash, then applies character parser.
escaped :: Stream s m Char
escaped :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Char -- ^ Parser for character to escape
-> ParserT s st m Char
escaped parser = try $ char '\\' >> parser
-- | Parse character entity.
characterReference :: Stream s m Char => ParserT s st m Char
characterReference :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char
characterReference = try $ do
char '&'
ent <- many1Till nonspaceChar (char ';')
@ -794,19 +882,19 @@ characterReference = try $ do
_ -> Prelude.fail "entity not found"
-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
upperRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
upperRoman = do
num <- romanNumeral True
return (UpperRoman, num)
-- | Parses a lowercase roman numeral and returns (LowerRoman, number).
lowerRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
lowerRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
lowerRoman = do
num <- romanNumeral False
return (LowerRoman, num)
-- | Parses a decimal numeral and returns (Decimal, number).
decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
decimal :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
decimal = do
num <- many1 digit
return (Decimal, fromMaybe 1 $ safeRead $ T.pack num)
@ -815,7 +903,7 @@ decimal = do
-- returns (DefaultStyle, [next example number]). The next
-- example number is incremented in parser state, and the label
-- (if present) is added to the label table.
exampleNum :: Stream s m Char
exampleNum :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s ParserState m (ListNumberStyle, Int)
exampleNum = do
char '@'
@ -834,37 +922,37 @@ exampleNum = do
return (Example, num)
-- | Parses a '#' returns (DefaultStyle, 1).
defaultNum :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
defaultNum :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
defaultNum = do
char '#'
return (DefaultStyle, 1)
-- | Parses a lowercase letter and returns (LowerAlpha, number).
lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
lowerAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
lowerAlpha = do
ch <- satisfy isAsciiLower
return (LowerAlpha, ord ch - ord 'a' + 1)
-- | Parses an uppercase letter and returns (UpperAlpha, number).
upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
upperAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
upperAlpha = do
ch <- satisfy isAsciiUpper
return (UpperAlpha, ord ch - ord 'A' + 1)
-- | Parses a roman numeral i or I
romanOne :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
romanOne :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
(char 'I' >> return (UpperRoman, 1))
-- | Parses an ordered list marker and returns list attributes.
anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes
anyOrderedListMarker :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s ParserState m ListAttributes
anyOrderedListMarker = choice
[delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
numParser <- [decimal, exampleNum, defaultNum, romanOne,
lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
-- | Parses a list number (num) followed by a period, returns list attributes.
inPeriod :: Stream s m Char
inPeriod :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m (ListNumberStyle, Int)
-> ParserT s st m ListAttributes
inPeriod num = try $ do
@ -876,7 +964,7 @@ inPeriod num = try $ do
return (start, style, delim)
-- | Parses a list number (num) followed by a paren, returns list attributes.
inOneParen :: Stream s m Char
inOneParen :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m (ListNumberStyle, Int)
-> ParserT s st m ListAttributes
inOneParen num = try $ do
@ -885,7 +973,7 @@ inOneParen num = try $ do
return (start, style, OneParen)
-- | Parses a list number (num) enclosed in parens, returns list attributes.
inTwoParens :: Stream s m Char
inTwoParens :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m (ListNumberStyle, Int)
-> ParserT s st m ListAttributes
inTwoParens num = try $ do
@ -896,7 +984,7 @@ inTwoParens num = try $ do
-- | Parses an ordered list marker with a given style and delimiter,
-- returns number.
orderedListMarker :: Stream s m Char
orderedListMarker :: (Stream s m Char, UpdateSourcePos s Char)
=> ListNumberStyle
-> ListNumberDelim
-> ParserT s ParserState m Int
@ -919,10 +1007,10 @@ orderedListMarker style delim = do
return start
-- | Parses a character reference and returns a Str element.
charRef :: Stream s m Char => ParserT s st m Inline
charRef :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inline
charRef = Str . T.singleton <$> characterReference
lineBlockLine :: Monad m => ParserT Text st m Text
lineBlockLine :: Monad m => ParserT Sources st m Text
lineBlockLine = try $ do
char '|'
char ' '
@ -932,11 +1020,11 @@ lineBlockLine = try $ do
continuations <- many (try $ char ' ' >> anyLine)
return $ white <> T.unwords (line : continuations)
blankLineBlockLine :: Stream s m Char => ParserT s st m Char
blankLineBlockLine :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char
blankLineBlockLine = try (char '|' >> blankline)
-- | Parses an RST-style line block and returns a list of strings.
lineBlockLines :: Monad m => ParserT Text st m [Text]
lineBlockLines :: Monad m => ParserT Sources st m [Text]
lineBlockLines = try $ do
lines' <- many1 (lineBlockLine <|> (T.singleton <$> blankLineBlockLine))
skipMany blankline
@ -944,7 +1032,8 @@ lineBlockLines = try $ do
-- | Parse a table using 'headerParser', 'rowParser',
-- 'lineParser', and 'footerParser'.
tableWith :: (Stream s m Char, HasReaderOptions st, Monad mf)
tableWith :: (Stream s m Char, UpdateSourcePos s Char,
HasReaderOptions st, Monad mf)
=> ParserT s st m (mf [Blocks], [Alignment], [Int])
-> ([Int] -> ParserT s st m (mf [Blocks]))
-> ParserT s st m sep
@ -964,7 +1053,8 @@ tableWith headerParser rowParser lineParser footerParser = try $ do
type TableComponents mf = ([Alignment], [Double], mf [Row], mf [Row])
tableWith' :: (Stream s m Char, HasReaderOptions st, Monad mf)
tableWith' :: (Stream s m Char, UpdateSourcePos s Char,
HasReaderOptions st, Monad mf)
=> ParserT s st m (mf [Blocks], [Alignment], [Int])
-> ([Int] -> ParserT s st m (mf [Blocks]))
-> ParserT s st m sep
@ -1013,20 +1103,19 @@ widthsFromIndices numColumns' indices =
-- (which may be grid), then the rows,
-- which may be grid, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
gridTableWith :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st,
Monad mf, IsString s)
=> ParserT s st m (mf Blocks) -- ^ Block list parser
gridTableWith :: (Monad m, HasReaderOptions st, HasLastStrPosition st, Monad mf)
=> ParserT Sources st m (mf Blocks) -- ^ Block list parser
-> Bool -- ^ Headerless table
-> ParserT s st m (mf Blocks)
-> ParserT Sources st m (mf Blocks)
gridTableWith blocks headless =
tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
(gridTableSep '-') gridTableFooter
gridTableWith' :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st,
Monad mf, IsString s)
=> ParserT s st m (mf Blocks) -- ^ Block list parser
gridTableWith' :: (Monad m, HasReaderOptions st, HasLastStrPosition st,
Monad mf)
=> ParserT Sources st m (mf Blocks) -- ^ Block list parser
-> Bool -- ^ Headerless table
-> ParserT s st m (TableComponents mf)
-> ParserT Sources st m (TableComponents mf)
gridTableWith' blocks headless =
tableWith' (gridTableHeader headless blocks) (gridTableRow blocks)
(gridTableSep '-') gridTableFooter
@ -1035,7 +1124,7 @@ gridTableSplitLine :: [Int] -> Text -> [Text]
gridTableSplitLine indices line = map removeFinalBar $ tail $
splitTextByIndices (init indices) $ trimr line
gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment)
gridPart :: Monad m => Char -> ParserT Sources st m ((Int, Int), Alignment)
gridPart ch = do
leftColon <- option False (True <$ char ':')
dashes <- many1 (char ch)
@ -1050,7 +1139,7 @@ gridPart ch = do
(False, False) -> AlignDefault
return ((lengthDashes, lengthDashes + 1), alignment)
gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)]
gridDashedLines :: Monad m => Char -> ParserT Sources st m [((Int, Int), Alignment)]
gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
removeFinalBar :: Text -> Text
@ -1059,14 +1148,14 @@ removeFinalBar = T.dropWhileEnd go . T.dropWhileEnd (=='|')
go c = T.any (== c) " \t"
-- | Separator between rows of grid table.
gridTableSep :: Stream s m Char => Char -> ParserT s st m Char
gridTableSep :: Monad m => Char -> ParserT Sources st m Char
gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-- | Parse header for a grid table.
gridTableHeader :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st)
gridTableHeader :: (Monad m, Monad mf, HasLastStrPosition st)
=> Bool -- ^ Headerless table
-> ParserT s st m (mf Blocks)
-> ParserT s st m (mf [Blocks], [Alignment], [Int])
-> ParserT Sources st m (mf Blocks)
-> ParserT Sources st m (mf [Blocks], [Alignment], [Int])
gridTableHeader True _ = do
optional blanklines
dashes <- gridDashedLines '-'
@ -1089,17 +1178,17 @@ gridTableHeader False blocks = try $ do
heads <- sequence <$> mapM (parseFromString' blocks . trim) rawHeads
return (heads, aligns, indices)
gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [Text]
gridTableRawLine :: (Stream s m Char, UpdateSourcePos s Char) => [Int] -> ParserT s st m [Text]
gridTableRawLine indices = do
char '|'
line <- many1Till anyChar newline
return (gridTableSplitLine indices $ T.pack line)
-- | Parse row of grid table.
gridTableRow :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st)
=> ParserT s st m (mf Blocks)
gridTableRow :: (Monad m, Monad mf, HasLastStrPosition st)
=> ParserT Sources st m (mf Blocks)
-> [Int]
-> ParserT s st m (mf [Blocks])
-> ParserT Sources st m (mf [Blocks])
gridTableRow blocks indices = do
colLines <- many1 (gridTableRawLine indices)
let cols = map ((<> "\n") . T.unlines . removeOneLeadingSpace) $
@ -1120,34 +1209,38 @@ removeOneLeadingSpace xs =
Just (c, _) -> c == ' '
-- | Parse footer for a grid table.
gridTableFooter :: Stream s m Char => ParserT s st m ()
gridTableFooter :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
gridTableFooter = optional blanklines
---
-- | Removes the ParsecT layer from the monad transformer stack
readWithM :: Monad m
=> ParserT Text st m a -- ^ parser
-> st -- ^ initial state
-> Text -- ^ input
readWithM :: (Monad m, ToSources t)
=> ParserT Sources st m a -- ^ parser
-> st -- ^ initial state
-> t -- ^ input
-> m (Either PandocError a)
readWithM parser state input =
mapLeft (PandocParsecError input) <$> runParserT parser state "source" input
mapLeft (PandocParsecError sources)
<$> runParserT parser state (initialSourceName sources) sources
where
sources = toSources input
-- | Parse a string with a given parser and state
readWith :: Parser Text st a
readWith :: ToSources t
=> Parser Sources st a
-> st
-> Text
-> t
-> Either PandocError a
readWith p t inp = runIdentity $ readWithM p t inp
-- | Parse a string with @parser@ (for testing).
testStringWith :: Show a
=> ParserT Text ParserState Identity a
=> ParserT Sources ParserState Identity a
-> Text
-> IO ()
testStringWith parser str = UTF8.putStrLn $ tshow $
readWith parser defaultParserState str
readWith parser defaultParserState (toSources str)
-- | Parsing options.
data ParserState = ParserState
@ -1394,19 +1487,23 @@ registerHeader (ident,classes,kvs) header' = do
updateState $ updateIdentifierList $ Set.insert ident
return (ident,classes,kvs)
smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st,
HasQuoteContext st m,
Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
-> ParserT s st m Inlines
smartPunctuation inlineParser = do
guardEnabled Ext_smart
choice [ quoted inlineParser, apostrophe, doubleCloseQuote, dash, ellipses ]
quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
quoted :: (HasLastStrPosition st, HasQuoteContext st m,
Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
-> ParserT s st m Inlines
quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m,
Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
-> ParserT s st m Inlines
singleQuoted inlineParser = do
@ -1416,7 +1513,8 @@ singleQuoted inlineParser = do
(withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd)))
<|> pure "\8217"
doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st, Stream s m Char)
doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st,
Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
-> ParserT s st m Inlines
doubleQuoted inlineParser = do
@ -1433,13 +1531,14 @@ failIfInQuoteContext context = do
context' <- getQuoteContext
when (context' == context) $ Prelude.fail "already inside quotes"
charOrRef :: Stream s m Char => [Char] -> ParserT s st m Char
charOrRef :: (Stream s m Char, UpdateSourcePos s Char) => [Char] -> ParserT s st m Char
charOrRef cs =
oneOf cs <|> try (do c <- characterReference
guard (c `elem` cs)
return c)
singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m,
Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m ()
singleQuoteStart = do
failIfInQuoteContext InSingleQuote
@ -1449,7 +1548,7 @@ singleQuoteStart = do
charOrRef "'\8216\145"
void $ lookAhead (satisfy (not . isSpaceChar))
singleQuoteEnd :: Stream s m Char
singleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m ()
singleQuoteEnd = try $ do
charOrRef "'\8217\146"
@ -1457,7 +1556,7 @@ singleQuoteEnd = try $ do
doubleQuoteStart :: (HasLastStrPosition st,
HasQuoteContext st m,
Stream s m Char)
Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m ()
doubleQuoteStart = do
failIfInQuoteContext InDoubleQuote
@ -1465,21 +1564,21 @@ doubleQuoteStart = do
try $ do charOrRef "\"\8220\147"
void $ lookAhead (satisfy (not . isSpaceChar))
doubleQuoteEnd :: Stream s m Char
doubleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m ()
doubleQuoteEnd = void (charOrRef "\"\8221\148")
apostrophe :: Stream s m Char => ParserT s st m Inlines
apostrophe :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\8217")
doubleCloseQuote :: Stream s m Char => ParserT s st m Inlines
doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
doubleCloseQuote = B.str "\8221" <$ char '"'
ellipses :: Stream s m Char
ellipses :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
ellipses = try (string "..." >> return (B.str "\8230"))
dash :: (HasReaderOptions st, Stream s m Char)
dash :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
dash = try $ do
oldDashes <- extensionEnabled Ext_old_dashes <$> getOption readerExtensions
@ -1506,7 +1605,7 @@ nested p = do
updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
return res
citeKey :: (Stream s m Char, HasLastStrPosition st)
citeKey :: (Stream s m Char, UpdateSourcePos s Char, HasLastStrPosition st)
=> ParserT s st m (Bool, Text)
citeKey = try $ do
guard =<< notAfterString
@ -1575,10 +1674,11 @@ insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
insertIncludedFile blocks totoks dirs f =
runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f
-- TODO: replace this with something using addToSources.
-- | Parse content of include file as future blocks. Circular includes result in
-- an @PandocParseError@.
insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st)
=> ParserT Text st m (Future st Blocks)
=> ParserT Sources st m (Future st Blocks)
-> [FilePath] -> FilePath
-> ParserT Text st m (Future st Blocks)
insertIncludedFileF p = insertIncludedFile' p id
-> ParserT Sources st m (Future st Blocks)
insertIncludedFileF p = insertIncludedFile' p (\t -> Sources [(initialPos "",t)])

View file

@ -1,4 +1,5 @@
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
@ -72,6 +73,7 @@ import Text.Pandoc.Error
import Text.Pandoc.Extensions
import Text.Pandoc.Options
import Text.Pandoc.Readers.CommonMark
import Text.Pandoc.Readers.Markdown
import Text.Pandoc.Readers.Creole
import Text.Pandoc.Readers.DocBook
import Text.Pandoc.Readers.Docx
@ -84,7 +86,6 @@ import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Readers.JATS (readJATS)
import Text.Pandoc.Readers.Jira (readJira)
import Text.Pandoc.Readers.LaTeX
import Text.Pandoc.Readers.Markdown
import Text.Pandoc.Readers.MediaWiki
import Text.Pandoc.Readers.Muse
import Text.Pandoc.Readers.Native
@ -102,50 +103,52 @@ import Text.Pandoc.Readers.CSV
import Text.Pandoc.Readers.CslJson
import Text.Pandoc.Readers.BibTeX
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
data Reader m = TextReader (ReaderOptions -> Text -> m Pandoc)
data Reader m = TextReader (forall a . ToSources a =>
ReaderOptions -> a -> m Pandoc)
| ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc)
-- | Association list of formats and readers.
readers :: PandocMonad m => [(Text, Reader m)]
readers = [ ("native" , TextReader readNative)
,("json" , TextReader readJSON)
,("markdown" , TextReader readMarkdown)
,("markdown_strict" , TextReader readMarkdown)
,("markdown_phpextra" , TextReader readMarkdown)
,("markdown_github" , TextReader readMarkdown)
,("markdown_mmd", TextReader readMarkdown)
,("commonmark" , TextReader readCommonMark)
,("commonmark_x" , TextReader readCommonMark)
,("creole" , TextReader readCreole)
,("dokuwiki" , TextReader readDokuWiki)
,("gfm" , TextReader readCommonMark)
,("rst" , TextReader readRST)
,("mediawiki" , TextReader readMediaWiki)
,("vimwiki" , TextReader readVimwiki)
,("docbook" , TextReader readDocBook)
,("opml" , TextReader readOPML)
,("org" , TextReader readOrg)
,("textile" , TextReader readTextile) -- TODO : textile+lhs
,("html" , TextReader readHtml)
,("jats" , TextReader readJATS)
,("jira" , TextReader readJira)
,("latex" , TextReader readLaTeX)
,("haddock" , TextReader readHaddock)
,("twiki" , TextReader readTWiki)
,("tikiwiki" , TextReader readTikiWiki)
,("docx" , ByteStringReader readDocx)
,("odt" , ByteStringReader readOdt)
,("t2t" , TextReader readTxt2Tags)
,("epub" , ByteStringReader readEPUB)
,("muse" , TextReader readMuse)
,("man" , TextReader readMan)
,("fb2" , TextReader readFB2)
,("ipynb" , TextReader readIpynb)
,("csv" , TextReader readCSV)
,("csljson" , TextReader readCslJson)
,("bibtex" , TextReader readBibTeX)
,("biblatex" , TextReader readBibLaTeX)
readers = [("native" , TextReader readNative)
,("json" , TextReader readJSON)
,("markdown" , TextReader readMarkdown)
,("markdown_strict" , TextReader readMarkdown)
,("markdown_phpextra" , TextReader readMarkdown)
,("markdown_github" , TextReader readMarkdown)
,("markdown_mmd", TextReader readMarkdown)
,("commonmark" , TextReader readCommonMark)
,("commonmark_x" , TextReader readCommonMark)
,("creole" , TextReader readCreole)
,("dokuwiki" , TextReader readDokuWiki)
,("gfm" , TextReader readCommonMark)
,("rst" , TextReader readRST)
,("mediawiki" , TextReader readMediaWiki)
,("vimwiki" , TextReader readVimwiki)
,("docbook" , TextReader readDocBook)
,("opml" , TextReader readOPML)
,("org" , TextReader readOrg)
,("textile" , TextReader readTextile) -- TODO : textile+lhs
,("html" , TextReader readHtml)
,("jats" , TextReader readJATS)
,("jira" , TextReader readJira)
,("latex" , TextReader readLaTeX)
,("haddock" , TextReader readHaddock)
,("twiki" , TextReader readTWiki)
,("tikiwiki" , TextReader readTikiWiki)
,("docx" , ByteStringReader readDocx)
,("odt" , ByteStringReader readOdt)
,("t2t" , TextReader readTxt2Tags)
,("epub" , ByteStringReader readEPUB)
,("muse" , TextReader readMuse)
,("man" , TextReader readMan)
,("fb2" , TextReader readFB2)
,("ipynb" , TextReader readIpynb)
,("csv" , TextReader readCSV)
,("csljson" , TextReader readCslJson)
,("bibtex" , TextReader readBibTeX)
,("biblatex" , TextReader readBibLaTeX)
]
-- | Retrieve reader, extensions based on formatSpec (format+extensions).
@ -173,9 +176,13 @@ getReader s =
return (r, exts)
-- | Read pandoc document from JSON format.
readJSON :: PandocMonad m
=> ReaderOptions -> Text -> m Pandoc
readJSON _ t =
case eitherDecode' . BL.fromStrict . UTF8.fromText $ t of
readJSON :: (PandocMonad m, ToSources a)
=> ReaderOptions
-> a
-> m Pandoc
readJSON _ s =
case eitherDecode' . BL.fromStrict . UTF8.fromText
. sourcesToText . toSources $ s of
Right doc -> return doc
Left e -> throwError $ PandocParseError ("JSON parse error: " <> T.pack e)
Left e -> throwError $ PandocParseError ("JSON parse error: "
<> T.pack e)

View file

@ -23,30 +23,33 @@ where
import Text.Pandoc.Options
import Text.Pandoc.Definition
import Text.Pandoc.Builder (setMeta, cite, str)
import Data.Text (Text)
import Citeproc (Lang(..), parseLang)
import Citeproc.Locale (getLocale)
import Text.Pandoc.Error (PandocError(..))
import Text.Pandoc.Class (PandocMonad, lookupEnv)
import Text.Pandoc.Citeproc.BibTeX as BibTeX
import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
import Text.Pandoc.Sources (ToSources(..))
import Control.Monad.Except (throwError)
-- | Read BibTeX from an input string and return a Pandoc document.
-- The document will have only metadata, with an empty body.
-- The metadata will contain a `references` field with the
-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
readBibTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readBibTeX :: (PandocMonad m, ToSources a)
=> ReaderOptions -> a -> m Pandoc
readBibTeX = readBibTeX' BibTeX.Bibtex
-- | Read BibLaTeX from an input string and return a Pandoc document.
-- The document will have only metadata, with an empty body.
-- The metadata will contain a `references` field with the
-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
readBibLaTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readBibLaTeX :: (PandocMonad m, ToSources a)
=> ReaderOptions -> a -> m Pandoc
readBibLaTeX = readBibTeX' BibTeX.Biblatex
readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc
readBibTeX' :: (PandocMonad m, ToSources a)
=> Variant -> ReaderOptions -> a -> m Pandoc
readBibTeX' variant _opts t = do
mblangEnv <- lookupEnv "LANG"
let defaultLang = Lang "en" Nothing (Just "US") [] [] []
@ -60,7 +63,7 @@ readBibTeX' variant _opts t = do
Left _ -> throwError $ PandocCiteprocError e
Right l -> return l
case BibTeX.readBibtexString variant locale (const True) t of
Left e -> throwError $ PandocParsecError t e
Left e -> throwError $ PandocParsecError (toSources t) e
Right refs -> return $ setMeta "references"
(map referenceToMetaValue refs)
. setMeta "nocite"

View file

@ -13,23 +13,23 @@
Conversion from CSV to a 'Pandoc' table.
-}
module Text.Pandoc.Readers.CSV ( readCSV ) where
import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.CSV (parseCSV, defaultCSVOptions)
import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Shared (crFilter)
import Text.Pandoc.Error
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
import Text.Pandoc.Options (ReaderOptions)
import Control.Monad.Except (throwError)
readCSV :: PandocMonad m
readCSV :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
-> Text -- ^ Text to parse (assuming @'\n'@ line endings)
-> a
-> m Pandoc
readCSV _opts s =
case parseCSV defaultCSVOptions (crFilter s) of
readCSV _opts s = do
let txt = sourcesToText $ toSources s
case parseCSV defaultCSVOptions txt of
Right (r:rs) -> return $ B.doc $ B.table capt
(zip aligns widths)
(TableHead nullAttr hdrs)
@ -45,4 +45,4 @@ readCSV _opts s =
aligns = replicate numcols AlignDefault
widths = replicate numcols ColWidthDefault
Right [] -> return $ B.doc mempty
Left e -> throwError $ PandocParsecError s e
Left e -> throwError $ PandocParsecError (toSources [("",txt)]) e

View file

@ -30,45 +30,55 @@ import Text.Pandoc.Readers.Metadata (yamlMetaBlock)
import Control.Monad.Except
import Data.Functor.Identity (runIdentity)
import Data.Typeable
import Text.Pandoc.Parsing (runParserT, getPosition, sourceLine,
runF, defaultParserState, take1WhileP, option)
import Text.Pandoc.Parsing (runParserT, getPosition,
runF, defaultParserState, option, many1, anyChar,
Sources(..), ToSources(..), ParserT, Future,
sourceName)
import qualified Data.Text as T
-- | Parse a CommonMark formatted string into a 'Pandoc' structure.
readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readCommonMark :: (PandocMonad m, ToSources a)
=> ReaderOptions -> a -> m Pandoc
readCommonMark opts s
| isEnabled Ext_yaml_metadata_block opts
, "---" `T.isPrefixOf` s = do
let metaValueParser = do
inp <- option "" $ take1WhileP (const True)
case runIdentity
(commonmarkWith (specFor opts) "metadata value" inp) of
Left _ -> mzero
Right (Cm bls :: Cm () Blocks)
-> return $ return $ B.toMetaValue bls
res <- runParserT (do meta <- yamlMetaBlock metaValueParser
pos <- getPosition
return (meta, pos))
defaultParserState "YAML metadata" s
case res of
Left _ -> readCommonMarkBody opts s
Right (meta, pos) -> do
let dropLines 0 = id
dropLines n = dropLines (n - 1) . T.drop 1 . T.dropWhile (/='\n')
let metaLines = sourceLine pos - 1
let body = T.replicate metaLines "\n" <> dropLines metaLines s
Pandoc _ bs <- readCommonMarkBody opts body
return $ Pandoc (runF meta defaultParserState) bs
| otherwise = readCommonMarkBody opts s
| isEnabled Ext_yaml_metadata_block opts = do
let sources = toSources s
let toks = concatMap sourceToToks (unSources sources)
res <- runParserT (do meta <- yamlMetaBlock (metaValueParser opts)
pos <- getPosition
return (meta, pos))
defaultParserState "YAML metadata" (toSources s)
case res of
Left _ -> readCommonMarkBody opts sources toks
Right (meta, pos) -> do
-- strip off metadata section and parse body
let body = dropWhile (\t -> tokPos t < pos) toks
Pandoc _ bs <- readCommonMarkBody opts sources body
return $ Pandoc (runF meta defaultParserState) bs
| otherwise = do
let sources = toSources s
let toks = concatMap sourceToToks (unSources sources)
readCommonMarkBody opts sources toks
readCommonMarkBody :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readCommonMarkBody opts s
sourceToToks :: (SourcePos, Text) -> [Tok]
sourceToToks (pos, s) = tokenize (sourceName pos) s
metaValueParser :: Monad m
=> ReaderOptions -> ParserT Sources st m (Future st MetaValue)
metaValueParser opts = do
inp <- option "" $ T.pack <$> many1 anyChar
let toks = concatMap sourceToToks (unSources (toSources inp))
case runIdentity (parseCommonmarkWith (specFor opts) toks) of
Left _ -> mzero
Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls
readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc
readCommonMarkBody opts s toks
| isEnabled Ext_sourcepos opts =
case runIdentity (commonmarkWith (specFor opts) "" s) of
case runIdentity (parseCommonmarkWith (specFor opts) toks) of
Left err -> throwError $ PandocParsecError s err
Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls
| otherwise =
case runIdentity (commonmarkWith (specFor opts) "" s) of
case runIdentity (parseCommonmarkWith (specFor opts) toks) of
Left err -> throwError $ PandocParsecError s err
Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls

View file

@ -23,21 +23,20 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed)
import Text.Pandoc.Shared (crFilter)
-- | Read creole from an input string and return a Pandoc document.
readCreole :: PandocMonad m
readCreole :: (PandocMonad m, ToSources a)
=> ReaderOptions
-> Text
-> a
-> m Pandoc
readCreole opts s = do
res <- readWithM parseCreole def{ stateOptions = opts } $ crFilter s <> "\n\n"
let sources = ensureFinalNewlines 2 (toSources s)
res <- readWithM parseCreole def{ stateOptions = opts } sources
case res of
Left e -> throwError e
Right d -> return d
type CRLParser = ParserT Text ParserState
type CRLParser = ParserT Sources ParserState
--
-- Utility functions

View file

@ -24,21 +24,22 @@ import Text.Pandoc.Options
import Text.Pandoc.Definition
import Text.Pandoc.Builder (setMeta, cite, str)
import qualified Text.Pandoc.UTF8 as UTF8
import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Error (PandocError(..))
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
import Control.Monad.Except (throwError)
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
-- | Read CSL JSON from an input string and return a Pandoc document.
-- The document will have only metadata, with an empty body.
-- The metadata will contain a `references` field with the
-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
readCslJson :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readCslJson _opts t =
case cslJsonToReferences (UTF8.fromText t) of
readCslJson :: (PandocMonad m, ToSources a)
=> ReaderOptions -> a -> m Pandoc
readCslJson _opts x =
case cslJsonToReferences (UTF8.fromText $ sourcesToText $ toSources x) of
Left e -> throwError $ PandocParseError $ T.pack e
Right refs -> return $ setMeta "references"
(map referenceToMetaValue refs)

View file

@ -30,7 +30,8 @@ import Text.Pandoc.Builder
import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
import Text.Pandoc.Options
import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
import Text.Pandoc.Shared (safeRead, extractSpaces)
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
import Text.TeXMath (readMathML, writeTeX)
import Text.Pandoc.XML.Light
@ -539,11 +540,15 @@ instance Default DBState where
, dbContent = [] }
readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readDocBook :: (PandocMonad m, ToSources a)
=> ReaderOptions
-> a
-> m Pandoc
readDocBook _ inp = do
let sources = toSources inp
tree <- either (throwError . PandocXMLError "") return $
parseXMLContents
(TL.fromStrict . handleInstructions $ crFilter inp)
(TL.fromStrict . handleInstructions . sourcesToText $ sources)
(bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
return $ Pandoc (dbMeta st') (toList . mconcat $ bs)

View file

@ -29,26 +29,27 @@ import Text.Pandoc.Definition
import Text.Pandoc.Error (PandocError (PandocParsecError))
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
import Text.Pandoc.Shared (crFilter, trim, stringify, tshow)
import Text.Pandoc.Shared (trim, stringify, tshow)
-- | Read DokuWiki from an input string and return a Pandoc document.
readDokuWiki :: PandocMonad m
readDokuWiki :: (PandocMonad m, ToSources a)
=> ReaderOptions
-> Text
-> a
-> m Pandoc
readDokuWiki opts s = do
let input = crFilter s
res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input
let sources = toSources s
res <- runParserT parseDokuWiki def {stateOptions = opts }
(initialSourceName sources) sources
case res of
Left e -> throwError $ PandocParsecError input e
Left e -> throwError $ PandocParsecError sources e
Right d -> return d
type DWParser = ParserT Text ParserState
type DWParser = ParserT Sources ParserState
-- * Utility functions
-- | Parse end-of-line, which can be either a newline or end-of-file.
eol :: Stream s m Char => ParserT s st m ()
eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
eol = void newline <|> eof
nested :: PandocMonad m => DWParser m a -> DWParser m a

View file

@ -40,9 +40,9 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad, insertMedia, report)
import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Shared (crFilter)
import Text.Pandoc.XML.Light
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
type FB2 m = StateT FB2State m
@ -63,9 +63,12 @@ instance HasMeta FB2State where
setMeta field v s = s {fb2Meta = setMeta field v (fb2Meta s)}
deleteMeta field s = s {fb2Meta = deleteMeta field (fb2Meta s)}
readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readFB2 :: (PandocMonad m, ToSources a)
=> ReaderOptions
-> a
-> m Pandoc
readFB2 _ inp =
case parseXMLElement $ TL.fromStrict $ crFilter inp of
case parseXMLElement $ TL.fromStrict $ sourcesToText $ toSources inp of
Left msg -> throwError $ PandocXMLError "" msg
Right el -> do
(bs, st) <- runStateT (parseRootElement el) def

View file

@ -62,21 +62,21 @@ import Text.Pandoc.Options (
extensionEnabled)
import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Shared (
addMetaField, blocksToInlines', crFilter, escapeURI, extractSpaces,
addMetaField, blocksToInlines', escapeURI, extractSpaces,
htmlSpanLikeElements, renderTags', safeRead, tshow)
import Text.Pandoc.Walk
import Text.Parsec.Error
import Text.TeXMath (readMathML, writeTeX)
-- | Convert HTML-formatted string to 'Pandoc' document.
readHtml :: PandocMonad m
readHtml :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
-> Text -- ^ String to parse (assumes @'\n'@ line endings)
-> a -- ^ Input to parse
-> m Pandoc
readHtml opts inp = do
let tags = stripPrefixes $ canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True }
(crFilter inp)
(sourcesToText $ toSources inp)
parseDoc = do
blocks <- fixPlains False . mconcat <$> manyTill block eof
meta <- stateMeta . parserState <$> getState
@ -830,17 +830,19 @@ pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
pTagText :: PandocMonad m => TagParser m Inlines
pTagText = try $ do
pos <- getPosition
(TagText str) <- pSatisfy isTagText
st <- getState
qu <- ask
parsed <- lift $ lift $
flip runReaderT qu $ runParserT (many pTagContents) st "text" str
flip runReaderT qu $ runParserT (many pTagContents) st "text"
(Sources [(pos, str)])
case parsed of
Left _ -> throwError $ PandocParseError $
"Could not parse `" <> str <> "'"
Right result -> return $ mconcat result
type InlinesParser m = HTMLParser m Text
type InlinesParser m = HTMLParser m Sources
pTagContents :: PandocMonad m => InlinesParser m Inlines
pTagContents =
@ -970,13 +972,14 @@ isCommentTag = tagComment (const True)
-- | Matches a stretch of HTML in balanced tags.
htmlInBalanced :: Monad m
=> (Tag Text -> Bool)
-> ParserT Text st m Text
-> ParserT Sources st m Text
htmlInBalanced f = try $ do
lookAhead (char '<')
inp <- getInput
let ts = canonicalizeTags $
parseTagsOptions parseOptions{ optTagWarning = True,
optTagPosition = True } inp
sources <- getInput
let ts = canonicalizeTags
$ parseTagsOptions parseOptions{ optTagWarning = True,
optTagPosition = True }
$ sourcesToText sources
case ts of
(TagPosition sr sc : t@(TagOpen tn _) : rest) -> do
guard $ f t
@ -1018,15 +1021,17 @@ hasTagWarning _ = False
-- | Matches a tag meeting a certain condition.
htmlTag :: (HasReaderOptions st, Monad m)
=> (Tag Text -> Bool)
-> ParserT Text st m (Tag Text, Text)
-> ParserT Sources st m (Tag Text, Text)
htmlTag f = try $ do
lookAhead (char '<')
startpos <- getPosition
inp <- getInput
sources <- getInput
let inp = sourcesToText sources
let ts = canonicalizeTags $ parseTagsOptions
parseOptions{ optTagWarning = False
, optTagPosition = True }
(inp <> " ") -- add space to ensure that
(inp <> " ")
-- add space to ensure that
-- we get a TagPosition after the tag
(next, ln, col) <- case ts of
(TagPosition{} : next : TagPosition ln col : _)

View file

@ -19,7 +19,7 @@ import Control.Monad.Except (throwError)
import Data.List (intersperse)
import Data.List.NonEmpty (nonEmpty)
import Data.Maybe (fromMaybe)
import Data.Text (Text, unpack)
import Data.Text (unpack)
import qualified Data.Text as T
import Documentation.Haddock.Parser
import Documentation.Haddock.Types as H
@ -29,15 +29,17 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Options
import Text.Pandoc.Shared (crFilter, splitTextBy, trim)
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
import Text.Pandoc.Shared (splitTextBy, trim)
-- | Parse Haddock markup and return a 'Pandoc' document.
readHaddock :: PandocMonad m
readHaddock :: (PandocMonad m, ToSources a)
=> ReaderOptions
-> Text
-> a
-> m Pandoc
readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of
readHaddock opts s = case readHaddockEither opts
(unpack . sourcesToText . toSources $ s) of
Right result -> return result
Left e -> throwError e

View file

@ -39,10 +39,12 @@ import Data.Aeson as Aeson
import Control.Monad.Except (throwError)
import Text.Pandoc.Readers.Markdown (readMarkdown)
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
readIpynb :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readIpynb opts t = do
let src = BL.fromStrict (TE.encodeUtf8 t)
readIpynb :: (PandocMonad m, ToSources a)
=> ReaderOptions -> a -> m Pandoc
readIpynb opts x = do
let src = BL.fromStrict . TE.encodeUtf8 . sourcesToText $ toSources x
case eitherDecode src of
Right (notebook4 :: Notebook NbV4) -> notebookToPandoc opts notebook4
Left _ ->

View file

@ -29,11 +29,12 @@ import Text.HTML.TagSoup.Entity (lookupEntity)
import Text.Pandoc.Builder
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Options
import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
import Text.Pandoc.Shared (safeRead, extractSpaces)
import Text.TeXMath (readMathML, writeTeX)
import Text.Pandoc.XML.Light
import qualified Data.Set as S (fromList, member)
import Data.Set ((\\))
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
type JATS m = StateT JATSState m
@ -52,10 +53,14 @@ instance Default JATSState where
, jatsContent = [] }
readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readJATS :: (PandocMonad m, ToSources a)
=> ReaderOptions
-> a
-> m Pandoc
readJATS _ inp = do
let sources = toSources inp
tree <- either (throwError . PandocXMLError "") return $
parseXMLContents (TL.fromStrict $ crFilter inp)
parseXMLContents (TL.fromStrict . sourcesToText $ sources)
(bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)

View file

@ -20,18 +20,20 @@ import Text.Pandoc.Builder hiding (cell)
import Text.Pandoc.Error (PandocError (PandocParseError))
import Text.Pandoc.Options (ReaderOptions)
import Text.Pandoc.Shared (stringify)
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
import qualified Text.Jira.Markup as Jira
-- | Read Jira wiki markup.
readJira :: PandocMonad m
readJira :: (PandocMonad m, ToSources a)
=> ReaderOptions
-> Text
-> a
-> m Pandoc
readJira _opts s = case parse s of
Right d -> return $ jiraToPandoc d
Left e -> throwError . PandocParseError $
"Jira parse error" `append` pack (show e)
readJira _opts inp = do
let sources = toSources inp
case parse (sourcesToText sources) of
Right d -> return $ jiraToPandoc d
Left e -> throwError . PandocParseError $
"Jira parse error" `append` pack (show e)
jiraToPandoc :: Jira.Doc -> Pandoc
jiraToPandoc (Jira.Doc blks) = doc $ foldMap jiraToPandocBlocks blks

View file

@ -77,16 +77,17 @@ import Data.List.NonEmpty (nonEmpty)
-- import Debug.Trace (traceShowId)
-- | Parse LaTeX from string and return 'Pandoc' document.
readLaTeX :: PandocMonad m
readLaTeX :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
-> Text -- ^ String to parse (assumes @'\n'@ line endings)
-> a -- ^ Input to parse
-> m Pandoc
readLaTeX opts ltx = do
let sources = toSources ltx
parsed <- runParserT parseLaTeX def{ sOptions = opts } "source"
(tokenize "source" (crFilter ltx))
(tokenizeSources sources)
case parsed of
Right result -> return result
Left e -> throwError $ PandocParsecError ltx e
Left e -> throwError $ PandocParsecError sources e
parseLaTeX :: PandocMonad m => LP m Pandoc
parseLaTeX = do
@ -132,11 +133,11 @@ resolveRefs _ x = x
rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> ParserT Text s m Text
=> ParserT Sources s m Text
rawLaTeXBlock = do
lookAhead (try (char '\\' >> letter))
inp <- getInput
let toks = tokenize "source" inp
let toks = tokenizeSources inp
snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks
<|> rawLaTeXParser toks True
(do choice (map controlSeq
@ -163,11 +164,11 @@ beginOrEndCommand = try $ do
(txt <> untokenize rawargs)
rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> ParserT Text s m Text
=> ParserT Sources s m Text
rawLaTeXInline = do
lookAhead (try (char '\\' >> letter))
inp <- getInput
let toks = tokenize "source" inp
let toks = tokenizeSources inp
raw <- snd <$>
( rawLaTeXParser toks True
(mempty <$ (controlSeq "input" >> skipMany rawopt >> braced))
@ -178,11 +179,11 @@ rawLaTeXInline = do
finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439
return $ raw <> T.pack finalbraces
inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines
inlineCommand :: PandocMonad m => ParserT Sources ParserState m Inlines
inlineCommand = do
lookAhead (try (char '\\' >> letter))
inp <- getInput
let toks = tokenize "source" inp
let toks = tokenizeSources inp
fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
inlines
@ -641,7 +642,7 @@ opt = do
parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks
case parsed of
Right result -> return result
Left e -> throwError $ PandocParsecError (untokenize toks) e
Left e -> throwError $ PandocParsecError (toSources toks) e
-- block elements:

View file

@ -120,7 +120,7 @@ simpleCiteArgs inline = try $ do
runParserT (mconcat <$> many inline) st "bracketed option" toks
case parsed of
Right result -> return result
Left e -> throwError $ PandocParsecError (untokenize toks) e
Left e -> throwError $ PandocParsecError (toSources toks) e

View file

@ -27,6 +27,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
, rawLaTeXParser
, applyMacros
, tokenize
, tokenizeSources
, untokenize
, untoken
, totoks
@ -248,7 +249,7 @@ withVerbatimMode parser = do
rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> [Tok] -> Bool -> LP m a -> LP m a
-> ParserT Text s m (a, Text)
-> ParserT Sources s m (a, Text)
rawLaTeXParser toks retokenize parser valParser = do
pstate <- getState
let lstate = def{ sOptions = extractReaderOptions pstate }
@ -268,7 +269,7 @@ rawLaTeXParser toks retokenize parser valParser = do
Left _ -> mzero
Right ((val, raw), st) -> do
updateState (updateMacros (sMacros st <>))
_ <- takeP (T.length (untokenize toks'))
void $ count (T.length (untokenize toks')) anyChar
let result = untokenize raw
-- ensure we end with space if input did, see #4442
let result' =
@ -281,7 +282,7 @@ rawLaTeXParser toks retokenize parser valParser = do
return (val, result')
applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> Text -> ParserT Text s m Text
=> Text -> ParserT Sources s m Text
applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
do let retokenize = untokenize <$> many (satisfyTok (const True))
pstate <- getState
@ -301,6 +302,11 @@ QuickCheck property:
> let t = T.pack s in untokenize (tokenize "random" t) == t
-}
tokenizeSources :: Sources -> [Tok]
tokenizeSources = concatMap tokenizeSource . unSources
where
tokenizeSource (pos, t) = totoks pos t
tokenize :: SourceName -> Text -> [Tok]
tokenize sourcename = totoks (initialPos sourcename)

View file

@ -1,3 +1,4 @@
{-# LANGUAGE FlexibleInstances #-}
{- |
Module : Text.Pandoc.Readers.LaTeX.Types
Copyright : Copyright (C) 2017-2021 John MacFarlane
@ -18,7 +19,9 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
)
where
import Data.Text (Text)
import Text.Parsec.Pos (SourcePos)
import Text.Parsec.Pos (SourcePos, sourceName)
import Text.Pandoc.Sources
import Data.List (groupBy)
data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
Esc1 | Esc2 | Arg Int
@ -27,6 +30,16 @@ data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
data Tok = Tok SourcePos TokType Text
deriving (Eq, Ord, Show)
instance ToSources [Tok] where
toSources = Sources
. map (\ts -> case ts of
Tok p _ _ : _ -> (p, mconcat $ map tokToText ts)
_ -> error "toSources [Tok] encountered empty group")
. groupBy (\(Tok p1 _ _) (Tok p2 _ _) -> sourceName p1 == sourceName p2)
tokToText :: Tok -> Text
tokToText (Tok _ _ t) = t
data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed
deriving (Eq, Ord, Show)

View file

@ -20,7 +20,7 @@ import Control.Monad (liftM, mzero, guard, void)
import Control.Monad.Trans (lift)
import Control.Monad.Except (throwError)
import Data.Maybe (catMaybes, isJust)
import Data.List (intersperse, intercalate)
import Data.List (intersperse)
import qualified Data.Text as T
import Text.Pandoc.Builder as B
import Text.Pandoc.Class.PandocMonad (PandocMonad(..), report)
@ -29,9 +29,8 @@ import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Walk (query)
import Text.Pandoc.Shared (crFilter, mapLeft)
import Text.Pandoc.Shared (mapLeft)
import Text.Pandoc.Readers.Roff -- TODO explicit imports
import Text.Parsec hiding (tokenPrim)
import qualified Text.Parsec as Parsec
import Text.Parsec.Pos (updatePosString)
import qualified Data.Foldable as Foldable
@ -50,13 +49,20 @@ type ManParser m = ParserT [RoffToken] ManState m
-- | Read man (troff) from an input string and return a Pandoc document.
readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc
readMan opts txt = do
tokenz <- lexRoff (initialPos "input") (crFilter txt)
readMan :: (PandocMonad m, ToSources a)
=> ReaderOptions
-> a
-> m Pandoc
readMan opts s = do
let Sources inps = toSources s
tokenz <- mconcat <$> mapM (uncurry lexRoff) inps
let state = def {readerOptions = opts} :: ManState
let fixError (PandocParsecError _ e) = PandocParsecError (Sources inps) e
fixError e = e
eitherdoc <- readWithMTokens parseMan state
(Foldable.toList . unRoffTokens $ tokenz)
either throwError return eitherdoc
either (throwError . fixError) return eitherdoc
readWithMTokens :: PandocMonad m
=> ParserT [RoffToken] ManState m a -- ^ parser
@ -64,9 +70,10 @@ readWithMTokens :: PandocMonad m
-> [RoffToken] -- ^ input
-> m (Either PandocError a)
readWithMTokens parser state input =
let leftF = PandocParsecError . T.pack . intercalate "\n" $ show <$> input
let leftF = PandocParsecError mempty
in mapLeft leftF `liftM` runParserT parser state "source" input
parseMan :: PandocMonad m => ManParser m Pandoc
parseMan = do
bs <- many parseBlock <* eof
@ -89,7 +96,7 @@ parseBlock = choice [ parseList
parseTable :: PandocMonad m => ManParser m Blocks
parseTable = do
modifyState $ \st -> st { tableCellsPlain = True }
updateState $ \st -> st { tableCellsPlain = True }
let isTbl Tbl{} = True
isTbl _ = False
Tbl _opts rows pos <- msatisfy isTbl
@ -135,7 +142,7 @@ parseTable = do
case res' of
Left _ -> Prelude.fail "Could not parse table cell"
Right x -> do
modifyState $ \s -> s{ tableCellsPlain = False }
updateState $ \s -> s{ tableCellsPlain = False }
return x
Right x -> return x
@ -222,7 +229,7 @@ parseTitle = do
setMeta "section" (linePartsToInlines y)
[x] -> setMeta "title" (linePartsToInlines x)
[] -> id
modifyState $ \st -> st{ metadata = adjustMeta $ metadata st }
updateState $ \st -> st{ metadata = adjustMeta $ metadata st }
return mempty
linePartsToInlines :: [LinePart] -> Inlines

View file

@ -47,19 +47,20 @@ import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
import Text.Pandoc.Shared
import Text.Pandoc.XML (fromEntities)
import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs, yamlMetaBlock)
-- import Debug.Trace (traceShowId)
type MarkdownParser m = ParserT Text ParserState m
type MarkdownParser m = ParserT Sources ParserState m
type F = Future ParserState
-- | Read markdown from an input string and return a Pandoc document.
readMarkdown :: PandocMonad m
readMarkdown :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
-> Text -- ^ String to parse (assuming @'\n'@ line endings)
-> a -- ^ Input
-> m Pandoc
readMarkdown opts s = do
parsed <- readWithM parseMarkdown def{ stateOptions = opts }
(crFilter s <> "\n\n")
(ensureFinalNewlines 3 (toSources s))
case parsed of
Right result -> return result
Left e -> throwError e
@ -80,7 +81,7 @@ yamlToMeta opts mbfp bstr = do
meta <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) bstr
setPosition oldPos
return $ runF meta defaultParserState
parsed <- readWithM parser def{ stateOptions = opts } ""
parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
case parsed of
Right result -> return result
Left e -> throwError e
@ -103,7 +104,7 @@ yamlToRefs idpred opts mbfp bstr = do
refs <- yamlBsToRefs (fmap B.toMetaValue <$> parseBlocks) idpred bstr
setPosition oldPos
return $ runF refs defaultParserState
parsed <- readWithM parser def{ stateOptions = opts } ""
parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
case parsed of
Right result -> return result
Left e -> throwError e
@ -146,14 +147,14 @@ inList = do
ctx <- stateParserContext <$> getState
guard (ctx == ListItemState)
spnl :: PandocMonad m => ParserT Text st m ()
spnl :: PandocMonad m => ParserT Sources st m ()
spnl = try $ do
skipSpaces
optional newline
skipSpaces
notFollowedBy (char '\n')
spnl' :: PandocMonad m => ParserT Text st m Text
spnl' :: PandocMonad m => ParserT Sources st m Text
spnl' = try $ do
xs <- many spaceChar
ys <- option "" $ try $ (:) <$> newline
@ -568,7 +569,7 @@ registerImplicitHeader raw attr@(ident, _, _)
-- hrule block
--
hrule :: PandocMonad m => ParserT Text st m (F Blocks)
hrule :: PandocMonad m => ParserT Sources st m (F Blocks)
hrule = try $ do
skipSpaces
start <- satisfy isHruleChar
@ -588,7 +589,7 @@ indentedLine = indentSpaces >> anyLineNewline
blockDelimiter :: PandocMonad m
=> (Char -> Bool)
-> Maybe Int
-> ParserT Text ParserState m Int
-> ParserT Sources ParserState m Int
blockDelimiter f len = try $ do
skipNonindentSpaces
c <- lookAhead (satisfy f)
@ -732,7 +733,7 @@ lhsCodeBlockBirdWith c = try $ do
blanklines
return $ T.intercalate "\n" lns'
birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text
birdTrackLine :: PandocMonad m => Char -> ParserT Sources st m Text
birdTrackLine c = try $ do
char c
-- allow html tags on left margin:
@ -1025,7 +1026,7 @@ para = try $ do
option (B.plain <$> result)
$ try $ do
newline
(blanklines >> return mempty)
(mempty <$ blanklines)
<|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
<|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
<|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
@ -1170,7 +1171,7 @@ lineBlock = do
-- and the length including trailing space.
dashedLine :: PandocMonad m
=> Char
-> ParserT Text st m (Int, Int)
-> ParserT Sources st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many spaceChar
@ -1239,7 +1240,7 @@ rawTableLine :: PandocMonad m
-> MarkdownParser m [Text]
rawTableLine indices = do
notFollowedBy' (blanklines' <|> tableFooter)
line <- take1WhileP (/='\n') <* newline
line <- anyLine
return $ map trim $ tail $
splitTextByIndices (init indices) line
@ -1390,7 +1391,7 @@ pipeTableCell =
return $ B.plain <$> result)
<|> return mempty
pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int)
pipeTableHeaderPart :: PandocMonad m => ParserT Sources st m (Alignment, Int)
pipeTableHeaderPart = try $ do
skipMany spaceChar
left <- optionMaybe (char ':')
@ -1406,10 +1407,14 @@ pipeTableHeaderPart = try $ do
(Just _,Just _) -> AlignCenter, len)
-- Succeed only if current line contains a pipe.
scanForPipe :: PandocMonad m => ParserT Text st m ()
scanForPipe :: PandocMonad m => ParserT Sources st m ()
scanForPipe = do
inp <- getInput
case T.break (\c -> c == '\n' || c == '|') inp of
Sources inps <- getInput
let ln = case inps of
[] -> ""
((_,t):(_,t'):_) | T.null t -> t'
((_,t):_) -> t
case T.break (\c -> c == '\n' || c == '|') ln of
(_, T.uncons -> Just ('|', _)) -> return ()
_ -> mzero
@ -1703,13 +1708,13 @@ whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
regsp = skipMany spaceChar >> return B.space
nonEndline :: PandocMonad m => ParserT Text st m Char
nonEndline :: PandocMonad m => ParserT Sources st m Char
nonEndline = satisfy (/='\n')
str :: PandocMonad m => MarkdownParser m (F Inlines)
str = do
result <- mconcat <$> many1
( take1WhileP isAlphaNum
( T.pack <$> (many1 alphaNum)
<|> "." <$ try (char '.' <* notFollowedBy (char '.')) )
updateLastStrPos
(do guardEnabled Ext_smart
@ -1962,7 +1967,7 @@ rawLaTeXInline' = do
s <- rawLaTeXInline
return $ return $ B.rawInline "tex" s -- "tex" because it might be context
rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text
rawConTeXtEnvironment :: PandocMonad m => ParserT Sources st m Text
rawConTeXtEnvironment = try $ do
string "\\start"
completion <- inBrackets (letter <|> digit <|> spaceChar)
@ -1971,7 +1976,7 @@ rawConTeXtEnvironment = try $ do
(try $ string "\\stop" >> textStr completion)
return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion
inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text
inBrackets :: PandocMonad m => ParserT Sources st m Char -> ParserT Sources st m Text
inBrackets parser = do
char '['
contents <- manyChar parser

View file

@ -36,17 +36,18 @@ import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (nested)
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag)
import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines,
import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines,
trim, splitTextBy, tshow)
import Text.Pandoc.Walk (walk)
import Text.Pandoc.XML (fromEntities)
-- | Read mediawiki from an input string and return a Pandoc document.
readMediaWiki :: PandocMonad m
=> ReaderOptions -- ^ Reader options
-> Text -- ^ String to parse (assuming @'\n'@ line endings)
readMediaWiki :: (PandocMonad m, ToSources a)
=> ReaderOptions
-> a
-> m Pandoc
readMediaWiki opts s = do
let sources = toSources s
parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts
, mwMaxNestingLevel = 4
, mwNextLinkNumber = 1
@ -55,7 +56,7 @@ readMediaWiki opts s = do
, mwLogMessages = []
, mwInTT = False
}
(crFilter s <> "\n")
sources
case parsed of
Right result -> return result
Left e -> throwError e
@ -69,7 +70,7 @@ data MWState = MWState { mwOptions :: ReaderOptions
, mwInTT :: Bool
}
type MWParser m = ParserT Text MWState m
type MWParser m = ParserT Sources MWState m
instance HasReaderOptions MWState where
extractReaderOptions = mwOptions

View file

@ -35,9 +35,9 @@ import qualified Data.Text.Lazy as TL
import qualified Text.Pandoc.UTF8 as UTF8
yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st)
=> ParserT Text st m (Future st MetaValue)
=> ParserT Sources st m (Future st MetaValue)
-> BL.ByteString
-> ParserT Text st m (Future st Meta)
-> ParserT Sources st m (Future st Meta)
yamlBsToMeta pMetaValue bstr = do
case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
Right (YAML.Doc (YAML.Mapping _ _ o):_)
@ -67,10 +67,10 @@ lookupYAML _ _ = Nothing
-- Returns filtered list of references.
yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st)
=> ParserT Text st m (Future st MetaValue)
=> ParserT Sources st m (Future st MetaValue)
-> (Text -> Bool) -- ^ Filter for id
-> BL.ByteString
-> ParserT Text st m (Future st [MetaValue])
-> ParserT Sources st m (Future st [MetaValue])
yamlBsToRefs pMetaValue idpred bstr =
case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
Right (YAML.Doc o@YAML.Mapping{}:_)
@ -108,9 +108,9 @@ nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t
nodeToKey _ = Nothing
normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st)
=> ParserT Text st m (Future st MetaValue)
=> ParserT Sources st m (Future st MetaValue)
-> Text
-> ParserT Text st m (Future st MetaValue)
-> ParserT Sources st m (Future st MetaValue)
normalizeMetaValue pMetaValue x =
-- Note: a standard quoted or unquoted YAML value will
-- not end in a newline, but a "block" set off with
@ -133,9 +133,9 @@ checkBoolean t
| otherwise = Nothing
yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st)
=> ParserT Text st m (Future st MetaValue)
=> ParserT Sources st m (Future st MetaValue)
-> YAML.Node YE.Pos
-> ParserT Text st m (Future st MetaValue)
-> ParserT Sources st m (Future st MetaValue)
yamlToMetaValue pMetaValue (YAML.Scalar _ x) =
case x of
YAML.SStr t -> normalizeMetaValue pMetaValue t
@ -156,9 +156,9 @@ yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) =
yamlToMetaValue _ _ = return $ return $ MetaString ""
yamlMap :: (PandocMonad m, HasLastStrPosition st)
=> ParserT Text st m (Future st MetaValue)
=> ParserT Sources st m (Future st MetaValue)
-> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
-> ParserT Text st m (Future st (M.Map Text MetaValue))
-> ParserT Sources st m (Future st (M.Map Text MetaValue))
yamlMap pMetaValue o = do
kvs <- forM (M.toList o) $ \(key, v) -> do
k <- maybe (throwError $ PandocParseError
@ -177,8 +177,8 @@ yamlMap pMetaValue o = do
-- | Parse a YAML metadata block using the supplied 'MetaValue' parser.
yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m)
=> ParserT Text st m (Future st MetaValue)
-> ParserT Text st m (Future st Meta)
=> ParserT Sources st m (Future st MetaValue)
-> ParserT Sources st m (Future st Meta)
yamlMetaBlock parser = try $ do
string "---"
blankline
@ -189,5 +189,5 @@ yamlMetaBlock parser = try $ do
optional blanklines
yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
stopLine :: Monad m => ParserT Text st m ()
stopLine :: Monad m => ParserT Sources st m ()
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()

View file

@ -37,18 +37,19 @@ import Text.Pandoc.Error (PandocError (PandocParsecError))
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Shared (crFilter, trimr, tshow)
import Text.Pandoc.Shared (trimr, tshow)
-- | Read Muse from an input string and return a Pandoc document.
readMuse :: PandocMonad m
readMuse :: (PandocMonad m, ToSources a)
=> ReaderOptions
-> Text
-> a
-> m Pandoc
readMuse opts s = do
let input = crFilter s
res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input
let sources = toSources s
res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts }
(initialSourceName sources) sources
case res of
Left e -> throwError $ PandocParsecError input e
Left e -> throwError $ PandocParsecError sources e
Right d -> return d
type F = Future MuseState
@ -82,7 +83,7 @@ instance Default MuseEnv where
, museInPara = False
}
type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m)
type MuseParser m = ParserT Sources MuseState (ReaderT MuseEnv m)
instance HasReaderOptions MuseState where
extractReaderOptions = museOptions
@ -155,7 +156,7 @@ firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1)
-- * Parsers
-- | Parse end-of-line, which can be either a newline or end-of-file.
eol :: Stream s m Char => ParserT s st m ()
eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
eol = void newline <|> eof
getIndent :: PandocMonad m

View file

@ -21,6 +21,7 @@ import Control.Monad.Except (throwError)
import Data.Text (Text)
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Error
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
-- | Read native formatted text and return a Pandoc document.
-- The input may be a full pandoc document, a block list, a block,
@ -32,14 +33,15 @@ import Text.Pandoc.Error
--
-- > Pandoc nullMeta [Plain [Str "hi"]]
--
readNative :: PandocMonad m
readNative :: (PandocMonad m, ToSources a)
=> ReaderOptions
-> Text -- ^ String to parse (assuming @'\n'@ line endings)
-> a
-> m Pandoc
readNative _ s =
case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of
Right doc -> return doc
Left _ -> throwError $ PandocParseError "couldn't read native"
let t = sourcesToText . toSources $ s
in case maybe (Pandoc nullMeta <$> readBlocks t) Right (safeRead t) of
Right doc -> return doc
Left _ -> throwError $ PandocParseError "couldn't read native"
readBlocks :: Text -> Either PandocError [Block]
readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)

View file

@ -24,7 +24,8 @@ import Text.Pandoc.Options
import Text.Pandoc.Error (PandocError(..))
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Readers.Markdown (readMarkdown)
import Text.Pandoc.Shared (crFilter, blocksToInlines')
import Text.Pandoc.Shared (blocksToInlines')
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
import Text.Pandoc.XML.Light
import Control.Monad.Except (throwError)
@ -46,10 +47,14 @@ instance Default OPMLState where
, opmlOptions = def
}
readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readOPML :: (PandocMonad m, ToSources a)
=> ReaderOptions
-> a
-> m Pandoc
readOPML opts inp = do
(bs, st') <- runStateT
(case parseXMLContents (TL.fromStrict (crFilter inp)) of
let sources = toSources inp
(bs, st') <-
runStateT (case parseXMLContents (TL.fromStrict . sourcesToText $ sources) of
Left msg -> throwError $ PandocXMLError "" msg
Right ns -> mapM parseBlock ns)
def{ opmlOptions = opts }

View file

@ -18,22 +18,19 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing (reportLogMessages)
import Text.Pandoc.Shared (crFilter)
import Text.Pandoc.Sources (ToSources(..), ensureFinalNewlines)
import Control.Monad.Except (throwError)
import Control.Monad.Reader (runReaderT)
import Data.Text (Text)
-- | Parse org-mode string and return a Pandoc document.
readOrg :: PandocMonad m
readOrg :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
-> Text -- ^ String to parse (assuming @'\n'@ line endings)
-> a
-> m Pandoc
readOrg opts s = do
parsed <- flip runReaderT def $
readWithM parseOrg (optionsToParserState opts)
(crFilter s <> "\n\n")
(ensureFinalNewlines 2 (toSources s))
case parsed of
Right result -> return result
Left e -> throwError e

View file

@ -29,6 +29,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
import Text.TeXMath (DisplayType (..), readTeX, writePandoc)
import Text.Pandoc.Sources (ToSources(..))
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
import Control.Monad (guard, mplus, mzero, unless, void, when)
@ -802,7 +803,7 @@ inlineLaTeX = try $ do
parseAsInlineLaTeX :: PandocMonad m
=> Text -> TeXExport -> OrgParser m (Maybe Inlines)
parseAsInlineLaTeX cs = \case
TeXExport -> maybeRight <$> runParserT inlineCommand state "" cs
TeXExport -> maybeRight <$> runParserT inlineCommand state "" (toSources cs)
TeXIgnore -> return (Just mempty)
TeXVerbatim -> return (Just $ B.str cs)

View file

@ -114,7 +114,7 @@ import Control.Monad (guard)
import Control.Monad.Reader (ReaderT)
-- | The parser used to read org files.
type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m)
type OrgParser m = ParserT Sources OrgParserState (ReaderT OrgParserLocal m)
--
-- Adaptions and specializations of parsing utilities

View file

@ -38,25 +38,24 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Shared
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Printf (printf)
import Data.Time.Format
-- TODO:
-- [ ] .. parsed-literal
-- | Parse reStructuredText string and return Pandoc document.
readRST :: PandocMonad m
readRST :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
-> Text -- ^ Text to parse (assuming @'\n'@ line endings)
-> a
-> m Pandoc
readRST opts s = do
parsed <- readWithM parseRST def{ stateOptions = opts }
(crFilter s <> "\n\n")
(ensureFinalNewlines 2 (toSources s))
case parsed of
Right result -> return result
Left e -> throwError e
type RSTParser m = ParserT Text ParserState m
type RSTParser m = ParserT Sources ParserState m
--
-- Constants and data structure definitions
@ -151,11 +150,19 @@ parseRST = do
startPos <- getPosition
-- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys were...
docMinusKeys <- T.concat <$>
manyTill (referenceKey <|> anchorDef <|>
noteBlock <|> citationBlock <|>
(snd <$> withRaw comment) <|>
headerBlock <|> lineClump) eof
let chunk = referenceKey
<|> anchorDef
<|> noteBlock
<|> citationBlock
<|> (snd <$> withRaw comment)
<|> headerBlock
<|> lineClump
docMinusKeys <- Sources <$>
manyTill (do pos <- getPosition
t <- chunk
return (pos, t)) eof
-- UGLY: we collapse source position information.
-- TODO: fix the parser to use the F monad instead of two passes
setInput docMinusKeys
setPosition startPos
st' <- getState
@ -348,7 +355,7 @@ singleHeader' = try $ do
-- hrule block
--
hrule :: Monad m => ParserT Text st m Blocks
hrule :: Monad m => ParserT Sources st m Blocks
hrule = try $ do
chr <- oneOf underlineChars
count 3 (char chr)
@ -363,7 +370,7 @@ hrule = try $ do
-- read a line indented by a given string
indentedLine :: (HasReaderOptions st, Monad m)
=> Int -> ParserT Text st m Text
=> Int -> ParserT Sources st m Text
indentedLine indents = try $ do
lookAhead spaceChar
gobbleAtMostSpaces indents
@ -372,7 +379,7 @@ indentedLine indents = try $ do
-- one or more indented lines, possibly separated by blank lines.
-- any amount of indentation will work.
indentedBlock :: (HasReaderOptions st, Monad m)
=> ParserT Text st m Text
=> ParserT Sources st m Text
indentedBlock = try $ do
indents <- length <$> lookAhead (many1 spaceChar)
lns <- many1 $ try $ do b <- option "" blanklines
@ -381,20 +388,20 @@ indentedBlock = try $ do
optional blanklines
return $ T.unlines lns
quotedBlock :: Monad m => ParserT Text st m Text
quotedBlock :: Monad m => ParserT Sources st m Text
quotedBlock = try $ do
quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
lns <- many1 $ lookAhead (char quote) >> anyLine
optional blanklines
return $ T.unlines lns
codeBlockStart :: Monad m => ParserT Text st m Char
codeBlockStart :: Monad m => ParserT Sources st m Char
codeBlockStart = string "::" >> blankline >> blankline
codeBlock :: Monad m => ParserT Text ParserState m Blocks
codeBlock :: Monad m => ParserT Sources ParserState m Blocks
codeBlock = try $ codeBlockStart >> codeBlockBody
codeBlockBody :: Monad m => ParserT Text ParserState m Blocks
codeBlockBody :: Monad m => ParserT Sources ParserState m Blocks
codeBlockBody = do
lang <- stateRstHighlight <$> getState
try $ B.codeBlockWith ("", maybeToList lang, []) . stripTrailingNewlines <$>
@ -410,14 +417,14 @@ lhsCodeBlock = try $ do
return $ B.codeBlockWith ("", ["haskell","literate"], [])
$ T.intercalate "\n" lns
latexCodeBlock :: Monad m => ParserT Text st m [Text]
latexCodeBlock :: Monad m => ParserT Sources st m [Text]
latexCodeBlock = try $ do
try (latexBlockLine "\\begin{code}")
many1Till anyLine (try $ latexBlockLine "\\end{code}")
where
latexBlockLine s = skipMany spaceChar >> string s >> blankline
birdCodeBlock :: Monad m => ParserT Text st m [Text]
birdCodeBlock :: Monad m => ParserT Sources st m [Text]
birdCodeBlock = filterSpace <$> many1 birdTrackLine
where filterSpace lns =
-- if (as is normal) there is always a space after >, drop it
@ -425,7 +432,7 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine
then map (T.drop 1) lns
else lns
birdTrackLine :: Monad m => ParserT Text st m Text
birdTrackLine :: Monad m => ParserT Sources st m Text
birdTrackLine = char '>' >> anyLine
--
@ -456,7 +463,6 @@ includeDirective top fields body = do
let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead
let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead
oldPos <- getPosition
oldInput <- getInput
containers <- stateContainers <$> getState
when (f `elem` containers) $
throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos
@ -494,15 +500,11 @@ includeDirective top fields body = do
Nothing -> case lookup "literal" fields of
Just _ -> return $ B.rawBlock "rst" contents'
Nothing -> do
setPosition $ newPos (T.unpack f) 1 1
setInput $ contents' <> "\n"
bs <- optional blanklines >>
(mconcat <$> many block)
setInput oldInput
setPosition oldPos
addToSources (initialPos (T.unpack f))
(contents' <> "\n")
updateState $ \s -> s{ stateContainers =
tail $ stateContainers s }
return bs
return mempty
--
@ -526,7 +528,7 @@ definitionList :: PandocMonad m => RSTParser m Blocks
definitionList = B.definitionList <$> many1 definitionListItem
-- parses bullet list start and returns its length (inc. following whitespace)
bulletListStart :: Monad m => ParserT Text st m Int
bulletListStart :: Monad m => ParserT Sources st m Int
bulletListStart = try $ do
notFollowedBy' hrule -- because hrules start out just like lists
marker <- oneOf bulletListMarkers
@ -1103,7 +1105,7 @@ quotedReferenceName = try $ do
-- plus isolated (no two adjacent) internal hyphens, underscores,
-- periods, colons and plus signs; no whitespace or other characters
-- are allowed.
simpleReferenceName :: Monad m => ParserT Text st m Text
simpleReferenceName :: Monad m => ParserT Sources st m Text
simpleReferenceName = do
x <- alphaNum
xs <- many $ alphaNum
@ -1122,7 +1124,7 @@ referenceKey = do
-- return enough blanks to replace key
return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
targetURI :: Monad m => ParserT Text st m Text
targetURI :: Monad m => ParserT Sources st m Text
targetURI = do
skipSpaces
optional $ try $ newline >> notFollowedBy blankline
@ -1160,8 +1162,10 @@ anonymousKey :: Monad m => RSTParser m ()
anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
src <- targetURI
pos <- getPosition
let key = toKey $ "_" <> T.pack (printf "%09d" (sourceLine pos))
-- we need to ensure that the keys are ordered by occurrence in
-- the document.
numKeys <- M.size . stateKeys <$> getState
let key = toKey $ "_" <> T.pack (show numKeys)
updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
stateKeys s }
@ -1250,13 +1254,13 @@ headerBlock = do
-- Grid tables TODO:
-- - column spans
dashedLine :: Monad m => Char -> ParserT Text st m (Int, Int)
dashedLine :: Monad m => Char -> ParserT Sources st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many (char ' ')
return (length dashes, length $ dashes ++ sp)
simpleDashedLines :: Monad m => Char -> ParserT Text st m [(Int,Int)]
simpleDashedLines :: Monad m => Char -> ParserT Sources st m [(Int,Int)]
simpleDashedLines ch = try $ many1 (dashedLine ch)
-- Parse a table row separator
@ -1382,7 +1386,7 @@ hyphens = do
-- don't want to treat endline after hyphen or dash as a space
return $ B.str result
escapedChar :: Monad m => ParserT Text st m Inlines
escapedChar :: Monad m => ParserT Sources st m Inlines
escapedChar = do c <- escaped anyChar
return $ if c == ' ' || c == '\n' || c == '\r'
-- '\ ' is null in RST

View file

@ -42,7 +42,6 @@ import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Shared (safeRead)
import Text.Parsec hiding (tokenPrim)
import Text.Pandoc.RoffChar (characterCodes, combiningAccents)
import qualified Data.Sequence as Seq
import qualified Data.Foldable as Foldable
@ -122,16 +121,16 @@ instance Default RoffState where
, afterConditional = False
}
type RoffLexer m = ParserT T.Text RoffState m
type RoffLexer m = ParserT Sources RoffState m
--
-- Lexer: T.Text -> RoffToken
--
eofline :: Stream s m Char => ParsecT s u m ()
eofline :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m ()
eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}")
spacetab :: Stream s m Char => ParsecT s u m Char
spacetab :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m Char
spacetab = char ' ' <|> char '\t'
characterCodeMap :: M.Map T.Text Char
@ -303,8 +302,7 @@ expandString = try $ do
char '*'
cs <- escapeArg <|> countChar 1 anyChar
s <- linePartsToText <$> resolveText cs pos
getInput >>= setInput . (s <>)
return ()
addToInput s
-- Parses: '..'
quoteArg :: PandocMonad m => RoffLexer m T.Text
@ -316,7 +314,7 @@ escFont = do
font' <- if T.null font || font == "P"
then prevFont <$> getState
else return $ foldr processFontLetter defaultFontSpec $ T.unpack font
modifyState $ \st -> st{ prevFont = currentFont st
updateState $ \st -> st{ prevFont = currentFont st
, currentFont = font' }
return [Font font']
where
@ -372,8 +370,8 @@ lexTable pos = do
spaces
opts <- try tableOptions <|> [] <$ optional (char ';')
case lookup "tab" opts of
Just (T.uncons -> Just (c, _)) -> modifyState $ \st -> st{ tableTabChar = c }
_ -> modifyState $ \st -> st{ tableTabChar = '\t' }
Just (T.uncons -> Just (c, _)) -> updateState $ \st -> st{ tableTabChar = c }
_ -> updateState $ \st -> st{ tableTabChar = '\t' }
spaces
skipMany lexComment
spaces
@ -489,18 +487,18 @@ lexConditional mname = do
ifPart <- do
optional $ try $ char '\\' >> newline
lexGroup
<|> do modifyState $ \s -> s{ afterConditional = True }
<|> do updateState $ \s -> s{ afterConditional = True }
t <- manToken
modifyState $ \s -> s{ afterConditional = False }
updateState $ \s -> s{ afterConditional = False }
return t
case mbtest of
Nothing -> do
putState st -- reset state, so we don't record macros in skipped section
setState st -- reset state, so we don't record macros in skipped section
report $ SkippedContent (T.cons '.' mname) pos
return mempty
Just True -> return ifPart
Just False -> do
putState st
setState st
return mempty
expression :: PandocMonad m => RoffLexer m (Maybe Bool)
@ -515,7 +513,7 @@ expression = do
_ -> Nothing
where
returnValue v = do
modifyState $ \st -> st{ lastExpression = v }
updateState $ \st -> st{ lastExpression = v }
return v
lexGroup :: PandocMonad m => RoffLexer m RoffTokens
@ -536,7 +534,7 @@ lexIncludeFile args = do
result <- readFileFromDirs dirs $ T.unpack fp
case result of
Nothing -> report $ CouldNotLoadIncludeFile fp pos
Just s -> getInput >>= setInput . (s <>)
Just s -> addToInput s
return mempty
[] -> return mempty
@ -564,13 +562,13 @@ lexStringDef args = do -- string definition
(x:ys) -> do
let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys)
let stringName = linePartsToText x
modifyState $ \st ->
updateState $ \st ->
st{ customMacros = M.insert stringName ts (customMacros st) }
return mempty
lexMacroDef :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens
lexMacroDef args = do -- macro definition
modifyState $ \st -> st{ roffMode = CopyMode }
updateState $ \st -> st{ roffMode = CopyMode }
(macroName, stopMacro) <-
case args of
(x : y : _) -> return (linePartsToText x, linePartsToText y)
@ -584,7 +582,7 @@ lexMacroDef args = do -- macro definition
_ <- lexArgs
return ()
ts <- mconcat <$> manyTill manToken stop
modifyState $ \st ->
updateState $ \st ->
st{ customMacros = M.insert macroName ts (customMacros st)
, roffMode = NormalMode }
return mempty

View file

@ -28,22 +28,22 @@ import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
import Text.Pandoc.Shared (crFilter, tshow)
import Text.Pandoc.Shared (tshow)
import Text.Pandoc.XML (fromEntities)
-- | Read twiki from an input string and return a Pandoc document.
readTWiki :: PandocMonad m
readTWiki :: (PandocMonad m, ToSources a)
=> ReaderOptions
-> Text
-> a
-> m Pandoc
readTWiki opts s = do
res <- readWithM parseTWiki def{ stateOptions = opts }
(crFilter s <> "\n\n")
let sources = ensureFinalNewlines 2 (toSources s)
res <- readWithM parseTWiki def{ stateOptions = opts } sources
case res of
Left e -> throwError e
Right d -> return d
type TWParser = ParserT Text ParserState
type TWParser = ParserT Sources ParserState
--
-- utility functions

View file

@ -53,30 +53,34 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
import Text.Pandoc.Shared (crFilter, trim, tshow)
import Text.Pandoc.Shared (trim, tshow)
-- | Parse a Textile text and return a Pandoc document.
readTextile :: PandocMonad m
readTextile :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
-> Text -- ^ String to parse (assuming @'\n'@ line endings)
-> a
-> m Pandoc
readTextile opts s = do
parsed <- readWithM parseTextile def{ stateOptions = opts }
(crFilter s <> "\n\n")
let sources = ensureFinalNewlines 2 (toSources s)
parsed <- readWithM parseTextile def{ stateOptions = opts } sources
case parsed of
Right result -> return result
Left e -> throwError e
type TextileParser = ParserT Sources ParserState
-- | Generate a Pandoc ADT from a textile document
parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc
parseTextile :: PandocMonad m => TextileParser m Pandoc
parseTextile = do
many blankline
startPos <- getPosition
-- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys/notes were...
let firstPassParser = noteBlock <|> lineClump
manyTill firstPassParser eof >>= setInput . T.concat
let firstPassParser = do
pos <- getPosition
t <- noteBlock <|> lineClump
return (pos, t)
manyTill firstPassParser eof >>= setInput . Sources
setPosition startPos
st' <- getState
let reversedNotes = stateNotes st'
@ -84,10 +88,10 @@ parseTextile = do
-- now parse it for real...
Pandoc nullMeta . B.toList <$> parseBlocks -- FIXME
noteMarker :: PandocMonad m => ParserT Text ParserState m Text
noteMarker :: PandocMonad m => TextileParser m Text
noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.')
noteBlock :: PandocMonad m => ParserT Text ParserState m Text
noteBlock :: PandocMonad m => TextileParser m Text
noteBlock = try $ do
startPos <- getPosition
ref <- noteMarker
@ -102,11 +106,11 @@ noteBlock = try $ do
return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
-- | Parse document blocks
parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks
parseBlocks :: PandocMonad m => TextileParser m Blocks
parseBlocks = mconcat <$> manyTill block eof
-- | Block parsers list tried in definition order
blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks]
blockParsers :: PandocMonad m => [TextileParser m Blocks]
blockParsers = [ codeBlock
, header
, blockQuote
@ -121,22 +125,22 @@ blockParsers = [ codeBlock
]
-- | Any block in the order of definition of blockParsers
block :: PandocMonad m => ParserT Text ParserState m Blocks
block :: PandocMonad m => TextileParser m Blocks
block = do
res <- choice blockParsers <?> "block"
trace (T.take 60 $ tshow $ B.toList res)
return res
commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks
commentBlock :: PandocMonad m => TextileParser m Blocks
commentBlock = try $ do
string "###."
manyTill anyLine blanklines
return mempty
codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks
codeBlock :: PandocMonad m => TextileParser m Blocks
codeBlock = codeBlockTextile <|> codeBlockHtml
codeBlockTextile :: PandocMonad m => ParserT Text ParserState m Blocks
codeBlockTextile :: PandocMonad m => TextileParser m Blocks
codeBlockTextile = try $ do
string "bc." <|> string "pre."
extended <- option False (True <$ char '.')
@ -156,7 +160,7 @@ trimTrailingNewlines :: Text -> Text
trimTrailingNewlines = T.dropWhileEnd (=='\n')
-- | Code Blocks in Textile are between <pre> and </pre>
codeBlockHtml :: PandocMonad m => ParserT Text ParserState m Blocks
codeBlockHtml :: PandocMonad m => TextileParser m Blocks
codeBlockHtml = try $ do
(t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre")))
@ -174,7 +178,7 @@ codeBlockHtml = try $ do
return $ B.codeBlockWith (ident,classes,kvs) result'''
-- | Header of the form "hN. content" with N in 1..6
header :: PandocMonad m => ParserT Text ParserState m Blocks
header :: PandocMonad m => TextileParser m Blocks
header = try $ do
char 'h'
level <- digitToInt <$> oneOf "123456"
@ -186,14 +190,14 @@ header = try $ do
return $ B.headerWith attr' level name
-- | Blockquote of the form "bq. content"
blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks
blockQuote :: PandocMonad m => TextileParser m Blocks
blockQuote = try $ do
string "bq" >> attributes >> char '.' >> whitespace
B.blockQuote <$> para
-- Horizontal rule
hrule :: PandocMonad m => ParserT Text st m Blocks
hrule :: PandocMonad m => TextileParser m Blocks
hrule = try $ do
skipSpaces
start <- oneOf "-*"
@ -208,39 +212,39 @@ hrule = try $ do
-- | Can be a bullet list or an ordered list. This implementation is
-- strict in the nesting, sublist must start at exactly "parent depth
-- plus one"
anyList :: PandocMonad m => ParserT Text ParserState m Blocks
anyList :: PandocMonad m => TextileParser m Blocks
anyList = try $ anyListAtDepth 1 <* blanklines
-- | This allow one type of list to be nested into an other type,
-- provided correct nesting
anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
anyListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
anyListAtDepth depth = choice [ bulletListAtDepth depth,
orderedListAtDepth depth,
definitionList ]
-- | Bullet List of given depth, depth being the number of leading '*'
bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
bulletListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth)
-- | Bullet List Item of given depth, depth being the number of
-- leading '*'
bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
bulletListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
bulletListItemAtDepth = genericListItemAtDepth '*'
-- | Ordered List of given depth, depth being the number of
-- leading '#'
orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
orderedListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
orderedListAtDepth depth = try $ do
items <- many1 (orderedListItemAtDepth depth)
return $ B.orderedList items
-- | Ordered List Item of given depth, depth being the number of
-- leading '#'
orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
orderedListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
orderedListItemAtDepth = genericListItemAtDepth '#'
-- | Common implementation of list items
genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks
genericListItemAtDepth :: PandocMonad m => Char -> Int -> TextileParser m Blocks
genericListItemAtDepth c depth = try $ do
count depth (char c) >> attributes >> whitespace
contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|>
@ -250,25 +254,25 @@ genericListItemAtDepth c depth = try $ do
return $ contents <> sublist
-- | A definition list is a set of consecutive definition items
definitionList :: PandocMonad m => ParserT Text ParserState m Blocks
definitionList :: PandocMonad m => TextileParser m Blocks
definitionList = try $ B.definitionList <$> many1 definitionListItem
-- | List start character.
listStart :: PandocMonad m => ParserT Text ParserState m ()
listStart :: PandocMonad m => TextileParser m ()
listStart = genericListStart '*'
<|> () <$ genericListStart '#'
<|> () <$ definitionListStart
genericListStart :: PandocMonad m => Char -> ParserT Text st m ()
genericListStart :: PandocMonad m => Char -> TextileParser m ()
genericListStart c = () <$ try (many1 (char c) >> whitespace)
basicDLStart :: PandocMonad m => ParserT Text ParserState m ()
basicDLStart :: PandocMonad m => TextileParser m ()
basicDLStart = do
char '-'
whitespace
notFollowedBy newline
definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines
definitionListStart :: PandocMonad m => TextileParser m Inlines
definitionListStart = try $ do
basicDLStart
trimInlines . mconcat <$>
@ -281,15 +285,15 @@ definitionListStart = try $ do
-- the term defined, then spaces and ":=". The definition follows, on
-- the same single line, or spaned on multiple line, after a line
-- break.
definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks])
definitionListItem :: PandocMonad m => TextileParser m (Inlines, [Blocks])
definitionListItem = try $ do
term <- mconcat . intersperse B.linebreak <$> many1 definitionListStart
def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef)
return (term, def')
where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
where inlineDef :: PandocMonad m => TextileParser m [Blocks]
inlineDef = liftM (\d -> [B.plain d])
$ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline
multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
multilineDef :: PandocMonad m => TextileParser m [Blocks]
multilineDef = try $ do
optional whitespace >> newline
s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline))
@ -300,7 +304,7 @@ definitionListItem = try $ do
-- raw content
-- | A raw Html Block, optionally followed by blanklines
rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks
rawHtmlBlock :: PandocMonad m => TextileParser m Blocks
rawHtmlBlock = try $ do
skipMany spaceChar
(_,b) <- htmlTag isBlockTag
@ -308,14 +312,14 @@ rawHtmlBlock = try $ do
return $ B.rawBlock "html" b
-- | Raw block of LaTeX content
rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks
rawLaTeXBlock' :: PandocMonad m => TextileParser m Blocks
rawLaTeXBlock' = do
guardEnabled Ext_raw_tex
B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
-- | In textile, paragraphs are separated by blank lines.
para :: PandocMonad m => ParserT Text ParserState m Blocks
para :: PandocMonad m => TextileParser m Blocks
para = B.para . trimInlines . mconcat <$> many1 inline
-- Tables
@ -326,7 +330,7 @@ toAlignment '>' = AlignRight
toAlignment '=' = AlignCenter
toAlignment _ = AlignDefault
cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment)
cellAttributes :: PandocMonad m => TextileParser m (Bool, Alignment)
cellAttributes = try $ do
isHeader <- option False (True <$ char '_')
-- we just ignore colspan and rowspan markers:
@ -339,7 +343,7 @@ cellAttributes = try $ do
return (isHeader, alignment)
-- | A table cell spans until a pipe |
tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks)
tableCell :: PandocMonad m => TextileParser m ((Bool, Alignment), Blocks)
tableCell = try $ do
char '|'
(isHeader, alignment) <- option (False, AlignDefault) cellAttributes
@ -350,7 +354,7 @@ tableCell = try $ do
return ((isHeader, alignment), B.plain content)
-- | A table row is made of many table cells
tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)]
tableRow :: PandocMonad m => TextileParser m [((Bool, Alignment), Blocks)]
tableRow = try $ do
-- skip optional row attributes
optional $ try $ do
@ -360,7 +364,7 @@ tableRow = try $ do
many1 tableCell <* char '|' <* blankline
-- | A table with an optional header.
table :: PandocMonad m => ParserT Text ParserState m Blocks
table :: PandocMonad m => TextileParser m Blocks
table = try $ do
-- ignore table attributes
caption <- option mempty $ try $ do
@ -388,7 +392,7 @@ table = try $ do
(TableFoot nullAttr [])
-- | Ignore markers for cols, thead, tfoot.
ignorableRow :: PandocMonad m => ParserT Text ParserState m ()
ignorableRow :: PandocMonad m => TextileParser m ()
ignorableRow = try $ do
char '|'
oneOf ":^-~"
@ -397,7 +401,7 @@ ignorableRow = try $ do
_ <- anyLine
return ()
explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m ()
explicitBlockStart :: PandocMonad m => Text -> TextileParser m ()
explicitBlockStart name = try $ do
string (T.unpack name)
attributes
@ -409,8 +413,8 @@ explicitBlockStart name = try $ do
-- However, they can be used to set HTML/CSS attributes when needed.
maybeExplicitBlock :: PandocMonad m
=> Text -- ^ block tag name
-> ParserT Text ParserState m Blocks -- ^ implicit block
-> ParserT Text ParserState m Blocks
-> TextileParser m Blocks -- ^ implicit block
-> TextileParser m Blocks
maybeExplicitBlock name blk = try $ do
optional $ explicitBlockStart name
blk
@ -423,11 +427,11 @@ maybeExplicitBlock name blk = try $ do
-- | Any inline element
inline :: PandocMonad m => ParserT Text ParserState m Inlines
inline :: PandocMonad m => TextileParser m Inlines
inline = choice inlineParsers <?> "inline"
-- | Inline parsers tried in order
inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines]
inlineParsers :: PandocMonad m => [TextileParser m Inlines]
inlineParsers = [ str
, whitespace
, endline
@ -447,7 +451,7 @@ inlineParsers = [ str
]
-- | Inline markups
inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
inlineMarkup :: PandocMonad m => TextileParser m Inlines
inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
, simpleInline (string "**") B.strong
, simpleInline (string "__") B.emph
@ -461,29 +465,29 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
]
-- | Trademark, registered, copyright
mark :: PandocMonad m => ParserT Text st m Inlines
mark :: PandocMonad m => TextileParser m Inlines
mark = try $ char '(' >> (try tm <|> try reg <|> copy)
reg :: PandocMonad m => ParserT Text st m Inlines
reg :: PandocMonad m => TextileParser m Inlines
reg = do
oneOf "Rr"
char ')'
return $ B.str "\174"
tm :: PandocMonad m => ParserT Text st m Inlines
tm :: PandocMonad m => TextileParser m Inlines
tm = do
oneOf "Tt"
oneOf "Mm"
char ')'
return $ B.str "\8482"
copy :: PandocMonad m => ParserT Text st m Inlines
copy :: PandocMonad m => TextileParser m Inlines
copy = do
oneOf "Cc"
char ')'
return $ B.str "\169"
note :: PandocMonad m => ParserT Text ParserState m Inlines
note :: PandocMonad m => TextileParser m Inlines
note = try $ do
ref <- char '[' *> many1 digit <* char ']'
notes <- stateNotes <$> getState
@ -507,13 +511,13 @@ wordBoundaries :: [Char]
wordBoundaries = markupChars <> stringBreakers
-- | Parse a hyphened sequence of words
hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text
hyphenedWords :: PandocMonad m => TextileParser m Text
hyphenedWords = do
x <- wordChunk
xs <- many (try $ char '-' >> wordChunk)
return $ T.intercalate "-" (x:xs)
wordChunk :: PandocMonad m => ParserT Text ParserState m Text
wordChunk :: PandocMonad m => TextileParser m Text
wordChunk = try $ do
hd <- noneOf wordBoundaries
tl <- many ( noneOf wordBoundaries <|>
@ -522,7 +526,7 @@ wordChunk = try $ do
return $ T.pack $ hd:tl
-- | Any string
str :: PandocMonad m => ParserT Text ParserState m Inlines
str :: PandocMonad m => TextileParser m Inlines
str = do
baseStr <- hyphenedWords
-- RedCloth compliance : if parsed word is uppercase and immediately
@ -535,11 +539,11 @@ str = do
return $ B.str fullStr
-- | Some number of space chars
whitespace :: PandocMonad m => ParserT Text st m Inlines
whitespace :: PandocMonad m => TextileParser m Inlines
whitespace = many1 spaceChar >> return B.space <?> "whitespace"
-- | In Textile, an isolated endline character is a line break
endline :: PandocMonad m => ParserT Text ParserState m Inlines
endline :: PandocMonad m => TextileParser m Inlines
endline = try $ do
newline
notFollowedBy blankline
@ -547,18 +551,18 @@ endline = try $ do
notFollowedBy rawHtmlBlock
return B.linebreak
rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines
rawHtmlInline :: PandocMonad m => TextileParser m Inlines
rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
-- | Raw LaTeX Inline
rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines
rawLaTeXInline' :: PandocMonad m => TextileParser m Inlines
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
B.rawInline "latex" <$> rawLaTeXInline
-- | Textile standard link syntax is "label":target. But we
-- can also have ["label":target].
link :: PandocMonad m => ParserT Text ParserState m Inlines
link :: PandocMonad m => TextileParser m Inlines
link = try $ do
bracketed <- (True <$ char '[') <|> return False
char '"' *> notFollowedBy (oneOf " \t\n\r")
@ -578,7 +582,7 @@ link = try $ do
else B.spanWith attr $ B.link url "" name'
-- | image embedding
image :: PandocMonad m => ParserT Text ParserState m Inlines
image :: PandocMonad m => TextileParser m Inlines
image = try $ do
char '!' >> notFollowedBy space
(ident, cls, kvs) <- attributes
@ -590,51 +594,51 @@ image = try $ do
char '!'
return $ B.imageWith attr src alt (B.str alt)
escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines
escapedInline :: PandocMonad m => TextileParser m Inlines
escapedInline = escapedEqs <|> escapedTag
escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines
escapedEqs :: PandocMonad m => TextileParser m Inlines
escapedEqs = B.str . T.pack <$>
try (string "==" *> manyTill anyChar' (try $ string "=="))
-- | literal text escaped btw <notextile> tags
escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines
escapedTag :: PandocMonad m => TextileParser m Inlines
escapedTag = B.str . T.pack <$>
try (string "<notextile>" *>
manyTill anyChar' (try $ string "</notextile>"))
-- | Any special symbol defined in wordBoundaries
symbol :: PandocMonad m => ParserT Text ParserState m Inlines
symbol :: PandocMonad m => TextileParser m Inlines
symbol = B.str . T.singleton <$> (notFollowedBy newline *>
notFollowedBy rawHtmlBlock *>
oneOf wordBoundaries)
-- | Inline code
code :: PandocMonad m => ParserT Text ParserState m Inlines
code :: PandocMonad m => TextileParser m Inlines
code = code1 <|> code2
-- any character except a newline before a blank line
anyChar' :: PandocMonad m => ParserT Text ParserState m Char
anyChar' :: PandocMonad m => TextileParser m Char
anyChar' =
satisfy (/='\n') <|>
try (char '\n' <* notFollowedBy blankline)
code1 :: PandocMonad m => ParserT Text ParserState m Inlines
code1 :: PandocMonad m => TextileParser m Inlines
code1 = B.code . T.pack <$> surrounded (char '@') anyChar'
code2 :: PandocMonad m => ParserT Text ParserState m Inlines
code2 :: PandocMonad m => TextileParser m Inlines
code2 = do
htmlTag (tagOpen (=="tt") null)
B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
-- | Html / CSS attributes
attributes :: PandocMonad m => ParserT Text ParserState m Attr
attributes :: PandocMonad m => TextileParser m Attr
attributes = foldl' (flip ($)) ("",[],[]) <$>
try (do special <- option id specialAttribute
attrs <- many attribute
return (special : attrs))
specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
specialAttribute :: PandocMonad m => TextileParser m (Attr -> Attr)
specialAttribute = do
alignStr <- ("center" <$ char '=') <|>
("justify" <$ try (string "<>")) <|>
@ -643,11 +647,11 @@ specialAttribute = do
notFollowedBy spaceChar
return $ addStyle $ T.pack $ "text-align:" ++ alignStr
attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
attribute :: PandocMonad m => TextileParser m (Attr -> Attr)
attribute = try $
(classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar
classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
classIdAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
classIdAttr = try $ do -- (class class #id)
char '('
ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')')
@ -659,7 +663,7 @@ classIdAttr = try $ do -- (class class #id)
classes'
-> return $ \(_,_,keyvals) -> ("",classes',keyvals)
styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
styleAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
styleAttr = do
style <- try $ enclosed (char '{') (char '}') anyChar'
return $ addStyle $ T.pack style
@ -670,23 +674,23 @@ addStyle style (id',classes,keyvals) =
where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals]
langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
langAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
langAttr = do
lang <- try $ enclosed (char '[') (char ']') alphaNum
return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals)
-- | Parses material surrounded by a parser.
surrounded :: (PandocMonad m, Show t)
=> ParserT Text st m t -- ^ surrounding parser
-> ParserT Text st m a -- ^ content parser (to be used repeatedly)
-> ParserT Text st m [a]
=> ParserT Sources st m t -- ^ surrounding parser
-> ParserT Sources st m a -- ^ content parser (to be used repeatedly)
-> ParserT Sources st m [a]
surrounded border =
enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
simpleInline :: PandocMonad m
=> ParserT Text ParserState m t -- ^ surrounding parser
=> TextileParser m t -- ^ surrounding parser
-> (Inlines -> Inlines) -- ^ Inline constructor
-> ParserT Text ParserState m Inlines -- ^ content parser (to be used repeatedly)
-> TextileParser m Inlines -- ^ content parser (to be used repeatedly)
simpleInline border construct = try $ do
notAfterString
border *> notFollowedBy (oneOf " \t\n\r")
@ -700,7 +704,7 @@ simpleInline border construct = try $ do
then body
else B.spanWith attr body
groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
groupedInlineMarkup :: PandocMonad m => TextileParser m Inlines
groupedInlineMarkup = try $ do
char '['
sp1 <- option mempty $ B.space <$ whitespace
@ -709,5 +713,5 @@ groupedInlineMarkup = try $ do
char ']'
return $ sp1 <> result <> sp2
eof' :: Monad m => ParserT Text s m Char
eof' :: Monad m => ParserT Sources s m Char
eof' = '\n' <$ eof

View file

@ -30,23 +30,23 @@ import Text.Pandoc.Definition
import Text.Pandoc.Logging (Verbosity (..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
import Text.Pandoc.Shared (crFilter, safeRead)
import Text.Pandoc.Shared (safeRead)
import Text.Pandoc.XML (fromEntities)
import Text.Printf (printf)
-- | Read TikiWiki from an input string and return a Pandoc document.
readTikiWiki :: PandocMonad m
readTikiWiki :: (PandocMonad m, ToSources a)
=> ReaderOptions
-> Text
-> a
-> m Pandoc
readTikiWiki opts s = do
res <- readWithM parseTikiWiki def{ stateOptions = opts }
(crFilter s <> "\n\n")
let sources = ensureFinalNewlines 2 (toSources s)
res <- readWithM parseTikiWiki def{ stateOptions = opts } sources
case res of
Left e -> throwError e
Right d -> return d
type TikiWikiParser = ParserT Text ParserState
type TikiWikiParser = ParserT Sources ParserState
--
-- utility functions

View file

@ -33,9 +33,9 @@ import Data.Time (defaultTimeLocale)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (space, spaces, uri)
import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI)
import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI)
type T2T = ParserT Text ParserState (Reader T2TMeta)
type T2T = ParserT Sources ParserState (Reader T2TMeta)
-- | An object for the T2T macros meta information
-- the contents of each field is simply substituted verbatim into the file
@ -68,15 +68,15 @@ getT2TMeta = do
(intercalate ", " inps) outp
-- | Read Txt2Tags from an input string returning a Pandoc document
readTxt2Tags :: PandocMonad m
readTxt2Tags :: (PandocMonad m, ToSources a)
=> ReaderOptions
-> Text
-> a
-> m Pandoc
readTxt2Tags opts s = do
let sources = ensureFinalNewlines 2 (toSources s)
meta <- getT2TMeta
let parsed = flip runReader meta $
readWithM parseT2T (def {stateOptions = opts}) $
crFilter s <> "\n\n"
readWithM parseT2T (def {stateOptions = opts}) sources
case parsed of
Right result -> return result
Left e -> throwError e

View file

@ -74,23 +74,28 @@ import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress,
many1Till, orderedListMarker, readWithM,
registerHeader, spaceChar, stateMeta,
stateOptions, uri, manyTillChar, manyChar, textStr,
many1Char, countChar, many1TillChar)
import Text.Pandoc.Shared (crFilter, splitTextBy, stringify, stripFirstAndLast,
many1Char, countChar, many1TillChar,
alphaNum, anyChar, char, newline, noneOf, oneOf,
space, spaces, string)
import Text.Pandoc.Sources (ToSources(..), Sources)
import Text.Pandoc.Shared (splitTextBy, stringify, stripFirstAndLast,
isURI, tshow)
import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space,
spaces, string)
import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1,
manyTill, notFollowedBy, option, skipMany1)
import Text.Parsec.Prim (getState, many, try, updateState, (<|>))
readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readVimwiki :: (PandocMonad m, ToSources a)
=> ReaderOptions
-> a
-> m Pandoc
readVimwiki opts s = do
res <- readWithM parseVimwiki def{ stateOptions = opts } $ crFilter s
let sources = toSources s
res <- readWithM parseVimwiki def{ stateOptions = opts } sources
case res of
Left e -> throwError e
Right result -> return result
type VwParser = ParserT Text ParserState
type VwParser = ParserT Sources ParserState
-- constants

View file

@ -298,6 +298,7 @@ tabFilter tabStop = T.unlines . map go . T.lines
(tabStop - (T.length s1 `mod` tabStop)) (T.pack " ")
<> go (T.drop 1 s2)
{-# DEPRECATED crFilter "readers filter crs automatically" #-}
-- | Strip out DOS line endings.
crFilter :: T.Text -> T.Text
crFilter = T.filter (/= '\r')

195
src/Text/Pandoc/Sources.hs Normal file
View file

@ -0,0 +1,195 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Sources
Copyright : Copyright (C) 2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Defines Sources object to be used as input to pandoc parsers and redefines Char
parsers so they get source position information from it.
-}
module Text.Pandoc.Sources
( Sources(..)
, ToSources(..)
, UpdateSourcePos(..)
, sourcesToText
, initialSourceName
, addToSources
, ensureFinalNewlines
, addToInput
, satisfy
, oneOf
, noneOf
, anyChar
, char
, string
, newline
, space
, spaces
, letter
, digit
, hexDigit
, alphaNum
)
where
import qualified Text.Parsec as P
import Text.Parsec (Stream(..), ParsecT)
import Text.Parsec.Pos as P
import Data.Text (Text)
import qualified Data.Text as T
import Data.Char (isSpace, isLetter, isAlphaNum, isDigit, isHexDigit)
import Data.String (IsString(..))
import qualified Data.List.NonEmpty as NonEmpty
-- | A list of inputs labeled with source positions. It is assumed
-- that the 'Text's have @\n@ line endings.
newtype Sources = Sources { unSources :: [(SourcePos, Text)] }
deriving (Show, Semigroup, Monoid)
instance Monad m => Stream Sources m Char where
uncons (Sources []) = return Nothing
uncons (Sources ((pos,t):rest)) =
case T.uncons t of
Nothing -> uncons (Sources rest)
Just (c,t') -> return $ Just (c, Sources ((pos,t'):rest))
instance IsString Sources where
fromString s = Sources [(P.initialPos "", T.pack (filter (/='\r') s))]
class ToSources a where
toSources :: a -> Sources
instance ToSources Text where
toSources t = Sources [(P.initialPos "", T.filter (/='\r') t)]
instance ToSources [(FilePath, Text)] where
toSources = Sources
. map (\(fp,t) ->
(P.initialPos fp, T.snoc (T.filter (/='\r') t) '\n'))
instance ToSources Sources where
toSources = id
sourcesToText :: Sources -> Text
sourcesToText (Sources xs) = mconcat $ map snd xs
addToSources :: Monad m => SourcePos -> Text -> ParsecT Sources u m ()
addToSources pos t = do
curpos <- P.getPosition
Sources xs <- P.getInput
let xs' = case xs of
[] -> []
((_,t'):rest) -> (curpos,t'):rest
P.setInput $ Sources ((pos, T.filter (/='\r') t):xs')
ensureFinalNewlines :: Int -- ^ number of trailing newlines
-> Sources
-> Sources
ensureFinalNewlines n (Sources xs) =
case NonEmpty.nonEmpty xs of
Nothing -> Sources [(initialPos "", T.replicate n "\n")]
Just lst ->
case NonEmpty.last lst of
(spos, t) ->
case T.length (T.takeWhileEnd (=='\n') t) of
len | len >= n -> Sources xs
| otherwise -> Sources (NonEmpty.init lst ++
[(spos,
t <> T.replicate (n - len) "\n")])
class UpdateSourcePos s c where
updateSourcePos :: SourcePos -> c -> s -> SourcePos
instance UpdateSourcePos Text Char where
updateSourcePos pos c _ = updatePosChar pos c
instance UpdateSourcePos Sources Char where
updateSourcePos pos c sources =
case sources of
Sources [] -> updatePosChar pos c
Sources ((_,t):(pos',_):_)
| T.null t -> pos'
Sources _ ->
case c of
'\n' -> incSourceLine (setSourceColumn pos 1) 1
'\t' -> incSourceColumn pos (4 - ((sourceColumn pos - 1) `mod` 4))
_ -> incSourceColumn pos 1
-- | Get name of first source in 'Sources'.
initialSourceName :: Sources -> FilePath
initialSourceName (Sources []) = ""
initialSourceName (Sources ((pos,_):_)) = sourceName pos
-- | Add some text to the beginning of the input sources.
-- This simplifies code that expands macros.
addToInput :: Monad m => Text -> ParsecT Sources u m ()
addToInput t = do
Sources xs <- P.getInput
case xs of
[] -> P.setInput $ Sources [(initialPos "",t)]
(pos,t'):rest -> P.setInput $ Sources ((pos, t <> t'):rest)
-- We need to redefine the parsers in Text.Parsec.Char so that they
-- update source positions properly from the Sources stream.
satisfy :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
=> (Char -> Bool) -> ParsecT s u m Char
satisfy f = P.tokenPrim show updateSourcePos matcher
where
matcher c = if f c then Just c else Nothing
oneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
=> [Char] -> ParsecT s u m Char
oneOf cs = satisfy (`elem` cs)
noneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
=> [Char] -> ParsecT s u m Char
noneOf cs = satisfy (`notElem` cs)
anyChar :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
=> ParsecT s u m Char
anyChar = satisfy (const True)
char :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
=> Char -> ParsecT s u m Char
char c = satisfy (== c)
string :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
=> [Char] -> ParsecT s u m [Char]
string = mapM char
newline :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
=> ParsecT s u m Char
newline = satisfy (== '\n')
space :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
=> ParsecT s u m Char
space = satisfy isSpace
spaces :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
=> ParsecT s u m ()
spaces = P.skipMany space P.<?> "white space"
letter :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
=> ParsecT s u m Char
letter = satisfy isLetter
alphaNum :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
=> ParsecT s u m Char
alphaNum = satisfy isAlphaNum
digit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
=> ParsecT s u m Char
digit = satisfy isDigit
hexDigit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
=> ParsecT s u m Char
hexDigit = satisfy isHexDigit

View file

@ -374,8 +374,8 @@ tests = [ testGroup "inline code"
, testGroup "lhs"
[ test (purely $ readMarkdown def{ readerExtensions = enableExtension
Ext_literate_haskell pandocExtensions })
"inverse bird tracks and html" $
"> a\n\n< b\n\n<div>\n"
"inverse bird tracks and html"
$ ("> a\n\n< b\n\n<div>\n" :: Text)
=?> codeBlockWith ("",["haskell","literate"],[]) "a"
<>
codeBlockWith ("",["haskell"],[]) "b"