Change reader types, allowing better tracking of source positions.
Previously, when multiple file arguments were provided, pandoc simply concatenated them and passed the contents to the readers, which took a Text argument. As a result, the readers had no way of knowing which file was the source of any particular bit of text. This meant that we couldn't report accurate source positions on errors or include accurate source positions as attributes in the AST. More seriously, it meant that we couldn't resolve resource paths relative to the files containing them (see e.g. #5501, #6632, #6384, #3752).

Add Text.Pandoc.Sources (exported module), with a `Sources` type and a `ToSources` class. A `Sources` wraps a list of `(SourcePos, Text)` pairs. [API change] A parsec `Stream` instance is provided for `Sources`. The module also exports versions of parsec's `satisfy` and other Char parsers that track source positions accurately from a `Sources` stream (or any instance of the new `UpdateSourcePos` class). Text.Pandoc.Parsing now exports these modified Char parsers instead of the ones parsec provides.

Modified parsers to use `Sources` as their stream type [API change]. The readers that previously took a `Text` argument have been modified to take any instance of `ToSources`, so they may still be used with a `Text`, but they can also be used with a `Sources` object.

In Text.Pandoc.Error, modified the constructor PandocParsecError to take a `Sources` rather than a `Text` as its first argument, so parse error locations can be accurately reported. Also, showPos no longer prints "-" as a source name.
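For orientation, here is a minimal usage sketch (not part of the commit itself) of the reworked reader API. It assumes the top-level `Text.Pandoc` interface (`runIO`, `readMarkdown`, `def`) together with the new `Text.Pandoc.Sources` module; the file names `a.md` and `b.md` are invented for illustration.

```haskell
{-# LANGUAGE OverloadedStrings #-}
import qualified Data.Text as T
import Text.Pandoc                      -- runIO, readMarkdown, def, ...
import Text.Pandoc.Sources (Sources (..))
import Text.Parsec.Pos (initialPos)

main :: IO ()
main = do
  -- Old calling convention still works: a single Text is accepted
  -- via its ToSources instance.
  r1 <- runIO $ readMarkdown def ("# One file" :: T.Text)

  -- New convention: a Sources value pairs each chunk of input with a
  -- SourcePos, so parse errors can report which file they came from.
  -- (File names here are made up for the example.)
  let srcs = Sources [ (initialPos "a.md", "# First\n")
                     , (initialPos "b.md", "# Second\n") ]
  r2 <- runIO $ readMarkdown def srcs
  print (r1, r2)
```

Because `Text` has a `ToSources` instance, existing callers keep working unchanged; only callers that want per-file position tracking need to construct a `Sources` value themselves.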
parent 295d93e96b
commit 6e45607f99
46 changed files with 1024 additions and 616 deletions
@@ -9,6 +9,7 @@
# Ignore some builtin hints
#
- ignore: {name: "Avoid lambda"}
- ignore: {name: "Use bimap"}
- ignore: {name: "Eta reduce"}
- ignore: {name: "Evaluate"}
- ignore: {name: "Reduce duplication"} # TODO: could be more fine-grained

@@ -513,6 +513,7 @@ library
Text.Pandoc.Options,
Text.Pandoc.Extensions,
Text.Pandoc.Shared,
Text.Pandoc.Sources,
Text.Pandoc.MediaBag,
Text.Pandoc.Error,
Text.Pandoc.Filter,

@@ -160,9 +160,11 @@ convertWithOpts opts = do
else optTabStop opts)

let readSources :: [FilePath] -> PandocIO Text
readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$>
mapM readSource srcs
let readSources :: [FilePath] -> PandocIO [(FilePath, Text)]
readSources srcs =
mapM (\fp -> do
t <- readSource fp
return (if fp == "-" then "" else fp, convertTabs t)) srcs

outputSettings <- optToOutputSettings opts

@@ -687,7 +687,7 @@ yamlToMeta (Mapping _ _ m) =
where
pMetaString = pure . MetaString <$> P.manyChar P.anyChar
runEverything p =
runPure (P.readWithM p (def :: P.ParserState) "")
runPure (P.readWithM p (def :: P.ParserState) ("" :: Text))
>>= fmap (Meta . flip P.runF def)
yamlToMeta _ = return mempty
@@ -59,10 +59,11 @@ data Variant = Bibtex | Biblatex
deriving (Show, Eq, Ord)

-- | Parse BibTeX or BibLaTeX into a list of 'Reference's.
readBibtexString :: Variant -- ^ bibtex or biblatex
readBibtexString :: ToSources a
=> Variant -- ^ bibtex or biblatex
-> Locale -- ^ Locale
-> (Text -> Bool) -- ^ Filter on citation ids
-> Text -- ^ bibtex/biblatex text
-> a -- ^ bibtex/biblatex text
-> Either ParseError [Reference Inlines]
readBibtexString variant locale idpred contents = do
case runParser (((resolveCrossRefs variant <$> bibEntries) <* eof) >>=

@@ -70,7 +71,7 @@ readBibtexString variant locale idpred contents = do
filter (\item -> idpred (identifier item) &&
entryType item /= "xdata"))
(fromMaybe defaultLang $ localeLanguage locale, Map.empty)
"" contents of
"" (toSources contents) of
Left err -> Left err
Right xs -> return xs

@@ -339,7 +340,7 @@ defaultLang = Lang "en" Nothing (Just "US") [] [] []
-- a map of bibtex "string" macros
type StringMap = Map.Map Text Text

type BibParser = Parser Text (Lang, StringMap)
type BibParser = Parser Sources (Lang, StringMap)

data Item = Item{ identifier :: Text
, sourcePos :: SourcePos

@@ -804,7 +805,7 @@ bibEntries = do
(bibComment <|> bibPreamble <|> bibString))

bibSkip :: BibParser ()
bibSkip = () <$ take1WhileP (/='@')
bibSkip = skipMany1 (satisfy (/='@'))

bibComment :: BibParser ()
bibComment = do

@@ -829,6 +830,9 @@ bibString = do
updateState (\(l,m) -> (l, Map.insert k v m))
return ()

take1WhileP :: Monad m => (Char -> Bool) -> ParserT Sources u m Text
take1WhileP f = T.pack <$> many1 (satisfy f)

inBraces :: BibParser Text
inBraces = do
char '{'
@@ -23,26 +23,27 @@ import Control.Exception (Exception, displayException)
import Data.Typeable (Typeable)
import Data.Word (Word8)
import Data.Text (Text)
import Data.List (sortOn)
import qualified Data.Text as T
import Data.Ord (Down(..))
import GHC.Generics (Generic)
import Network.HTTP.Client (HttpException)
import System.Exit (ExitCode (..), exitWith)
import System.IO (stderr)
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Sources (Sources(..))
import Text.Printf (printf)
import Text.Parsec.Error
import Text.Parsec.Pos hiding (Line)
import Text.Pandoc.Shared (tshow)
import Citeproc (CiteprocError, prettyCiteprocError)

type Input = Text

data PandocError = PandocIOError Text IOError
| PandocHttpError Text HttpException
| PandocShouldNeverHappenError Text
| PandocSomeError Text
| PandocParseError Text
| PandocParsecError Input ParseError
| PandocParsecError Sources ParseError
| PandocMakePDFError Text
| PandocOptionError Text
| PandocSyntaxMapError Text

@@ -81,22 +82,28 @@ renderError e =
"Please report this to pandoc's developers: " <> s
PandocSomeError s -> s
PandocParseError s -> s
PandocParsecError input err' ->
PandocParsecError (Sources inputs) err' ->
let errPos = errorPos err'
errLine = sourceLine errPos
errColumn = sourceColumn errPos
ls = T.lines input <> [""]
errorInFile = if length ls > errLine - 1
then T.concat ["\n", ls !! (errLine - 1)
,"\n", T.replicate (errColumn - 1) " "
,"^"]
else ""
in "\nError at " <> tshow err' <>
-- if error comes from a chunk or included file,
-- then we won't get the right text this way:
if sourceName errPos == "source"
then errorInFile
else ""
errFile = sourceName errPos
errorInFile =
case sortOn (Down . sourceLine . fst)
[ (pos,t)
| (pos,t) <- inputs
, sourceName pos == errFile
, sourceLine pos <= errLine
] of
[] -> ""
((pos,txt):_) ->
let ls = T.lines txt <> [""]
ln = errLine - sourceLine pos
in if length ls > ln - 1
then T.concat ["\n", ls !! (ln - 1)
,"\n", T.replicate (errColumn - 1) " "
,"^"]
else ""
in "\nError at " <> tshow err' <> errorInFile
PandocMakePDFError s -> s
PandocOptionError s -> s
PandocSyntaxMapError s -> s

@@ -241,9 +241,11 @@ instance ToJSON LogMessage where
showPos :: SourcePos -> Text
showPos pos = Text.pack $ sn ++ "line " ++
show (sourceLine pos) ++ " column " ++ show (sourceColumn pos)
where sn = if sourceName pos == "source" || sourceName pos == ""
then ""
else sourceName pos ++ " "
where
sn' = sourceName pos
sn = if sn' == "source" || sn' == "" || sn' == "-"
then ""
else sn' ++ " "

encodeLogMessages :: [LogMessage] -> BL.ByteString
encodeLogMessages ms =
@ -5,7 +5,6 @@
|
|||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||
{-# LANGUAGE IncoherentInstances #-}
|
||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||
{-# LANGUAGE ViewPatterns #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{- |
|
||||
Module : Text.Pandoc.Parsing
|
||||
|
@ -19,8 +18,7 @@
|
|||
A utility library with parsers used in pandoc readers.
|
||||
-}
|
||||
|
||||
module Text.Pandoc.Parsing ( take1WhileP,
|
||||
takeP,
|
||||
module Text.Pandoc.Parsing ( module Text.Pandoc.Sources,
|
||||
countChar,
|
||||
textStr,
|
||||
anyLine,
|
||||
|
@ -134,22 +132,10 @@ module Text.Pandoc.Parsing ( take1WhileP,
|
|||
getInput,
|
||||
setInput,
|
||||
unexpected,
|
||||
char,
|
||||
letter,
|
||||
digit,
|
||||
alphaNum,
|
||||
skipMany,
|
||||
skipMany1,
|
||||
spaces,
|
||||
space,
|
||||
anyChar,
|
||||
satisfy,
|
||||
newline,
|
||||
string,
|
||||
count,
|
||||
eof,
|
||||
noneOf,
|
||||
oneOf,
|
||||
lookAhead,
|
||||
notFollowedBy,
|
||||
many,
|
||||
|
@ -174,6 +160,8 @@ module Text.Pandoc.Parsing ( take1WhileP,
|
|||
SourcePos,
|
||||
getPosition,
|
||||
setPosition,
|
||||
sourceName,
|
||||
setSourceName,
|
||||
sourceColumn,
|
||||
sourceLine,
|
||||
setSourceColumn,
|
||||
|
@ -189,16 +177,25 @@ module Text.Pandoc.Parsing ( take1WhileP,
|
|||
where
|
||||
|
||||
import Control.Monad.Identity
|
||||
( guard,
|
||||
join,
|
||||
unless,
|
||||
when,
|
||||
void,
|
||||
liftM2,
|
||||
liftM,
|
||||
Identity(..),
|
||||
MonadPlus(mzero) )
|
||||
import Control.Monad.Reader
|
||||
( asks, runReader, MonadReader(ask), Reader, ReaderT(ReaderT) )
|
||||
import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, isAsciiLower,
|
||||
isPunctuation, isSpace, ord, toLower, toUpper)
|
||||
import Data.Default
|
||||
import Data.Default ( Default(..) )
|
||||
import Data.Functor (($>))
|
||||
import Data.List (intercalate, transpose)
|
||||
import qualified Data.Map as M
|
||||
import Data.Maybe (fromMaybe)
|
||||
import qualified Data.Set as Set
|
||||
import Data.String
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import Text.HTML.TagSoup.Entity (lookupEntity)
|
||||
|
@ -207,22 +204,108 @@ import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines)
|
|||
import qualified Text.Pandoc.Builder as B
|
||||
import Text.Pandoc.Class.PandocMonad (PandocMonad, readFileFromDirs, report)
|
||||
import Text.Pandoc.Definition
|
||||
( Target,
|
||||
nullMeta,
|
||||
nullAttr,
|
||||
Meta,
|
||||
ColWidth(ColWidthDefault, ColWidth),
|
||||
TableFoot(TableFoot),
|
||||
TableBody(TableBody),
|
||||
Attr,
|
||||
TableHead(TableHead),
|
||||
Row(..),
|
||||
Alignment(..),
|
||||
Inline(Str),
|
||||
ListNumberDelim(..),
|
||||
ListAttributes,
|
||||
ListNumberStyle(..) )
|
||||
import Text.Pandoc.Logging
|
||||
( LogMessage(CouldNotLoadIncludeFile, DuplicateIdentifier) )
|
||||
import Text.Pandoc.Options
|
||||
( extensionEnabled,
|
||||
Extension(Ext_old_dashes, Ext_tex_math_dollars,
|
||||
Ext_tex_math_single_backslash, Ext_tex_math_double_backslash,
|
||||
Ext_auto_identifiers, Ext_ascii_identifiers, Ext_smart),
|
||||
ReaderOptions(readerTabStop, readerColumns, readerExtensions) )
|
||||
import Text.Pandoc.Readers.LaTeX.Types (Macro)
|
||||
import Text.Pandoc.Shared
|
||||
( uniqueIdent,
|
||||
tshow,
|
||||
mapLeft,
|
||||
compactify,
|
||||
trim,
|
||||
trimr,
|
||||
splitTextByIndices,
|
||||
safeRead,
|
||||
trimMath,
|
||||
schemes,
|
||||
escapeURI )
|
||||
import Text.Pandoc.Sources
|
||||
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
|
||||
import Text.Pandoc.XML (fromEntities)
|
||||
import Text.Parsec hiding (token)
|
||||
import Text.Parsec.Pos (initialPos, newPos, updatePosString)
|
||||
|
||||
import Control.Monad.Except
|
||||
import Text.Parsec
|
||||
( between,
|
||||
setSourceName,
|
||||
Parsec,
|
||||
Column,
|
||||
Line,
|
||||
incSourceLine,
|
||||
incSourceColumn,
|
||||
setSourceLine,
|
||||
setSourceColumn,
|
||||
sourceLine,
|
||||
sourceColumn,
|
||||
sourceName,
|
||||
setSourceName,
|
||||
setPosition,
|
||||
getPosition,
|
||||
updateState,
|
||||
setState,
|
||||
getState,
|
||||
optionMaybe,
|
||||
optional,
|
||||
option,
|
||||
endBy1,
|
||||
endBy,
|
||||
sepEndBy1,
|
||||
sepEndBy,
|
||||
sepBy1,
|
||||
sepBy,
|
||||
try,
|
||||
choice,
|
||||
(<?>),
|
||||
(<|>),
|
||||
manyTill,
|
||||
many1,
|
||||
many,
|
||||
notFollowedBy,
|
||||
lookAhead,
|
||||
eof,
|
||||
count,
|
||||
skipMany1,
|
||||
skipMany,
|
||||
unexpected,
|
||||
setInput,
|
||||
getInput,
|
||||
anyToken,
|
||||
tokenPrim,
|
||||
parse,
|
||||
runParserT,
|
||||
runParser,
|
||||
ParseError,
|
||||
ParsecT,
|
||||
SourcePos,
|
||||
Stream(..) )
|
||||
import Text.Parsec.Pos (initialPos, newPos)
|
||||
import Control.Monad.Except ( MonadError(throwError) )
|
||||
import Text.Pandoc.Error
|
||||
( PandocError(PandocParseError, PandocParsecError) )
|
||||
|
||||
type Parser t s = Parsec t s
|
||||
|
||||
type ParserT = ParsecT
|
||||
|
||||
|
||||
-- | Reader monad wrapping the parser state. This is used to possibly delay
|
||||
-- evaluation until all relevant information has been parsed and made available
|
||||
-- in the parser state.
|
||||
|
@ -251,70 +334,48 @@ instance (Semigroup a, Monoid a) => Monoid (Future s a) where
|
|||
mappend = (<>)
|
||||
|
||||
-- | Like @count@, but packs its result
|
||||
countChar :: (Stream s m Char, Monad m)
|
||||
countChar :: (Stream s m Char, UpdateSourcePos s Char, Monad m)
|
||||
=> Int
|
||||
-> ParsecT s st m Char
|
||||
-> ParsecT s st m Text
|
||||
countChar n = fmap T.pack . count n
|
||||
|
||||
-- | Like @string@, but uses @Text@.
|
||||
textStr :: Stream s m Char => Text -> ParsecT s u m Text
|
||||
textStr :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> Text -> ParsecT s u m Text
|
||||
textStr t = string (T.unpack t) $> t
|
||||
|
||||
-- | Parse characters while a predicate is true.
|
||||
take1WhileP :: Monad m
|
||||
=> (Char -> Bool)
|
||||
-> ParserT Text st m Text
|
||||
take1WhileP f = do
|
||||
-- needed to persuade parsec that this won't match an empty string:
|
||||
c <- satisfy f
|
||||
inp <- getInput
|
||||
pos <- getPosition
|
||||
let (t, rest) = T.span f inp
|
||||
setInput rest
|
||||
setPosition $
|
||||
if f '\t' || f '\n'
|
||||
then updatePosString pos $ T.unpack t
|
||||
else incSourceColumn pos (T.length t)
|
||||
return $ T.singleton c <> t
|
||||
|
||||
-- Parse n characters of input (or the rest of the input if
|
||||
-- there aren't n characters).
|
||||
takeP :: Monad m => Int -> ParserT Text st m Text
|
||||
takeP n = do
|
||||
guard (n > 0)
|
||||
-- faster than 'count n anyChar'
|
||||
inp <- getInput
|
||||
pos <- getPosition
|
||||
let (xs, rest) = T.splitAt n inp
|
||||
-- needed to persuade parsec that this won't match an empty string:
|
||||
anyChar
|
||||
setInput rest
|
||||
setPosition $ updatePosString pos $ T.unpack xs
|
||||
return xs
|
||||
|
||||
-- | Parse any line of text
|
||||
anyLine :: Monad m => ParserT Text st m Text
|
||||
-- | Parse any line of text, returning the contents without the
|
||||
-- final newline.
|
||||
anyLine :: Monad m => ParserT Sources st m Text
|
||||
anyLine = do
|
||||
-- This is much faster than:
|
||||
-- manyTill anyChar newline
|
||||
inp <- getInput
|
||||
pos <- getPosition
|
||||
case T.break (=='\n') inp of
|
||||
(this, T.uncons -> Just ('\n', rest)) -> do
|
||||
-- needed to persuade parsec that this won't match an empty string:
|
||||
anyChar
|
||||
setInput rest
|
||||
setPosition $ incSourceLine (setSourceColumn pos 1) 1
|
||||
return this
|
||||
_ -> mzero
|
||||
case inp of
|
||||
Sources [] -> mzero
|
||||
Sources ((fp,t):inps) ->
|
||||
-- we assume that lines don't span different input files
|
||||
case T.break (=='\n') t of
|
||||
(this, rest)
|
||||
| T.null rest
|
||||
, not (null inps) ->
|
||||
-- line may span different input files, so do it
|
||||
-- character by character
|
||||
T.pack <$> manyTill anyChar newline
|
||||
| otherwise -> do -- either end of inputs or newline in rest
|
||||
setInput $ Sources ((fp, rest):inps)
|
||||
char '\n' -- needed so parsec knows we won't match empty string
|
||||
-- and so source pos is updated
|
||||
return this
|
||||
|
||||
-- | Parse any line, include the final newline in the output
|
||||
anyLineNewline :: Monad m => ParserT Text st m Text
|
||||
anyLineNewline :: Monad m => ParserT Sources st m Text
|
||||
anyLineNewline = (<> "\n") <$> anyLine
|
||||
|
||||
-- | Parse indent by specified number of spaces (or equiv. tabs)
|
||||
indentWith :: Stream s m Char
|
||||
indentWith :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> HasReaderOptions st
|
||||
=> Int -> ParserT s st m Text
|
||||
indentWith num = do
|
||||
|
@ -399,11 +460,13 @@ notFollowedBy' p = try $ join $ do a <- try p
|
|||
return (return ())
|
||||
-- (This version due to Andrew Pimlott on the Haskell mailing list.)
|
||||
|
||||
oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text
|
||||
oneOfStrings' :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text
|
||||
oneOfStrings' f = fmap T.pack . oneOfStrings'' f . fmap T.unpack
|
||||
|
||||
-- TODO: This should be re-implemented in a Text-aware way
|
||||
oneOfStrings'' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
|
||||
oneOfStrings'' :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> (Char -> Char -> Bool) -> [String] -> ParserT s st m String
|
||||
oneOfStrings'' _ [] = Prelude.fail "no strings"
|
||||
oneOfStrings'' matches strs = try $ do
|
||||
c <- anyChar
|
||||
|
@ -418,14 +481,16 @@ oneOfStrings'' matches strs = try $ do
|
|||
-- | Parses one of a list of strings. If the list contains
|
||||
-- two strings one of which is a prefix of the other, the longer
|
||||
-- string will be matched if possible.
|
||||
oneOfStrings :: Stream s m Char => [Text] -> ParserT s st m Text
|
||||
oneOfStrings :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> [Text] -> ParserT s st m Text
|
||||
oneOfStrings = oneOfStrings' (==)
|
||||
|
||||
-- | Parses one of a list of strings (tried in order), case insensitive.
|
||||
|
||||
-- TODO: This will not be accurate with general Unicode (neither
|
||||
-- Text.toLower nor Text.toCaseFold can be implemented with a map)
|
||||
oneOfStringsCI :: Stream s m Char => [Text] -> ParserT s st m Text
|
||||
oneOfStringsCI :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> [Text] -> ParserT s st m Text
|
||||
oneOfStringsCI = oneOfStrings' ciMatch
|
||||
where ciMatch x y = toLower' x == toLower' y
|
||||
-- this optimizes toLower by checking common ASCII case
|
||||
|
@ -436,11 +501,13 @@ oneOfStringsCI = oneOfStrings' ciMatch
|
|||
| otherwise = toLower c
|
||||
|
||||
-- | Parses a space or tab.
|
||||
spaceChar :: Stream s m Char => ParserT s st m Char
|
||||
spaceChar :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m Char
|
||||
spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
|
||||
|
||||
-- | Parses a nonspace, nonnewline character.
|
||||
nonspaceChar :: Stream s m Char => ParserT s st m Char
|
||||
nonspaceChar :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m Char
|
||||
nonspaceChar = satisfy (not . isSpaceChar)
|
||||
|
||||
isSpaceChar :: Char -> Bool
|
||||
|
@ -451,21 +518,24 @@ isSpaceChar '\r' = True
|
|||
isSpaceChar _ = False
|
||||
|
||||
-- | Skips zero or more spaces or tabs.
|
||||
skipSpaces :: Stream s m Char => ParserT s st m ()
|
||||
skipSpaces :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m ()
|
||||
skipSpaces = skipMany spaceChar
|
||||
|
||||
-- | Skips zero or more spaces or tabs, then reads a newline.
|
||||
blankline :: Stream s m Char => ParserT s st m Char
|
||||
blankline :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m Char
|
||||
blankline = try $ skipSpaces >> newline
|
||||
|
||||
-- | Parses one or more blank lines and returns a string of newlines.
|
||||
blanklines :: Stream s m Char => ParserT s st m Text
|
||||
blanklines :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m Text
|
||||
blanklines = T.pack <$> many1 blankline
|
||||
|
||||
-- | Gobble n spaces; if tabs are encountered, expand them
|
||||
-- and gobble some or all of their spaces, leaving the rest.
|
||||
gobbleSpaces :: (HasReaderOptions st, Monad m)
|
||||
=> Int -> ParserT Text st m ()
|
||||
=> Int -> ParserT Sources st m ()
|
||||
gobbleSpaces 0 = return ()
|
||||
gobbleSpaces n
|
||||
| n < 0 = error "gobbleSpaces called with negative number"
|
||||
|
@ -473,18 +543,26 @@ gobbleSpaces n
|
|||
char ' ' <|> eatOneSpaceOfTab
|
||||
gobbleSpaces (n - 1)
|
||||
|
||||
eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Text st m Char
|
||||
eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Sources st m Char
|
||||
eatOneSpaceOfTab = do
|
||||
char '\t'
|
||||
lookAhead (char '\t')
|
||||
pos <- getPosition
|
||||
tabstop <- getOption readerTabStop
|
||||
-- replace the tab on the input stream with spaces
|
||||
let numSpaces = tabstop - ((sourceColumn pos - 1) `mod` tabstop)
|
||||
inp <- getInput
|
||||
setInput $ T.replicate (tabstop - 1) " " <> inp
|
||||
return ' '
|
||||
setInput $
|
||||
case inp of
|
||||
Sources [] -> error "eatOneSpaceOfTab - empty Sources list"
|
||||
Sources ((fp,t):rest) ->
|
||||
-- drop the tab and add spaces
|
||||
Sources ((fp, T.replicate numSpaces " " <> T.drop 1 t):rest)
|
||||
char ' '
|
||||
|
||||
-- | Gobble up to n spaces; if tabs are encountered, expand them
|
||||
-- and gobble some or all of their spaces, leaving the rest.
|
||||
gobbleAtMostSpaces :: (HasReaderOptions st, Monad m)
|
||||
=> Int -> ParserT Text st m Int
|
||||
=> Int -> ParserT Sources st m Int
|
||||
gobbleAtMostSpaces 0 = return 0
|
||||
gobbleAtMostSpaces n
|
||||
| n < 0 = error "gobbleAtMostSpaces called with negative number"
|
||||
|
@ -493,7 +571,8 @@ gobbleAtMostSpaces n
|
|||
(+ 1) <$> gobbleAtMostSpaces (n - 1)
|
||||
|
||||
-- | Parses material enclosed between start and end parsers.
|
||||
enclosed :: (Show end, Stream s m Char) => ParserT s st m t -- ^ start parser
|
||||
enclosed :: (Show end, Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m t -- ^ start parser
|
||||
-> ParserT s st m end -- ^ end parser
|
||||
-> ParserT s st m a -- ^ content parser (to be used repeatedly)
|
||||
-> ParserT s st m [a]
|
||||
|
@ -501,39 +580,41 @@ enclosed start end parser = try $
|
|||
start >> notFollowedBy space >> many1Till parser end
|
||||
|
||||
-- | Parse string, case insensitive.
|
||||
stringAnyCase :: Stream s m Char => Text -> ParserT s st m Text
|
||||
stringAnyCase :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> Text -> ParserT s st m Text
|
||||
stringAnyCase = fmap T.pack . stringAnyCase' . T.unpack
|
||||
|
||||
stringAnyCase' :: Stream s m Char => String -> ParserT s st m String
|
||||
stringAnyCase' :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> String -> ParserT s st m String
|
||||
stringAnyCase' [] = string ""
|
||||
stringAnyCase' (x:xs) = do
|
||||
firstChar <- char (toUpper x) <|> char (toLower x)
|
||||
rest <- stringAnyCase' xs
|
||||
return (firstChar:rest)
|
||||
|
||||
-- TODO rewrite by just adding to Sources stream?
|
||||
-- | Parse contents of 'str' using 'parser' and return result.
|
||||
parseFromString :: (Stream s m Char, IsString s)
|
||||
=> ParserT s st m r
|
||||
parseFromString :: Monad m
|
||||
=> ParserT Sources st m r
|
||||
-> Text
|
||||
-> ParserT s st m r
|
||||
-> ParserT Sources st m r
|
||||
parseFromString parser str = do
|
||||
oldPos <- getPosition
|
||||
setPosition $ initialPos " chunk"
|
||||
setPosition $ initialPos "chunk"
|
||||
oldInput <- getInput
|
||||
setInput $ fromString $ T.unpack str
|
||||
setInput $ toSources str
|
||||
result <- parser
|
||||
spaces
|
||||
eof
|
||||
setInput oldInput
|
||||
setPosition oldPos
|
||||
return result
|
||||
|
||||
-- | Like 'parseFromString' but specialized for 'ParserState'.
|
||||
-- This resets 'stateLastStrPos', which is almost always what we want.
|
||||
parseFromString' :: (Stream s m Char, IsString s, HasLastStrPosition u)
|
||||
=> ParserT s u m a
|
||||
parseFromString' :: (Monad m, HasLastStrPosition u)
|
||||
=> ParserT Sources u m a
|
||||
-> Text
|
||||
-> ParserT s u m a
|
||||
-> ParserT Sources u m a
|
||||
parseFromString' parser str = do
|
||||
oldLastStrPos <- getLastStrPos <$> getState
|
||||
updateState $ setLastStrPos Nothing
|
||||
|
@ -542,7 +623,7 @@ parseFromString' parser str = do
|
|||
return res
|
||||
|
||||
-- | Parse raw line block up to and including blank lines.
|
||||
lineClump :: Monad m => ParserT Text st m Text
|
||||
lineClump :: Monad m => ParserT Sources st m Text
|
||||
lineClump = blanklines
|
||||
<|> (T.unlines <$> many1 (notFollowedBy blankline >> anyLine))
|
||||
|
||||
|
@ -551,7 +632,7 @@ lineClump = blanklines
|
|||
-- pairs of open and close, which must be different. For example,
|
||||
-- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))"
|
||||
-- and return "hello (there)".
|
||||
charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char
|
||||
charsInBalanced :: (Stream s m Char, UpdateSourcePos s Char) => Char -> Char -> ParserT s st m Char
|
||||
-> ParserT s st m Text
|
||||
charsInBalanced open close parser = try $ do
|
||||
char open
|
||||
|
@ -570,7 +651,7 @@ charsInBalanced open close parser = try $ do
|
|||
-- Auxiliary functions for romanNumeral:
|
||||
|
||||
-- | Parses a roman numeral (uppercase or lowercase), returns number.
|
||||
romanNumeral :: Stream s m Char => Bool -- ^ Uppercase if true
|
||||
romanNumeral :: (Stream s m Char, UpdateSourcePos s Char) => Bool -- ^ Uppercase if true
|
||||
-> ParserT s st m Int
|
||||
romanNumeral upperCase = do
|
||||
let rchar uc = char $ if upperCase then uc else toLower uc
|
||||
|
@ -606,7 +687,7 @@ romanNumeral upperCase = do
|
|||
|
||||
-- | Parses an email address; returns original and corresponding
|
||||
-- escaped mailto: URI.
|
||||
emailAddress :: Stream s m Char => ParserT s st m (Text, Text)
|
||||
emailAddress :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text)
|
||||
emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
|
||||
where toResult mbox dom = let full = fromEntities $ T.pack $ mbox ++ '@':dom
|
||||
in (full, escapeURI $ "mailto:" <> full)
|
||||
|
@ -630,11 +711,11 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
|
|||
isEmailPunct c = T.any (== c) "!\"#$%&'*+-/=?^_{|}~;"
|
||||
|
||||
|
||||
uriScheme :: Stream s m Char => ParserT s st m Text
|
||||
uriScheme :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Text
|
||||
uriScheme = oneOfStringsCI (Set.toList schemes)
|
||||
|
||||
-- | Parses a URI. Returns pair of original and URI-escaped version.
|
||||
uri :: Stream s m Char => ParserT s st m (Text, Text)
|
||||
uri :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text)
|
||||
uri = try $ do
|
||||
scheme <- uriScheme
|
||||
char ':'
|
||||
|
@ -677,7 +758,7 @@ uri = try $ do
|
|||
uriChunkBetween l r = try $ do chunk <- between (char l) (char r) uriChunk
|
||||
return (T.pack $ [l] ++ chunk ++ [r])
|
||||
|
||||
mathInlineWith :: Stream s m Char => Text -> Text -> ParserT s st m Text
|
||||
mathInlineWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParserT s st m Text
|
||||
mathInlineWith op cl = try $ do
|
||||
textStr op
|
||||
when (op == "$") $ notFollowedBy space
|
||||
|
@ -698,10 +779,10 @@ mathInlineWith op cl = try $ do
|
|||
notFollowedBy digit -- to prevent capture of $5
|
||||
return $ trimMath $ T.concat words'
|
||||
where
|
||||
inBalancedBraces :: Stream s m Char => Int -> Text -> ParserT s st m Text
|
||||
inBalancedBraces :: (Stream s m Char, UpdateSourcePos s Char) => Int -> Text -> ParserT s st m Text
|
||||
inBalancedBraces n = fmap T.pack . inBalancedBraces' n . T.unpack
|
||||
|
||||
inBalancedBraces' :: Stream s m Char => Int -> String -> ParserT s st m String
|
||||
inBalancedBraces' :: (Stream s m Char, UpdateSourcePos s Char) => Int -> String -> ParserT s st m String
|
||||
inBalancedBraces' 0 "" = do
|
||||
c <- anyChar
|
||||
if c == '{'
|
||||
|
@ -718,13 +799,13 @@ mathInlineWith op cl = try $ do
|
|||
'{' -> inBalancedBraces' (numOpen + 1) (c:xs)
|
||||
_ -> inBalancedBraces' numOpen (c:xs)
|
||||
|
||||
mathDisplayWith :: Stream s m Char => Text -> Text -> ParserT s st m Text
|
||||
mathDisplayWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParserT s st m Text
|
||||
mathDisplayWith op cl = try $ fmap T.pack $ do
|
||||
textStr op
|
||||
many1Till (satisfy (/= '\n') <|> (newline <* notFollowedBy' blankline))
|
||||
(try $ textStr cl)
|
||||
|
||||
mathDisplay :: (HasReaderOptions st, Stream s m Char)
|
||||
mathDisplay :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m Text
|
||||
mathDisplay =
|
||||
(guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
|
||||
|
@ -733,7 +814,7 @@ mathDisplay =
|
|||
<|> (guardEnabled Ext_tex_math_double_backslash >>
|
||||
mathDisplayWith "\\\\[" "\\\\]")
|
||||
|
||||
mathInline :: (HasReaderOptions st , Stream s m Char)
|
||||
mathInline :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m Text
|
||||
mathInline =
|
||||
(guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
|
||||
|
@ -746,7 +827,7 @@ mathInline =
|
|||
-- displacement (the difference between the source column at the end
|
||||
-- and the source column at the beginning). Vertical displacement
|
||||
-- (source row) is ignored.
|
||||
withHorizDisplacement :: Stream s m Char
|
||||
withHorizDisplacement :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m a -- ^ Parser to apply
|
||||
-> ParserT s st m (a, Int) -- ^ (result, displacement)
|
||||
withHorizDisplacement parser = do
|
||||
|
@ -758,30 +839,37 @@ withHorizDisplacement parser = do
|
|||
-- | Applies a parser and returns the raw string that was parsed,
|
||||
-- along with the value produced by the parser.
|
||||
withRaw :: Monad m
|
||||
=> ParsecT Text st m a
|
||||
-> ParsecT Text st m (a, Text)
|
||||
=> ParsecT Sources st m a
|
||||
-> ParsecT Sources st m (a, Text)
|
||||
withRaw parser = do
|
||||
pos1 <- getPosition
|
||||
inp <- getInput
|
||||
inps1 <- getInput
|
||||
result <- parser
|
||||
pos2 <- getPosition
|
||||
let (l1,c1) = (sourceLine pos1, sourceColumn pos1)
|
||||
let (l2,c2) = (sourceLine pos2, sourceColumn pos2)
|
||||
let inplines = take ((l2 - l1) + 1) $ T.lines inp
|
||||
let raw = case inplines of
|
||||
[] -> ""
|
||||
[l] -> T.take (c2 - c1) l
|
||||
ls -> T.unlines (init ls) <> T.take (c2 - 1) (last ls)
|
||||
return (result, raw)
|
||||
inps2 <- getInput
|
||||
-- 'raw' is the difference between inps1 and inps2
|
||||
return (result, sourcesDifference inps1 inps2)
|
||||
|
||||
sourcesDifference :: Sources -> Sources -> Text
|
||||
sourcesDifference (Sources is1) (Sources is2) = go is1 is2
|
||||
where
|
||||
go inps1 inps2 =
|
||||
case (inps1, inps2) of
|
||||
([], _) -> mempty
|
||||
(_, []) -> mconcat $ map snd inps1
|
||||
((p1,t1):rest1, (p2, t2):rest2)
|
||||
| p1 == p2
|
||||
, t1 == t2 -> go rest1 rest2
|
||||
| p1 == p2
|
||||
, t1 /= t2 -> fromMaybe mempty $ T.stripSuffix t2 t1
|
||||
| otherwise -> t1 <> go rest1 inps2
|
||||
|
||||
-- | Parses backslash, then applies character parser.
|
||||
escaped :: Stream s m Char
|
||||
escaped :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m Char -- ^ Parser for character to escape
|
||||
-> ParserT s st m Char
|
||||
escaped parser = try $ char '\\' >> parser
|
||||
|
||||
-- | Parse character entity.
|
||||
characterReference :: Stream s m Char => ParserT s st m Char
|
||||
characterReference :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char
|
||||
characterReference = try $ do
|
||||
char '&'
|
||||
ent <- many1Till nonspaceChar (char ';')
|
||||
|
@ -794,19 +882,19 @@ characterReference = try $ do
|
|||
_ -> Prelude.fail "entity not found"
|
||||
|
||||
-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
|
||||
upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
|
||||
upperRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
|
||||
upperRoman = do
|
||||
num <- romanNumeral True
|
||||
return (UpperRoman, num)
|
||||
|
||||
-- | Parses a lowercase roman numeral and returns (LowerRoman, number).
|
||||
lowerRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
|
||||
lowerRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
|
||||
lowerRoman = do
|
||||
num <- romanNumeral False
|
||||
return (LowerRoman, num)
|
||||
|
||||
-- | Parses a decimal numeral and returns (Decimal, number).
|
||||
decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
|
||||
decimal :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
|
||||
decimal = do
|
||||
num <- many1 digit
|
||||
return (Decimal, fromMaybe 1 $ safeRead $ T.pack num)
|
||||
|
@ -815,7 +903,7 @@ decimal = do
|
|||
-- returns (DefaultStyle, [next example number]). The next
|
||||
-- example number is incremented in parser state, and the label
|
||||
-- (if present) is added to the label table.
|
||||
exampleNum :: Stream s m Char
|
||||
exampleNum :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s ParserState m (ListNumberStyle, Int)
|
||||
exampleNum = do
|
||||
char '@'
|
||||
|
@ -834,37 +922,37 @@ exampleNum = do
|
|||
return (Example, num)
|
||||
|
||||
-- | Parses a '#' returns (DefaultStyle, 1).
|
||||
defaultNum :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
|
||||
defaultNum :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
|
||||
defaultNum = do
|
||||
char '#'
|
||||
return (DefaultStyle, 1)
|
||||
|
||||
-- | Parses a lowercase letter and returns (LowerAlpha, number).
|
||||
lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
|
||||
lowerAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
|
||||
lowerAlpha = do
|
||||
ch <- satisfy isAsciiLower
|
||||
return (LowerAlpha, ord ch - ord 'a' + 1)
|
||||
|
||||
-- | Parses an uppercase letter and returns (UpperAlpha, number).
|
||||
upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
|
||||
upperAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
|
||||
upperAlpha = do
|
||||
ch <- satisfy isAsciiUpper
|
||||
return (UpperAlpha, ord ch - ord 'A' + 1)
|
||||
|
||||
-- | Parses a roman numeral i or I
|
||||
romanOne :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
|
||||
romanOne :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
|
||||
romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
|
||||
(char 'I' >> return (UpperRoman, 1))
|
||||
|
||||
-- | Parses an ordered list marker and returns list attributes.
|
||||
anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes
|
||||
anyOrderedListMarker :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s ParserState m ListAttributes
|
||||
anyOrderedListMarker = choice
|
||||
[delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
|
||||
numParser <- [decimal, exampleNum, defaultNum, romanOne,
|
||||
lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
|
||||
|
||||
-- | Parses a list number (num) followed by a period, returns list attributes.
|
||||
inPeriod :: Stream s m Char
|
||||
inPeriod :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m (ListNumberStyle, Int)
|
||||
-> ParserT s st m ListAttributes
|
||||
inPeriod num = try $ do
|
||||
|
@ -876,7 +964,7 @@ inPeriod num = try $ do
|
|||
return (start, style, delim)
|
||||
|
||||
-- | Parses a list number (num) followed by a paren, returns list attributes.
|
||||
inOneParen :: Stream s m Char
|
||||
inOneParen :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m (ListNumberStyle, Int)
|
||||
-> ParserT s st m ListAttributes
|
||||
inOneParen num = try $ do
|
||||
|
@ -885,7 +973,7 @@ inOneParen num = try $ do
|
|||
return (start, style, OneParen)
|
||||
|
||||
-- | Parses a list number (num) enclosed in parens, returns list attributes.
|
||||
inTwoParens :: Stream s m Char
|
||||
inTwoParens :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m (ListNumberStyle, Int)
|
||||
-> ParserT s st m ListAttributes
|
||||
inTwoParens num = try $ do
|
||||
|
@ -896,7 +984,7 @@ inTwoParens num = try $ do
|
|||
|
||||
-- | Parses an ordered list marker with a given style and delimiter,
|
||||
-- returns number.
|
||||
orderedListMarker :: Stream s m Char
|
||||
orderedListMarker :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ListNumberStyle
|
||||
-> ListNumberDelim
|
||||
-> ParserT s ParserState m Int
|
||||
|
@ -919,10 +1007,10 @@ orderedListMarker style delim = do
|
|||
return start
|
||||
|
||||
-- | Parses a character reference and returns a Str element.
|
||||
charRef :: Stream s m Char => ParserT s st m Inline
|
||||
charRef :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inline
|
||||
charRef = Str . T.singleton <$> characterReference
|
||||
|
||||
lineBlockLine :: Monad m => ParserT Text st m Text
|
||||
lineBlockLine :: Monad m => ParserT Sources st m Text
|
||||
lineBlockLine = try $ do
|
||||
char '|'
|
||||
char ' '
|
||||
|
@ -932,11 +1020,11 @@ lineBlockLine = try $ do
|
|||
continuations <- many (try $ char ' ' >> anyLine)
|
||||
return $ white <> T.unwords (line : continuations)
|
||||
|
||||
blankLineBlockLine :: Stream s m Char => ParserT s st m Char
|
||||
blankLineBlockLine :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char
|
||||
blankLineBlockLine = try (char '|' >> blankline)
|
||||
|
||||
-- | Parses an RST-style line block and returns a list of strings.
|
||||
lineBlockLines :: Monad m => ParserT Text st m [Text]
|
||||
lineBlockLines :: Monad m => ParserT Sources st m [Text]
|
||||
lineBlockLines = try $ do
|
||||
lines' <- many1 (lineBlockLine <|> (T.singleton <$> blankLineBlockLine))
|
||||
skipMany blankline
|
||||
|
@ -944,7 +1032,8 @@ lineBlockLines = try $ do
|
|||
|
||||
-- | Parse a table using 'headerParser', 'rowParser',
|
||||
-- 'lineParser', and 'footerParser'.
|
||||
tableWith :: (Stream s m Char, HasReaderOptions st, Monad mf)
|
||||
tableWith :: (Stream s m Char, UpdateSourcePos s Char,
|
||||
HasReaderOptions st, Monad mf)
|
||||
=> ParserT s st m (mf [Blocks], [Alignment], [Int])
|
||||
-> ([Int] -> ParserT s st m (mf [Blocks]))
|
||||
-> ParserT s st m sep
|
||||
|
@ -964,7 +1053,8 @@ tableWith headerParser rowParser lineParser footerParser = try $ do
|
|||
|
||||
type TableComponents mf = ([Alignment], [Double], mf [Row], mf [Row])
|
||||
|
||||
tableWith' :: (Stream s m Char, HasReaderOptions st, Monad mf)
|
||||
tableWith' :: (Stream s m Char, UpdateSourcePos s Char,
|
||||
HasReaderOptions st, Monad mf)
|
||||
=> ParserT s st m (mf [Blocks], [Alignment], [Int])
|
||||
-> ([Int] -> ParserT s st m (mf [Blocks]))
|
||||
-> ParserT s st m sep
|
||||
|
@ -1013,20 +1103,19 @@ widthsFromIndices numColumns' indices =
|
|||
-- (which may be grid), then the rows,
|
||||
-- which may be grid, separated by blank lines, and
|
||||
-- ending with a footer (dashed line followed by blank line).
|
||||
gridTableWith :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st,
|
||||
Monad mf, IsString s)
|
||||
=> ParserT s st m (mf Blocks) -- ^ Block list parser
|
||||
gridTableWith :: (Monad m, HasReaderOptions st, HasLastStrPosition st, Monad mf)
|
||||
=> ParserT Sources st m (mf Blocks) -- ^ Block list parser
|
||||
-> Bool -- ^ Headerless table
|
||||
-> ParserT s st m (mf Blocks)
|
||||
-> ParserT Sources st m (mf Blocks)
|
||||
gridTableWith blocks headless =
|
||||
tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
|
||||
(gridTableSep '-') gridTableFooter
|
||||
|
||||
gridTableWith' :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st,
|
||||
Monad mf, IsString s)
|
||||
=> ParserT s st m (mf Blocks) -- ^ Block list parser
|
||||
gridTableWith' :: (Monad m, HasReaderOptions st, HasLastStrPosition st,
|
||||
Monad mf)
|
||||
=> ParserT Sources st m (mf Blocks) -- ^ Block list parser
|
||||
-> Bool -- ^ Headerless table
|
||||
-> ParserT s st m (TableComponents mf)
|
||||
-> ParserT Sources st m (TableComponents mf)
|
||||
gridTableWith' blocks headless =
|
||||
tableWith' (gridTableHeader headless blocks) (gridTableRow blocks)
|
||||
(gridTableSep '-') gridTableFooter
|
||||
|
@ -1035,7 +1124,7 @@ gridTableSplitLine :: [Int] -> Text -> [Text]
|
|||
gridTableSplitLine indices line = map removeFinalBar $ tail $
|
||||
splitTextByIndices (init indices) $ trimr line
|
||||
|
||||
gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment)
|
||||
gridPart :: Monad m => Char -> ParserT Sources st m ((Int, Int), Alignment)
|
||||
gridPart ch = do
|
||||
leftColon <- option False (True <$ char ':')
|
||||
dashes <- many1 (char ch)
|
||||
|
@ -1050,7 +1139,7 @@ gridPart ch = do
|
|||
(False, False) -> AlignDefault
|
||||
return ((lengthDashes, lengthDashes + 1), alignment)
|
||||
|
||||
gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)]
|
||||
gridDashedLines :: Monad m => Char -> ParserT Sources st m [((Int, Int), Alignment)]
|
||||
gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
|
||||
|
||||
removeFinalBar :: Text -> Text
|
||||
|
@ -1059,14 +1148,14 @@ removeFinalBar = T.dropWhileEnd go . T.dropWhileEnd (=='|')
|
|||
go c = T.any (== c) " \t"
|
||||
|
||||
-- | Separator between rows of grid table.
|
||||
gridTableSep :: Stream s m Char => Char -> ParserT s st m Char
|
||||
gridTableSep :: Monad m => Char -> ParserT Sources st m Char
|
||||
gridTableSep ch = try $ gridDashedLines ch >> return '\n'
|
||||
|
||||
-- | Parse header for a grid table.
|
||||
gridTableHeader :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st)
|
||||
gridTableHeader :: (Monad m, Monad mf, HasLastStrPosition st)
|
||||
=> Bool -- ^ Headerless table
|
||||
-> ParserT s st m (mf Blocks)
|
||||
-> ParserT s st m (mf [Blocks], [Alignment], [Int])
|
||||
-> ParserT Sources st m (mf Blocks)
|
||||
-> ParserT Sources st m (mf [Blocks], [Alignment], [Int])
|
||||
gridTableHeader True _ = do
|
||||
optional blanklines
|
||||
dashes <- gridDashedLines '-'
|
||||
|
@ -1089,17 +1178,17 @@ gridTableHeader False blocks = try $ do
|
|||
heads <- sequence <$> mapM (parseFromString' blocks . trim) rawHeads
|
||||
return (heads, aligns, indices)
|
||||
|
||||
gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [Text]
|
||||
gridTableRawLine :: (Stream s m Char, UpdateSourcePos s Char) => [Int] -> ParserT s st m [Text]
|
||||
gridTableRawLine indices = do
|
||||
char '|'
|
||||
line <- many1Till anyChar newline
|
||||
return (gridTableSplitLine indices $ T.pack line)
|
||||
|
||||
-- | Parse row of grid table.
|
||||
gridTableRow :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st)
|
||||
=> ParserT s st m (mf Blocks)
|
||||
gridTableRow :: (Monad m, Monad mf, HasLastStrPosition st)
|
||||
=> ParserT Sources st m (mf Blocks)
|
||||
-> [Int]
|
||||
-> ParserT s st m (mf [Blocks])
|
||||
-> ParserT Sources st m (mf [Blocks])
|
||||
gridTableRow blocks indices = do
|
||||
colLines <- many1 (gridTableRawLine indices)
|
||||
let cols = map ((<> "\n") . T.unlines . removeOneLeadingSpace) $
|
||||
|
@ -1120,34 +1209,38 @@ removeOneLeadingSpace xs =
|
|||
Just (c, _) -> c == ' '
|
||||
|
||||
-- | Parse footer for a grid table.
|
||||
gridTableFooter :: Stream s m Char => ParserT s st m ()
|
||||
gridTableFooter :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
|
||||
gridTableFooter = optional blanklines
|
||||
|
||||
---
|
||||
|
||||
-- | Removes the ParsecT layer from the monad transformer stack
|
||||
readWithM :: Monad m
|
||||
=> ParserT Text st m a -- ^ parser
|
||||
-> st -- ^ initial state
|
||||
-> Text -- ^ input
|
||||
readWithM :: (Monad m, ToSources t)
|
||||
=> ParserT Sources st m a -- ^ parser
|
||||
-> st -- ^ initial state
|
||||
-> t -- ^ input
|
||||
-> m (Either PandocError a)
|
||||
readWithM parser state input =
|
||||
mapLeft (PandocParsecError input) <$> runParserT parser state "source" input
|
||||
mapLeft (PandocParsecError sources)
|
||||
<$> runParserT parser state (initialSourceName sources) sources
|
||||
where
|
||||
sources = toSources input
|
||||
|
||||
-- | Parse a string with a given parser and state
|
||||
readWith :: Parser Text st a
|
||||
readWith :: ToSources t
|
||||
=> Parser Sources st a
|
||||
-> st
|
||||
-> Text
|
||||
-> t
|
||||
-> Either PandocError a
|
||||
readWith p t inp = runIdentity $ readWithM p t inp
|
||||
|
||||
-- | Parse a string with @parser@ (for testing).
|
||||
testStringWith :: Show a
|
||||
=> ParserT Text ParserState Identity a
|
||||
=> ParserT Sources ParserState Identity a
|
||||
-> Text
|
||||
-> IO ()
|
||||
testStringWith parser str = UTF8.putStrLn $ tshow $
|
||||
readWith parser defaultParserState str
|
||||
readWith parser defaultParserState (toSources str)
|
||||
|
||||
-- | Parsing options.
|
||||
data ParserState = ParserState
|
||||
|
@ -1394,19 +1487,23 @@ registerHeader (ident,classes,kvs) header' = do
|
|||
updateState $ updateIdentifierList $ Set.insert ident
|
||||
return (ident,classes,kvs)
|
||||
|
||||
smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
|
||||
smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st,
|
||||
HasQuoteContext st m,
|
||||
Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m Inlines
|
||||
-> ParserT s st m Inlines
|
||||
smartPunctuation inlineParser = do
|
||||
guardEnabled Ext_smart
|
||||
choice [ quoted inlineParser, apostrophe, doubleCloseQuote, dash, ellipses ]
|
||||
|
||||
quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
|
||||
quoted :: (HasLastStrPosition st, HasQuoteContext st m,
|
||||
Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m Inlines
|
||||
-> ParserT s st m Inlines
|
||||
quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
|
||||
|
||||
singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
|
||||
singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m,
|
||||
Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m Inlines
|
||||
-> ParserT s st m Inlines
|
||||
singleQuoted inlineParser = do
|
||||
|
@ -1416,7 +1513,8 @@ singleQuoted inlineParser = do
|
|||
(withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd)))
|
||||
<|> pure "\8217"
|
||||
|
||||
doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st, Stream s m Char)
|
||||
doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st,
|
||||
Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m Inlines
|
||||
-> ParserT s st m Inlines
|
||||
doubleQuoted inlineParser = do
|
||||
|
@ -1433,13 +1531,14 @@ failIfInQuoteContext context = do
|
|||
context' <- getQuoteContext
|
||||
when (context' == context) $ Prelude.fail "already inside quotes"
|
||||
|
||||
charOrRef :: Stream s m Char => [Char] -> ParserT s st m Char
|
||||
charOrRef :: (Stream s m Char, UpdateSourcePos s Char) => [Char] -> ParserT s st m Char
|
||||
charOrRef cs =
|
||||
oneOf cs <|> try (do c <- characterReference
|
||||
guard (c `elem` cs)
|
||||
return c)
|
||||
|
||||
singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
|
||||
singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m,
|
||||
Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m ()
|
||||
singleQuoteStart = do
|
||||
failIfInQuoteContext InSingleQuote
|
||||
|
@ -1449,7 +1548,7 @@ singleQuoteStart = do
|
|||
charOrRef "'\8216\145"
|
||||
void $ lookAhead (satisfy (not . isSpaceChar))
|
||||
|
||||
singleQuoteEnd :: Stream s m Char
|
||||
singleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m ()
|
||||
singleQuoteEnd = try $ do
|
||||
charOrRef "'\8217\146"
|
||||
|
@ -1457,7 +1556,7 @@ singleQuoteEnd = try $ do
|
|||
|
||||
doubleQuoteStart :: (HasLastStrPosition st,
|
||||
HasQuoteContext st m,
|
||||
Stream s m Char)
|
||||
Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m ()
|
||||
doubleQuoteStart = do
|
||||
failIfInQuoteContext InDoubleQuote
|
||||
|
@ -1465,21 +1564,21 @@ doubleQuoteStart = do
|
|||
try $ do charOrRef "\"\8220\147"
|
||||
void $ lookAhead (satisfy (not . isSpaceChar))
|
||||
|
||||
doubleQuoteEnd :: Stream s m Char
|
||||
doubleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m ()
|
||||
doubleQuoteEnd = void (charOrRef "\"\8221\148")
|
||||
|
||||
apostrophe :: Stream s m Char => ParserT s st m Inlines
|
||||
apostrophe :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
|
||||
apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\8217")
|
||||
|
||||
doubleCloseQuote :: Stream s m Char => ParserT s st m Inlines
|
||||
doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
|
||||
doubleCloseQuote = B.str "\8221" <$ char '"'
|
||||
|
||||
ellipses :: Stream s m Char
|
||||
ellipses :: (Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m Inlines
|
||||
ellipses = try (string "..." >> return (B.str "\8230"))
|
||||
|
||||
dash :: (HasReaderOptions st, Stream s m Char)
|
||||
dash :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
|
||||
=> ParserT s st m Inlines
|
||||
dash = try $ do
|
||||
oldDashes <- extensionEnabled Ext_old_dashes <$> getOption readerExtensions
|
||||
|
@ -1506,7 +1605,7 @@ nested p = do
|
|||
updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
|
||||
return res
|
||||
|
||||
citeKey :: (Stream s m Char, HasLastStrPosition st)
|
||||
citeKey :: (Stream s m Char, UpdateSourcePos s Char, HasLastStrPosition st)
|
||||
=> ParserT s st m (Bool, Text)
|
||||
citeKey = try $ do
|
||||
guard =<< notAfterString
|
||||
|
@ -1575,10 +1674,11 @@ insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
|
|||
insertIncludedFile blocks totoks dirs f =
|
||||
runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f
|
||||
|
||||
-- TODO: replace this with something using addToSources.
|
||||
-- | Parse content of include file as future blocks. Circular includes result in
|
||||
-- an @PandocParseError@.
|
||||
insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st)
|
||||
=> ParserT Text st m (Future st Blocks)
|
||||
=> ParserT Sources st m (Future st Blocks)
|
||||
-> [FilePath] -> FilePath
|
||||
-> ParserT Text st m (Future st Blocks)
|
||||
insertIncludedFileF p = insertIncludedFile' p id
|
||||
-> ParserT Sources st m (Future st Blocks)
|
||||
insertIncludedFileF p = insertIncludedFile' p (\t -> Sources [(initialPos "",t)])
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
{-# LANGUAGE MonoLocalBinds #-}
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
{-# LANGUAGE ScopedTypeVariables #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{- |
|
||||
|
@ -72,6 +73,7 @@ import Text.Pandoc.Error
|
|||
import Text.Pandoc.Extensions
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Readers.CommonMark
|
||||
import Text.Pandoc.Readers.Markdown
|
||||
import Text.Pandoc.Readers.Creole
|
||||
import Text.Pandoc.Readers.DocBook
|
||||
import Text.Pandoc.Readers.Docx
|
||||
|
@ -84,7 +86,6 @@ import Text.Pandoc.Readers.HTML (readHtml)
|
|||
import Text.Pandoc.Readers.JATS (readJATS)
|
||||
import Text.Pandoc.Readers.Jira (readJira)
|
||||
import Text.Pandoc.Readers.LaTeX
|
||||
import Text.Pandoc.Readers.Markdown
|
||||
import Text.Pandoc.Readers.MediaWiki
|
||||
import Text.Pandoc.Readers.Muse
|
||||
import Text.Pandoc.Readers.Native
|
||||
|
@ -102,50 +103,52 @@ import Text.Pandoc.Readers.CSV
|
|||
import Text.Pandoc.Readers.CslJson
|
||||
import Text.Pandoc.Readers.BibTeX
|
||||
import qualified Text.Pandoc.UTF8 as UTF8
|
||||
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
||||
|
||||
data Reader m = TextReader (ReaderOptions -> Text -> m Pandoc)
|
||||
data Reader m = TextReader (forall a . ToSources a =>
|
||||
ReaderOptions -> a -> m Pandoc)
|
||||
| ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc)
|
||||
|
||||
-- | Association list of formats and readers.
|
||||
readers :: PandocMonad m => [(Text, Reader m)]
|
||||
readers = [ ("native" , TextReader readNative)
|
||||
,("json" , TextReader readJSON)
|
||||
,("markdown" , TextReader readMarkdown)
|
||||
,("markdown_strict" , TextReader readMarkdown)
|
||||
,("markdown_phpextra" , TextReader readMarkdown)
|
||||
,("markdown_github" , TextReader readMarkdown)
|
||||
,("markdown_mmd", TextReader readMarkdown)
|
||||
,("commonmark" , TextReader readCommonMark)
|
||||
,("commonmark_x" , TextReader readCommonMark)
|
||||
,("creole" , TextReader readCreole)
|
||||
,("dokuwiki" , TextReader readDokuWiki)
|
||||
,("gfm" , TextReader readCommonMark)
|
||||
,("rst" , TextReader readRST)
|
||||
,("mediawiki" , TextReader readMediaWiki)
|
||||
,("vimwiki" , TextReader readVimwiki)
|
||||
,("docbook" , TextReader readDocBook)
|
||||
,("opml" , TextReader readOPML)
|
||||
,("org" , TextReader readOrg)
|
||||
,("textile" , TextReader readTextile) -- TODO : textile+lhs
|
||||
,("html" , TextReader readHtml)
|
||||
,("jats" , TextReader readJATS)
|
||||
,("jira" , TextReader readJira)
|
||||
,("latex" , TextReader readLaTeX)
|
||||
,("haddock" , TextReader readHaddock)
|
||||
,("twiki" , TextReader readTWiki)
|
||||
,("tikiwiki" , TextReader readTikiWiki)
|
||||
,("docx" , ByteStringReader readDocx)
|
||||
,("odt" , ByteStringReader readOdt)
|
||||
,("t2t" , TextReader readTxt2Tags)
|
||||
,("epub" , ByteStringReader readEPUB)
|
||||
,("muse" , TextReader readMuse)
|
||||
,("man" , TextReader readMan)
|
||||
,("fb2" , TextReader readFB2)
|
||||
,("ipynb" , TextReader readIpynb)
|
||||
,("csv" , TextReader readCSV)
|
||||
,("csljson" , TextReader readCslJson)
|
||||
,("bibtex" , TextReader readBibTeX)
|
||||
,("biblatex" , TextReader readBibLaTeX)
|
||||
readers = [("native" , TextReader readNative)
|
||||
,("json" , TextReader readJSON)
|
||||
,("markdown" , TextReader readMarkdown)
|
||||
,("markdown_strict" , TextReader readMarkdown)
|
||||
,("markdown_phpextra" , TextReader readMarkdown)
|
||||
,("markdown_github" , TextReader readMarkdown)
|
||||
,("markdown_mmd", TextReader readMarkdown)
|
||||
,("commonmark" , TextReader readCommonMark)
|
||||
,("commonmark_x" , TextReader readCommonMark)
|
||||
,("creole" , TextReader readCreole)
|
||||
,("dokuwiki" , TextReader readDokuWiki)
|
||||
,("gfm" , TextReader readCommonMark)
|
||||
,("rst" , TextReader readRST)
|
||||
,("mediawiki" , TextReader readMediaWiki)
|
||||
,("vimwiki" , TextReader readVimwiki)
|
||||
,("docbook" , TextReader readDocBook)
|
||||
,("opml" , TextReader readOPML)
|
||||
,("org" , TextReader readOrg)
|
||||
,("textile" , TextReader readTextile) -- TODO : textile+lhs
|
||||
,("html" , TextReader readHtml)
|
||||
,("jats" , TextReader readJATS)
|
||||
,("jira" , TextReader readJira)
|
||||
,("latex" , TextReader readLaTeX)
|
||||
,("haddock" , TextReader readHaddock)
|
||||
,("twiki" , TextReader readTWiki)
|
||||
,("tikiwiki" , TextReader readTikiWiki)
|
||||
,("docx" , ByteStringReader readDocx)
|
||||
,("odt" , ByteStringReader readOdt)
|
||||
,("t2t" , TextReader readTxt2Tags)
|
||||
,("epub" , ByteStringReader readEPUB)
|
||||
,("muse" , TextReader readMuse)
|
||||
,("man" , TextReader readMan)
|
||||
,("fb2" , TextReader readFB2)
|
||||
,("ipynb" , TextReader readIpynb)
|
||||
,("csv" , TextReader readCSV)
|
||||
,("csljson" , TextReader readCslJson)
|
||||
,("bibtex" , TextReader readBibTeX)
|
||||
,("biblatex" , TextReader readBibLaTeX)
|
||||
]
|
||||
|
||||
-- | Retrieve reader, extensions based on formatSpec (format+extensions).
|
||||
|
@ -173,9 +176,13 @@ getReader s =
|
|||
return (r, exts)
|
||||
|
||||
-- | Read pandoc document from JSON format.
|
||||
readJSON :: PandocMonad m
|
||||
=> ReaderOptions -> Text -> m Pandoc
|
||||
readJSON _ t =
|
||||
case eitherDecode' . BL.fromStrict . UTF8.fromText $ t of
|
||||
readJSON :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions
|
||||
-> a
|
||||
-> m Pandoc
|
||||
readJSON _ s =
|
||||
case eitherDecode' . BL.fromStrict . UTF8.fromText
|
||||
. sourcesToText . toSources $ s of
|
||||
Right doc -> return doc
|
||||
Left e -> throwError $ PandocParseError ("JSON parse error: " <> T.pack e)
|
||||
Left e -> throwError $ PandocParseError ("JSON parse error: "
|
||||
<> T.pack e)
|
||||
|
|
|
@ -23,30 +23,33 @@ where
|
|||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Builder (setMeta, cite, str)
|
||||
import Data.Text (Text)
|
||||
import Citeproc (Lang(..), parseLang)
|
||||
import Citeproc.Locale (getLocale)
|
||||
import Text.Pandoc.Error (PandocError(..))
|
||||
import Text.Pandoc.Class (PandocMonad, lookupEnv)
|
||||
import Text.Pandoc.Citeproc.BibTeX as BibTeX
|
||||
import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
|
||||
import Text.Pandoc.Sources (ToSources(..))
|
||||
import Control.Monad.Except (throwError)
|
||||
|
||||
-- | Read BibTeX from an input string and return a Pandoc document.
|
||||
-- The document will have only metadata, with an empty body.
|
||||
-- The metadata will contain a `references` field with the
|
||||
-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
|
||||
readBibTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
|
||||
readBibTeX :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions -> a -> m Pandoc
|
||||
readBibTeX = readBibTeX' BibTeX.Bibtex
|
||||
|
||||
-- | Read BibLaTeX from an input string and return a Pandoc document.
|
||||
-- The document will have only metadata, with an empty body.
|
||||
-- The metadata will contain a `references` field with the
|
||||
-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
|
||||
readBibLaTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
|
||||
readBibLaTeX :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions -> a -> m Pandoc
|
||||
readBibLaTeX = readBibTeX' BibTeX.Biblatex
|
||||
|
||||
readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc
|
||||
readBibTeX' :: (PandocMonad m, ToSources a)
|
||||
=> Variant -> ReaderOptions -> a -> m Pandoc
|
||||
readBibTeX' variant _opts t = do
|
||||
mblangEnv <- lookupEnv "LANG"
|
||||
let defaultLang = Lang "en" Nothing (Just "US") [] [] []
|
||||
|
@ -60,7 +63,7 @@ readBibTeX' variant _opts t = do
|
|||
Left _ -> throwError $ PandocCiteprocError e
|
||||
Right l -> return l
|
||||
case BibTeX.readBibtexString variant locale (const True) t of
|
||||
Left e -> throwError $ PandocParsecError t e
|
||||
Left e -> throwError $ PandocParsecError (toSources t) e
|
||||
Right refs -> return $ setMeta "references"
|
||||
(map referenceToMetaValue refs)
|
||||
. setMeta "nocite"
|
||||
|
|
|
@ -13,23 +13,23 @@
|
|||
Conversion from CSV to a 'Pandoc' table.
|
||||
-}
|
||||
module Text.Pandoc.Readers.CSV ( readCSV ) where
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import Text.Pandoc.CSV (parseCSV, defaultCSVOptions)
|
||||
import Text.Pandoc.Definition
|
||||
import qualified Text.Pandoc.Builder as B
|
||||
import Text.Pandoc.Class (PandocMonad)
|
||||
import Text.Pandoc.Shared (crFilter)
|
||||
import Text.Pandoc.Error
|
||||
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
||||
import Text.Pandoc.Options (ReaderOptions)
|
||||
import Control.Monad.Except (throwError)
|
||||
|
||||
readCSV :: PandocMonad m
|
||||
readCSV :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions -- ^ Reader options
|
||||
-> Text -- ^ Text to parse (assuming @'\n'@ line endings)
|
||||
-> a
|
||||
-> m Pandoc
|
||||
readCSV _opts s =
|
||||
case parseCSV defaultCSVOptions (crFilter s) of
|
||||
readCSV _opts s = do
|
||||
let txt = sourcesToText $ toSources s
|
||||
case parseCSV defaultCSVOptions txt of
|
||||
Right (r:rs) -> return $ B.doc $ B.table capt
|
||||
(zip aligns widths)
|
||||
(TableHead nullAttr hdrs)
|
||||
|
@ -45,4 +45,4 @@ readCSV _opts s =
|
|||
aligns = replicate numcols AlignDefault
|
||||
widths = replicate numcols ColWidthDefault
|
||||
Right [] -> return $ B.doc mempty
|
||||
Left e -> throwError $ PandocParsecError s e
|
||||
Left e -> throwError $ PandocParsecError (toSources [("",txt)]) e
|
||||
|
|
|
@ -30,45 +30,55 @@ import Text.Pandoc.Readers.Metadata (yamlMetaBlock)
|
|||
import Control.Monad.Except
|
||||
import Data.Functor.Identity (runIdentity)
|
||||
import Data.Typeable
|
||||
import Text.Pandoc.Parsing (runParserT, getPosition, sourceLine,
|
||||
runF, defaultParserState, take1WhileP, option)
|
||||
import Text.Pandoc.Parsing (runParserT, getPosition,
|
||||
runF, defaultParserState, option, many1, anyChar,
|
||||
Sources(..), ToSources(..), ParserT, Future,
|
||||
sourceName)
|
||||
import qualified Data.Text as T
|
||||
|
||||
-- | Parse a CommonMark formatted string into a 'Pandoc' structure.
|
||||
readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
|
||||
readCommonMark :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions -> a -> m Pandoc
|
||||
readCommonMark opts s
|
||||
| isEnabled Ext_yaml_metadata_block opts
|
||||
, "---" `T.isPrefixOf` s = do
|
||||
let metaValueParser = do
|
||||
inp <- option "" $ take1WhileP (const True)
|
||||
case runIdentity
|
||||
(commonmarkWith (specFor opts) "metadata value" inp) of
|
||||
Left _ -> mzero
|
||||
Right (Cm bls :: Cm () Blocks)
|
||||
-> return $ return $ B.toMetaValue bls
|
||||
res <- runParserT (do meta <- yamlMetaBlock metaValueParser
|
||||
pos <- getPosition
|
||||
return (meta, pos))
|
||||
defaultParserState "YAML metadata" s
|
||||
case res of
|
||||
Left _ -> readCommonMarkBody opts s
|
||||
Right (meta, pos) -> do
|
||||
let dropLines 0 = id
|
||||
dropLines n = dropLines (n - 1) . T.drop 1 . T.dropWhile (/='\n')
|
||||
let metaLines = sourceLine pos - 1
|
||||
let body = T.replicate metaLines "\n" <> dropLines metaLines s
|
||||
Pandoc _ bs <- readCommonMarkBody opts body
|
||||
return $ Pandoc (runF meta defaultParserState) bs
|
||||
| otherwise = readCommonMarkBody opts s
|
||||
| isEnabled Ext_yaml_metadata_block opts = do
|
||||
let sources = toSources s
|
||||
let toks = concatMap sourceToToks (unSources sources)
|
||||
res <- runParserT (do meta <- yamlMetaBlock (metaValueParser opts)
|
||||
pos <- getPosition
|
||||
return (meta, pos))
|
||||
defaultParserState "YAML metadata" (toSources s)
|
||||
case res of
|
||||
Left _ -> readCommonMarkBody opts sources toks
|
||||
Right (meta, pos) -> do
|
||||
-- strip off metadata section and parse body
|
||||
let body = dropWhile (\t -> tokPos t < pos) toks
|
||||
Pandoc _ bs <- readCommonMarkBody opts sources body
|
||||
return $ Pandoc (runF meta defaultParserState) bs
|
||||
| otherwise = do
|
||||
let sources = toSources s
|
||||
let toks = concatMap sourceToToks (unSources sources)
|
||||
readCommonMarkBody opts sources toks
|
||||
|
||||
readCommonMarkBody :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
|
||||
readCommonMarkBody opts s
|
||||
sourceToToks :: (SourcePos, Text) -> [Tok]
|
||||
sourceToToks (pos, s) = tokenize (sourceName pos) s
|
||||
|
||||
metaValueParser :: Monad m
|
||||
=> ReaderOptions -> ParserT Sources st m (Future st MetaValue)
|
||||
metaValueParser opts = do
|
||||
inp <- option "" $ T.pack <$> many1 anyChar
|
||||
let toks = concatMap sourceToToks (unSources (toSources inp))
|
||||
case runIdentity (parseCommonmarkWith (specFor opts) toks) of
|
||||
Left _ -> mzero
|
||||
Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls
|
||||
|
||||
readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc
|
||||
readCommonMarkBody opts s toks
|
||||
| isEnabled Ext_sourcepos opts =
|
||||
case runIdentity (commonmarkWith (specFor opts) "" s) of
|
||||
case runIdentity (parseCommonmarkWith (specFor opts) toks) of
|
||||
Left err -> throwError $ PandocParsecError s err
|
||||
Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls
|
||||
| otherwise =
|
||||
case runIdentity (commonmarkWith (specFor opts) "" s) of
|
||||
case runIdentity (parseCommonmarkWith (specFor opts) toks) of
|
||||
Left err -> throwError $ PandocParsecError s err
|
||||
Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
|
||||
|
||||
|
|
|
@ -23,21 +23,20 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
|
|||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Parsing hiding (enclosed)
|
||||
import Text.Pandoc.Shared (crFilter)
|
||||
|
||||
|
||||
-- | Read creole from an input string and return a Pandoc document.
|
||||
readCreole :: PandocMonad m
|
||||
readCreole :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions
|
||||
-> Text
|
||||
-> a
|
||||
-> m Pandoc
|
||||
readCreole opts s = do
|
||||
res <- readWithM parseCreole def{ stateOptions = opts } $ crFilter s <> "\n\n"
|
||||
let sources = ensureFinalNewlines 2 (toSources s)
|
||||
res <- readWithM parseCreole def{ stateOptions = opts } sources
|
||||
case res of
|
||||
Left e -> throwError e
|
||||
Right d -> return d
|
||||
|
||||
type CRLParser = ParserT Text ParserState
|
||||
type CRLParser = ParserT Sources ParserState
|
||||
|
||||
--
|
||||
-- Utility functions
|
||||
|
|
|
@ -24,21 +24,22 @@ import Text.Pandoc.Options
|
|||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Builder (setMeta, cite, str)
|
||||
import qualified Text.Pandoc.UTF8 as UTF8
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import Text.Pandoc.Error (PandocError(..))
|
||||
import Text.Pandoc.Class (PandocMonad)
|
||||
import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
|
||||
import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
|
||||
import Control.Monad.Except (throwError)
|
||||
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
||||
|
||||
-- | Read CSL JSON from an input string and return a Pandoc document.
|
||||
-- The document will have only metadata, with an empty body.
|
||||
-- The metadata will contain a `references` field with the
|
||||
-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
|
||||
readCslJson :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
|
||||
readCslJson _opts t =
|
||||
case cslJsonToReferences (UTF8.fromText t) of
|
||||
readCslJson :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions -> a -> m Pandoc
|
||||
readCslJson _opts x =
|
||||
case cslJsonToReferences (UTF8.fromText $ sourcesToText $ toSources x) of
|
||||
Left e -> throwError $ PandocParseError $ T.pack e
|
||||
Right refs -> return $ setMeta "references"
|
||||
(map referenceToMetaValue refs)
|
||||
|
|
|
@ -30,7 +30,8 @@ import Text.Pandoc.Builder
|
|||
import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Logging (LogMessage(..))
|
||||
import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
|
||||
import Text.Pandoc.Shared (safeRead, extractSpaces)
|
||||
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
||||
import Text.TeXMath (readMathML, writeTeX)
|
||||
import Text.Pandoc.XML.Light
|
||||
|
||||
|
@ -539,11 +540,15 @@ instance Default DBState where
|
|||
, dbContent = [] }
|
||||
|
||||
|
||||
readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
|
||||
readDocBook :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions
|
||||
-> a
|
||||
-> m Pandoc
|
||||
readDocBook _ inp = do
|
||||
let sources = toSources inp
|
||||
tree <- either (throwError . PandocXMLError "") return $
|
||||
parseXMLContents
|
||||
(TL.fromStrict . handleInstructions $ crFilter inp)
|
||||
(TL.fromStrict . handleInstructions . sourcesToText $ sources)
|
||||
(bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
|
||||
return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
|
||||
|
||||
|
|
|
@ -29,26 +29,27 @@ import Text.Pandoc.Definition
|
|||
import Text.Pandoc.Error (PandocError (PandocParsecError))
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Parsing hiding (enclosed, nested)
|
||||
import Text.Pandoc.Shared (crFilter, trim, stringify, tshow)
|
||||
import Text.Pandoc.Shared (trim, stringify, tshow)
|
||||
|
||||
-- | Read DokuWiki from an input string and return a Pandoc document.
|
||||
readDokuWiki :: PandocMonad m
|
||||
readDokuWiki :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions
|
||||
-> Text
|
||||
-> a
|
||||
-> m Pandoc
|
||||
readDokuWiki opts s = do
|
||||
let input = crFilter s
|
||||
res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input
|
||||
let sources = toSources s
|
||||
res <- runParserT parseDokuWiki def {stateOptions = opts }
|
||||
(initialSourceName sources) sources
|
||||
case res of
|
||||
Left e -> throwError $ PandocParsecError input e
|
||||
Left e -> throwError $ PandocParsecError sources e
|
||||
Right d -> return d
|
||||
|
||||
type DWParser = ParserT Text ParserState
|
||||
type DWParser = ParserT Sources ParserState
|
||||
|
||||
-- * Utility functions
|
||||
|
||||
-- | Parse end-of-line, which can be either a newline or end-of-file.
|
||||
eol :: Stream s m Char => ParserT s st m ()
|
||||
eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
|
||||
eol = void newline <|> eof
|
||||
|
||||
nested :: PandocMonad m => DWParser m a -> DWParser m a
|
||||
|
|
|
@ -40,9 +40,9 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad, insertMedia, report)
|
|||
import Text.Pandoc.Error
|
||||
import Text.Pandoc.Logging
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Shared (crFilter)
|
||||
import Text.Pandoc.XML.Light
|
||||
import qualified Text.Pandoc.UTF8 as UTF8
|
||||
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
||||
|
||||
type FB2 m = StateT FB2State m
|
||||
|
||||
|
@ -63,9 +63,12 @@ instance HasMeta FB2State where
|
|||
setMeta field v s = s {fb2Meta = setMeta field v (fb2Meta s)}
|
||||
deleteMeta field s = s {fb2Meta = deleteMeta field (fb2Meta s)}
|
||||
|
||||
readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
|
||||
readFB2 :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions
|
||||
-> a
|
||||
-> m Pandoc
|
||||
readFB2 _ inp =
|
||||
case parseXMLElement $ TL.fromStrict $ crFilter inp of
|
||||
case parseXMLElement $ TL.fromStrict $ sourcesToText $ toSources inp of
|
||||
Left msg -> throwError $ PandocXMLError "" msg
|
||||
Right el -> do
|
||||
(bs, st) <- runStateT (parseRootElement el) def
|
||||
|
|
|
@ -62,21 +62,21 @@ import Text.Pandoc.Options (
|
|||
extensionEnabled)
|
||||
import Text.Pandoc.Parsing hiding ((<|>))
|
||||
import Text.Pandoc.Shared (
|
||||
addMetaField, blocksToInlines', crFilter, escapeURI, extractSpaces,
|
||||
addMetaField, blocksToInlines', escapeURI, extractSpaces,
|
||||
htmlSpanLikeElements, renderTags', safeRead, tshow)
|
||||
import Text.Pandoc.Walk
|
||||
import Text.Parsec.Error
|
||||
import Text.TeXMath (readMathML, writeTeX)
|
||||
|
||||
-- | Convert HTML-formatted string to 'Pandoc' document.
|
||||
readHtml :: PandocMonad m
|
||||
readHtml :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions -- ^ Reader options
|
||||
-> Text -- ^ String to parse (assumes @'\n'@ line endings)
|
||||
-> a -- ^ Input to parse
|
||||
-> m Pandoc
|
||||
readHtml opts inp = do
|
||||
let tags = stripPrefixes $ canonicalizeTags $
|
||||
parseTagsOptions parseOptions{ optTagPosition = True }
|
||||
(crFilter inp)
|
||||
(sourcesToText $ toSources inp)
|
||||
parseDoc = do
|
||||
blocks <- fixPlains False . mconcat <$> manyTill block eof
|
||||
meta <- stateMeta . parserState <$> getState
|
||||
|
@ -830,17 +830,19 @@ pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
|
|||
|
||||
pTagText :: PandocMonad m => TagParser m Inlines
|
||||
pTagText = try $ do
|
||||
pos <- getPosition
|
||||
(TagText str) <- pSatisfy isTagText
|
||||
st <- getState
|
||||
qu <- ask
|
||||
parsed <- lift $ lift $
|
||||
flip runReaderT qu $ runParserT (many pTagContents) st "text" str
|
||||
flip runReaderT qu $ runParserT (many pTagContents) st "text"
|
||||
(Sources [(pos, str)])
|
||||
case parsed of
|
||||
Left _ -> throwError $ PandocParseError $
|
||||
"Could not parse `" <> str <> "'"
|
||||
Right result -> return $ mconcat result
|
||||
|
||||
type InlinesParser m = HTMLParser m Text
|
||||
type InlinesParser m = HTMLParser m Sources
|
||||
|
||||
pTagContents :: PandocMonad m => InlinesParser m Inlines
|
||||
pTagContents =
|
||||
|
@ -970,13 +972,14 @@ isCommentTag = tagComment (const True)
|
|||
-- | Matches a stretch of HTML in balanced tags.
|
||||
htmlInBalanced :: Monad m
|
||||
=> (Tag Text -> Bool)
|
||||
-> ParserT Text st m Text
|
||||
-> ParserT Sources st m Text
|
||||
htmlInBalanced f = try $ do
|
||||
lookAhead (char '<')
|
||||
inp <- getInput
|
||||
let ts = canonicalizeTags $
|
||||
parseTagsOptions parseOptions{ optTagWarning = True,
|
||||
optTagPosition = True } inp
|
||||
sources <- getInput
|
||||
let ts = canonicalizeTags
|
||||
$ parseTagsOptions parseOptions{ optTagWarning = True,
|
||||
optTagPosition = True }
|
||||
$ sourcesToText sources
|
||||
case ts of
|
||||
(TagPosition sr sc : t@(TagOpen tn _) : rest) -> do
|
||||
guard $ f t
|
||||
|
@ -1018,15 +1021,17 @@ hasTagWarning _ = False
|
|||
-- | Matches a tag meeting a certain condition.
|
||||
htmlTag :: (HasReaderOptions st, Monad m)
|
||||
=> (Tag Text -> Bool)
|
||||
-> ParserT Text st m (Tag Text, Text)
|
||||
-> ParserT Sources st m (Tag Text, Text)
|
||||
htmlTag f = try $ do
|
||||
lookAhead (char '<')
|
||||
startpos <- getPosition
|
||||
inp <- getInput
|
||||
sources <- getInput
|
||||
let inp = sourcesToText sources
|
||||
let ts = canonicalizeTags $ parseTagsOptions
|
||||
parseOptions{ optTagWarning = False
|
||||
, optTagPosition = True }
|
||||
(inp <> " ") -- add space to ensure that
|
||||
(inp <> " ")
|
||||
-- add space to ensure that
|
||||
-- we get a TagPosition after the tag
|
||||
(next, ln, col) <- case ts of
|
||||
(TagPosition{} : next : TagPosition ln col : _)
|
||||
|
|
|
@ -19,7 +19,7 @@ import Control.Monad.Except (throwError)
|
|||
import Data.List (intersperse)
|
||||
import Data.List.NonEmpty (nonEmpty)
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.Text (Text, unpack)
|
||||
import Data.Text (unpack)
|
||||
import qualified Data.Text as T
|
||||
import Documentation.Haddock.Parser
|
||||
import Documentation.Haddock.Types as H
|
||||
|
@ -29,15 +29,17 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
|
|||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Error
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Shared (crFilter, splitTextBy, trim)
|
||||
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
||||
import Text.Pandoc.Shared (splitTextBy, trim)
|
||||
|
||||
|
||||
-- | Parse Haddock markup and return a 'Pandoc' document.
|
||||
readHaddock :: PandocMonad m
|
||||
readHaddock :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions
|
||||
-> Text
|
||||
-> a
|
||||
-> m Pandoc
|
||||
readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of
|
||||
readHaddock opts s = case readHaddockEither opts
|
||||
(unpack . sourcesToText . toSources $ s) of
|
||||
Right result -> return result
|
||||
Left e -> throwError e
|
||||
|
||||
|
|
|
@ -39,10 +39,12 @@ import Data.Aeson as Aeson
|
|||
import Control.Monad.Except (throwError)
|
||||
import Text.Pandoc.Readers.Markdown (readMarkdown)
|
||||
import qualified Text.Pandoc.UTF8 as UTF8
|
||||
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
||||
|
||||
readIpynb :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
|
||||
readIpynb opts t = do
|
||||
let src = BL.fromStrict (TE.encodeUtf8 t)
|
||||
readIpynb :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions -> a -> m Pandoc
|
||||
readIpynb opts x = do
|
||||
let src = BL.fromStrict . TE.encodeUtf8 . sourcesToText $ toSources x
|
||||
case eitherDecode src of
|
||||
Right (notebook4 :: Notebook NbV4) -> notebookToPandoc opts notebook4
|
||||
Left _ ->
|
||||
|
|
|
@ -29,11 +29,12 @@ import Text.HTML.TagSoup.Entity (lookupEntity)
|
|||
import Text.Pandoc.Builder
|
||||
import Text.Pandoc.Class.PandocMonad (PandocMonad)
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
|
||||
import Text.Pandoc.Shared (safeRead, extractSpaces)
|
||||
import Text.TeXMath (readMathML, writeTeX)
|
||||
import Text.Pandoc.XML.Light
|
||||
import qualified Data.Set as S (fromList, member)
|
||||
import Data.Set ((\\))
|
||||
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
||||
|
||||
type JATS m = StateT JATSState m
|
||||
|
||||
|
@ -52,10 +53,14 @@ instance Default JATSState where
|
|||
, jatsContent = [] }
|
||||
|
||||
|
||||
readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
|
||||
readJATS :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions
|
||||
-> a
|
||||
-> m Pandoc
|
||||
readJATS _ inp = do
|
||||
let sources = toSources inp
|
||||
tree <- either (throwError . PandocXMLError "") return $
|
||||
parseXMLContents (TL.fromStrict $ crFilter inp)
|
||||
parseXMLContents (TL.fromStrict . sourcesToText $ sources)
|
||||
(bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
|
||||
return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)
|
||||
|
||||
|
|
|
@ -20,18 +20,20 @@ import Text.Pandoc.Builder hiding (cell)
|
|||
import Text.Pandoc.Error (PandocError (PandocParseError))
|
||||
import Text.Pandoc.Options (ReaderOptions)
|
||||
import Text.Pandoc.Shared (stringify)
|
||||
|
||||
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
||||
import qualified Text.Jira.Markup as Jira
|
||||
|
||||
-- | Read Jira wiki markup.
|
||||
readJira :: PandocMonad m
|
||||
readJira :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions
|
||||
-> Text
|
||||
-> a
|
||||
-> m Pandoc
|
||||
readJira _opts s = case parse s of
|
||||
Right d -> return $ jiraToPandoc d
|
||||
Left e -> throwError . PandocParseError $
|
||||
"Jira parse error" `append` pack (show e)
|
||||
readJira _opts inp = do
|
||||
let sources = toSources inp
|
||||
case parse (sourcesToText sources) of
|
||||
Right d -> return $ jiraToPandoc d
|
||||
Left e -> throwError . PandocParseError $
|
||||
"Jira parse error" `append` pack (show e)
|
||||
|
||||
jiraToPandoc :: Jira.Doc -> Pandoc
|
||||
jiraToPandoc (Jira.Doc blks) = doc $ foldMap jiraToPandocBlocks blks
|
||||
|
|
|
@ -77,16 +77,17 @@ import Data.List.NonEmpty (nonEmpty)
|
|||
-- import Debug.Trace (traceShowId)
|
||||
|
||||
-- | Parse LaTeX from string and return 'Pandoc' document.
|
||||
readLaTeX :: PandocMonad m
|
||||
readLaTeX :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions -- ^ Reader options
|
||||
-> Text -- ^ String to parse (assumes @'\n'@ line endings)
|
||||
-> a -- ^ Input to parse
|
||||
-> m Pandoc
|
||||
readLaTeX opts ltx = do
|
||||
let sources = toSources ltx
|
||||
parsed <- runParserT parseLaTeX def{ sOptions = opts } "source"
|
||||
(tokenize "source" (crFilter ltx))
|
||||
(tokenizeSources sources)
|
||||
case parsed of
|
||||
Right result -> return result
|
||||
Left e -> throwError $ PandocParsecError ltx e
|
||||
Left e -> throwError $ PandocParsecError sources e
|
||||
|
||||
parseLaTeX :: PandocMonad m => LP m Pandoc
|
||||
parseLaTeX = do
|
||||
|
@ -132,11 +133,11 @@ resolveRefs _ x = x
|
|||
|
||||
|
||||
rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
|
||||
=> ParserT Text s m Text
|
||||
=> ParserT Sources s m Text
|
||||
rawLaTeXBlock = do
|
||||
lookAhead (try (char '\\' >> letter))
|
||||
inp <- getInput
|
||||
let toks = tokenize "source" inp
|
||||
let toks = tokenizeSources inp
|
||||
snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks
|
||||
<|> rawLaTeXParser toks True
|
||||
(do choice (map controlSeq
|
||||
|
@ -163,11 +164,11 @@ beginOrEndCommand = try $ do
|
|||
(txt <> untokenize rawargs)
|
||||
|
||||
rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
|
||||
=> ParserT Text s m Text
|
||||
=> ParserT Sources s m Text
|
||||
rawLaTeXInline = do
|
||||
lookAhead (try (char '\\' >> letter))
|
||||
inp <- getInput
|
||||
let toks = tokenize "source" inp
|
||||
let toks = tokenizeSources inp
|
||||
raw <- snd <$>
|
||||
( rawLaTeXParser toks True
|
||||
(mempty <$ (controlSeq "input" >> skipMany rawopt >> braced))
|
||||
|
@ -178,11 +179,11 @@ rawLaTeXInline = do
|
|||
finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439
|
||||
return $ raw <> T.pack finalbraces
|
||||
|
||||
inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines
|
||||
inlineCommand :: PandocMonad m => ParserT Sources ParserState m Inlines
|
||||
inlineCommand = do
|
||||
lookAhead (try (char '\\' >> letter))
|
||||
inp <- getInput
|
||||
let toks = tokenize "source" inp
|
||||
let toks = tokenizeSources inp
|
||||
fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
|
||||
inlines
|
||||
|
||||
|
@ -641,7 +642,7 @@ opt = do
|
|||
parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks
|
||||
case parsed of
|
||||
Right result -> return result
|
||||
Left e -> throwError $ PandocParsecError (untokenize toks) e
|
||||
Left e -> throwError $ PandocParsecError (toSources toks) e
|
||||
|
||||
-- block elements:
|
||||
|
||||
|
|
|
@ -120,7 +120,7 @@ simpleCiteArgs inline = try $ do
|
|||
runParserT (mconcat <$> many inline) st "bracketed option" toks
|
||||
case parsed of
|
||||
Right result -> return result
|
||||
Left e -> throwError $ PandocParsecError (untokenize toks) e
|
||||
Left e -> throwError $ PandocParsecError (toSources toks) e
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -27,6 +27,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
|
|||
, rawLaTeXParser
|
||||
, applyMacros
|
||||
, tokenize
|
||||
, tokenizeSources
|
||||
, untokenize
|
||||
, untoken
|
||||
, totoks
|
||||
|
@ -248,7 +249,7 @@ withVerbatimMode parser = do
|
|||
|
||||
rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
|
||||
=> [Tok] -> Bool -> LP m a -> LP m a
|
||||
-> ParserT Text s m (a, Text)
|
||||
-> ParserT Sources s m (a, Text)
|
||||
rawLaTeXParser toks retokenize parser valParser = do
|
||||
pstate <- getState
|
||||
let lstate = def{ sOptions = extractReaderOptions pstate }
|
||||
|
@ -268,7 +269,7 @@ rawLaTeXParser toks retokenize parser valParser = do
|
|||
Left _ -> mzero
|
||||
Right ((val, raw), st) -> do
|
||||
updateState (updateMacros (sMacros st <>))
|
||||
_ <- takeP (T.length (untokenize toks'))
|
||||
void $ count (T.length (untokenize toks')) anyChar
|
||||
let result = untokenize raw
|
||||
-- ensure we end with space if input did, see #4442
|
||||
let result' =
|
||||
|
@ -281,7 +282,7 @@ rawLaTeXParser toks retokenize parser valParser = do
|
|||
return (val, result')
|
||||
|
||||
applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
|
||||
=> Text -> ParserT Text s m Text
|
||||
=> Text -> ParserT Sources s m Text
|
||||
applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
|
||||
do let retokenize = untokenize <$> many (satisfyTok (const True))
|
||||
pstate <- getState
|
||||
|
@ -301,6 +302,11 @@ QuickCheck property:
|
|||
> let t = T.pack s in untokenize (tokenize "random" t) == t
|
||||
-}
|
||||
|
||||
tokenizeSources :: Sources -> [Tok]
|
||||
tokenizeSources = concatMap tokenizeSource . unSources
|
||||
where
|
||||
tokenizeSource (pos, t) = totoks pos t
|
||||
|
||||
tokenize :: SourceName -> Text -> [Tok]
|
||||
tokenize sourcename = totoks (initialPos sourcename)
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
{-# LANGUAGE FlexibleInstances #-}
|
||||
{- |
|
||||
Module : Text.Pandoc.Readers.LaTeX.Types
|
||||
Copyright : Copyright (C) 2017-2021 John MacFarlane
|
||||
|
@ -18,7 +19,9 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
|
|||
)
|
||||
where
|
||||
import Data.Text (Text)
|
||||
import Text.Parsec.Pos (SourcePos)
|
||||
import Text.Parsec.Pos (SourcePos, sourceName)
|
||||
import Text.Pandoc.Sources
|
||||
import Data.List (groupBy)
|
||||
|
||||
data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
|
||||
Esc1 | Esc2 | Arg Int
|
||||
|
@ -27,6 +30,16 @@ data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
|
|||
data Tok = Tok SourcePos TokType Text
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
instance ToSources [Tok] where
|
||||
toSources = Sources
|
||||
. map (\ts -> case ts of
|
||||
Tok p _ _ : _ -> (p, mconcat $ map tokToText ts)
|
||||
_ -> error "toSources [Tok] encountered empty group")
|
||||
. groupBy (\(Tok p1 _ _) (Tok p2 _ _) -> sourceName p1 == sourceName p2)
|
||||
|
||||
tokToText :: Tok -> Text
|
||||
tokToText (Tok _ _ t) = t
|
||||
|
||||
data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ import Control.Monad (liftM, mzero, guard, void)
|
|||
import Control.Monad.Trans (lift)
|
||||
import Control.Monad.Except (throwError)
|
||||
import Data.Maybe (catMaybes, isJust)
|
||||
import Data.List (intersperse, intercalate)
|
||||
import Data.List (intersperse)
|
||||
import qualified Data.Text as T
|
||||
import Text.Pandoc.Builder as B
|
||||
import Text.Pandoc.Class.PandocMonad (PandocMonad(..), report)
|
||||
|
@ -29,9 +29,8 @@ import Text.Pandoc.Logging (LogMessage(..))
|
|||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Parsing
|
||||
import Text.Pandoc.Walk (query)
|
||||
import Text.Pandoc.Shared (crFilter, mapLeft)
|
||||
import Text.Pandoc.Shared (mapLeft)
|
||||
import Text.Pandoc.Readers.Roff -- TODO explicit imports
|
||||
import Text.Parsec hiding (tokenPrim)
|
||||
import qualified Text.Parsec as Parsec
|
||||
import Text.Parsec.Pos (updatePosString)
|
||||
import qualified Data.Foldable as Foldable
|
||||
|
@ -50,13 +49,20 @@ type ManParser m = ParserT [RoffToken] ManState m
|
|||
|
||||
|
||||
-- | Read man (troff) from an input string and return a Pandoc document.
|
||||
readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc
|
||||
readMan opts txt = do
|
||||
tokenz <- lexRoff (initialPos "input") (crFilter txt)
|
||||
readMan :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions
|
||||
-> a
|
||||
-> m Pandoc
|
||||
readMan opts s = do
|
||||
let Sources inps = toSources s
|
||||
tokenz <- mconcat <$> mapM (uncurry lexRoff) inps
|
||||
let state = def {readerOptions = opts} :: ManState
|
||||
let fixError (PandocParsecError _ e) = PandocParsecError (Sources inps) e
|
||||
fixError e = e
|
||||
eitherdoc <- readWithMTokens parseMan state
|
||||
(Foldable.toList . unRoffTokens $ tokenz)
|
||||
either throwError return eitherdoc
|
||||
either (throwError . fixError) return eitherdoc
|
||||
|
||||
|
||||
readWithMTokens :: PandocMonad m
|
||||
=> ParserT [RoffToken] ManState m a -- ^ parser
|
||||
|
@ -64,9 +70,10 @@ readWithMTokens :: PandocMonad m
|
|||
-> [RoffToken] -- ^ input
|
||||
-> m (Either PandocError a)
|
||||
readWithMTokens parser state input =
|
||||
let leftF = PandocParsecError . T.pack . intercalate "\n" $ show <$> input
|
||||
let leftF = PandocParsecError mempty
|
||||
in mapLeft leftF `liftM` runParserT parser state "source" input
|
||||
|
||||
|
||||
parseMan :: PandocMonad m => ManParser m Pandoc
|
||||
parseMan = do
|
||||
bs <- many parseBlock <* eof
|
||||
|
@ -89,7 +96,7 @@ parseBlock = choice [ parseList
|
|||
|
||||
parseTable :: PandocMonad m => ManParser m Blocks
|
||||
parseTable = do
|
||||
modifyState $ \st -> st { tableCellsPlain = True }
|
||||
updateState $ \st -> st { tableCellsPlain = True }
|
||||
let isTbl Tbl{} = True
|
||||
isTbl _ = False
|
||||
Tbl _opts rows pos <- msatisfy isTbl
|
||||
|
@ -135,7 +142,7 @@ parseTable = do
|
|||
case res' of
|
||||
Left _ -> Prelude.fail "Could not parse table cell"
|
||||
Right x -> do
|
||||
modifyState $ \s -> s{ tableCellsPlain = False }
|
||||
updateState $ \s -> s{ tableCellsPlain = False }
|
||||
return x
|
||||
Right x -> return x
|
||||
|
||||
|
@ -222,7 +229,7 @@ parseTitle = do
|
|||
setMeta "section" (linePartsToInlines y)
|
||||
[x] -> setMeta "title" (linePartsToInlines x)
|
||||
[] -> id
|
||||
modifyState $ \st -> st{ metadata = adjustMeta $ metadata st }
|
||||
updateState $ \st -> st{ metadata = adjustMeta $ metadata st }
|
||||
return mempty
|
||||
|
||||
linePartsToInlines :: [LinePart] -> Inlines
|
||||
|
|
|
@ -47,19 +47,20 @@ import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
|
|||
import Text.Pandoc.Shared
|
||||
import Text.Pandoc.XML (fromEntities)
|
||||
import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs, yamlMetaBlock)
|
||||
-- import Debug.Trace (traceShowId)
|
||||
|
||||
type MarkdownParser m = ParserT Text ParserState m
|
||||
type MarkdownParser m = ParserT Sources ParserState m
|
||||
|
||||
type F = Future ParserState
|
||||
|
||||
-- | Read markdown from an input string and return a Pandoc document.
|
||||
readMarkdown :: PandocMonad m
|
||||
readMarkdown :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions -- ^ Reader options
|
||||
-> Text -- ^ String to parse (assuming @'\n'@ line endings)
|
||||
-> a -- ^ Input
|
||||
-> m Pandoc
|
||||
readMarkdown opts s = do
|
||||
parsed <- readWithM parseMarkdown def{ stateOptions = opts }
|
||||
(crFilter s <> "\n\n")
|
||||
(ensureFinalNewlines 3 (toSources s))
|
||||
case parsed of
|
||||
Right result -> return result
|
||||
Left e -> throwError e
|
||||
|
@ -80,7 +81,7 @@ yamlToMeta opts mbfp bstr = do
|
|||
meta <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) bstr
|
||||
setPosition oldPos
|
||||
return $ runF meta defaultParserState
|
||||
parsed <- readWithM parser def{ stateOptions = opts } ""
|
||||
parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
|
||||
case parsed of
|
||||
Right result -> return result
|
||||
Left e -> throwError e
|
||||
|
@ -103,7 +104,7 @@ yamlToRefs idpred opts mbfp bstr = do
|
|||
refs <- yamlBsToRefs (fmap B.toMetaValue <$> parseBlocks) idpred bstr
|
||||
setPosition oldPos
|
||||
return $ runF refs defaultParserState
|
||||
parsed <- readWithM parser def{ stateOptions = opts } ""
|
||||
parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
|
||||
case parsed of
|
||||
Right result -> return result
|
||||
Left e -> throwError e
|
||||
|
@ -146,14 +147,14 @@ inList = do
|
|||
ctx <- stateParserContext <$> getState
|
||||
guard (ctx == ListItemState)
|
||||
|
||||
spnl :: PandocMonad m => ParserT Text st m ()
|
||||
spnl :: PandocMonad m => ParserT Sources st m ()
|
||||
spnl = try $ do
|
||||
skipSpaces
|
||||
optional newline
|
||||
skipSpaces
|
||||
notFollowedBy (char '\n')
|
||||
|
||||
spnl' :: PandocMonad m => ParserT Text st m Text
|
||||
spnl' :: PandocMonad m => ParserT Sources st m Text
|
||||
spnl' = try $ do
|
||||
xs <- many spaceChar
|
||||
ys <- option "" $ try $ (:) <$> newline
|
||||
|
@ -568,7 +569,7 @@ registerImplicitHeader raw attr@(ident, _, _)
|
|||
-- hrule block
|
||||
--
|
||||
|
||||
hrule :: PandocMonad m => ParserT Text st m (F Blocks)
|
||||
hrule :: PandocMonad m => ParserT Sources st m (F Blocks)
|
||||
hrule = try $ do
|
||||
skipSpaces
|
||||
start <- satisfy isHruleChar
|
||||
|
@ -588,7 +589,7 @@ indentedLine = indentSpaces >> anyLineNewline
|
|||
blockDelimiter :: PandocMonad m
|
||||
=> (Char -> Bool)
|
||||
-> Maybe Int
|
||||
-> ParserT Text ParserState m Int
|
||||
-> ParserT Sources ParserState m Int
|
||||
blockDelimiter f len = try $ do
|
||||
skipNonindentSpaces
|
||||
c <- lookAhead (satisfy f)
|
||||
|
@ -732,7 +733,7 @@ lhsCodeBlockBirdWith c = try $ do
|
|||
blanklines
|
||||
return $ T.intercalate "\n" lns'
|
||||
|
||||
birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text
|
||||
birdTrackLine :: PandocMonad m => Char -> ParserT Sources st m Text
|
||||
birdTrackLine c = try $ do
|
||||
char c
|
||||
-- allow html tags on left margin:
|
||||
|
@ -1025,7 +1026,7 @@ para = try $ do
|
|||
option (B.plain <$> result)
|
||||
$ try $ do
|
||||
newline
|
||||
(blanklines >> return mempty)
|
||||
(mempty <$ blanklines)
|
||||
<|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
|
||||
<|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
|
||||
<|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
|
||||
|
@ -1170,7 +1171,7 @@ lineBlock = do
|
|||
-- and the length including trailing space.
|
||||
dashedLine :: PandocMonad m
|
||||
=> Char
|
||||
-> ParserT Text st m (Int, Int)
|
||||
-> ParserT Sources st m (Int, Int)
|
||||
dashedLine ch = do
|
||||
dashes <- many1 (char ch)
|
||||
sp <- many spaceChar
|
||||
|
@ -1239,7 +1240,7 @@ rawTableLine :: PandocMonad m
|
|||
-> MarkdownParser m [Text]
|
||||
rawTableLine indices = do
|
||||
notFollowedBy' (blanklines' <|> tableFooter)
|
||||
line <- take1WhileP (/='\n') <* newline
|
||||
line <- anyLine
|
||||
return $ map trim $ tail $
|
||||
splitTextByIndices (init indices) line
|
||||
|
||||
|
@ -1390,7 +1391,7 @@ pipeTableCell =
|
|||
return $ B.plain <$> result)
|
||||
<|> return mempty
|
||||
|
||||
pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int)
|
||||
pipeTableHeaderPart :: PandocMonad m => ParserT Sources st m (Alignment, Int)
|
||||
pipeTableHeaderPart = try $ do
|
||||
skipMany spaceChar
|
||||
left <- optionMaybe (char ':')
|
||||
|
@ -1406,10 +1407,14 @@ pipeTableHeaderPart = try $ do
|
|||
(Just _,Just _) -> AlignCenter, len)
|
||||
|
||||
-- Succeed only if current line contains a pipe.
|
||||
scanForPipe :: PandocMonad m => ParserT Text st m ()
|
||||
scanForPipe :: PandocMonad m => ParserT Sources st m ()
|
||||
scanForPipe = do
|
||||
inp <- getInput
|
||||
case T.break (\c -> c == '\n' || c == '|') inp of
|
||||
Sources inps <- getInput
|
||||
let ln = case inps of
|
||||
[] -> ""
|
||||
((_,t):(_,t'):_) | T.null t -> t'
|
||||
((_,t):_) -> t
|
||||
case T.break (\c -> c == '\n' || c == '|') ln of
|
||||
(_, T.uncons -> Just ('|', _)) -> return ()
|
||||
_ -> mzero
|
||||
|
||||
|
@ -1703,13 +1708,13 @@ whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
|
|||
where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
|
||||
regsp = skipMany spaceChar >> return B.space
|
||||
|
||||
nonEndline :: PandocMonad m => ParserT Text st m Char
|
||||
nonEndline :: PandocMonad m => ParserT Sources st m Char
|
||||
nonEndline = satisfy (/='\n')
|
||||
|
||||
str :: PandocMonad m => MarkdownParser m (F Inlines)
|
||||
str = do
|
||||
result <- mconcat <$> many1
|
||||
( take1WhileP isAlphaNum
|
||||
( T.pack <$> (many1 alphaNum)
|
||||
<|> "." <$ try (char '.' <* notFollowedBy (char '.')) )
|
||||
updateLastStrPos
|
||||
(do guardEnabled Ext_smart
|
||||
|
@ -1962,7 +1967,7 @@ rawLaTeXInline' = do
|
|||
s <- rawLaTeXInline
|
||||
return $ return $ B.rawInline "tex" s -- "tex" because it might be context
|
||||
|
||||
rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text
|
||||
rawConTeXtEnvironment :: PandocMonad m => ParserT Sources st m Text
|
||||
rawConTeXtEnvironment = try $ do
|
||||
string "\\start"
|
||||
completion <- inBrackets (letter <|> digit <|> spaceChar)
|
||||
|
@ -1971,7 +1976,7 @@ rawConTeXtEnvironment = try $ do
|
|||
(try $ string "\\stop" >> textStr completion)
|
||||
return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion
|
||||
|
||||
inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text
|
||||
inBrackets :: PandocMonad m => ParserT Sources st m Char -> ParserT Sources st m Text
|
||||
inBrackets parser = do
|
||||
char '['
|
||||
contents <- manyChar parser
|
||||
|
|
|
@ -36,17 +36,18 @@ import Text.Pandoc.Logging
|
|||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Parsing hiding (nested)
|
||||
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag)
|
||||
import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines,
|
||||
import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines,
|
||||
trim, splitTextBy, tshow)
|
||||
import Text.Pandoc.Walk (walk)
|
||||
import Text.Pandoc.XML (fromEntities)
|
||||
|
||||
-- | Read mediawiki from an input string and return a Pandoc document.
|
||||
readMediaWiki :: PandocMonad m
|
||||
=> ReaderOptions -- ^ Reader options
|
||||
-> Text -- ^ String to parse (assuming @'\n'@ line endings)
|
||||
readMediaWiki :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions
|
||||
-> a
|
||||
-> m Pandoc
|
||||
readMediaWiki opts s = do
|
||||
let sources = toSources s
|
||||
parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts
|
||||
, mwMaxNestingLevel = 4
|
||||
, mwNextLinkNumber = 1
|
||||
|
@ -55,7 +56,7 @@ readMediaWiki opts s = do
|
|||
, mwLogMessages = []
|
||||
, mwInTT = False
|
||||
}
|
||||
(crFilter s <> "\n")
|
||||
sources
|
||||
case parsed of
|
||||
Right result -> return result
|
||||
Left e -> throwError e
|
||||
|
@ -69,7 +70,7 @@ data MWState = MWState { mwOptions :: ReaderOptions
|
|||
, mwInTT :: Bool
|
||||
}
|
||||
|
||||
type MWParser m = ParserT Text MWState m
|
||||
type MWParser m = ParserT Sources MWState m
|
||||
|
||||
instance HasReaderOptions MWState where
|
||||
extractReaderOptions = mwOptions
|
||||
|
|
|
@ -35,9 +35,9 @@ import qualified Data.Text.Lazy as TL
|
|||
import qualified Text.Pandoc.UTF8 as UTF8
|
||||
|
||||
yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st)
|
||||
=> ParserT Text st m (Future st MetaValue)
|
||||
=> ParserT Sources st m (Future st MetaValue)
|
||||
-> BL.ByteString
|
||||
-> ParserT Text st m (Future st Meta)
|
||||
-> ParserT Sources st m (Future st Meta)
|
||||
yamlBsToMeta pMetaValue bstr = do
|
||||
case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
|
||||
Right (YAML.Doc (YAML.Mapping _ _ o):_)
|
||||
|
@ -67,10 +67,10 @@ lookupYAML _ _ = Nothing
|
|||
|
||||
-- Returns filtered list of references.
|
||||
yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st)
|
||||
=> ParserT Text st m (Future st MetaValue)
|
||||
=> ParserT Sources st m (Future st MetaValue)
|
||||
-> (Text -> Bool) -- ^ Filter for id
|
||||
-> BL.ByteString
|
||||
-> ParserT Text st m (Future st [MetaValue])
|
||||
-> ParserT Sources st m (Future st [MetaValue])
|
||||
yamlBsToRefs pMetaValue idpred bstr =
|
||||
case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
|
||||
Right (YAML.Doc o@YAML.Mapping{}:_)
|
||||
|
@ -108,9 +108,9 @@ nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t
|
|||
nodeToKey _ = Nothing
|
||||
|
||||
normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st)
|
||||
=> ParserT Text st m (Future st MetaValue)
|
||||
=> ParserT Sources st m (Future st MetaValue)
|
||||
-> Text
|
||||
-> ParserT Text st m (Future st MetaValue)
|
||||
-> ParserT Sources st m (Future st MetaValue)
|
||||
normalizeMetaValue pMetaValue x =
|
||||
-- Note: a standard quoted or unquoted YAML value will
|
||||
-- not end in a newline, but a "block" set off with
|
||||
|
@ -133,9 +133,9 @@ checkBoolean t
|
|||
| otherwise = Nothing
|
||||
|
||||
yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st)
|
||||
=> ParserT Text st m (Future st MetaValue)
|
||||
=> ParserT Sources st m (Future st MetaValue)
|
||||
-> YAML.Node YE.Pos
|
||||
-> ParserT Text st m (Future st MetaValue)
|
||||
-> ParserT Sources st m (Future st MetaValue)
|
||||
yamlToMetaValue pMetaValue (YAML.Scalar _ x) =
|
||||
case x of
|
||||
YAML.SStr t -> normalizeMetaValue pMetaValue t
|
||||
|
@ -156,9 +156,9 @@ yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) =
|
|||
yamlToMetaValue _ _ = return $ return $ MetaString ""
|
||||
|
||||
yamlMap :: (PandocMonad m, HasLastStrPosition st)
|
||||
=> ParserT Text st m (Future st MetaValue)
|
||||
=> ParserT Sources st m (Future st MetaValue)
|
||||
-> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
|
||||
-> ParserT Text st m (Future st (M.Map Text MetaValue))
|
||||
-> ParserT Sources st m (Future st (M.Map Text MetaValue))
|
||||
yamlMap pMetaValue o = do
|
||||
kvs <- forM (M.toList o) $ \(key, v) -> do
|
||||
k <- maybe (throwError $ PandocParseError
|
||||
|
@ -177,8 +177,8 @@ yamlMap pMetaValue o = do
|
|||
|
||||
-- | Parse a YAML metadata block using the supplied 'MetaValue' parser.
|
||||
yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m)
|
||||
=> ParserT Text st m (Future st MetaValue)
|
||||
-> ParserT Text st m (Future st Meta)
|
||||
=> ParserT Sources st m (Future st MetaValue)
|
||||
-> ParserT Sources st m (Future st Meta)
|
||||
yamlMetaBlock parser = try $ do
|
||||
string "---"
|
||||
blankline
|
||||
|
@ -189,5 +189,5 @@ yamlMetaBlock parser = try $ do
|
|||
optional blanklines
|
||||
yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
|
||||
|
||||
stopLine :: Monad m => ParserT Text st m ()
|
||||
stopLine :: Monad m => ParserT Sources st m ()
|
||||
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
|
||||
|
|
|
@ -37,18 +37,19 @@ import Text.Pandoc.Error (PandocError (PandocParsecError))
|
|||
import Text.Pandoc.Logging
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Parsing
|
||||
import Text.Pandoc.Shared (crFilter, trimr, tshow)
|
||||
import Text.Pandoc.Shared (trimr, tshow)
|
||||
|
||||
-- | Read Muse from an input string and return a Pandoc document.
|
||||
readMuse :: PandocMonad m
|
||||
readMuse :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions
|
||||
-> Text
|
||||
-> a
|
||||
-> m Pandoc
|
||||
readMuse opts s = do
|
||||
let input = crFilter s
|
||||
res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input
|
||||
let sources = toSources s
|
||||
res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts }
|
||||
(initialSourceName sources) sources
|
||||
case res of
|
||||
Left e -> throwError $ PandocParsecError input e
|
||||
Left e -> throwError $ PandocParsecError sources e
|
||||
Right d -> return d
|
||||
|
||||
type F = Future MuseState
|
||||
|
@ -82,7 +83,7 @@ instance Default MuseEnv where
|
|||
, museInPara = False
|
||||
}
|
||||
|
||||
type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m)
|
||||
type MuseParser m = ParserT Sources MuseState (ReaderT MuseEnv m)
|
||||
|
||||
instance HasReaderOptions MuseState where
|
||||
extractReaderOptions = museOptions
|
||||
|
@ -155,7 +156,7 @@ firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1)
|
|||
-- * Parsers
|
||||
|
||||
-- | Parse end-of-line, which can be either a newline or end-of-file.
|
||||
eol :: Stream s m Char => ParserT s st m ()
|
||||
eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
|
||||
eol = void newline <|> eof
|
||||
|
||||
getIndent :: PandocMonad m
|
||||
|
|
|
@ -21,6 +21,7 @@ import Control.Monad.Except (throwError)
|
|||
import Data.Text (Text)
|
||||
import Text.Pandoc.Class.PandocMonad (PandocMonad)
|
||||
import Text.Pandoc.Error
|
||||
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
||||
|
||||
-- | Read native formatted text and return a Pandoc document.
|
||||
-- The input may be a full pandoc document, a block list, a block,
|
||||
|
@ -32,14 +33,15 @@ import Text.Pandoc.Error
|
|||
--
|
||||
-- > Pandoc nullMeta [Plain [Str "hi"]]
|
||||
--
|
||||
readNative :: PandocMonad m
|
||||
readNative :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions
|
||||
-> Text -- ^ String to parse (assuming @'\n'@ line endings)
|
||||
-> a
|
||||
-> m Pandoc
|
||||
readNative _ s =
|
||||
case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of
|
||||
Right doc -> return doc
|
||||
Left _ -> throwError $ PandocParseError "couldn't read native"
|
||||
let t = sourcesToText . toSources $ s
|
||||
in case maybe (Pandoc nullMeta <$> readBlocks t) Right (safeRead t) of
|
||||
Right doc -> return doc
|
||||
Left _ -> throwError $ PandocParseError "couldn't read native"
|
||||
|
||||
readBlocks :: Text -> Either PandocError [Block]
|
||||
readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)
|
||||
|
|
|
@ -24,7 +24,8 @@ import Text.Pandoc.Options
|
|||
import Text.Pandoc.Error (PandocError(..))
|
||||
import Text.Pandoc.Readers.HTML (readHtml)
|
||||
import Text.Pandoc.Readers.Markdown (readMarkdown)
|
||||
import Text.Pandoc.Shared (crFilter, blocksToInlines')
|
||||
import Text.Pandoc.Shared (blocksToInlines')
|
||||
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
||||
import Text.Pandoc.XML.Light
|
||||
import Control.Monad.Except (throwError)
|
||||
|
||||
|
@ -46,10 +47,14 @@ instance Default OPMLState where
|
|||
, opmlOptions = def
|
||||
}
|
||||
|
||||
readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
|
||||
readOPML :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions
|
||||
-> a
|
||||
-> m Pandoc
|
||||
readOPML opts inp = do
|
||||
(bs, st') <- runStateT
|
||||
(case parseXMLContents (TL.fromStrict (crFilter inp)) of
|
||||
let sources = toSources inp
|
||||
(bs, st') <-
|
||||
runStateT (case parseXMLContents (TL.fromStrict . sourcesToText $ sources) of
|
||||
Left msg -> throwError $ PandocXMLError "" msg
|
||||
Right ns -> mapM parseBlock ns)
|
||||
def{ opmlOptions = opts }
|
||||
|
|
|
@ -18,22 +18,19 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
|
|||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Parsing (reportLogMessages)
|
||||
import Text.Pandoc.Shared (crFilter)
|
||||
|
||||
import Text.Pandoc.Sources (ToSources(..), ensureFinalNewlines)
|
||||
import Control.Monad.Except (throwError)
|
||||
import Control.Monad.Reader (runReaderT)
|
||||
|
||||
import Data.Text (Text)
|
||||
|
||||
-- | Parse org-mode string and return a Pandoc document.
|
||||
readOrg :: PandocMonad m
|
||||
readOrg :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions -- ^ Reader options
|
||||
-> Text -- ^ String to parse (assuming @'\n'@ line endings)
|
||||
-> a
|
||||
-> m Pandoc
|
||||
readOrg opts s = do
|
||||
parsed <- flip runReaderT def $
|
||||
readWithM parseOrg (optionsToParserState opts)
|
||||
(crFilter s <> "\n\n")
|
||||
(ensureFinalNewlines 2 (toSources s))
|
||||
case parsed of
|
||||
Right result -> return result
|
||||
Left e -> throwError e
|
||||
|
|
|
@ -29,6 +29,7 @@ import Text.Pandoc.Definition
|
|||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
|
||||
import Text.TeXMath (DisplayType (..), readTeX, writePandoc)
|
||||
import Text.Pandoc.Sources (ToSources(..))
|
||||
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
|
||||
|
||||
import Control.Monad (guard, mplus, mzero, unless, void, when)
|
||||
|
@ -802,7 +803,7 @@ inlineLaTeX = try $ do
|
|||
parseAsInlineLaTeX :: PandocMonad m
|
||||
=> Text -> TeXExport -> OrgParser m (Maybe Inlines)
|
||||
parseAsInlineLaTeX cs = \case
|
||||
TeXExport -> maybeRight <$> runParserT inlineCommand state "" cs
|
||||
TeXExport -> maybeRight <$> runParserT inlineCommand state "" (toSources cs)
|
||||
TeXIgnore -> return (Just mempty)
|
||||
TeXVerbatim -> return (Just $ B.str cs)
|
||||
|
||||
|
|
|
@ -114,7 +114,7 @@ import Control.Monad (guard)
|
|||
import Control.Monad.Reader (ReaderT)
|
||||
|
||||
-- | The parser used to read org files.
|
||||
type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m)
|
||||
type OrgParser m = ParserT Sources OrgParserState (ReaderT OrgParserLocal m)
|
||||
|
||||
--
|
||||
-- Adaptions and specializations of parsing utilities
|
||||
|
|
|
@ -38,25 +38,24 @@ import Text.Pandoc.Options
|
|||
import Text.Pandoc.Parsing
|
||||
import Text.Pandoc.Shared
|
||||
import qualified Text.Pandoc.UTF8 as UTF8
|
||||
import Text.Printf (printf)
|
||||
import Data.Time.Format
|
||||
|
||||
-- TODO:
|
||||
-- [ ] .. parsed-literal
|
||||
|
||||
-- | Parse reStructuredText string and return Pandoc document.
|
||||
readRST :: PandocMonad m
|
||||
readRST :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions -- ^ Reader options
|
||||
-> Text -- ^ Text to parse (assuming @'\n'@ line endings)
|
||||
-> a
|
||||
-> m Pandoc
|
||||
readRST opts s = do
|
||||
parsed <- readWithM parseRST def{ stateOptions = opts }
|
||||
(crFilter s <> "\n\n")
|
||||
(ensureFinalNewlines 2 (toSources s))
|
||||
case parsed of
|
||||
Right result -> return result
|
||||
Left e -> throwError e
|
||||
|
||||
type RSTParser m = ParserT Text ParserState m
|
||||
type RSTParser m = ParserT Sources ParserState m
|
||||
|
||||
--
|
||||
-- Constants and data structure definitions
|
||||
|
@ -151,11 +150,19 @@ parseRST = do
|
|||
startPos <- getPosition
|
||||
-- go through once just to get list of reference keys and notes
|
||||
-- docMinusKeys is the raw document with blanks where the keys were...
|
||||
docMinusKeys <- T.concat <$>
|
||||
manyTill (referenceKey <|> anchorDef <|>
|
||||
noteBlock <|> citationBlock <|>
|
||||
(snd <$> withRaw comment) <|>
|
||||
headerBlock <|> lineClump) eof
|
||||
let chunk = referenceKey
|
||||
<|> anchorDef
|
||||
<|> noteBlock
|
||||
<|> citationBlock
|
||||
<|> (snd <$> withRaw comment)
|
||||
            <|> headerBlock
            <|> lineClump
  docMinusKeys <- Sources <$>
                  manyTill (do pos <- getPosition
                               t <- chunk
                               return (pos, t)) eof
  -- UGLY: we collapse source position information.
  -- TODO: fix the parser to use the F monad instead of two passes
  setInput docMinusKeys
  setPosition startPos
  st' <- getState
@@ -348,7 +355,7 @@ singleHeader' = try $ do
-- hrule block
--

hrule :: Monad m => ParserT Text st m Blocks
hrule :: Monad m => ParserT Sources st m Blocks
hrule = try $ do
  chr <- oneOf underlineChars
  count 3 (char chr)
@@ -363,7 +370,7 @@ hrule = try $ do

-- read a line indented by a given string
indentedLine :: (HasReaderOptions st, Monad m)
             => Int -> ParserT Text st m Text
             => Int -> ParserT Sources st m Text
indentedLine indents = try $ do
  lookAhead spaceChar
  gobbleAtMostSpaces indents
@@ -372,7 +379,7 @@ indentedLine indents = try $ do
-- one or more indented lines, possibly separated by blank lines.
-- any amount of indentation will work.
indentedBlock :: (HasReaderOptions st, Monad m)
              => ParserT Text st m Text
              => ParserT Sources st m Text
indentedBlock = try $ do
  indents <- length <$> lookAhead (many1 spaceChar)
  lns <- many1 $ try $ do b <- option "" blanklines
@@ -381,20 +388,20 @@ indentedBlock = try $ do
  optional blanklines
  return $ T.unlines lns

quotedBlock :: Monad m => ParserT Text st m Text
quotedBlock :: Monad m => ParserT Sources st m Text
quotedBlock = try $ do
  quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
  lns <- many1 $ lookAhead (char quote) >> anyLine
  optional blanklines
  return $ T.unlines lns

codeBlockStart :: Monad m => ParserT Text st m Char
codeBlockStart :: Monad m => ParserT Sources st m Char
codeBlockStart = string "::" >> blankline >> blankline

codeBlock :: Monad m => ParserT Text ParserState m Blocks
codeBlock :: Monad m => ParserT Sources ParserState m Blocks
codeBlock = try $ codeBlockStart >> codeBlockBody

codeBlockBody :: Monad m => ParserT Text ParserState m Blocks
codeBlockBody :: Monad m => ParserT Sources ParserState m Blocks
codeBlockBody = do
  lang <- stateRstHighlight <$> getState
  try $ B.codeBlockWith ("", maybeToList lang, []) . stripTrailingNewlines <$>
@@ -410,14 +417,14 @@ lhsCodeBlock = try $ do
  return $ B.codeBlockWith ("", ["haskell","literate"], [])
         $ T.intercalate "\n" lns

latexCodeBlock :: Monad m => ParserT Text st m [Text]
latexCodeBlock :: Monad m => ParserT Sources st m [Text]
latexCodeBlock = try $ do
  try (latexBlockLine "\\begin{code}")
  many1Till anyLine (try $ latexBlockLine "\\end{code}")
 where
  latexBlockLine s = skipMany spaceChar >> string s >> blankline

birdCodeBlock :: Monad m => ParserT Text st m [Text]
birdCodeBlock :: Monad m => ParserT Sources st m [Text]
birdCodeBlock = filterSpace <$> many1 birdTrackLine
  where filterSpace lns =
          -- if (as is normal) there is always a space after >, drop it
@@ -425,7 +432,7 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine
          then map (T.drop 1) lns
          else lns

birdTrackLine :: Monad m => ParserT Text st m Text
birdTrackLine :: Monad m => ParserT Sources st m Text
birdTrackLine = char '>' >> anyLine

--
@@ -456,7 +463,6 @@ includeDirective top fields body = do
  let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead
  let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead
  oldPos <- getPosition
  oldInput <- getInput
  containers <- stateContainers <$> getState
  when (f `elem` containers) $
    throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos
@@ -494,15 +500,11 @@ includeDirective top fields body = do
    Nothing -> case lookup "literal" fields of
                 Just _ -> return $ B.rawBlock "rst" contents'
                 Nothing -> do
                   setPosition $ newPos (T.unpack f) 1 1
                   setInput $ contents' <> "\n"
                   bs <- optional blanklines >>
                          (mconcat <$> many block)
                   setInput oldInput
                   setPosition oldPos
                   addToSources (initialPos (T.unpack f))
                      (contents' <> "\n")
                   updateState $ \s -> s{ stateContainers =
                                     tail $ stateContainers s }
                   return bs
                   return mempty


--
@@ -526,7 +528,7 @@ definitionList :: PandocMonad m => RSTParser m Blocks
definitionList = B.definitionList <$> many1 definitionListItem

-- parses bullet list start and returns its length (inc. following whitespace)
bulletListStart :: Monad m => ParserT Text st m Int
bulletListStart :: Monad m => ParserT Sources st m Int
bulletListStart = try $ do
  notFollowedBy' hrule -- because hrules start out just like lists
  marker <- oneOf bulletListMarkers
@@ -1103,7 +1105,7 @@ quotedReferenceName = try $ do
-- plus isolated (no two adjacent) internal hyphens, underscores,
-- periods, colons and plus signs; no whitespace or other characters
-- are allowed.
simpleReferenceName :: Monad m => ParserT Text st m Text
simpleReferenceName :: Monad m => ParserT Sources st m Text
simpleReferenceName = do
  x <- alphaNum
  xs <- many $ alphaNum
@@ -1122,7 +1124,7 @@ referenceKey = do
  -- return enough blanks to replace key
  return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"

targetURI :: Monad m => ParserT Text st m Text
targetURI :: Monad m => ParserT Sources st m Text
targetURI = do
  skipSpaces
  optional $ try $ newline >> notFollowedBy blankline
@@ -1160,8 +1162,10 @@ anonymousKey :: Monad m => RSTParser m ()
anonymousKey = try $ do
  oneOfStrings [".. __:", "__"]
  src <- targetURI
  pos <- getPosition
  let key = toKey $ "_" <> T.pack (printf "%09d" (sourceLine pos))
  -- we need to ensure that the keys are ordered by occurrence in
  -- the document.
  numKeys <- M.size . stateKeys <$> getState
  let key = toKey $ "_" <> T.pack (show numKeys)
  updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
                          stateKeys s }

@@ -1250,13 +1254,13 @@ headerBlock = do
-- Grid tables TODO:
-- - column spans

dashedLine :: Monad m => Char -> ParserT Text st m (Int, Int)
dashedLine :: Monad m => Char -> ParserT Sources st m (Int, Int)
dashedLine ch = do
  dashes <- many1 (char ch)
  sp <- many (char ' ')
  return (length dashes, length $ dashes ++ sp)

simpleDashedLines :: Monad m => Char -> ParserT Text st m [(Int,Int)]
simpleDashedLines :: Monad m => Char -> ParserT Sources st m [(Int,Int)]
simpleDashedLines ch = try $ many1 (dashedLine ch)

-- Parse a table row separator
@@ -1382,7 +1386,7 @@ hyphens = do
  -- don't want to treat endline after hyphen or dash as a space
  return $ B.str result

escapedChar :: Monad m => ParserT Text st m Inlines
escapedChar :: Monad m => ParserT Sources st m Inlines
escapedChar = do c <- escaped anyChar
                 return $ if c == ' ' || c == '\n' || c == '\r'
                             -- '\ ' is null in RST

@@ -42,7 +42,6 @@ import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Shared (safeRead)
import Text.Parsec hiding (tokenPrim)
import Text.Pandoc.RoffChar (characterCodes, combiningAccents)
import qualified Data.Sequence as Seq
import qualified Data.Foldable as Foldable
@@ -122,16 +121,16 @@ instance Default RoffState where
  , afterConditional = False
  }

type RoffLexer m = ParserT T.Text RoffState m
type RoffLexer m = ParserT Sources RoffState m

--
-- Lexer: T.Text -> RoffToken
--

eofline :: Stream s m Char => ParsecT s u m ()
eofline :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m ()
eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}")

spacetab :: Stream s m Char => ParsecT s u m Char
spacetab :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m Char
spacetab = char ' ' <|> char '\t'

characterCodeMap :: M.Map T.Text Char
@@ -303,8 +302,7 @@ expandString = try $ do
  char '*'
  cs <- escapeArg <|> countChar 1 anyChar
  s <- linePartsToText <$> resolveText cs pos
  getInput >>= setInput . (s <>)
  return ()
  addToInput s

-- Parses: '..'
quoteArg :: PandocMonad m => RoffLexer m T.Text
@@ -316,7 +314,7 @@ escFont = do
  font' <- if T.null font || font == "P"
              then prevFont <$> getState
              else return $ foldr processFontLetter defaultFontSpec $ T.unpack font
  modifyState $ \st -> st{ prevFont = currentFont st
  updateState $ \st -> st{ prevFont = currentFont st
                         , currentFont = font' }
  return [Font font']
  where
@@ -372,8 +370,8 @@ lexTable pos = do
  spaces
  opts <- try tableOptions <|> [] <$ optional (char ';')
  case lookup "tab" opts of
    Just (T.uncons -> Just (c, _)) -> modifyState $ \st -> st{ tableTabChar = c }
    _ -> modifyState $ \st -> st{ tableTabChar = '\t' }
    Just (T.uncons -> Just (c, _)) -> updateState $ \st -> st{ tableTabChar = c }
    _ -> updateState $ \st -> st{ tableTabChar = '\t' }
  spaces
  skipMany lexComment
  spaces
@@ -489,18 +487,18 @@ lexConditional mname = do
  ifPart <- do
      optional $ try $ char '\\' >> newline
      lexGroup
    <|> do modifyState $ \s -> s{ afterConditional = True }
    <|> do updateState $ \s -> s{ afterConditional = True }
           t <- manToken
           modifyState $ \s -> s{ afterConditional = False }
           updateState $ \s -> s{ afterConditional = False }
           return t
  case mbtest of
    Nothing -> do
      putState st -- reset state, so we don't record macros in skipped section
      setState st -- reset state, so we don't record macros in skipped section
      report $ SkippedContent (T.cons '.' mname) pos
      return mempty
    Just True -> return ifPart
    Just False -> do
      putState st
      setState st
      return mempty

expression :: PandocMonad m => RoffLexer m (Maybe Bool)
@@ -515,7 +513,7 @@ expression = do
    _ -> Nothing
 where
  returnValue v = do
    modifyState $ \st -> st{ lastExpression = v }
    updateState $ \st -> st{ lastExpression = v }
    return v

lexGroup :: PandocMonad m => RoffLexer m RoffTokens
@@ -536,7 +534,7 @@ lexIncludeFile args = do
      result <- readFileFromDirs dirs $ T.unpack fp
      case result of
        Nothing -> report $ CouldNotLoadIncludeFile fp pos
        Just s -> getInput >>= setInput . (s <>)
        Just s -> addToInput s
      return mempty
    [] -> return mempty

@@ -564,13 +562,13 @@ lexStringDef args = do -- string definition
    (x:ys) -> do
      let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys)
      let stringName = linePartsToText x
      modifyState $ \st ->
      updateState $ \st ->
        st{ customMacros = M.insert stringName ts (customMacros st) }
  return mempty

lexMacroDef :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens
lexMacroDef args = do -- macro definition
  modifyState $ \st -> st{ roffMode = CopyMode }
  updateState $ \st -> st{ roffMode = CopyMode }
  (macroName, stopMacro) <-
    case args of
      (x : y : _) -> return (linePartsToText x, linePartsToText y)
@@ -584,7 +582,7 @@ lexMacroDef args = do -- macro definition
        _ <- lexArgs
        return ()
  ts <- mconcat <$> manyTill manToken stop
  modifyState $ \st ->
  updateState $ \st ->
    st{ customMacros = M.insert macroName ts (customMacros st)
      , roffMode = NormalMode }
  return mempty

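A minimal sketch, not part of this commit, of what `addToInput` does for the roff lexer above: it pushes expansion text in front of whatever remains of the `Sources` stream, replacing the old `getInput >>= setInput . (s <>)` pattern. The module name and placeholder syntax below are assumptions for illustration only.

{-# LANGUAGE OverloadedStrings #-}
module AddToInputDemo (expanded) where

import Data.Text (Text)
import Text.Pandoc.Sources
import Text.Parsec (ParseError, runParser, many1)

-- After "@name@" is consumed, the expansion is prepended to the remaining
-- input ("!"), so the rest of the parse sees "pandoc!".
expanded :: Either ParseError String
expanded = runParser p () "" (toSources ("@name@!" :: Text))
  where
    p = do
      _ <- string "@name@"   -- hypothetical placeholder syntax
      addToInput "pandoc"    -- expansion text, re-read before the rest
      many1 anyChar          -- should yield "pandoc!"
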
@@ -28,22 +28,22 @@ import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
import Text.Pandoc.Shared (crFilter, tshow)
import Text.Pandoc.Shared (tshow)
import Text.Pandoc.XML (fromEntities)

-- | Read twiki from an input string and return a Pandoc document.
readTWiki :: PandocMonad m
readTWiki :: (PandocMonad m, ToSources a)
          => ReaderOptions
          -> Text
          -> a
          -> m Pandoc
readTWiki opts s = do
  res <- readWithM parseTWiki def{ stateOptions = opts }
           (crFilter s <> "\n\n")
  let sources = ensureFinalNewlines 2 (toSources s)
  res <- readWithM parseTWiki def{ stateOptions = opts } sources
  case res of
       Left e -> throwError e
       Right d -> return d

type TWParser = ParserT Text ParserState
type TWParser = ParserT Sources ParserState

--
-- utility functions

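Because `readTWiki` (like the other readers changed in this commit) now accepts any `ToSources` instance, it can still be called with a single `Text`, or with a list of `(FilePath, Text)` pairs so that source positions can name the originating file. A hedged sketch follows; the file names and TWiki snippets are made up, and `def` is the usual data-default value for `ReaderOptions`.

{-# LANGUAGE OverloadedStrings #-}
module ReadTWikiDemo (fromSingleText, fromTwoFiles) where

import Data.Default (def)
import Data.Text (Text)
import Text.Pandoc.Class (runIOorExplode)
import Text.Pandoc.Definition (Pandoc)
import Text.Pandoc.Readers.TWiki (readTWiki)

-- The old calling convention still works: a single Text input.
fromSingleText :: IO Pandoc
fromSingleText = runIOorExplode $
  readTWiki def ("---+ Heading\n\nSome text\n" :: Text)

-- New: labeled inputs, so positions refer to the right file.
fromTwoFiles :: IO Pandoc
fromTwoFiles = runIOorExplode $
  readTWiki def [ ("a.twiki" :: FilePath, "---+ First\n" :: Text)
                , ("b.twiki", "---+ Second\n") ]
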
@@ -53,30 +53,34 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
import Text.Pandoc.Shared (crFilter, trim, tshow)
import Text.Pandoc.Shared (trim, tshow)

-- | Parse a Textile text and return a Pandoc document.
readTextile :: PandocMonad m
readTextile :: (PandocMonad m, ToSources a)
            => ReaderOptions -- ^ Reader options
            -> Text -- ^ String to parse (assuming @'\n'@ line endings)
            -> a
            -> m Pandoc
readTextile opts s = do
  parsed <- readWithM parseTextile def{ stateOptions = opts }
              (crFilter s <> "\n\n")
  let sources = ensureFinalNewlines 2 (toSources s)
  parsed <- readWithM parseTextile def{ stateOptions = opts } sources
  case parsed of
     Right result -> return result
     Left e -> throwError e

type TextileParser = ParserT Sources ParserState

-- | Generate a Pandoc ADT from a textile document
parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc
parseTextile :: PandocMonad m => TextileParser m Pandoc
parseTextile = do
  many blankline
  startPos <- getPosition
  -- go through once just to get list of reference keys and notes
  -- docMinusKeys is the raw document with blanks where the keys/notes were...
  let firstPassParser = noteBlock <|> lineClump
  manyTill firstPassParser eof >>= setInput . T.concat
  let firstPassParser = do
        pos <- getPosition
        t <- noteBlock <|> lineClump
        return (pos, t)
  manyTill firstPassParser eof >>= setInput . Sources
  setPosition startPos
  st' <- getState
  let reversedNotes = stateNotes st'
@@ -84,10 +88,10 @@ parseTextile = do
  -- now parse it for real...
  Pandoc nullMeta . B.toList <$> parseBlocks -- FIXME

noteMarker :: PandocMonad m => ParserT Text ParserState m Text
noteMarker :: PandocMonad m => TextileParser m Text
noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.')

noteBlock :: PandocMonad m => ParserT Text ParserState m Text
noteBlock :: PandocMonad m => TextileParser m Text
noteBlock = try $ do
  startPos <- getPosition
  ref <- noteMarker
@@ -102,11 +106,11 @@ noteBlock = try $ do
  return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"

-- | Parse document blocks
parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks
parseBlocks :: PandocMonad m => TextileParser m Blocks
parseBlocks = mconcat <$> manyTill block eof

-- | Block parsers list tried in definition order
blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks]
blockParsers :: PandocMonad m => [TextileParser m Blocks]
blockParsers = [ codeBlock
               , header
               , blockQuote
@@ -121,22 +125,22 @@ blockParsers = [ codeBlock
               ]

-- | Any block in the order of definition of blockParsers
block :: PandocMonad m => ParserT Text ParserState m Blocks
block :: PandocMonad m => TextileParser m Blocks
block = do
  res <- choice blockParsers <?> "block"
  trace (T.take 60 $ tshow $ B.toList res)
  return res

commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks
commentBlock :: PandocMonad m => TextileParser m Blocks
commentBlock = try $ do
  string "###."
  manyTill anyLine blanklines
  return mempty

codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks
codeBlock :: PandocMonad m => TextileParser m Blocks
codeBlock = codeBlockTextile <|> codeBlockHtml

codeBlockTextile :: PandocMonad m => ParserT Text ParserState m Blocks
codeBlockTextile :: PandocMonad m => TextileParser m Blocks
codeBlockTextile = try $ do
  string "bc." <|> string "pre."
  extended <- option False (True <$ char '.')
@@ -156,7 +160,7 @@ trimTrailingNewlines :: Text -> Text
trimTrailingNewlines = T.dropWhileEnd (=='\n')

-- | Code Blocks in Textile are between <pre> and </pre>
codeBlockHtml :: PandocMonad m => ParserT Text ParserState m Blocks
codeBlockHtml :: PandocMonad m => TextileParser m Blocks
codeBlockHtml = try $ do
  (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
  result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre")))
@@ -174,7 +178,7 @@ codeBlockHtml = try $ do
  return $ B.codeBlockWith (ident,classes,kvs) result'''

-- | Header of the form "hN. content" with N in 1..6
header :: PandocMonad m => ParserT Text ParserState m Blocks
header :: PandocMonad m => TextileParser m Blocks
header = try $ do
  char 'h'
  level <- digitToInt <$> oneOf "123456"
@@ -186,14 +190,14 @@ header = try $ do
  return $ B.headerWith attr' level name

-- | Blockquote of the form "bq. content"
blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks
blockQuote :: PandocMonad m => TextileParser m Blocks
blockQuote = try $ do
  string "bq" >> attributes >> char '.' >> whitespace
  B.blockQuote <$> para

-- Horizontal rule

hrule :: PandocMonad m => ParserT Text st m Blocks
hrule :: PandocMonad m => TextileParser m Blocks
hrule = try $ do
  skipSpaces
  start <- oneOf "-*"
@@ -208,39 +212,39 @@ hrule = try $ do
-- | Can be a bullet list or an ordered list. This implementation is
-- strict in the nesting, sublist must start at exactly "parent depth
-- plus one"
anyList :: PandocMonad m => ParserT Text ParserState m Blocks
anyList :: PandocMonad m => TextileParser m Blocks
anyList = try $ anyListAtDepth 1 <* blanklines

-- | This allow one type of list to be nested into an other type,
-- provided correct nesting
anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
anyListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
anyListAtDepth depth = choice [ bulletListAtDepth depth,
                                orderedListAtDepth depth,
                                definitionList ]

-- | Bullet List of given depth, depth being the number of leading '*'
bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
bulletListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth)

-- | Bullet List Item of given depth, depth being the number of
-- leading '*'
bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
bulletListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
bulletListItemAtDepth = genericListItemAtDepth '*'

-- | Ordered List of given depth, depth being the number of
-- leading '#'
orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
orderedListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
orderedListAtDepth depth = try $ do
  items <- many1 (orderedListItemAtDepth depth)
  return $ B.orderedList items

-- | Ordered List Item of given depth, depth being the number of
-- leading '#'
orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
orderedListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
orderedListItemAtDepth = genericListItemAtDepth '#'

-- | Common implementation of list items
genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks
genericListItemAtDepth :: PandocMonad m => Char -> Int -> TextileParser m Blocks
genericListItemAtDepth c depth = try $ do
  count depth (char c) >> attributes >> whitespace
  contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|>
@@ -250,25 +254,25 @@ genericListItemAtDepth c depth = try $ do
  return $ contents <> sublist

-- | A definition list is a set of consecutive definition items
definitionList :: PandocMonad m => ParserT Text ParserState m Blocks
definitionList :: PandocMonad m => TextileParser m Blocks
definitionList = try $ B.definitionList <$> many1 definitionListItem

-- | List start character.
listStart :: PandocMonad m => ParserT Text ParserState m ()
listStart :: PandocMonad m => TextileParser m ()
listStart = genericListStart '*'
        <|> () <$ genericListStart '#'
        <|> () <$ definitionListStart

genericListStart :: PandocMonad m => Char -> ParserT Text st m ()
genericListStart :: PandocMonad m => Char -> TextileParser m ()
genericListStart c = () <$ try (many1 (char c) >> whitespace)

basicDLStart :: PandocMonad m => ParserT Text ParserState m ()
basicDLStart :: PandocMonad m => TextileParser m ()
basicDLStart = do
  char '-'
  whitespace
  notFollowedBy newline

definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines
definitionListStart :: PandocMonad m => TextileParser m Inlines
definitionListStart = try $ do
  basicDLStart
  trimInlines . mconcat <$>
@@ -281,15 +285,15 @@ definitionListStart = try $ do
-- the term defined, then spaces and ":=". The definition follows, on
-- the same single line, or spaned on multiple line, after a line
-- break.
definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks])
definitionListItem :: PandocMonad m => TextileParser m (Inlines, [Blocks])
definitionListItem = try $ do
  term <- mconcat . intersperse B.linebreak <$> many1 definitionListStart
  def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef)
  return (term, def')
  where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
  where inlineDef :: PandocMonad m => TextileParser m [Blocks]
        inlineDef = liftM (\d -> [B.plain d])
          $ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline
        multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
        multilineDef :: PandocMonad m => TextileParser m [Blocks]
        multilineDef = try $ do
          optional whitespace >> newline
          s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline))
@@ -300,7 +304,7 @@ definitionListItem = try $ do
-- raw content

-- | A raw Html Block, optionally followed by blanklines
rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks
rawHtmlBlock :: PandocMonad m => TextileParser m Blocks
rawHtmlBlock = try $ do
  skipMany spaceChar
  (_,b) <- htmlTag isBlockTag
@@ -308,14 +312,14 @@ rawHtmlBlock = try $ do
  return $ B.rawBlock "html" b

-- | Raw block of LaTeX content
rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks
rawLaTeXBlock' :: PandocMonad m => TextileParser m Blocks
rawLaTeXBlock' = do
  guardEnabled Ext_raw_tex
  B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)


-- | In textile, paragraphs are separated by blank lines.
para :: PandocMonad m => ParserT Text ParserState m Blocks
para :: PandocMonad m => TextileParser m Blocks
para = B.para . trimInlines . mconcat <$> many1 inline

-- Tables
@@ -326,7 +330,7 @@ toAlignment '>' = AlignRight
toAlignment '=' = AlignCenter
toAlignment _ = AlignDefault

cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment)
cellAttributes :: PandocMonad m => TextileParser m (Bool, Alignment)
cellAttributes = try $ do
  isHeader <- option False (True <$ char '_')
  -- we just ignore colspan and rowspan markers:
@@ -339,7 +343,7 @@ cellAttributes = try $ do
  return (isHeader, alignment)

-- | A table cell spans until a pipe |
tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks)
tableCell :: PandocMonad m => TextileParser m ((Bool, Alignment), Blocks)
tableCell = try $ do
  char '|'
  (isHeader, alignment) <- option (False, AlignDefault) cellAttributes
@@ -350,7 +354,7 @@ tableCell = try $ do
  return ((isHeader, alignment), B.plain content)

-- | A table row is made of many table cells
tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)]
tableRow :: PandocMonad m => TextileParser m [((Bool, Alignment), Blocks)]
tableRow = try $ do
  -- skip optional row attributes
  optional $ try $ do
@@ -360,7 +364,7 @@ tableRow = try $ do
  many1 tableCell <* char '|' <* blankline

-- | A table with an optional header.
table :: PandocMonad m => ParserT Text ParserState m Blocks
table :: PandocMonad m => TextileParser m Blocks
table = try $ do
  -- ignore table attributes
  caption <- option mempty $ try $ do
@@ -388,7 +392,7 @@ table = try $ do
  (TableFoot nullAttr [])

-- | Ignore markers for cols, thead, tfoot.
ignorableRow :: PandocMonad m => ParserT Text ParserState m ()
ignorableRow :: PandocMonad m => TextileParser m ()
ignorableRow = try $ do
  char '|'
  oneOf ":^-~"
@@ -397,7 +401,7 @@ ignorableRow = try $ do
  _ <- anyLine
  return ()

explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m ()
explicitBlockStart :: PandocMonad m => Text -> TextileParser m ()
explicitBlockStart name = try $ do
  string (T.unpack name)
  attributes
@@ -409,8 +413,8 @@ explicitBlockStart name = try $ do
-- However, they can be used to set HTML/CSS attributes when needed.
maybeExplicitBlock :: PandocMonad m
                   => Text -- ^ block tag name
                   -> ParserT Text ParserState m Blocks -- ^ implicit block
                   -> ParserT Text ParserState m Blocks
                   -> TextileParser m Blocks -- ^ implicit block
                   -> TextileParser m Blocks
maybeExplicitBlock name blk = try $ do
  optional $ explicitBlockStart name
  blk
@@ -423,11 +427,11 @@ maybeExplicitBlock name blk = try $ do


-- | Any inline element
inline :: PandocMonad m => ParserT Text ParserState m Inlines
inline :: PandocMonad m => TextileParser m Inlines
inline = choice inlineParsers <?> "inline"

-- | Inline parsers tried in order
inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines]
inlineParsers :: PandocMonad m => [TextileParser m Inlines]
inlineParsers = [ str
                , whitespace
                , endline
@@ -447,7 +451,7 @@ inlineParsers = [ str
                ]

-- | Inline markups
inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
inlineMarkup :: PandocMonad m => TextileParser m Inlines
inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
                      , simpleInline (string "**") B.strong
                      , simpleInline (string "__") B.emph
@@ -461,29 +465,29 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
                      ]

-- | Trademark, registered, copyright
mark :: PandocMonad m => ParserT Text st m Inlines
mark :: PandocMonad m => TextileParser m Inlines
mark = try $ char '(' >> (try tm <|> try reg <|> copy)

reg :: PandocMonad m => ParserT Text st m Inlines
reg :: PandocMonad m => TextileParser m Inlines
reg = do
  oneOf "Rr"
  char ')'
  return $ B.str "\174"

tm :: PandocMonad m => ParserT Text st m Inlines
tm :: PandocMonad m => TextileParser m Inlines
tm = do
  oneOf "Tt"
  oneOf "Mm"
  char ')'
  return $ B.str "\8482"

copy :: PandocMonad m => ParserT Text st m Inlines
copy :: PandocMonad m => TextileParser m Inlines
copy = do
  oneOf "Cc"
  char ')'
  return $ B.str "\169"

note :: PandocMonad m => ParserT Text ParserState m Inlines
note :: PandocMonad m => TextileParser m Inlines
note = try $ do
  ref <- char '[' *> many1 digit <* char ']'
  notes <- stateNotes <$> getState
@@ -507,13 +511,13 @@ wordBoundaries :: [Char]
wordBoundaries = markupChars <> stringBreakers

-- | Parse a hyphened sequence of words
hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text
hyphenedWords :: PandocMonad m => TextileParser m Text
hyphenedWords = do
  x <- wordChunk
  xs <- many (try $ char '-' >> wordChunk)
  return $ T.intercalate "-" (x:xs)

wordChunk :: PandocMonad m => ParserT Text ParserState m Text
wordChunk :: PandocMonad m => TextileParser m Text
wordChunk = try $ do
  hd <- noneOf wordBoundaries
  tl <- many ( noneOf wordBoundaries <|>
@@ -522,7 +526,7 @@ wordChunk = try $ do
  return $ T.pack $ hd:tl

-- | Any string
str :: PandocMonad m => ParserT Text ParserState m Inlines
str :: PandocMonad m => TextileParser m Inlines
str = do
  baseStr <- hyphenedWords
  -- RedCloth compliance : if parsed word is uppercase and immediately
@@ -535,11 +539,11 @@ str = do
  return $ B.str fullStr

-- | Some number of space chars
whitespace :: PandocMonad m => ParserT Text st m Inlines
whitespace :: PandocMonad m => TextileParser m Inlines
whitespace = many1 spaceChar >> return B.space <?> "whitespace"

-- | In Textile, an isolated endline character is a line break
endline :: PandocMonad m => ParserT Text ParserState m Inlines
endline :: PandocMonad m => TextileParser m Inlines
endline = try $ do
  newline
  notFollowedBy blankline
@@ -547,18 +551,18 @@ endline = try $ do
  notFollowedBy rawHtmlBlock
  return B.linebreak

rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines
rawHtmlInline :: PandocMonad m => TextileParser m Inlines
rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag

-- | Raw LaTeX Inline
rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines
rawLaTeXInline' :: PandocMonad m => TextileParser m Inlines
rawLaTeXInline' = try $ do
  guardEnabled Ext_raw_tex
  B.rawInline "latex" <$> rawLaTeXInline

-- | Textile standard link syntax is "label":target. But we
-- can also have ["label":target].
link :: PandocMonad m => ParserT Text ParserState m Inlines
link :: PandocMonad m => TextileParser m Inlines
link = try $ do
  bracketed <- (True <$ char '[') <|> return False
  char '"' *> notFollowedBy (oneOf " \t\n\r")
@@ -578,7 +582,7 @@ link = try $ do
  else B.spanWith attr $ B.link url "" name'

-- | image embedding
image :: PandocMonad m => ParserT Text ParserState m Inlines
image :: PandocMonad m => TextileParser m Inlines
image = try $ do
  char '!' >> notFollowedBy space
  (ident, cls, kvs) <- attributes
@@ -590,51 +594,51 @@ image = try $ do
  char '!'
  return $ B.imageWith attr src alt (B.str alt)

escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines
escapedInline :: PandocMonad m => TextileParser m Inlines
escapedInline = escapedEqs <|> escapedTag

escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines
escapedEqs :: PandocMonad m => TextileParser m Inlines
escapedEqs = B.str . T.pack <$>
  try (string "==" *> manyTill anyChar' (try $ string "=="))

-- | literal text escaped btw <notextile> tags
escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines
escapedTag :: PandocMonad m => TextileParser m Inlines
escapedTag = B.str . T.pack <$>
  try (string "<notextile>" *>
        manyTill anyChar' (try $ string "</notextile>"))

-- | Any special symbol defined in wordBoundaries
symbol :: PandocMonad m => ParserT Text ParserState m Inlines
symbol :: PandocMonad m => TextileParser m Inlines
symbol = B.str . T.singleton <$> (notFollowedBy newline *>
                                  notFollowedBy rawHtmlBlock *>
                                  oneOf wordBoundaries)

-- | Inline code
code :: PandocMonad m => ParserT Text ParserState m Inlines
code :: PandocMonad m => TextileParser m Inlines
code = code1 <|> code2

-- any character except a newline before a blank line
anyChar' :: PandocMonad m => ParserT Text ParserState m Char
anyChar' :: PandocMonad m => TextileParser m Char
anyChar' =
  satisfy (/='\n') <|>
  try (char '\n' <* notFollowedBy blankline)

code1 :: PandocMonad m => ParserT Text ParserState m Inlines
code1 :: PandocMonad m => TextileParser m Inlines
code1 = B.code . T.pack <$> surrounded (char '@') anyChar'

code2 :: PandocMonad m => ParserT Text ParserState m Inlines
code2 :: PandocMonad m => TextileParser m Inlines
code2 = do
  htmlTag (tagOpen (=="tt") null)
  B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))

-- | Html / CSS attributes
attributes :: PandocMonad m => ParserT Text ParserState m Attr
attributes :: PandocMonad m => TextileParser m Attr
attributes = foldl' (flip ($)) ("",[],[]) <$>
  try (do special <- option id specialAttribute
          attrs <- many attribute
          return (special : attrs))

specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
specialAttribute :: PandocMonad m => TextileParser m (Attr -> Attr)
specialAttribute = do
  alignStr <- ("center" <$ char '=') <|>
    ("justify" <$ try (string "<>")) <|>
@@ -643,11 +647,11 @@ specialAttribute = do
  notFollowedBy spaceChar
  return $ addStyle $ T.pack $ "text-align:" ++ alignStr

attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
attribute :: PandocMonad m => TextileParser m (Attr -> Attr)
attribute = try $
  (classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar

classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
classIdAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
classIdAttr = try $ do -- (class class #id)
  char '('
  ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')')
@@ -659,7 +663,7 @@ classIdAttr = try $ do -- (class class #id)
    classes'
      -> return $ \(_,_,keyvals) -> ("",classes',keyvals)

styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
styleAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
styleAttr = do
  style <- try $ enclosed (char '{') (char '}') anyChar'
  return $ addStyle $ T.pack style
@@ -670,23 +674,23 @@ addStyle style (id',classes,keyvals) =
  where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
        style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals]

langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
langAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
langAttr = do
  lang <- try $ enclosed (char '[') (char ']') alphaNum
  return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals)

-- | Parses material surrounded by a parser.
surrounded :: (PandocMonad m, Show t)
           => ParserT Text st m t -- ^ surrounding parser
           -> ParserT Text st m a -- ^ content parser (to be used repeatedly)
           -> ParserT Text st m [a]
           => ParserT Sources st m t -- ^ surrounding parser
           -> ParserT Sources st m a -- ^ content parser (to be used repeatedly)
           -> ParserT Sources st m [a]
surrounded border =
  enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)

simpleInline :: PandocMonad m
             => ParserT Text ParserState m t -- ^ surrounding parser
             => TextileParser m t -- ^ surrounding parser
             -> (Inlines -> Inlines) -- ^ Inline constructor
             -> ParserT Text ParserState m Inlines -- ^ content parser (to be used repeatedly)
             -> TextileParser m Inlines -- ^ content parser (to be used repeatedly)
simpleInline border construct = try $ do
  notAfterString
  border *> notFollowedBy (oneOf " \t\n\r")
@@ -700,7 +704,7 @@ simpleInline border construct = try $ do
  then body
  else B.spanWith attr body

groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
groupedInlineMarkup :: PandocMonad m => TextileParser m Inlines
groupedInlineMarkup = try $ do
  char '['
  sp1 <- option mempty $ B.space <$ whitespace
@@ -709,5 +713,5 @@ groupedInlineMarkup = try $ do
  char ']'
  return $ sp1 <> result <> sp2

eof' :: Monad m => ParserT Text s m Char
eof' :: Monad m => ParserT Sources s m Char
eof' = '\n' <$ eof

@@ -30,23 +30,23 @@ import Text.Pandoc.Definition
import Text.Pandoc.Logging (Verbosity (..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
import Text.Pandoc.Shared (crFilter, safeRead)
import Text.Pandoc.Shared (safeRead)
import Text.Pandoc.XML (fromEntities)
import Text.Printf (printf)

-- | Read TikiWiki from an input string and return a Pandoc document.
readTikiWiki :: PandocMonad m
readTikiWiki :: (PandocMonad m, ToSources a)
             => ReaderOptions
             -> Text
             -> a
             -> m Pandoc
readTikiWiki opts s = do
  res <- readWithM parseTikiWiki def{ stateOptions = opts }
           (crFilter s <> "\n\n")
  let sources = ensureFinalNewlines 2 (toSources s)
  res <- readWithM parseTikiWiki def{ stateOptions = opts } sources
  case res of
       Left e -> throwError e
       Right d -> return d

type TikiWikiParser = ParserT Text ParserState
type TikiWikiParser = ParserT Sources ParserState

--
-- utility functions

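A small sketch, not part of this commit, of the `ensureFinalNewlines 2` call that the readers above now use in place of appending a literal `"\n\n"`: it pads only as many newlines as are missing at the end of the last source.

{-# LANGUAGE OverloadedStrings #-}
module EnsureNewlinesDemo (padded) where

import Data.Text (Text)
import Text.Pandoc.Sources

-- One newline is already present, so exactly one more is added.
padded :: Text
padded = sourcesToText (ensureFinalNewlines 2 (toSources ("hello\n" :: Text)))
-- padded should equal "hello\n\n"
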
@@ -33,9 +33,9 @@ import Data.Time (defaultTimeLocale)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (space, spaces, uri)
import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI)
import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI)

type T2T = ParserT Text ParserState (Reader T2TMeta)
type T2T = ParserT Sources ParserState (Reader T2TMeta)

-- | An object for the T2T macros meta information
-- the contents of each field is simply substituted verbatim into the file
@@ -68,15 +68,15 @@ getT2TMeta = do
  (intercalate ", " inps) outp

-- | Read Txt2Tags from an input string returning a Pandoc document
readTxt2Tags :: PandocMonad m
readTxt2Tags :: (PandocMonad m, ToSources a)
             => ReaderOptions
             -> Text
             -> a
             -> m Pandoc
readTxt2Tags opts s = do
  let sources = ensureFinalNewlines 2 (toSources s)
  meta <- getT2TMeta
  let parsed = flip runReader meta $
        readWithM parseT2T (def {stateOptions = opts}) $
          crFilter s <> "\n\n"
        readWithM parseT2T (def {stateOptions = opts}) sources
  case parsed of
    Right result -> return result
    Left e -> throwError e

@@ -74,23 +74,28 @@ import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress,
                            many1Till, orderedListMarker, readWithM,
                            registerHeader, spaceChar, stateMeta,
                            stateOptions, uri, manyTillChar, manyChar, textStr,
                            many1Char, countChar, many1TillChar)
import Text.Pandoc.Shared (crFilter, splitTextBy, stringify, stripFirstAndLast,
                            many1Char, countChar, many1TillChar,
                            alphaNum, anyChar, char, newline, noneOf, oneOf,
                            space, spaces, string)
import Text.Pandoc.Sources (ToSources(..), Sources)
import Text.Pandoc.Shared (splitTextBy, stringify, stripFirstAndLast,
                           isURI, tshow)
import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space,
                         spaces, string)
import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1,
                               manyTill, notFollowedBy, option, skipMany1)
import Text.Parsec.Prim (getState, many, try, updateState, (<|>))

readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readVimwiki :: (PandocMonad m, ToSources a)
            => ReaderOptions
            -> a
            -> m Pandoc
readVimwiki opts s = do
  res <- readWithM parseVimwiki def{ stateOptions = opts } $ crFilter s
  let sources = toSources s
  res <- readWithM parseVimwiki def{ stateOptions = opts } sources
  case res of
    Left e -> throwError e
    Right result -> return result

type VwParser = ParserT Text ParserState
type VwParser = ParserT Sources ParserState


-- constants

@@ -298,6 +298,7 @@ tabFilter tabStop = T.unlines . map go . T.lines
            (tabStop - (T.length s1 `mod` tabStop)) (T.pack " ")
            <> go (T.drop 1 s2)

{-# DEPRECATED crFilter "readers filter crs automatically" #-}
-- | Strip out DOS line endings.
crFilter :: T.Text -> T.Text
crFilter = T.filter (/= '\r')

src/Text/Pandoc/Sources.hs (new file, 195 lines)
@@ -0,0 +1,195 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Sources
Copyright : Copyright (C) 2021 John MacFarlane
License : GNU GPL, version 2 or above

Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable

Defines Sources object to be used as input to pandoc parsers and redefines Char
parsers so they get source position information from it.
-}

module Text.Pandoc.Sources
  ( Sources(..)
  , ToSources(..)
  , UpdateSourcePos(..)
  , sourcesToText
  , initialSourceName
  , addToSources
  , ensureFinalNewlines
  , addToInput
  , satisfy
  , oneOf
  , noneOf
  , anyChar
  , char
  , string
  , newline
  , space
  , spaces
  , letter
  , digit
  , hexDigit
  , alphaNum
  )
where
import qualified Text.Parsec as P
import Text.Parsec (Stream(..), ParsecT)
import Text.Parsec.Pos as P
import Data.Text (Text)
import qualified Data.Text as T
import Data.Char (isSpace, isLetter, isAlphaNum, isDigit, isHexDigit)
import Data.String (IsString(..))
import qualified Data.List.NonEmpty as NonEmpty

-- | A list of inputs labeled with source positions. It is assumed
-- that the 'Text's have @\n@ line endings.
newtype Sources = Sources { unSources :: [(SourcePos, Text)] }
  deriving (Show, Semigroup, Monoid)

instance Monad m => Stream Sources m Char where
  uncons (Sources []) = return Nothing
  uncons (Sources ((pos,t):rest)) =
    case T.uncons t of
      Nothing -> uncons (Sources rest)
      Just (c,t') -> return $ Just (c, Sources ((pos,t'):rest))

instance IsString Sources where
  fromString s = Sources [(P.initialPos "", T.pack (filter (/='\r') s))]

class ToSources a where
  toSources :: a -> Sources

instance ToSources Text where
  toSources t = Sources [(P.initialPos "", T.filter (/='\r') t)]

instance ToSources [(FilePath, Text)] where
  toSources = Sources
            . map (\(fp,t) ->
                    (P.initialPos fp, T.snoc (T.filter (/='\r') t) '\n'))

instance ToSources Sources where
  toSources = id

sourcesToText :: Sources -> Text
sourcesToText (Sources xs) = mconcat $ map snd xs

addToSources :: Monad m => SourcePos -> Text -> ParsecT Sources u m ()
addToSources pos t = do
  curpos <- P.getPosition
  Sources xs <- P.getInput
  let xs' = case xs of
              [] -> []
              ((_,t'):rest) -> (curpos,t'):rest
  P.setInput $ Sources ((pos, T.filter (/='\r') t):xs')

ensureFinalNewlines :: Int -- ^ number of trailing newlines
                    -> Sources
                    -> Sources
ensureFinalNewlines n (Sources xs) =
  case NonEmpty.nonEmpty xs of
    Nothing -> Sources [(initialPos "", T.replicate n "\n")]
    Just lst ->
      case NonEmpty.last lst of
        (spos, t) ->
          case T.length (T.takeWhileEnd (=='\n') t) of
            len | len >= n -> Sources xs
                | otherwise -> Sources (NonEmpty.init lst ++
                                        [(spos,
                                          t <> T.replicate (n - len) "\n")])

class UpdateSourcePos s c where
  updateSourcePos :: SourcePos -> c -> s -> SourcePos

instance UpdateSourcePos Text Char where
  updateSourcePos pos c _ = updatePosChar pos c

instance UpdateSourcePos Sources Char where
  updateSourcePos pos c sources =
    case sources of
      Sources [] -> updatePosChar pos c
      Sources ((_,t):(pos',_):_)
        | T.null t -> pos'
      Sources _ ->
        case c of
          '\n' -> incSourceLine (setSourceColumn pos 1) 1
          '\t' -> incSourceColumn pos (4 - ((sourceColumn pos - 1) `mod` 4))
          _ -> incSourceColumn pos 1

-- | Get name of first source in 'Sources'.
initialSourceName :: Sources -> FilePath
initialSourceName (Sources []) = ""
initialSourceName (Sources ((pos,_):_)) = sourceName pos

-- | Add some text to the beginning of the input sources.
-- This simplifies code that expands macros.
addToInput :: Monad m => Text -> ParsecT Sources u m ()
addToInput t = do
  Sources xs <- P.getInput
  case xs of
    [] -> P.setInput $ Sources [(initialPos "",t)]
    (pos,t'):rest -> P.setInput $ Sources ((pos, t <> t'):rest)

-- We need to redefine the parsers in Text.Parsec.Char so that they
-- update source positions properly from the Sources stream.

satisfy :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
        => (Char -> Bool) -> ParsecT s u m Char
satisfy f = P.tokenPrim show updateSourcePos matcher
 where
  matcher c = if f c then Just c else Nothing

oneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
      => [Char] -> ParsecT s u m Char
oneOf cs = satisfy (`elem` cs)

noneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
       => [Char] -> ParsecT s u m Char
noneOf cs = satisfy (`notElem` cs)

anyChar :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
        => ParsecT s u m Char
anyChar = satisfy (const True)

char :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
     => Char -> ParsecT s u m Char
char c = satisfy (== c)

string :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
       => [Char] -> ParsecT s u m [Char]
string = mapM char

newline :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
        => ParsecT s u m Char
newline = satisfy (== '\n')

space :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
      => ParsecT s u m Char
space = satisfy isSpace

spaces :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
       => ParsecT s u m ()
spaces = P.skipMany space P.<?> "white space"

letter :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
       => ParsecT s u m Char
letter = satisfy isLetter

alphaNum :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
         => ParsecT s u m Char
alphaNum = satisfy isAlphaNum

digit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
      => ParsecT s u m Char
digit = satisfy isDigit

hexDigit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
         => ParsecT s u m Char
hexDigit = satisfy isHexDigit

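An illustrative sketch, not part of this commit, showing how the pieces above fit together: a `Sources` built from two labeled inputs, the position-tracking `string`/`newline`/`letter` defined in this module, and `initialSourceName` to seed the starting position. The file names are hypothetical.

{-# LANGUAGE OverloadedStrings #-}
module SourcesDemo (demo) where

import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Sources
import Text.Parsec (ParseError, runParser, getPosition, setPosition, many1)
import Text.Parsec.Pos (initialPos, sourceName)

-- Consume the whole first input, then report where the parser thinks it is.
demo :: Either ParseError (FilePath, Text)
demo = runParser p () "" srcs
  where
    srcs = toSources ([ ("intro.md", "hello")
                      , ("body.md", "world") ] :: [(FilePath, Text)])
    p = do
      setPosition (initialPos (initialSourceName srcs))
      _ <- string "hello"   -- the Sources-aware string from this module
      _ <- newline          -- the ToSources instance adds one final newline per input
      pos <- getPosition    -- expected: "body.md", line 1, column 1
      w <- many1 letter
      return (sourceName pos, T.pack w)
-- demo should evaluate to Right ("body.md", "world")
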
@@ -374,8 +374,8 @@ tests = [ testGroup "inline code"
          , testGroup "lhs"
            [ test (purely $ readMarkdown def{ readerExtensions = enableExtension
                                 Ext_literate_haskell pandocExtensions })
              "inverse bird tracks and html" $
              "> a\n\n< b\n\n<div>\n"
              "inverse bird tracks and html"
              $ ("> a\n\n< b\n\n<div>\n" :: Text)
              =?> codeBlockWith ("",["haskell","literate"],[]) "a"
                  <>
                  codeBlockWith ("",["haskell"],[]) "b"