Working on readers.

This commit is contained in:
Jesse Rosenthal 2016-11-28 17:13:46 -05:00 committed by John MacFarlane
parent 840439ab2a
commit b53ebcdf8e
26 changed files with 1324 additions and 1153 deletions

View file

@ -69,7 +69,6 @@ module Text.Pandoc
, writers
-- * Readers: converting /to/ Pandoc format
, Reader (..)
, mkStringReader
, readDocx
, readOdt
, readMarkdown
@ -183,7 +182,7 @@ import Text.Pandoc.Options
import Text.Pandoc.Shared (safeRead, warn, mapLeft, pandocVersion)
import Text.Pandoc.MediaBag (MediaBag)
import Text.Pandoc.Error
import Text.Pandoc.Class (PandocMonad, runIOorExplode)
import Text.Pandoc.Class (PandocMonad, runIOorExplode, PandocExecutionError(..))
import Data.Aeson
import qualified Data.ByteString.Lazy as BL
import Data.List (intercalate)
@ -192,6 +191,7 @@ import qualified Data.Set as Set
import Text.Parsec
import Text.Parsec.Error
import qualified Text.Pandoc.UTF8 as UTF8
import Control.Monad.Except (throwError)
parseFormatSpec :: String
-> Either ParseError (String, Set Extension -> Set Extension)
@ -216,55 +216,58 @@ parseFormatSpec = parse formatSpec ""
-- TODO: when we get the PandocMonad stuff all sorted out,
-- we can simply these types considerably. Errors/MediaBag can be
-- part of the monad's internal state.
data Reader m = StringReader (ReaderOptions -> String -> m (Either PandocError Pandoc))
| ByteStringReader (ReaderOptions -> BL.ByteString -> m (Either PandocError (Pandoc,MediaBag)))
data Reader m = StringReader (ReaderOptions -> String -> m Pandoc)
| ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc)
mkStringReader :: (ReaderOptions -> String -> Either PandocError Pandoc) -> Reader IO
mkStringReader r = StringReader (\o s -> return $ r o s)
-- mkStringReader :: (ReaderOptions -> String -> Either PandocError Pandoc) -> Reader IO
-- mkStringReader r = StringReader (\o s -> return $ r o s)
mkStringReaderWithWarnings :: (ReaderOptions -> String -> Either PandocError (Pandoc, [String])) -> Reader IO
mkStringReaderWithWarnings r = StringReader $ \o s ->
case r o s of
Left err -> return $ Left err
Right (doc, warnings) -> do
mapM_ warn warnings
return (Right doc)
-- mkStringReaderWithWarnings :: (ReaderOptions -> String -> Either PandocError (Pandoc, [String])) -> Reader IO
-- mkStringReaderWithWarnings r = StringReader $ \o s ->
-- case r o s of
-- Left err -> return $ Left err
-- Right (doc, warnings) -> do
-- mapM_ warn warnings
-- return (Right doc)
mkBSReader :: (ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag)) -> Reader IO
mkBSReader r = ByteStringReader (\o s -> return $ r o s)
-- mkBSReader :: (ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag)) -> Reader IO
-- mkBSReader r = ByteStringReader (\o s -> return $ r o s)
mkBSReaderWithWarnings :: (ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag, [String])) -> Reader IO
mkBSReaderWithWarnings r = ByteStringReader $ \o s ->
case r o s of
Left err -> return $ Left err
Right (doc, mediaBag, warnings) -> do
mapM_ warn warnings
return $ Right (doc, mediaBag)
-- mkBSReaderWithWarnings :: (ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag, [String])) -> Reader IO
-- mkBSReaderWithWarnings r = ByteStringReader $ \o s ->
-- case r o s of
-- Left err -> return $ Left err
-- Right (doc, mediaBag, warnings) -> do
-- mapM_ warn warnings
-- return $ Right (doc, mediaBag)
-- | Association list of formats and readers.
readers :: [(String, Reader IO)]
readers = [ ("native" , StringReader $ \_ s -> runIOorExplode (readNative s))
,("json" , mkStringReader readJSON )
,("markdown" , mkStringReaderWithWarnings readMarkdownWithWarnings)
,("markdown_strict" , mkStringReaderWithWarnings readMarkdownWithWarnings)
,("markdown_phpextra" , mkStringReaderWithWarnings readMarkdownWithWarnings)
,("markdown_github" , mkStringReaderWithWarnings readMarkdownWithWarnings)
,("markdown_mmd", mkStringReaderWithWarnings readMarkdownWithWarnings)
,("commonmark" , mkStringReader readCommonMark)
,("rst" , mkStringReaderWithWarnings readRSTWithWarnings )
,("mediawiki" , mkStringReader readMediaWiki)
,("docbook" , mkStringReader readDocBook)
,("opml" , mkStringReader readOPML)
,("org" , mkStringReader readOrg)
,("textile" , mkStringReader readTextile) -- TODO : textile+lhs
,("html" , mkStringReader readHtml)
,("latex" , mkStringReader readLaTeX)
,("haddock" , mkStringReader readHaddock)
,("twiki" , mkStringReader readTWiki)
,("docx" , mkBSReaderWithWarnings readDocxWithWarnings)
,("odt" , mkBSReader readOdt)
,("t2t" , mkStringReader readTxt2TagsNoMacros)
,("epub" , mkBSReader readEPUB)
readers :: PandocMonad m => [(String, Reader m)]
readers = [ ("native" , StringReader $ \_ s -> readNative s)
,("json" , StringReader $ \o s ->
case readJSON o s of
Right doc -> return doc
Left _ -> throwError $ PandocParseError "JSON parse error")
,("markdown" , StringReader readMarkdown)
,("markdown_strict" , StringReader readMarkdown)
,("markdown_phpextra" , StringReader readMarkdown)
,("markdown_github" , StringReader readMarkdown)
,("markdown_mmd", StringReader readMarkdown)
,("commonmark" , StringReader readCommonMark)
,("rst" , StringReader readRSTWithWarnings )
,("mediawiki" , StringReader readMediaWiki)
,("docbook" , StringReader readDocBook)
,("opml" , StringReader readOPML)
,("org" , StringReader readOrg)
,("textile" , StringReader readTextile) -- TODO : textile+lhs
,("html" , StringReader readHtml)
,("latex" , StringReader readLaTeX)
,("haddock" , StringReader readHaddock)
,("twiki" , StringReader readTWiki)
,("docx" , ByteStringReader readDocx)
,("odt" , ByteStringReader readOdt)
-- ,("t2t" , mkStringReader readTxt2TagsNoMacros)
,("epub" , ByteStringReader readEPUB)
]
data Writer m = StringWriter (WriterOptions -> Pandoc -> m String)
@ -351,7 +354,7 @@ getDefaultExtensions "epub" = Set.fromList [Ext_raw_html,
getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers]
-- | Retrieve reader based on formatSpec (format+extensions).
getReader :: String -> Either String (Reader IO)
getReader :: PandocMonad m => String -> Either String (Reader m)
getReader s =
case parseFormatSpec s of
Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e]

View file

@ -35,6 +35,7 @@ module Text.Pandoc.Class ( PandocMonad(..)
, PureState(..)
, PureEnv(..)
, getPOSIXTime
, addWarningWithPos
, PandocIO(..)
, PandocPure(..)
, PandocExecutionError(..)
@ -57,6 +58,7 @@ import qualified Text.Pandoc.Shared as IO ( fetchItem
, warn
, readDataFile)
import Text.Pandoc.Compat.Time (UTCTime)
import Text.Pandoc.Parsing (ParserT, ParserState, SourcePos)
import qualified Text.Pandoc.Compat.Time as IO (getCurrentTime)
import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds
, posixSecondsToUTCTime
@ -109,6 +111,14 @@ class (Functor m, Applicative m, Monad m, MonadError PandocExecutionError m) =>
getPOSIXTime :: (PandocMonad m) => m POSIXTime
getPOSIXTime = utcTimeToPOSIXSeconds <$> getCurrentTime
addWarningWithPos :: PandocMonad m
=> Maybe SourcePos
-> String
-> ParserT [Char] ParserState m ()
addWarningWithPos mbpos msg =
lift $
warn $
msg ++ maybe "" (\pos -> " " ++ show pos) mbpos
-- We can add to this as we go
data PandocExecutionError = PandocFileReadError FilePath

View file

@ -37,11 +37,12 @@ import Data.Text (unpack, pack)
import Data.List (groupBy)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Error
import Text.Pandoc.Class (PandocMonad)
-- | Parse a CommonMark formatted string into a 'Pandoc' structure.
readCommonMark :: ReaderOptions -> String -> Either PandocError Pandoc
readCommonMark opts = Right . nodeToPandoc . commonmarkToNode opts' . pack
readCommonMark :: PandocMonad m => ReaderOptions -> String -> m Pandoc
readCommonMark opts s = return $
nodeToPandoc $ commonmarkToNode opts' $ pack s
where opts' = if readerSmart opts
then [optNormalize, optSmart]
else [optNormalize]

View file

@ -13,10 +13,9 @@ import Control.Monad.State
import Data.List (intersperse)
import Data.Maybe (fromMaybe)
import Text.TeXMath (readMathML, writeTeX)
import Text.Pandoc.Error (PandocError)
import Control.Monad.Except
import Data.Default
import Data.Foldable (asum)
import Text.Pandoc.Class (PandocMonad)
{-
@ -502,7 +501,7 @@ List of all DocBook tags, with [x] indicating implemented,
[x] ?asciidoc-br? - line break from asciidoc docbook output
-}
type DB = ExceptT PandocError (State DBState)
type DB m = StateT DBState m
data DBState = DBState{ dbSectionLevel :: Int
, dbQuoteType :: QuoteType
@ -523,10 +522,11 @@ instance Default DBState where
, dbContent = [] }
readDocBook :: ReaderOptions -> String -> Either PandocError Pandoc
readDocBook _ inp = (\blocks -> Pandoc (dbMeta st') (toList . mconcat $ blocks)) <$> bs
where (bs , st') = flip runState (def{ dbContent = tree }) . runExceptT . mapM parseBlock $ tree
tree = normalizeTree . parseXML . handleInstructions $ inp
readDocBook :: PandocMonad m => ReaderOptions -> String -> m Pandoc
readDocBook _ inp = do
let tree = normalizeTree . parseXML . handleInstructions $ inp
(bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock $ tree
return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
-- We treat <?asciidoc-br?> specially (issue #1236), converting it
-- to <br/>, since xml-light doesn't parse the instruction correctly.
@ -538,7 +538,7 @@ handleInstructions xs = case break (=='<') xs of
([], '<':zs) -> '<' : handleInstructions zs
(ys, zs) -> ys ++ handleInstructions zs
getFigure :: Element -> DB Blocks
getFigure :: PandocMonad m => Element -> DB m Blocks
getFigure e = do
tit <- case filterChild (named "title") e of
Just t -> getInlines t
@ -579,20 +579,20 @@ named s e = qName (elName e) == s
--
acceptingMetadata :: DB a -> DB a
acceptingMetadata :: PandocMonad m => DB m a -> DB m a
acceptingMetadata p = do
modify (\s -> s { dbAcceptsMeta = True } )
res <- p
modify (\s -> s { dbAcceptsMeta = False })
return res
checkInMeta :: Monoid a => DB () -> DB a
checkInMeta :: (PandocMonad m, Monoid a) => DB m () -> DB m a
checkInMeta p = do
accepts <- dbAcceptsMeta <$> get
when accepts p
return mempty
addMeta :: ToMetaValue a => String -> a -> DB ()
addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m ()
addMeta field val = modify (setMeta field val)
instance HasMeta DBState where
@ -631,7 +631,7 @@ addToStart toadd bs =
-- function that is used by both mediaobject (in parseBlock)
-- and inlinemediaobject (in parseInline)
-- A DocBook mediaobject is a wrapper around a set of alternative presentations
getMediaobject :: Element -> DB Inlines
getMediaobject :: PandocMonad m => Element -> DB m Inlines
getMediaobject e = do
(imageUrl, attr) <-
case filterChild (named "imageobject") e of
@ -658,11 +658,11 @@ getMediaobject e = do
else (return figTitle, "fig:")
liftM (imageWith attr imageUrl title) caption
getBlocks :: Element -> DB Blocks
getBlocks :: PandocMonad m => Element -> DB m Blocks
getBlocks e = mconcat <$> (mapM parseBlock $ elContent e)
parseBlock :: Content -> DB Blocks
parseBlock :: PandocMonad m => Content -> DB m Blocks
parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
parseBlock (Text (CData _ s _)) = if all isSpace s
then return mempty
@ -902,7 +902,7 @@ parseBlock (Elem e) =
lineItems = mapM getInlines $ filterChildren (named "line") e
metaBlock = acceptingMetadata (getBlocks e) >> return mempty
getInlines :: Element -> DB Inlines
getInlines :: PandocMonad m => Element -> DB m Inlines
getInlines e' = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e')
strContentRecursive :: Element -> String
@ -913,7 +913,7 @@ elementToStr :: Content -> Content
elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
elementToStr x = x
parseInline :: Content -> DB Inlines
parseInline :: PandocMonad m => Content -> DB m Inlines
parseInline (Text (CData _ s _)) = return $ text s
parseInline (CRef ref) =
return $ maybe (text $ map toUpper ref) (text) $ lookupEntity ref

View file

@ -82,7 +82,7 @@ import Text.Pandoc.Readers.Docx.Parse
import Text.Pandoc.Readers.Docx.Lists
import Text.Pandoc.Readers.Docx.Combine
import Text.Pandoc.Shared
import Text.Pandoc.MediaBag (insertMedia, MediaBag)
import Text.Pandoc.MediaBag (MediaBag)
import Data.List (delete, intersect)
import Text.TeXMath (writeTeX)
import Data.Default (Default)
@ -96,27 +96,28 @@ import qualified Data.Sequence as Seq (null)
#if !(MIN_VERSION_base(4,8,0))
import Data.Traversable (traverse)
#endif
import Control.Monad.Except (throwError)
import Text.Pandoc.Class (PandocMonad, PandocExecutionError(..))
import qualified Text.Pandoc.Class as P
import Text.Pandoc.Error
import Control.Monad.Except
readDocxWithWarnings :: ReaderOptions
-> B.ByteString
-> Either PandocError (Pandoc, MediaBag, [String])
readDocxWithWarnings opts bytes
readDocx :: PandocMonad m
=> ReaderOptions
-> B.ByteString
-> m Pandoc
readDocx opts bytes
| Right archive <- toArchiveOrFail bytes
, Right (docx, parserWarnings) <- archiveToDocxWithWarnings archive = do
(meta, blks, mediaBag, warnings) <- docxToOutput opts docx
return (Pandoc meta blks, mediaBag, parserWarnings ++ warnings)
readDocxWithWarnings _ _ =
Left (ParseFailure "couldn't parse docx file")
mapM_ P.warn parserWarnings
(meta, blks) <- docxToOutput opts docx
return $ Pandoc meta blks
readDocx _ _ =
throwError $ PandocSomeError "couldn't parse docx file"
readDocx :: ReaderOptions
readDocxWithWarnings :: PandocMonad m
=> ReaderOptions
-> B.ByteString
-> Either PandocError (Pandoc, MediaBag)
readDocx opts bytes = do
(pandoc, mediaBag, _) <- readDocxWithWarnings opts bytes
return (pandoc, mediaBag)
-> m Pandoc
readDocxWithWarnings = readDocx
data DState = DState { docxAnchorMap :: M.Map String String
, docxMediaBag :: MediaBag
@ -137,15 +138,10 @@ data DEnv = DEnv { docxOptions :: ReaderOptions
instance Default DEnv where
def = DEnv def False
type DocxContext = ExceptT PandocError (ReaderT DEnv (State DState))
type DocxContext m = ReaderT DEnv (StateT DState m)
evalDocxContext :: DocxContext a -> DEnv -> DState -> Either PandocError a
evalDocxContext ctx env st = flip evalState st . flip runReaderT env . runExceptT $ ctx
addDocxWarning :: String -> DocxContext ()
addDocxWarning msg = do
warnings <- gets docxWarnings
modify $ \s -> s {docxWarnings = msg : warnings}
evalDocxContext :: PandocMonad m => DocxContext m a -> DEnv -> DState -> m a
evalDocxContext ctx env st = flip evalStateT st $ flip runReaderT env $ ctx
-- This is empty, but we put it in for future-proofing.
spansToKeep :: [String]
@ -179,7 +175,7 @@ isEmptyPar (Paragraph _ parParts) =
isEmptyElem _ = True
isEmptyPar _ = False
bodyPartsToMeta' :: [BodyPart] -> DocxContext (M.Map String MetaValue)
bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String MetaValue)
bodyPartsToMeta' [] = return M.empty
bodyPartsToMeta' (bp : bps)
| (Paragraph pPr parParts) <- bp
@ -195,7 +191,7 @@ bodyPartsToMeta' (bp : bps)
return $ M.insertWith f metaField (MetaInlines (toList inlines)) remaining
bodyPartsToMeta' (_ : bps) = bodyPartsToMeta' bps
bodyPartsToMeta :: [BodyPart] -> DocxContext Meta
bodyPartsToMeta :: PandocMonad m => [BodyPart] -> DocxContext m Meta
bodyPartsToMeta bps = do
mp <- bodyPartsToMeta' bps
let mp' =
@ -297,7 +293,7 @@ runStyleToTransform rPr
emph . (runStyleToTransform rPr {rUnderline = Nothing})
| otherwise = id
runToInlines :: Run -> DocxContext Inlines
runToInlines :: PandocMonad m => Run -> DocxContext m Inlines
runToInlines (Run rs runElems)
| Just (s, _) <- rStyle rs
, s `elem` codeStyles =
@ -318,8 +314,7 @@ runToInlines (Endnote bps) = do
blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps)
return $ note blksList
runToInlines (InlineDrawing fp title alt bs ext) = do
mediaBag <- gets docxMediaBag
modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
(lift . lift) $ P.insertMedia fp Nothing bs
return $ imageWith (extentToAttr ext) fp title $ text alt
runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]"
@ -330,7 +325,7 @@ extentToAttr (Just (w, h)) =
showDim d = show (d / 914400) ++ "in"
extentToAttr _ = nullAttr
blocksToInlinesWarn :: String -> Blocks -> DocxContext Inlines
blocksToInlinesWarn :: PandocMonad m => String -> Blocks -> DocxContext m Inlines
blocksToInlinesWarn cmtId blks = do
let blkList = toList blks
notParaOrPlain :: Block -> Bool
@ -338,10 +333,10 @@ blocksToInlinesWarn cmtId blks = do
notParaOrPlain (Plain _) = False
notParaOrPlain _ = True
when (not $ null $ filter notParaOrPlain blkList)
(addDocxWarning $ "Docx comment " ++ cmtId ++ " will not retain formatting")
((lift . lift) $ P.warn $ "Docx comment " ++ cmtId ++ " will not retain formatting")
return $ fromList $ blocksToInlines blkList
parPartToInlines :: ParPart -> DocxContext Inlines
parPartToInlines :: PandocMonad m => ParPart -> DocxContext m Inlines
parPartToInlines (PlainRun r) = runToInlines r
parPartToInlines (Insertion _ author date runs) = do
opts <- asks docxOptions
@ -403,8 +398,7 @@ parPartToInlines (BookMark _ anchor) =
(modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
return $ spanWith (newAnchor, ["anchor"], []) mempty
parPartToInlines (Drawing fp title alt bs ext) = do
mediaBag <- gets docxMediaBag
modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
(lift . lift) $ P.insertMedia fp Nothing bs
return $ imageWith (extentToAttr ext) fp title $ text alt
parPartToInlines Chart = do
return $ spanWith ("", ["chart"], []) $ text "[CHART]"
@ -426,10 +420,10 @@ isAnchorSpan _ = False
dummyAnchors :: [String]
dummyAnchors = ["_GoBack"]
makeHeaderAnchor :: Blocks -> DocxContext Blocks
makeHeaderAnchor :: PandocMonad m => Blocks -> DocxContext m Blocks
makeHeaderAnchor bs = traverse makeHeaderAnchor' bs
makeHeaderAnchor' :: Block -> DocxContext Block
makeHeaderAnchor' :: PandocMonad m => Block -> DocxContext m Block
-- If there is an anchor already there (an anchor span in the header,
-- to be exact), we rename and associate the new id with the old one.
makeHeaderAnchor' (Header n (ident, classes, kvs) ils)
@ -463,12 +457,12 @@ singleParaToPlain blks
singleton $ Plain ils
singleParaToPlain blks = blks
cellToBlocks :: Cell -> DocxContext Blocks
cellToBlocks :: PandocMonad m => Cell -> DocxContext m Blocks
cellToBlocks (Cell bps) = do
blks <- smushBlocks <$> mapM bodyPartToBlocks bps
return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
rowToBlocksList :: Row -> DocxContext [Blocks]
rowToBlocksList :: PandocMonad m => Row -> DocxContext m [Blocks]
rowToBlocksList (Row cells) = do
blksList <- mapM cellToBlocks cells
return $ map singleParaToPlain blksList
@ -518,7 +512,7 @@ parStyleToTransform pPr
False -> parStyleToTransform pPr'
parStyleToTransform _ = id
bodyPartToBlocks :: BodyPart -> DocxContext Blocks
bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks
bodyPartToBlocks (Paragraph pPr parparts)
| not $ null $ codeDivs `intersect` (pStyle pPr) =
return
@ -597,7 +591,7 @@ bodyPartToBlocks (OMathPara e) = do
-- replace targets with generated anchors.
rewriteLink' :: Inline -> DocxContext Inline
rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline
rewriteLink' l@(Link attr ils ('#':target, title)) = do
anchorMap <- gets docxAnchorMap
return $ case M.lookup target anchorMap of
@ -605,23 +599,21 @@ rewriteLink' l@(Link attr ils ('#':target, title)) = do
Nothing -> l
rewriteLink' il = return il
rewriteLinks :: [Block] -> DocxContext [Block]
rewriteLinks :: PandocMonad m => [Block] -> DocxContext m [Block]
rewriteLinks = mapM (walkM rewriteLink')
bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag, [String])
bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block])
bodyToOutput (Body bps) = do
let (metabps, blkbps) = sepBodyParts bps
meta <- bodyPartsToMeta metabps
blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps
blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
mediaBag <- gets docxMediaBag
warnings <- gets docxWarnings
return $ (meta,
blks',
mediaBag,
warnings)
return $ (meta, blks')
docxToOutput :: ReaderOptions -> Docx -> Either PandocError (Meta, [Block], MediaBag, [String])
docxToOutput :: PandocMonad m
=> ReaderOptions
-> Docx
-> m (Meta, [Block])
docxToOutput opts (Docx (Document _ body)) =
let dEnv = def { docxOptions = opts} in
evalDocxContext (bodyToOutput body) dEnv def

View file

@ -11,13 +11,12 @@ module Text.Pandoc.Readers.EPUB
import Text.XML.Light
import Text.Pandoc.Definition hiding (Attr)
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Error
import Text.Pandoc.Walk (walk, query)
import Text.Pandoc.Options ( ReaderOptions(..), readerTrace)
import Text.Pandoc.Shared (escapeURI, collapseFilePath, addMetaField)
import Network.URI (unEscapeString)
import Text.Pandoc.MediaBag (MediaBag, insertMedia)
import Control.Monad.Except (MonadError, throwError, runExcept, Except)
import Control.Monad.Except (throwError)
import Text.Pandoc.MIME (MimeType)
import qualified Text.Pandoc.Builder as B
import Codec.Archive.Zip ( Archive (..), toArchiveOrFail, fromEntry
@ -33,23 +32,25 @@ import Data.Maybe (mapMaybe, fromMaybe)
import qualified Data.Map as M (Map, lookup, fromList, elems)
import Data.Monoid ((<>))
import Control.DeepSeq (deepseq, NFData)
import Text.Pandoc.Class (PandocMonad, PandocExecutionError(..))
import qualified Text.Pandoc.Class as P
import Debug.Trace (trace)
type Items = M.Map String (FilePath, MimeType)
readEPUB :: ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag)
readEPUB :: PandocMonad m => ReaderOptions -> BL.ByteString -> m Pandoc
readEPUB opts bytes = case toArchiveOrFail bytes of
Right archive -> runEPUB $ archiveToEPUB opts $ archive
Left _ -> Left $ ParseFailure "Couldn't extract ePub file"
Right archive -> archiveToEPUB opts $ archive
Left _ -> throwError $ PandocParseError "Couldn't extract ePub file"
runEPUB :: Except PandocError a -> Either PandocError a
runEPUB = runExcept
-- runEPUB :: Except PandocError a -> Either PandocError a
-- runEPUB = runExcept
-- Note that internal reference are aggresively normalised so that all ids
-- are of the form "filename#id"
--
archiveToEPUB :: (MonadError PandocError m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag)
archiveToEPUB :: (PandocMonad m) => ReaderOptions -> Archive -> m Pandoc
archiveToEPUB os archive = do
-- root is path to folder with manifest file in
(root, content) <- getManifest archive
@ -63,24 +64,21 @@ archiveToEPUB os archive = do
foldM' (\a b -> ((a <>) . walk (prependHash escapedSpine))
`liftM` parseSpineElem root b) mempty spine
let ast = coverDoc <> (Pandoc meta bs)
let mediaBag = fetchImages (M.elems items) root archive ast
return $ (ast, mediaBag)
P.setMediaBag $ fetchImages (M.elems items) root archive ast
return ast
where
os' = os {readerParseRaw = True}
parseSpineElem :: MonadError PandocError m => FilePath -> (FilePath, MimeType) -> m Pandoc
parseSpineElem :: PandocMonad m => FilePath -> (FilePath, MimeType) -> m Pandoc
parseSpineElem (normalise -> r) (normalise -> path, mime) = do
when (readerTrace os) (traceM path)
doc <- mimeToReader mime r path
let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty
return $ docSpan <> doc
mimeToReader :: MonadError PandocError m => MimeType -> FilePath -> FilePath -> m Pandoc
mimeToReader :: PandocMonad m => MimeType -> FilePath -> FilePath -> m Pandoc
mimeToReader "application/xhtml+xml" (unEscapeString -> root)
(unEscapeString -> path) = do
fname <- findEntryByPathE (root </> path) archive
html <- either throwError return .
readHtml os' .
UTF8.toStringLazy $
fromEntry fname
html <- readHtml os' . UTF8.toStringLazy $ fromEntry fname
return $ fixInternalReferences path html
mimeToReader s _ (unEscapeString -> path)
| s `elem` imageMimes = return $ imageToPandoc path
@ -121,7 +119,7 @@ imageMimes = ["image/gif", "image/jpeg", "image/png"]
type CoverImage = FilePath
parseManifest :: (MonadError PandocError m) => Element -> m (Maybe CoverImage, Items)
parseManifest :: (PandocMonad m) => Element -> m (Maybe CoverImage, Items)
parseManifest content = do
manifest <- findElementE (dfName "manifest") content
let items = findChildren (dfName "item") manifest
@ -137,7 +135,7 @@ parseManifest content = do
mime <- findAttrE (emptyName "media-type") e
return (uid, (href, mime))
parseSpine :: MonadError PandocError m => Items -> Element -> m [(FilePath, MimeType)]
parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)]
parseSpine is e = do
spine <- findElementE (dfName "spine") e
let itemRefs = findChildren (dfName "itemref") spine
@ -148,7 +146,7 @@ parseSpine is e = do
guard linear
findAttr (emptyName "idref") ref
parseMeta :: MonadError PandocError m => Element -> m Meta
parseMeta :: PandocMonad m => Element -> m Meta
parseMeta content = do
meta <- findElementE (dfName "metadata") content
let dcspace (QName _ (Just "http://purl.org/dc/elements/1.1/") (Just "dc")) = True
@ -166,7 +164,7 @@ renameMeta :: String -> String
renameMeta "creator" = "author"
renameMeta s = s
getManifest :: MonadError PandocError m => Archive -> m (String, Element)
getManifest :: PandocMonad m => Archive -> m (String, Element)
getManifest archive = do
metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive
docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry
@ -268,18 +266,18 @@ emptyName s = QName s Nothing Nothing
-- Convert Maybe interface to Either
findAttrE :: MonadError PandocError m => QName -> Element -> m String
findAttrE :: PandocMonad m => QName -> Element -> m String
findAttrE q e = mkE "findAttr" $ findAttr q e
findEntryByPathE :: MonadError PandocError m => FilePath -> Archive -> m Entry
findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry
findEntryByPathE (normalise -> path) a =
mkE ("No entry on path: " ++ path) $ findEntryByPath path a
parseXMLDocE :: MonadError PandocError m => String -> m Element
parseXMLDocE :: PandocMonad m => String -> m Element
parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc
findElementE :: MonadError PandocError m => QName -> Element -> m Element
findElementE :: PandocMonad m => QName -> Element -> m Element
findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
mkE :: MonadError PandocError m => String -> Maybe a -> m a
mkE s = maybe (throwError . ParseFailure $ s) return
mkE :: PandocMonad m => String -> Maybe a -> m a
mkE s = maybe (throwError . PandocParseError $ s) return

View file

@ -44,7 +44,7 @@ import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
import Text.Pandoc.Shared ( extractSpaces, renderTags', addMetaField
, escapeURI, safeRead, mapLeft )
, escapeURI, safeRead )
import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace)
, Extension (Ext_epub_html_exts,
Ext_native_divs, Ext_native_spans))
@ -62,38 +62,46 @@ import Text.Printf (printf)
import Debug.Trace (trace)
import Text.TeXMath (readMathML, writeTeX)
import Data.Default (Default (..), def)
import Control.Monad.Reader (Reader,ask, asks, local, runReader)
import Control.Monad.Reader (ask, asks, local, ReaderT, runReaderT, lift)
import Network.URI (URI, parseURIReference, nonStrictRelativeTo)
import Text.Pandoc.Error
import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
import Data.Monoid ((<>))
import Text.Parsec.Error
import qualified Data.Set as Set
import Text.Pandoc.Class (PandocMonad, PandocExecutionError(..))
import Control.Monad.Except (throwError)
-- | Convert HTML-formatted string to 'Pandoc' document.
readHtml :: ReaderOptions -- ^ Reader options
readHtml :: PandocMonad m
=> ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
-> Either PandocError Pandoc
readHtml opts inp =
mapLeft (ParseFailure . getError) . flip runReader def $
runParserT parseDoc
(HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty)
"source" tags
where tags = stripPrefixes . canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True } inp
parseDoc = do
blocks <- (fixPlains False) . mconcat <$> manyTill block eof
meta <- stateMeta . parserState <$> getState
bs' <- replaceNotes (B.toList blocks)
return $ Pandoc meta bs'
getError (errorMessages -> ms) = case ms of
[] -> ""
(m:_) -> messageString m
-> m Pandoc
readHtml opts inp = do
let tags = stripPrefixes . canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True } inp
parseDoc = do
blocks <- (fixPlains False) . mconcat <$> manyTill block eof
meta <- stateMeta . parserState <$> getState
bs' <- replaceNotes (B.toList blocks)
return $ Pandoc meta bs'
getError (errorMessages -> ms) = case ms of
[] -> ""
(m:_) -> messageString m
result <- flip runReaderT def $
runParserT parseDoc
(HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty)
"source" tags
case result of
Right doc -> return doc
Left err -> throwError $ PandocParseError $ getError err
where
replaceNotes :: [Block] -> TagParser [Block]
replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block]
replaceNotes = walkM replaceNotes'
replaceNotes' :: Inline -> TagParser Inline
replaceNotes' :: PandocMonad m => Inline -> TagParser m Inline
replaceNotes' (RawInline (Format "noteref") ref) = maybe (Str "") (Note . B.toList) . lookup ref <$> getNotes
where
getNotes = noteTable <$> getState
@ -113,20 +121,20 @@ data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
, inPlain :: Bool -- ^ Set if in pPlain
}
setInChapter :: HTMLParser s a -> HTMLParser s a
setInChapter :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a
setInChapter = local (\s -> s {inChapter = True})
setInPlain :: HTMLParser s a -> HTMLParser s a
setInPlain :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a
setInPlain = local (\s -> s {inPlain = True})
type HTMLParser s = ParserT s HTMLState (Reader HTMLLocal)
type HTMLParser m s = ParserT s HTMLState (ReaderT HTMLLocal m)
type TagParser = HTMLParser [Tag String]
type TagParser m = HTMLParser m [Tag String]
pBody :: TagParser Blocks
pBody :: PandocMonad m => TagParser m Blocks
pBody = pInTags "body" block
pHead :: TagParser Blocks
pHead :: PandocMonad m => TagParser m Blocks
pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag)
where pTitle = pInTags "title" inline >>= setTitle . trimInlines
setTitle t = mempty <$ (updateState $ B.setMeta "title" t)
@ -149,7 +157,7 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag
parseURIReference $ fromAttrib "href" bt }
return mempty
block :: TagParser Blocks
block :: PandocMonad m => TagParser m Blocks
block = do
tr <- getOption readerTrace
pos <- getPosition
@ -176,13 +184,16 @@ block = do
(take 60 $ show $ B.toList res)) (return ())
return res
namespaces :: [(String, TagParser Inlines)]
namespaces :: PandocMonad m => [(String, TagParser m Inlines)]
namespaces = [(mathMLNamespace, pMath True)]
mathMLNamespace :: String
mathMLNamespace = "http://www.w3.org/1998/Math/MathML"
eSwitch :: Monoid a => (Inlines -> a) -> TagParser a -> TagParser a
eSwitch :: (PandocMonad m, Monoid a)
=> (Inlines -> a)
-> TagParser m a
-> TagParser m a
eSwitch constructor parser = try $ do
guardEnabled Ext_epub_html_exts
pSatisfy (~== TagOpen "switch" [])
@ -195,7 +206,7 @@ eSwitch constructor parser = try $ do
pSatisfy (~== TagClose "switch")
return $ maybe fallback constructor cases
eCase :: TagParser (Maybe Inlines)
eCase :: PandocMonad m => TagParser m (Maybe Inlines)
eCase = do
skipMany pBlank
TagOpen _ attr <- lookAhead $ pSatisfy $ (~== TagOpen "case" [])
@ -203,7 +214,7 @@ eCase = do
Just p -> Just <$> (pInTags "case" (skipMany pBlank *> p <* skipMany pBlank))
Nothing -> Nothing <$ manyTill pAnyTag (pSatisfy (~== TagClose "case"))
eFootnote :: TagParser ()
eFootnote :: PandocMonad m => TagParser m ()
eFootnote = try $ do
let notes = ["footnote", "rearnote"]
guardEnabled Ext_epub_html_exts
@ -213,10 +224,10 @@ eFootnote = try $ do
content <- pInTags tag block
addNote ident content
addNote :: String -> Blocks -> TagParser ()
addNote :: PandocMonad m => String -> Blocks -> TagParser m ()
addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : (noteTable s)})
eNoteref :: TagParser Inlines
eNoteref :: PandocMonad m => TagParser m Inlines
eNoteref = try $ do
guardEnabled Ext_epub_html_exts
TagOpen tag attr <- lookAhead $ pAnyTag
@ -227,17 +238,17 @@ eNoteref = try $ do
return $ B.rawInline "noteref" ident
-- Strip TOC if there is one, better to generate again
eTOC :: TagParser ()
eTOC :: PandocMonad m => TagParser m ()
eTOC = try $ do
guardEnabled Ext_epub_html_exts
(TagOpen tag attr) <- lookAhead $ pAnyTag
guard (maybe False (== "toc") (lookup "type" attr))
void (pInTags tag block)
pList :: TagParser Blocks
pList :: PandocMonad m => TagParser m Blocks
pList = pBulletList <|> pOrderedList <|> pDefinitionList
pBulletList :: TagParser Blocks
pBulletList :: PandocMonad m => TagParser m Blocks
pBulletList = try $ do
pSatisfy (~== TagOpen "ul" [])
let nonItem = pSatisfy (\t ->
@ -249,7 +260,7 @@ pBulletList = try $ do
items <- manyTill (pListItem nonItem) (pCloses "ul")
return $ B.bulletList $ map (fixPlains True) items
pListItem :: TagParser a -> TagParser Blocks
pListItem :: PandocMonad m => TagParser m a -> TagParser m Blocks
pListItem nonItem = do
TagOpen _ attr <- lookAhead $ pSatisfy (~== TagOpen "li" [])
let liDiv = maybe mempty (\x -> B.divWith (x, [], []) mempty) (lookup "id" attr)
@ -271,7 +282,7 @@ parseTypeAttr "A" = UpperAlpha
parseTypeAttr "1" = Decimal
parseTypeAttr _ = DefaultStyle
pOrderedList :: TagParser Blocks
pOrderedList :: PandocMonad m => TagParser m Blocks
pOrderedList = try $ do
TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" [])
let (start, style) = (sta', sty')
@ -302,13 +313,13 @@ pOrderedList = try $ do
items <- manyTill (pListItem nonItem) (pCloses "ol")
return $ B.orderedListWith (start, style, DefaultDelim) $ map (fixPlains True) items
pDefinitionList :: TagParser Blocks
pDefinitionList :: PandocMonad m => TagParser m Blocks
pDefinitionList = try $ do
pSatisfy (~== TagOpen "dl" [])
items <- manyTill pDefListItem (pCloses "dl")
return $ B.definitionList items
pDefListItem :: TagParser (Inlines, [Blocks])
pDefListItem :: PandocMonad m => TagParser m (Inlines, [Blocks])
pDefListItem = try $ do
let nonItem = pSatisfy (\t -> not (t ~== TagOpen "dt" []) &&
not (t ~== TagOpen "dd" []) && not (t ~== TagClose "dl"))
@ -334,7 +345,7 @@ fixPlains inList bs = if any isParaish bs'
plainToPara x = x
bs' = B.toList bs
pRawTag :: TagParser String
pRawTag :: PandocMonad m => TagParser m String
pRawTag = do
tag <- pAnyTag
let ignorable x = x `elem` ["html","head","body","!DOCTYPE","?xml"]
@ -342,7 +353,7 @@ pRawTag = do
then return []
else return $ renderTags' [tag]
pDiv :: TagParser Blocks
pDiv :: PandocMonad m => TagParser m Blocks
pDiv = try $ do
guardEnabled Ext_native_divs
let isDivLike "div" = True
@ -356,7 +367,7 @@ pDiv = try $ do
else classes
return $ B.divWith (ident, classes', kvs) contents
pRawHtmlBlock :: TagParser Blocks
pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
pRawHtmlBlock = do
raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag
parseRaw <- getOption readerParseRaw
@ -364,21 +375,21 @@ pRawHtmlBlock = do
then return $ B.rawBlock "html" raw
else return mempty
pHtmlBlock :: String -> TagParser String
pHtmlBlock :: PandocMonad m => String -> TagParser m String
pHtmlBlock t = try $ do
open <- pSatisfy (~== TagOpen t [])
contents <- manyTill pAnyTag (pSatisfy (~== TagClose t))
return $ renderTags' $ [open] ++ contents ++ [TagClose t]
-- Sets chapter context
eSection :: TagParser Blocks
eSection :: PandocMonad m => TagParser m Blocks
eSection = try $ do
let matchChapter as = maybe False (isInfixOf "chapter") (lookup "type" as)
let sectTag = tagOpen (`elem` sectioningContent) matchChapter
TagOpen tag _ <- lookAhead $ pSatisfy sectTag
setInChapter (pInTags tag block)
headerLevel :: String -> TagParser Int
headerLevel :: PandocMonad m => String -> TagParser m Int
headerLevel tagtype = do
let level = read (drop 1 tagtype)
(try $ do
@ -388,7 +399,7 @@ headerLevel tagtype = do
<|>
return level
eTitlePage :: TagParser ()
eTitlePage :: PandocMonad m => TagParser m ()
eTitlePage = try $ do
let isTitlePage as = maybe False (isInfixOf "titlepage") (lookup "type" as)
let groupTag = tagOpen (\x -> x `elem` groupingContent || x == "section")
@ -396,7 +407,7 @@ eTitlePage = try $ do
TagOpen tag _ <- lookAhead $ pSatisfy groupTag
() <$ pInTags tag block
pHeader :: TagParser Blocks
pHeader :: PandocMonad m => TagParser m Blocks
pHeader = try $ do
TagOpen tagtype attr <- pSatisfy $
tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"])
@ -412,12 +423,12 @@ pHeader = try $ do
then mempty -- skip a representation of the title in the body
else B.headerWith attr' level contents
pHrule :: TagParser Blocks
pHrule :: PandocMonad m => TagParser m Blocks
pHrule = do
pSelfClosing (=="hr") (const True)
return B.horizontalRule
pTable :: TagParser Blocks
pTable :: PandocMonad m => TagParser m Blocks
pTable = try $ do
TagOpen _ _ <- pSatisfy (~== TagOpen "table" [])
skipMany pBlank
@ -456,7 +467,7 @@ pTable = try $ do
else widths'
return $ B.table caption (zip aligns widths) head' rows
pCol :: TagParser Double
pCol :: PandocMonad m => TagParser m Double
pCol = try $ do
TagOpen _ attribs <- pSatisfy (~== TagOpen "col" [])
skipMany pBlank
@ -472,7 +483,7 @@ pCol = try $ do
fromMaybe 0.0 $ safeRead ('0':'.':init x)
_ -> 0.0
pColgroup :: TagParser [Double]
pColgroup :: PandocMonad m => TagParser m [Double]
pColgroup = try $ do
pSatisfy (~== TagOpen "colgroup" [])
skipMany pBlank
@ -485,31 +496,31 @@ noColOrRowSpans t = isNullOrOne "colspan" && isNullOrOne "rowspan"
"1" -> True
_ -> False
pCell :: String -> TagParser [Blocks]
pCell :: PandocMonad m => String -> TagParser m [Blocks]
pCell celltype = try $ do
skipMany pBlank
res <- pInTags' celltype noColOrRowSpans block
skipMany pBlank
return [res]
pBlockQuote :: TagParser Blocks
pBlockQuote :: PandocMonad m => TagParser m Blocks
pBlockQuote = do
contents <- pInTags "blockquote" block
return $ B.blockQuote $ fixPlains False contents
pPlain :: TagParser Blocks
pPlain :: PandocMonad m => TagParser m Blocks
pPlain = do
contents <- setInPlain $ trimInlines . mconcat <$> many1 inline
if B.isNull contents
then return mempty
else return $ B.plain contents
pPara :: TagParser Blocks
pPara :: PandocMonad m => TagParser m Blocks
pPara = do
contents <- trimInlines <$> pInTags "p" inline
return $ B.para contents
pCodeBlock :: TagParser Blocks
pCodeBlock :: PandocMonad m => TagParser m Blocks
pCodeBlock = try $ do
TagOpen _ attr <- pSatisfy (~== TagOpen "pre" [])
contents <- manyTill pAnyTag (pCloses "pre" <|> eof)
@ -529,7 +540,7 @@ tagToString (TagText s) = s
tagToString (TagOpen "br" _) = "\n"
tagToString _ = ""
inline :: TagParser Inlines
inline :: PandocMonad m => TagParser m Inlines
inline = choice
[ eNoteref
, eSwitch id inline
@ -549,30 +560,31 @@ inline = choice
, pRawHtmlInline
]
pLocation :: TagParser ()
pLocation :: PandocMonad m => TagParser m ()
pLocation = do
(TagPosition r c) <- pSat isTagPosition
setPosition $ newPos "input" r c
pSat :: (Tag String -> Bool) -> TagParser (Tag String)
pSat :: PandocMonad m => (Tag String -> Bool) -> TagParser m (Tag String)
pSat f = do
pos <- getPosition
token show (const pos) (\x -> if f x then Just x else Nothing)
pSatisfy :: (Tag String -> Bool) -> TagParser (Tag String)
pSatisfy :: PandocMonad m => (Tag String -> Bool) -> TagParser m (Tag String)
pSatisfy f = try $ optional pLocation >> pSat f
pAnyTag :: TagParser (Tag String)
pAnyTag :: PandocMonad m => TagParser m (Tag String)
pAnyTag = pSatisfy (const True)
pSelfClosing :: (String -> Bool) -> ([Attribute String] -> Bool)
-> TagParser (Tag String)
pSelfClosing :: PandocMonad m
=> (String -> Bool) -> ([Attribute String] -> Bool)
-> TagParser m (Tag String)
pSelfClosing f g = do
open <- pSatisfy (tagOpen f g)
optional $ pSatisfy (tagClose f)
return open
pQ :: TagParser Inlines
pQ :: PandocMonad m => TagParser m Inlines
pQ = do
context <- asks quoteContext
let quoteType = case context of
@ -587,19 +599,19 @@ pQ = do
withQuoteContext innerQuoteContext $
pInlinesInTags "q" constructor
pEmph :: TagParser Inlines
pEmph :: PandocMonad m => TagParser m Inlines
pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph
pStrong :: TagParser Inlines
pStrong :: PandocMonad m => TagParser m Inlines
pStrong = pInlinesInTags "strong" B.strong <|> pInlinesInTags "b" B.strong
pSuperscript :: TagParser Inlines
pSuperscript :: PandocMonad m => TagParser m Inlines
pSuperscript = pInlinesInTags "sup" B.superscript
pSubscript :: TagParser Inlines
pSubscript :: PandocMonad m => TagParser m Inlines
pSubscript = pInlinesInTags "sub" B.subscript
pStrikeout :: TagParser Inlines
pStrikeout :: PandocMonad m => TagParser m Inlines
pStrikeout = do
pInlinesInTags "s" B.strikeout <|>
pInlinesInTags "strike" B.strikeout <|>
@ -608,7 +620,7 @@ pStrikeout = do
contents <- mconcat <$> manyTill inline (pCloses "span")
return $ B.strikeout contents)
pLineBreak :: TagParser Inlines
pLineBreak :: PandocMonad m => TagParser m Inlines
pLineBreak = do
pSelfClosing (=="br") (const True)
return B.linebreak
@ -619,7 +631,7 @@ maybeFromAttrib :: String -> Tag String -> Maybe String
maybeFromAttrib name (TagOpen _ attrs) = lookup name attrs
maybeFromAttrib _ _ = Nothing
pLink :: TagParser Inlines
pLink :: PandocMonad m => TagParser m Inlines
pLink = try $ do
tag <- pSatisfy $ tagOpenLit "a" (const True)
let title = fromAttrib "title" tag
@ -639,7 +651,7 @@ pLink = try $ do
_ -> url'
return $ B.linkWith (uid, cls, []) (escapeURI url) title lab
pImage :: TagParser Inlines
pImage :: PandocMonad m => TagParser m Inlines
pImage = do
tag <- pSelfClosing (=="img") (isJust . lookup "src")
mbBaseHref <- baseHref <$> getState
@ -657,13 +669,13 @@ pImage = do
let kvs = concat $ map getAtt ["width", "height", "sizes", "srcset"]
return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt)
pCode :: TagParser Inlines
pCode :: PandocMonad m => TagParser m Inlines
pCode = try $ do
(TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True)
result <- manyTill pAnyTag (pCloses open)
return $ B.codeWith (mkAttr attr) $ intercalate " " $ lines $ innerText result
pSpan :: TagParser Inlines
pSpan :: PandocMonad m => TagParser m Inlines
pSpan = try $ do
guardEnabled Ext_native_spans
TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
@ -674,7 +686,7 @@ pSpan = try $ do
let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr)
return $ tag contents
pRawHtmlInline :: TagParser Inlines
pRawHtmlInline :: PandocMonad m => TagParser m Inlines
pRawHtmlInline = do
inplain <- asks inPlain
result <- pSatisfy (tagComment (const True))
@ -689,7 +701,7 @@ pRawHtmlInline = do
mathMLToTeXMath :: String -> Either String String
mathMLToTeXMath s = writeTeX <$> readMathML s
pMath :: Bool -> TagParser Inlines
pMath :: PandocMonad m => Bool -> TagParser m Inlines
pMath inCase = try $ do
open@(TagOpen _ attr) <- pSatisfy $ tagOpen (=="math") (const True)
-- we'll assume math tags are MathML unless specially marked
@ -705,22 +717,25 @@ pMath inCase = try $ do
Just "block" -> B.displayMath x
_ -> B.math x
pInlinesInTags :: String -> (Inlines -> Inlines)
-> TagParser Inlines
pInlinesInTags :: PandocMonad m => String -> (Inlines -> Inlines)
-> TagParser m Inlines
pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
pInTags :: (Monoid a) => String -> TagParser a -> TagParser a
pInTags :: (PandocMonad m, Monoid a) => String -> TagParser m a -> TagParser m a
pInTags tagtype parser = pInTags' tagtype (const True) parser
pInTags' :: (Monoid a) => String -> (Tag String -> Bool) -> TagParser a
-> TagParser a
pInTags' :: (PandocMonad m, Monoid a)
=> String
-> (Tag String -> Bool)
-> TagParser m a
-> TagParser m a
pInTags' tagtype tagtest parser = try $ do
pSatisfy (\t -> t ~== TagOpen tagtype [] && tagtest t)
mconcat <$> manyTill parser (pCloses tagtype <|> eof)
-- parses p, preceeded by an optional opening tag
-- and followed by an optional closing tags
pOptInTag :: String -> TagParser a -> TagParser a
pOptInTag :: PandocMonad m => String -> TagParser m a -> TagParser m a
pOptInTag tagtype p = try $ do
skipMany pBlank
optional $ pSatisfy (~== TagOpen tagtype [])
@ -731,7 +746,7 @@ pOptInTag tagtype p = try $ do
skipMany pBlank
return x
pCloses :: String -> TagParser ()
pCloses :: PandocMonad m => String -> TagParser m ()
pCloses tagtype = try $ do
t <- lookAhead $ pSatisfy $ \tag -> isTagClose tag || isTagOpen tag
case t of
@ -744,23 +759,25 @@ pCloses tagtype = try $ do
(TagClose "table") | tagtype == "tr" -> return ()
_ -> mzero
pTagText :: TagParser Inlines
pTagText :: PandocMonad m => TagParser m Inlines
pTagText = try $ do
(TagText str) <- pSatisfy isTagText
st <- getState
qu <- ask
case flip runReader qu $ runParserT (many pTagContents) st "text" str of
Left _ -> fail $ "Could not parse `" ++ str ++ "'"
parsed <- lift $ lift $
flip runReaderT qu $ runParserT (many pTagContents) st "text" str
case parsed of
Left _ -> throwError $ PandocParseError $ "Could not parse `" ++ str ++ "'"
Right result -> return $ mconcat result
pBlank :: TagParser ()
pBlank :: PandocMonad m => TagParser m ()
pBlank = try $ do
(TagText str) <- pSatisfy isTagText
guard $ all isSpace str
type InlinesParser = HTMLParser String
type InlinesParser m = HTMLParser m String
pTagContents :: InlinesParser Inlines
pTagContents :: PandocMonad m => InlinesParser m Inlines
pTagContents =
B.displayMath <$> mathDisplay
<|> B.math <$> mathInline
@ -770,7 +787,7 @@ pTagContents =
<|> pSymbol
<|> pBad
pStr :: InlinesParser Inlines
pStr :: PandocMonad m => InlinesParser m Inlines
pStr = do
result <- many1 $ satisfy $ \c ->
not (isSpace c) && not (isSpecial c) && not (isBad c)
@ -789,13 +806,13 @@ isSpecial '\8220' = True
isSpecial '\8221' = True
isSpecial _ = False
pSymbol :: InlinesParser Inlines
pSymbol :: PandocMonad m => InlinesParser m Inlines
pSymbol = satisfy isSpecial >>= return . B.str . (:[])
isBad :: Char -> Bool
isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML
pBad :: InlinesParser Inlines
pBad :: PandocMonad m => InlinesParser m Inlines
pBad = do
c <- satisfy isBad
let c' = case c of
@ -829,7 +846,7 @@ pBad = do
_ -> '?'
return $ B.str [c']
pSpace :: InlinesParser Inlines
pSpace :: PandocMonad m => InlinesParser m Inlines
pSpace = many1 (satisfy isSpace) >>= \xs ->
if '\n' `elem` xs
then return B.softbreak
@ -1070,7 +1087,7 @@ instance HasHeaderMap HTMLState where
-- This signature should be more general
-- MonadReader HTMLLocal m => HasQuoteContext st m
instance HasQuoteContext st (Reader HTMLLocal) where
instance PandocMonad m => HasQuoteContext st (ReaderT HTMLLocal m) where
getQuoteContext = asks quoteContext
withQuoteContext q = local (\s -> s{quoteContext = q})

View file

@ -25,14 +25,23 @@ import Text.Pandoc.Options
import Documentation.Haddock.Parser
import Documentation.Haddock.Types
import Debug.Trace (trace)
import Control.Monad.Except (throwError)
import Text.Pandoc.Class (PandocMonad, PandocExecutionError(..))
import Text.Pandoc.Error
-- | Parse Haddock markup and return a 'Pandoc' document.
readHaddock :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse
-> Either PandocError Pandoc
readHaddock opts =
readHaddock :: PandocMonad m
=> ReaderOptions
-> String
-> m Pandoc
readHaddock opts s = case readHaddockEither opts s of
Right result -> return result
Left e -> throwError e
readHaddockEither :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse
-> Either PandocExecutionError Pandoc
readHaddockEither opts =
#if MIN_VERSION_haddock_library(1,2,0)
Right . B.doc . docHToBlocks . trace' . _doc . parseParas
#else

View file

@ -56,14 +56,21 @@ import qualified Control.Exception as E
import Text.Pandoc.Highlighting (fromListingsLanguage)
import Text.Pandoc.ImageSize (numUnit, showFl)
import Text.Pandoc.Error
import Control.Monad.Except (throwError)
import Text.Pandoc.Class (PandocMonad, PandocExecutionError(..), PandocPure)
-- | Parse LaTeX from string and return 'Pandoc' document.
readLaTeX :: ReaderOptions -- ^ Reader options
readLaTeX :: PandocMonad m
=> ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
-> Either PandocError Pandoc
readLaTeX opts = readWith parseLaTeX def{ stateOptions = opts }
-> m Pandoc
readLaTeX opts ltx = do
parsed <- readWithM parseLaTeX def{ stateOptions = opts } ltx
case parsed of
Right result -> return result
Left _ -> throwError $ PandocParseError "parsing error"
parseLaTeX :: LP Pandoc
parseLaTeX :: PandocMonad m => LP m Pandoc
parseLaTeX = do
bs <- blocks
eof
@ -72,9 +79,9 @@ parseLaTeX = do
let (Pandoc _ bs') = doc bs
return $ Pandoc meta bs'
type LP = Parser String ParserState
type LP m = ParserT String ParserState m
anyControlSeq :: LP String
anyControlSeq :: PandocMonad m => LP m String
anyControlSeq = do
char '\\'
next <- option '\n' anyChar
@ -83,7 +90,7 @@ anyControlSeq = do
c | isLetter c -> (c:) <$> (many letter <* optional sp)
| otherwise -> return [c]
controlSeq :: String -> LP String
controlSeq :: PandocMonad m => String -> LP m String
controlSeq name = try $ do
char '\\'
case name of
@ -92,26 +99,26 @@ controlSeq name = try $ do
cs -> string cs <* notFollowedBy letter <* optional sp
return name
dimenarg :: LP String
dimenarg :: PandocMonad m => LP m String
dimenarg = try $ do
ch <- option "" $ string "="
num <- many1 digit
dim <- oneOfStrings ["pt","pc","in","bp","cm","mm","dd","cc","sp"]
return $ ch ++ num ++ dim
sp :: LP ()
sp :: PandocMonad m => LP m ()
sp = whitespace <|> endline
whitespace :: LP ()
whitespace :: PandocMonad m => LP m ()
whitespace = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t')
endline :: LP ()
endline :: PandocMonad m => LP m ()
endline = try (newline >> lookAhead anyChar >> notFollowedBy blankline)
isLowerHex :: Char -> Bool
isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
tildeEscape :: LP Char
tildeEscape :: PandocMonad m => LP m Char
tildeEscape = try $ do
string "^^"
c <- satisfy (\x -> x >= '\0' && x <= '\128')
@ -124,29 +131,29 @@ tildeEscape = try $ do
| otherwise -> return $ chr (x + 64)
else return $ chr $ read ('0':'x':c:d)
comment :: LP ()
comment :: PandocMonad m => LP m ()
comment = do
char '%'
skipMany (satisfy (/='\n'))
optional newline
return ()
bgroup :: LP ()
bgroup :: PandocMonad m => LP m ()
bgroup = try $ do
skipMany (spaceChar <|> try (newline <* notFollowedBy blankline))
() <$ char '{'
<|> () <$ controlSeq "bgroup"
<|> () <$ controlSeq "begingroup"
egroup :: LP ()
egroup :: PandocMonad m => LP m ()
egroup = () <$ char '}'
<|> () <$ controlSeq "egroup"
<|> () <$ controlSeq "endgroup"
grouped :: Monoid a => LP a -> LP a
grouped :: PandocMonad m => Monoid a => LP m a -> LP m a
grouped parser = try $ bgroup *> (mconcat <$> manyTill parser egroup)
braced :: LP String
braced :: PandocMonad m => LP m String
braced = bgroup *> (concat <$> manyTill
( many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{'))
<|> try (string "\\}")
@ -156,16 +163,16 @@ braced = bgroup *> (concat <$> manyTill
<|> count 1 anyChar
) egroup)
bracketed :: Monoid a => LP a -> LP a
bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a
bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']'))
mathDisplay :: LP String -> LP Inlines
mathDisplay :: PandocMonad m => LP m String -> LP m Inlines
mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim)
mathInline :: LP String -> LP Inlines
mathInline :: PandocMonad m => LP m String -> LP m Inlines
mathInline p = math <$> (try p >>= applyMacros')
mathChars :: LP String
mathChars :: PandocMonad m => LP m String
mathChars =
concat <$> many (escapedChar
<|> (snd <$> withRaw braced)
@ -179,7 +186,7 @@ mathChars =
isOrdChar '\\' = False
isOrdChar _ = True
quoted' :: (Inlines -> Inlines) -> LP String -> LP () -> LP Inlines
quoted' :: PandocMonad m => (Inlines -> Inlines) -> LP m String -> LP m () -> LP m Inlines
quoted' f starter ender = do
startchs <- starter
smart <- getOption readerSmart
@ -194,7 +201,7 @@ quoted' f starter ender = do
_ -> startchs)
else lit startchs
doubleQuote :: LP Inlines
doubleQuote :: PandocMonad m => LP m Inlines
doubleQuote = do
quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
<|> quoted' doubleQuoted (string "") (void $ char '”')
@ -202,7 +209,7 @@ doubleQuote = do
<|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'")
<|> quoted' doubleQuoted (string "\"") (void $ char '"')
singleQuote :: LP Inlines
singleQuote :: PandocMonad m => LP m Inlines
singleQuote = do
smart <- getOption readerSmart
if smart
@ -210,7 +217,7 @@ singleQuote = do
<|> quoted' singleQuoted (string "") (try $ char '' >> notFollowedBy letter)
else str <$> many1 (oneOf "`\'")
inline :: LP Inlines
inline :: PandocMonad m => LP m Inlines
inline = (mempty <$ comment)
<|> (space <$ whitespace)
<|> (softbreak <$ endline)
@ -235,10 +242,10 @@ inline = (mempty <$ comment)
<|> (str . (:[]) <$> oneOf "#&~^'`\"[]") -- TODO print warning?
-- <|> (str <$> count 1 (satisfy (\c -> c /= '\\' && c /='\n' && c /='}' && c /='{'))) -- eat random leftover characters
inlines :: LP Inlines
inlines :: PandocMonad m => LP m Inlines
inlines = mconcat <$> many (notFollowedBy (char '}') *> inline)
inlineGroup :: LP Inlines
inlineGroup :: PandocMonad m => LP m Inlines
inlineGroup = do
ils <- grouped inline
if isNull ils
@ -247,7 +254,7 @@ inlineGroup = do
-- we need the span so we can detitlecase bibtex entries;
-- we need to know when something is {C}apitalized
block :: LP Blocks
block :: PandocMonad m => LP m Blocks
block = (mempty <$ comment)
<|> (mempty <$ ((spaceChar <|> newline) *> spaces))
<|> environment
@ -258,10 +265,10 @@ block = (mempty <$ comment)
<|> (mempty <$ char '&') -- loose & in table environment
blocks :: LP Blocks
blocks :: PandocMonad m => LP m Blocks
blocks = mconcat <$> many block
getRawCommand :: String -> LP String
getRawCommand :: PandocMonad m => String -> LP m String
getRawCommand name' = do
rawargs <- withRaw (many (try (optional sp *> opt)) *>
option "" (try (optional sp *> dimenarg)) *>
@ -273,7 +280,7 @@ lookupListDefault d = (fromMaybe d .) . lookupList
where
lookupList l m = msum $ map (`M.lookup` m) l
blockCommand :: LP Blocks
blockCommand :: PandocMonad m => LP m Blocks
blockCommand = try $ do
name <- anyControlSeq
guard $ name /= "begin" && name /= "end"
@ -291,21 +298,21 @@ inBrackets :: Inlines -> Inlines
inBrackets x = str "[" <> x <> str "]"
-- eat an optional argument and one or more arguments in braces
ignoreInlines :: String -> (String, LP Inlines)
ignoreInlines :: PandocMonad m => String -> (String, LP m Inlines)
ignoreInlines name = (name, doraw <|> (mempty <$ optargs))
where optargs = skipopts *> skipMany (try $ optional sp *> braced)
contseq = '\\':name
doraw = (rawInline "latex" . (contseq ++) . snd) <$>
(getOption readerParseRaw >>= guard >> withRaw optargs)
ignoreBlocks :: String -> (String, LP Blocks)
ignoreBlocks :: PandocMonad m => String -> (String, LP m Blocks)
ignoreBlocks name = (name, doraw <|> (mempty <$ optargs))
where optargs = skipopts *> skipMany (try $ optional sp *> braced)
contseq = '\\':name
doraw = (rawBlock "latex" . (contseq ++) . snd) <$>
(getOption readerParseRaw >>= guard >> withRaw optargs)
blockCommands :: M.Map String (LP Blocks)
blockCommands :: PandocMonad m => M.Map String (LP m Blocks)
blockCommands = M.fromList $
[ ("par", mempty <$ skipopts)
, ("title", mempty <$ (skipopts *>
@ -370,14 +377,14 @@ blockCommands = M.fromList $
, "newpage"
]
addMeta :: ToMetaValue a => String -> a -> LP ()
addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m ()
addMeta field val = updateState $ \st ->
st{ stateMeta = addMetaField field val $ stateMeta st }
splitBibs :: String -> [Inlines]
splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',')
setCaption :: LP Blocks
setCaption :: PandocMonad m => LP m Blocks
setCaption = do
ils <- tok
mblabel <- option Nothing $
@ -389,10 +396,10 @@ setCaption = do
updateState $ \st -> st{ stateCaption = Just ils' }
return mempty
resetCaption :: LP ()
resetCaption :: PandocMonad m => LP m ()
resetCaption = updateState $ \st -> st{ stateCaption = Nothing }
authors :: LP ()
authors :: PandocMonad m => LP m ()
authors = try $ do
char '{'
let oneAuthor = mconcat <$>
@ -403,7 +410,7 @@ authors = try $ do
char '}'
addMeta "author" (map trimInlines auths)
section :: Attr -> Int -> LP Blocks
section :: PandocMonad m => Attr -> Int -> LP m Blocks
section (ident, classes, kvs) lvl = do
hasChapters <- stateHasChapters `fmap` getState
let lvl' = if hasChapters then lvl + 1 else lvl
@ -413,7 +420,7 @@ section (ident, classes, kvs) lvl = do
attr' <- registerHeader (lab, classes, kvs) contents
return $ headerWith attr' lvl' contents
inlineCommand :: LP Inlines
inlineCommand :: PandocMonad m => LP m Inlines
inlineCommand = try $ do
name <- anyControlSeq
guard $ name /= "begin" && name /= "end"
@ -435,14 +442,14 @@ inlineCommand = try $ do
optional (try (string "{}")))
<|> raw
unlessParseRaw :: LP ()
unlessParseRaw :: PandocMonad m => LP m ()
unlessParseRaw = getOption readerParseRaw >>= guard . not
isBlockCommand :: String -> Bool
isBlockCommand s = s `M.member` blockCommands
isBlockCommand s = s `M.member` (blockCommands :: M.Map String (LP PandocPure Blocks))
inlineEnvironments :: M.Map String (LP Inlines)
inlineEnvironments :: PandocMonad m => M.Map String (LP m Inlines)
inlineEnvironments = M.fromList
[ ("displaymath", mathEnv id Nothing "displaymath")
, ("math", math <$> verbEnv "math")
@ -460,7 +467,7 @@ inlineEnvironments = M.fromList
, ("alignat*", mathEnv id (Just "aligned") "alignat*")
]
inlineCommands :: M.Map String (LP Inlines)
inlineCommands :: PandocMonad m => M.Map String (LP m Inlines)
inlineCommands = M.fromList $
[ ("emph", extractSpaces emph <$> tok)
, ("textit", extractSpaces emph <$> tok)
@ -621,7 +628,7 @@ inlineCommands = M.fromList $
-- in which case they will appear as raw latex blocks:
[ "index" ]
mkImage :: [(String, String)] -> String -> LP Inlines
mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines
mkImage options src = do
let replaceTextwidth (k,v) = case numUnit v of
Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%")
@ -645,7 +652,7 @@ unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
unescapeURL (x:xs) = x:unescapeURL xs
unescapeURL [] = ""
enquote :: LP Inlines
enquote :: PandocMonad m => LP m Inlines
enquote = do
skipopts
context <- stateQuoteContext <$> getState
@ -653,18 +660,18 @@ enquote = do
then singleQuoted <$> withQuoteContext InSingleQuote tok
else doubleQuoted <$> withQuoteContext InDoubleQuote tok
doverb :: LP Inlines
doverb :: PandocMonad m => LP m Inlines
doverb = do
marker <- anyChar
code <$> manyTill (satisfy (/='\n')) (char marker)
doLHSverb :: LP Inlines
doLHSverb :: PandocMonad m => LP m Inlines
doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|')
lit :: String -> LP Inlines
lit :: String -> LP m Inlines
lit = pure . str
accent :: (Char -> String) -> Inlines -> LP Inlines
accent :: (Char -> String) -> Inlines -> LP m Inlines
accent f ils =
case toList ils of
(Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys)
@ -870,53 +877,53 @@ breve 'U' = "Ŭ"
breve 'u' = "ŭ"
breve c = [c]
tok :: LP Inlines
tok :: PandocMonad m => LP m Inlines
tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar
opt :: LP Inlines
opt :: PandocMonad m => LP m Inlines
opt = bracketed inline
rawopt :: LP String
rawopt :: PandocMonad m => LP m String
rawopt = do
contents <- bracketed (many1 (noneOf "[]") <|> try (string "\\]") <|>
try (string "\\[") <|> rawopt)
optional sp
return $ "[" ++ contents ++ "]"
skipopts :: LP ()
skipopts :: PandocMonad m => LP m ()
skipopts = skipMany rawopt
-- opts in angle brackets are used in beamer
rawangle :: LP ()
rawangle :: PandocMonad m => LP m ()
rawangle = try $ do
char '<'
skipMany (noneOf ">")
char '>'
return ()
skipangles :: LP ()
skipangles :: PandocMonad m => LP m ()
skipangles = skipMany rawangle
inlineText :: LP Inlines
inlineText :: PandocMonad m => LP m Inlines
inlineText = str <$> many1 inlineChar
inlineChar :: LP Char
inlineChar :: PandocMonad m => LP m Char
inlineChar = noneOf "\\$%&~#{}^'`\"‘’“”-[] \t\n"
environment :: LP Blocks
environment :: PandocMonad m => LP m Blocks
environment = do
controlSeq "begin"
name <- braced
M.findWithDefault mzero name environments
<|> rawEnv name
inlineEnvironment :: LP Inlines
inlineEnvironment :: PandocMonad m => LP m Inlines
inlineEnvironment = try $ do
controlSeq "begin"
name <- braced
M.findWithDefault mzero name inlineEnvironments
rawEnv :: String -> LP Blocks
rawEnv :: PandocMonad m => String -> LP m Blocks
rawEnv name = do
parseRaw <- getOption readerParseRaw
rawOptions <- mconcat <$> many rawopt
@ -1045,7 +1052,7 @@ readFileFromDirs (d:ds) f =
----
keyval :: LP (String, String)
keyval :: PandocMonad m => LP m (String, String)
keyval = try $ do
key <- many1 alphaNum
val <- option "" $ char '=' >> many1 (alphaNum <|> char '.' <|> char '\\')
@ -1055,25 +1062,25 @@ keyval = try $ do
return (key, val)
keyvals :: LP [(String, String)]
keyvals :: PandocMonad m => LP m [(String, String)]
keyvals = try $ char '[' *> manyTill keyval (char ']')
alltt :: String -> LP Blocks
alltt :: PandocMonad m => String -> LP m Blocks
alltt t = walk strToCode <$> parseFromString blocks
(substitute " " "\\ " $ substitute "%" "\\%" $
intercalate "\\\\\n" $ lines t)
where strToCode (Str s) = Code nullAttr s
strToCode x = x
rawLaTeXBlock :: LP String
rawLaTeXBlock :: PandocMonad m => LP m String
rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand))
rawLaTeXInline :: LP Inline
rawLaTeXInline :: PandocMonad m => LP m Inline
rawLaTeXInline = do
raw <- (snd <$> withRaw inlineCommand) <|> (snd <$> withRaw blockCommand)
RawInline "latex" <$> applyMacros' raw
addImageCaption :: Blocks -> LP Blocks
addImageCaption :: PandocMonad m => Blocks -> LP m Blocks
addImageCaption = walkM go
where go (Image attr alt (src,tit)) = do
mbcapt <- stateCaption <$> getState
@ -1082,7 +1089,7 @@ addImageCaption = walkM go
Nothing -> Image attr alt (src,tit)
go x = return x
addTableCaption :: Blocks -> LP Blocks
addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
addTableCaption = walkM go
where go (Table c als ws hs rs) = do
mbcapt <- stateCaption <$> getState
@ -1091,7 +1098,7 @@ addTableCaption = walkM go
Nothing -> Table c als ws hs rs
go x = return x
environments :: M.Map String (LP Blocks)
environments :: PandocMonad m => M.Map String (LP m Blocks)
environments = M.fromList
[ ("document", env "document" blocks <* skipMany anyChar)
, ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
@ -1159,7 +1166,7 @@ environments = M.fromList
, ("alignat*", mathEnv para (Just "aligned") "alignat*")
]
letterContents :: LP Blocks
letterContents :: PandocMonad m => LP m Blocks
letterContents = do
bs <- blocks
st <- getState
@ -1170,7 +1177,7 @@ letterContents = do
_ -> mempty
return $ addr <> bs -- sig added by \closing
closing :: LP Blocks
closing :: PandocMonad m => LP m Blocks
closing = do
contents <- tok
st <- getState
@ -1184,17 +1191,17 @@ closing = do
_ -> mempty
return $ para (trimInlines contents) <> sigs
item :: LP Blocks
item :: PandocMonad m => LP m Blocks
item = blocks *> controlSeq "item" *> skipopts *> blocks
looseItem :: LP Blocks
looseItem :: PandocMonad m => LP m Blocks
looseItem = do
ctx <- stateParserContext `fmap` getState
if ctx == ListItemState
then mzero
else return mempty
descItem :: LP (Inlines, [Blocks])
descItem :: PandocMonad m => LP m (Inlines, [Blocks])
descItem = do
blocks -- skip blocks before item
controlSeq "item"
@ -1203,12 +1210,12 @@ descItem = do
bs <- blocks
return (ils, [bs])
env :: String -> LP a -> LP a
env :: PandocMonad m => String -> LP m a -> LP m a
env name p = p <*
(try (controlSeq "end" *> braced >>= guard . (== name))
<?> ("\\end{" ++ name ++ "}"))
listenv :: String -> LP a -> LP a
listenv :: PandocMonad m => String -> LP m a -> LP m a
listenv name p = try $ do
oldCtx <- stateParserContext `fmap` getState
updateState $ \st -> st{ stateParserContext = ListItemState }
@ -1216,14 +1223,14 @@ listenv name p = try $ do
updateState $ \st -> st{ stateParserContext = oldCtx }
return res
mathEnv :: (Inlines -> a) -> Maybe String -> String -> LP a
mathEnv :: PandocMonad m => (Inlines -> a) -> Maybe String -> String -> LP m a
mathEnv f innerEnv name = f <$> mathDisplay (inner <$> verbEnv name)
where inner x = case innerEnv of
Nothing -> x
Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++
"\\end{" ++ y ++ "}"
verbEnv :: String -> LP String
verbEnv :: PandocMonad m => String -> LP m String
verbEnv name = do
skipopts
optional blankline
@ -1231,7 +1238,7 @@ verbEnv name = do
res <- manyTill anyChar endEnv
return $ stripTrailingNewlines res
fancyverbEnv :: String -> LP Blocks
fancyverbEnv :: PandocMonad m => String -> LP m Blocks
fancyverbEnv name = do
options <- option [] keyvals
let kvs = [ (if k == "firstnumber"
@ -1242,7 +1249,7 @@ fancyverbEnv name = do
let attr = ("",classes,kvs)
codeBlockWith attr <$> verbEnv name
orderedList' :: LP Blocks
orderedList' :: PandocMonad m => LP m Blocks
orderedList' = do
optional sp
(_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
@ -1259,14 +1266,14 @@ orderedList' = do
bs <- listenv "enumerate" (many item)
return $ orderedListWith (start, style, delim) bs
paragraph :: LP Blocks
paragraph :: PandocMonad m => LP m Blocks
paragraph = do
x <- trimInlines . mconcat <$> many1 inline
if x == mempty
then return mempty
else return $ para x
preamble :: LP Blocks
preamble :: PandocMonad m => LP m Blocks
preamble = mempty <$> manyTill preambleBlock beginDoc
where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}"
preambleBlock = void comment
@ -1292,7 +1299,7 @@ addSuffix s ks@(_:_) =
in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
addSuffix _ _ = []
simpleCiteArgs :: LP [Citation]
simpleCiteArgs :: PandocMonad m => LP m [Citation]
simpleCiteArgs = try $ do
first <- optionMaybe $ toList <$> opt
second <- optionMaybe $ toList <$> opt
@ -1312,7 +1319,7 @@ simpleCiteArgs = try $ do
}
return $ addPrefix pre $ addSuffix suf $ map conv keys
citationLabel :: LP String
citationLabel :: PandocMonad m => LP m String
citationLabel = optional sp *>
(many1 (satisfy isBibtexKeyChar)
<* optional sp
@ -1320,7 +1327,7 @@ citationLabel = optional sp *>
<* optional sp)
where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]" :: String)
cites :: CitationMode -> Bool -> LP [Citation]
cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
cites mode multi = try $ do
cits <- if multi
then many1 simpleCiteArgs
@ -1332,12 +1339,12 @@ cites mode multi = try $ do
[] -> []
_ -> map (\a -> a {citationMode = mode}) cs
citation :: String -> CitationMode -> Bool -> LP Inlines
citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines
citation name mode multi = do
(c,raw) <- withRaw $ cites mode multi
return $ cite c (rawInline "latex" $ "\\" ++ name ++ raw)
complexNatbibCitation :: CitationMode -> LP Inlines
complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines
complexNatbibCitation mode = try $ do
let ils = (toList . trimInlines . mconcat) <$>
many (notFollowedBy (oneOf "\\};") >> inline)
@ -1359,7 +1366,7 @@ complexNatbibCitation mode = try $ do
-- tables
parseAligns :: LP [Alignment]
parseAligns :: PandocMonad m => LP m [Alignment]
parseAligns = try $ do
char '{'
let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced)
@ -1375,7 +1382,7 @@ parseAligns = try $ do
spaces
return aligns'
hline :: LP ()
hline :: PandocMonad m => LP m ()
hline = try $ do
spaces'
controlSeq "hline" <|>
@ -1389,16 +1396,16 @@ hline = try $ do
optional $ bracketed (many1 (satisfy (/=']')))
return ()
lbreak :: LP ()
lbreak :: PandocMonad m => LP m ()
lbreak = () <$ try (spaces' *>
(controlSeq "\\" <|> controlSeq "tabularnewline") <*
spaces')
amp :: LP ()
amp :: PandocMonad m => LP m ()
amp = () <$ try (spaces' *> char '&' <* spaces')
parseTableRow :: Int -- ^ number of columns
-> LP [Blocks]
parseTableRow :: PandocMonad m => Int -- ^ number of columns
-> LP m [Blocks]
parseTableRow cols = try $ do
let tableCellInline = notFollowedBy (amp <|> lbreak) >> inline
let minipage = try $ controlSeq "begin" *> string "{minipage}" *>
@ -1415,10 +1422,10 @@ parseTableRow cols = try $ do
spaces'
return cells''
spaces' :: LP ()
spaces' :: PandocMonad m => LP m ()
spaces' = spaces *> skipMany (comment *> spaces)
simpTable :: Bool -> LP Blocks
simpTable :: PandocMonad m => Bool -> LP m Blocks
simpTable hasWidthParameter = try $ do
when hasWidthParameter $ () <$ (spaces' >> tok)
skipopts
@ -1442,13 +1449,13 @@ simpTable hasWidthParameter = try $ do
lookAhead $ controlSeq "end" -- make sure we're at end
return $ table mempty (zip aligns (repeat 0)) header'' rows
startInclude :: LP Blocks
startInclude :: PandocMonad m => LP m Blocks
startInclude = do
fn <- braced
setPosition $ newPos fn 1 1
return mempty
endInclude :: LP Blocks
endInclude :: PandocMonad m => LP m Blocks
endInclude = do
fn <- braced
ln <- braced

File diff suppressed because it is too large Load diff

View file

@ -57,22 +57,26 @@ import Data.Char (isDigit, isSpace)
import Data.Maybe (fromMaybe)
import Text.Printf (printf)
import Debug.Trace (trace)
import Text.Pandoc.Error
import Control.Monad.Except (throwError)
import Text.Pandoc.Class (PandocMonad, PandocExecutionError(..))
-- | Read mediawiki from an input string and return a Pandoc document.
readMediaWiki :: ReaderOptions -- ^ Reader options
readMediaWiki :: PandocMonad m
=> ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
-> Either PandocError Pandoc
readMediaWiki opts s =
readWith parseMediaWiki MWState{ mwOptions = opts
, mwMaxNestingLevel = 4
, mwNextLinkNumber = 1
, mwCategoryLinks = []
, mwHeaderMap = M.empty
, mwIdentifierList = Set.empty
}
(s ++ "\n")
-> m Pandoc
readMediaWiki opts s = do
parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts
, mwMaxNestingLevel = 4
, mwNextLinkNumber = 1
, mwCategoryLinks = []
, mwHeaderMap = M.empty
, mwIdentifierList = Set.empty
}
(s ++ "\n")
case parsed of
Right result -> return result
Left _ -> throwError $ PandocParseError "problem parsing mediawiki"
data MWState = MWState { mwOptions :: ReaderOptions
, mwMaxNestingLevel :: Int
@ -82,7 +86,7 @@ data MWState = MWState { mwOptions :: ReaderOptions
, mwIdentifierList :: Set.Set String
}
type MWParser = Parser [Char] MWState
type MWParser m = ParserT [Char] MWState m
instance HasReaderOptions MWState where
extractReaderOptions = mwOptions
@ -101,7 +105,7 @@ instance HasIdentifierList MWState where
-- This is used to prevent exponential blowups for things like:
-- ''a'''a''a'''a''a'''a''a'''a
nested :: MWParser a -> MWParser a
nested :: PandocMonad m => MWParser m a -> MWParser m a
nested p = do
nestlevel <- mwMaxNestingLevel `fmap` getState
guard $ nestlevel > 0
@ -116,7 +120,7 @@ specialChars = "'[]<=&*{}|\":\\"
spaceChars :: [Char]
spaceChars = " \n\t"
sym :: String -> MWParser ()
sym :: PandocMonad m => String -> MWParser m ()
sym s = () <$ try (string s)
newBlockTags :: [String]
@ -137,10 +141,10 @@ eitherBlockOrInline :: [String]
eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
"map", "area", "object"]
htmlComment :: MWParser ()
htmlComment :: PandocMonad m => MWParser m ()
htmlComment = () <$ htmlTag isCommentTag
inlinesInTags :: String -> MWParser Inlines
inlinesInTags :: PandocMonad m => String -> MWParser m Inlines
inlinesInTags tag = try $ do
(_,raw) <- htmlTag (~== TagOpen tag [])
if '/' `elem` raw -- self-closing tag
@ -148,7 +152,7 @@ inlinesInTags tag = try $ do
else trimInlines . mconcat <$>
manyTill inline (htmlTag (~== TagClose tag))
blocksInTags :: String -> MWParser Blocks
blocksInTags :: PandocMonad m => String -> MWParser m Blocks
blocksInTags tag = try $ do
(_,raw) <- htmlTag (~== TagOpen tag [])
let closer = if tag == "li"
@ -162,7 +166,7 @@ blocksInTags tag = try $ do
then return mempty
else mconcat <$> manyTill block closer
charsInTags :: String -> MWParser [Char]
charsInTags :: PandocMonad m => String -> MWParser m [Char]
charsInTags tag = try $ do
(_,raw) <- htmlTag (~== TagOpen tag [])
if '/' `elem` raw -- self-closing tag
@ -173,7 +177,7 @@ charsInTags tag = try $ do
-- main parser
--
parseMediaWiki :: MWParser Pandoc
parseMediaWiki :: PandocMonad m => MWParser m Pandoc
parseMediaWiki = do
bs <- mconcat <$> many block
spaces
@ -188,7 +192,7 @@ parseMediaWiki = do
-- block parsers
--
block :: MWParser Blocks
block :: PandocMonad m => MWParser m Blocks
block = do
tr <- getOption readerTrace
pos <- getPosition
@ -209,14 +213,14 @@ block = do
(take 60 $ show $ B.toList res)) (return ())
return res
para :: MWParser Blocks
para :: PandocMonad m => MWParser m Blocks
para = do
contents <- trimInlines . mconcat <$> many1 inline
if F.all (==Space) contents
then return mempty
else return $ B.para contents
table :: MWParser Blocks
table :: PandocMonad m => MWParser m Blocks
table = do
tableStart
styles <- option [] parseAttrs <* blankline
@ -244,10 +248,10 @@ table = do
else (replicate cols mempty, hdr:rows')
return $ B.table caption cellspecs headers rows
parseAttrs :: MWParser [(String,String)]
parseAttrs :: PandocMonad m => MWParser m [(String,String)]
parseAttrs = many1 parseAttr
parseAttr :: MWParser (String, String)
parseAttr :: PandocMonad m => MWParser m (String, String)
parseAttr = try $ do
skipMany spaceChar
k <- many1 letter
@ -256,17 +260,17 @@ parseAttr = try $ do
<|> many1 (satisfy $ \c -> not (isSpace c) && c /= '|')
return (k,v)
tableStart :: MWParser ()
tableStart :: PandocMonad m => MWParser m ()
tableStart = try $ guardColumnOne *> skipSpaces *> sym "{|"
tableEnd :: MWParser ()
tableEnd :: PandocMonad m => MWParser m ()
tableEnd = try $ guardColumnOne *> skipSpaces *> sym "|}"
rowsep :: MWParser ()
rowsep :: PandocMonad m => MWParser m ()
rowsep = try $ guardColumnOne *> skipSpaces *> sym "|-" <*
optional parseAttr <* blanklines
cellsep :: MWParser ()
cellsep :: PandocMonad m => MWParser m ()
cellsep = try $
(guardColumnOne *> skipSpaces <*
( (char '|' <* notFollowedBy (oneOf "-}+"))
@ -276,7 +280,7 @@ cellsep = try $
<|> (() <$ try (string "||"))
<|> (() <$ try (string "!!"))
tableCaption :: MWParser Inlines
tableCaption :: PandocMonad m => MWParser m Inlines
tableCaption = try $ do
guardColumnOne
skipSpaces
@ -284,10 +288,10 @@ tableCaption = try $ do
optional (try $ parseAttr *> skipSpaces *> char '|' *> skipSpaces)
(trimInlines . mconcat) <$> many (notFollowedBy (cellsep <|> rowsep) *> inline)
tableRow :: MWParser [((Alignment, Double), Blocks)]
tableRow :: PandocMonad m => MWParser m [((Alignment, Double), Blocks)]
tableRow = try $ skipMany htmlComment *> many tableCell
tableCell :: MWParser ((Alignment, Double), Blocks)
tableCell :: PandocMonad m => MWParser m ((Alignment, Double), Blocks)
tableCell = try $ do
cellsep
skipMany spaceChar
@ -313,7 +317,7 @@ parseWidth s =
('%':ds) | all isDigit ds -> safeRead ('0':'.':reverse ds)
_ -> Nothing
template :: MWParser String
template :: PandocMonad m => MWParser m String
template = try $ do
string "{{"
notFollowedBy (char '{')
@ -322,7 +326,7 @@ template = try $ do
contents <- manyTill chunk (try $ string "}}")
return $ "{{" ++ concat contents ++ "}}"
blockTag :: MWParser Blocks
blockTag :: PandocMonad m => MWParser m Blocks
blockTag = do
(tag, _) <- lookAhead $ htmlTag isBlockTag'
case tag of
@ -341,7 +345,7 @@ trimCode :: String -> String
trimCode ('\n':xs) = stripTrailingNewlines xs
trimCode xs = stripTrailingNewlines xs
syntaxhighlight :: String -> [Attribute String] -> MWParser Blocks
syntaxhighlight :: PandocMonad m => String -> [Attribute String] -> MWParser m Blocks
syntaxhighlight tag attrs = try $ do
let mblang = lookup "lang" attrs
let mbstart = lookup "start" attrs
@ -351,13 +355,13 @@ syntaxhighlight tag attrs = try $ do
contents <- charsInTags tag
return $ B.codeBlockWith ("",classes,kvs) $ trimCode contents
hrule :: MWParser Blocks
hrule :: PandocMonad m => MWParser m Blocks
hrule = B.horizontalRule <$ try (string "----" *> many (char '-') *> newline)
guardColumnOne :: MWParser ()
guardColumnOne :: PandocMonad m => MWParser m ()
guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1)
preformatted :: MWParser Blocks
preformatted :: PandocMonad m => MWParser m Blocks
preformatted = try $ do
guardColumnOne
char ' '
@ -388,7 +392,7 @@ encode = B.fromList . normalizeCode . B.toList . walk strToCode
normalizeCode $ (Code a1 (x ++ y)) : zs
normalizeCode (x:xs) = x : normalizeCode xs
header :: MWParser Blocks
header :: PandocMonad m => MWParser m Blocks
header = try $ do
guardColumnOne
eqs <- many1 (char '=')
@ -398,13 +402,13 @@ header = try $ do
attr <- registerHeader nullAttr contents
return $ B.headerWith attr lev contents
bulletList :: MWParser Blocks
bulletList :: PandocMonad m => MWParser m Blocks
bulletList = B.bulletList <$>
( many1 (listItem '*')
<|> (htmlTag (~== TagOpen "ul" []) *> spaces *> many (listItem '*' <|> li) <*
optional (htmlTag (~== TagClose "ul"))) )
orderedList :: MWParser Blocks
orderedList :: PandocMonad m => MWParser m Blocks
orderedList =
(B.orderedList <$> many1 (listItem '#'))
<|> try
@ -415,10 +419,10 @@ orderedList =
let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag
return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items)
definitionList :: MWParser Blocks
definitionList :: PandocMonad m => MWParser m Blocks
definitionList = B.definitionList <$> many1 defListItem
defListItem :: MWParser (Inlines, [Blocks])
defListItem :: PandocMonad m => MWParser m (Inlines, [Blocks])
defListItem = try $ do
terms <- mconcat . intersperse B.linebreak <$> many defListTerm
-- we allow dd with no dt, or dt with no dd
@ -429,27 +433,27 @@ defListItem = try $ do
else many (listItem ':')
return (terms, defs)
defListTerm :: MWParser Inlines
defListTerm :: PandocMonad m => MWParser m Inlines
defListTerm = char ';' >> skipMany spaceChar >> anyLine >>=
parseFromString (trimInlines . mconcat <$> many inline)
listStart :: Char -> MWParser ()
listStart :: PandocMonad m => Char -> MWParser m ()
listStart c = char c *> notFollowedBy listStartChar
listStartChar :: MWParser Char
listStartChar :: PandocMonad m => MWParser m Char
listStartChar = oneOf "*#;:"
anyListStart :: MWParser Char
anyListStart :: PandocMonad m => MWParser m Char
anyListStart = char '*'
<|> char '#'
<|> char ':'
<|> char ';'
li :: MWParser Blocks
li :: PandocMonad m => MWParser m Blocks
li = lookAhead (htmlTag (~== TagOpen "li" [])) *>
(firstParaToPlain <$> blocksInTags "li") <* spaces
listItem :: Char -> MWParser Blocks
listItem :: PandocMonad m => Char -> MWParser m Blocks
listItem c = try $ do
extras <- many (try $ char c <* lookAhead listStartChar)
if null extras
@ -475,10 +479,10 @@ listItem c = try $ do
-- }}
-- * next list item
-- which seems to be valid mediawiki.
listChunk :: MWParser String
listChunk :: PandocMonad m => MWParser m String
listChunk = template <|> count 1 anyChar
listItem' :: Char -> MWParser Blocks
listItem' :: PandocMonad m => Char -> MWParser m Blocks
listItem' c = try $ do
listStart c
skipMany spaceChar
@ -498,7 +502,7 @@ firstParaToPlain contents =
-- inline parsers
--
inline :: MWParser Inlines
inline :: PandocMonad m => MWParser m Inlines
inline = whitespace
<|> url
<|> str
@ -516,10 +520,10 @@ inline = whitespace
<|> (B.rawInline "mediawiki" <$> template)
<|> special
str :: MWParser Inlines
str :: PandocMonad m => MWParser m Inlines
str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars)
math :: MWParser Inlines
math :: PandocMonad m => MWParser m Inlines
math = (B.displayMath . trim <$> try (many1 (char ':') >> charsInTags "math"))
<|> (B.math . trim <$> charsInTags "math")
<|> (B.displayMath . trim <$> try (dmStart *> manyTill anyChar dmEnd))
@ -529,13 +533,13 @@ math = (B.displayMath . trim <$> try (many1 (char ':') >> charsInTags "math"))
mStart = string "\\("
mEnd = try (string "\\)")
variable :: MWParser String
variable :: PandocMonad m => MWParser m String
variable = try $ do
string "{{{"
contents <- manyTill anyChar (try $ string "}}}")
return $ "{{{" ++ contents ++ "}}}"
inlineTag :: MWParser Inlines
inlineTag :: PandocMonad m => MWParser m Inlines
inlineTag = do
(tag, _) <- lookAhead $ htmlTag isInlineTag'
case tag of
@ -557,18 +561,18 @@ inlineTag = do
TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask"
_ -> B.rawInline "html" . snd <$> htmlTag (~== tag)
special :: MWParser Inlines
special :: PandocMonad m => MWParser m Inlines
special = B.str <$> count 1 (notFollowedBy' (htmlTag isBlockTag') *>
oneOf specialChars)
inlineHtml :: MWParser Inlines
inlineHtml :: PandocMonad m => MWParser m Inlines
inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag'
whitespace :: MWParser Inlines
whitespace :: PandocMonad m => MWParser m Inlines
whitespace = B.space <$ (skipMany1 spaceChar <|> htmlComment)
<|> B.softbreak <$ endline
endline :: MWParser ()
endline :: PandocMonad m => MWParser m ()
endline = () <$ try (newline <*
notFollowedBy spaceChar <*
notFollowedBy newline <*
@ -577,12 +581,12 @@ endline = () <$ try (newline <*
notFollowedBy' header <*
notFollowedBy anyListStart)
imageIdentifiers :: [MWParser ()]
imageIdentifiers :: PandocMonad m => [MWParser m ()]
imageIdentifiers = [sym (identifier ++ ":") | identifier <- identifiers]
where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier",
"Bild"]
image :: MWParser Inlines
image :: PandocMonad m => MWParser m Inlines
image = try $ do
sym "[["
choice imageIdentifiers
@ -600,7 +604,7 @@ image = try $ do
<|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]")))
return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption
imageOption :: MWParser String
imageOption :: PandocMonad m => MWParser m String
imageOption = try $ char '|' *> opt
where
opt = try (oneOfStrings [ "border", "thumbnail", "frameless"
@ -619,7 +623,7 @@ collapseUnderscores (x:xs) = x : collapseUnderscores xs
addUnderscores :: String -> String
addUnderscores = collapseUnderscores . intercalate "_" . words
internalLink :: MWParser Inlines
internalLink :: PandocMonad m => MWParser m Inlines
internalLink = try $ do
sym "[["
pagename <- unwords . words <$> many (noneOf "|]")
@ -637,7 +641,7 @@ internalLink = try $ do
return mempty
else return link
externalLink :: MWParser Inlines
externalLink :: PandocMonad m => MWParser m Inlines
externalLink = try $ do
char '['
(_, src) <- uri
@ -649,29 +653,29 @@ externalLink = try $ do
return $ B.str $ show num
return $ B.link src "" lab
url :: MWParser Inlines
url :: PandocMonad m => MWParser m Inlines
url = do
(orig, src) <- uri
return $ B.link src "" (B.str orig)
-- | Parses a list of inlines between start and end delimiters.
inlinesBetween :: (Show b) => MWParser a -> MWParser b -> MWParser Inlines
inlinesBetween :: (PandocMonad m, Show b) => MWParser m a -> MWParser m b -> MWParser m Inlines
inlinesBetween start end =
(trimInlines . mconcat) <$> try (start >> many1Till inner end)
where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
innerSpace = try $ whitespace <* notFollowedBy' end
emph :: MWParser Inlines
emph :: PandocMonad m => MWParser m Inlines
emph = B.emph <$> nested (inlinesBetween start end)
where start = sym "''" >> lookAhead nonspaceChar
end = try $ notFollowedBy' (() <$ strong) >> sym "''"
strong :: MWParser Inlines
strong :: PandocMonad m => MWParser m Inlines
strong = B.strong <$> nested (inlinesBetween start end)
where start = sym "'''" >> lookAhead nonspaceChar
end = try $ sym "'''"
doubleQuotes :: MWParser Inlines
doubleQuotes :: PandocMonad m => MWParser m Inlines
doubleQuotes = B.doubleQuoted <$> nested (inlinesBetween openDoubleQuote closeDoubleQuote)
where openDoubleQuote = sym "\"" >> lookAhead nonspaceChar
closeDoubleQuote = try $ sym "\""

View file

@ -33,6 +33,7 @@ module Text.Pandoc.Readers.Native ( readNative ) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared (safeRead)
import Control.Monad.Except (throwError)
import Text.Pandoc.Error
import Text.Pandoc.Class
@ -48,9 +49,11 @@ import Text.Pandoc.Class
--
readNative :: PandocMonad m
=> String -- ^ String to parse (assuming @'\n'@ line endings)
-> m (Either PandocError Pandoc)
-> m Pandoc
readNative s =
return $ maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s)
case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of
Right doc -> return doc
Left _ -> throwError $ PandocParseError "couldn't read native"
readBlocks :: String -> Either PandocError [Block]
readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)

View file

@ -13,8 +13,9 @@ import Control.Monad.State
import Data.Default
import Control.Monad.Except
import Text.Pandoc.Error
import Text.Pandoc.Class (PandocMonad, PandocExecutionError(..))
type OPML = ExceptT PandocError (State OPMLState)
type OPML m = StateT OPMLState m
data OPMLState = OPMLState{
opmlSectionLevel :: Int
@ -30,12 +31,14 @@ instance Default OPMLState where
, opmlDocDate = mempty
}
readOPML :: ReaderOptions -> String -> Either PandocError Pandoc
readOPML _ inp = setTitle (opmlDocTitle st')
. setAuthors (opmlDocAuthors st')
. setDate (opmlDocDate st')
. doc . mconcat <$> bs
where (bs, st') = flip runState def . runExceptT $ (mapM parseBlock $ normalizeTree $ parseXML inp)
readOPML :: PandocMonad m => ReaderOptions -> String -> m Pandoc
readOPML _ inp = do
(bs, st') <- flip runStateT def (mapM parseBlock $ normalizeTree $ parseXML inp)
return $
setTitle (opmlDocTitle st') $
setAuthors (opmlDocAuthors st') $
setDate (opmlDocDate st') $
doc $ mconcat bs
-- normalize input, consolidating adjacent Text and CRef elements
normalizeTree :: [Content] -> [Content]
@ -62,21 +65,22 @@ attrValue attr elt =
Just z -> z
Nothing -> ""
exceptT :: Either PandocError a -> OPML a
exceptT = either throwError return
-- exceptT :: PandocMonad m => Either PandocExecutionError a -> OPML m a
-- exceptT = either throwError return
asHtml :: String -> OPML Inlines
asHtml s = (\(Pandoc _ bs) -> case bs of
asHtml :: PandocMonad m => String -> OPML m Inlines
asHtml s =
(\(Pandoc _ bs) -> case bs of
[Plain ils] -> fromList ils
_ -> mempty) <$> exceptT (readHtml def s)
_ -> mempty) <$> (lift $ readHtml def s)
asMarkdown :: String -> OPML Blocks
asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> exceptT (readMarkdown def s)
asMarkdown :: PandocMonad m => String -> OPML m Blocks
asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> (lift $ readMarkdown def s)
getBlocks :: Element -> OPML Blocks
getBlocks :: PandocMonad m => Element -> OPML m Blocks
getBlocks e = mconcat <$> (mapM parseBlock $ elContent e)
parseBlock :: Content -> OPML Blocks
parseBlock :: PandocMonad m => Content -> OPML m Blocks
parseBlock (Elem e) =
case qName (elName e) of
"ownerName" -> mempty <$ modify (\st ->

View file

@ -39,6 +39,10 @@ import qualified Data.ByteString.Lazy as B
import System.FilePath
import Control.Monad.Except (throwError)
import Text.Pandoc.Class (PandocMonad, PandocExecutionError(..))
import qualified Text.Pandoc.Class as P
import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Options
@ -52,11 +56,21 @@ import Text.Pandoc.Readers.Odt.Generic.XMLConverter
import Text.Pandoc.Readers.Odt.Generic.Fallible
import Text.Pandoc.Shared (filteredFilesFromArchive)
--
readOdt :: ReaderOptions
readOdt :: PandocMonad m
=> ReaderOptions
-> B.ByteString
-> Either PandocError (Pandoc, MediaBag)
readOdt _ bytes = bytesToOdt bytes-- of
-> m Pandoc
readOdt opts bytes = case readOdt' opts bytes of
Right (doc, mb) -> do
P.setMediaBag mb
return doc
Left _ -> throwError $ PandocParseError "couldn't parse odt"
--
readOdt' :: ReaderOptions
-> B.ByteString
-> Either PandocError (Pandoc, MediaBag)
readOdt' _ bytes = bytesToOdt bytes-- of
-- Right (pandoc, mediaBag) -> Right (pandoc , mediaBag)
-- Left err -> Left err

View file

@ -31,24 +31,30 @@ import Text.Pandoc.Readers.Org.Blocks ( blockList, meta )
import Text.Pandoc.Readers.Org.Parsing ( OrgParser, readWithM )
import Text.Pandoc.Readers.Org.ParserState ( optionsToParserState )
import Text.Pandoc.Class (PandocMonad, PandocExecutionError(..))
import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Options
import Control.Monad.Reader ( runReader )
import Control.Monad.Except ( throwError )
import Control.Monad.Reader ( runReaderT )
-- | Parse org-mode string and return a Pandoc document.
readOrg :: ReaderOptions -- ^ Reader options
readOrg :: PandocMonad m
=> ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
-> Either PandocError Pandoc
readOrg opts s = flip runReader def $
readWithM parseOrg (optionsToParserState opts) (s ++ "\n\n")
-> m Pandoc
readOrg opts s = do
parsed <- flip runReaderT def $
readWithM parseOrg (optionsToParserState opts) (s ++ "\n\n")
case parsed of
Right result -> return result
Left _ -> throwError $ PandocParseError "problem parsing org"
--
-- Parser
--
parseOrg :: OrgParser Pandoc
parseOrg :: PandocMonad m => OrgParser m Pandoc
parseOrg = do
blocks' <- blockList
meta' <- meta

View file

@ -44,7 +44,7 @@ import Control.Monad ( void )
import Text.Pandoc.Readers.Org.Parsing
-- | Horizontal Line (five -- dashes or more)
hline :: OrgParser ()
hline :: Monad m => OrgParser m ()
hline = try $ do
skipSpaces
string "-----"
@ -54,58 +54,59 @@ hline = try $ do
return ()
-- | Read the start of a header line, return the header level
headerStart :: OrgParser Int
headerStart :: Monad m => OrgParser m Int
headerStart = try $
(length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos
tableStart :: OrgParser Char
tableStart :: Monad m => OrgParser m Char
tableStart = try $ skipSpaces *> char '|'
latexEnvStart :: OrgParser String
latexEnvStart :: Monad m => OrgParser m String
latexEnvStart = try $ do
skipSpaces *> string "\\begin{"
*> latexEnvName
<* string "}"
<* blankline
where
latexEnvName :: OrgParser String
latexEnvName :: Monad m => OrgParser m String
latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*")
-- | Parses bullet list marker.
bulletListStart :: OrgParser ()
bulletListStart :: Monad m => OrgParser m ()
bulletListStart = try $
choice
[ () <$ skipSpaces <* oneOf "+-" <* skipSpaces1
, () <$ skipSpaces1 <* char '*' <* skipSpaces1
]
genericListStart :: OrgParser String
-> OrgParser Int
genericListStart :: Monad m
=> OrgParser m String
-> OrgParser m Int
genericListStart listMarker = try $
(+) <$> (length <$> many spaceChar)
<*> (length <$> listMarker <* many1 spaceChar)
orderedListStart :: OrgParser Int
orderedListStart :: Monad m => OrgParser m Int
orderedListStart = genericListStart orderedListMarker
-- Ordered list markers allowed in org-mode
where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
drawerStart :: OrgParser String
drawerStart :: Monad m => OrgParser m String
drawerStart = try $
skipSpaces *> drawerName <* skipSpaces <* newline
where drawerName = char ':' *> manyTill nonspaceChar (char ':')
metaLineStart :: OrgParser ()
metaLineStart :: Monad m => OrgParser m ()
metaLineStart = try $ skipSpaces <* string "#+"
commentLineStart :: OrgParser ()
commentLineStart :: Monad m => OrgParser m ()
commentLineStart = try $ skipSpaces <* string "# "
exampleLineStart :: OrgParser ()
exampleLineStart :: Monad m => OrgParser m ()
exampleLineStart = () <$ try (skipSpaces *> string ": ")
noteMarker :: OrgParser String
noteMarker :: Monad m => OrgParser m String
noteMarker = try $ do
char '['
choice [ many1Till digit (char ']')
@ -114,12 +115,12 @@ noteMarker = try $ do
]
-- | Succeeds if the parser is at the end of a block.
endOfBlock :: OrgParser ()
endOfBlock :: Monad m => OrgParser m ()
endOfBlock = lookAhead . try $ do
void blankline <|> anyBlockStart
where
-- Succeeds if there is a new block starting at this position.
anyBlockStart :: OrgParser ()
anyBlockStart :: Monad m => OrgParser m ()
anyBlockStart = try . choice $
[ exampleLineStart
, hline

View file

@ -44,6 +44,7 @@ import Text.Pandoc.Readers.Org.Shared
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder ( Inlines, Blocks )
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Shared ( compactify', compactify'DL, safeRead )
@ -105,7 +106,7 @@ data Headline = Headline
-- | Read an Org mode headline and its contents (i.e. a document subtree).
-- @lvl@ gives the minimum acceptable level of the tree.
headline :: Int -> OrgParser (F Headline)
headline :: PandocMonad m => Int -> OrgParser m (F Headline)
headline lvl = try $ do
level <- headerStart
guard (lvl <= level)
@ -130,16 +131,16 @@ headline lvl = try $ do
, headlineChildren = children'
}
where
endOfTitle :: OrgParser ()
endOfTitle :: Monad m => OrgParser m ()
endOfTitle = void . lookAhead $ optional headerTags *> newline
headerTags :: OrgParser [Tag]
headerTags :: Monad m => OrgParser m [Tag]
headerTags = try $
let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':'
in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces)
-- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks
headlineToBlocks :: Headline -> OrgParser Blocks
headlineToBlocks :: Monad m => Headline -> OrgParser m Blocks
headlineToBlocks hdln@(Headline {..}) = do
maxHeadlineLevels <- getExportSetting exportHeadlineLevels
case () of
@ -162,7 +163,7 @@ isCommentTitle :: Inlines -> Bool
isCommentTitle (B.toList -> (Str "COMMENT":_)) = True
isCommentTitle _ = False
archivedHeadlineToBlocks :: Headline -> OrgParser Blocks
archivedHeadlineToBlocks :: Monad m => Headline -> OrgParser m Blocks
archivedHeadlineToBlocks hdln = do
archivedTreesOption <- getExportSetting exportArchivedTrees
case archivedTreesOption of
@ -170,7 +171,7 @@ archivedHeadlineToBlocks hdln = do
ArchivedTreesExport -> headlineToHeaderWithContents hdln
ArchivedTreesHeadlineOnly -> headlineToHeader hdln
headlineToHeaderWithList :: Headline -> OrgParser Blocks
headlineToHeaderWithList :: Monad m => Headline -> OrgParser m Blocks
headlineToHeaderWithList hdln@(Headline {..}) = do
maxHeadlineLevels <- getExportSetting exportHeadlineLevels
header <- headlineToHeader hdln
@ -189,13 +190,13 @@ headlineToHeaderWithList hdln@(Headline {..}) = do
(Header _ _ inlns:_) -> B.para (B.fromList inlns)
_ -> mempty
headlineToHeaderWithContents :: Headline -> OrgParser Blocks
headlineToHeaderWithContents :: Monad m => Headline -> OrgParser m Blocks
headlineToHeaderWithContents hdln@(Headline {..}) = do
header <- headlineToHeader hdln
childrenBlocks <- mconcat <$> sequence (map headlineToBlocks headlineChildren)
return $ header <> headlineContents <> childrenBlocks
headlineToHeader :: Headline -> OrgParser Blocks
headlineToHeader :: Monad m => Headline -> OrgParser m Blocks
headlineToHeader (Headline {..}) = do
exportTodoKeyword <- getExportSetting exportWithTodoKeywords
let todoText = if exportTodoKeyword
@ -208,7 +209,7 @@ headlineToHeader (Headline {..}) = do
attr <- registerHeader propAttr headlineText
return $ B.headerWith attr headlineLevel text
todoKeyword :: OrgParser TodoMarker
todoKeyword :: Monad m => OrgParser m TodoMarker
todoKeyword = try $ do
taskStates <- activeTodoMarkers <$> getState
let kwParser tdm = try $ (tdm <$ string (todoMarkerName tdm) <* spaceChar)
@ -250,7 +251,7 @@ tagToInline t = B.spanWith ("", ["tag"], [("data-tag-name", fromTag t)]) mempty
--
-- | Get a list of blocks.
blockList :: OrgParser [Block]
blockList :: PandocMonad m => OrgParser m [Block]
blockList = do
initialBlocks <- blocks
headlines <- sequence <$> manyTill (headline 1) eof
@ -259,15 +260,15 @@ blockList = do
return . B.toList $ (runF initialBlocks st) <> headlineBlocks
-- | Get the meta information safed in the state.
meta :: OrgParser Meta
meta :: Monad m => OrgParser m Meta
meta = do
meta' <- metaExport
runF meta' <$> getState
blocks :: OrgParser (F Blocks)
blocks :: PandocMonad m => OrgParser m (F Blocks)
blocks = mconcat <$> manyTill block (void (lookAhead headerStart) <|> eof)
block :: OrgParser (F Blocks)
block :: PandocMonad m => OrgParser m (F Blocks)
block = choice [ mempty <$ blanklines
, table
, orgBlock
@ -306,7 +307,7 @@ attrFromBlockAttributes (BlockAttributes{..}) =
kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues
in (ident, classes, kv)
stringyMetaAttribute :: (String -> Bool) -> OrgParser (String, String)
stringyMetaAttribute :: Monad m => (String -> Bool) -> OrgParser m (String, String)
stringyMetaAttribute attrCheck = try $ do
metaLineStart
attrName <- map toUpper <$> many1Till nonspaceChar (char ':')
@ -315,7 +316,7 @@ stringyMetaAttribute attrCheck = try $ do
attrValue <- anyLine
return (attrName, attrValue)
blockAttributes :: OrgParser BlockAttributes
blockAttributes :: PandocMonad m => OrgParser m BlockAttributes
blockAttributes = try $ do
kv <- many (stringyMetaAttribute attrCheck)
let caption = foldl' (appendValues "CAPTION") Nothing kv
@ -350,17 +351,17 @@ blockAttributes = try $ do
Just acc -> Just $ acc ++ ' ':value
Nothing -> Just value
keyValues :: OrgParser [(String, String)]
keyValues :: Monad m => OrgParser m [(String, String)]
keyValues = try $
manyTill ((,) <$> key <*> value) newline
where
key :: OrgParser String
key :: Monad m => OrgParser m String
key = try $ skipSpaces *> char ':' *> many1 nonspaceChar
value :: OrgParser String
value :: Monad m => OrgParser m String
value = skipSpaces *> manyTill anyChar endOfValue
endOfValue :: OrgParser ()
endOfValue :: Monad m => OrgParser m ()
endOfValue =
lookAhead $ (() <$ try (many1 spaceChar <* key))
<|> () <$ newline
@ -371,7 +372,7 @@ keyValues = try $
--
-- | Read an org-mode block delimited by #+BEGIN_TYPE and #+END_TYPE.
orgBlock :: OrgParser (F Blocks)
orgBlock :: PandocMonad m => OrgParser m (F Blocks)
orgBlock = try $ do
blockAttrs <- blockAttributes
blkType <- blockHeaderStart
@ -390,25 +391,25 @@ orgBlock = try $ do
let (ident, classes, kv) = attrFromBlockAttributes blockAttrs
in fmap $ B.divWith (ident, classes ++ [blkType], kv)
where
blockHeaderStart :: OrgParser String
blockHeaderStart :: Monad m => OrgParser m String
blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord
lowercase :: String -> String
lowercase = map toLower
rawBlockLines :: (String -> F Blocks) -> String -> OrgParser (F Blocks)
rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks)
rawBlockLines f blockType = (ignHeaders *> (f <$> rawBlockContent blockType))
parseBlockLines :: (F Blocks -> F Blocks) -> String -> OrgParser (F Blocks)
parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks)
parseBlockLines f blockType = (ignHeaders *> (f <$> parsedBlockContent))
where
parsedBlockContent :: OrgParser (F Blocks)
parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks)
parsedBlockContent = try $ do
raw <- rawBlockContent blockType
parseFromString blocks (raw ++ "\n")
-- | Read the raw string content of a block
rawBlockContent :: String -> OrgParser String
rawBlockContent :: Monad m => String -> OrgParser m String
rawBlockContent blockType = try $ do
blkLines <- manyTill rawLine blockEnder
tabLen <- getOption readerTabStop
@ -418,10 +419,10 @@ rawBlockContent blockType = try $ do
. map (tabsToSpaces tabLen . commaEscaped)
$ blkLines
where
rawLine :: OrgParser String
rawLine :: Monad m => OrgParser m String
rawLine = try $ ("" <$ blankline) <|> anyLine
blockEnder :: OrgParser ()
blockEnder :: Monad m => OrgParser m ()
blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType)
stripIndent :: [String] -> [String]
@ -448,18 +449,18 @@ rawBlockContent blockType = try $ do
commaEscaped cs = cs
-- | Read but ignore all remaining block headers.
ignHeaders :: OrgParser ()
ignHeaders :: Monad m => OrgParser m ()
ignHeaders = (() <$ newline) <|> (() <$ anyLine)
-- | Read a block containing code intended for export in specific backends
-- only.
exportBlock :: String -> OrgParser (F Blocks)
exportBlock :: Monad m => String -> OrgParser m (F Blocks)
exportBlock blockType = try $ do
exportType <- skipSpaces *> orgArgWord <* ignHeaders
contents <- rawBlockContent blockType
returnF (B.rawBlock (map toLower exportType) contents)
verseBlock :: String -> OrgParser (F Blocks)
verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks)
verseBlock blockType = try $ do
ignHeaders
content <- rawBlockContent blockType
@ -468,7 +469,7 @@ verseBlock blockType = try $ do
where
-- replace initial spaces with nonbreaking spaces to preserve
-- indentation, parse the rest as normal inline
parseVerseLine :: String -> OrgParser (F Inlines)
parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines)
parseVerseLine cs = do
let (initialSpaces, indentedLine) = span isSpace cs
let nbspIndent = if null initialSpaces
@ -480,7 +481,7 @@ verseBlock blockType = try $ do
-- | Read a code block and the associated results block if present. Which of
-- boths blocks is included in the output is determined using the "exports"
-- argument in the block header.
codeBlock :: BlockAttributes -> String -> OrgParser (F Blocks)
codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks)
codeBlock blockAttrs blockType = do
skipSpaces
(classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
@ -513,7 +514,7 @@ exportsResults :: [(String, String)] -> Bool
exportsResults attrs = ("rundoc-exports", "results") `elem` attrs
|| ("rundoc-exports", "both") `elem` attrs
trailingResultsBlock :: OrgParser (Maybe (F Blocks))
trailingResultsBlock :: PandocMonad m => OrgParser m (Maybe (F Blocks))
trailingResultsBlock = optionMaybe . try $ do
blanklines
stringAnyCase "#+RESULTS:"
@ -522,7 +523,7 @@ trailingResultsBlock = optionMaybe . try $ do
-- | Parse code block arguments
-- TODO: We currently don't handle switches.
codeHeaderArgs :: OrgParser ([String], [(String, String)])
codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)])
codeHeaderArgs = try $ do
language <- skipSpaces *> orgArgWord
_ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar))
@ -537,27 +538,27 @@ codeHeaderArgs = try $ do
where
hasRundocParameters = not . null
switch :: OrgParser (Char, Maybe String)
switch :: Monad m => OrgParser m (Char, Maybe String)
switch = try $ simpleSwitch <|> lineNumbersSwitch
where
simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter)
lineNumbersSwitch = (\ls -> ('l', Just ls)) <$>
(string "-l \"" *> many1Till nonspaceChar (char '"'))
blockOption :: OrgParser (String, String)
blockOption :: Monad m => OrgParser m (String, String)
blockOption = try $ do
argKey <- orgArgKey
paramValue <- option "yes" orgParamValue
return (argKey, paramValue)
orgParamValue :: OrgParser String
orgParamValue :: Monad m => OrgParser m String
orgParamValue = try $
skipSpaces
*> notFollowedBy (char ':' )
*> many1 nonspaceChar
<* skipSpaces
horizontalRule :: OrgParser (F Blocks)
horizontalRule :: Monad m => OrgParser m (F Blocks)
horizontalRule = return B.horizontalRule <$ try hline
@ -568,7 +569,7 @@ horizontalRule = return B.horizontalRule <$ try hline
-- | A generic drawer which has no special meaning for org-mode.
-- Whether or not this drawer is included in the output depends on the drawers
-- export setting.
genericDrawer :: OrgParser (F Blocks)
genericDrawer :: PandocMonad m => OrgParser m (F Blocks)
genericDrawer = try $ do
name <- map toUpper <$> drawerStart
content <- manyTill drawerLine (try drawerEnd)
@ -582,35 +583,35 @@ genericDrawer = try $ do
Right names | name `notElem` names -> return mempty
_ -> drawerDiv name <$> parseLines content
where
parseLines :: [String] -> OrgParser (F Blocks)
parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks)
parseLines = parseFromString blocks . (++ "\n") . unlines
drawerDiv :: String -> F Blocks -> F Blocks
drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty)
drawerLine :: OrgParser String
drawerLine :: Monad m => OrgParser m String
drawerLine = anyLine
drawerEnd :: OrgParser String
drawerEnd :: Monad m => OrgParser m String
drawerEnd = try $
skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
-- | Read a :PROPERTIES: drawer and return the key/value pairs contained
-- within.
propertiesDrawer :: OrgParser Properties
propertiesDrawer :: Monad m => OrgParser m Properties
propertiesDrawer = try $ do
drawerType <- drawerStart
guard $ map toUpper drawerType == "PROPERTIES"
manyTill property (try drawerEnd)
where
property :: OrgParser (PropertyKey, PropertyValue)
property :: Monad m => OrgParser m (PropertyKey, PropertyValue)
property = try $ (,) <$> key <*> value
key :: OrgParser PropertyKey
key :: Monad m => OrgParser m PropertyKey
key = fmap toPropertyKey . try $
skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')
value :: OrgParser PropertyValue
value :: Monad m => OrgParser m PropertyValue
value = fmap toPropertyValue . try $
skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline)
@ -621,7 +622,7 @@ propertiesDrawer = try $ do
-- | Figures or an image paragraph (i.e. an image on a line by itself). Only
-- images with a caption attribute are interpreted as figures.
figure :: OrgParser (F Blocks)
figure :: PandocMonad m => OrgParser m (F Blocks)
figure = try $ do
figAttrs <- blockAttributes
src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph
@ -632,7 +633,7 @@ figure = try $ do
let isFigure = not . isNothing $ blockAttrCaption figAttrs
return $ imageBlock isFigure figAttrs imgSrc
where
selfTarget :: OrgParser String
selfTarget :: PandocMonad m => OrgParser m String
selfTarget = try $ char '[' *> linkTarget <* char ']'
imageBlock :: Bool -> BlockAttributes -> String -> F Blocks
@ -654,7 +655,7 @@ figure = try $ do
else "fig:" ++ cs
-- | Succeeds if looking at the end of the current paragraph
endOfParagraph :: OrgParser ()
endOfParagraph :: Monad m => OrgParser m ()
endOfParagraph = try $ skipSpaces *> newline *> endOfBlock
@ -663,11 +664,11 @@ endOfParagraph = try $ skipSpaces *> newline *> endOfBlock
--
-- | Example code marked up by a leading colon.
example :: OrgParser (F Blocks)
example :: Monad m => OrgParser m (F Blocks)
example = try $ do
return . return . exampleCode =<< unlines <$> many1 exampleLine
where
exampleLine :: OrgParser String
exampleLine :: Monad m => OrgParser m String
exampleLine = try $ exampleLineStart *> anyLine
exampleCode :: String -> Blocks
@ -678,10 +679,10 @@ exampleCode = B.codeBlockWith ("", ["example"], [])
-- Comments, Options and Metadata
--
specialLine :: OrgParser (F Blocks)
specialLine :: PandocMonad m => OrgParser m (F Blocks)
specialLine = fmap return . try $ rawExportLine <|> metaLine <|> commentLine
rawExportLine :: OrgParser Blocks
rawExportLine :: PnadocMonad m => OrgParser m Blocks
rawExportLine = try $ do
metaLineStart
key <- metaKey
@ -689,7 +690,7 @@ rawExportLine = try $ do
then B.rawBlock key <$> anyLine
else mzero
commentLine :: OrgParser Blocks
commentLine :: Monad m => OrgParser m Blocks
commentLine = commentLineStart *> anyLine *> pure mempty
@ -718,7 +719,7 @@ data OrgTable = OrgTable
, orgTableRows :: [[Blocks]]
}
table :: OrgParser (F Blocks)
table :: PandocMonad m => OrgParser m (F Blocks)
table = try $ do
blockAttrs <- blockAttributes
lookAhead tableStart
@ -745,18 +746,18 @@ orgToPandocTable (OrgTable colProps heads lns) caption =
<*> totalWidth
in (align', width')
tableRows :: OrgParser [OrgTableRow]
tableRows :: PandocMonad m => OrgParser m [OrgTableRow]
tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
tableContentRow :: OrgParser OrgTableRow
tableContentRow :: PandocMonad m => OrgParser m OrgTableRow
tableContentRow = try $
OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline)
tableContentCell :: OrgParser (F Blocks)
tableContentCell :: PandocMonad m => OrgParser m (F Blocks)
tableContentCell = try $
fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell
tableAlignRow :: OrgParser OrgTableRow
tableAlignRow :: Monad m => OrgParser m OrgTableRow
tableAlignRow = try $ do
tableStart
colProps <- many1Till columnPropertyCell newline
@ -764,7 +765,7 @@ tableAlignRow = try $ do
guard $ any (/= def) colProps
return $ OrgAlignRow colProps
columnPropertyCell :: OrgParser ColumnProperty
columnPropertyCell :: Monad m => OrgParser m ColumnProperty
columnPropertyCell = emptyCell <|> propCell <?> "alignment info"
where
emptyCell = ColumnProperty Nothing Nothing <$ (try $ skipSpaces *> endOfCell)
@ -776,18 +777,18 @@ columnPropertyCell = emptyCell <|> propCell <?> "alignment info"
<* char '>'
<* emptyCell)
tableAlignFromChar :: OrgParser Alignment
tableAlignFromChar :: Monad m => OrgParser m Alignment
tableAlignFromChar = try $
choice [ char 'l' *> return AlignLeft
, char 'c' *> return AlignCenter
, char 'r' *> return AlignRight
]
tableHline :: OrgParser OrgTableRow
tableHline :: Monad m => OrgParser m OrgTableRow
tableHline = try $
OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
endOfCell :: OrgParser Char
endOfCell :: Monad m => OrgParser m Char
endOfCell = try $ char '|' <|> lookAhead newline
rowsToTable :: [OrgTableRow]
@ -840,7 +841,7 @@ rowToContent orgTable row =
--
-- LaTeX fragments
--
latexFragment :: OrgParser (F Blocks)
latexFragment :: Monad m => OrgParser m (F Blocks)
latexFragment = try $ do
envName <- latexEnvStart
content <- mconcat <$> manyTill anyLineNewline (latexEnd envName)
@ -851,7 +852,7 @@ latexFragment = try $ do
, "\\end{", e, "}\n"
]
latexEnd :: String -> OrgParser ()
latexEnd :: Monad m => String -> OrgParser m ()
latexEnd envName = try $
() <$ skipSpaces
<* string ("\\end{" ++ envName ++ "}")
@ -861,7 +862,7 @@ latexEnd envName = try $
--
-- Footnote defintions
--
noteBlock :: OrgParser (F Blocks)
noteBlock :: PandocMonad m => OrgParser m (F Blocks)
noteBlock = try $ do
ref <- noteMarker <* skipSpaces
content <- mconcat <$> blocksTillHeaderOrNote
@ -873,7 +874,7 @@ noteBlock = try $ do
<|> () <$ lookAhead headerStart)
-- Paragraphs or Plain text
paraOrPlain :: OrgParser (F Blocks)
paraOrPlain :: PandocMonad m => OrgParser m (F Blocks)
paraOrPlain = try $ do
-- Make sure we are not looking at a headline
notFollowedBy' (char '*' *> (oneOf " *"))
@ -892,24 +893,24 @@ paraOrPlain = try $ do
-- list blocks
--
list :: OrgParser (F Blocks)
list :: PandocMonad m => OrgParser m (F Blocks)
list = choice [ definitionList, bulletList, orderedList ] <?> "list"
definitionList :: OrgParser (F Blocks)
definitionList :: PandocMonad m => OrgParser m (F Blocks)
definitionList = try $ do n <- lookAhead (bulletListStart' Nothing)
fmap B.definitionList . fmap compactify'DL . sequence
<$> many1 (definitionListItem $ bulletListStart' (Just n))
bulletList :: OrgParser (F Blocks)
bulletList :: PandocMonad m => OrgParser m (F Blocks)
bulletList = try $ do n <- lookAhead (bulletListStart' Nothing)
fmap B.bulletList . fmap compactify' . sequence
<$> many1 (listItem (bulletListStart' $ Just n))
orderedList :: OrgParser (F Blocks)
orderedList :: PandocMonad m => OrgParser m (F Blocks)
orderedList = fmap B.orderedList . fmap compactify' . sequence
<$> many1 (listItem orderedListStart)
bulletListStart' :: Maybe Int -> OrgParser Int
bulletListStart' :: Monad m => Maybe Int -> OrgParser m Int
-- returns length of bulletList prefix, inclusive of marker
bulletListStart' Nothing = do ind <- length <$> many spaceChar
oneOf (bullets $ ind == 0)
@ -925,8 +926,9 @@ bulletListStart' (Just n) = do count (n-1) spaceChar
bullets :: Bool -> String
bullets unindented = if unindented then "+-" else "*+-"
definitionListItem :: OrgParser Int
-> OrgParser (F (Inlines, [Blocks]))
definitionListItem :: PandocMonad m
=> OrgParser m Int
-> OrgParser m (F (Inlines, [Blocks]))
definitionListItem parseMarkerGetLength = try $ do
markerLength <- parseMarkerGetLength
term <- manyTill (noneOf "\n\r") (try definitionMarker)
@ -942,8 +944,9 @@ definitionListItem parseMarkerGetLength = try $ do
-- parse raw text for one list item, excluding start marker and continuations
listItem :: OrgParser Int
-> OrgParser (F Blocks)
listItem :: PandocMonad m
=> OrgParser m Int
-> OrgParser m (F Blocks)
listItem start = try . withContext ListItemState $ do
markerLength <- try start
firstLine <- anyLineNewline
@ -953,8 +956,8 @@ listItem start = try . withContext ListItemState $ do
-- continuation of a list item - indented and separated by blankline or endline.
-- Note: nested lists are parsed as continuations.
listContinuation :: Int
-> OrgParser String
listContinuation :: Monad m => Int
-> OrgParser m String
listContinuation markerLength = try $
notFollowedBy' blankline
*> (mappend <$> (concat <$> many1 listLine)
@ -963,7 +966,7 @@ listContinuation markerLength = try $
listLine = try $ indentWith markerLength *> anyLineNewline
-- indent by specified number of spaces (or equiv. tabs)
indentWith :: Int -> OrgParser String
indentWith :: Monad m => Int -> OrgParser m String
indentWith num = do
tabStop <- getOption readerTabStop
if num < tabStop
@ -972,5 +975,5 @@ listContinuation markerLength = try $
, try (char '\t' >> count (num - tabStop) (char ' ')) ]
-- | Parse any line, include the final newline in the output.
anyLineNewline :: OrgParser String
anyLineNewline :: Monad m => OrgParser m String
anyLineNewline = (++ "\n") <$> anyLine

View file

@ -37,14 +37,14 @@ import Data.Char ( toLower )
import Data.Maybe ( listToMaybe )
-- | Read and handle space separated org-mode export settings.
exportSettings :: OrgParser ()
exportSettings :: Monad m => OrgParser m ()
exportSettings = void $ sepBy spaces exportSetting
-- | Setter function for export settings.
type ExportSettingSetter a = a -> ExportSettings -> ExportSettings
-- | Read and process a single org-mode export option.
exportSetting :: OrgParser ()
exportSetting :: Monad m => OrgParser m ()
exportSetting = choice
[ booleanSetting "^" (\val es -> es { exportSubSuperscripts = val })
, booleanSetting "'" (\val es -> es { exportSmartQuotes = val })
@ -81,10 +81,11 @@ exportSetting = choice
, ignoredSetting "|"
] <?> "export setting"
genericExportSetting :: OrgParser a
genericExportSetting :: Monad m
=> OrgParser m a
-> String
-> ExportSettingSetter a
-> OrgParser ()
-> OrgParser m ()
genericExportSetting optionParser settingIdentifier setter = try $ do
_ <- string settingIdentifier *> char ':'
value <- optionParser
@ -94,11 +95,11 @@ genericExportSetting optionParser settingIdentifier setter = try $ do
st { orgStateExportSettings = setter val . orgStateExportSettings $ st }
-- | A boolean option, either nil (False) or non-nil (True).
booleanSetting :: String -> ExportSettingSetter Bool -> OrgParser ()
booleanSetting :: Monad m => String -> ExportSettingSetter Bool -> OrgParser m ()
booleanSetting = genericExportSetting elispBoolean
-- | An integer-valued option.
integerSetting :: String -> ExportSettingSetter Int -> OrgParser ()
integerSetting :: Monad m => String -> ExportSettingSetter Int -> OrgParser m ()
integerSetting = genericExportSetting parseInt
where
parseInt = try $
@ -106,9 +107,10 @@ integerSetting = genericExportSetting parseInt
-- | Either the string "headline" or an elisp boolean and treated as an
-- @ArchivedTreesOption@.
archivedTreeSetting :: String
archivedTreeSetting :: Monad m
=> String
-> ExportSettingSetter ArchivedTreesOption
-> OrgParser ()
-> OrgParser m ()
archivedTreeSetting =
genericExportSetting $ archivedTreesHeadlineSetting <|> archivedTreesBoolean
where
@ -125,9 +127,10 @@ archivedTreeSetting =
else ArchivedTreesNoExport
-- | A list or a complement list (i.e. a list starting with `not`).
complementableListSetting :: String
complementableListSetting :: Monad m
=> String
-> ExportSettingSetter (Either [String] [String])
-> OrgParser ()
-> OrgParser m ()
complementableListSetting = genericExportSetting $ choice
[ Left <$> complementStringList
, Right <$> stringList
@ -135,31 +138,31 @@ complementableListSetting = genericExportSetting $ choice
]
where
-- Read a plain list of strings.
stringList :: OrgParser [String]
stringList :: Monad m => OrgParser m [String]
stringList = try $
char '('
*> sepBy elispString spaces
<* char ')'
-- Read an emacs lisp list specifying a complement set.
complementStringList :: OrgParser [String]
complementStringList :: Monad m => OrgParser m [String]
complementStringList = try $
string "(not "
*> sepBy elispString spaces
<* char ')'
elispString :: OrgParser String
elispString :: Monad m => OrgParser m String
elispString = try $
char '"'
*> manyTill alphaNum (char '"')
-- | Read but ignore the export setting.
ignoredSetting :: String -> OrgParser ()
ignoredSetting :: Monad m => String -> OrgParser m ()
ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar)
-- | Read an elisp boolean. Only NIL is treated as false, non-NIL values are
-- interpreted as true.
elispBoolean :: OrgParser Bool
elispBoolean :: Monad m => OrgParser m Bool
elispBoolean = try $ do
value <- many1 nonspaceChar
return $ case map toLower value of

View file

@ -47,9 +47,11 @@ import Text.Pandoc.Options
import Text.Pandoc.Readers.LaTeX ( inlineCommand, rawLaTeXInline )
import Text.TeXMath ( readTeX, writePandoc, DisplayType(..) )
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
import Text.Pandoc.Class (PandocMonad)
import Prelude hiding (sequence)
import Control.Monad ( guard, mplus, mzero, when, void )
import Control.Monad.Trans ( lift )
import Data.Char ( isAlphaNum, isSpace )
import Data.List ( intersperse )
import Data.Maybe ( fromMaybe )
@ -60,46 +62,46 @@ import Data.Traversable (sequence)
--
-- Functions acting on the parser state
--
recordAnchorId :: String -> OrgParser ()
recordAnchorId :: PandocMonad m => String -> OrgParser m ()
recordAnchorId i = updateState $ \s ->
s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
pushToInlineCharStack :: Char -> OrgParser ()
pushToInlineCharStack :: PandocMonad m => Char -> OrgParser m ()
pushToInlineCharStack c = updateState $ \s ->
s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
popInlineCharStack :: OrgParser ()
popInlineCharStack :: PandocMonad m => OrgParser m ()
popInlineCharStack = updateState $ \s ->
s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }
surroundingEmphasisChar :: OrgParser [Char]
surroundingEmphasisChar :: PandocMonad m => OrgParser m [Char]
surroundingEmphasisChar =
take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
startEmphasisNewlinesCounting :: Int -> OrgParser ()
startEmphasisNewlinesCounting :: PandocMonad m => Int -> OrgParser m ()
startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
s{ orgStateEmphasisNewlines = Just maxNewlines }
decEmphasisNewlinesCount :: OrgParser ()
decEmphasisNewlinesCount :: PandocMonad m => OrgParser m ()
decEmphasisNewlinesCount = updateState $ \s ->
s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
newlinesCountWithinLimits :: OrgParser Bool
newlinesCountWithinLimits :: PandocMonad m => OrgParser m Bool
newlinesCountWithinLimits = do
st <- getState
return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True
resetEmphasisNewlines :: OrgParser ()
resetEmphasisNewlines :: PandocMonad m => OrgParser m ()
resetEmphasisNewlines = updateState $ \s ->
s{ orgStateEmphasisNewlines = Nothing }
addToNotesTable :: OrgNoteRecord -> OrgParser ()
addToNotesTable :: PandocMonad m => OrgNoteRecord -> OrgParser m ()
addToNotesTable note = do
oldnotes <- orgStateNotes' <$> getState
updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
-- | Parse a single Org-mode inline element
inline :: OrgParser (F Inlines)
inline :: PandocMonad m => OrgParser m (F Inlines)
inline =
choice [ whitespace
, linebreak
@ -125,7 +127,7 @@ inline =
<?> "inline"
-- | Read the rest of the input as inlines.
inlines :: OrgParser (F Inlines)
inlines :: PandocMonad m => OrgParser m (F Inlines)
inlines = trimInlinesF . mconcat <$> many1 inline
-- treat these as potentially non-text when parsing inline:
@ -133,23 +135,23 @@ specialChars :: [Char]
specialChars = "\"$'()*+-,./:;<=>@[\\]^_{|}~"
whitespace :: OrgParser (F Inlines)
whitespace :: PandocMonad m => OrgParser m (F Inlines)
whitespace = pure B.space <$ skipMany1 spaceChar
<* updateLastPreCharPos
<* updateLastForbiddenCharPos
<?> "whitespace"
linebreak :: OrgParser (F Inlines)
linebreak :: PandocMonad m => OrgParser m (F Inlines)
linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
str :: OrgParser (F Inlines)
str :: PandocMonad m => OrgParser m (F Inlines)
str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
<* updateLastStrPos
-- | An endline character that can be treated as a space, not a structural
-- break. This should reflect the values of the Emacs variable
-- @org-element-pagaraph-separate@.
endline :: OrgParser (F Inlines)
endline :: PandocMonad m => OrgParser m (F Inlines)
endline = try $ do
newline
notFollowedBy' endOfBlock
@ -174,7 +176,7 @@ endline = try $ do
-- contributors. All this should be consolidated once an official Org-mode
-- citation syntax has emerged.
cite :: OrgParser (F Inlines)
cite :: PandocMonad m => OrgParser m (F Inlines)
cite = try $ berkeleyCite <|> do
guardEnabled Ext_citations
(cs, raw) <- withRaw $ choice
@ -185,40 +187,41 @@ cite = try $ berkeleyCite <|> do
return $ (flip B.cite (B.text raw)) <$> cs
-- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@).
pandocOrgCite :: OrgParser (F [Citation])
pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation])
pandocOrgCite = try $
char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']'
orgRefCite :: OrgParser (F [Citation])
orgRefCite :: PandocMonad m => OrgParser m (F [Citation])
orgRefCite = try $ choice
[ normalOrgRefCite
, fmap (:[]) <$> linkLikeOrgRefCite
]
normalOrgRefCite :: OrgParser (F [Citation])
normalOrgRefCite :: PandocMonad m => OrgParser m (F [Citation])
normalOrgRefCite = try $ do
mode <- orgRefCiteMode
-- org-ref style citation key, parsed into a citation of the given mode
let orgRefCiteItem :: OrgParser (F Citation)
orgRefCiteItem = try $ do
key <- orgRefCiteKey
returnF $ Citation
{ citationId = key
, citationPrefix = mempty
, citationSuffix = mempty
, citationMode = mode
, citationNoteNum = 0
, citationHash = 0
}
firstCitation <- orgRefCiteItem
moreCitations <- many (try $ char ',' *> orgRefCiteItem)
firstCitation <- orgRefCiteList mode
moreCitations <- many (try $ char ',' *> orgRefCiteList mode)
return . sequence $ firstCitation : moreCitations
where
where
-- | A list of org-ref style citation keys, parsed as citation of the given
-- citation mode.
orgRefCiteList :: PandocMonad m => CitationMode -> OrgParser m (F Citation)
orgRefCiteList citeMode = try $ do
key <- orgRefCiteKey
returnF $ Citation
{ citationId = key
, citationPrefix = mempty
, citationSuffix = mempty
, citationMode = citeMode
, citationNoteNum = 0
, citationHash = 0
}
-- | Read an Berkeley-style Org-mode citation. Berkeley citation style was
-- develop and adjusted to Org-mode style by John MacFarlane and Richard
-- Lawrence, respectively, both philosophers at UC Berkeley.
berkeleyCite :: OrgParser (F Inlines)
berkeleyCite :: PandocMonad m => OrgParser m (F Inlines)
berkeleyCite = try $ do
bcl <- berkeleyCitationList
return $ do
@ -260,7 +263,7 @@ data BerkeleyCitationList = BerkeleyCitationList
, berkeleyCiteCommonSuffix :: Maybe Inlines
, berkeleyCiteCitations :: [Citation]
}
berkeleyCitationList :: OrgParser (F BerkeleyCitationList)
berkeleyCitationList :: PandocMonad m => OrgParser m (F BerkeleyCitationList)
berkeleyCitationList = try $ do
char '['
parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ]
@ -275,22 +278,22 @@ berkeleyCitationList = try $ do
<*> sequence commonSuffix
<*> citations)
where
citationListPart :: OrgParser (F Inlines)
citationListPart :: PandocMonad m => OrgParser m (F Inlines)
citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do
notFollowedBy' citeKey
notFollowedBy (oneOf ";]")
inline
berkeleyBareTag :: OrgParser ()
berkeleyBareTag :: PandocMonad m => OrgParser m ()
berkeleyBareTag = try $ void berkeleyBareTag'
berkeleyParensTag :: OrgParser ()
berkeleyParensTag :: PandocMonad m => OrgParser m ()
berkeleyParensTag = try . void $ enclosedByPair '(' ')' berkeleyBareTag'
berkeleyBareTag' :: OrgParser ()
berkeleyBareTag' :: PandocMonad m => OrgParser m ()
berkeleyBareTag' = try $ void (string "cite")
berkeleyTextualCite :: OrgParser (F [Citation])
berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation])
berkeleyTextualCite = try $ do
(suppressAuthor, key) <- citeKey
returnF . return $ Citation
@ -305,14 +308,14 @@ berkeleyTextualCite = try $ do
-- The following is what a Berkeley-style bracketed textual citation parser
-- would look like. However, as these citations are a subset of Pandoc's Org
-- citation style, this isn't used.
-- berkeleyBracketedTextualCite :: OrgParser (F [Citation])
-- berkeleyBracketedTextualCite :: PandocMonad m => OrgParser m (F [Citation])
-- berkeleyBracketedTextualCite = try . (fmap head) $
-- enclosedByPair '[' ']' berkeleyTextualCite
-- | Read a link-like org-ref style citation. The citation includes pre and
-- post text. However, multiple citations are not possible due to limitations
-- in the syntax.
linkLikeOrgRefCite :: OrgParser (F Citation)
linkLikeOrgRefCite :: PandocMonad m => OrgParser m (F Citation)
linkLikeOrgRefCite = try $ do
_ <- string "[["
mode <- orgRefCiteMode
@ -335,13 +338,13 @@ linkLikeOrgRefCite = try $ do
-- | Read a citation key. The characters allowed in citation keys are taken
-- from the `org-ref-cite-re` variable in `org-ref.el`.
orgRefCiteKey :: OrgParser String
orgRefCiteKey :: PandocMonad m => OrgParser m String
orgRefCiteKey = try . many1 . satisfy $ \c ->
isAlphaNum c || c `elem` ("-_:\\./"::String)
-- | Supported citation types. Only a small subset of org-ref types is
-- supported for now. TODO: rewrite this, use LaTeX reader as template.
orgRefCiteMode :: OrgParser CitationMode
orgRefCiteMode :: PandocMonad m => OrgParser m CitationMode
orgRefCiteMode =
choice $ map (\(s, mode) -> mode <$ try (string s <* char ':'))
[ ("cite", AuthorInText)
@ -352,10 +355,10 @@ orgRefCiteMode =
, ("citeyear", SuppressAuthor)
]
citeList :: OrgParser (F [Citation])
citeList :: PandocMonad m => OrgParser m (F [Citation])
citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces)
citation :: OrgParser (F Citation)
citation :: PandocMonad m => OrgParser m (F Citation)
citation = try $ do
pref <- prefix
(suppress_author, key) <- citeKey
@ -384,10 +387,10 @@ citation = try $ do
then (B.space <>) <$> rest
else rest
footnote :: OrgParser (F Inlines)
footnote :: PandocMonad m => OrgParser m (F Inlines)
footnote = try $ inlineNote <|> referencedNote
inlineNote :: OrgParser (F Inlines)
inlineNote :: PandocMonad m => OrgParser m (F Inlines)
inlineNote = try $ do
string "[fn:"
ref <- many alphaNum
@ -397,7 +400,7 @@ inlineNote = try $ do
addToNotesTable ("fn:" ++ ref, note)
return $ B.note <$> note
referencedNote :: OrgParser (F Inlines)
referencedNote :: PandocMonad m => OrgParser m (F Inlines)
referencedNote = try $ do
ref <- noteMarker
return $ do
@ -409,14 +412,14 @@ referencedNote = try $ do
let contents' = runF contents st{ orgStateNotes' = [] }
return $ B.note contents'
linkOrImage :: OrgParser (F Inlines)
linkOrImage :: PandocMonad m => OrgParser m (F Inlines)
linkOrImage = explicitOrImageLink
<|> selflinkOrImage
<|> angleLink
<|> plainLink
<?> "link or image"
explicitOrImageLink :: OrgParser (F Inlines)
explicitOrImageLink :: PandocMonad m => OrgParser m (F Inlines)
explicitOrImageLink = try $ do
char '['
srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget
@ -431,30 +434,30 @@ explicitOrImageLink = try $ do
_ ->
linkToInlinesF src =<< title'
selflinkOrImage :: OrgParser (F Inlines)
selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines)
selflinkOrImage = try $ do
src <- char '[' *> linkTarget <* char ']'
return $ linkToInlinesF src (B.str src)
plainLink :: OrgParser (F Inlines)
plainLink :: PandocMonad m => OrgParser m (F Inlines)
plainLink = try $ do
(orig, src) <- uri
returnF $ B.link src "" (B.str orig)
angleLink :: OrgParser (F Inlines)
angleLink :: PandocMonad m => OrgParser m (F Inlines)
angleLink = try $ do
char '<'
link <- plainLink
char '>'
return link
linkTarget :: OrgParser String
linkTarget :: PandocMonad m => OrgParser m String
linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
possiblyEmptyLinkTarget :: OrgParser String
possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m String
possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]")
applyCustomLinkFormat :: String -> OrgParser (F String)
applyCustomLinkFormat :: String -> OrgParser m (F String)
applyCustomLinkFormat link = do
let (linkType, rest) = break (== ':') link
return $ do
@ -487,7 +490,7 @@ internalLink link title = do
-- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
-- an anchor.
anchor :: OrgParser (F Inlines)
anchor :: PandocMonad m => OrgParser m (F Inlines)
anchor = try $ do
anchorId <- parseAnchor
recordAnchorId anchorId
@ -509,7 +512,7 @@ solidify = map replaceSpecialChar
| otherwise = '-'
-- | Parses an inline code block and marks it as an babel block.
inlineCodeBlock :: OrgParser (F Inlines)
inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines)
inlineCodeBlock = try $ do
string "src_"
lang <- many1 orgArgWordChar
@ -519,13 +522,13 @@ inlineCodeBlock = try $ do
let attrKeyVal = map toRundocAttrib (("language", lang) : opts)
returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
where
inlineBlockOption :: OrgParser (String, String)
inlineBlockOption :: PandocMonad m => OrgParser m (String, String)
inlineBlockOption = try $ do
argKey <- orgArgKey
paramValue <- option "yes" orgInlineParamValue
return (argKey, paramValue)
orgInlineParamValue :: OrgParser String
orgInlineParamValue :: PandocMonad m => OrgParser m String
orgInlineParamValue = try $
skipSpaces
*> notFollowedBy (char ':')
@ -533,7 +536,7 @@ inlineCodeBlock = try $ do
<* skipSpaces
emphasizedText :: OrgParser (F Inlines)
emphasizedText :: PandocMonad m => OrgParser m (F Inlines)
emphasizedText = do
state <- getState
guard . exportEmphasizedText . orgStateExportSettings $ state
@ -544,60 +547,63 @@ emphasizedText = do
, underline
]
enclosedByPair :: Char -- ^ opening char
enclosedByPair :: PandocMonad m
=> Char -- ^ opening char
-> Char -- ^ closing char
-> OrgParser a -- ^ parser
-> OrgParser [a]
-> OrgParser m a -- ^ parser
-> OrgParser m [a]
enclosedByPair s e p = char s *> many1Till p (char e)
emph :: OrgParser (F Inlines)
emph :: PandocMonad m => OrgParser m (F Inlines)
emph = fmap B.emph <$> emphasisBetween '/'
strong :: OrgParser (F Inlines)
strong :: PandocMonad m => OrgParser m (F Inlines)
strong = fmap B.strong <$> emphasisBetween '*'
strikeout :: OrgParser (F Inlines)
strikeout :: PandocMonad m => OrgParser m (F Inlines)
strikeout = fmap B.strikeout <$> emphasisBetween '+'
-- There is no underline, so we use strong instead.
underline :: OrgParser (F Inlines)
underline :: PandocMonad m => OrgParser m (F Inlines)
underline = fmap B.strong <$> emphasisBetween '_'
verbatim :: OrgParser (F Inlines)
verbatim :: PandocMonad m => OrgParser m (F Inlines)
verbatim = return . B.code <$> verbatimBetween '='
code :: OrgParser (F Inlines)
code :: PandocMonad m => OrgParser m (F Inlines)
code = return . B.code <$> verbatimBetween '~'
subscript :: OrgParser (F Inlines)
subscript :: PandocMonad m => OrgParser m (F Inlines)
subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr)
superscript :: OrgParser (F Inlines)
superscript :: PandocMonad m => OrgParser m (F Inlines)
superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
math :: OrgParser (F Inlines)
math :: PandocMonad m => OrgParser m (F Inlines)
math = return . B.math <$> choice [ math1CharBetween '$'
, mathStringBetween '$'
, rawMathBetween "\\(" "\\)"
]
displayMath :: OrgParser (F Inlines)
displayMath :: PandocMonad m => OrgParser m (F Inlines)
displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
, rawMathBetween "$$" "$$"
]
updatePositions :: Char
-> OrgParser Char
updatePositions :: PandocMonad m
=> Char
-> OrgParser m Char
updatePositions c = do
when (c `elem` emphasisPreChars) updateLastPreCharPos
when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
return c
symbol :: OrgParser (F Inlines)
symbol :: PandocMonad m => OrgParser m (F Inlines)
symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
emphasisBetween :: Char
-> OrgParser (F Inlines)
emphasisBetween :: PandocMonad m
=> Char
-> OrgParser m (F Inlines)
emphasisBetween c = try $ do
startEmphasisNewlinesCounting emphasisAllowedNewlines
res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
@ -606,8 +612,9 @@ emphasisBetween c = try $ do
resetEmphasisNewlines
return res
verbatimBetween :: Char
-> OrgParser String
verbatimBetween :: PandocMonad m
=> Char
-> OrgParser m String
verbatimBetween c = try $
emphasisStart c *>
many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c)
@ -615,8 +622,9 @@ verbatimBetween c = try $
verbatimChar = noneOf "\n\r" >>= updatePositions
-- | Parses a raw string delimited by @c@ using Org's math rules
mathStringBetween :: Char
-> OrgParser String
mathStringBetween :: PandocMonad m
=> Char
-> OrgParser m String
mathStringBetween c = try $ do
mathStart c
body <- many1TillNOrLessNewlines mathAllowedNewlines
@ -626,8 +634,9 @@ mathStringBetween c = try $ do
return $ body ++ [final]
-- | Parse a single character between @c@ using math rules
math1CharBetween :: Char
-> OrgParser String
math1CharBetween :: PandocMonad m
=> Char
-> OrgParser m String
math1CharBetween c = try $ do
char c
res <- noneOf $ c:mathForbiddenBorderChars
@ -635,13 +644,14 @@ math1CharBetween c = try $ do
eof <|> () <$ lookAhead (oneOf mathPostChars)
return [res]
rawMathBetween :: String
rawMathBetween :: PandocMonad m
=> String
-> String
-> OrgParser String
-> OrgParser m String
rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)
-- | Parses the start (opening character) of emphasis
emphasisStart :: Char -> OrgParser Char
emphasisStart :: PandocMonad m => Char -> OrgParser m Char
emphasisStart c = try $ do
guard =<< afterEmphasisPreChar
guard =<< notAfterString
@ -654,7 +664,7 @@ emphasisStart c = try $ do
return c
-- | Parses the closing character of emphasis
emphasisEnd :: Char -> OrgParser Char
emphasisEnd :: PandocMonad m => Char -> OrgParser m Char
emphasisEnd c = try $ do
guard =<< notAfterForbiddenBorderChar
char c
@ -665,11 +675,11 @@ emphasisEnd c = try $ do
where acceptablePostChars =
surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars)
mathStart :: Char -> OrgParser Char
mathStart :: PandocMonad m => Char -> OrgParser m Char
mathStart c = try $
char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))
mathEnd :: Char -> OrgParser Char
mathEnd :: PandocMonad m => Char -> OrgParser m Char
mathEnd c = try $ do
res <- noneOf (c:mathForbiddenBorderChars)
char c
@ -677,15 +687,15 @@ mathEnd c = try $ do
return res
enclosedInlines :: OrgParser a
-> OrgParser b
-> OrgParser (F Inlines)
enclosedInlines :: PandocMonad m => OrgParser m a
-> OrgParser m b
-> OrgParser m (F Inlines)
enclosedInlines start end = try $
trimInlinesF . mconcat <$> enclosed start end inline
enclosedRaw :: OrgParser a
-> OrgParser b
-> OrgParser String
enclosedRaw :: PandocMonad m => OrgParser m a
-> OrgParser m b
-> OrgParser m String
enclosedRaw start end = try $
start *> (onSingleLine <|> spanningTwoLines)
where onSingleLine = try $ many1Till (noneOf "\n\r") end
@ -694,10 +704,10 @@ enclosedRaw start end = try $
-- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume
-- newlines.
many1TillNOrLessNewlines :: Int
-> OrgParser Char
-> OrgParser a
-> OrgParser String
many1TillNOrLessNewlines :: PandocMonad m => Int
-> OrgParser m Char
-> OrgParser m a
-> OrgParser m String
many1TillNOrLessNewlines n p end = try $
nMoreLines (Just n) mempty >>= oneOrMore
where
@ -746,21 +756,21 @@ mathAllowedNewlines :: Int
mathAllowedNewlines = 2
-- | Whether we are right behind a char allowed before emphasis
afterEmphasisPreChar :: OrgParser Bool
afterEmphasisPreChar :: PandocMonad m => OrgParser m Bool
afterEmphasisPreChar = do
pos <- getPosition
lastPrePos <- orgStateLastPreCharPos <$> getState
return . fromMaybe True $ (== pos) <$> lastPrePos
-- | Whether the parser is right after a forbidden border char
notAfterForbiddenBorderChar :: OrgParser Bool
notAfterForbiddenBorderChar :: PandocMonad m => OrgParser m Bool
notAfterForbiddenBorderChar = do
pos <- getPosition
lastFBCPos <- orgStateLastForbiddenCharPos <$> getState
return $ lastFBCPos /= Just pos
-- | Read a sub- or superscript expression
subOrSuperExpr :: OrgParser (F Inlines)
subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines)
subOrSuperExpr = try $
choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r")
, enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
@ -768,7 +778,7 @@ subOrSuperExpr = try $
] >>= parseFromString (mconcat <$> many inline)
where enclosing (left, right) s = left : s ++ [right]
simpleSubOrSuperString :: OrgParser String
simpleSubOrSuperString :: PandocMonad m => OrgParser m String
simpleSubOrSuperString = try $ do
state <- getState
guard . exportSubSuperscripts . orgStateExportSettings $ state
@ -777,17 +787,18 @@ simpleSubOrSuperString = try $ do
<*> many1 alphaNum
]
inlineLaTeX :: OrgParser (F Inlines)
inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines)
inlineLaTeX = try $ do
cmd <- inlineLaTeXCommand
ils <- (lift . lift) $ parseAsInlineLaTeX cmd
maybe mzero returnF $
parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` parseAsInlineLaTeX cmd
parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` ils
where
parseAsMath :: String -> Maybe Inlines
parseAsMath cs = B.fromList <$> texMathToPandoc cs
parseAsInlineLaTeX :: String -> Maybe Inlines
parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs
parseAsInlineLaTeX :: PandocMonad m => String -> m (Maybe Inlines)
parseAsInlineLaTeX cs = maybeRight <$> runParserT inlineCommand state "" cs
parseAsMathMLSym :: String -> Maybe Inlines
parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs)
@ -803,10 +814,11 @@ inlineLaTeX = try $ do
maybeRight :: Either a b -> Maybe b
maybeRight = either (const Nothing) Just
inlineLaTeXCommand :: OrgParser String
inlineLaTeXCommand :: PandocMonad m => OrgParser m String
inlineLaTeXCommand = try $ do
rest <- getInput
case runParser rawLaTeXInline def "source" rest of
parsed <- (lift . lift) $ runParserT rawLaTeXInline def "source" rest
case parsed of
Right (RawInline _ cs) -> do
-- drop any trailing whitespace, those are not be part of the command as
-- far as org mode is concerned.
@ -820,14 +832,14 @@ inlineLaTeXCommand = try $ do
dropWhileEnd :: (a -> Bool) -> [a] -> [a]
dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) []
exportSnippet :: OrgParser (F Inlines)
exportSnippet :: PandocMonad m => OrgParser m (F Inlines)
exportSnippet = try $ do
string "@@"
format <- many1Till (alphaNum <|> char '-') (char ':')
snippet <- manyTill anyChar (try $ string "@@")
returnF $ B.rawInline format snippet
smart :: OrgParser (F Inlines)
smart :: PandocMonad m => OrgParser m (F Inlines)
smart = do
getOption readerSmart >>= guard
doubleQuoted <|> singleQuoted <|>
@ -844,7 +856,7 @@ smart = do
<* updateLastForbiddenCharPos
*> return (B.str "\x2019")
singleQuoted :: OrgParser (F Inlines)
singleQuoted :: PandocMonad m => OrgParser m (F Inlines)
singleQuoted = try $ do
guard =<< getExportSetting exportSmartQuotes
singleQuoteStart
@ -856,7 +868,7 @@ singleQuoted = try $ do
-- doubleQuoted will handle regular double-quoted sections, as well
-- as dialogues with an open double-quote without a close double-quote
-- in the same paragraph.
doubleQuoted :: OrgParser (F Inlines)
doubleQuoted :: PandocMonad m => OrgParser m (F Inlines)
doubleQuoted = try $ do
guard =<< getExportSetting exportSmartQuotes
doubleQuoteStart

View file

@ -41,6 +41,7 @@ import Text.Pandoc.Readers.Org.Parsing
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder ( Blocks, Inlines )
import Text.Pandoc.Class ( PandocMonad )
import Text.Pandoc.Definition
import Control.Monad ( mzero, void )
@ -51,7 +52,7 @@ import Data.Monoid ( (<>) )
import Network.HTTP ( urlEncode )
-- | Returns the current meta, respecting export options.
metaExport :: OrgParser (F Meta)
metaExport :: Monad m => OrgParser m (F Meta)
metaExport = do
st <- getState
let settings = orgStateExportSettings st
@ -68,10 +69,10 @@ removeMeta key meta' =
-- | Parse and handle a single line containing meta information
-- The order, in which blocks are tried, makes sure that we're not looking at
-- the beginning of a block, so we don't need to check for it
metaLine :: OrgParser Blocks
metaLine :: PandocMonad m => OrgParser m Blocks
metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)
declarationLine :: OrgParser ()
declarationLine :: PandocMonad m => OrgParser m ()
declarationLine = try $ do
key <- map toLower <$> metaKey
(key', value) <- metaValue key
@ -79,12 +80,12 @@ declarationLine = try $ do
let meta' = B.setMeta key' <$> value <*> pure nullMeta
in st { orgStateMeta = meta' <> orgStateMeta st }
metaKey :: OrgParser String
metaKey :: Monad m => OrgParser m String
metaKey = map toLower <$> many1 (noneOf ": \n\r")
<* char ':'
<* skipSpaces
metaValue :: String -> OrgParser (String, (F MetaValue))
metaValue :: PandocMonad m => String -> OrgParser m (String, (F MetaValue))
metaValue key =
let inclKey = "header-includes"
in case key of
@ -103,10 +104,10 @@ metaValue key =
accumulatingList inclKey (metaExportSnippet "html")
_ -> (key,) <$> metaString
metaInlines :: OrgParser (F MetaValue)
metaInlines :: PandocMonad m => OrgParser m (F MetaValue)
metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
metaInlinesCommaSeparated :: OrgParser (F MetaValue)
metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue)
metaInlinesCommaSeparated = do
authStrs <- (many1 (noneOf ",\n")) `sepBy1` (char ',')
newline
@ -114,21 +115,21 @@ metaInlinesCommaSeparated = do
let toMetaInlines = MetaInlines . B.toList
return $ MetaList . map toMetaInlines <$> sequence authors
metaString :: OrgParser (F MetaValue)
metaString :: Monad m => OrgParser m (F MetaValue)
metaString = metaModifiedString id
metaModifiedString :: (String -> String) -> OrgParser (F MetaValue)
metaModifiedString :: Monad m => (String -> String) -> OrgParser m (F MetaValue)
metaModifiedString f = return . MetaString . f <$> anyLine
-- | Read an format specific meta definition
metaExportSnippet :: String -> OrgParser (F MetaValue)
metaExportSnippet :: Monad m => String -> OrgParser m (F MetaValue)
metaExportSnippet format =
return . MetaInlines . B.toList . B.rawInline format <$> anyLine
-- | Accumulate the result of the @parser@ in a list under @key@.
accumulatingList :: String
-> OrgParser (F MetaValue)
-> OrgParser (F MetaValue)
accumulatingList :: Monad m => String
-> OrgParser m (F MetaValue)
-> OrgParser m (F MetaValue)
accumulatingList key p = do
value <- p
meta' <- orgStateMeta <$> getState
@ -141,7 +142,7 @@ accumulatingList key p = do
--
-- export options
--
optionLine :: OrgParser ()
optionLine :: Monad m => OrgParser m ()
optionLine = try $ do
key <- metaKey
case key of
@ -152,14 +153,14 @@ optionLine = try $ do
"typ_todo" -> todoSequence >>= updateState . registerTodoSequence
_ -> mzero
addLinkFormat :: String
addLinkFormat :: Monad m => String
-> (String -> String)
-> OrgParser ()
-> OrgParser m ()
addLinkFormat key formatter = updateState $ \s ->
let fs = orgStateLinkFormatters s
in s{ orgStateLinkFormatters = M.insert key formatter fs }
parseLinkFormat :: OrgParser ((String, String -> String))
parseLinkFormat :: Monad m => OrgParser m ((String, String -> String))
parseLinkFormat = try $ do
linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
linkSubst <- parseFormat
@ -167,7 +168,7 @@ parseLinkFormat = try $ do
-- | An ad-hoc, single-argument-only implementation of a printf-style format
-- parser.
parseFormat :: OrgParser (String -> String)
parseFormat :: Monad m => OrgParser m (String -> String)
parseFormat = try $ do
replacePlain <|> replaceUrl <|> justAppend
where
@ -181,13 +182,13 @@ parseFormat = try $ do
rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r")
tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
inlinesTillNewline :: OrgParser (F Inlines)
inlinesTillNewline :: PandocMonad m => OrgParser m (F Inlines)
inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline
--
-- ToDo Sequences and Keywords
--
todoSequence :: OrgParser TodoSequence
todoSequence :: Monad m => OrgParser m TodoSequence
todoSequence = try $ do
todoKws <- todoKeywords
doneKws <- optionMaybe $ todoDoneSep *> todoKeywords
@ -201,13 +202,13 @@ todoSequence = try $ do
(x:xs) -> return $ keywordsToSequence (reverse xs) [x]
where
todoKeywords :: OrgParser [String]
todoKeywords :: Monad m => OrgParser m [String]
todoKeywords = try $
let keyword = many1 nonspaceChar <* skipSpaces
endOfKeywords = todoDoneSep <|> void newline
in manyTill keyword (lookAhead endOfKeywords)
todoDoneSep :: OrgParser ()
todoDoneSep :: Monad m => OrgParser m ()
todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1
keywordsToSequence :: [String] -> [String] -> TodoSequence

View file

@ -51,7 +51,7 @@ module Text.Pandoc.Readers.Org.ParserState
) where
import Control.Monad (liftM, liftM2)
import Control.Monad.Reader (Reader, runReader, ask, asks, local)
import Control.Monad.Reader (Reader, runReader, ReaderT, ask, asks, local)
import Data.Default (Default(..))
import qualified Data.Map as M
@ -122,7 +122,7 @@ instance HasLastStrPosition OrgParserState where
getLastStrPos = orgStateLastStrPos
setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
instance HasQuoteContext st (Reader OrgParserLocal) where
instance Monad m => HasQuoteContext st (ReaderT OrgParserLocal m) where
getQuoteContext = asks orgLocalQuoteContext
withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q})

View file

@ -72,6 +72,7 @@ module Text.Pandoc.Readers.Org.Parsing
, citeKey
-- * Re-exports from Text.Pandoc.Parsec
, runParser
, runParserT
, getInput
, char
, letter
@ -114,17 +115,17 @@ import Text.Pandoc.Parsing hiding ( anyLine, blanklines, newline
, parseFromString )
import Control.Monad ( guard )
import Control.Monad.Reader ( Reader )
import Control.Monad.Reader ( ReaderT )
-- | The parser used to read org files.
type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
type OrgParser m = ParserT [Char] OrgParserState (ReaderT OrgParserLocal m)
--
-- Adaptions and specializations of parsing utilities
--
-- | Parse any line of text
anyLine :: OrgParser String
anyLine :: Monad m => OrgParser m String
anyLine =
P.anyLine
<* updateLastPreCharPos
@ -132,7 +133,7 @@ anyLine =
-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
-- of the state saved and restored.
parseFromString :: OrgParser a -> String -> OrgParser a
parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a
parseFromString parser str' = do
oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
@ -141,33 +142,34 @@ parseFromString parser str' = do
return result
-- | Skip one or more tab or space characters.
skipSpaces1 :: OrgParser ()
skipSpaces1 :: Monad m => OrgParser m ()
skipSpaces1 = skipMany1 spaceChar
-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
newline :: OrgParser Char
newline :: Monad m => OrgParser m Char
newline =
P.newline
<* updateLastPreCharPos
<* updateLastForbiddenCharPos
-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
blanklines :: OrgParser [Char]
blanklines :: Monad m => OrgParser m [Char]
blanklines =
P.blanklines
<* updateLastPreCharPos
<* updateLastForbiddenCharPos
-- | Succeeds when we're in list context.
inList :: OrgParser ()
inList :: Monad m => OrgParser m ()
inList = do
ctx <- orgStateParserContext <$> getState
guard (ctx == ListItemState)
-- | Parse in different context
withContext :: ParserContext -- ^ New parser context
-> OrgParser a -- ^ Parser to run in that context
-> OrgParser a
withContext :: Monad m
=> ParserContext -- ^ New parser context
-> OrgParser m a -- ^ Parser to run in that context
-> OrgParser m a
withContext context parser = do
oldContext <- orgStateParserContext <$> getState
updateState $ \s -> s{ orgStateParserContext = context }
@ -180,19 +182,19 @@ withContext context parser = do
--
-- | Get an export setting.
getExportSetting :: (ExportSettings -> a) -> OrgParser a
getExportSetting :: Monad m => (ExportSettings -> a) -> OrgParser m a
getExportSetting s = s . orgStateExportSettings <$> getState
-- | Set the current position as the last position at which a forbidden char
-- was found (i.e. a character which is not allowed at the inner border of
-- markup).
updateLastForbiddenCharPos :: OrgParser ()
updateLastForbiddenCharPos :: Monad m => OrgParser m ()
updateLastForbiddenCharPos = getPosition >>= \p ->
updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
-- | Set the current parser position as the position at which a character was
-- seen which allows inline markup to follow.
updateLastPreCharPos :: OrgParser ()
updateLastPreCharPos :: Monad m => OrgParser m ()
updateLastPreCharPos = getPosition >>= \p ->
updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
@ -201,15 +203,15 @@ updateLastPreCharPos = getPosition >>= \p ->
--
-- | Read the key of a plist style key-value list.
orgArgKey :: OrgParser String
orgArgKey :: Monad m => OrgParser m String
orgArgKey = try $
skipSpaces *> char ':'
*> many1 orgArgWordChar
-- | Read the value of a plist style key-value list.
orgArgWord :: OrgParser String
orgArgWord :: Monad m => OrgParser m String
orgArgWord = many1 orgArgWordChar
-- | Chars treated as part of a word in plists.
orgArgWordChar :: OrgParser Char
orgArgWordChar :: Monad m => OrgParser m Char
orgArgWordChar = alphaNum <|> oneOf "-_"

View file

@ -49,18 +49,29 @@ import qualified Text.Pandoc.Builder as B
import Data.Sequence (viewr, ViewR(..))
import Data.Char (toLower, isHexDigit, isSpace)
import Data.Monoid ((<>))
import Text.Pandoc.Error
import Control.Monad.Except (throwError)
import Control.Monad.Trans (lift)
import Text.Pandoc.Class (PandocMonad, PandocExecutionError(..))
import qualified Text.Pandoc.Class as P
-- | Parse reStructuredText string and return Pandoc document.
readRST :: ReaderOptions -- ^ Reader options
readRST :: PandocMonad m
=> ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
-> Either PandocError Pandoc
readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n")
-> m Pandoc
readRST opts s = do
parsed <- (readWithM parseRST) def{ stateOptions = opts } (s ++ "\n\n")
case parsed of
Right result -> return result
Left _ -> throwError $ PandocParseError "error parsing rst"
readRSTWithWarnings :: ReaderOptions -> String -> Either PandocError (Pandoc, [String])
readRSTWithWarnings opts s = (readWithWarnings parseRST) def{ stateOptions = opts } (s ++ "\n\n")
readRSTWithWarnings :: PandocMonad m
=> ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
-> m Pandoc
readRSTWithWarnings = readRST
type RSTParser = Parser [Char] ParserState
type RSTParser m = ParserT [Char] ParserState m
--
-- Constants and data structure definitions
@ -141,7 +152,7 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds
factorSemi (Str ys)
factorSemi x = [x]
parseRST :: RSTParser Pandoc
parseRST :: PandocMonad m => RSTParser m Pandoc
parseRST = do
optional blanklines -- skip blank lines at beginning of file
startPos <- getPosition
@ -168,10 +179,10 @@ parseRST = do
-- parsing blocks
--
parseBlocks :: RSTParser Blocks
parseBlocks :: PandocMonad m => RSTParser m Blocks
parseBlocks = mconcat <$> manyTill block eof
block :: RSTParser Blocks
block :: PandocMonad m => RSTParser m Blocks
block = choice [ codeBlock
, blockQuote
, fieldList
@ -191,7 +202,7 @@ block = choice [ codeBlock
-- field list
--
rawFieldListItem :: Int -> RSTParser (String, String)
rawFieldListItem :: Monad m => Int -> RSTParser m (String, String)
rawFieldListItem minIndent = try $ do
indent <- length <$> many (char ' ')
guard $ indent >= minIndent
@ -204,7 +215,7 @@ rawFieldListItem minIndent = try $ do
let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n"
return (name, raw)
fieldListItem :: Int -> RSTParser (Inlines, [Blocks])
fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks])
fieldListItem minIndent = try $ do
(name, raw) <- rawFieldListItem minIndent
term <- parseInlineFromString name
@ -212,7 +223,7 @@ fieldListItem minIndent = try $ do
optional blanklines
return (term, [contents])
fieldList :: RSTParser Blocks
fieldList :: PandocMonad m => RSTParser m Blocks
fieldList = try $ do
indent <- length <$> lookAhead (many spaceChar)
items <- many1 $ fieldListItem indent
@ -224,7 +235,7 @@ fieldList = try $ do
-- line block
--
lineBlock :: RSTParser Blocks
lineBlock :: PandocMonad m => RSTParser m Blocks
lineBlock = try $ do
lines' <- lineBlockLines
lines'' <- mapM parseInlineFromString lines'
@ -235,7 +246,7 @@ lineBlock = try $ do
--
-- note: paragraph can end in a :: starting a code block
para :: RSTParser Blocks
para :: PandocMonad m => RSTParser m Blocks
para = try $ do
result <- trimInlines . mconcat <$> many1 inline
option (B.plain result) $ try $ do
@ -248,18 +259,18 @@ para = try $ do
<> raw
_ -> return (B.para result)
plain :: RSTParser Blocks
plain :: PandocMonad m => RSTParser m Blocks
plain = B.plain . trimInlines . mconcat <$> many1 inline
--
-- header blocks
--
header :: RSTParser Blocks
header :: PandocMonad m => RSTParser m Blocks
header = doubleHeader <|> singleHeader <?> "header"
-- a header with lines on top and bottom
doubleHeader :: RSTParser Blocks
doubleHeader :: PandocMonad m => RSTParser m Blocks
doubleHeader = try $ do
c <- oneOf underlineChars
rest <- many (char c) -- the top line
@ -285,7 +296,7 @@ doubleHeader = try $ do
return $ B.headerWith attr level txt
-- a header with line on the bottom only
singleHeader :: RSTParser Blocks
singleHeader :: PandocMonad m => RSTParser m Blocks
singleHeader = try $ do
notFollowedBy' whitespace
txt <- trimInlines . mconcat <$> many1 (do {notFollowedBy blankline; inline})
@ -309,7 +320,7 @@ singleHeader = try $ do
-- hrule block
--
hrule :: Parser [Char] st Blocks
hrule :: Monad m => ParserT [Char] st m Blocks
hrule = try $ do
chr <- oneOf underlineChars
count 3 (char chr)
@ -323,14 +334,14 @@ hrule = try $ do
--
-- read a line indented by a given string
indentedLine :: String -> Parser [Char] st [Char]
indentedLine :: Monad m => String -> ParserT [Char] st m [Char]
indentedLine indents = try $ do
string indents
anyLine
-- one or more indented lines, possibly separated by blank lines.
-- any amount of indentation will work.
indentedBlock :: Parser [Char] st [Char]
indentedBlock :: Monad m => ParserT [Char] st m [Char]
indentedBlock = try $ do
indents <- lookAhead $ many1 spaceChar
lns <- many1 $ try $ do b <- option "" blanklines
@ -339,24 +350,24 @@ indentedBlock = try $ do
optional blanklines
return $ unlines lns
quotedBlock :: Parser [Char] st [Char]
quotedBlock :: Monad m => ParserT [Char] st m [Char]
quotedBlock = try $ do
quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
lns <- many1 $ lookAhead (char quote) >> anyLine
optional blanklines
return $ unlines lns
codeBlockStart :: Parser [Char] st Char
codeBlockStart :: Monad m => ParserT [Char] st m Char
codeBlockStart = string "::" >> blankline >> blankline
codeBlock :: Parser [Char] st Blocks
codeBlock :: Monad m => ParserT [Char] st m Blocks
codeBlock = try $ codeBlockStart >> codeBlockBody
codeBlockBody :: Parser [Char] st Blocks
codeBlockBody :: Monad m => ParserT [Char] st m Blocks
codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$>
(indentedBlock <|> quotedBlock)
lhsCodeBlock :: RSTParser Blocks
lhsCodeBlock :: Monad m => RSTParser m Blocks
lhsCodeBlock = try $ do
getPosition >>= guard . (==1) . sourceColumn
guardEnabled Ext_literate_haskell
@ -366,14 +377,14 @@ lhsCodeBlock = try $ do
return $ B.codeBlockWith ("", ["sourceCode", "literate", "haskell"], [])
$ intercalate "\n" lns
latexCodeBlock :: Parser [Char] st [[Char]]
latexCodeBlock :: Monad m => ParserT [Char] st m [[Char]]
latexCodeBlock = try $ do
try (latexBlockLine "\\begin{code}")
many1Till anyLine (try $ latexBlockLine "\\end{code}")
where
latexBlockLine s = skipMany spaceChar >> string s >> blankline
birdCodeBlock :: Parser [Char] st [[Char]]
birdCodeBlock :: Monad m => ParserT [Char] st m [[Char]]
birdCodeBlock = filterSpace <$> many1 birdTrackLine
where filterSpace lns =
-- if (as is normal) there is always a space after >, drop it
@ -381,14 +392,14 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine
then map (drop 1) lns
else lns
birdTrackLine :: Parser [Char] st [Char]
birdTrackLine :: Monad m => ParserT [Char] st m [Char]
birdTrackLine = char '>' >> anyLine
--
-- block quotes
--
blockQuote :: RSTParser Blocks
blockQuote :: PandocMonad m => RSTParser m Blocks
blockQuote = do
raw <- indentedBlock
-- parse the extracted block, which may contain various block elements:
@ -399,10 +410,10 @@ blockQuote = do
-- list blocks
--
list :: RSTParser Blocks
list :: PandocMonad m => RSTParser m Blocks
list = choice [ bulletList, orderedList, definitionList ] <?> "list"
definitionListItem :: RSTParser (Inlines, [Blocks])
definitionListItem :: PandocMonad m => RSTParser m (Inlines, [Blocks])
definitionListItem = try $ do
-- avoid capturing a directive or comment
notFollowedBy (try $ char '.' >> char '.')
@ -412,11 +423,11 @@ definitionListItem = try $ do
contents <- parseFromString parseBlocks $ raw ++ "\n"
return (term, [contents])
definitionList :: RSTParser Blocks
definitionList :: PandocMonad m => RSTParser m Blocks
definitionList = B.definitionList <$> many1 definitionListItem
-- parses bullet list start and returns its length (inc. following whitespace)
bulletListStart :: Parser [Char] st Int
bulletListStart :: Monad m => ParserT [Char] st m Int
bulletListStart = try $ do
notFollowedBy' hrule -- because hrules start out just like lists
marker <- oneOf bulletListMarkers
@ -424,16 +435,16 @@ bulletListStart = try $ do
return $ length (marker:white)
-- parses ordered list start and returns its length (inc following whitespace)
orderedListStart :: ListNumberStyle
orderedListStart :: Monad m => ListNumberStyle
-> ListNumberDelim
-> RSTParser Int
-> RSTParser m Int
orderedListStart style delim = try $ do
(_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
white <- many1 spaceChar
return $ markerLen + length white
-- parse a line of a list item
listLine :: Int -> RSTParser [Char]
listLine :: Monad m => Int -> RSTParser m [Char]
listLine markerLength = try $ do
notFollowedBy blankline
indentWith markerLength
@ -441,7 +452,7 @@ listLine markerLength = try $ do
return $ line ++ "\n"
-- indent by specified number of spaces (or equiv. tabs)
indentWith :: Int -> RSTParser [Char]
indentWith :: Monad m => Int -> RSTParser m [Char]
indentWith num = do
tabStop <- getOption readerTabStop
if (num < tabStop)
@ -450,8 +461,8 @@ indentWith num = do
(try (char '\t' >> count (num - tabStop) (char ' '))) ]
-- parse raw text for one list item, excluding start marker and continuations
rawListItem :: RSTParser Int
-> RSTParser (Int, [Char])
rawListItem :: Monad m => RSTParser m Int
-> RSTParser m (Int, [Char])
rawListItem start = try $ do
markerLength <- start
firstLine <- anyLine
@ -461,14 +472,15 @@ rawListItem start = try $ do
-- continuation of a list item - indented and separated by blankline or
-- (in compact lists) endline.
-- Note: nested lists are parsed as continuations.
listContinuation :: Int -> RSTParser [Char]
listContinuation :: Monad m => Int -> RSTParser m [Char]
listContinuation markerLength = try $ do
blanks <- many1 blankline
result <- many1 (listLine markerLength)
return $ blanks ++ concat result
listItem :: RSTParser Int
-> RSTParser Blocks
listItem :: PandocMonad m
=> RSTParser m Int
-> RSTParser m Blocks
listItem start = try $ do
(markerLength, first) <- rawListItem start
rest <- many (listContinuation markerLength)
@ -490,21 +502,21 @@ listItem start = try $ do
[Para xs, DefinitionList ys] -> B.fromList [Plain xs, DefinitionList ys]
_ -> parsed
orderedList :: RSTParser Blocks
orderedList :: PandocMonad m => RSTParser m Blocks
orderedList = try $ do
(start, style, delim) <- lookAhead (anyOrderedListMarker <* spaceChar)
items <- many1 (listItem (orderedListStart style delim))
let items' = compactify' items
return $ B.orderedListWith (start, style, delim) items'
bulletList :: RSTParser Blocks
bulletList :: PandocMonad m => RSTParser m Blocks
bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart)
--
-- directive (e.g. comment, container, compound-paragraph)
--
comment :: RSTParser Blocks
comment :: Monad m => RSTParser m Blocks
comment = try $ do
string ".."
skipMany1 spaceChar <|> (() <$ lookAhead newline)
@ -513,11 +525,11 @@ comment = try $ do
optional indentedBlock
return mempty
directiveLabel :: RSTParser String
directiveLabel :: Monad m => RSTParser m String
directiveLabel = map toLower
<$> many1Till (letter <|> char '-') (try $ string "::")
directive :: RSTParser Blocks
directive :: PandocMonad m => RSTParser m Blocks
directive = try $ do
string ".."
directive'
@ -526,7 +538,7 @@ directive = try $ do
-- date
-- include
-- title
directive' :: RSTParser Blocks
directive' :: PandocMonad m => RSTParser m Blocks
directive' = do
skipMany1 spaceChar
label <- directiveLabel
@ -614,13 +626,13 @@ directive' = do
return $ B.divWith attrs children
other -> do
pos <- getPosition
addWarning (Just pos) $ "ignoring unknown directive: " ++ other
P.addWarningWithPos (Just pos) $ "ignoring unknown directive: " ++ other
return mempty
-- TODO:
-- - Only supports :format: fields with a single format for :raw: roles,
-- change Text.Pandoc.Definition.Format to fix
addNewRole :: String -> [(String, String)] -> RSTParser Blocks
addNewRole :: PandocMonad m => String -> [(String, String)] -> RSTParser m Blocks
addNewRole roleString fields = do
(role, parentRole) <- parseFromString inheritedRole roleString
customRoles <- stateRstCustomRoles <$> getState
@ -642,20 +654,20 @@ addNewRole roleString fields = do
-- warn about syntax we ignore
flip mapM_ fields $ \(key, _) -> case key of
"language" -> when (baseRole /= "code") $ addWarning Nothing $
"language" -> when (baseRole /= "code") $ lift $ P.warn $
"ignoring :language: field because the parent of role :" ++
role ++ ": is :" ++ baseRole ++ ": not :code:"
"format" -> when (baseRole /= "raw") $ addWarning Nothing $
"format" -> when (baseRole /= "raw") $ lift $ P.warn $
"ignoring :format: field because the parent of role :" ++
role ++ ": is :" ++ baseRole ++ ": not :raw:"
_ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++
_ -> lift $ P.warn $ "ignoring unknown field :" ++ key ++
": in definition of role :" ++ role ++ ": in"
when (parentRole == "raw" && countKeys "format" > 1) $
addWarning Nothing $
lift $ P.warn $
"ignoring :format: fields after the first in the definition of role :"
++ role ++": in"
when (parentRole == "code" && countKeys "language" > 1) $
addWarning Nothing $
lift $ P.warn $
"ignoring :language: fields after the first in the definition of role :"
++ role ++": in"
@ -700,7 +712,7 @@ extractUnicodeChar s = maybe Nothing (\c -> Just (c,rest)) mbc
where (ds,rest) = span isHexDigit s
mbc = safeRead ('\'':'\\':'x':ds ++ "'")
extractCaption :: RSTParser (Inlines, Blocks)
extractCaption :: PandocMonad m => RSTParser m (Inlines, Blocks)
extractCaption = do
capt <- trimInlines . mconcat <$> many inline
legend <- optional blanklines >> (mconcat <$> many block)
@ -712,7 +724,7 @@ toChunks = dropWhile null
. map (trim . unlines)
. splitBy (all (`elem` (" \t" :: String))) . lines
codeblock :: [String] -> Maybe String -> String -> String -> RSTParser Blocks
codeblock :: [String] -> Maybe String -> String -> String -> RSTParser m Blocks
codeblock classes numberLines lang body =
return $ B.codeBlockWith attribs $ stripTrailingNewlines body
where attribs = ("", classes', kvs)
@ -728,7 +740,7 @@ codeblock classes numberLines lang body =
--- note block
---
noteBlock :: RSTParser [Char]
noteBlock :: Monad m => RSTParser m [Char]
noteBlock = try $ do
startPos <- getPosition
string ".."
@ -747,7 +759,7 @@ noteBlock = try $ do
-- return blanks so line count isn't affected
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
noteMarker :: RSTParser [Char]
noteMarker :: Monad m => RSTParser m [Char]
noteMarker = do
char '['
res <- many1 digit
@ -760,13 +772,13 @@ noteMarker = do
-- reference key
--
quotedReferenceName :: RSTParser Inlines
quotedReferenceName :: PandocMonad m => RSTParser m Inlines
quotedReferenceName = try $ do
char '`' >> notFollowedBy (char '`') -- `` means inline code!
label' <- trimInlines . mconcat <$> many1Till inline (char '`')
return label'
unquotedReferenceName :: RSTParser Inlines
unquotedReferenceName :: PandocMonad m => RSTParser m Inlines
unquotedReferenceName = try $ do
label' <- trimInlines . mconcat <$> many1Till inline (lookAhead $ char ':')
return label'
@ -775,24 +787,24 @@ unquotedReferenceName = try $ do
-- plus isolated (no two adjacent) internal hyphens, underscores,
-- periods, colons and plus signs; no whitespace or other characters
-- are allowed.
simpleReferenceName' :: Parser [Char] st String
simpleReferenceName' :: Monad m => ParserT [Char] st m String
simpleReferenceName' = do
x <- alphaNum
xs <- many $ alphaNum
<|> (try $ oneOf "-_:+." <* lookAhead alphaNum)
return (x:xs)
simpleReferenceName :: Parser [Char] st Inlines
simpleReferenceName :: Monad m => ParserT [Char] st m Inlines
simpleReferenceName = do
raw <- simpleReferenceName'
return $ B.str raw
referenceName :: RSTParser Inlines
referenceName :: PandocMonad m => RSTParser m Inlines
referenceName = quotedReferenceName <|>
(try $ simpleReferenceName <* lookAhead (char ':')) <|>
unquotedReferenceName
referenceKey :: RSTParser [Char]
referenceKey :: PandocMonad m => RSTParser m [Char]
referenceKey = do
startPos <- getPosition
choice [substKey, anonymousKey, regularKey]
@ -801,7 +813,7 @@ referenceKey = do
-- return enough blanks to replace key
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
targetURI :: Parser [Char] st [Char]
targetURI :: Monad m => ParserT [Char] st m [Char]
targetURI = do
skipSpaces
optional newline
@ -810,7 +822,7 @@ targetURI = do
blanklines
return $ escapeURI $ trim $ contents
substKey :: RSTParser ()
substKey :: PandocMonad m => RSTParser m ()
substKey = try $ do
string ".."
skipMany1 spaceChar
@ -828,7 +840,7 @@ substKey = try $ do
let key = toKey $ stripFirstAndLast ref
updateState $ \s -> s{ stateSubstitutions = M.insert key il $ stateSubstitutions s }
anonymousKey :: RSTParser ()
anonymousKey :: Monad m => RSTParser m ()
anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
src <- targetURI
@ -842,7 +854,7 @@ stripTicks = reverse . stripTick . reverse . stripTick
where stripTick ('`':xs) = xs
stripTick xs = xs
regularKey :: RSTParser ()
regularKey :: PandocMonad m => RSTParser m ()
regularKey = try $ do
string ".. _"
(_,ref) <- withRaw referenceName
@ -869,31 +881,31 @@ regularKey = try $ do
-- Grid tables TODO:
-- - column spans
dashedLine :: Char -> Parser [Char] st (Int, Int)
dashedLine :: Monad m => Char -> ParserT [Char] st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many (char ' ')
return (length dashes, length $ dashes ++ sp)
simpleDashedLines :: Char -> Parser [Char] st [(Int,Int)]
simpleDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)]
simpleDashedLines ch = try $ many1 (dashedLine ch)
-- Parse a table row separator
simpleTableSep :: Char -> RSTParser Char
simpleTableSep :: Monad m => Char -> RSTParser m Char
simpleTableSep ch = try $ simpleDashedLines ch >> newline
-- Parse a table footer
simpleTableFooter :: RSTParser [Char]
simpleTableFooter :: Monad m => RSTParser m [Char]
simpleTableFooter = try $ simpleTableSep '=' >> blanklines
-- Parse a raw line and split it into chunks by indices.
simpleTableRawLine :: [Int] -> RSTParser [String]
simpleTableRawLine :: Monad m => [Int] -> RSTParser m [String]
simpleTableRawLine indices = do
line <- many1Till anyChar newline
return (simpleTableSplitLine indices line)
-- Parse a table row and return a list of blocks (columns).
simpleTableRow :: [Int] -> RSTParser [[Block]]
simpleTableRow :: PandocMonad m => [Int] -> RSTParser m [[Block]]
simpleTableRow indices = do
notFollowedBy' simpleTableFooter
firstLine <- simpleTableRawLine indices
@ -906,8 +918,9 @@ simpleTableSplitLine indices line =
map trim
$ tail $ splitByIndices (init indices) line
simpleTableHeader :: Bool -- ^ Headerless table
-> RSTParser ([[Block]], [Alignment], [Int])
simpleTableHeader :: PandocMonad m
=> Bool -- ^ Headerless table
-> RSTParser m ([[Block]], [Alignment], [Int])
simpleTableHeader headless = try $ do
optional blanklines
rawContent <- if headless
@ -926,8 +939,9 @@ simpleTableHeader headless = try $ do
return (heads, aligns, indices)
-- Parse a simple table.
simpleTable :: Bool -- ^ Headerless table
-> RSTParser Blocks
simpleTable :: PandocMonad m
=> Bool -- ^ Headerless table
-> RSTParser m Blocks
simpleTable headless = do
Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter
-- Simple tables get 0s for relative column widths (i.e., use default)
@ -935,12 +949,13 @@ simpleTable headless = do
where
sep = return () -- optional (simpleTableSep '-')
gridTable :: Bool -- ^ Headerless table
-> RSTParser Blocks
gridTable :: PandocMonad m
=> Bool -- ^ Headerless table
-> RSTParser m Blocks
gridTable headerless = B.singleton
<$> gridTableWith (B.toList <$> parseBlocks) headerless
table :: RSTParser Blocks
table :: PandocMonad m => RSTParser m Blocks
table = gridTable False <|> simpleTable False <|>
gridTable True <|> simpleTable True <?> "table"
@ -948,7 +963,7 @@ table = gridTable False <|> simpleTable False <|>
-- inline
--
inline :: RSTParser Inlines
inline :: PandocMonad m => RSTParser m Inlines
inline = choice [ note -- can start with whitespace, so try before ws
, whitespace
, link
@ -964,29 +979,29 @@ inline = choice [ note -- can start with whitespace, so try before ws
, escapedChar
, symbol ] <?> "inline"
parseInlineFromString :: String -> RSTParser Inlines
parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines
parseInlineFromString = parseFromString (trimInlines . mconcat <$> many inline)
hyphens :: RSTParser Inlines
hyphens :: Monad m => RSTParser m Inlines
hyphens = do
result <- many1 (char '-')
optional endline
-- don't want to treat endline after hyphen or dash as a space
return $ B.str result
escapedChar :: Parser [Char] st Inlines
escapedChar :: Monad m => ParserT [Char] st m Inlines
escapedChar = do c <- escaped anyChar
return $ if c == ' ' -- '\ ' is null in RST
then mempty
else B.str [c]
symbol :: RSTParser Inlines
symbol :: Monad m => RSTParser m Inlines
symbol = do
result <- oneOf specialChars
return $ B.str [result]
-- parses inline code, between codeStart and codeEnd
code :: RSTParser Inlines
code :: Monad m => RSTParser m Inlines
code = try $ do
string "``"
result <- manyTill anyChar (try (string "``"))
@ -994,7 +1009,7 @@ code = try $ do
$ trim $ unwords $ lines result
-- succeeds only if we're not right after a str (ie. in middle of word)
atStart :: RSTParser a -> RSTParser a
atStart :: Monad m => RSTParser m a -> RSTParser m a
atStart p = do
pos <- getPosition
st <- getState
@ -1002,11 +1017,11 @@ atStart p = do
guard $ stateLastStrPos st /= Just pos
p
emph :: RSTParser Inlines
emph :: PandocMonad m => RSTParser m Inlines
emph = B.emph . trimInlines . mconcat <$>
enclosed (atStart $ char '*') (char '*') inline
strong :: RSTParser Inlines
strong :: PandocMonad m => RSTParser m Inlines
strong = B.strong . trimInlines . mconcat <$>
enclosed (atStart $ string "**") (try $ string "**") inline
@ -1018,12 +1033,12 @@ strong = B.strong . trimInlines . mconcat <$>
-- - Classes are silently discarded in addNewRole
-- - Lacks sensible implementation for title-reference (which is the default)
-- - Allows direct use of the :raw: role, rST only allows inherited use.
interpretedRole :: RSTParser Inlines
interpretedRole :: PandocMonad m => RSTParser m Inlines
interpretedRole = try $ do
(role, contents) <- roleBefore <|> roleAfter
renderRole contents Nothing role nullAttr
renderRole :: String -> Maybe String -> String -> Attr -> RSTParser Inlines
renderRole :: PandocMonad m => String -> Maybe String -> String -> Attr -> RSTParser m Inlines
renderRole contents fmt role attr = case role of
"sup" -> return $ B.superscript $ B.str contents
"superscript" -> return $ B.superscript $ B.str contents
@ -1050,7 +1065,7 @@ renderRole contents fmt role attr = case role of
renderRole contents newFmt newRole newAttr
Nothing -> do
pos <- getPosition
addWarning (Just pos) $ "ignoring unknown role :" ++ custom ++ ": in"
P.addWarningWithPos (Just pos) $ "ignoring unknown role :" ++ custom ++ ": in"
return $ B.str contents -- Undefined role
where
titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour
@ -1063,31 +1078,31 @@ renderRole contents fmt role attr = case role of
addClass :: String -> Attr -> Attr
addClass c (ident, classes, keyValues) = (ident, union classes [c], keyValues)
roleName :: RSTParser String
roleName :: PandocMonad m => RSTParser m String
roleName = many1 (letter <|> char '-')
roleMarker :: RSTParser String
roleMarker :: PandocMonad m => RSTParser m String
roleMarker = char ':' *> roleName <* char ':'
roleBefore :: RSTParser (String,String)
roleBefore :: PandocMonad m => RSTParser m (String,String)
roleBefore = try $ do
role <- roleMarker
contents <- unmarkedInterpretedText
return (role,contents)
roleAfter :: RSTParser (String,String)
roleAfter :: PandocMonad m => RSTParser m (String,String)
roleAfter = try $ do
contents <- unmarkedInterpretedText
role <- roleMarker <|> (stateRstDefaultRole <$> getState)
return (role,contents)
unmarkedInterpretedText :: RSTParser [Char]
unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char]
unmarkedInterpretedText = enclosed (atStart $ char '`') (char '`') anyChar
whitespace :: RSTParser Inlines
whitespace :: PandocMonad m => RSTParser m Inlines
whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
str :: RSTParser Inlines
str :: Monad m => RSTParser m Inlines
str = do
let strChar = noneOf ("\t\n " ++ specialChars)
result <- many1 strChar
@ -1095,7 +1110,7 @@ str = do
return $ B.str result
-- an endline character that can be treated as a space, not a structural break
endline :: RSTParser Inlines
endline :: Monad m => RSTParser m Inlines
endline = try $ do
newline
notFollowedBy blankline
@ -1111,10 +1126,10 @@ endline = try $ do
-- links
--
link :: RSTParser Inlines
link :: PandocMonad m => RSTParser m Inlines
link = choice [explicitLink, referenceLink, autoLink] <?> "link"
explicitLink :: RSTParser Inlines
explicitLink :: PandocMonad m => RSTParser m Inlines
explicitLink = try $ do
char '`'
notFollowedBy (char '`') -- `` marks start of inline code
@ -1143,7 +1158,7 @@ explicitLink = try $ do
_ -> return (src, "", nullAttr)
return $ B.linkWith attr (escapeURI src') tit label''
referenceLink :: RSTParser Inlines
referenceLink :: PandocMonad m => RSTParser m Inlines
referenceLink = try $ do
(label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) <*
char '_'
@ -1169,20 +1184,20 @@ referenceLink = try $ do
when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable }
return $ B.linkWith attr src tit label'
autoURI :: RSTParser Inlines
autoURI :: Monad m => RSTParser m Inlines
autoURI = do
(orig, src) <- uri
return $ B.link src "" $ B.str orig
autoEmail :: RSTParser Inlines
autoEmail :: Monad m => RSTParser m Inlines
autoEmail = do
(orig, src) <- emailAddress
return $ B.link src "" $ B.str orig
autoLink :: RSTParser Inlines
autoLink :: PandocMonad m => RSTParser m Inlines
autoLink = autoURI <|> autoEmail
subst :: RSTParser Inlines
subst :: PandocMonad m => RSTParser m Inlines
subst = try $ do
(_,ref) <- withRaw $ enclosed (char '|') (char '|') inline
state <- getState
@ -1196,7 +1211,7 @@ subst = try $ do
return mempty
Just target -> return target
note :: RSTParser Inlines
note :: PandocMonad m => RSTParser m Inlines
note = try $ do
optional whitespace
ref <- noteMarker
@ -1224,20 +1239,20 @@ note = try $ do
updateState $ \st -> st{ stateNotes = newnotes }
return $ B.note contents
smart :: RSTParser Inlines
smart :: PandocMonad m => RSTParser m Inlines
smart = do
getOption readerSmart >>= guard
doubleQuoted <|> singleQuoted <|>
choice [apostrophe, dash, ellipses]
singleQuoted :: RSTParser Inlines
singleQuoted :: PandocMonad m => RSTParser m Inlines
singleQuoted = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $
B.singleQuoted . trimInlines . mconcat <$>
many1Till inline singleQuoteEnd
doubleQuoted :: RSTParser Inlines
doubleQuoted :: PandocMonad m => RSTParser m Inlines
doubleQuoted = try $ do
doubleQuoteStart
withQuoteContext InDoubleQuote $

View file

@ -30,7 +30,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of twiki text to 'Pandoc' document.
-}
module Text.Pandoc.Readers.TWiki ( readTWiki
, readTWikiWithWarnings
) where
import Text.Pandoc.Definition
@ -48,17 +47,25 @@ import Data.Char (isAlphaNum)
import qualified Data.Foldable as F
import Text.Pandoc.Error
-- | Read twiki from an input string and return a Pandoc document.
readTWiki :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
-> Either PandocError Pandoc
readTWiki opts s =
(readWith parseTWiki) def{ stateOptions = opts } (s ++ "\n\n")
import Control.Monad.Except (throwError)
import Text.Pandoc.Class (PandocMonad, PandocExecutionError(..))
import qualified Text.Pandoc.Class as P
readTWikiWithWarnings :: ReaderOptions -- ^ Reader options
-- | Read twiki from an input string and return a Pandoc document.
readTWiki :: PandocMonad m
=> ReaderOptions
-> String
-> m Pandoc
readTWiki opts s = case readTWikiWithWarnings' opts s of
Right (doc, warns) -> do
mapM_ P.warn warns
return doc
Left _ -> throwError $ PandocParseError "couldn't parse TWiki"
readTWikiWithWarnings' :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
-> Either PandocError (Pandoc, [String])
readTWikiWithWarnings opts s =
readTWikiWithWarnings' opts s =
(readWith parseTWikiWithWarnings) def{ stateOptions = opts } (s ++ "\n\n")
where parseTWikiWithWarnings = do
doc <- parseTWiki

View file

@ -68,18 +68,23 @@ import Control.Monad ( guard, liftM, when )
import Data.Monoid ((<>))
import Text.Printf
import Debug.Trace (trace)
import Text.Pandoc.Error
import Text.Pandoc.Class (PandocMonad, PandocExecutionError(..))
import Control.Monad.Except (throwError)
-- | Parse a Textile text and return a Pandoc document.
readTextile :: ReaderOptions -- ^ Reader options
readTextile :: PandocMonad m
=> ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
-> Either PandocError Pandoc
readTextile opts s =
(readWith parseTextile) def{ stateOptions = opts } (s ++ "\n\n")
-> m Pandoc
readTextile opts s = do
parsed <- readWithM parseTextile def{ stateOptions = opts } (s ++ "\n\n")
case parsed of
Right result -> return result
Left _ -> throwError $ PandocParseError "textile parse error"
-- | Generate a Pandoc ADT from a textile document
parseTextile :: Parser [Char] ParserState Pandoc
parseTextile :: PandocMonad m => ParserT [Char] ParserState m Pandoc
parseTextile = do
-- textile allows raw HTML and does smart punctuation by default,
-- but we do not enable smart punctuation unless it is explicitly
@ -103,10 +108,10 @@ parseTextile = do
blocks <- parseBlocks
return $ Pandoc nullMeta (B.toList blocks) -- FIXME
noteMarker :: Parser [Char] ParserState [Char]
noteMarker :: PandocMonad m => ParserT [Char] ParserState m [Char]
noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.')
noteBlock :: Parser [Char] ParserState [Char]
noteBlock :: PandocMonad m => ParserT [Char] ParserState m [Char]
noteBlock = try $ do
startPos <- getPosition
ref <- noteMarker
@ -121,11 +126,11 @@ noteBlock = try $ do
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-- | Parse document blocks
parseBlocks :: Parser [Char] ParserState Blocks
parseBlocks :: PandocMonad m => ParserT [Char] ParserState m Blocks
parseBlocks = mconcat <$> manyTill block eof
-- | Block parsers list tried in definition order
blockParsers :: [Parser [Char] ParserState Blocks]
blockParsers :: PandocMonad m => [ParserT [Char] ParserState m Blocks]
blockParsers = [ codeBlock
, header
, blockQuote
@ -140,7 +145,7 @@ blockParsers = [ codeBlock
]
-- | Any block in the order of definition of blockParsers
block :: Parser [Char] ParserState Blocks
block :: PandocMonad m => ParserT [Char] ParserState m Blocks
block = do
res <- choice blockParsers <?> "block"
pos <- getPosition
@ -150,16 +155,16 @@ block = do
(take 60 $ show $ B.toList res)) (return ())
return res
commentBlock :: Parser [Char] ParserState Blocks
commentBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks
commentBlock = try $ do
string "###."
manyTill anyLine blanklines
return mempty
codeBlock :: Parser [Char] ParserState Blocks
codeBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks
codeBlock = codeBlockBc <|> codeBlockPre
codeBlockBc :: Parser [Char] ParserState Blocks
codeBlockBc :: PandocMonad m => ParserT [Char] ParserState m Blocks
codeBlockBc = try $ do
string "bc."
extended <- option False (True <$ char '.')
@ -179,7 +184,7 @@ trimTrailingNewlines :: String -> String
trimTrailingNewlines = reverse . dropWhile (=='\n') . reverse
-- | Code Blocks in Textile are between <pre> and </pre>
codeBlockPre :: Parser [Char] ParserState Blocks
codeBlockPre :: PandocMonad m => ParserT [Char] ParserState m Blocks
codeBlockPre = try $ do
(t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
result' <- manyTill anyChar (htmlTag (tagClose (=="pre")))
@ -198,7 +203,7 @@ codeBlockPre = try $ do
return $ B.codeBlockWith (ident,classes,kvs) result'''
-- | Header of the form "hN. content" with N in 1..6
header :: Parser [Char] ParserState Blocks
header :: PandocMonad m => ParserT [Char] ParserState m Blocks
header = try $ do
char 'h'
level <- digitToInt <$> oneOf "123456"
@ -210,14 +215,14 @@ header = try $ do
return $ B.headerWith attr' level name
-- | Blockquote of the form "bq. content"
blockQuote :: Parser [Char] ParserState Blocks
blockQuote :: PandocMonad m => ParserT [Char] ParserState m Blocks
blockQuote = try $ do
string "bq" >> attributes >> char '.' >> whitespace
B.blockQuote <$> para
-- Horizontal rule
hrule :: Parser [Char] st Blocks
hrule :: PandocMonad m => ParserT [Char] st m Blocks
hrule = try $ do
skipSpaces
start <- oneOf "-*"
@ -232,39 +237,39 @@ hrule = try $ do
-- | Can be a bullet list or an ordered list. This implementation is
-- strict in the nesting, sublist must start at exactly "parent depth
-- plus one"
anyList :: Parser [Char] ParserState Blocks
anyList :: PandocMonad m => ParserT [Char] ParserState m Blocks
anyList = try $ anyListAtDepth 1 <* blanklines
-- | This allow one type of list to be nested into an other type,
-- provided correct nesting
anyListAtDepth :: Int -> Parser [Char] ParserState Blocks
anyListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
anyListAtDepth depth = choice [ bulletListAtDepth depth,
orderedListAtDepth depth,
definitionList ]
-- | Bullet List of given depth, depth being the number of leading '*'
bulletListAtDepth :: Int -> Parser [Char] ParserState Blocks
bulletListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth)
-- | Bullet List Item of given depth, depth being the number of
-- leading '*'
bulletListItemAtDepth :: Int -> Parser [Char] ParserState Blocks
bulletListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
bulletListItemAtDepth = genericListItemAtDepth '*'
-- | Ordered List of given depth, depth being the number of
-- leading '#'
orderedListAtDepth :: Int -> Parser [Char] ParserState Blocks
orderedListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
orderedListAtDepth depth = try $ do
items <- many1 (orderedListItemAtDepth depth)
return $ B.orderedList items
-- | Ordered List Item of given depth, depth being the number of
-- leading '#'
orderedListItemAtDepth :: Int -> Parser [Char] ParserState Blocks
orderedListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
orderedListItemAtDepth = genericListItemAtDepth '#'
-- | Common implementation of list items
genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState Blocks
genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT [Char] ParserState m Blocks
genericListItemAtDepth c depth = try $ do
count depth (char c) >> attributes >> whitespace
p <- mconcat <$> many listInline
@ -273,25 +278,25 @@ genericListItemAtDepth c depth = try $ do
return $ (B.plain p) <> sublist
-- | A definition list is a set of consecutive definition items
definitionList :: Parser [Char] ParserState Blocks
definitionList :: PandocMonad m => ParserT [Char] ParserState m Blocks
definitionList = try $ B.definitionList <$> many1 definitionListItem
-- | List start character.
listStart :: Parser [Char] ParserState ()
listStart :: PandocMonad m => ParserT [Char] ParserState m ()
listStart = genericListStart '*'
<|> () <$ genericListStart '#'
<|> () <$ definitionListStart
genericListStart :: Char -> Parser [Char] st ()
genericListStart :: PandocMonad m => Char -> ParserT [Char] st m ()
genericListStart c = () <$ try (many1 (char c) >> whitespace)
basicDLStart :: Parser [Char] ParserState ()
basicDLStart :: PandocMonad m => ParserT [Char] ParserState m ()
basicDLStart = do
char '-'
whitespace
notFollowedBy newline
definitionListStart :: Parser [Char] ParserState Inlines
definitionListStart :: PandocMonad m => ParserT [Char] ParserState m Inlines
definitionListStart = try $ do
basicDLStart
trimInlines . mconcat <$>
@ -300,7 +305,7 @@ definitionListStart = try $ do
<|> try (lookAhead (() <$ string ":="))
)
listInline :: Parser [Char] ParserState Inlines
listInline :: PandocMonad m => ParserT [Char] ParserState m Inlines
listInline = try (notFollowedBy newline >> inline)
<|> try (endline <* notFollowedBy listStart)
@ -308,15 +313,15 @@ listInline = try (notFollowedBy newline >> inline)
-- the term defined, then spaces and ":=". The definition follows, on
-- the same single line, or spaned on multiple line, after a line
-- break.
definitionListItem :: Parser [Char] ParserState (Inlines, [Blocks])
definitionListItem :: PandocMonad m => ParserT [Char] ParserState m (Inlines, [Blocks])
definitionListItem = try $ do
term <- (mconcat . intersperse B.linebreak) <$> many1 definitionListStart
def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef)
return (term, def')
where inlineDef :: Parser [Char] ParserState [Blocks]
where inlineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks]
inlineDef = liftM (\d -> [B.plain d])
$ optional whitespace >> (trimInlines . mconcat <$> many listInline) <* newline
multilineDef :: Parser [Char] ParserState [Blocks]
multilineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks]
multilineDef = try $ do
optional whitespace >> newline
s <- many1Till anyChar (try (string "=:" >> newline))
@ -327,7 +332,7 @@ definitionListItem = try $ do
-- raw content
-- | A raw Html Block, optionally followed by blanklines
rawHtmlBlock :: Parser [Char] ParserState Blocks
rawHtmlBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks
rawHtmlBlock = try $ do
skipMany spaceChar
(_,b) <- htmlTag isBlockTag
@ -335,14 +340,14 @@ rawHtmlBlock = try $ do
return $ B.rawBlock "html" b
-- | Raw block of LaTeX content
rawLaTeXBlock' :: Parser [Char] ParserState Blocks
rawLaTeXBlock' :: PandocMonad m => ParserT [Char] ParserState m Blocks
rawLaTeXBlock' = do
guardEnabled Ext_raw_tex
B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
-- | In textile, paragraphs are separated by blank lines.
para :: Parser [Char] ParserState Blocks
para :: PandocMonad m => ParserT [Char] ParserState m Blocks
para = B.para . trimInlines . mconcat <$> many1 inline
-- Tables
@ -353,7 +358,7 @@ toAlignment '>' = AlignRight
toAlignment '=' = AlignCenter
toAlignment _ = AlignDefault
cellAttributes :: Parser [Char] ParserState (Bool, Alignment)
cellAttributes :: PandocMonad m => ParserT [Char] ParserState m (Bool, Alignment)
cellAttributes = try $ do
isHeader <- option False (True <$ char '_')
-- we just ignore colspan and rowspan markers:
@ -366,7 +371,7 @@ cellAttributes = try $ do
return (isHeader, alignment)
-- | A table cell spans until a pipe |
tableCell :: Parser [Char] ParserState ((Bool, Alignment), Blocks)
tableCell :: PandocMonad m => ParserT [Char] ParserState m ((Bool, Alignment), Blocks)
tableCell = try $ do
char '|'
(isHeader, alignment) <- option (False, AlignDefault) $ cellAttributes
@ -377,7 +382,7 @@ tableCell = try $ do
return ((isHeader, alignment), B.plain content)
-- | A table row is made of many table cells
tableRow :: Parser [Char] ParserState [((Bool, Alignment), Blocks)]
tableRow :: PandocMonad m => ParserT [Char] ParserState m [((Bool, Alignment), Blocks)]
tableRow = try $ do
-- skip optional row attributes
optional $ try $ do
@ -387,7 +392,7 @@ tableRow = try $ do
many1 tableCell <* char '|' <* blankline
-- | A table with an optional header.
table :: Parser [Char] ParserState Blocks
table :: PandocMonad m => ParserT [Char] ParserState m Blocks
table = try $ do
-- ignore table attributes
caption <- option mempty $ try $ do
@ -411,7 +416,7 @@ table = try $ do
(map (map snd) rows)
-- | Ignore markers for cols, thead, tfoot.
ignorableRow :: Parser [Char] ParserState ()
ignorableRow :: PandocMonad m => ParserT [Char] ParserState m ()
ignorableRow = try $ do
char '|'
oneOf ":^-~"
@ -420,7 +425,7 @@ ignorableRow = try $ do
_ <- anyLine
return ()
explicitBlockStart :: String -> Parser [Char] ParserState ()
explicitBlockStart :: PandocMonad m => String -> ParserT [Char] ParserState m ()
explicitBlockStart name = try $ do
string name
attributes
@ -430,9 +435,10 @@ explicitBlockStart name = try $ do
-- | Blocks like 'p' and 'table' do not need explicit block tag.
-- However, they can be used to set HTML/CSS attributes when needed.
maybeExplicitBlock :: String -- ^ block tag name
-> Parser [Char] ParserState Blocks -- ^ implicit block
-> Parser [Char] ParserState Blocks
maybeExplicitBlock :: PandocMonad m
=> String -- ^ block tag name
-> ParserT [Char] ParserState m Blocks -- ^ implicit block
-> ParserT [Char] ParserState m Blocks
maybeExplicitBlock name blk = try $ do
optional $ explicitBlockStart name
blk
@ -445,12 +451,12 @@ maybeExplicitBlock name blk = try $ do
-- | Any inline element
inline :: Parser [Char] ParserState Inlines
inline :: PandocMonad m => ParserT [Char] ParserState m Inlines
inline = do
choice inlineParsers <?> "inline"
-- | Inline parsers tried in order
inlineParsers :: [Parser [Char] ParserState Inlines]
inlineParsers :: PandocMonad m => [ParserT [Char] ParserState m Inlines]
inlineParsers = [ str
, whitespace
, endline
@ -470,7 +476,7 @@ inlineParsers = [ str
]
-- | Inline markups
inlineMarkup :: Parser [Char] ParserState Inlines
inlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines
inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
, simpleInline (string "**") B.strong
, simpleInline (string "__") B.emph
@ -484,29 +490,29 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
]
-- | Trademark, registered, copyright
mark :: Parser [Char] st Inlines
mark :: PandocMonad m => ParserT [Char] st m Inlines
mark = try $ char '(' >> (try tm <|> try reg <|> copy)
reg :: Parser [Char] st Inlines
reg :: PandocMonad m => ParserT [Char] st m Inlines
reg = do
oneOf "Rr"
char ')'
return $ B.str "\174"
tm :: Parser [Char] st Inlines
tm :: PandocMonad m => ParserT [Char] st m Inlines
tm = do
oneOf "Tt"
oneOf "Mm"
char ')'
return $ B.str "\8482"
copy :: Parser [Char] st Inlines
copy :: PandocMonad m => ParserT [Char] st m Inlines
copy = do
oneOf "Cc"
char ')'
return $ B.str "\169"
note :: Parser [Char] ParserState Inlines
note :: PandocMonad m => ParserT [Char] ParserState m Inlines
note = try $ do
ref <- (char '[' *> many1 digit <* char ']')
notes <- stateNotes <$> getState
@ -530,13 +536,13 @@ wordBoundaries :: [Char]
wordBoundaries = markupChars ++ stringBreakers
-- | Parse a hyphened sequence of words
hyphenedWords :: Parser [Char] ParserState String
hyphenedWords :: PandocMonad m => ParserT [Char] ParserState m String
hyphenedWords = do
x <- wordChunk
xs <- many (try $ char '-' >> wordChunk)
return $ intercalate "-" (x:xs)
wordChunk :: Parser [Char] ParserState String
wordChunk :: PandocMonad m => ParserT [Char] ParserState m String
wordChunk = try $ do
hd <- noneOf wordBoundaries
tl <- many ( (noneOf wordBoundaries) <|>
@ -545,7 +551,7 @@ wordChunk = try $ do
return $ hd:tl
-- | Any string
str :: Parser [Char] ParserState Inlines
str :: PandocMonad m => ParserT [Char] ParserState m Inlines
str = do
baseStr <- hyphenedWords
-- RedCloth compliance : if parsed word is uppercase and immediatly
@ -558,11 +564,11 @@ str = do
return $ B.str fullStr
-- | Some number of space chars
whitespace :: Parser [Char] st Inlines
whitespace :: PandocMonad m => ParserT [Char] st m Inlines
whitespace = many1 spaceChar >> return B.space <?> "whitespace"
-- | In Textile, an isolated endline character is a line break
endline :: Parser [Char] ParserState Inlines
endline :: PandocMonad m => ParserT [Char] ParserState m Inlines
endline = try $ do
newline
notFollowedBy blankline
@ -570,18 +576,18 @@ endline = try $ do
notFollowedBy rawHtmlBlock
return B.linebreak
rawHtmlInline :: Parser [Char] ParserState Inlines
rawHtmlInline :: PandocMonad m => ParserT [Char] ParserState m Inlines
rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
-- | Raw LaTeX Inline
rawLaTeXInline' :: Parser [Char] ParserState Inlines
rawLaTeXInline' :: PandocMonad m => ParserT [Char] ParserState m Inlines
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
B.singleton <$> rawLaTeXInline
-- | Textile standard link syntax is "label":target. But we
-- can also have ["label":target].
link :: Parser [Char] ParserState Inlines
link :: PandocMonad m => ParserT [Char] ParserState m Inlines
link = try $ do
bracketed <- (True <$ char '[') <|> return False
char '"' *> notFollowedBy (oneOf " \t\n\r")
@ -600,7 +606,7 @@ link = try $ do
else B.spanWith attr $ B.link url "" name'
-- | image embedding
image :: Parser [Char] ParserState Inlines
image :: PandocMonad m => ParserT [Char] ParserState m Inlines
image = try $ do
char '!' >> notFollowedBy space
(ident, cls, kvs) <- attributes
@ -612,50 +618,50 @@ image = try $ do
char '!'
return $ B.imageWith attr src alt (B.str alt)
escapedInline :: Parser [Char] ParserState Inlines
escapedInline :: PandocMonad m => ParserT [Char] ParserState m Inlines
escapedInline = escapedEqs <|> escapedTag
escapedEqs :: Parser [Char] ParserState Inlines
escapedEqs :: PandocMonad m => ParserT [Char] ParserState m Inlines
escapedEqs = B.str <$>
(try $ string "==" *> manyTill anyChar' (try $ string "=="))
-- | literal text escaped btw <notextile> tags
escapedTag :: Parser [Char] ParserState Inlines
escapedTag :: PandocMonad m => ParserT [Char] ParserState m Inlines
escapedTag = B.str <$>
(try $ string "<notextile>" *>
manyTill anyChar' (try $ string "</notextile>"))
-- | Any special symbol defined in wordBoundaries
symbol :: Parser [Char] ParserState Inlines
symbol :: PandocMonad m => ParserT [Char] ParserState m Inlines
symbol = B.str . singleton <$> (notFollowedBy newline *>
notFollowedBy rawHtmlBlock *>
oneOf wordBoundaries)
-- | Inline code
code :: Parser [Char] ParserState Inlines
code :: PandocMonad m => ParserT [Char] ParserState m Inlines
code = code1 <|> code2
-- any character except a newline before a blank line
anyChar' :: Parser [Char] ParserState Char
anyChar' :: PandocMonad m => ParserT [Char] ParserState m Char
anyChar' =
satisfy (/='\n') <|> (try $ char '\n' <* notFollowedBy blankline)
code1 :: Parser [Char] ParserState Inlines
code1 :: PandocMonad m => ParserT [Char] ParserState m Inlines
code1 = B.code <$> surrounded (char '@') anyChar'
code2 :: Parser [Char] ParserState Inlines
code2 :: PandocMonad m => ParserT [Char] ParserState m Inlines
code2 = do
htmlTag (tagOpen (=="tt") null)
B.code <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
-- | Html / CSS attributes
attributes :: Parser [Char] ParserState Attr
attributes :: PandocMonad m => ParserT [Char] ParserState m Attr
attributes = (foldl (flip ($)) ("",[],[])) <$>
try (do special <- option id specialAttribute
attrs <- many attribute
return (special : attrs))
specialAttribute :: Parser [Char] ParserState (Attr -> Attr)
specialAttribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
specialAttribute = do
alignStr <- ("center" <$ char '=') <|>
("justify" <$ try (string "<>")) <|>
@ -664,11 +670,11 @@ specialAttribute = do
notFollowedBy spaceChar
return $ addStyle ("text-align:" ++ alignStr)
attribute :: Parser [Char] ParserState (Attr -> Attr)
attribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
attribute = try $
(classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar
classIdAttr :: Parser [Char] ParserState (Attr -> Attr)
classIdAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
classIdAttr = try $ do -- (class class #id)
char '('
ws <- words `fmap` manyTill anyChar' (char ')')
@ -679,7 +685,7 @@ classIdAttr = try $ do -- (class class #id)
classes' -> return $ \(_,_,keyvals) ->
("",classes',keyvals)
styleAttr :: Parser [Char] ParserState (Attr -> Attr)
styleAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
styleAttr = do
style <- try $ enclosed (char '{') (char '}') anyChar'
return $ addStyle style
@ -690,21 +696,23 @@ addStyle style (id',classes,keyvals) =
where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
style' = style ++ ";" ++ concat [v | ("style",v) <- keyvals]
langAttr :: Parser [Char] ParserState (Attr -> Attr)
langAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
langAttr = do
lang <- try $ enclosed (char '[') (char ']') alphaNum
return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals)
-- | Parses material surrounded by a parser.
surrounded :: Parser [Char] st t -- ^ surrounding parser
-> Parser [Char] st a -- ^ content parser (to be used repeatedly)
-> Parser [Char] st [a]
surrounded :: PandocMonad m
=> ParserT [Char] st m t -- ^ surrounding parser
-> ParserT [Char] st m a -- ^ content parser (to be used repeatedly)
-> ParserT [Char] st m [a]
surrounded border =
enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser
-> (Inlines -> Inlines) -- ^ Inline constructor
-> Parser [Char] ParserState Inlines -- ^ content parser (to be used repeatedly)
simpleInline :: PandocMonad m
=> ParserT [Char] ParserState m t -- ^ surrounding parser
-> (Inlines -> Inlines) -- ^ Inline constructor
-> ParserT [Char] ParserState m Inlines -- ^ content parser (to be used repeatedly)
simpleInline border construct = try $ do
notAfterString
border *> notFollowedBy (oneOf " \t\n\r")
@ -718,7 +726,7 @@ simpleInline border construct = try $ do
then body
else B.spanWith attr body
groupedInlineMarkup :: Parser [Char] ParserState Inlines
groupedInlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines
groupedInlineMarkup = try $ do
char '['
sp1 <- option mempty $ B.space <$ whitespace

View file

@ -46,13 +46,12 @@ import Data.Maybe (fromMaybe)
import Control.Monad (void, guard, when)
import Data.Default
import Control.Monad.Reader (Reader, runReader, asks)
import Text.Pandoc.Error
import Data.Time.LocalTime (getZonedTime)
import System.Directory(getModificationTime)
import Data.Time.Format (formatTime)
import Text.Pandoc.Compat.Time (defaultTimeLocale)
import System.IO.Error (catchIOError)
import Control.Monad.Except (throwError, catchError)
import Text.Pandoc.Class (PandocMonad, PandocExecutionError(..))
import qualified Text.Pandoc.Class as P
type T2T = ParserT String ParserState (Reader T2TMeta)
@ -69,26 +68,42 @@ instance Default T2TMeta where
def = T2TMeta "" "" "" ""
-- | Get the meta information required by Txt2Tags macros
getT2TMeta :: [FilePath] -> FilePath -> IO T2TMeta
getT2TMeta inps out = do
curDate <- formatTime defaultTimeLocale "%F" <$> getZonedTime
getT2TMeta :: PandocMonad m => m T2TMeta
getT2TMeta = do
mbInps <- P.getInputFiles
let inps = case mbInps of
Just x -> x
Nothing -> []
mbOutp <- P.getOutputFile
let outp = case mbOutp of
Just x -> x
Nothing -> ""
curDate <- formatTime defaultTimeLocale "%F" <$> P.getZonedTime
let getModTime = fmap (formatTime defaultTimeLocale "%T") .
getModificationTime
P.getModificationTime
curMtime <- case inps of
[] -> formatTime defaultTimeLocale "%T" <$> getZonedTime
_ -> catchIOError
[] -> formatTime defaultTimeLocale "%T" <$> P.getZonedTime
_ -> catchError
(maximum <$> mapM getModTime inps)
(const (return ""))
return $ T2TMeta curDate curMtime (intercalate ", " inps) out
return $ T2TMeta curDate curMtime (intercalate ", " inps) outp
-- | Read Txt2Tags from an input string returning a Pandoc document
readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Either PandocError Pandoc
readTxt2Tags t opts s = flip runReader t $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n")
readTxt2Tags :: PandocMonad m
=> ReaderOptions
-> String
-> m Pandoc
readTxt2Tags opts s = do
meta <- getT2TMeta
let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n")
case parsed of
Right result -> return $ result
Left _ -> throwError $ PandocParseError "error parsing t2t"
-- | Read Txt2Tags (ignoring all macros) from an input string returning
-- a Pandoc document
readTxt2TagsNoMacros :: ReaderOptions -> String -> Either PandocError Pandoc
readTxt2TagsNoMacros = readTxt2Tags def
readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> String -> m Pandoc
readTxt2TagsNoMacros = readTxt2Tags
parseT2T :: T2T Pandoc
parseT2T = do