Org reader: move parser state into separate module
The org reader code has become large and confusing. Extracting smaller parts into submodules should help to clean things up.
This commit is contained in:
parent
fd9ec835ec
commit
7a0729ea09
3 changed files with 234 additions and 159 deletions
|
@ -393,6 +393,7 @@ Library
|
|||
Text.Pandoc.Readers.Odt.Generic.XMLConverter,
|
||||
Text.Pandoc.Readers.Odt.Arrows.State,
|
||||
Text.Pandoc.Readers.Odt.Arrows.Utils,
|
||||
Text.Pandoc.Readers.Org.ParserState,
|
||||
Text.Pandoc.Writers.Shared,
|
||||
Text.Pandoc.Asciify,
|
||||
Text.Pandoc.MIME,
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
{-# LANGUAGE FlexibleContexts #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||
{-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, FlexibleInstances #-}
|
||||
{-
|
||||
Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
|
||||
|
||||
|
@ -31,8 +30,7 @@ Conversion of org-mode formatted plain text to 'Pandoc' document.
|
|||
module Text.Pandoc.Readers.Org ( readOrg ) where
|
||||
|
||||
import qualified Text.Pandoc.Builder as B
|
||||
import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..),
|
||||
trimInlines )
|
||||
import Text.Pandoc.Builder ( Inlines, Blocks )
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Compat.Monoid ((<>))
|
||||
import Text.Pandoc.Error
|
||||
|
@ -43,153 +41,33 @@ import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
|
|||
, parseFromString, blanklines
|
||||
)
|
||||
import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
|
||||
import Text.Pandoc.Readers.Org.ParserState
|
||||
import Text.Pandoc.Shared (compactify', compactify'DL)
|
||||
import Text.TeXMath (readTeX, writePandoc, DisplayType(..))
|
||||
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
|
||||
|
||||
import Control.Arrow (first)
|
||||
import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
|
||||
import Control.Monad.Reader (Reader, runReader, ask, asks, local)
|
||||
import Control.Monad (foldM, guard, mplus, mzero, when)
|
||||
import Control.Monad.Reader ( Reader, runReader )
|
||||
import Data.Char (isAlphaNum, isSpace, toLower)
|
||||
import Data.Default
|
||||
import Data.List (intersperse, isPrefixOf, isSuffixOf)
|
||||
import qualified Data.Map as M
|
||||
import qualified Data.Set as Set
|
||||
import Data.Maybe (fromMaybe, isJust)
|
||||
import Network.HTTP (urlEncode)
|
||||
|
||||
|
||||
-- | Parse org-mode string and return a Pandoc document.
|
||||
readOrg :: ReaderOptions -- ^ Reader options
|
||||
-> String -- ^ String to parse (assuming @'\n'@ line endings)
|
||||
-> Either PandocError Pandoc
|
||||
readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
|
||||
|
||||
data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
|
||||
|
||||
-- | The parser used to read org files.
|
||||
type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
|
||||
|
||||
instance HasIdentifierList OrgParserState where
|
||||
extractIdentifierList = orgStateIdentifiers
|
||||
updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) }
|
||||
|
||||
instance HasHeaderMap OrgParserState where
|
||||
extractHeaderMap = orgStateHeaderMap
|
||||
updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) }
|
||||
|
||||
parseOrg :: OrgParser Pandoc
|
||||
parseOrg = do
|
||||
blocks' <- parseBlocks
|
||||
st <- getState
|
||||
let meta = runF (orgStateMeta' st) st
|
||||
let removeUnwantedBlocks = dropCommentTrees . filter (/= Null)
|
||||
return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ runF blocks' st)
|
||||
|
||||
-- | Drop COMMENT headers and the document tree below those headers.
|
||||
dropCommentTrees :: [Block] -> [Block]
|
||||
dropCommentTrees [] = []
|
||||
dropCommentTrees (b:bs) =
|
||||
maybe (b:dropCommentTrees bs)
|
||||
(dropCommentTrees . flip dropUntilHeaderAboveLevel bs)
|
||||
(commentHeaderLevel b)
|
||||
|
||||
-- | Return the level of a header starting a comment or :noexport: tree and
|
||||
-- Nothing otherwise.
|
||||
commentHeaderLevel :: Block -> Maybe Int
|
||||
commentHeaderLevel blk =
|
||||
case blk of
|
||||
(Header level _ ((Str "COMMENT"):_)) -> Just level
|
||||
(Header level _ title) | hasNoExportTag title -> Just level
|
||||
_ -> Nothing
|
||||
where
|
||||
hasNoExportTag :: [Inline] -> Bool
|
||||
hasNoExportTag = any isNoExportTag
|
||||
|
||||
isNoExportTag :: Inline -> Bool
|
||||
isNoExportTag (Span ("", ["tag"], [("data-tag-name", "noexport")]) []) = True
|
||||
isNoExportTag _ = False
|
||||
|
||||
-- | Drop blocks until a header on or above the given level is seen
|
||||
dropUntilHeaderAboveLevel :: Int -> [Block] -> [Block]
|
||||
dropUntilHeaderAboveLevel n = dropWhile (not . isHeaderLevelLowerEq n)
|
||||
|
||||
isHeaderLevelLowerEq :: Int -> Block -> Bool
|
||||
isHeaderLevelLowerEq n blk =
|
||||
case blk of
|
||||
(Header level _ _) -> n >= level
|
||||
_ -> False
|
||||
|
||||
--
|
||||
-- Parser State for Org
|
||||
-- Functions acting on the parser state
|
||||
--
|
||||
|
||||
type OrgNoteRecord = (String, F Blocks)
|
||||
type OrgNoteTable = [OrgNoteRecord]
|
||||
|
||||
type OrgBlockAttributes = M.Map String String
|
||||
|
||||
type OrgLinkFormatters = M.Map String (String -> String)
|
||||
|
||||
-- | Org-mode parser state
|
||||
data OrgParserState = OrgParserState
|
||||
{ orgStateOptions :: ReaderOptions
|
||||
, orgStateAnchorIds :: [String]
|
||||
, orgStateBlockAttributes :: OrgBlockAttributes
|
||||
, orgStateEmphasisCharStack :: [Char]
|
||||
, orgStateEmphasisNewlines :: Maybe Int
|
||||
, orgStateLastForbiddenCharPos :: Maybe SourcePos
|
||||
, orgStateLastPreCharPos :: Maybe SourcePos
|
||||
, orgStateLastStrPos :: Maybe SourcePos
|
||||
, orgStateLinkFormatters :: OrgLinkFormatters
|
||||
, orgStateMeta :: Meta
|
||||
, orgStateMeta' :: F Meta
|
||||
, orgStateNotes' :: OrgNoteTable
|
||||
, orgStateParserContext :: ParserContext
|
||||
, orgStateIdentifiers :: Set.Set String
|
||||
, orgStateHeaderMap :: M.Map Inlines String
|
||||
}
|
||||
|
||||
instance Default OrgParserLocal where
|
||||
def = OrgParserLocal NoQuote
|
||||
|
||||
instance HasReaderOptions OrgParserState where
|
||||
extractReaderOptions = orgStateOptions
|
||||
|
||||
instance HasMeta OrgParserState where
|
||||
setMeta field val st =
|
||||
st{ orgStateMeta = setMeta field val $ orgStateMeta st }
|
||||
deleteMeta field st =
|
||||
st{ orgStateMeta = deleteMeta field $ orgStateMeta st }
|
||||
|
||||
instance HasLastStrPosition OrgParserState where
|
||||
getLastStrPos = orgStateLastStrPos
|
||||
setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
|
||||
|
||||
instance HasQuoteContext st (Reader OrgParserLocal) where
|
||||
getQuoteContext = asks orgLocalQuoteContext
|
||||
withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q})
|
||||
|
||||
instance Default OrgParserState where
|
||||
def = defaultOrgParserState
|
||||
|
||||
defaultOrgParserState :: OrgParserState
|
||||
defaultOrgParserState = OrgParserState
|
||||
{ orgStateOptions = def
|
||||
, orgStateAnchorIds = []
|
||||
, orgStateBlockAttributes = M.empty
|
||||
, orgStateEmphasisCharStack = []
|
||||
, orgStateEmphasisNewlines = Nothing
|
||||
, orgStateLastForbiddenCharPos = Nothing
|
||||
, orgStateLastPreCharPos = Nothing
|
||||
, orgStateLastStrPos = Nothing
|
||||
, orgStateLinkFormatters = M.empty
|
||||
, orgStateMeta = nullMeta
|
||||
, orgStateMeta' = return nullMeta
|
||||
, orgStateNotes' = []
|
||||
, orgStateParserContext = NullState
|
||||
, orgStateIdentifiers = Set.empty
|
||||
, orgStateHeaderMap = M.empty
|
||||
}
|
||||
|
||||
recordAnchorId :: String -> OrgParser ()
|
||||
recordAnchorId i = updateState $ \s ->
|
||||
s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
|
||||
|
@ -243,6 +121,56 @@ addToNotesTable note = do
|
|||
oldnotes <- orgStateNotes' <$> getState
|
||||
updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
|
||||
|
||||
--
|
||||
-- Parser
|
||||
--
|
||||
parseOrg :: OrgParser Pandoc
|
||||
parseOrg = do
|
||||
blocks' <- parseBlocks
|
||||
st <- getState
|
||||
let meta = runF (orgStateMeta' st) st
|
||||
let removeUnwantedBlocks = dropCommentTrees . filter (/= Null)
|
||||
return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ runF blocks' st)
|
||||
|
||||
-- | Drop COMMENT headers and the document tree below those headers.
|
||||
dropCommentTrees :: [Block] -> [Block]
|
||||
dropCommentTrees [] = []
|
||||
dropCommentTrees (b:bs) =
|
||||
maybe (b:dropCommentTrees bs)
|
||||
(dropCommentTrees . flip dropUntilHeaderAboveLevel bs)
|
||||
(commentHeaderLevel b)
|
||||
|
||||
-- | Return the level of a header starting a comment or :noexport: tree and
|
||||
-- Nothing otherwise.
|
||||
commentHeaderLevel :: Block -> Maybe Int
|
||||
commentHeaderLevel blk =
|
||||
case blk of
|
||||
(Header level _ ((Str "COMMENT"):_)) -> Just level
|
||||
(Header level _ title) | hasNoExportTag title -> Just level
|
||||
_ -> Nothing
|
||||
where
|
||||
hasNoExportTag :: [Inline] -> Bool
|
||||
hasNoExportTag = any isNoExportTag
|
||||
|
||||
isNoExportTag :: Inline -> Bool
|
||||
isNoExportTag (Span ("", ["tag"], [("data-tag-name", "noexport")]) []) = True
|
||||
isNoExportTag _ = False
|
||||
|
||||
-- | Drop blocks until a header on or above the given level is seen
|
||||
dropUntilHeaderAboveLevel :: Int -> [Block] -> [Block]
|
||||
dropUntilHeaderAboveLevel n = dropWhile (not . isHeaderLevelLowerEq n)
|
||||
|
||||
isHeaderLevelLowerEq :: Int -> Block -> Bool
|
||||
isHeaderLevelLowerEq n blk =
|
||||
case blk of
|
||||
(Header level _ _) -> n >= level
|
||||
_ -> False
|
||||
|
||||
|
||||
--
|
||||
-- Adaptions and specializations of parsing utilities
|
||||
--
|
||||
|
||||
-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
|
||||
-- of the state saved and restored.
|
||||
parseFromString :: OrgParser a -> String -> OrgParser a
|
||||
|
@ -253,35 +181,6 @@ parseFromString parser str' = do
|
|||
updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
|
||||
return result
|
||||
|
||||
|
||||
--
|
||||
-- Adaptions and specializations of parsing utilities
|
||||
--
|
||||
|
||||
newtype F a = F { unF :: Reader OrgParserState a
|
||||
} deriving (Monad, Applicative, Functor)
|
||||
|
||||
runF :: F a -> OrgParserState -> a
|
||||
runF = runReader . unF
|
||||
|
||||
askF :: F OrgParserState
|
||||
askF = F ask
|
||||
|
||||
asksF :: (OrgParserState -> a) -> F a
|
||||
asksF f = F $ asks f
|
||||
|
||||
instance Monoid a => Monoid (F a) where
|
||||
mempty = return mempty
|
||||
mappend = liftM2 mappend
|
||||
mconcat = fmap mconcat . sequence
|
||||
|
||||
trimInlinesF :: F Inlines -> F Inlines
|
||||
trimInlinesF = liftM trimInlines
|
||||
|
||||
returnF :: a -> OrgParser (F a)
|
||||
returnF = return . return
|
||||
|
||||
|
||||
-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
|
||||
newline :: OrgParser Char
|
||||
newline =
|
||||
|
|
175
src/Text/Pandoc/Readers/Org/ParserState.hs
Normal file
175
src/Text/Pandoc/Readers/Org/ParserState.hs
Normal file
|
@ -0,0 +1,175 @@
|
|||
{-# LANGUAGE FlexibleInstances #-}
|
||||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||
{-
|
||||
Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
-}
|
||||
|
||||
{- |
|
||||
Module : Text.Pandoc.Readers.Org.Options
|
||||
Copyright : Copyright (C) 2014-2016 Albert Krewinkel
|
||||
License : GNU GPL, version 2 or above
|
||||
|
||||
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
|
||||
|
||||
Define the Org-mode parser state.
|
||||
-}
|
||||
module Text.Pandoc.Readers.Org.ParserState
|
||||
( OrgParserState(..)
|
||||
, OrgParserLocal(..)
|
||||
, OrgNoteRecord
|
||||
, F(..)
|
||||
, askF
|
||||
, asksF
|
||||
, trimInlinesF
|
||||
, runF
|
||||
, returnF
|
||||
) where
|
||||
|
||||
import Control.Monad (liftM, liftM2)
|
||||
import Control.Monad.Reader (Reader, runReader, ask, asks, local)
|
||||
|
||||
import Data.Default (Default(..))
|
||||
import qualified Data.Map as M
|
||||
import qualified Data.Set as Set
|
||||
|
||||
import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..),
|
||||
trimInlines )
|
||||
import Text.Pandoc.Definition ( Meta(..), nullMeta )
|
||||
import Text.Pandoc.Options ( ReaderOptions(..) )
|
||||
import Text.Pandoc.Parsing ( HasHeaderMap(..)
|
||||
, HasIdentifierList(..)
|
||||
, HasLastStrPosition(..)
|
||||
, HasQuoteContext(..)
|
||||
, HasReaderOptions(..)
|
||||
, ParserContext(..)
|
||||
, QuoteContext(..)
|
||||
, SourcePos )
|
||||
|
||||
-- | An inline note / footnote containing the note key and its (inline) value.
|
||||
type OrgNoteRecord = (String, F Blocks)
|
||||
-- | Table of footnotes
|
||||
type OrgNoteTable = [OrgNoteRecord]
|
||||
-- | Map of org block attributes (e.g. LABEL, CAPTION, NAME, etc)
|
||||
type OrgBlockAttributes = M.Map String String
|
||||
-- | Map of functions for link transformations. The map key is refers to the
|
||||
-- link-type, the corresponding function transforms the given link string.
|
||||
type OrgLinkFormatters = M.Map String (String -> String)
|
||||
|
||||
-- | Org-mode parser state
|
||||
data OrgParserState = OrgParserState
|
||||
{ orgStateOptions :: ReaderOptions
|
||||
, orgStateAnchorIds :: [String]
|
||||
, orgStateBlockAttributes :: OrgBlockAttributes
|
||||
, orgStateEmphasisCharStack :: [Char]
|
||||
, orgStateEmphasisNewlines :: Maybe Int
|
||||
, orgStateLastForbiddenCharPos :: Maybe SourcePos
|
||||
, orgStateLastPreCharPos :: Maybe SourcePos
|
||||
, orgStateLastStrPos :: Maybe SourcePos
|
||||
, orgStateLinkFormatters :: OrgLinkFormatters
|
||||
, orgStateMeta :: Meta
|
||||
, orgStateMeta' :: F Meta
|
||||
, orgStateNotes' :: OrgNoteTable
|
||||
, orgStateParserContext :: ParserContext
|
||||
, orgStateIdentifiers :: Set.Set String
|
||||
, orgStateHeaderMap :: M.Map Inlines String
|
||||
}
|
||||
|
||||
data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
|
||||
|
||||
instance Default OrgParserLocal where
|
||||
def = OrgParserLocal NoQuote
|
||||
|
||||
instance HasReaderOptions OrgParserState where
|
||||
extractReaderOptions = orgStateOptions
|
||||
|
||||
instance HasMeta OrgParserState where
|
||||
setMeta field val st =
|
||||
st{ orgStateMeta = setMeta field val $ orgStateMeta st }
|
||||
deleteMeta field st =
|
||||
st{ orgStateMeta = deleteMeta field $ orgStateMeta st }
|
||||
|
||||
instance HasLastStrPosition OrgParserState where
|
||||
getLastStrPos = orgStateLastStrPos
|
||||
setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
|
||||
|
||||
instance HasQuoteContext st (Reader OrgParserLocal) where
|
||||
getQuoteContext = asks orgLocalQuoteContext
|
||||
withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q})
|
||||
|
||||
instance HasIdentifierList OrgParserState where
|
||||
extractIdentifierList = orgStateIdentifiers
|
||||
updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) }
|
||||
|
||||
instance HasHeaderMap OrgParserState where
|
||||
extractHeaderMap = orgStateHeaderMap
|
||||
updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) }
|
||||
|
||||
|
||||
instance Default OrgParserState where
|
||||
def = defaultOrgParserState
|
||||
|
||||
defaultOrgParserState :: OrgParserState
|
||||
defaultOrgParserState = OrgParserState
|
||||
{ orgStateOptions = def
|
||||
, orgStateAnchorIds = []
|
||||
, orgStateBlockAttributes = M.empty
|
||||
, orgStateEmphasisCharStack = []
|
||||
, orgStateEmphasisNewlines = Nothing
|
||||
, orgStateLastForbiddenCharPos = Nothing
|
||||
, orgStateLastPreCharPos = Nothing
|
||||
, orgStateLastStrPos = Nothing
|
||||
, orgStateLinkFormatters = M.empty
|
||||
, orgStateMeta = nullMeta
|
||||
, orgStateMeta' = return nullMeta
|
||||
, orgStateNotes' = []
|
||||
, orgStateParserContext = NullState
|
||||
, orgStateIdentifiers = Set.empty
|
||||
, orgStateHeaderMap = M.empty
|
||||
}
|
||||
|
||||
|
||||
--
|
||||
-- Parser state reader
|
||||
--
|
||||
|
||||
-- | Reader monad wrapping the parser state. This is used to delay evaluation
|
||||
-- until all relevant information has been parsed and made available in the
|
||||
-- parser state. See also the newtype of the same name in
|
||||
-- Text.Pandoc.Parsing.
|
||||
newtype F a = F { unF :: Reader OrgParserState a
|
||||
} deriving (Functor, Applicative, Monad)
|
||||
|
||||
instance Monoid a => Monoid (F a) where
|
||||
mempty = return mempty
|
||||
mappend = liftM2 mappend
|
||||
mconcat = fmap mconcat . sequence
|
||||
|
||||
runF :: F a -> OrgParserState -> a
|
||||
runF = runReader . unF
|
||||
|
||||
askF :: F OrgParserState
|
||||
askF = F ask
|
||||
|
||||
asksF :: (OrgParserState -> a) -> F a
|
||||
asksF f = F $ asks f
|
||||
|
||||
trimInlinesF :: F Inlines -> F Inlines
|
||||
trimInlinesF = liftM trimInlines
|
||||
|
||||
returnF :: Monad m => a -> m (F a)
|
||||
returnF = return . return
|
Loading…
Add table
Reference in a new issue