Org reader: extract parsing function to module
The Org-mode reader uses many functions defined in the `Text.Pandoc.Parsing` utility module. Some of the functions are overwritten with versions adapted to Org-mode idiosyncrasies. These special functions, as well as the normal Pandoc versions, are combined in a single module to increase the ease of use. This leads to decoupling of Org-mode and Pandoc and hence to slightly cleaner code. The downside is code-bloat due to repeated import/export statements.
This commit is contained in:
parent
cc937eea2f
commit
a340c7249f
4 changed files with 198 additions and 78 deletions
|
@ -394,6 +394,7 @@ Library
|
|||
Text.Pandoc.Readers.Odt.Arrows.State,
|
||||
Text.Pandoc.Readers.Odt.Arrows.Utils,
|
||||
Text.Pandoc.Readers.Org.ParserState,
|
||||
Text.Pandoc.Readers.Org.Parsing,
|
||||
Text.Pandoc.Writers.Shared,
|
||||
Text.Pandoc.Asciify,
|
||||
Text.Pandoc.MIME,
|
||||
|
|
|
@ -29,27 +29,23 @@ Conversion of org-mode formatted plain text to 'Pandoc' document.
|
|||
-}
|
||||
module Text.Pandoc.Readers.Org ( readOrg ) where
|
||||
|
||||
import Text.Pandoc.Readers.Org.ParserState
|
||||
import Text.Pandoc.Readers.Org.Parsing
|
||||
|
||||
import qualified Text.Pandoc.Builder as B
|
||||
import Text.Pandoc.Builder ( Inlines, Blocks )
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Compat.Monoid ((<>))
|
||||
import Text.Pandoc.Error
|
||||
import Text.Pandoc.Options
|
||||
import qualified Text.Pandoc.Parsing as P
|
||||
import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
|
||||
, anyLine, blanklines, newline
|
||||
, orderedListMarker
|
||||
, parseFromString
|
||||
)
|
||||
import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
|
||||
import Text.Pandoc.Readers.Org.ParserState
|
||||
import Text.Pandoc.Shared (compactify', compactify'DL)
|
||||
import Text.TeXMath (readTeX, writePandoc, DisplayType(..))
|
||||
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
|
||||
|
||||
import Control.Arrow (first)
|
||||
import Control.Monad (foldM, guard, mplus, mzero, when)
|
||||
import Control.Monad.Reader ( Reader, runReader )
|
||||
import Control.Monad.Reader ( runReader )
|
||||
import Data.Char (isAlphaNum, isSpace, toLower, toUpper)
|
||||
import Data.List ( foldl', intersperse, isPrefixOf, isSuffixOf )
|
||||
import qualified Data.Map as M
|
||||
|
@ -63,9 +59,6 @@ readOrg :: ReaderOptions -- ^ Reader options
|
|||
-> Either PandocError Pandoc
|
||||
readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
|
||||
|
||||
-- | The parser used to read org files.
|
||||
type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
|
||||
|
||||
--
|
||||
-- Functions acting on the parser state
|
||||
--
|
||||
|
@ -73,14 +66,6 @@ recordAnchorId :: String -> OrgParser ()
|
|||
recordAnchorId i = updateState $ \s ->
|
||||
s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
|
||||
|
||||
updateLastForbiddenCharPos :: OrgParser ()
|
||||
updateLastForbiddenCharPos = getPosition >>= \p ->
|
||||
updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
|
||||
|
||||
updateLastPreCharPos :: OrgParser ()
|
||||
updateLastPreCharPos = getPosition >>= \p ->
|
||||
updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
|
||||
|
||||
pushToInlineCharStack :: Char -> OrgParser ()
|
||||
pushToInlineCharStack c = updateState $ \s ->
|
||||
s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
|
||||
|
@ -258,57 +243,6 @@ isHeaderLevelLowerEq n blk =
|
|||
_ -> False
|
||||
|
||||
|
||||
--
|
||||
-- Adaptions and specializations of parsing utilities
|
||||
--
|
||||
|
||||
-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
|
||||
-- of the state saved and restored.
|
||||
parseFromString :: OrgParser a -> String -> OrgParser a
|
||||
parseFromString parser str' = do
|
||||
oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
|
||||
updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
|
||||
result <- P.parseFromString parser str'
|
||||
updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
|
||||
return result
|
||||
|
||||
-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
|
||||
newline :: OrgParser Char
|
||||
newline =
|
||||
P.newline
|
||||
<* updateLastPreCharPos
|
||||
<* updateLastForbiddenCharPos
|
||||
|
||||
-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
|
||||
blanklines :: OrgParser [Char]
|
||||
blanklines =
|
||||
P.blanklines
|
||||
<* updateLastPreCharPos
|
||||
<* updateLastForbiddenCharPos
|
||||
|
||||
anyLine :: OrgParser String
|
||||
anyLine =
|
||||
P.anyLine
|
||||
<* updateLastPreCharPos
|
||||
<* updateLastForbiddenCharPos
|
||||
|
||||
-- | Succeeds when we're in list context.
|
||||
inList :: OrgParser ()
|
||||
inList = do
|
||||
ctx <- orgStateParserContext <$> getState
|
||||
guard (ctx == ListItemState)
|
||||
|
||||
-- | Parse in different context
|
||||
withContext :: ParserContext -- ^ New parser context
|
||||
-> OrgParser a -- ^ Parser to run in that context
|
||||
-> OrgParser a
|
||||
withContext context parser = do
|
||||
oldContext <- orgStateParserContext <$> getState
|
||||
updateState $ \s -> s{ orgStateParserContext = context }
|
||||
result <- parser
|
||||
updateState $ \s -> s{ orgStateParserContext = oldContext }
|
||||
return result
|
||||
|
||||
--
|
||||
-- parsing blocks
|
||||
--
|
||||
|
@ -398,7 +332,7 @@ keyValues = try $
|
|||
endOfValue :: OrgParser ()
|
||||
endOfValue =
|
||||
lookAhead $ (() <$ try (many1 spaceChar <* key))
|
||||
<|> () <$ P.newline
|
||||
<|> () <$ newline
|
||||
|
||||
|
||||
--
|
||||
|
@ -675,7 +609,7 @@ propertiesDrawer = try $ do
|
|||
key = try $ skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')
|
||||
|
||||
value :: OrgParser String
|
||||
value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> P.newline)
|
||||
value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline)
|
||||
|
||||
keyValuesToAttr :: [(String, String)] -> Attr
|
||||
keyValuesToAttr kvs =
|
||||
|
@ -696,7 +630,7 @@ keyValuesToAttr kvs =
|
|||
figure :: OrgParser (F Blocks)
|
||||
figure = try $ do
|
||||
figAttrs <- blockAttributes
|
||||
src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline
|
||||
src <- skipSpaces *> selfTarget <* skipSpaces <* newline
|
||||
guard . not . isNothing . blockAttrCaption $ figAttrs
|
||||
guard (isImageFilename src)
|
||||
let figName = fromMaybe mempty $ blockAttrName figAttrs
|
||||
|
@ -785,7 +719,7 @@ parseFormat = try $ do
|
|||
header :: OrgParser (F Blocks)
|
||||
header = try $ do
|
||||
level <- headerStart
|
||||
title <- manyTill inline (lookAhead $ optional headerTags <* P.newline)
|
||||
title <- manyTill inline (lookAhead $ optional headerTags <* newline)
|
||||
tags <- option [] headerTags
|
||||
newline
|
||||
propAttr <- option nullAttr (keyValuesToAttr <$> propertiesDrawer)
|
||||
|
@ -1083,7 +1017,7 @@ definitionListItem parseMarkerGetLength = try $ do
|
|||
return $ (,) <$> term' <*> fmap (:[]) contents'
|
||||
where
|
||||
definitionMarker =
|
||||
spaceChar *> string "::" <* (spaceChar <|> lookAhead P.newline)
|
||||
spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)
|
||||
|
||||
|
||||
-- parse raw text for one list item, excluding start marker and continuations
|
||||
|
@ -1559,7 +1493,7 @@ many1TillNOrLessNewlines n p end = try $
|
|||
nMoreLines k cs = try $ (final k cs <|> rest k cs)
|
||||
>>= uncurry nMoreLines
|
||||
final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine
|
||||
rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p P.newline)
|
||||
rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline)
|
||||
finalLine = try $ manyTill p end
|
||||
minus1 k = k - 1
|
||||
oneOrMore cs = guard (not $ null cs) *> return cs
|
||||
|
|
|
@ -29,9 +29,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|||
Define the Org-mode parser state.
|
||||
-}
|
||||
module Text.Pandoc.Readers.Org.ParserState
|
||||
( OrgParserState(..)
|
||||
, OrgParserLocal(..)
|
||||
( OrgParserState (..)
|
||||
, OrgParserLocal (..)
|
||||
, OrgNoteRecord
|
||||
, HasReaderOptions (..)
|
||||
, HasQuoteContext (..)
|
||||
, F(..)
|
||||
, askF
|
||||
, asksF
|
||||
|
@ -184,6 +186,7 @@ modifyExportSettings :: ExportSettingSetter a -> a -> OrgParserState -> OrgParse
|
|||
modifyExportSettings setter val state =
|
||||
state { orgStateExportSettings = setter val . orgStateExportSettings $ state }
|
||||
|
||||
|
||||
--
|
||||
-- Parser state reader
|
||||
--
|
||||
|
|
182
src/Text/Pandoc/Readers/Org/Parsing.hs
Normal file
182
src/Text/Pandoc/Readers/Org/Parsing.hs
Normal file
|
@ -0,0 +1,182 @@
|
|||
{-
|
||||
Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
-}
|
||||
|
||||
{- |
|
||||
Module : Text.Pandoc.Readers.Org.Options
|
||||
Copyright : Copyright (C) 2014-2016 Albert Krewinkel
|
||||
License : GNU GPL, version 2 or above
|
||||
|
||||
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
|
||||
|
||||
Org-mode parsing utilities.
|
||||
|
||||
Most functions are simply re-exports from @Text.Pandoc.Parsing@, some
|
||||
functions are adapted to Org-mode specific functionality.
|
||||
-}
|
||||
module Text.Pandoc.Readers.Org.Parsing
|
||||
( OrgParser
|
||||
, anyLine
|
||||
, blanklines
|
||||
, newline
|
||||
, parseFromString
|
||||
, inList
|
||||
, withContext
|
||||
, updateLastForbiddenCharPos
|
||||
, updateLastPreCharPos
|
||||
-- * Re-exports from Text.Pandoc.Parser
|
||||
, ParserContext (..)
|
||||
, many1Till
|
||||
, notFollowedBy'
|
||||
, spaceChar
|
||||
, nonspaceChar
|
||||
, skipSpaces
|
||||
, blankline
|
||||
, enclosed
|
||||
, stringAnyCase
|
||||
, charsInBalanced
|
||||
, uri
|
||||
, withRaw
|
||||
, readWithM
|
||||
, guardEnabled
|
||||
, updateLastStrPos
|
||||
, notAfterString
|
||||
, ParserState (..)
|
||||
, registerHeader
|
||||
, QuoteContext (..)
|
||||
, singleQuoteStart
|
||||
, singleQuoteEnd
|
||||
, doubleQuoteStart
|
||||
, doubleQuoteEnd
|
||||
, dash
|
||||
, ellipses
|
||||
, citeKey
|
||||
-- * Re-exports from Text.Pandoc.Parsec
|
||||
, runParser
|
||||
, getInput
|
||||
, char
|
||||
, letter
|
||||
, digit
|
||||
, alphaNum
|
||||
, skipMany1
|
||||
, spaces
|
||||
, anyChar
|
||||
, string
|
||||
, count
|
||||
, eof
|
||||
, noneOf
|
||||
, oneOf
|
||||
, lookAhead
|
||||
, notFollowedBy
|
||||
, many
|
||||
, many1
|
||||
, manyTill
|
||||
, (<|>)
|
||||
, (<?>)
|
||||
, choice
|
||||
, try
|
||||
, sepBy
|
||||
, sepBy1
|
||||
, option
|
||||
, optional
|
||||
, optionMaybe
|
||||
, getState
|
||||
, updateState
|
||||
, SourcePos
|
||||
, getPosition
|
||||
) where
|
||||
|
||||
import Text.Pandoc.Readers.Org.ParserState
|
||||
|
||||
import qualified Text.Pandoc.Parsing as P
|
||||
import Text.Pandoc.Parsing hiding ( anyLine, blanklines, newline
|
||||
, parseFromString )
|
||||
|
||||
import Control.Monad ( guard )
|
||||
import Control.Monad.Reader ( Reader )
|
||||
|
||||
-- | The parser used to read org files.
|
||||
type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
|
||||
|
||||
--
|
||||
-- Adaptions and specializations of parsing utilities
|
||||
--
|
||||
|
||||
-- | Parse any line of text
|
||||
anyLine :: OrgParser String
|
||||
anyLine =
|
||||
P.anyLine
|
||||
<* updateLastPreCharPos
|
||||
<* updateLastForbiddenCharPos
|
||||
|
||||
-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
|
||||
-- of the state saved and restored.
|
||||
parseFromString :: OrgParser a -> String -> OrgParser a
|
||||
parseFromString parser str' = do
|
||||
oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
|
||||
updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
|
||||
result <- P.parseFromString parser str'
|
||||
updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
|
||||
return result
|
||||
|
||||
-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
|
||||
newline :: OrgParser Char
|
||||
newline =
|
||||
P.newline
|
||||
<* updateLastPreCharPos
|
||||
<* updateLastForbiddenCharPos
|
||||
|
||||
-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
|
||||
blanklines :: OrgParser [Char]
|
||||
blanklines =
|
||||
P.blanklines
|
||||
<* updateLastPreCharPos
|
||||
<* updateLastForbiddenCharPos
|
||||
|
||||
-- | Succeeds when we're in list context.
|
||||
inList :: OrgParser ()
|
||||
inList = do
|
||||
ctx <- orgStateParserContext <$> getState
|
||||
guard (ctx == ListItemState)
|
||||
|
||||
-- | Parse in different context
|
||||
withContext :: ParserContext -- ^ New parser context
|
||||
-> OrgParser a -- ^ Parser to run in that context
|
||||
-> OrgParser a
|
||||
withContext context parser = do
|
||||
oldContext <- orgStateParserContext <$> getState
|
||||
updateState $ \s -> s{ orgStateParserContext = context }
|
||||
result <- parser
|
||||
updateState $ \s -> s{ orgStateParserContext = oldContext }
|
||||
return result
|
||||
|
||||
--
|
||||
-- Parser state update functions
|
||||
--
|
||||
|
||||
-- | Set the current position as the last position at which a forbidden char
|
||||
-- was found (i.e. a character which is not allowed at the inner border of
|
||||
-- markup).
|
||||
updateLastForbiddenCharPos :: OrgParser ()
|
||||
updateLastForbiddenCharPos = getPosition >>= \p ->
|
||||
updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
|
||||
|
||||
-- | Set the current parser position as the position at which a character was
|
||||
-- seen which allows inline markup to follow.
|
||||
updateLastPreCharPos :: OrgParser ()
|
||||
updateLastPreCharPos = getPosition >>= \p ->
|
||||
updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
|
Loading…
Reference in a new issue