Org reader: extract parsing function to module

The Org-mode reader uses many functions defined in the
`Text.Pandoc.Parsing` utility module.  Some of the functions are
overwritten with versions adapted to Org-mode idiosyncrasies.  These
special functions, as well as the normal Pandoc versions, are combined
in a single module to increase the ease of use.

This leads to decoupling of Org-mode and Pandoc and hence to slightly
cleaner code.  The downside is code-bloat due to repeated import/export
statements.
This commit is contained in:
Albert Krewinkel 2016-05-22 16:33:31 +02:00
parent cc937eea2f
commit a340c7249f
4 changed files with 198 additions and 78 deletions

View file

@ -394,6 +394,7 @@ Library
Text.Pandoc.Readers.Odt.Arrows.State,
Text.Pandoc.Readers.Odt.Arrows.Utils,
Text.Pandoc.Readers.Org.ParserState,
Text.Pandoc.Readers.Org.Parsing,
Text.Pandoc.Writers.Shared,
Text.Pandoc.Asciify,
Text.Pandoc.MIME,

View file

@ -29,27 +29,23 @@ Conversion of org-mode formatted plain text to 'Pandoc' document.
-}
module Text.Pandoc.Readers.Org ( readOrg ) where
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder ( Inlines, Blocks )
import Text.Pandoc.Definition
import Text.Pandoc.Compat.Monoid ((<>))
import Text.Pandoc.Error
import Text.Pandoc.Options
import qualified Text.Pandoc.Parsing as P
import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
, anyLine, blanklines, newline
, orderedListMarker
, parseFromString
)
import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Shared (compactify', compactify'DL)
import Text.TeXMath (readTeX, writePandoc, DisplayType(..))
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
import Control.Arrow (first)
import Control.Monad (foldM, guard, mplus, mzero, when)
import Control.Monad.Reader ( Reader, runReader )
import Control.Monad.Reader ( runReader )
import Data.Char (isAlphaNum, isSpace, toLower, toUpper)
import Data.List ( foldl', intersperse, isPrefixOf, isSuffixOf )
import qualified Data.Map as M
@ -63,9 +59,6 @@ readOrg :: ReaderOptions -- ^ Reader options
-> Either PandocError Pandoc
readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
-- | The parser used to read org files.
type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
--
-- Functions acting on the parser state
--
@ -73,14 +66,6 @@ recordAnchorId :: String -> OrgParser ()
recordAnchorId i = updateState $ \s ->
s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
updateLastForbiddenCharPos :: OrgParser ()
updateLastForbiddenCharPos = getPosition >>= \p ->
updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
updateLastPreCharPos :: OrgParser ()
updateLastPreCharPos = getPosition >>= \p ->
updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
pushToInlineCharStack :: Char -> OrgParser ()
pushToInlineCharStack c = updateState $ \s ->
s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
@ -258,57 +243,6 @@ isHeaderLevelLowerEq n blk =
_ -> False
--
-- Adaptions and specializations of parsing utilities
--
-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
-- of the state saved and restored.
parseFromString :: OrgParser a -> String -> OrgParser a
parseFromString parser str' = do
oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
result <- P.parseFromString parser str'
updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
return result
-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
newline :: OrgParser Char
newline =
P.newline
<* updateLastPreCharPos
<* updateLastForbiddenCharPos
-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
blanklines :: OrgParser [Char]
blanklines =
P.blanklines
<* updateLastPreCharPos
<* updateLastForbiddenCharPos
anyLine :: OrgParser String
anyLine =
P.anyLine
<* updateLastPreCharPos
<* updateLastForbiddenCharPos
-- | Succeeds when we're in list context.
inList :: OrgParser ()
inList = do
ctx <- orgStateParserContext <$> getState
guard (ctx == ListItemState)
-- | Parse in different context
withContext :: ParserContext -- ^ New parser context
-> OrgParser a -- ^ Parser to run in that context
-> OrgParser a
withContext context parser = do
oldContext <- orgStateParserContext <$> getState
updateState $ \s -> s{ orgStateParserContext = context }
result <- parser
updateState $ \s -> s{ orgStateParserContext = oldContext }
return result
--
-- parsing blocks
--
@ -398,7 +332,7 @@ keyValues = try $
endOfValue :: OrgParser ()
endOfValue =
lookAhead $ (() <$ try (many1 spaceChar <* key))
<|> () <$ P.newline
<|> () <$ newline
--
@ -675,7 +609,7 @@ propertiesDrawer = try $ do
key = try $ skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')
value :: OrgParser String
value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> P.newline)
value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline)
keyValuesToAttr :: [(String, String)] -> Attr
keyValuesToAttr kvs =
@ -696,7 +630,7 @@ keyValuesToAttr kvs =
figure :: OrgParser (F Blocks)
figure = try $ do
figAttrs <- blockAttributes
src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline
src <- skipSpaces *> selfTarget <* skipSpaces <* newline
guard . not . isNothing . blockAttrCaption $ figAttrs
guard (isImageFilename src)
let figName = fromMaybe mempty $ blockAttrName figAttrs
@ -785,7 +719,7 @@ parseFormat = try $ do
header :: OrgParser (F Blocks)
header = try $ do
level <- headerStart
title <- manyTill inline (lookAhead $ optional headerTags <* P.newline)
title <- manyTill inline (lookAhead $ optional headerTags <* newline)
tags <- option [] headerTags
newline
propAttr <- option nullAttr (keyValuesToAttr <$> propertiesDrawer)
@ -1083,7 +1017,7 @@ definitionListItem parseMarkerGetLength = try $ do
return $ (,) <$> term' <*> fmap (:[]) contents'
where
definitionMarker =
spaceChar *> string "::" <* (spaceChar <|> lookAhead P.newline)
spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)
-- parse raw text for one list item, excluding start marker and continuations
@ -1559,7 +1493,7 @@ many1TillNOrLessNewlines n p end = try $
nMoreLines k cs = try $ (final k cs <|> rest k cs)
>>= uncurry nMoreLines
final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine
rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p P.newline)
rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline)
finalLine = try $ manyTill p end
minus1 k = k - 1
oneOrMore cs = guard (not $ null cs) *> return cs

View file

@ -29,9 +29,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Define the Org-mode parser state.
-}
module Text.Pandoc.Readers.Org.ParserState
( OrgParserState(..)
, OrgParserLocal(..)
( OrgParserState (..)
, OrgParserLocal (..)
, OrgNoteRecord
, HasReaderOptions (..)
, HasQuoteContext (..)
, F(..)
, askF
, asksF
@ -184,6 +186,7 @@ modifyExportSettings :: ExportSettingSetter a -> a -> OrgParserState -> OrgParse
modifyExportSettings setter val state =
state { orgStateExportSettings = setter val . orgStateExportSettings $ state }
--
-- Parser state reader
--

View file

@ -0,0 +1,182 @@
{-
Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Readers.Org.Options
Copyright : Copyright (C) 2014-2016 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
Org-mode parsing utilities.
Most functions are simply re-exports from @Text.Pandoc.Parsing@, some
functions are adapted to Org-mode specific functionality.
-}
module Text.Pandoc.Readers.Org.Parsing
( OrgParser
, anyLine
, blanklines
, newline
, parseFromString
, inList
, withContext
, updateLastForbiddenCharPos
, updateLastPreCharPos
-- * Re-exports from Text.Pandoc.Parser
, ParserContext (..)
, many1Till
, notFollowedBy'
, spaceChar
, nonspaceChar
, skipSpaces
, blankline
, enclosed
, stringAnyCase
, charsInBalanced
, uri
, withRaw
, readWithM
, guardEnabled
, updateLastStrPos
, notAfterString
, ParserState (..)
, registerHeader
, QuoteContext (..)
, singleQuoteStart
, singleQuoteEnd
, doubleQuoteStart
, doubleQuoteEnd
, dash
, ellipses
, citeKey
-- * Re-exports from Text.Pandoc.Parsec
, runParser
, getInput
, char
, letter
, digit
, alphaNum
, skipMany1
, spaces
, anyChar
, string
, count
, eof
, noneOf
, oneOf
, lookAhead
, notFollowedBy
, many
, many1
, manyTill
, (<|>)
, (<?>)
, choice
, try
, sepBy
, sepBy1
, option
, optional
, optionMaybe
, getState
, updateState
, SourcePos
, getPosition
) where
import Text.Pandoc.Readers.Org.ParserState
import qualified Text.Pandoc.Parsing as P
import Text.Pandoc.Parsing hiding ( anyLine, blanklines, newline
, parseFromString )
import Control.Monad ( guard )
import Control.Monad.Reader ( Reader )
-- | The parser used to read org files.
type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
--
-- Adaptions and specializations of parsing utilities
--
-- | Parse any line of text
anyLine :: OrgParser String
anyLine =
P.anyLine
<* updateLastPreCharPos
<* updateLastForbiddenCharPos
-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
-- of the state saved and restored.
parseFromString :: OrgParser a -> String -> OrgParser a
parseFromString parser str' = do
oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
result <- P.parseFromString parser str'
updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
return result
-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
newline :: OrgParser Char
newline =
P.newline
<* updateLastPreCharPos
<* updateLastForbiddenCharPos
-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
blanklines :: OrgParser [Char]
blanklines =
P.blanklines
<* updateLastPreCharPos
<* updateLastForbiddenCharPos
-- | Succeeds when we're in list context.
inList :: OrgParser ()
inList = do
ctx <- orgStateParserContext <$> getState
guard (ctx == ListItemState)
-- | Parse in different context
withContext :: ParserContext -- ^ New parser context
-> OrgParser a -- ^ Parser to run in that context
-> OrgParser a
withContext context parser = do
oldContext <- orgStateParserContext <$> getState
updateState $ \s -> s{ orgStateParserContext = context }
result <- parser
updateState $ \s -> s{ orgStateParserContext = oldContext }
return result
--
-- Parser state update functions
--
-- | Set the current position as the last position at which a forbidden char
-- was found (i.e. a character which is not allowed at the inner border of
-- markup).
updateLastForbiddenCharPos :: OrgParser ()
updateLastForbiddenCharPos = getPosition >>= \p ->
updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
-- | Set the current parser position as the position at which a character was
-- seen which allows inline markup to follow.
updateLastPreCharPos :: OrgParser ()
updateLastPreCharPos = getPosition >>= \p ->
updateState $ \s -> s{ orgStateLastPreCharPos = Just p}