Org reader: extract parsing function to module

The Org-mode reader uses many functions defined in the `Text.Pandoc.Parsing` utility module. Some of the functions are overwritten with versions adapted to Org-mode idiosyncrasies. These special functions, as well as the normal Pandoc versions, are combined in a single module to increase the ease of use. This leads to decoupling of Org-mode and Pandoc and hence to slightly cleaner code. The downside is code-bloat due to repeated import/export statements.
2016-05-22 16:33:31 +02:00 · 2016-05-22 16:33:31 +02:00 · a340c7249f
commit a340c7249f
parent cc937eea2f
4 changed files with 198 additions and 78 deletions
--- a/pandoc.cabal
+++ b/pandoc.cabal
@ -394,6 +394,7 @@ Library
                   Text.Pandoc.Readers.Odt.Arrows.State,
                   Text.Pandoc.Readers.Odt.Arrows.Utils,
                   Text.Pandoc.Readers.Org.ParserState,
+                   Text.Pandoc.Readers.Org.Parsing,
                   Text.Pandoc.Writers.Shared,
                   Text.Pandoc.Asciify,
                   Text.Pandoc.MIME,
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@ -29,27 +29,23 @@ Conversion of org-mode formatted plain text to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.Org ( readOrg ) where

+import           Text.Pandoc.Readers.Org.ParserState
+import           Text.Pandoc.Readers.Org.Parsing
+
 import qualified Text.Pandoc.Builder as B
 import           Text.Pandoc.Builder ( Inlines, Blocks )
 import           Text.Pandoc.Definition
 import           Text.Pandoc.Compat.Monoid ((<>))
 import           Text.Pandoc.Error
 import           Text.Pandoc.Options
-import qualified Text.Pandoc.Parsing as P
-import           Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
-                                            , anyLine, blanklines, newline
-                                            , orderedListMarker
-                                            , parseFromString
-                                            )
 import           Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
-import           Text.Pandoc.Readers.Org.ParserState
 import           Text.Pandoc.Shared (compactify', compactify'DL)
 import           Text.TeXMath (readTeX, writePandoc, DisplayType(..))
 import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap

 import           Control.Arrow (first)
 import           Control.Monad (foldM, guard, mplus, mzero, when)
-import           Control.Monad.Reader ( Reader, runReader )
+import           Control.Monad.Reader ( runReader )
 import           Data.Char (isAlphaNum, isSpace, toLower, toUpper)
 import           Data.List ( foldl', intersperse, isPrefixOf, isSuffixOf )
 import qualified Data.Map as M
@ -63,9 +59,6 @@ readOrg :: ReaderOptions -- ^ Reader options
        -> Either PandocError Pandoc
 readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")

-- | The parser used to read org files.
-type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
-
 --
 -- Functions acting on the parser state
 --
@ -73,14 +66,6 @@ recordAnchorId :: String -> OrgParser ()
 recordAnchorId i = updateState $ \s ->
  s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }

-updateLastForbiddenCharPos :: OrgParser ()
-updateLastForbiddenCharPos = getPosition >>= \p ->
-  updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
-
-updateLastPreCharPos :: OrgParser ()
-updateLastPreCharPos = getPosition >>= \p ->
-  updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
-
 pushToInlineCharStack :: Char -> OrgParser ()
 pushToInlineCharStack c = updateState $ \s ->
  s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
@ -258,57 +243,6 @@ isHeaderLevelLowerEq n blk =
    _                  -> False


--
-- Adaptions and specializations of parsing utilities
--
-
-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
-- of the state saved and restored.
-parseFromString :: OrgParser a -> String -> OrgParser a
-parseFromString parser str' = do
-  oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
-  updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
-  result <- P.parseFromString parser str'
-  updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
-  return result
-
-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
-newline :: OrgParser Char
-newline =
-  P.newline
-       <* updateLastPreCharPos
-       <* updateLastForbiddenCharPos
-
-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
-blanklines :: OrgParser [Char]
-blanklines =
-  P.blanklines
-       <* updateLastPreCharPos
-       <* updateLastForbiddenCharPos
-
-anyLine :: OrgParser String
-anyLine =
-  P.anyLine
-    <* updateLastPreCharPos
-    <* updateLastForbiddenCharPos
-
-- | Succeeds when we're in list context.
-inList :: OrgParser ()
-inList = do
-  ctx <- orgStateParserContext <$> getState
-  guard (ctx == ListItemState)
-
-- | Parse in different context
-withContext :: ParserContext -- ^ New parser context
-            -> OrgParser a   -- ^ Parser to run in that context
-            -> OrgParser a
-withContext context parser = do
-  oldContext <- orgStateParserContext <$> getState
-  updateState $ \s -> s{ orgStateParserContext = context }
-  result <- parser
-  updateState $ \s -> s{ orgStateParserContext = oldContext }
-  return result
-
 --
 -- parsing blocks
 --
@ -398,7 +332,7 @@ keyValues = try $
   endOfValue :: OrgParser ()
   endOfValue =
     lookAhead $ (() <$ try (many1 spaceChar <* key))
-              <|> () <$ P.newline
+              <|> () <$ newline


 --
@ -675,7 +609,7 @@ propertiesDrawer = try $ do
   key = try $ skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')

   value :: OrgParser String
-   value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> P.newline)
+   value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline)

 keyValuesToAttr :: [(String, String)] -> Attr
 keyValuesToAttr kvs =
@ -696,7 +630,7 @@ keyValuesToAttr kvs =
 figure :: OrgParser (F Blocks)
 figure = try $ do
  figAttrs <- blockAttributes
-  src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline
+  src <- skipSpaces *> selfTarget <* skipSpaces <* newline
  guard . not . isNothing . blockAttrCaption $ figAttrs
  guard (isImageFilename src)
  let figName    = fromMaybe mempty $ blockAttrName figAttrs
@ -785,7 +719,7 @@ parseFormat = try $ do
 header :: OrgParser (F Blocks)
 header = try $ do
  level    <- headerStart
-  title    <- manyTill inline (lookAhead $ optional headerTags <* P.newline)
+  title    <- manyTill inline (lookAhead $ optional headerTags <* newline)
  tags     <- option [] headerTags
  newline
  propAttr <- option nullAttr (keyValuesToAttr <$> propertiesDrawer)
@ -1083,7 +1017,7 @@ definitionListItem parseMarkerGetLength = try $ do
  return $ (,) <$> term' <*> fmap (:[]) contents'
 where
   definitionMarker =
-     spaceChar *> string "::" <* (spaceChar <|> lookAhead P.newline)
+     spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)


 -- parse raw text for one list item, excluding start marker and continuations
@ -1559,7 +1493,7 @@ many1TillNOrLessNewlines n p end = try $
   nMoreLines k        cs = try $ (final k cs <|> rest k cs)
                                  >>= uncurry nMoreLines
   final _ cs = (\x -> (Nothing,      cs ++ x)) <$> try finalLine
-   rest  m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p P.newline)
+   rest  m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline)
   finalLine = try $ manyTill p end
   minus1 k = k - 1
   oneOrMore cs = guard (not $ null cs) *> return cs
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@ -29,9 +29,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 Define the Org-mode parser state.
 -}
 module Text.Pandoc.Readers.Org.ParserState
-  ( OrgParserState(..)
-  , OrgParserLocal(..)
+  ( OrgParserState (..)
+  , OrgParserLocal (..)
  , OrgNoteRecord
+  , HasReaderOptions (..)
+  , HasQuoteContext (..)
  , F(..)
  , askF
  , asksF
@ -184,6 +186,7 @@ modifyExportSettings :: ExportSettingSetter a -> a -> OrgParserState -> OrgParse
 modifyExportSettings setter val state =
  state { orgStateExportSettings = setter val . orgStateExportSettings $ state }

+
 --
 -- Parser state reader
 --
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@ -0,0 +1,182 @@
+{-
+Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.Org.Options
+   Copyright   : Copyright (C) 2014-2016 Albert Krewinkel
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Org-mode parsing utilities.
+
+Most functions are simply re-exports from @Text.Pandoc.Parsing@, some
+functions are adapted to Org-mode specific functionality.
+-}
+module Text.Pandoc.Readers.Org.Parsing
+  ( OrgParser
+  , anyLine
+  , blanklines
+  , newline
+  , parseFromString
+  , inList
+  , withContext
+  , updateLastForbiddenCharPos
+  , updateLastPreCharPos
+  -- * Re-exports from Text.Pandoc.Parser
+  , ParserContext (..)
+  , many1Till
+  , notFollowedBy'
+  , spaceChar
+  , nonspaceChar
+  , skipSpaces
+  , blankline
+  , enclosed
+  , stringAnyCase
+  , charsInBalanced
+  , uri
+  , withRaw
+  , readWithM
+  , guardEnabled
+  , updateLastStrPos
+  , notAfterString
+  , ParserState (..)
+  , registerHeader
+  , QuoteContext (..)
+  , singleQuoteStart
+  , singleQuoteEnd
+  , doubleQuoteStart
+  , doubleQuoteEnd
+  , dash
+  , ellipses
+  , citeKey
+  -- * Re-exports from Text.Pandoc.Parsec
+  , runParser
+  , getInput
+  , char
+  , letter
+  , digit
+  , alphaNum
+  , skipMany1
+  , spaces
+  , anyChar
+  , string
+  , count
+  , eof
+  , noneOf
+  , oneOf
+  , lookAhead
+  , notFollowedBy
+  , many
+  , many1
+  , manyTill
+  , (<|>)
+  , (<?>)
+  , choice
+  , try
+  , sepBy
+  , sepBy1
+  , option
+  , optional
+  , optionMaybe
+  , getState
+  , updateState
+  , SourcePos
+  , getPosition
+  ) where
+
+import           Text.Pandoc.Readers.Org.ParserState
+
+import qualified Text.Pandoc.Parsing as P
+import           Text.Pandoc.Parsing hiding ( anyLine, blanklines, newline
+                                            , parseFromString )
+
+import           Control.Monad ( guard )
+import           Control.Monad.Reader ( Reader )
+
+-- | The parser used to read org files.
+type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
+
+--
+-- Adaptions and specializations of parsing utilities
+--
+
+-- | Parse any line of text
+anyLine :: OrgParser String
+anyLine =
+  P.anyLine
+    <* updateLastPreCharPos
+    <* updateLastForbiddenCharPos
+
+-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
+-- of the state saved and restored.
+parseFromString :: OrgParser a -> String -> OrgParser a
+parseFromString parser str' = do
+  oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
+  updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
+  result <- P.parseFromString parser str'
+  updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
+  return result
+
+-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
+newline :: OrgParser Char
+newline =
+  P.newline
+       <* updateLastPreCharPos
+       <* updateLastForbiddenCharPos
+
+-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
+blanklines :: OrgParser [Char]
+blanklines =
+  P.blanklines
+       <* updateLastPreCharPos
+       <* updateLastForbiddenCharPos
+
+-- | Succeeds when we're in list context.
+inList :: OrgParser ()
+inList = do
+  ctx <- orgStateParserContext <$> getState
+  guard (ctx == ListItemState)
+
+-- | Parse in different context
+withContext :: ParserContext -- ^ New parser context
+            -> OrgParser a   -- ^ Parser to run in that context
+            -> OrgParser a
+withContext context parser = do
+  oldContext <- orgStateParserContext <$> getState
+  updateState $ \s -> s{ orgStateParserContext = context }
+  result <- parser
+  updateState $ \s -> s{ orgStateParserContext = oldContext }
+  return result
+
+--
+-- Parser state update functions
+--
+
+-- | Set the current position as the last position at which a forbidden char
+-- was found (i.e. a character which is not allowed at the inner border of
+-- markup).
+updateLastForbiddenCharPos :: OrgParser ()
+updateLastForbiddenCharPos = getPosition >>= \p ->
+  updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
+
+-- | Set the current parser position as the position at which a character was
+-- seen which allows inline markup to follow.
+updateLastPreCharPos :: OrgParser ()
+updateLastPreCharPos = getPosition >>= \p ->
+  updateState $ \s -> s{ orgStateLastPreCharPos = Just p}