Org reader: extract parsing function to module
The Org-mode reader uses many functions defined in the `Text.Pandoc.Parsing` utility module. Some of the functions are overwritten with versions adapted to Org-mode idiosyncrasies. These special functions, as well as the normal Pandoc versions, are combined in a single module to increase the ease of use. This leads to decoupling of Org-mode and Pandoc and hence to slightly cleaner code. The downside is code-bloat due to repeated import/export statements.
This commit is contained in:
parent
cc937eea2f
commit
a340c7249f
4 changed files with 198 additions and 78 deletions
|
@ -394,6 +394,7 @@ Library
|
||||||
Text.Pandoc.Readers.Odt.Arrows.State,
|
Text.Pandoc.Readers.Odt.Arrows.State,
|
||||||
Text.Pandoc.Readers.Odt.Arrows.Utils,
|
Text.Pandoc.Readers.Odt.Arrows.Utils,
|
||||||
Text.Pandoc.Readers.Org.ParserState,
|
Text.Pandoc.Readers.Org.ParserState,
|
||||||
|
Text.Pandoc.Readers.Org.Parsing,
|
||||||
Text.Pandoc.Writers.Shared,
|
Text.Pandoc.Writers.Shared,
|
||||||
Text.Pandoc.Asciify,
|
Text.Pandoc.Asciify,
|
||||||
Text.Pandoc.MIME,
|
Text.Pandoc.MIME,
|
||||||
|
|
|
@ -29,27 +29,23 @@ Conversion of org-mode formatted plain text to 'Pandoc' document.
|
||||||
-}
|
-}
|
||||||
module Text.Pandoc.Readers.Org ( readOrg ) where
|
module Text.Pandoc.Readers.Org ( readOrg ) where
|
||||||
|
|
||||||
|
import Text.Pandoc.Readers.Org.ParserState
|
||||||
|
import Text.Pandoc.Readers.Org.Parsing
|
||||||
|
|
||||||
import qualified Text.Pandoc.Builder as B
|
import qualified Text.Pandoc.Builder as B
|
||||||
import Text.Pandoc.Builder ( Inlines, Blocks )
|
import Text.Pandoc.Builder ( Inlines, Blocks )
|
||||||
import Text.Pandoc.Definition
|
import Text.Pandoc.Definition
|
||||||
import Text.Pandoc.Compat.Monoid ((<>))
|
import Text.Pandoc.Compat.Monoid ((<>))
|
||||||
import Text.Pandoc.Error
|
import Text.Pandoc.Error
|
||||||
import Text.Pandoc.Options
|
import Text.Pandoc.Options
|
||||||
import qualified Text.Pandoc.Parsing as P
|
|
||||||
import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
|
|
||||||
, anyLine, blanklines, newline
|
|
||||||
, orderedListMarker
|
|
||||||
, parseFromString
|
|
||||||
)
|
|
||||||
import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
|
import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
|
||||||
import Text.Pandoc.Readers.Org.ParserState
|
|
||||||
import Text.Pandoc.Shared (compactify', compactify'DL)
|
import Text.Pandoc.Shared (compactify', compactify'DL)
|
||||||
import Text.TeXMath (readTeX, writePandoc, DisplayType(..))
|
import Text.TeXMath (readTeX, writePandoc, DisplayType(..))
|
||||||
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
|
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
|
||||||
|
|
||||||
import Control.Arrow (first)
|
import Control.Arrow (first)
|
||||||
import Control.Monad (foldM, guard, mplus, mzero, when)
|
import Control.Monad (foldM, guard, mplus, mzero, when)
|
||||||
import Control.Monad.Reader ( Reader, runReader )
|
import Control.Monad.Reader ( runReader )
|
||||||
import Data.Char (isAlphaNum, isSpace, toLower, toUpper)
|
import Data.Char (isAlphaNum, isSpace, toLower, toUpper)
|
||||||
import Data.List ( foldl', intersperse, isPrefixOf, isSuffixOf )
|
import Data.List ( foldl', intersperse, isPrefixOf, isSuffixOf )
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
@ -63,9 +59,6 @@ readOrg :: ReaderOptions -- ^ Reader options
|
||||||
-> Either PandocError Pandoc
|
-> Either PandocError Pandoc
|
||||||
readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
|
readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
|
||||||
|
|
||||||
-- | The parser used to read org files.
|
|
||||||
type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
|
|
||||||
|
|
||||||
--
|
--
|
||||||
-- Functions acting on the parser state
|
-- Functions acting on the parser state
|
||||||
--
|
--
|
||||||
|
@ -73,14 +66,6 @@ recordAnchorId :: String -> OrgParser ()
|
||||||
recordAnchorId i = updateState $ \s ->
|
recordAnchorId i = updateState $ \s ->
|
||||||
s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
|
s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
|
||||||
|
|
||||||
updateLastForbiddenCharPos :: OrgParser ()
|
|
||||||
updateLastForbiddenCharPos = getPosition >>= \p ->
|
|
||||||
updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
|
|
||||||
|
|
||||||
updateLastPreCharPos :: OrgParser ()
|
|
||||||
updateLastPreCharPos = getPosition >>= \p ->
|
|
||||||
updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
|
|
||||||
|
|
||||||
pushToInlineCharStack :: Char -> OrgParser ()
|
pushToInlineCharStack :: Char -> OrgParser ()
|
||||||
pushToInlineCharStack c = updateState $ \s ->
|
pushToInlineCharStack c = updateState $ \s ->
|
||||||
s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
|
s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
|
||||||
|
@ -258,57 +243,6 @@ isHeaderLevelLowerEq n blk =
|
||||||
_ -> False
|
_ -> False
|
||||||
|
|
||||||
|
|
||||||
--
|
|
||||||
-- Adaptions and specializations of parsing utilities
|
|
||||||
--
|
|
||||||
|
|
||||||
-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
|
|
||||||
-- of the state saved and restored.
|
|
||||||
parseFromString :: OrgParser a -> String -> OrgParser a
|
|
||||||
parseFromString parser str' = do
|
|
||||||
oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
|
|
||||||
updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
|
|
||||||
result <- P.parseFromString parser str'
|
|
||||||
updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
|
|
||||||
return result
|
|
||||||
|
|
||||||
-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
|
|
||||||
newline :: OrgParser Char
|
|
||||||
newline =
|
|
||||||
P.newline
|
|
||||||
<* updateLastPreCharPos
|
|
||||||
<* updateLastForbiddenCharPos
|
|
||||||
|
|
||||||
-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
|
|
||||||
blanklines :: OrgParser [Char]
|
|
||||||
blanklines =
|
|
||||||
P.blanklines
|
|
||||||
<* updateLastPreCharPos
|
|
||||||
<* updateLastForbiddenCharPos
|
|
||||||
|
|
||||||
anyLine :: OrgParser String
|
|
||||||
anyLine =
|
|
||||||
P.anyLine
|
|
||||||
<* updateLastPreCharPos
|
|
||||||
<* updateLastForbiddenCharPos
|
|
||||||
|
|
||||||
-- | Succeeds when we're in list context.
|
|
||||||
inList :: OrgParser ()
|
|
||||||
inList = do
|
|
||||||
ctx <- orgStateParserContext <$> getState
|
|
||||||
guard (ctx == ListItemState)
|
|
||||||
|
|
||||||
-- | Parse in different context
|
|
||||||
withContext :: ParserContext -- ^ New parser context
|
|
||||||
-> OrgParser a -- ^ Parser to run in that context
|
|
||||||
-> OrgParser a
|
|
||||||
withContext context parser = do
|
|
||||||
oldContext <- orgStateParserContext <$> getState
|
|
||||||
updateState $ \s -> s{ orgStateParserContext = context }
|
|
||||||
result <- parser
|
|
||||||
updateState $ \s -> s{ orgStateParserContext = oldContext }
|
|
||||||
return result
|
|
||||||
|
|
||||||
--
|
--
|
||||||
-- parsing blocks
|
-- parsing blocks
|
||||||
--
|
--
|
||||||
|
@ -398,7 +332,7 @@ keyValues = try $
|
||||||
endOfValue :: OrgParser ()
|
endOfValue :: OrgParser ()
|
||||||
endOfValue =
|
endOfValue =
|
||||||
lookAhead $ (() <$ try (many1 spaceChar <* key))
|
lookAhead $ (() <$ try (many1 spaceChar <* key))
|
||||||
<|> () <$ P.newline
|
<|> () <$ newline
|
||||||
|
|
||||||
|
|
||||||
--
|
--
|
||||||
|
@ -675,7 +609,7 @@ propertiesDrawer = try $ do
|
||||||
key = try $ skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')
|
key = try $ skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')
|
||||||
|
|
||||||
value :: OrgParser String
|
value :: OrgParser String
|
||||||
value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> P.newline)
|
value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline)
|
||||||
|
|
||||||
keyValuesToAttr :: [(String, String)] -> Attr
|
keyValuesToAttr :: [(String, String)] -> Attr
|
||||||
keyValuesToAttr kvs =
|
keyValuesToAttr kvs =
|
||||||
|
@ -696,7 +630,7 @@ keyValuesToAttr kvs =
|
||||||
figure :: OrgParser (F Blocks)
|
figure :: OrgParser (F Blocks)
|
||||||
figure = try $ do
|
figure = try $ do
|
||||||
figAttrs <- blockAttributes
|
figAttrs <- blockAttributes
|
||||||
src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline
|
src <- skipSpaces *> selfTarget <* skipSpaces <* newline
|
||||||
guard . not . isNothing . blockAttrCaption $ figAttrs
|
guard . not . isNothing . blockAttrCaption $ figAttrs
|
||||||
guard (isImageFilename src)
|
guard (isImageFilename src)
|
||||||
let figName = fromMaybe mempty $ blockAttrName figAttrs
|
let figName = fromMaybe mempty $ blockAttrName figAttrs
|
||||||
|
@ -785,7 +719,7 @@ parseFormat = try $ do
|
||||||
header :: OrgParser (F Blocks)
|
header :: OrgParser (F Blocks)
|
||||||
header = try $ do
|
header = try $ do
|
||||||
level <- headerStart
|
level <- headerStart
|
||||||
title <- manyTill inline (lookAhead $ optional headerTags <* P.newline)
|
title <- manyTill inline (lookAhead $ optional headerTags <* newline)
|
||||||
tags <- option [] headerTags
|
tags <- option [] headerTags
|
||||||
newline
|
newline
|
||||||
propAttr <- option nullAttr (keyValuesToAttr <$> propertiesDrawer)
|
propAttr <- option nullAttr (keyValuesToAttr <$> propertiesDrawer)
|
||||||
|
@ -1083,7 +1017,7 @@ definitionListItem parseMarkerGetLength = try $ do
|
||||||
return $ (,) <$> term' <*> fmap (:[]) contents'
|
return $ (,) <$> term' <*> fmap (:[]) contents'
|
||||||
where
|
where
|
||||||
definitionMarker =
|
definitionMarker =
|
||||||
spaceChar *> string "::" <* (spaceChar <|> lookAhead P.newline)
|
spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)
|
||||||
|
|
||||||
|
|
||||||
-- parse raw text for one list item, excluding start marker and continuations
|
-- parse raw text for one list item, excluding start marker and continuations
|
||||||
|
@ -1559,7 +1493,7 @@ many1TillNOrLessNewlines n p end = try $
|
||||||
nMoreLines k cs = try $ (final k cs <|> rest k cs)
|
nMoreLines k cs = try $ (final k cs <|> rest k cs)
|
||||||
>>= uncurry nMoreLines
|
>>= uncurry nMoreLines
|
||||||
final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine
|
final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine
|
||||||
rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p P.newline)
|
rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline)
|
||||||
finalLine = try $ manyTill p end
|
finalLine = try $ manyTill p end
|
||||||
minus1 k = k - 1
|
minus1 k = k - 1
|
||||||
oneOrMore cs = guard (not $ null cs) *> return cs
|
oneOrMore cs = guard (not $ null cs) *> return cs
|
||||||
|
|
|
@ -29,9 +29,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
Define the Org-mode parser state.
|
Define the Org-mode parser state.
|
||||||
-}
|
-}
|
||||||
module Text.Pandoc.Readers.Org.ParserState
|
module Text.Pandoc.Readers.Org.ParserState
|
||||||
( OrgParserState(..)
|
( OrgParserState (..)
|
||||||
, OrgParserLocal(..)
|
, OrgParserLocal (..)
|
||||||
, OrgNoteRecord
|
, OrgNoteRecord
|
||||||
|
, HasReaderOptions (..)
|
||||||
|
, HasQuoteContext (..)
|
||||||
, F(..)
|
, F(..)
|
||||||
, askF
|
, askF
|
||||||
, asksF
|
, asksF
|
||||||
|
@ -184,6 +186,7 @@ modifyExportSettings :: ExportSettingSetter a -> a -> OrgParserState -> OrgParse
|
||||||
modifyExportSettings setter val state =
|
modifyExportSettings setter val state =
|
||||||
state { orgStateExportSettings = setter val . orgStateExportSettings $ state }
|
state { orgStateExportSettings = setter val . orgStateExportSettings $ state }
|
||||||
|
|
||||||
|
|
||||||
--
|
--
|
||||||
-- Parser state reader
|
-- Parser state reader
|
||||||
--
|
--
|
||||||
|
|
182
src/Text/Pandoc/Readers/Org/Parsing.hs
Normal file
182
src/Text/Pandoc/Readers/Org/Parsing.hs
Normal file
|
@ -0,0 +1,182 @@
|
||||||
|
{-
|
||||||
|
Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
|
-}
|
||||||
|
|
||||||
|
{- |
|
||||||
|
Module : Text.Pandoc.Readers.Org.Options
|
||||||
|
Copyright : Copyright (C) 2014-2016 Albert Krewinkel
|
||||||
|
License : GNU GPL, version 2 or above
|
||||||
|
|
||||||
|
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
|
||||||
|
|
||||||
|
Org-mode parsing utilities.
|
||||||
|
|
||||||
|
Most functions are simply re-exports from @Text.Pandoc.Parsing@, some
|
||||||
|
functions are adapted to Org-mode specific functionality.
|
||||||
|
-}
|
||||||
|
module Text.Pandoc.Readers.Org.Parsing
|
||||||
|
( OrgParser
|
||||||
|
, anyLine
|
||||||
|
, blanklines
|
||||||
|
, newline
|
||||||
|
, parseFromString
|
||||||
|
, inList
|
||||||
|
, withContext
|
||||||
|
, updateLastForbiddenCharPos
|
||||||
|
, updateLastPreCharPos
|
||||||
|
-- * Re-exports from Text.Pandoc.Parser
|
||||||
|
, ParserContext (..)
|
||||||
|
, many1Till
|
||||||
|
, notFollowedBy'
|
||||||
|
, spaceChar
|
||||||
|
, nonspaceChar
|
||||||
|
, skipSpaces
|
||||||
|
, blankline
|
||||||
|
, enclosed
|
||||||
|
, stringAnyCase
|
||||||
|
, charsInBalanced
|
||||||
|
, uri
|
||||||
|
, withRaw
|
||||||
|
, readWithM
|
||||||
|
, guardEnabled
|
||||||
|
, updateLastStrPos
|
||||||
|
, notAfterString
|
||||||
|
, ParserState (..)
|
||||||
|
, registerHeader
|
||||||
|
, QuoteContext (..)
|
||||||
|
, singleQuoteStart
|
||||||
|
, singleQuoteEnd
|
||||||
|
, doubleQuoteStart
|
||||||
|
, doubleQuoteEnd
|
||||||
|
, dash
|
||||||
|
, ellipses
|
||||||
|
, citeKey
|
||||||
|
-- * Re-exports from Text.Pandoc.Parsec
|
||||||
|
, runParser
|
||||||
|
, getInput
|
||||||
|
, char
|
||||||
|
, letter
|
||||||
|
, digit
|
||||||
|
, alphaNum
|
||||||
|
, skipMany1
|
||||||
|
, spaces
|
||||||
|
, anyChar
|
||||||
|
, string
|
||||||
|
, count
|
||||||
|
, eof
|
||||||
|
, noneOf
|
||||||
|
, oneOf
|
||||||
|
, lookAhead
|
||||||
|
, notFollowedBy
|
||||||
|
, many
|
||||||
|
, many1
|
||||||
|
, manyTill
|
||||||
|
, (<|>)
|
||||||
|
, (<?>)
|
||||||
|
, choice
|
||||||
|
, try
|
||||||
|
, sepBy
|
||||||
|
, sepBy1
|
||||||
|
, option
|
||||||
|
, optional
|
||||||
|
, optionMaybe
|
||||||
|
, getState
|
||||||
|
, updateState
|
||||||
|
, SourcePos
|
||||||
|
, getPosition
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Text.Pandoc.Readers.Org.ParserState
|
||||||
|
|
||||||
|
import qualified Text.Pandoc.Parsing as P
|
||||||
|
import Text.Pandoc.Parsing hiding ( anyLine, blanklines, newline
|
||||||
|
, parseFromString )
|
||||||
|
|
||||||
|
import Control.Monad ( guard )
|
||||||
|
import Control.Monad.Reader ( Reader )
|
||||||
|
|
||||||
|
-- | The parser used to read org files.
|
||||||
|
type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Adaptions and specializations of parsing utilities
|
||||||
|
--
|
||||||
|
|
||||||
|
-- | Parse any line of text
|
||||||
|
anyLine :: OrgParser String
|
||||||
|
anyLine =
|
||||||
|
P.anyLine
|
||||||
|
<* updateLastPreCharPos
|
||||||
|
<* updateLastForbiddenCharPos
|
||||||
|
|
||||||
|
-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
|
||||||
|
-- of the state saved and restored.
|
||||||
|
parseFromString :: OrgParser a -> String -> OrgParser a
|
||||||
|
parseFromString parser str' = do
|
||||||
|
oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
|
||||||
|
updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
|
||||||
|
result <- P.parseFromString parser str'
|
||||||
|
updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
|
||||||
|
return result
|
||||||
|
|
||||||
|
-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
|
||||||
|
newline :: OrgParser Char
|
||||||
|
newline =
|
||||||
|
P.newline
|
||||||
|
<* updateLastPreCharPos
|
||||||
|
<* updateLastForbiddenCharPos
|
||||||
|
|
||||||
|
-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
|
||||||
|
blanklines :: OrgParser [Char]
|
||||||
|
blanklines =
|
||||||
|
P.blanklines
|
||||||
|
<* updateLastPreCharPos
|
||||||
|
<* updateLastForbiddenCharPos
|
||||||
|
|
||||||
|
-- | Succeeds when we're in list context.
|
||||||
|
inList :: OrgParser ()
|
||||||
|
inList = do
|
||||||
|
ctx <- orgStateParserContext <$> getState
|
||||||
|
guard (ctx == ListItemState)
|
||||||
|
|
||||||
|
-- | Parse in different context
|
||||||
|
withContext :: ParserContext -- ^ New parser context
|
||||||
|
-> OrgParser a -- ^ Parser to run in that context
|
||||||
|
-> OrgParser a
|
||||||
|
withContext context parser = do
|
||||||
|
oldContext <- orgStateParserContext <$> getState
|
||||||
|
updateState $ \s -> s{ orgStateParserContext = context }
|
||||||
|
result <- parser
|
||||||
|
updateState $ \s -> s{ orgStateParserContext = oldContext }
|
||||||
|
return result
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Parser state update functions
|
||||||
|
--
|
||||||
|
|
||||||
|
-- | Set the current position as the last position at which a forbidden char
|
||||||
|
-- was found (i.e. a character which is not allowed at the inner border of
|
||||||
|
-- markup).
|
||||||
|
updateLastForbiddenCharPos :: OrgParser ()
|
||||||
|
updateLastForbiddenCharPos = getPosition >>= \p ->
|
||||||
|
updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
|
||||||
|
|
||||||
|
-- | Set the current parser position as the position at which a character was
|
||||||
|
-- seen which allows inline markup to follow.
|
||||||
|
updateLastPreCharPos :: OrgParser ()
|
||||||
|
updateLastPreCharPos = getPosition >>= \p ->
|
||||||
|
updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
|
Loading…
Add table
Reference in a new issue