87096c64f8
This gives better results for tight lists. Closes #1437. An alternative solution would be to use Para everywhere, and never Plain. I am not sufficiently familiar with org to know which is best. Thoughts, @tarleb?
1405 lines
46 KiB
Haskell
1405 lines
46 KiB
Haskell
{-# LANGUAGE OverloadedStrings #-}
|
|
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
|
{-
|
|
Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de>
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
-}
|
|
|
|
{- |
|
|
Module : Text.Pandoc.Readers.Org
|
|
Copyright : Copyright (C) 2014 Albert Krewinkel
|
|
License : GNU GPL, version 2 or above
|
|
|
|
Maintainer : Albert Krewinkel <tarleb@moltkeplatz.de>
|
|
|
|
Conversion of org-mode formatted plain text to 'Pandoc' document.
|
|
-}
|
|
module Text.Pandoc.Readers.Org ( readOrg ) where
|
|
|
|
import qualified Text.Pandoc.Builder as B
|
|
import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), (<>)
|
|
, trimInlines )
|
|
import Text.Pandoc.Definition
|
|
import Text.Pandoc.Options
|
|
import qualified Text.Pandoc.Parsing as P
|
|
import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
|
|
, newline, orderedListMarker
|
|
, parseFromString
|
|
)
|
|
import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
|
|
import Text.Pandoc.Shared (compactify', compactify'DL)
|
|
import Text.TeXMath (texMathToPandoc, DisplayType(..))
|
|
|
|
import Control.Applicative ( Applicative, pure
|
|
, (<$>), (<$), (<*>), (<*), (*>) )
|
|
import Control.Arrow (first)
|
|
import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
|
|
import Control.Monad.Reader (Reader, runReader, ask, asks)
|
|
import Data.Char (isAlphaNum, toLower)
|
|
import Data.Default
|
|
import Data.List (intersperse, isPrefixOf, isSuffixOf)
|
|
import qualified Data.Map as M
|
|
import Data.Maybe (fromMaybe, isJust)
|
|
import Data.Monoid (Monoid, mconcat, mempty, mappend)
|
|
import Network.HTTP (urlEncode)
|
|
|
|
-- | Parse org-mode string and return a Pandoc document.
|
|
readOrg :: ReaderOptions -- ^ Reader options
|
|
-> String -- ^ String to parse (assuming @'\n'@ line endings)
|
|
-> Pandoc
|
|
readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
|
|
|
|
type OrgParser = Parser [Char] OrgParserState
|
|
|
|
parseOrg :: OrgParser Pandoc
|
|
parseOrg = do
|
|
blocks' <- parseBlocks
|
|
st <- getState
|
|
let meta = runF (orgStateMeta' st) st
|
|
return $ Pandoc meta $ filter (/= Null) (B.toList $ runF blocks' st)
|
|
|
|
--
|
|
-- Parser State for Org
|
|
--
|
|
|
|
type OrgNoteRecord = (String, F Blocks)
|
|
type OrgNoteTable = [OrgNoteRecord]
|
|
|
|
type OrgBlockAttributes = M.Map String String
|
|
|
|
type OrgLinkFormatters = M.Map String (String -> String)
|
|
|
|
-- | Org-mode parser state
|
|
data OrgParserState = OrgParserState
|
|
{ orgStateOptions :: ReaderOptions
|
|
, orgStateAnchorIds :: [String]
|
|
, orgStateBlockAttributes :: OrgBlockAttributes
|
|
, orgStateEmphasisCharStack :: [Char]
|
|
, orgStateEmphasisNewlines :: Maybe Int
|
|
, orgStateLastForbiddenCharPos :: Maybe SourcePos
|
|
, orgStateLastPreCharPos :: Maybe SourcePos
|
|
, orgStateLastStrPos :: Maybe SourcePos
|
|
, orgStateLinkFormatters :: OrgLinkFormatters
|
|
, orgStateMeta :: Meta
|
|
, orgStateMeta' :: F Meta
|
|
, orgStateNotes' :: OrgNoteTable
|
|
}
|
|
|
|
instance HasReaderOptions OrgParserState where
|
|
extractReaderOptions = orgStateOptions
|
|
|
|
instance HasMeta OrgParserState where
|
|
setMeta field val st =
|
|
st{ orgStateMeta = setMeta field val $ orgStateMeta st }
|
|
deleteMeta field st =
|
|
st{ orgStateMeta = deleteMeta field $ orgStateMeta st }
|
|
|
|
instance HasLastStrPosition OrgParserState where
|
|
getLastStrPos = orgStateLastStrPos
|
|
setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
|
|
|
|
instance Default OrgParserState where
|
|
def = defaultOrgParserState
|
|
|
|
defaultOrgParserState :: OrgParserState
|
|
defaultOrgParserState = OrgParserState
|
|
{ orgStateOptions = def
|
|
, orgStateAnchorIds = []
|
|
, orgStateBlockAttributes = M.empty
|
|
, orgStateEmphasisCharStack = []
|
|
, orgStateEmphasisNewlines = Nothing
|
|
, orgStateLastForbiddenCharPos = Nothing
|
|
, orgStateLastPreCharPos = Nothing
|
|
, orgStateLastStrPos = Nothing
|
|
, orgStateLinkFormatters = M.empty
|
|
, orgStateMeta = nullMeta
|
|
, orgStateMeta' = return nullMeta
|
|
, orgStateNotes' = []
|
|
}
|
|
|
|
recordAnchorId :: String -> OrgParser ()
|
|
recordAnchorId i = updateState $ \s ->
|
|
s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
|
|
|
|
addBlockAttribute :: String -> String -> OrgParser ()
|
|
addBlockAttribute key val = updateState $ \s ->
|
|
let attrs = orgStateBlockAttributes s
|
|
in s{ orgStateBlockAttributes = M.insert key val attrs }
|
|
|
|
lookupBlockAttribute :: String -> OrgParser (Maybe String)
|
|
lookupBlockAttribute key =
|
|
M.lookup key . orgStateBlockAttributes <$> getState
|
|
|
|
resetBlockAttributes :: OrgParser ()
|
|
resetBlockAttributes = updateState $ \s ->
|
|
s{ orgStateBlockAttributes = orgStateBlockAttributes def }
|
|
|
|
updateLastForbiddenCharPos :: OrgParser ()
|
|
updateLastForbiddenCharPos = getPosition >>= \p ->
|
|
updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
|
|
|
|
updateLastPreCharPos :: OrgParser ()
|
|
updateLastPreCharPos = getPosition >>= \p ->
|
|
updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
|
|
|
|
pushToInlineCharStack :: Char -> OrgParser ()
|
|
pushToInlineCharStack c = updateState $ \s ->
|
|
s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
|
|
|
|
popInlineCharStack :: OrgParser ()
|
|
popInlineCharStack = updateState $ \s ->
|
|
s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }
|
|
|
|
surroundingEmphasisChar :: OrgParser [Char]
|
|
surroundingEmphasisChar =
|
|
take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
|
|
|
|
startEmphasisNewlinesCounting :: Int -> OrgParser ()
|
|
startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
|
|
s{ orgStateEmphasisNewlines = Just maxNewlines }
|
|
|
|
decEmphasisNewlinesCount :: OrgParser ()
|
|
decEmphasisNewlinesCount = updateState $ \s ->
|
|
s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
|
|
|
|
newlinesCountWithinLimits :: OrgParser Bool
|
|
newlinesCountWithinLimits = do
|
|
st <- getState
|
|
return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True
|
|
|
|
resetEmphasisNewlines :: OrgParser ()
|
|
resetEmphasisNewlines = updateState $ \s ->
|
|
s{ orgStateEmphasisNewlines = Nothing }
|
|
|
|
addLinkFormat :: String
|
|
-> (String -> String)
|
|
-> OrgParser ()
|
|
addLinkFormat key formatter = updateState $ \s ->
|
|
let fs = orgStateLinkFormatters s
|
|
in s{ orgStateLinkFormatters = M.insert key formatter fs }
|
|
|
|
addToNotesTable :: OrgNoteRecord -> OrgParser ()
|
|
addToNotesTable note = do
|
|
oldnotes <- orgStateNotes' <$> getState
|
|
updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
|
|
|
|
-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
|
|
-- of the state saved and restored.
|
|
parseFromString :: OrgParser a -> String -> OrgParser a
|
|
parseFromString parser str' = do
|
|
oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
|
|
updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
|
|
result <- P.parseFromString parser str'
|
|
updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
|
|
return result
|
|
|
|
|
|
--
|
|
-- Adaptions and specializations of parsing utilities
|
|
--
|
|
|
|
newtype F a = F { unF :: Reader OrgParserState a
|
|
} deriving (Monad, Applicative, Functor)
|
|
|
|
runF :: F a -> OrgParserState -> a
|
|
runF = runReader . unF
|
|
|
|
askF :: F OrgParserState
|
|
askF = F ask
|
|
|
|
asksF :: (OrgParserState -> a) -> F a
|
|
asksF f = F $ asks f
|
|
|
|
instance Monoid a => Monoid (F a) where
|
|
mempty = return mempty
|
|
mappend = liftM2 mappend
|
|
mconcat = fmap mconcat . sequence
|
|
|
|
trimInlinesF :: F Inlines -> F Inlines
|
|
trimInlinesF = liftM trimInlines
|
|
|
|
returnF :: a -> OrgParser (F a)
|
|
returnF = return . return
|
|
|
|
|
|
-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
|
|
newline :: OrgParser Char
|
|
newline =
|
|
P.newline
|
|
<* updateLastPreCharPos
|
|
<* updateLastForbiddenCharPos
|
|
|
|
--
|
|
-- parsing blocks
|
|
--
|
|
|
|
parseBlocks :: OrgParser (F Blocks)
|
|
parseBlocks = mconcat <$> manyTill block eof
|
|
|
|
block :: OrgParser (F Blocks)
|
|
block = choice [ mempty <$ blanklines
|
|
, optionalAttributes $ choice
|
|
[ orgBlock
|
|
, figure
|
|
, table
|
|
]
|
|
, example
|
|
, drawer
|
|
, specialLine
|
|
, header
|
|
, return <$> hline
|
|
, list
|
|
, latexFragment
|
|
, noteBlock
|
|
, paraOrPlain
|
|
] <?> "block"
|
|
|
|
optionalAttributes :: OrgParser (F Blocks) -> OrgParser (F Blocks)
|
|
optionalAttributes parser = try $
|
|
resetBlockAttributes *> parseBlockAttributes *> parser
|
|
|
|
parseBlockAttributes :: OrgParser ()
|
|
parseBlockAttributes = do
|
|
attrs <- many attribute
|
|
() <$ mapM (uncurry parseAndAddAttribute) attrs
|
|
where
|
|
attribute :: OrgParser (String, String)
|
|
attribute = try $ do
|
|
key <- metaLineStart *> many1Till nonspaceChar (char ':')
|
|
val <- skipSpaces *> anyLine
|
|
return (map toLower key, val)
|
|
|
|
parseAndAddAttribute :: String -> String -> OrgParser ()
|
|
parseAndAddAttribute key value = do
|
|
let key' = map toLower key
|
|
() <$ addBlockAttribute key' value
|
|
|
|
lookupInlinesAttr :: String -> OrgParser (Maybe (F Inlines))
|
|
lookupInlinesAttr attr = try $ do
|
|
val <- lookupBlockAttribute attr
|
|
maybe (return Nothing)
|
|
(fmap Just . parseFromString parseInlines)
|
|
val
|
|
|
|
|
|
--
|
|
-- Org Blocks (#+BEGIN_... / #+END_...)
|
|
--
|
|
|
|
type BlockProperties = (Int, String) -- (Indentation, Block-Type)
|
|
|
|
orgBlock :: OrgParser (F Blocks)
|
|
orgBlock = try $ do
|
|
blockProp@(_, blkType) <- blockHeaderStart
|
|
($ blockProp) $
|
|
case blkType of
|
|
"comment" -> withRaw' (const mempty)
|
|
"html" -> withRaw' (return . (B.rawBlock blkType))
|
|
"latex" -> withRaw' (return . (B.rawBlock blkType))
|
|
"ascii" -> withRaw' (return . (B.rawBlock blkType))
|
|
"example" -> withRaw' (return . exampleCode)
|
|
"quote" -> withParsed (fmap B.blockQuote)
|
|
"verse" -> verseBlock
|
|
"src" -> codeBlock
|
|
_ -> withParsed (fmap $ divWithClass blkType)
|
|
|
|
blockHeaderStart :: OrgParser (Int, String)
|
|
blockHeaderStart = try $ (,) <$> indent <*> blockType
|
|
where
|
|
indent = length <$> many spaceChar
|
|
blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord)
|
|
|
|
withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
|
|
withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp))
|
|
|
|
withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
|
|
withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp))
|
|
|
|
ignHeaders :: OrgParser ()
|
|
ignHeaders = (() <$ newline) <|> (() <$ anyLine)
|
|
|
|
divWithClass :: String -> Blocks -> Blocks
|
|
divWithClass cls = B.divWith ("", [cls], [])
|
|
|
|
verseBlock :: BlockProperties -> OrgParser (F Blocks)
|
|
verseBlock blkProp = try $ do
|
|
ignHeaders
|
|
content <- rawBlockContent blkProp
|
|
fmap B.para . mconcat . intersperse (pure B.linebreak)
|
|
<$> mapM (parseFromString parseInlines) (lines content)
|
|
|
|
exportsCode :: [(String, String)] -> Bool
|
|
exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs
|
|
|| ("rundoc-exports", "results") `elem` attrs)
|
|
|
|
exportsResults :: [(String, String)] -> Bool
|
|
exportsResults attrs = ("rundoc-exports", "results") `elem` attrs
|
|
|| ("rundoc-exports", "both") `elem` attrs
|
|
|
|
followingResultsBlock :: OrgParser (Maybe String)
|
|
followingResultsBlock =
|
|
optionMaybe (try $ blanklines *> stringAnyCase "#+RESULTS:"
|
|
*> blankline
|
|
*> (unlines <$> many1 exampleLine))
|
|
|
|
codeBlock :: BlockProperties -> OrgParser (F Blocks)
|
|
codeBlock blkProp = do
|
|
skipSpaces
|
|
(classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
|
|
id' <- fromMaybe "" <$> lookupBlockAttribute "name"
|
|
content <- rawBlockContent blkProp
|
|
resultsContent <- followingResultsBlock
|
|
let includeCode = exportsCode kv
|
|
let includeResults = exportsResults kv
|
|
let codeBlck = B.codeBlockWith ( id', classes, kv ) content
|
|
labelledBlck <- maybe (pure codeBlck)
|
|
(labelDiv codeBlck)
|
|
<$> lookupInlinesAttr "caption"
|
|
let resultBlck = pure $ maybe mempty (exampleCode) resultsContent
|
|
return $ (if includeCode then labelledBlck else mempty)
|
|
<> (if includeResults then resultBlck else mempty)
|
|
where
|
|
labelDiv blk value =
|
|
B.divWith nullAttr <$> (mappend <$> labelledBlock value
|
|
<*> pure blk)
|
|
labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
|
|
|
|
rawBlockContent :: BlockProperties -> OrgParser String
|
|
rawBlockContent (indent, blockType) = try $
|
|
unlines . map commaEscaped <$> manyTill indentedLine blockEnder
|
|
where
|
|
indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine)
|
|
blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType)
|
|
|
|
parsedBlockContent :: BlockProperties -> OrgParser (F Blocks)
|
|
parsedBlockContent blkProps = try $ do
|
|
raw <- rawBlockContent blkProps
|
|
parseFromString parseBlocks (raw ++ "\n")
|
|
|
|
-- indent by specified number of spaces (or equiv. tabs)
|
|
indentWith :: Int -> OrgParser String
|
|
indentWith num = do
|
|
tabStop <- getOption readerTabStop
|
|
if num < tabStop
|
|
then count num (char ' ')
|
|
else choice [ try (count num (char ' '))
|
|
, try (char '\t' >> count (num - tabStop) (char ' ')) ]
|
|
|
|
type SwitchOption = (Char, Maybe String)
|
|
|
|
orgArgWord :: OrgParser String
|
|
orgArgWord = many1 orgArgWordChar
|
|
|
|
-- | Parse code block arguments
|
|
-- TODO: We currently don't handle switches.
|
|
codeHeaderArgs :: OrgParser ([String], [(String, String)])
|
|
codeHeaderArgs = try $ do
|
|
language <- skipSpaces *> orgArgWord
|
|
_ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar))
|
|
parameters <- manyTill blockOption newline
|
|
let pandocLang = translateLang language
|
|
return $
|
|
if hasRundocParameters parameters
|
|
then ( [ pandocLang, rundocBlockClass ]
|
|
, map toRundocAttrib (("language", language) : parameters)
|
|
)
|
|
else ([ pandocLang ], parameters)
|
|
where hasRundocParameters = not . null
|
|
|
|
switch :: OrgParser SwitchOption
|
|
switch = try $ simpleSwitch <|> lineNumbersSwitch
|
|
where
|
|
simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter)
|
|
lineNumbersSwitch = (\ls -> ('l', Just ls)) <$>
|
|
(string "-l \"" *> many1Till nonspaceChar (char '"'))
|
|
|
|
translateLang :: String -> String
|
|
translateLang "C" = "c"
|
|
translateLang "C++" = "cpp"
|
|
translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported
|
|
translateLang "js" = "javascript"
|
|
translateLang "lisp" = "commonlisp"
|
|
translateLang "R" = "r"
|
|
translateLang "sh" = "bash"
|
|
translateLang "sqlite" = "sql"
|
|
translateLang cs = cs
|
|
|
|
-- | Prefix used for Rundoc classes and arguments.
|
|
rundocPrefix :: String
|
|
rundocPrefix = "rundoc-"
|
|
|
|
-- | The class-name used to mark rundoc blocks.
|
|
rundocBlockClass :: String
|
|
rundocBlockClass = rundocPrefix ++ "block"
|
|
|
|
blockOption :: OrgParser (String, String)
|
|
blockOption = try $ (,) <$> orgArgKey <*> orgParamValue
|
|
|
|
inlineBlockOption :: OrgParser (String, String)
|
|
inlineBlockOption = try $ (,) <$> orgArgKey <*> orgInlineParamValue
|
|
|
|
orgArgKey :: OrgParser String
|
|
orgArgKey = try $
|
|
skipSpaces *> char ':'
|
|
*> many1 orgArgWordChar
|
|
|
|
orgParamValue :: OrgParser String
|
|
orgParamValue = try $
|
|
skipSpaces *> many1 (noneOf "\t\n\r ") <* skipSpaces
|
|
|
|
orgInlineParamValue :: OrgParser String
|
|
orgInlineParamValue = try $
|
|
skipSpaces *> many1 (noneOf "\t\n\r ]") <* skipSpaces
|
|
|
|
orgArgWordChar :: OrgParser Char
|
|
orgArgWordChar = alphaNum <|> oneOf "-_"
|
|
|
|
toRundocAttrib :: (String, String) -> (String, String)
|
|
toRundocAttrib = first ("rundoc-" ++)
|
|
|
|
commaEscaped :: String -> String
|
|
commaEscaped (',':cs@('*':_)) = cs
|
|
commaEscaped (',':cs@('#':'+':_)) = cs
|
|
commaEscaped cs = cs
|
|
|
|
example :: OrgParser (F Blocks)
|
|
example = try $ do
|
|
return . return . exampleCode =<< unlines <$> many1 exampleLine
|
|
|
|
exampleCode :: String -> Blocks
|
|
exampleCode = B.codeBlockWith ("", ["example"], [])
|
|
|
|
exampleLine :: OrgParser String
|
|
exampleLine = try $ string ": " *> anyLine
|
|
|
|
-- Drawers for properties or a logbook
|
|
drawer :: OrgParser (F Blocks)
|
|
drawer = try $ do
|
|
drawerStart
|
|
manyTill drawerLine (try drawerEnd)
|
|
return mempty
|
|
|
|
drawerStart :: OrgParser String
|
|
drawerStart = try $
|
|
skipSpaces *> drawerName <* skipSpaces <* P.newline
|
|
where drawerName = try $ char ':' *> validDrawerName <* char ':'
|
|
validDrawerName = stringAnyCase "PROPERTIES"
|
|
<|> stringAnyCase "LOGBOOK"
|
|
|
|
drawerLine :: OrgParser String
|
|
drawerLine = try anyLine
|
|
|
|
drawerEnd :: OrgParser String
|
|
drawerEnd = try $
|
|
skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* P.newline
|
|
|
|
|
|
--
|
|
-- Figures
|
|
--
|
|
|
|
-- Figures (Image on a line by itself, preceded by name and/or caption)
|
|
figure :: OrgParser (F Blocks)
|
|
figure = try $ do
|
|
(cap, nam) <- nameAndCaption
|
|
src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline
|
|
guard (isImageFilename src)
|
|
return $ do
|
|
cap' <- cap
|
|
return $ B.para $ B.image src nam cap'
|
|
where
|
|
nameAndCaption =
|
|
do
|
|
maybeCap <- lookupInlinesAttr "caption"
|
|
maybeNam <- lookupBlockAttribute "name"
|
|
guard $ isJust maybeCap || isJust maybeNam
|
|
return ( fromMaybe mempty maybeCap
|
|
, maybe mempty withFigPrefix maybeNam )
|
|
withFigPrefix cs =
|
|
if "fig:" `isPrefixOf` cs
|
|
then cs
|
|
else "fig:" ++ cs
|
|
|
|
--
|
|
-- Comments, Options and Metadata
|
|
specialLine :: OrgParser (F Blocks)
|
|
specialLine = fmap return . try $ metaLine <|> commentLine
|
|
|
|
metaLine :: OrgParser Blocks
|
|
metaLine = try $ mempty
|
|
<$ (metaLineStart *> (optionLine <|> declarationLine))
|
|
|
|
commentLine :: OrgParser Blocks
|
|
commentLine = try $ commentLineStart *> anyLine *> pure mempty
|
|
|
|
-- The order, in which blocks are tried, makes sure that we're not looking at
|
|
-- the beginning of a block, so we don't need to check for it
|
|
metaLineStart :: OrgParser String
|
|
metaLineStart = try $ mappend <$> many spaceChar <*> string "#+"
|
|
|
|
commentLineStart :: OrgParser String
|
|
commentLineStart = try $ mappend <$> many spaceChar <*> string "# "
|
|
|
|
declarationLine :: OrgParser ()
|
|
declarationLine = try $ do
|
|
key <- metaKey
|
|
inlinesF <- metaInlines
|
|
updateState $ \st ->
|
|
let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta
|
|
in st { orgStateMeta' = orgStateMeta' st <> meta' }
|
|
return ()
|
|
|
|
metaInlines :: OrgParser (F MetaValue)
|
|
metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
|
|
|
|
metaKey :: OrgParser String
|
|
metaKey = map toLower <$> many1 (noneOf ": \n\r")
|
|
<* char ':'
|
|
<* skipSpaces
|
|
|
|
optionLine :: OrgParser ()
|
|
optionLine = try $ do
|
|
key <- metaKey
|
|
case key of
|
|
"link" -> parseLinkFormat >>= uncurry addLinkFormat
|
|
_ -> mzero
|
|
|
|
parseLinkFormat :: OrgParser ((String, String -> String))
|
|
parseLinkFormat = try $ do
|
|
linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
|
|
linkSubst <- parseFormat
|
|
return (linkType, linkSubst)
|
|
|
|
-- | An ad-hoc, single-argument-only implementation of a printf-style format
|
|
-- parser.
|
|
parseFormat :: OrgParser (String -> String)
|
|
parseFormat = try $ do
|
|
replacePlain <|> replaceUrl <|> justAppend
|
|
where
|
|
-- inefficient, but who cares
|
|
replacePlain = try $ (\x -> concat . flip intersperse x)
|
|
<$> sequence [tillSpecifier 's', rest]
|
|
replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode)
|
|
<$> sequence [tillSpecifier 'h', rest]
|
|
justAppend = try $ (++) <$> rest
|
|
|
|
rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r")
|
|
tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
|
|
|
|
--
|
|
-- Headers
|
|
--
|
|
|
|
-- | Headers
|
|
header :: OrgParser (F Blocks)
|
|
header = try $ do
|
|
level <- headerStart
|
|
title <- inlinesTillNewline
|
|
return $ B.header level <$> title
|
|
|
|
headerStart :: OrgParser Int
|
|
headerStart = try $
|
|
(length <$> many1 (char '*')) <* many1 (char ' ')
|
|
|
|
|
|
-- Don't use (or need) the reader wrapper here, we want hline to be
|
|
-- @show@able. Otherwise we can't use it with @notFollowedBy'@.
|
|
|
|
-- | Horizontal Line (five -- dashes or more)
|
|
hline :: OrgParser Blocks
|
|
hline = try $ do
|
|
skipSpaces
|
|
string "-----"
|
|
many (char '-')
|
|
skipSpaces
|
|
newline
|
|
return B.horizontalRule
|
|
|
|
--
|
|
-- Tables
|
|
--
|
|
|
|
data OrgTableRow = OrgContentRow (F [Blocks])
|
|
| OrgAlignRow [Alignment]
|
|
| OrgHlineRow
|
|
|
|
data OrgTable = OrgTable
|
|
{ orgTableColumns :: Int
|
|
, orgTableAlignments :: [Alignment]
|
|
, orgTableHeader :: [Blocks]
|
|
, orgTableRows :: [[Blocks]]
|
|
}
|
|
|
|
table :: OrgParser (F Blocks)
|
|
table = try $ do
|
|
lookAhead tableStart
|
|
do
|
|
rows <- tableRows
|
|
cptn <- fromMaybe (pure "") <$> lookupInlinesAttr "caption"
|
|
return $ (<$> cptn) . orgToPandocTable . normalizeTable =<< rowsToTable rows
|
|
|
|
orgToPandocTable :: OrgTable
|
|
-> Inlines
|
|
-> Blocks
|
|
orgToPandocTable (OrgTable _ aligns heads lns) caption =
|
|
B.table caption (zip aligns $ repeat 0) heads lns
|
|
|
|
tableStart :: OrgParser Char
|
|
tableStart = try $ skipSpaces *> char '|'
|
|
|
|
tableRows :: OrgParser [OrgTableRow]
|
|
tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
|
|
|
|
tableContentRow :: OrgParser OrgTableRow
|
|
tableContentRow = try $
|
|
OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline)
|
|
|
|
tableContentCell :: OrgParser (F Blocks)
|
|
tableContentCell = try $
|
|
fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell
|
|
|
|
endOfCell :: OrgParser Char
|
|
endOfCell = try $ char '|' <|> lookAhead newline
|
|
|
|
tableAlignRow :: OrgParser OrgTableRow
|
|
tableAlignRow = try $
|
|
OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline)
|
|
|
|
tableAlignCell :: OrgParser Alignment
|
|
tableAlignCell =
|
|
choice [ try $ emptyCell *> return AlignDefault
|
|
, try $ skipSpaces
|
|
*> char '<'
|
|
*> tableAlignFromChar
|
|
<* many digit
|
|
<* char '>'
|
|
<* emptyCell
|
|
] <?> "alignment info"
|
|
where emptyCell = try $ skipSpaces *> endOfCell
|
|
|
|
tableAlignFromChar :: OrgParser Alignment
|
|
tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft
|
|
, char 'c' *> return AlignCenter
|
|
, char 'r' *> return AlignRight
|
|
]
|
|
|
|
tableHline :: OrgParser OrgTableRow
|
|
tableHline = try $
|
|
OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
|
|
|
|
rowsToTable :: [OrgTableRow]
|
|
-> F OrgTable
|
|
rowsToTable = foldM (flip rowToContent) zeroTable
|
|
where zeroTable = OrgTable 0 mempty mempty mempty
|
|
|
|
normalizeTable :: OrgTable
|
|
-> OrgTable
|
|
normalizeTable (OrgTable cols aligns heads lns) =
|
|
let aligns' = fillColumns aligns AlignDefault
|
|
heads' = if heads == mempty
|
|
then mempty
|
|
else fillColumns heads (B.plain mempty)
|
|
lns' = map (`fillColumns` B.plain mempty) lns
|
|
fillColumns base padding = take cols $ base ++ repeat padding
|
|
in OrgTable cols aligns' heads' lns'
|
|
|
|
|
|
-- One or more horizontal rules after the first content line mark the previous
|
|
-- line as a header. All other horizontal lines are discarded.
|
|
rowToContent :: OrgTableRow
|
|
-> OrgTable
|
|
-> F OrgTable
|
|
rowToContent OrgHlineRow t = maybeBodyToHeader t
|
|
rowToContent (OrgAlignRow as) t = setLongestRow as =<< setAligns as t
|
|
rowToContent (OrgContentRow rf) t = do
|
|
rs <- rf
|
|
setLongestRow rs =<< appendToBody rs t
|
|
|
|
setLongestRow :: [a]
|
|
-> OrgTable
|
|
-> F OrgTable
|
|
setLongestRow rs t =
|
|
return t{ orgTableColumns = max (length rs) (orgTableColumns t) }
|
|
|
|
maybeBodyToHeader :: OrgTable
|
|
-> F OrgTable
|
|
maybeBodyToHeader t = case t of
|
|
OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
|
|
return t{ orgTableHeader = b , orgTableRows = [] }
|
|
_ -> return t
|
|
|
|
appendToBody :: [Blocks]
|
|
-> OrgTable
|
|
-> F OrgTable
|
|
appendToBody r t = return t{ orgTableRows = orgTableRows t ++ [r] }
|
|
|
|
setAligns :: [Alignment]
|
|
-> OrgTable
|
|
-> F OrgTable
|
|
setAligns aligns t = return $ t{ orgTableAlignments = aligns }
|
|
|
|
|
|
--
|
|
-- LaTeX fragments
|
|
--
|
|
latexFragment :: OrgParser (F Blocks)
|
|
latexFragment = try $ do
|
|
envName <- latexEnvStart
|
|
content <- mconcat <$> manyTill anyLineNewline (latexEnd envName)
|
|
return . return $ B.rawBlock "latex" (content `inLatexEnv` envName)
|
|
where
|
|
c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n"
|
|
, c
|
|
, "\\end{", e, "}\n"
|
|
]
|
|
|
|
latexEnvStart :: OrgParser String
|
|
latexEnvStart = try $ do
|
|
skipSpaces *> string "\\begin{"
|
|
*> latexEnvName
|
|
<* string "}"
|
|
<* blankline
|
|
|
|
latexEnd :: String -> OrgParser ()
|
|
latexEnd envName = try $
|
|
() <$ skipSpaces
|
|
<* string ("\\end{" ++ envName ++ "}")
|
|
<* blankline
|
|
|
|
-- | Parses a LaTeX environment name.
|
|
latexEnvName :: OrgParser String
|
|
latexEnvName = try $ do
|
|
mappend <$> many1 alphaNum
|
|
<*> option "" (string "*")
|
|
|
|
|
|
--
|
|
-- Footnote defintions
|
|
--
|
|
noteBlock :: OrgParser (F Blocks)
|
|
noteBlock = try $ do
|
|
ref <- noteMarker <* skipSpaces
|
|
content <- mconcat <$> blocksTillHeaderOrNote
|
|
addToNotesTable (ref, content)
|
|
return mempty
|
|
where
|
|
blocksTillHeaderOrNote =
|
|
many1Till block (eof <|> () <$ lookAhead noteMarker
|
|
<|> () <$ lookAhead headerStart)
|
|
|
|
-- Paragraphs or Plain text
|
|
paraOrPlain :: OrgParser (F Blocks)
|
|
paraOrPlain = try $ do
|
|
ils <- parseInlines
|
|
nl <- option False (newline >> return True)
|
|
try (guard nl >> notFollowedBy (orderedListStart <|> bulletListStart) >>
|
|
return (B.para <$> ils))
|
|
<|> (return (B.plain <$> ils))
|
|
|
|
inlinesTillNewline :: OrgParser (F Inlines)
|
|
inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline
|
|
|
|
|
|
--
|
|
-- list blocks
|
|
--
|
|
|
|
list :: OrgParser (F Blocks)
|
|
list = choice [ definitionList, bulletList, orderedList ] <?> "list"
|
|
|
|
definitionList :: OrgParser (F Blocks)
|
|
definitionList = fmap B.definitionList . fmap compactify'DL . sequence
|
|
<$> many1 (definitionListItem bulletListStart)
|
|
|
|
bulletList :: OrgParser (F Blocks)
|
|
bulletList = fmap B.bulletList . fmap compactify' . sequence
|
|
<$> many1 (listItem bulletListStart)
|
|
|
|
orderedList :: OrgParser (F Blocks)
|
|
orderedList = fmap B.orderedList . fmap compactify' . sequence
|
|
<$> many1 (listItem orderedListStart)
|
|
|
|
genericListStart :: OrgParser String
|
|
-> OrgParser Int
|
|
genericListStart listMarker = try $
|
|
(+) <$> (length <$> many spaceChar)
|
|
<*> (length <$> listMarker <* many1 spaceChar)
|
|
|
|
-- parses bullet list start and returns its length (excl. following whitespace)
|
|
bulletListStart :: OrgParser Int
|
|
bulletListStart = genericListStart bulletListMarker
|
|
where bulletListMarker = pure <$> oneOf "*-+"
|
|
|
|
orderedListStart :: OrgParser Int
|
|
orderedListStart = genericListStart orderedListMarker
|
|
-- Ordered list markers allowed in org-mode
|
|
where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
|
|
|
|
definitionListItem :: OrgParser Int
|
|
-> OrgParser (F (Inlines, [Blocks]))
|
|
definitionListItem parseMarkerGetLength = try $ do
|
|
markerLength <- parseMarkerGetLength
|
|
term <- manyTill (noneOf "\n\r") (try $ string "::")
|
|
line1 <- anyLineNewline
|
|
blank <- option "" ("\n" <$ blankline)
|
|
cont <- concat <$> many (listContinuation markerLength)
|
|
term' <- parseFromString inline term
|
|
contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont
|
|
return $ (,) <$> term' <*> fmap (:[]) contents'
|
|
|
|
|
|
-- parse raw text for one list item, excluding start marker and continuations
|
|
listItem :: OrgParser Int
|
|
-> OrgParser (F Blocks)
|
|
listItem start = try $ do
|
|
markerLength <- try start
|
|
firstLine <- anyLineNewline
|
|
blank <- option "" ("\n" <$ blankline)
|
|
rest <- concat <$> many (listContinuation markerLength)
|
|
parseFromString parseBlocks $ firstLine ++ blank ++ rest
|
|
|
|
-- continuation of a list item - indented and separated by blankline or endline.
|
|
-- Note: nested lists are parsed as continuations.
|
|
listContinuation :: Int
|
|
-> OrgParser String
|
|
listContinuation markerLength = try $
|
|
notFollowedBy' blankline
|
|
*> (mappend <$> (concat <$> many1 listLine)
|
|
<*> many blankline)
|
|
where listLine = try $ indentWith markerLength *> anyLineNewline
|
|
|
|
anyLineNewline :: OrgParser String
|
|
anyLineNewline = (++ "\n") <$> anyLine
|
|
|
|
|
|
--
|
|
-- inline
|
|
--
|
|
|
|
inline :: OrgParser (F Inlines)
|
|
inline =
|
|
choice [ whitespace
|
|
, linebreak
|
|
, cite
|
|
, footnote
|
|
, linkOrImage
|
|
, anchor
|
|
, inlineCodeBlock
|
|
, str
|
|
, endline
|
|
, emph
|
|
, strong
|
|
, strikeout
|
|
, underline
|
|
, code
|
|
, math
|
|
, displayMath
|
|
, verbatim
|
|
, subscript
|
|
, superscript
|
|
, inlineLaTeX
|
|
, symbol
|
|
] <* (guard =<< newlinesCountWithinLimits)
|
|
<?> "inline"
|
|
|
|
parseInlines :: OrgParser (F Inlines)
|
|
parseInlines = trimInlinesF . mconcat <$> many1 inline
|
|
|
|
-- treat these as potentially non-text when parsing inline:
|
|
specialChars :: [Char]
|
|
specialChars = "\"$'()*+-./:<=>[\\]^_{|}~"
|
|
|
|
|
|
whitespace :: OrgParser (F Inlines)
|
|
whitespace = pure B.space <$ skipMany1 spaceChar
|
|
<* updateLastPreCharPos
|
|
<* updateLastForbiddenCharPos
|
|
<?> "whitespace"
|
|
|
|
linebreak :: OrgParser (F Inlines)
|
|
linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
|
|
|
|
str :: OrgParser (F Inlines)
|
|
str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
|
|
<* updateLastStrPos
|
|
|
|
-- | An endline character that can be treated as a space, not a structural
|
|
-- break. This should reflect the values of the Emacs variable
|
|
-- @org-element-pagaraph-separate@.
|
|
endline :: OrgParser (F Inlines)
|
|
endline = try $ do
|
|
newline
|
|
notFollowedBy blankline
|
|
notFollowedBy' exampleLine
|
|
notFollowedBy' hline
|
|
notFollowedBy' noteMarker
|
|
notFollowedBy' tableStart
|
|
notFollowedBy' drawerStart
|
|
notFollowedBy' headerStart
|
|
notFollowedBy' metaLineStart
|
|
notFollowedBy' latexEnvStart
|
|
notFollowedBy' commentLineStart
|
|
notFollowedBy' bulletListStart
|
|
notFollowedBy' orderedListStart
|
|
decEmphasisNewlinesCount
|
|
guard =<< newlinesCountWithinLimits
|
|
updateLastPreCharPos
|
|
return . return $ B.space
|
|
|
|
cite :: OrgParser (F Inlines)
|
|
cite = try $ do
|
|
guardEnabled Ext_citations
|
|
(cs, raw) <- withRaw normalCite
|
|
return $ (flip B.cite (B.text raw)) <$> cs
|
|
|
|
normalCite :: OrgParser (F [Citation])
|
|
normalCite = try $ char '['
|
|
*> skipSpaces
|
|
*> citeList
|
|
<* skipSpaces
|
|
<* char ']'
|
|
|
|
citeList :: OrgParser (F [Citation])
|
|
citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces)
|
|
|
|
citation :: OrgParser (F Citation)
|
|
citation = try $ do
|
|
pref <- prefix
|
|
(suppress_author, key) <- citeKey
|
|
suff <- suffix
|
|
return $ do
|
|
x <- pref
|
|
y <- suff
|
|
return $ Citation{ citationId = key
|
|
, citationPrefix = B.toList x
|
|
, citationSuffix = B.toList y
|
|
, citationMode = if suppress_author
|
|
then SuppressAuthor
|
|
else NormalCitation
|
|
, citationNoteNum = 0
|
|
, citationHash = 0
|
|
}
|
|
where
|
|
prefix = trimInlinesF . mconcat <$>
|
|
manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
|
|
suffix = try $ do
|
|
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
|
|
skipSpaces
|
|
rest <- trimInlinesF . mconcat <$>
|
|
many (notFollowedBy (oneOf ";]") *> inline)
|
|
return $ if hasSpace
|
|
then (B.space <>) <$> rest
|
|
else rest
|
|
|
|
footnote :: OrgParser (F Inlines)
|
|
footnote = try $ inlineNote <|> referencedNote
|
|
|
|
inlineNote :: OrgParser (F Inlines)
|
|
inlineNote = try $ do
|
|
string "[fn:"
|
|
ref <- many alphaNum
|
|
char ':'
|
|
note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
|
|
when (not $ null ref) $
|
|
addToNotesTable ("fn:" ++ ref, note)
|
|
return $ B.note <$> note
|
|
|
|
referencedNote :: OrgParser (F Inlines)
|
|
referencedNote = try $ do
|
|
ref <- noteMarker
|
|
return $ do
|
|
notes <- asksF orgStateNotes'
|
|
case lookup ref notes of
|
|
Nothing -> return $ B.str $ "[" ++ ref ++ "]"
|
|
Just contents -> do
|
|
st <- askF
|
|
let contents' = runF contents st{ orgStateNotes' = [] }
|
|
return $ B.note contents'
|
|
|
|
noteMarker :: OrgParser String
|
|
noteMarker = try $ do
|
|
char '['
|
|
choice [ many1Till digit (char ']')
|
|
, (++) <$> string "fn:"
|
|
<*> many1Till (noneOf "\n\r\t ") (char ']')
|
|
]
|
|
|
|
linkOrImage :: OrgParser (F Inlines)
|
|
linkOrImage = explicitOrImageLink
|
|
<|> selflinkOrImage
|
|
<|> angleLink
|
|
<|> plainLink
|
|
<?> "link or image"
|
|
|
|
explicitOrImageLink :: OrgParser (F Inlines)
|
|
explicitOrImageLink = try $ do
|
|
char '['
|
|
srcF <- applyCustomLinkFormat =<< linkTarget
|
|
title <- enclosedRaw (char '[') (char ']')
|
|
title' <- parseFromString (mconcat <$> many inline) title
|
|
char ']'
|
|
return $ do
|
|
src <- srcF
|
|
if isImageFilename src && isImageFilename title
|
|
then pure $ B.link src "" $ B.image title mempty mempty
|
|
else linkToInlinesF src =<< title'
|
|
|
|
selflinkOrImage :: OrgParser (F Inlines)
|
|
selflinkOrImage = try $ do
|
|
src <- char '[' *> linkTarget <* char ']'
|
|
return $ linkToInlinesF src (B.str src)
|
|
|
|
plainLink :: OrgParser (F Inlines)
|
|
plainLink = try $ do
|
|
(orig, src) <- uri
|
|
returnF $ B.link src "" (B.str orig)
|
|
|
|
angleLink :: OrgParser (F Inlines)
|
|
angleLink = try $ do
|
|
char '<'
|
|
link <- plainLink
|
|
char '>'
|
|
return link
|
|
|
|
selfTarget :: OrgParser String
|
|
selfTarget = try $ char '[' *> linkTarget <* char ']'
|
|
|
|
linkTarget :: OrgParser String
|
|
linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
|
|
|
|
applyCustomLinkFormat :: String -> OrgParser (F String)
|
|
applyCustomLinkFormat link = do
|
|
let (linkType, rest) = break (== ':') link
|
|
return $ do
|
|
formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
|
|
return $ maybe link ($ drop 1 rest) formatter
|
|
|
|
|
|
linkToInlinesF :: String -> Inlines -> F Inlines
|
|
linkToInlinesF s@('#':_) = pure . B.link s ""
|
|
linkToInlinesF s
|
|
| isImageFilename s = const . pure $ B.image s "" ""
|
|
| isUri s = pure . B.link s ""
|
|
| isRelativeUrl s = pure . B.link s ""
|
|
linkToInlinesF s = \title -> do
|
|
anchorB <- (s `elem`) <$> asksF orgStateAnchorIds
|
|
if anchorB
|
|
then pure $ B.link ('#':s) "" title
|
|
else pure $ B.emph title
|
|
|
|
isRelativeUrl :: String -> Bool
|
|
isRelativeUrl s = (':' `notElem` s) && ("./" `isPrefixOf` s)
|
|
|
|
isUri :: String -> Bool
|
|
isUri s = let (scheme, path) = break (== ':') s
|
|
in all (\c -> isAlphaNum c || c `elem` ".-") scheme
|
|
&& not (null path)
|
|
|
|
isImageFilename :: String -> Bool
|
|
isImageFilename filename =
|
|
any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
|
|
(any (\x -> (x++":") `isPrefixOf` filename) protocols ||
|
|
':' `notElem` filename)
|
|
where
|
|
imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
|
|
protocols = [ "file", "http", "https" ]
|
|
|
|
-- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
|
|
-- @anchor-id@ set as id. Legal anchors in org-mode are defined through
|
|
-- @org-target-regexp@, which is fairly liberal. Since no link is created if
|
|
-- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
|
|
-- an anchor.
|
|
|
|
anchor :: OrgParser (F Inlines)
|
|
anchor = try $ do
|
|
anchorId <- parseAnchor
|
|
recordAnchorId anchorId
|
|
returnF $ B.spanWith (solidify anchorId, [], []) mempty
|
|
where
|
|
parseAnchor = string "<<"
|
|
*> many1 (noneOf "\t\n\r<>\"' ")
|
|
<* string ">>"
|
|
<* skipSpaces
|
|
|
|
-- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors
|
|
-- the org function @org-export-solidify-link-text@.
|
|
|
|
solidify :: String -> String
|
|
solidify = map replaceSpecialChar
|
|
where replaceSpecialChar c
|
|
| isAlphaNum c = c
|
|
| c `elem` "_.-:" = c
|
|
| otherwise = '-'
|
|
|
|
-- | Parses an inline code block and marks it as an babel block.
|
|
inlineCodeBlock :: OrgParser (F Inlines)
|
|
inlineCodeBlock = try $ do
|
|
string "src_"
|
|
lang <- many1 orgArgWordChar
|
|
opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
|
|
inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
|
|
let attrClasses = [translateLang lang, rundocBlockClass]
|
|
let attrKeyVal = map toRundocAttrib (("language", lang) : opts)
|
|
returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
|
|
|
|
enclosedByPair :: Char -- ^ opening char
|
|
-> Char -- ^ closing char
|
|
-> OrgParser a -- ^ parser
|
|
-> OrgParser [a]
|
|
enclosedByPair s e p = char s *> many1Till p (char e)
|
|
|
|
emph :: OrgParser (F Inlines)
|
|
emph = fmap B.emph <$> emphasisBetween '/'
|
|
|
|
strong :: OrgParser (F Inlines)
|
|
strong = fmap B.strong <$> emphasisBetween '*'
|
|
|
|
strikeout :: OrgParser (F Inlines)
|
|
strikeout = fmap B.strikeout <$> emphasisBetween '+'
|
|
|
|
-- There is no underline, so we use strong instead.
|
|
underline :: OrgParser (F Inlines)
|
|
underline = fmap B.strong <$> emphasisBetween '_'
|
|
|
|
verbatim :: OrgParser (F Inlines)
|
|
verbatim = return . B.code <$> verbatimBetween '='
|
|
|
|
code :: OrgParser (F Inlines)
|
|
code = return . B.code <$> verbatimBetween '~'
|
|
|
|
subscript :: OrgParser (F Inlines)
|
|
subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr)
|
|
|
|
superscript :: OrgParser (F Inlines)
|
|
superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
|
|
|
|
math :: OrgParser (F Inlines)
|
|
math = return . B.math <$> choice [ math1CharBetween '$'
|
|
, mathStringBetween '$'
|
|
, rawMathBetween "\\(" "\\)"
|
|
]
|
|
|
|
displayMath :: OrgParser (F Inlines)
|
|
displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
|
|
, rawMathBetween "$$" "$$"
|
|
]
|
|
symbol :: OrgParser (F Inlines)
|
|
symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
|
|
where updatePositions c
|
|
| c `elem` emphasisPreChars = c <$ updateLastPreCharPos
|
|
| c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos
|
|
| otherwise = return c
|
|
|
|
emphasisBetween :: Char
|
|
-> OrgParser (F Inlines)
|
|
emphasisBetween c = try $ do
|
|
startEmphasisNewlinesCounting emphasisAllowedNewlines
|
|
res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
|
|
isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState
|
|
when isTopLevelEmphasis
|
|
resetEmphasisNewlines
|
|
return res
|
|
|
|
verbatimBetween :: Char
|
|
-> OrgParser String
|
|
verbatimBetween c = try $
|
|
emphasisStart c *>
|
|
many1TillNOrLessNewlines 1 (noneOf "\n\r") (emphasisEnd c)
|
|
|
|
-- | Parses a raw string delimited by @c@ using Org's math rules
|
|
mathStringBetween :: Char
|
|
-> OrgParser String
|
|
mathStringBetween c = try $ do
|
|
mathStart c
|
|
body <- many1TillNOrLessNewlines mathAllowedNewlines
|
|
(noneOf (c:"\n\r"))
|
|
(lookAhead $ mathEnd c)
|
|
final <- mathEnd c
|
|
return $ body ++ [final]
|
|
|
|
-- | Parse a single character between @c@ using math rules
|
|
math1CharBetween :: Char
|
|
-> OrgParser String
|
|
math1CharBetween c = try $ do
|
|
char c
|
|
res <- noneOf $ c:mathForbiddenBorderChars
|
|
char c
|
|
eof <|> () <$ lookAhead (oneOf mathPostChars)
|
|
return [res]
|
|
|
|
rawMathBetween :: String
|
|
-> String
|
|
-> OrgParser String
|
|
rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)
|
|
|
|
-- | Parses the start (opening character) of emphasis
|
|
emphasisStart :: Char -> OrgParser Char
|
|
emphasisStart c = try $ do
|
|
guard =<< afterEmphasisPreChar
|
|
guard =<< notAfterString
|
|
char c
|
|
lookAhead (noneOf emphasisForbiddenBorderChars)
|
|
pushToInlineCharStack c
|
|
return c
|
|
|
|
-- | Parses the closing character of emphasis
|
|
emphasisEnd :: Char -> OrgParser Char
|
|
emphasisEnd c = try $ do
|
|
guard =<< notAfterForbiddenBorderChar
|
|
char c
|
|
eof <|> () <$ lookAhead acceptablePostChars
|
|
updateLastStrPos
|
|
popInlineCharStack
|
|
return c
|
|
where acceptablePostChars =
|
|
surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars)
|
|
|
|
mathStart :: Char -> OrgParser Char
|
|
mathStart c = try $
|
|
char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))
|
|
|
|
mathEnd :: Char -> OrgParser Char
|
|
mathEnd c = try $ do
|
|
res <- noneOf (c:mathForbiddenBorderChars)
|
|
char c
|
|
eof <|> () <$ lookAhead (oneOf mathPostChars)
|
|
return res
|
|
|
|
|
|
enclosedInlines :: OrgParser a
|
|
-> OrgParser b
|
|
-> OrgParser (F Inlines)
|
|
enclosedInlines start end = try $
|
|
trimInlinesF . mconcat <$> enclosed start end inline
|
|
|
|
enclosedRaw :: OrgParser a
|
|
-> OrgParser b
|
|
-> OrgParser String
|
|
enclosedRaw start end = try $
|
|
start *> (onSingleLine <|> spanningTwoLines)
|
|
where onSingleLine = try $ many1Till (noneOf "\n\r") end
|
|
spanningTwoLines = try $
|
|
anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
|
|
|
|
-- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume
|
|
-- newlines.
|
|
many1TillNOrLessNewlines :: Int
|
|
-> OrgParser Char
|
|
-> OrgParser a
|
|
-> OrgParser String
|
|
many1TillNOrLessNewlines n p end = try $
|
|
nMoreLines (Just n) mempty >>= oneOrMore
|
|
where
|
|
nMoreLines Nothing cs = return cs
|
|
nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine
|
|
nMoreLines k cs = try $ (final k cs <|> rest k cs)
|
|
>>= uncurry nMoreLines
|
|
final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine
|
|
rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p P.newline)
|
|
finalLine = try $ manyTill p end
|
|
minus1 k = k - 1
|
|
oneOrMore cs = guard (not $ null cs) *> return cs
|
|
|
|
-- Org allows customization of the way it reads emphasis. We use the defaults
|
|
-- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
|
|
-- for details).
|
|
|
|
-- | Chars allowed to occur before emphasis (spaces and newlines are ok, too)
|
|
emphasisPreChars :: [Char]
|
|
emphasisPreChars = "\t \"'({"
|
|
|
|
-- | Chars allowed at after emphasis
|
|
emphasisPostChars :: [Char]
|
|
emphasisPostChars = "\t\n !\"'),-.:;?\\}"
|
|
|
|
-- | Chars not allowed at the (inner) border of emphasis
|
|
emphasisForbiddenBorderChars :: [Char]
|
|
emphasisForbiddenBorderChars = "\t\n\r \"',"
|
|
|
|
-- | The maximum number of newlines within
|
|
emphasisAllowedNewlines :: Int
|
|
emphasisAllowedNewlines = 1
|
|
|
|
-- LaTeX-style math: see `org-latex-regexps` for details
|
|
|
|
-- | Chars allowed after an inline ($...$) math statement
|
|
mathPostChars :: [Char]
|
|
mathPostChars = "\t\n \"'),-.:;?"
|
|
|
|
-- | Chars not allowed at the (inner) border of math
|
|
mathForbiddenBorderChars :: [Char]
|
|
mathForbiddenBorderChars = "\t\n\r ,;.$"
|
|
|
|
-- | Maximum number of newlines in an inline math statement
|
|
mathAllowedNewlines :: Int
|
|
mathAllowedNewlines = 2
|
|
|
|
-- | Whether we are right behind a char allowed before emphasis
|
|
afterEmphasisPreChar :: OrgParser Bool
|
|
afterEmphasisPreChar = do
|
|
pos <- getPosition
|
|
lastPrePos <- orgStateLastPreCharPos <$> getState
|
|
return . fromMaybe True $ (== pos) <$> lastPrePos
|
|
|
|
-- | Whether the parser is right after a forbidden border char
|
|
notAfterForbiddenBorderChar :: OrgParser Bool
|
|
notAfterForbiddenBorderChar = do
|
|
pos <- getPosition
|
|
lastFBCPos <- orgStateLastForbiddenCharPos <$> getState
|
|
return $ lastFBCPos /= Just pos
|
|
|
|
-- | Read a sub- or superscript expression
|
|
subOrSuperExpr :: OrgParser (F Inlines)
|
|
subOrSuperExpr = try $
|
|
choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r")
|
|
, enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
|
|
, simpleSubOrSuperString
|
|
] >>= parseFromString (mconcat <$> many inline)
|
|
where enclosing (left, right) s = left : s ++ [right]
|
|
|
|
simpleSubOrSuperString :: OrgParser String
|
|
simpleSubOrSuperString = try $
|
|
choice [ string "*"
|
|
, mappend <$> option [] ((:[]) <$> oneOf "+-")
|
|
<*> many1 alphaNum
|
|
]
|
|
|
|
inlineLaTeX :: OrgParser (F Inlines)
|
|
inlineLaTeX = try $ do
|
|
cmd <- inlineLaTeXCommand
|
|
maybe mzero returnF $ parseAsMath cmd `mplus` parseAsInlineLaTeX cmd
|
|
where
|
|
parseAsMath :: String -> Maybe Inlines
|
|
parseAsMath cs = maybeRight $ B.fromList <$> texMathToPandoc DisplayInline cs
|
|
|
|
parseAsInlineLaTeX :: String -> Maybe Inlines
|
|
parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs
|
|
|
|
state :: ParserState
|
|
state = def{ stateOptions = def{ readerParseRaw = True }}
|
|
|
|
maybeRight :: Either a b -> Maybe b
|
|
maybeRight = either (const Nothing) Just
|
|
|
|
inlineLaTeXCommand :: OrgParser String
|
|
inlineLaTeXCommand = try $ do
|
|
rest <- getInput
|
|
case runParser rawLaTeXInline def "source" rest of
|
|
Right (RawInline _ cs) -> do
|
|
let len = length cs
|
|
count len anyChar
|
|
return cs
|
|
_ -> mzero
|