Org reader: extract inline parser to module
Inline parsing code is moved to a separate module. Parsers for block starts are extracted as well, as those are used in the `endline` parser. This is part of the Org-mode reader cleanup effort.
This commit is contained in:
parent
a340c7249f
commit
39e8b4276e
5 changed files with 889 additions and 756 deletions
|
@ -393,6 +393,8 @@ Library
|
|||
Text.Pandoc.Readers.Odt.Generic.XMLConverter,
|
||||
Text.Pandoc.Readers.Odt.Arrows.State,
|
||||
Text.Pandoc.Readers.Odt.Arrows.Utils,
|
||||
Text.Pandoc.Readers.Org.BlockStarts,
|
||||
Text.Pandoc.Readers.Org.Inlines,
|
||||
Text.Pandoc.Readers.Org.ParserState,
|
||||
Text.Pandoc.Readers.Org.Parsing,
|
||||
Text.Pandoc.Writers.Shared,
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
{-# LANGUAGE FlexibleContexts #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-
|
||||
Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
|
||||
|
||||
|
@ -29,6 +28,8 @@ Conversion of org-mode formatted plain text to 'Pandoc' document.
|
|||
-}
|
||||
module Text.Pandoc.Readers.Org ( readOrg ) where
|
||||
|
||||
import Text.Pandoc.Readers.Org.BlockStarts
|
||||
import Text.Pandoc.Readers.Org.Inlines
|
||||
import Text.Pandoc.Readers.Org.ParserState
|
||||
import Text.Pandoc.Readers.Org.Parsing
|
||||
|
||||
|
@ -38,19 +39,16 @@ import Text.Pandoc.Definition
|
|||
import Text.Pandoc.Compat.Monoid ((<>))
|
||||
import Text.Pandoc.Error
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
|
||||
import Text.Pandoc.Shared (compactify', compactify'DL)
|
||||
import Text.TeXMath (readTeX, writePandoc, DisplayType(..))
|
||||
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
|
||||
import Text.Pandoc.Shared ( compactify', compactify'DL )
|
||||
|
||||
import Control.Arrow (first)
|
||||
import Control.Monad (foldM, guard, mplus, mzero, when)
|
||||
import Control.Arrow ( first )
|
||||
import Control.Monad ( foldM, guard, mzero )
|
||||
import Control.Monad.Reader ( runReader )
|
||||
import Data.Char (isAlphaNum, isSpace, toLower, toUpper)
|
||||
import Data.List ( foldl', intersperse, isPrefixOf, isSuffixOf )
|
||||
import Data.Char ( toLower, toUpper)
|
||||
import Data.List ( foldl', intersperse, isPrefixOf )
|
||||
import qualified Data.Map as M
|
||||
import Data.Maybe ( fromMaybe, isNothing )
|
||||
import Network.HTTP (urlEncode)
|
||||
import Network.HTTP ( urlEncode )
|
||||
|
||||
|
||||
-- | Parse org-mode string and return a Pandoc document.
|
||||
|
@ -59,54 +57,6 @@ readOrg :: ReaderOptions -- ^ Reader options
|
|||
-> Either PandocError Pandoc
|
||||
readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
|
||||
|
||||
--
|
||||
-- Functions acting on the parser state
|
||||
--
|
||||
recordAnchorId :: String -> OrgParser ()
|
||||
recordAnchorId i = updateState $ \s ->
|
||||
s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
|
||||
|
||||
pushToInlineCharStack :: Char -> OrgParser ()
|
||||
pushToInlineCharStack c = updateState $ \s ->
|
||||
s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
|
||||
|
||||
popInlineCharStack :: OrgParser ()
|
||||
popInlineCharStack = updateState $ \s ->
|
||||
s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }
|
||||
|
||||
surroundingEmphasisChar :: OrgParser [Char]
|
||||
surroundingEmphasisChar =
|
||||
take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
|
||||
|
||||
startEmphasisNewlinesCounting :: Int -> OrgParser ()
|
||||
startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
|
||||
s{ orgStateEmphasisNewlines = Just maxNewlines }
|
||||
|
||||
decEmphasisNewlinesCount :: OrgParser ()
|
||||
decEmphasisNewlinesCount = updateState $ \s ->
|
||||
s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
|
||||
|
||||
newlinesCountWithinLimits :: OrgParser Bool
|
||||
newlinesCountWithinLimits = do
|
||||
st <- getState
|
||||
return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True
|
||||
|
||||
resetEmphasisNewlines :: OrgParser ()
|
||||
resetEmphasisNewlines = updateState $ \s ->
|
||||
s{ orgStateEmphasisNewlines = Nothing }
|
||||
|
||||
addLinkFormat :: String
|
||||
-> (String -> String)
|
||||
-> OrgParser ()
|
||||
addLinkFormat key formatter = updateState $ \s ->
|
||||
let fs = orgStateLinkFormatters s
|
||||
in s{ orgStateLinkFormatters = M.insert key formatter fs }
|
||||
|
||||
addToNotesTable :: OrgNoteRecord -> OrgParser ()
|
||||
addToNotesTable note = do
|
||||
oldnotes <- orgStateNotes' <$> getState
|
||||
updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
|
||||
|
||||
--
|
||||
-- Export Settings
|
||||
--
|
||||
|
@ -259,7 +209,7 @@ block = choice [ mempty <$ blanklines
|
|||
, genericDrawer
|
||||
, specialLine
|
||||
, header
|
||||
, return <$> hline
|
||||
, horizontalRule
|
||||
, list
|
||||
, latexFragment
|
||||
, noteBlock
|
||||
|
@ -457,9 +407,6 @@ indentWith num = do
|
|||
|
||||
type SwitchOption = (Char, Maybe String)
|
||||
|
||||
orgArgWord :: OrgParser String
|
||||
orgArgWord = many1 orgArgWordChar
|
||||
|
||||
-- | Parse code block arguments
|
||||
-- TODO: We currently don't handle switches.
|
||||
codeHeaderArgs :: OrgParser ([String], [(String, String)])
|
||||
|
@ -474,7 +421,10 @@ codeHeaderArgs = try $ do
|
|||
, map toRundocAttrib (("language", language) : parameters)
|
||||
)
|
||||
else ([ pandocLang ], parameters)
|
||||
where hasRundocParameters = not . null
|
||||
where
|
||||
hasRundocParameters = not . null
|
||||
toRundocAttrib = first ("rundoc-" ++)
|
||||
|
||||
|
||||
switch :: OrgParser SwitchOption
|
||||
switch = try $ simpleSwitch <|> lineNumbersSwitch
|
||||
|
@ -508,17 +458,6 @@ blockOption = try $ do
|
|||
paramValue <- option "yes" orgParamValue
|
||||
return (argKey, paramValue)
|
||||
|
||||
inlineBlockOption :: OrgParser (String, String)
|
||||
inlineBlockOption = try $ do
|
||||
argKey <- orgArgKey
|
||||
paramValue <- option "yes" orgInlineParamValue
|
||||
return (argKey, paramValue)
|
||||
|
||||
orgArgKey :: OrgParser String
|
||||
orgArgKey = try $
|
||||
skipSpaces *> char ':'
|
||||
*> many1 orgArgWordChar
|
||||
|
||||
orgParamValue :: OrgParser String
|
||||
orgParamValue = try $
|
||||
skipSpaces
|
||||
|
@ -526,19 +465,6 @@ orgParamValue = try $
|
|||
*> many1 (noneOf "\t\n\r ")
|
||||
<* skipSpaces
|
||||
|
||||
orgInlineParamValue :: OrgParser String
|
||||
orgInlineParamValue = try $
|
||||
skipSpaces
|
||||
*> notFollowedBy (char ':')
|
||||
*> many1 (noneOf "\t\n\r ]")
|
||||
<* skipSpaces
|
||||
|
||||
orgArgWordChar :: OrgParser Char
|
||||
orgArgWordChar = alphaNum <|> oneOf "-_"
|
||||
|
||||
toRundocAttrib :: (String, String) -> (String, String)
|
||||
toRundocAttrib = first ("rundoc-" ++)
|
||||
|
||||
commaEscaped :: String -> String
|
||||
commaEscaped (',':cs@('*':_)) = cs
|
||||
commaEscaped (',':cs@('#':'+':_)) = cs
|
||||
|
@ -552,7 +478,10 @@ exampleCode :: String -> Blocks
|
|||
exampleCode = B.codeBlockWith ("", ["example"], [])
|
||||
|
||||
exampleLine :: OrgParser String
|
||||
exampleLine = try $ skipSpaces *> string ": " *> anyLine
|
||||
exampleLine = try $ exampleLineStart *> anyLine
|
||||
|
||||
horizontalRule :: OrgParser (F Blocks)
|
||||
horizontalRule = return B.horizontalRule <$ try hline
|
||||
|
||||
|
||||
--
|
||||
|
@ -582,11 +511,6 @@ genericDrawer = try $ do
|
|||
drawerDiv :: String -> F Blocks -> F Blocks
|
||||
drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty)
|
||||
|
||||
drawerStart :: OrgParser String
|
||||
drawerStart = try $
|
||||
skipSpaces *> drawerName <* skipSpaces <* newline
|
||||
where drawerName = char ':' *> manyTill nonspaceChar (char ':')
|
||||
|
||||
drawerLine :: OrgParser String
|
||||
drawerLine = anyLine
|
||||
|
||||
|
@ -639,31 +563,38 @@ figure = try $ do
|
|||
let attr = (mempty, mempty, figKeyVals)
|
||||
return $ (B.para . B.imageWith attr src (withFigPrefix figName) <$> figCaption)
|
||||
where
|
||||
withFigPrefix :: String -> String
|
||||
withFigPrefix cs =
|
||||
if "fig:" `isPrefixOf` cs
|
||||
then cs
|
||||
else "fig:" ++ cs
|
||||
|
||||
selfTarget :: OrgParser String
|
||||
selfTarget = try $ char '[' *> linkTarget <* char ']'
|
||||
|
||||
|
||||
--
|
||||
-- Comments, Options and Metadata
|
||||
--
|
||||
|
||||
addLinkFormat :: String
|
||||
-> (String -> String)
|
||||
-> OrgParser ()
|
||||
addLinkFormat key formatter = updateState $ \s ->
|
||||
let fs = orgStateLinkFormatters s
|
||||
in s{ orgStateLinkFormatters = M.insert key formatter fs }
|
||||
|
||||
specialLine :: OrgParser (F Blocks)
|
||||
specialLine = fmap return . try $ metaLine <|> commentLine
|
||||
|
||||
-- The order, in which blocks are tried, makes sure that we're not looking at
|
||||
-- the beginning of a block, so we don't need to check for it
|
||||
metaLine :: OrgParser Blocks
|
||||
metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)
|
||||
|
||||
-- The order, in which blocks are tried, makes sure that we're not looking at
|
||||
-- the beginning of a block, so we don't need to check for it
|
||||
metaLineStart :: OrgParser ()
|
||||
metaLineStart = try $ skipSpaces <* string "#+"
|
||||
|
||||
commentLine :: OrgParser Blocks
|
||||
commentLine = commentLineStart *> anyLine *> pure mempty
|
||||
|
||||
commentLineStart :: OrgParser ()
|
||||
commentLineStart = try $ skipSpaces <* string "# "
|
||||
|
||||
declarationLine :: OrgParser ()
|
||||
declarationLine = try $ do
|
||||
key <- metaKey
|
||||
|
@ -741,23 +672,6 @@ header = try $ do
|
|||
*> many1 tag
|
||||
<* skipSpaces
|
||||
|
||||
headerStart :: OrgParser Int
|
||||
headerStart = try $
|
||||
(length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos
|
||||
|
||||
|
||||
-- Don't use (or need) the reader wrapper here, we want hline to be
|
||||
-- @show@able. Otherwise we can't use it with @notFollowedBy'@.
|
||||
|
||||
-- | Horizontal Line (five -- dashes or more)
|
||||
hline :: OrgParser Blocks
|
||||
hline = try $ do
|
||||
skipSpaces
|
||||
string "-----"
|
||||
many (char '-')
|
||||
skipSpaces
|
||||
newline
|
||||
return B.horizontalRule
|
||||
|
||||
--
|
||||
-- Tables
|
||||
|
@ -793,9 +707,6 @@ orgToPandocTable :: OrgTable
|
|||
orgToPandocTable (OrgTable aligns heads lns) caption =
|
||||
B.table caption (zip aligns $ repeat 0) heads lns
|
||||
|
||||
tableStart :: OrgParser Char
|
||||
tableStart = try $ skipSpaces *> char '|'
|
||||
|
||||
tableRows :: OrgParser [OrgTableRow]
|
||||
tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
|
||||
|
||||
|
@ -899,25 +810,12 @@ latexFragment = try $ do
|
|||
, "\\end{", e, "}\n"
|
||||
]
|
||||
|
||||
latexEnvStart :: OrgParser String
|
||||
latexEnvStart = try $ do
|
||||
skipSpaces *> string "\\begin{"
|
||||
*> latexEnvName
|
||||
<* string "}"
|
||||
<* blankline
|
||||
|
||||
latexEnd :: String -> OrgParser ()
|
||||
latexEnd envName = try $
|
||||
() <$ skipSpaces
|
||||
<* string ("\\end{" ++ envName ++ "}")
|
||||
<* blankline
|
||||
|
||||
-- | Parses a LaTeX environment name.
|
||||
latexEnvName :: OrgParser String
|
||||
latexEnvName = try $ do
|
||||
mappend <$> many1 alphaNum
|
||||
<*> option "" (string "*")
|
||||
|
||||
|
||||
--
|
||||
-- Footnote defintions
|
||||
|
@ -942,7 +840,7 @@ paraOrPlain = try $ do
|
|||
-- is directly followed by a list item, in which case the block is read as
|
||||
-- plain text.
|
||||
try (guard nl
|
||||
*> notFollowedBy (inList *> (orderedListStart <|> bulletListStart))
|
||||
*> notFollowedBy (inList *> (() <$ orderedListStart <|> bulletListStart))
|
||||
*> return (B.para <$> ils))
|
||||
<|> (return (B.plain <$> ils))
|
||||
|
||||
|
@ -971,38 +869,21 @@ orderedList :: OrgParser (F Blocks)
|
|||
orderedList = fmap B.orderedList . fmap compactify' . sequence
|
||||
<$> many1 (listItem orderedListStart)
|
||||
|
||||
genericListStart :: OrgParser String
|
||||
-> OrgParser Int
|
||||
genericListStart listMarker = try $
|
||||
(+) <$> (length <$> many spaceChar)
|
||||
<*> (length <$> listMarker <* many1 spaceChar)
|
||||
|
||||
-- parses bullet list marker. maybe we know the indent level
|
||||
bulletListStart :: OrgParser Int
|
||||
bulletListStart = bulletListStart' Nothing
|
||||
|
||||
bulletListStart' :: Maybe Int -> OrgParser Int
|
||||
-- returns length of bulletList prefix, inclusive of marker
|
||||
bulletListStart' Nothing = do ind <- length <$> many spaceChar
|
||||
when (ind == 0) $ notFollowedBy (char '*')
|
||||
oneOf bullets
|
||||
many1 spaceChar
|
||||
oneOf (bullets $ ind == 0)
|
||||
skipSpaces1
|
||||
return (ind + 1)
|
||||
-- Unindented lists are legal, but they can't use '*' bullets
|
||||
-- We return n to maintain compatibility with the generic listItem
|
||||
bulletListStart' (Just n) = do count (n-1) spaceChar
|
||||
when (n == 1) $ notFollowedBy (char '*')
|
||||
oneOf bullets
|
||||
oneOf (bullets $ n == 1)
|
||||
many1 spaceChar
|
||||
return n
|
||||
|
||||
bullets :: String
|
||||
bullets = "*+-"
|
||||
|
||||
orderedListStart :: OrgParser Int
|
||||
orderedListStart = genericListStart orderedListMarker
|
||||
-- Ordered list markers allowed in org-mode
|
||||
where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
|
||||
-- Unindented lists are legal, but they can't use '*' bullets.
|
||||
-- We return n to maintain compatibility with the generic listItem.
|
||||
bullets :: Bool -> String
|
||||
bullets unindented = if unindented then "+-" else "*+-"
|
||||
|
||||
definitionListItem :: OrgParser Int
|
||||
-> OrgParser (F (Inlines, [Blocks]))
|
||||
|
@ -1040,602 +921,6 @@ listContinuation markerLength = try $
|
|||
<*> many blankline)
|
||||
where listLine = try $ indentWith markerLength *> anyLineNewline
|
||||
|
||||
-- | Parse any line, include the final newline in the output.
|
||||
anyLineNewline :: OrgParser String
|
||||
anyLineNewline = (++ "\n") <$> anyLine
|
||||
|
||||
|
||||
--
|
||||
-- inline
|
||||
--
|
||||
|
||||
inline :: OrgParser (F Inlines)
|
||||
inline =
|
||||
choice [ whitespace
|
||||
, linebreak
|
||||
, cite
|
||||
, footnote
|
||||
, linkOrImage
|
||||
, anchor
|
||||
, inlineCodeBlock
|
||||
, str
|
||||
, endline
|
||||
, emph
|
||||
, strong
|
||||
, strikeout
|
||||
, underline
|
||||
, code
|
||||
, math
|
||||
, displayMath
|
||||
, verbatim
|
||||
, subscript
|
||||
, superscript
|
||||
, inlineLaTeX
|
||||
, smart
|
||||
, symbol
|
||||
] <* (guard =<< newlinesCountWithinLimits)
|
||||
<?> "inline"
|
||||
|
||||
parseInlines :: OrgParser (F Inlines)
|
||||
parseInlines = trimInlinesF . mconcat <$> many1 inline
|
||||
|
||||
-- treat these as potentially non-text when parsing inline:
|
||||
specialChars :: [Char]
|
||||
specialChars = "\"$'()*+-,./:<=>[\\]^_{|}~"
|
||||
|
||||
|
||||
whitespace :: OrgParser (F Inlines)
|
||||
whitespace = pure B.space <$ skipMany1 spaceChar
|
||||
<* updateLastPreCharPos
|
||||
<* updateLastForbiddenCharPos
|
||||
<?> "whitespace"
|
||||
|
||||
linebreak :: OrgParser (F Inlines)
|
||||
linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
|
||||
|
||||
str :: OrgParser (F Inlines)
|
||||
str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
|
||||
<* updateLastStrPos
|
||||
|
||||
-- | An endline character that can be treated as a space, not a structural
|
||||
-- break. This should reflect the values of the Emacs variable
|
||||
-- @org-element-pagaraph-separate@.
|
||||
endline :: OrgParser (F Inlines)
|
||||
endline = try $ do
|
||||
newline
|
||||
notFollowedBy blankline
|
||||
notFollowedBy' exampleLine
|
||||
notFollowedBy' hline
|
||||
notFollowedBy' noteMarker
|
||||
notFollowedBy' tableStart
|
||||
notFollowedBy' drawerStart
|
||||
notFollowedBy' headerStart
|
||||
notFollowedBy' metaLineStart
|
||||
notFollowedBy' latexEnvStart
|
||||
notFollowedBy' commentLineStart
|
||||
notFollowedBy' bulletListStart
|
||||
notFollowedBy' orderedListStart
|
||||
decEmphasisNewlinesCount
|
||||
guard =<< newlinesCountWithinLimits
|
||||
updateLastPreCharPos
|
||||
return . return $ B.softbreak
|
||||
|
||||
cite :: OrgParser (F Inlines)
|
||||
cite = try $ do
|
||||
guardEnabled Ext_citations
|
||||
(cs, raw) <- withRaw normalCite
|
||||
return $ (flip B.cite (B.text raw)) <$> cs
|
||||
|
||||
normalCite :: OrgParser (F [Citation])
|
||||
normalCite = try $ char '['
|
||||
*> skipSpaces
|
||||
*> citeList
|
||||
<* skipSpaces
|
||||
<* char ']'
|
||||
|
||||
citeList :: OrgParser (F [Citation])
|
||||
citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces)
|
||||
|
||||
citation :: OrgParser (F Citation)
|
||||
citation = try $ do
|
||||
pref <- prefix
|
||||
(suppress_author, key) <- citeKey
|
||||
suff <- suffix
|
||||
return $ do
|
||||
x <- pref
|
||||
y <- suff
|
||||
return $ Citation{ citationId = key
|
||||
, citationPrefix = B.toList x
|
||||
, citationSuffix = B.toList y
|
||||
, citationMode = if suppress_author
|
||||
then SuppressAuthor
|
||||
else NormalCitation
|
||||
, citationNoteNum = 0
|
||||
, citationHash = 0
|
||||
}
|
||||
where
|
||||
prefix = trimInlinesF . mconcat <$>
|
||||
manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
|
||||
suffix = try $ do
|
||||
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
|
||||
skipSpaces
|
||||
rest <- trimInlinesF . mconcat <$>
|
||||
many (notFollowedBy (oneOf ";]") *> inline)
|
||||
return $ if hasSpace
|
||||
then (B.space <>) <$> rest
|
||||
else rest
|
||||
|
||||
footnote :: OrgParser (F Inlines)
|
||||
footnote = try $ inlineNote <|> referencedNote
|
||||
|
||||
inlineNote :: OrgParser (F Inlines)
|
||||
inlineNote = try $ do
|
||||
string "[fn:"
|
||||
ref <- many alphaNum
|
||||
char ':'
|
||||
note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
|
||||
when (not $ null ref) $
|
||||
addToNotesTable ("fn:" ++ ref, note)
|
||||
return $ B.note <$> note
|
||||
|
||||
referencedNote :: OrgParser (F Inlines)
|
||||
referencedNote = try $ do
|
||||
ref <- noteMarker
|
||||
return $ do
|
||||
notes <- asksF orgStateNotes'
|
||||
case lookup ref notes of
|
||||
Nothing -> return $ B.str $ "[" ++ ref ++ "]"
|
||||
Just contents -> do
|
||||
st <- askF
|
||||
let contents' = runF contents st{ orgStateNotes' = [] }
|
||||
return $ B.note contents'
|
||||
|
||||
noteMarker :: OrgParser String
|
||||
noteMarker = try $ do
|
||||
char '['
|
||||
choice [ many1Till digit (char ']')
|
||||
, (++) <$> string "fn:"
|
||||
<*> many1Till (noneOf "\n\r\t ") (char ']')
|
||||
]
|
||||
|
||||
linkOrImage :: OrgParser (F Inlines)
|
||||
linkOrImage = explicitOrImageLink
|
||||
<|> selflinkOrImage
|
||||
<|> angleLink
|
||||
<|> plainLink
|
||||
<?> "link or image"
|
||||
|
||||
explicitOrImageLink :: OrgParser (F Inlines)
|
||||
explicitOrImageLink = try $ do
|
||||
char '['
|
||||
srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget
|
||||
title <- enclosedRaw (char '[') (char ']')
|
||||
title' <- parseFromString (mconcat <$> many inline) title
|
||||
char ']'
|
||||
return $ do
|
||||
src <- srcF
|
||||
if isImageFilename title
|
||||
then pure $ B.link src "" $ B.image title mempty mempty
|
||||
else linkToInlinesF src =<< title'
|
||||
|
||||
selflinkOrImage :: OrgParser (F Inlines)
|
||||
selflinkOrImage = try $ do
|
||||
src <- char '[' *> linkTarget <* char ']'
|
||||
return $ linkToInlinesF src (B.str src)
|
||||
|
||||
plainLink :: OrgParser (F Inlines)
|
||||
plainLink = try $ do
|
||||
(orig, src) <- uri
|
||||
returnF $ B.link src "" (B.str orig)
|
||||
|
||||
angleLink :: OrgParser (F Inlines)
|
||||
angleLink = try $ do
|
||||
char '<'
|
||||
link <- plainLink
|
||||
char '>'
|
||||
return link
|
||||
|
||||
selfTarget :: OrgParser String
|
||||
selfTarget = try $ char '[' *> linkTarget <* char ']'
|
||||
|
||||
linkTarget :: OrgParser String
|
||||
linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
|
||||
|
||||
possiblyEmptyLinkTarget :: OrgParser String
|
||||
possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]")
|
||||
|
||||
applyCustomLinkFormat :: String -> OrgParser (F String)
|
||||
applyCustomLinkFormat link = do
|
||||
let (linkType, rest) = break (== ':') link
|
||||
return $ do
|
||||
formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
|
||||
return $ maybe link ($ drop 1 rest) formatter
|
||||
|
||||
-- | Take a link and return a function which produces new inlines when given
|
||||
-- description inlines.
|
||||
linkToInlinesF :: String -> Inlines -> F Inlines
|
||||
linkToInlinesF linkStr =
|
||||
case linkStr of
|
||||
"" -> pure . B.link mempty "" -- wiki link (empty by convention)
|
||||
('#':_) -> pure . B.link linkStr "" -- document-local fraction
|
||||
_ -> case cleanLinkString linkStr of
|
||||
(Just cleanedLink) -> if isImageFilename cleanedLink
|
||||
then const . pure $ B.image cleanedLink "" ""
|
||||
else pure . B.link cleanedLink ""
|
||||
Nothing -> internalLink linkStr -- other internal link
|
||||
|
||||
-- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
|
||||
-- the string does not appear to be a link.
|
||||
cleanLinkString :: String -> Maybe String
|
||||
cleanLinkString s =
|
||||
case s of
|
||||
'/':_ -> Just $ "file://" ++ s -- absolute path
|
||||
'.':'/':_ -> Just s -- relative path
|
||||
'.':'.':'/':_ -> Just s -- relative path
|
||||
-- Relative path or URL (file schema)
|
||||
'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
|
||||
_ | isUrl s -> Just s -- URL
|
||||
_ -> Nothing
|
||||
where
|
||||
isUrl :: String -> Bool
|
||||
isUrl cs =
|
||||
let (scheme, path) = break (== ':') cs
|
||||
in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
|
||||
&& not (null path)
|
||||
|
||||
isImageFilename :: String -> Bool
|
||||
isImageFilename filename =
|
||||
any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
|
||||
(any (\x -> (x++":") `isPrefixOf` filename) protocols ||
|
||||
':' `notElem` filename)
|
||||
where
|
||||
imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
|
||||
protocols = [ "file", "http", "https" ]
|
||||
|
||||
internalLink :: String -> Inlines -> F Inlines
|
||||
internalLink link title = do
|
||||
anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
|
||||
if anchorB
|
||||
then return $ B.link ('#':link) "" title
|
||||
else return $ B.emph title
|
||||
|
||||
-- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
|
||||
-- @anchor-id@ set as id. Legal anchors in org-mode are defined through
|
||||
-- @org-target-regexp@, which is fairly liberal. Since no link is created if
|
||||
-- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
|
||||
-- an anchor.
|
||||
|
||||
anchor :: OrgParser (F Inlines)
|
||||
anchor = try $ do
|
||||
anchorId <- parseAnchor
|
||||
recordAnchorId anchorId
|
||||
returnF $ B.spanWith (solidify anchorId, [], []) mempty
|
||||
where
|
||||
parseAnchor = string "<<"
|
||||
*> many1 (noneOf "\t\n\r<>\"' ")
|
||||
<* string ">>"
|
||||
<* skipSpaces
|
||||
|
||||
-- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors
|
||||
-- the org function @org-export-solidify-link-text@.
|
||||
|
||||
solidify :: String -> String
|
||||
solidify = map replaceSpecialChar
|
||||
where replaceSpecialChar c
|
||||
| isAlphaNum c = c
|
||||
| c `elem` ("_.-:" :: String) = c
|
||||
| otherwise = '-'
|
||||
|
||||
-- | Parses an inline code block and marks it as an babel block.
|
||||
inlineCodeBlock :: OrgParser (F Inlines)
|
||||
inlineCodeBlock = try $ do
|
||||
string "src_"
|
||||
lang <- many1 orgArgWordChar
|
||||
opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
|
||||
inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
|
||||
let attrClasses = [translateLang lang, rundocBlockClass]
|
||||
let attrKeyVal = map toRundocAttrib (("language", lang) : opts)
|
||||
returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
|
||||
|
||||
enclosedByPair :: Char -- ^ opening char
|
||||
-> Char -- ^ closing char
|
||||
-> OrgParser a -- ^ parser
|
||||
-> OrgParser [a]
|
||||
enclosedByPair s e p = char s *> many1Till p (char e)
|
||||
|
||||
emph :: OrgParser (F Inlines)
|
||||
emph = fmap B.emph <$> emphasisBetween '/'
|
||||
|
||||
strong :: OrgParser (F Inlines)
|
||||
strong = fmap B.strong <$> emphasisBetween '*'
|
||||
|
||||
strikeout :: OrgParser (F Inlines)
|
||||
strikeout = fmap B.strikeout <$> emphasisBetween '+'
|
||||
|
||||
-- There is no underline, so we use strong instead.
|
||||
underline :: OrgParser (F Inlines)
|
||||
underline = fmap B.strong <$> emphasisBetween '_'
|
||||
|
||||
verbatim :: OrgParser (F Inlines)
|
||||
verbatim = return . B.code <$> verbatimBetween '='
|
||||
|
||||
code :: OrgParser (F Inlines)
|
||||
code = return . B.code <$> verbatimBetween '~'
|
||||
|
||||
subscript :: OrgParser (F Inlines)
|
||||
subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr)
|
||||
|
||||
superscript :: OrgParser (F Inlines)
|
||||
superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
|
||||
|
||||
math :: OrgParser (F Inlines)
|
||||
math = return . B.math <$> choice [ math1CharBetween '$'
|
||||
, mathStringBetween '$'
|
||||
, rawMathBetween "\\(" "\\)"
|
||||
]
|
||||
|
||||
displayMath :: OrgParser (F Inlines)
|
||||
displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
|
||||
, rawMathBetween "$$" "$$"
|
||||
]
|
||||
|
||||
updatePositions :: Char
|
||||
-> OrgParser (Char)
|
||||
updatePositions c = do
|
||||
when (c `elem` emphasisPreChars) updateLastPreCharPos
|
||||
when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
|
||||
return c
|
||||
|
||||
symbol :: OrgParser (F Inlines)
|
||||
symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
|
||||
|
||||
emphasisBetween :: Char
|
||||
-> OrgParser (F Inlines)
|
||||
emphasisBetween c = try $ do
|
||||
startEmphasisNewlinesCounting emphasisAllowedNewlines
|
||||
res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
|
||||
isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState
|
||||
when isTopLevelEmphasis
|
||||
resetEmphasisNewlines
|
||||
return res
|
||||
|
||||
verbatimBetween :: Char
|
||||
-> OrgParser String
|
||||
verbatimBetween c = try $
|
||||
emphasisStart c *>
|
||||
many1TillNOrLessNewlines 1 (noneOf "\n\r") (emphasisEnd c)
|
||||
|
||||
-- | Parses a raw string delimited by @c@ using Org's math rules
|
||||
mathStringBetween :: Char
|
||||
-> OrgParser String
|
||||
mathStringBetween c = try $ do
|
||||
mathStart c
|
||||
body <- many1TillNOrLessNewlines mathAllowedNewlines
|
||||
(noneOf (c:"\n\r"))
|
||||
(lookAhead $ mathEnd c)
|
||||
final <- mathEnd c
|
||||
return $ body ++ [final]
|
||||
|
||||
-- | Parse a single character between @c@ using math rules
|
||||
math1CharBetween :: Char
|
||||
-> OrgParser String
|
||||
math1CharBetween c = try $ do
|
||||
char c
|
||||
res <- noneOf $ c:mathForbiddenBorderChars
|
||||
char c
|
||||
eof <|> () <$ lookAhead (oneOf mathPostChars)
|
||||
return [res]
|
||||
|
||||
rawMathBetween :: String
|
||||
-> String
|
||||
-> OrgParser String
|
||||
rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)
|
||||
|
||||
-- | Parses the start (opening character) of emphasis
|
||||
emphasisStart :: Char -> OrgParser Char
|
||||
emphasisStart c = try $ do
|
||||
guard =<< afterEmphasisPreChar
|
||||
guard =<< notAfterString
|
||||
char c
|
||||
lookAhead (noneOf emphasisForbiddenBorderChars)
|
||||
pushToInlineCharStack c
|
||||
return c
|
||||
|
||||
-- | Parses the closing character of emphasis
|
||||
emphasisEnd :: Char -> OrgParser Char
|
||||
emphasisEnd c = try $ do
|
||||
guard =<< notAfterForbiddenBorderChar
|
||||
char c
|
||||
eof <|> () <$ lookAhead acceptablePostChars
|
||||
updateLastStrPos
|
||||
popInlineCharStack
|
||||
return c
|
||||
where acceptablePostChars =
|
||||
surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars)
|
||||
|
||||
mathStart :: Char -> OrgParser Char
|
||||
mathStart c = try $
|
||||
char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))
|
||||
|
||||
mathEnd :: Char -> OrgParser Char
|
||||
mathEnd c = try $ do
|
||||
res <- noneOf (c:mathForbiddenBorderChars)
|
||||
char c
|
||||
eof <|> () <$ lookAhead (oneOf mathPostChars)
|
||||
return res
|
||||
|
||||
|
||||
enclosedInlines :: OrgParser a
|
||||
-> OrgParser b
|
||||
-> OrgParser (F Inlines)
|
||||
enclosedInlines start end = try $
|
||||
trimInlinesF . mconcat <$> enclosed start end inline
|
||||
|
||||
enclosedRaw :: OrgParser a
|
||||
-> OrgParser b
|
||||
-> OrgParser String
|
||||
enclosedRaw start end = try $
|
||||
start *> (onSingleLine <|> spanningTwoLines)
|
||||
where onSingleLine = try $ many1Till (noneOf "\n\r") end
|
||||
spanningTwoLines = try $
|
||||
anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
|
||||
|
||||
-- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume
|
||||
-- newlines.
|
||||
many1TillNOrLessNewlines :: Int
|
||||
-> OrgParser Char
|
||||
-> OrgParser a
|
||||
-> OrgParser String
|
||||
many1TillNOrLessNewlines n p end = try $
|
||||
nMoreLines (Just n) mempty >>= oneOrMore
|
||||
where
|
||||
nMoreLines Nothing cs = return cs
|
||||
nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine
|
||||
nMoreLines k cs = try $ (final k cs <|> rest k cs)
|
||||
>>= uncurry nMoreLines
|
||||
final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine
|
||||
rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline)
|
||||
finalLine = try $ manyTill p end
|
||||
minus1 k = k - 1
|
||||
oneOrMore cs = guard (not $ null cs) *> return cs
|
||||
|
||||
-- Org allows customization of the way it reads emphasis. We use the defaults
|
||||
-- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
|
||||
-- for details).
|
||||
|
||||
-- | Chars allowed to occur before emphasis (spaces and newlines are ok, too)
|
||||
emphasisPreChars :: [Char]
|
||||
emphasisPreChars = "\t \"'({"
|
||||
|
||||
-- | Chars allowed at after emphasis
|
||||
emphasisPostChars :: [Char]
|
||||
emphasisPostChars = "\t\n !\"'),-.:;?\\}"
|
||||
|
||||
-- | Chars not allowed at the (inner) border of emphasis
|
||||
emphasisForbiddenBorderChars :: [Char]
|
||||
emphasisForbiddenBorderChars = "\t\n\r \"',"
|
||||
|
||||
-- | The maximum number of newlines within
|
||||
emphasisAllowedNewlines :: Int
|
||||
emphasisAllowedNewlines = 1
|
||||
|
||||
-- LaTeX-style math: see `org-latex-regexps` for details
|
||||
|
||||
-- | Chars allowed after an inline ($...$) math statement
|
||||
mathPostChars :: [Char]
|
||||
mathPostChars = "\t\n \"'),-.:;?"
|
||||
|
||||
-- | Chars not allowed at the (inner) border of math
|
||||
mathForbiddenBorderChars :: [Char]
|
||||
mathForbiddenBorderChars = "\t\n\r ,;.$"
|
||||
|
||||
-- | Maximum number of newlines in an inline math statement
|
||||
mathAllowedNewlines :: Int
|
||||
mathAllowedNewlines = 2
|
||||
|
||||
-- | Whether we are right behind a char allowed before emphasis
|
||||
afterEmphasisPreChar :: OrgParser Bool
|
||||
afterEmphasisPreChar = do
|
||||
pos <- getPosition
|
||||
lastPrePos <- orgStateLastPreCharPos <$> getState
|
||||
return . fromMaybe True $ (== pos) <$> lastPrePos
|
||||
|
||||
-- | Whether the parser is right after a forbidden border char
|
||||
notAfterForbiddenBorderChar :: OrgParser Bool
|
||||
notAfterForbiddenBorderChar = do
|
||||
pos <- getPosition
|
||||
lastFBCPos <- orgStateLastForbiddenCharPos <$> getState
|
||||
return $ lastFBCPos /= Just pos
|
||||
|
||||
-- | Read a sub- or superscript expression
|
||||
subOrSuperExpr :: OrgParser (F Inlines)
|
||||
subOrSuperExpr = try $
|
||||
choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r")
|
||||
, enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
|
||||
, simpleSubOrSuperString
|
||||
] >>= parseFromString (mconcat <$> many inline)
|
||||
where enclosing (left, right) s = left : s ++ [right]
|
||||
|
||||
simpleSubOrSuperString :: OrgParser String
|
||||
simpleSubOrSuperString = try $ do
|
||||
state <- getState
|
||||
guard . exportSubSuperscripts . orgStateExportSettings $ state
|
||||
choice [ string "*"
|
||||
, mappend <$> option [] ((:[]) <$> oneOf "+-")
|
||||
<*> many1 alphaNum
|
||||
]
|
||||
|
||||
inlineLaTeX :: OrgParser (F Inlines)
|
||||
inlineLaTeX = try $ do
|
||||
cmd <- inlineLaTeXCommand
|
||||
maybe mzero returnF $
|
||||
parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` parseAsInlineLaTeX cmd
|
||||
where
|
||||
parseAsMath :: String -> Maybe Inlines
|
||||
parseAsMath cs = B.fromList <$> texMathToPandoc cs
|
||||
|
||||
parseAsInlineLaTeX :: String -> Maybe Inlines
|
||||
parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs
|
||||
|
||||
parseAsMathMLSym :: String -> Maybe Inlines
|
||||
parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs)
|
||||
-- drop initial backslash and any trailing "{}"
|
||||
where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1
|
||||
|
||||
state :: ParserState
|
||||
state = def{ stateOptions = def{ readerParseRaw = True }}
|
||||
|
||||
texMathToPandoc :: String -> Maybe [Inline]
|
||||
texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline
|
||||
|
||||
maybeRight :: Either a b -> Maybe b
|
||||
maybeRight = either (const Nothing) Just
|
||||
|
||||
inlineLaTeXCommand :: OrgParser String
|
||||
inlineLaTeXCommand = try $ do
|
||||
rest <- getInput
|
||||
case runParser rawLaTeXInline def "source" rest of
|
||||
Right (RawInline _ cs) -> do
|
||||
-- drop any trailing whitespace, those are not be part of the command as
|
||||
-- far as org mode is concerned.
|
||||
let cmdNoSpc = dropWhileEnd isSpace cs
|
||||
let len = length cmdNoSpc
|
||||
count len anyChar
|
||||
return cmdNoSpc
|
||||
_ -> mzero
|
||||
|
||||
-- Taken from Data.OldList.
|
||||
dropWhileEnd :: (a -> Bool) -> [a] -> [a]
|
||||
dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) []
|
||||
|
||||
smart :: OrgParser (F Inlines)
|
||||
smart = do
|
||||
getOption readerSmart >>= guard
|
||||
doubleQuoted <|> singleQuoted <|>
|
||||
choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses])
|
||||
where
|
||||
orgDash = dash <* updatePositions '-'
|
||||
orgEllipses = ellipses <* updatePositions '.'
|
||||
orgApostrophe =
|
||||
(char '\'' <|> char '\8217') <* updateLastPreCharPos
|
||||
<* updateLastForbiddenCharPos
|
||||
*> return (B.str "\x2019")
|
||||
|
||||
singleQuoted :: OrgParser (F Inlines)
|
||||
singleQuoted = try $ do
|
||||
singleQuoteStart
|
||||
updatePositions '\''
|
||||
withQuoteContext InSingleQuote $
|
||||
fmap B.singleQuoted . trimInlinesF . mconcat <$>
|
||||
many1Till inline (singleQuoteEnd <* updatePositions '\'')
|
||||
|
||||
-- doubleQuoted will handle regular double-quoted sections, as well
|
||||
-- as dialogues with an open double-quote without a close double-quote
|
||||
-- in the same paragraph.
|
||||
doubleQuoted :: OrgParser (F Inlines)
|
||||
doubleQuoted = try $ do
|
||||
doubleQuoteStart
|
||||
updatePositions '"'
|
||||
contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
|
||||
(withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
|
||||
(fmap B.doubleQuoted . trimInlinesF $ contents))
|
||||
<|> (return $ return (B.str "\8220") <> contents)
|
||||
|
|
112
src/Text/Pandoc/Readers/Org/BlockStarts.hs
Normal file
112
src/Text/Pandoc/Readers/Org/BlockStarts.hs
Normal file
|
@ -0,0 +1,112 @@
|
|||
{-
|
||||
Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
-}
|
||||
|
||||
{- |
|
||||
Module : Text.Pandoc.Readers.Org.Options
|
||||
Copyright : Copyright (C) 2014-2016 Albert Krewinkel
|
||||
License : GNU GPL, version 2 or above
|
||||
|
||||
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
|
||||
|
||||
Parsers for Org-mode inline elements.
|
||||
-}
|
||||
module Text.Pandoc.Readers.Org.BlockStarts
|
||||
( exampleLineStart
|
||||
, hline
|
||||
, noteMarker
|
||||
, tableStart
|
||||
, drawerStart
|
||||
, headerStart
|
||||
, metaLineStart
|
||||
, latexEnvStart
|
||||
, commentLineStart
|
||||
, bulletListStart
|
||||
, orderedListStart
|
||||
) where
|
||||
|
||||
import Text.Pandoc.Readers.Org.Parsing
|
||||
|
||||
-- | Horizontal Line (five -- dashes or more)
|
||||
hline :: OrgParser ()
|
||||
hline = try $ do
|
||||
skipSpaces
|
||||
string "-----"
|
||||
many (char '-')
|
||||
skipSpaces
|
||||
newline
|
||||
return ()
|
||||
|
||||
-- | Read the start of a header line, return the header level
|
||||
headerStart :: OrgParser Int
|
||||
headerStart = try $
|
||||
(length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos
|
||||
|
||||
tableStart :: OrgParser Char
|
||||
tableStart = try $ skipSpaces *> char '|'
|
||||
|
||||
latexEnvStart :: OrgParser String
|
||||
latexEnvStart = try $ do
|
||||
skipSpaces *> string "\\begin{"
|
||||
*> latexEnvName
|
||||
<* string "}"
|
||||
<* blankline
|
||||
where
|
||||
latexEnvName :: OrgParser String
|
||||
latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*")
|
||||
|
||||
|
||||
-- | Parses bullet list marker.
|
||||
bulletListStart :: OrgParser ()
|
||||
bulletListStart = try $
|
||||
choice
|
||||
[ () <$ skipSpaces <* oneOf "+-" <* skipSpaces1
|
||||
, () <$ skipSpaces1 <* char '*' <* skipSpaces1
|
||||
]
|
||||
|
||||
genericListStart :: OrgParser String
|
||||
-> OrgParser Int
|
||||
genericListStart listMarker = try $
|
||||
(+) <$> (length <$> many spaceChar)
|
||||
<*> (length <$> listMarker <* many1 spaceChar)
|
||||
|
||||
orderedListStart :: OrgParser Int
|
||||
orderedListStart = genericListStart orderedListMarker
|
||||
-- Ordered list markers allowed in org-mode
|
||||
where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
|
||||
|
||||
drawerStart :: OrgParser String
|
||||
drawerStart = try $
|
||||
skipSpaces *> drawerName <* skipSpaces <* newline
|
||||
where drawerName = char ':' *> manyTill nonspaceChar (char ':')
|
||||
|
||||
metaLineStart :: OrgParser ()
|
||||
metaLineStart = try $ skipSpaces <* string "#+"
|
||||
|
||||
commentLineStart :: OrgParser ()
|
||||
commentLineStart = try $ skipSpaces <* string "# "
|
||||
|
||||
exampleLineStart :: OrgParser ()
|
||||
exampleLineStart = () <$ try (skipSpaces *> string ": ")
|
||||
|
||||
noteMarker :: OrgParser String
|
||||
noteMarker = try $ do
|
||||
char '['
|
||||
choice [ many1Till digit (char ']')
|
||||
, (++) <$> string "fn:"
|
||||
<*> many1Till (noneOf "\n\r\t ") (char ']')
|
||||
]
|
715
src/Text/Pandoc/Readers/Org/Inlines.hs
Normal file
715
src/Text/Pandoc/Readers/Org/Inlines.hs
Normal file
|
@ -0,0 +1,715 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-
|
||||
Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
-}
|
||||
|
||||
{- |
|
||||
Module : Text.Pandoc.Readers.Org.Options
|
||||
Copyright : Copyright (C) 2014-2016 Albert Krewinkel
|
||||
License : GNU GPL, version 2 or above
|
||||
|
||||
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
|
||||
|
||||
Parsers for Org-mode inline elements.
|
||||
-}
|
||||
module Text.Pandoc.Readers.Org.Inlines
|
||||
( inline
|
||||
, addToNotesTable
|
||||
, parseInlines
|
||||
, isImageFilename
|
||||
, linkTarget
|
||||
) where
|
||||
|
||||
import Text.Pandoc.Readers.Org.BlockStarts
|
||||
import Text.Pandoc.Readers.Org.ParserState
|
||||
import Text.Pandoc.Readers.Org.Parsing
|
||||
|
||||
import qualified Text.Pandoc.Builder as B
|
||||
import Text.Pandoc.Builder ( Inlines )
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Compat.Monoid ( (<>) )
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Readers.LaTeX ( inlineCommand, rawLaTeXInline )
|
||||
import Text.TeXMath ( readTeX, writePandoc, DisplayType(..) )
|
||||
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
|
||||
|
||||
import Control.Arrow ( first )
|
||||
import Control.Monad ( guard, mplus, mzero, when )
|
||||
import Data.Char ( isAlphaNum, isSpace )
|
||||
import Data.List ( isPrefixOf, isSuffixOf )
|
||||
import Data.Maybe ( fromMaybe )
|
||||
import qualified Data.Map as M
|
||||
|
||||
-- | Prefix used for Rundoc classes and arguments.
|
||||
rundocPrefix :: String
|
||||
rundocPrefix = "rundoc-"
|
||||
|
||||
-- | The class-name used to mark rundoc blocks.
|
||||
rundocBlockClass :: String
|
||||
rundocBlockClass = rundocPrefix ++ "block"
|
||||
|
||||
toRundocAttrib :: (String, String) -> (String, String)
|
||||
toRundocAttrib = first ("rundoc-" ++)
|
||||
|
||||
translateLang :: String -> String
|
||||
translateLang "C" = "c"
|
||||
translateLang "C++" = "cpp"
|
||||
translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported
|
||||
translateLang "js" = "javascript"
|
||||
translateLang "lisp" = "commonlisp"
|
||||
translateLang "R" = "r"
|
||||
translateLang "sh" = "bash"
|
||||
translateLang "sqlite" = "sql"
|
||||
translateLang cs = cs
|
||||
|
||||
--
|
||||
-- Functions acting on the parser state
|
||||
--
|
||||
recordAnchorId :: String -> OrgParser ()
|
||||
recordAnchorId i = updateState $ \s ->
|
||||
s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
|
||||
|
||||
pushToInlineCharStack :: Char -> OrgParser ()
|
||||
pushToInlineCharStack c = updateState $ \s ->
|
||||
s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
|
||||
|
||||
popInlineCharStack :: OrgParser ()
|
||||
popInlineCharStack = updateState $ \s ->
|
||||
s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }
|
||||
|
||||
surroundingEmphasisChar :: OrgParser [Char]
|
||||
surroundingEmphasisChar =
|
||||
take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
|
||||
|
||||
startEmphasisNewlinesCounting :: Int -> OrgParser ()
|
||||
startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
|
||||
s{ orgStateEmphasisNewlines = Just maxNewlines }
|
||||
|
||||
decEmphasisNewlinesCount :: OrgParser ()
|
||||
decEmphasisNewlinesCount = updateState $ \s ->
|
||||
s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
|
||||
|
||||
newlinesCountWithinLimits :: OrgParser Bool
|
||||
newlinesCountWithinLimits = do
|
||||
st <- getState
|
||||
return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True
|
||||
|
||||
resetEmphasisNewlines :: OrgParser ()
|
||||
resetEmphasisNewlines = updateState $ \s ->
|
||||
s{ orgStateEmphasisNewlines = Nothing }
|
||||
|
||||
addToNotesTable :: OrgNoteRecord -> OrgParser ()
|
||||
addToNotesTable note = do
|
||||
oldnotes <- orgStateNotes' <$> getState
|
||||
updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
|
||||
|
||||
-- | Parse a single Org-mode inline element
|
||||
inline :: OrgParser (F Inlines)
|
||||
inline =
|
||||
choice [ whitespace
|
||||
, linebreak
|
||||
, cite
|
||||
, footnote
|
||||
, linkOrImage
|
||||
, anchor
|
||||
, inlineCodeBlock
|
||||
, str
|
||||
, endline
|
||||
, emph
|
||||
, strong
|
||||
, strikeout
|
||||
, underline
|
||||
, code
|
||||
, math
|
||||
, displayMath
|
||||
, verbatim
|
||||
, subscript
|
||||
, superscript
|
||||
, inlineLaTeX
|
||||
, smart
|
||||
, symbol
|
||||
] <* (guard =<< newlinesCountWithinLimits)
|
||||
<?> "inline"
|
||||
|
||||
parseInlines :: OrgParser (F Inlines)
|
||||
parseInlines = trimInlinesF . mconcat <$> many1 inline
|
||||
|
||||
-- treat these as potentially non-text when parsing inline:
|
||||
specialChars :: [Char]
|
||||
specialChars = "\"$'()*+-,./:<=>[\\]^_{|}~"
|
||||
|
||||
|
||||
whitespace :: OrgParser (F Inlines)
|
||||
whitespace = pure B.space <$ skipMany1 spaceChar
|
||||
<* updateLastPreCharPos
|
||||
<* updateLastForbiddenCharPos
|
||||
<?> "whitespace"
|
||||
|
||||
linebreak :: OrgParser (F Inlines)
|
||||
linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
|
||||
|
||||
str :: OrgParser (F Inlines)
|
||||
str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
|
||||
<* updateLastStrPos
|
||||
|
||||
-- | An endline character that can be treated as a space, not a structural
|
||||
-- break. This should reflect the values of the Emacs variable
|
||||
-- @org-element-pagaraph-separate@.
|
||||
endline :: OrgParser (F Inlines)
|
||||
endline = try $ do
|
||||
newline
|
||||
notFollowedBy blankline
|
||||
notFollowedBy' exampleLineStart
|
||||
notFollowedBy' hline
|
||||
notFollowedBy' noteMarker
|
||||
notFollowedBy' tableStart
|
||||
notFollowedBy' drawerStart
|
||||
notFollowedBy' headerStart
|
||||
notFollowedBy' metaLineStart
|
||||
notFollowedBy' latexEnvStart
|
||||
notFollowedBy' commentLineStart
|
||||
notFollowedBy' bulletListStart
|
||||
notFollowedBy' orderedListStart
|
||||
decEmphasisNewlinesCount
|
||||
guard =<< newlinesCountWithinLimits
|
||||
updateLastPreCharPos
|
||||
return . return $ B.softbreak
|
||||
|
||||
cite :: OrgParser (F Inlines)
|
||||
cite = try $ do
|
||||
guardEnabled Ext_citations
|
||||
(cs, raw) <- withRaw normalCite
|
||||
return $ (flip B.cite (B.text raw)) <$> cs
|
||||
|
||||
normalCite :: OrgParser (F [Citation])
|
||||
normalCite = try $ char '['
|
||||
*> skipSpaces
|
||||
*> citeList
|
||||
<* skipSpaces
|
||||
<* char ']'
|
||||
|
||||
citeList :: OrgParser (F [Citation])
|
||||
citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces)
|
||||
|
||||
citation :: OrgParser (F Citation)
|
||||
citation = try $ do
|
||||
pref <- prefix
|
||||
(suppress_author, key) <- citeKey
|
||||
suff <- suffix
|
||||
return $ do
|
||||
x <- pref
|
||||
y <- suff
|
||||
return $ Citation{ citationId = key
|
||||
, citationPrefix = B.toList x
|
||||
, citationSuffix = B.toList y
|
||||
, citationMode = if suppress_author
|
||||
then SuppressAuthor
|
||||
else NormalCitation
|
||||
, citationNoteNum = 0
|
||||
, citationHash = 0
|
||||
}
|
||||
where
|
||||
prefix = trimInlinesF . mconcat <$>
|
||||
manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
|
||||
suffix = try $ do
|
||||
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
|
||||
skipSpaces
|
||||
rest <- trimInlinesF . mconcat <$>
|
||||
many (notFollowedBy (oneOf ";]") *> inline)
|
||||
return $ if hasSpace
|
||||
then (B.space <>) <$> rest
|
||||
else rest
|
||||
|
||||
footnote :: OrgParser (F Inlines)
|
||||
footnote = try $ inlineNote <|> referencedNote
|
||||
|
||||
inlineNote :: OrgParser (F Inlines)
|
||||
inlineNote = try $ do
|
||||
string "[fn:"
|
||||
ref <- many alphaNum
|
||||
char ':'
|
||||
note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
|
||||
when (not $ null ref) $
|
||||
addToNotesTable ("fn:" ++ ref, note)
|
||||
return $ B.note <$> note
|
||||
|
||||
referencedNote :: OrgParser (F Inlines)
|
||||
referencedNote = try $ do
|
||||
ref <- noteMarker
|
||||
return $ do
|
||||
notes <- asksF orgStateNotes'
|
||||
case lookup ref notes of
|
||||
Nothing -> return $ B.str $ "[" ++ ref ++ "]"
|
||||
Just contents -> do
|
||||
st <- askF
|
||||
let contents' = runF contents st{ orgStateNotes' = [] }
|
||||
return $ B.note contents'
|
||||
|
||||
linkOrImage :: OrgParser (F Inlines)
|
||||
linkOrImage = explicitOrImageLink
|
||||
<|> selflinkOrImage
|
||||
<|> angleLink
|
||||
<|> plainLink
|
||||
<?> "link or image"
|
||||
|
||||
explicitOrImageLink :: OrgParser (F Inlines)
|
||||
explicitOrImageLink = try $ do
|
||||
char '['
|
||||
srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget
|
||||
title <- enclosedRaw (char '[') (char ']')
|
||||
title' <- parseFromString (mconcat <$> many inline) title
|
||||
char ']'
|
||||
return $ do
|
||||
src <- srcF
|
||||
if isImageFilename title
|
||||
then pure $ B.link src "" $ B.image title mempty mempty
|
||||
else linkToInlinesF src =<< title'
|
||||
|
||||
selflinkOrImage :: OrgParser (F Inlines)
|
||||
selflinkOrImage = try $ do
|
||||
src <- char '[' *> linkTarget <* char ']'
|
||||
return $ linkToInlinesF src (B.str src)
|
||||
|
||||
plainLink :: OrgParser (F Inlines)
|
||||
plainLink = try $ do
|
||||
(orig, src) <- uri
|
||||
returnF $ B.link src "" (B.str orig)
|
||||
|
||||
angleLink :: OrgParser (F Inlines)
|
||||
angleLink = try $ do
|
||||
char '<'
|
||||
link <- plainLink
|
||||
char '>'
|
||||
return link
|
||||
|
||||
linkTarget :: OrgParser String
|
||||
linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
|
||||
|
||||
possiblyEmptyLinkTarget :: OrgParser String
|
||||
possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]")
|
||||
|
||||
applyCustomLinkFormat :: String -> OrgParser (F String)
|
||||
applyCustomLinkFormat link = do
|
||||
let (linkType, rest) = break (== ':') link
|
||||
return $ do
|
||||
formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
|
||||
return $ maybe link ($ drop 1 rest) formatter
|
||||
|
||||
-- | Take a link and return a function which produces new inlines when given
|
||||
-- description inlines.
|
||||
linkToInlinesF :: String -> Inlines -> F Inlines
|
||||
linkToInlinesF linkStr =
|
||||
case linkStr of
|
||||
"" -> pure . B.link mempty "" -- wiki link (empty by convention)
|
||||
('#':_) -> pure . B.link linkStr "" -- document-local fraction
|
||||
_ -> case cleanLinkString linkStr of
|
||||
(Just cleanedLink) -> if isImageFilename cleanedLink
|
||||
then const . pure $ B.image cleanedLink "" ""
|
||||
else pure . B.link cleanedLink ""
|
||||
Nothing -> internalLink linkStr -- other internal link
|
||||
|
||||
-- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
|
||||
-- the string does not appear to be a link.
|
||||
cleanLinkString :: String -> Maybe String
|
||||
cleanLinkString s =
|
||||
case s of
|
||||
'/':_ -> Just $ "file://" ++ s -- absolute path
|
||||
'.':'/':_ -> Just s -- relative path
|
||||
'.':'.':'/':_ -> Just s -- relative path
|
||||
-- Relative path or URL (file schema)
|
||||
'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
|
||||
_ | isUrl s -> Just s -- URL
|
||||
_ -> Nothing
|
||||
where
|
||||
isUrl :: String -> Bool
|
||||
isUrl cs =
|
||||
let (scheme, path) = break (== ':') cs
|
||||
in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
|
||||
&& not (null path)
|
||||
|
||||
isImageFilename :: String -> Bool
|
||||
isImageFilename filename =
|
||||
any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
|
||||
(any (\x -> (x++":") `isPrefixOf` filename) protocols ||
|
||||
':' `notElem` filename)
|
||||
where
|
||||
imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
|
||||
protocols = [ "file", "http", "https" ]
|
||||
|
||||
internalLink :: String -> Inlines -> F Inlines
|
||||
internalLink link title = do
|
||||
anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
|
||||
if anchorB
|
||||
then return $ B.link ('#':link) "" title
|
||||
else return $ B.emph title
|
||||
|
||||
-- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
|
||||
-- @anchor-id@ set as id. Legal anchors in org-mode are defined through
|
||||
-- @org-target-regexp@, which is fairly liberal. Since no link is created if
|
||||
-- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
|
||||
-- an anchor.
|
||||
|
||||
anchor :: OrgParser (F Inlines)
|
||||
anchor = try $ do
|
||||
anchorId <- parseAnchor
|
||||
recordAnchorId anchorId
|
||||
returnF $ B.spanWith (solidify anchorId, [], []) mempty
|
||||
where
|
||||
parseAnchor = string "<<"
|
||||
*> many1 (noneOf "\t\n\r<>\"' ")
|
||||
<* string ">>"
|
||||
<* skipSpaces
|
||||
|
||||
-- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors
|
||||
-- the org function @org-export-solidify-link-text@.
|
||||
|
||||
solidify :: String -> String
|
||||
solidify = map replaceSpecialChar
|
||||
where replaceSpecialChar c
|
||||
| isAlphaNum c = c
|
||||
| c `elem` ("_.-:" :: String) = c
|
||||
| otherwise = '-'
|
||||
|
||||
-- | Parses an inline code block and marks it as an babel block.
|
||||
inlineCodeBlock :: OrgParser (F Inlines)
|
||||
inlineCodeBlock = try $ do
|
||||
string "src_"
|
||||
lang <- many1 orgArgWordChar
|
||||
opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
|
||||
inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
|
||||
let attrClasses = [translateLang lang, rundocBlockClass]
|
||||
let attrKeyVal = map toRundocAttrib (("language", lang) : opts)
|
||||
returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
|
||||
where
|
||||
inlineBlockOption :: OrgParser (String, String)
|
||||
inlineBlockOption = try $ do
|
||||
argKey <- orgArgKey
|
||||
paramValue <- option "yes" orgInlineParamValue
|
||||
return (argKey, paramValue)
|
||||
|
||||
orgInlineParamValue :: OrgParser String
|
||||
orgInlineParamValue = try $
|
||||
skipSpaces
|
||||
*> notFollowedBy (char ':')
|
||||
*> many1 (noneOf "\t\n\r ]")
|
||||
<* skipSpaces
|
||||
|
||||
|
||||
|
||||
enclosedByPair :: Char -- ^ opening char
|
||||
-> Char -- ^ closing char
|
||||
-> OrgParser a -- ^ parser
|
||||
-> OrgParser [a]
|
||||
enclosedByPair s e p = char s *> many1Till p (char e)
|
||||
|
||||
emph :: OrgParser (F Inlines)
|
||||
emph = fmap B.emph <$> emphasisBetween '/'
|
||||
|
||||
strong :: OrgParser (F Inlines)
|
||||
strong = fmap B.strong <$> emphasisBetween '*'
|
||||
|
||||
strikeout :: OrgParser (F Inlines)
|
||||
strikeout = fmap B.strikeout <$> emphasisBetween '+'
|
||||
|
||||
-- There is no underline, so we use strong instead.
|
||||
underline :: OrgParser (F Inlines)
|
||||
underline = fmap B.strong <$> emphasisBetween '_'
|
||||
|
||||
verbatim :: OrgParser (F Inlines)
|
||||
verbatim = return . B.code <$> verbatimBetween '='
|
||||
|
||||
code :: OrgParser (F Inlines)
|
||||
code = return . B.code <$> verbatimBetween '~'
|
||||
|
||||
subscript :: OrgParser (F Inlines)
|
||||
subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr)
|
||||
|
||||
superscript :: OrgParser (F Inlines)
|
||||
superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
|
||||
|
||||
math :: OrgParser (F Inlines)
|
||||
math = return . B.math <$> choice [ math1CharBetween '$'
|
||||
, mathStringBetween '$'
|
||||
, rawMathBetween "\\(" "\\)"
|
||||
]
|
||||
|
||||
displayMath :: OrgParser (F Inlines)
|
||||
displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
|
||||
, rawMathBetween "$$" "$$"
|
||||
]
|
||||
|
||||
updatePositions :: Char
|
||||
-> OrgParser (Char)
|
||||
updatePositions c = do
|
||||
when (c `elem` emphasisPreChars) updateLastPreCharPos
|
||||
when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
|
||||
return c
|
||||
|
||||
symbol :: OrgParser (F Inlines)
|
||||
symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
|
||||
|
||||
emphasisBetween :: Char
|
||||
-> OrgParser (F Inlines)
|
||||
emphasisBetween c = try $ do
|
||||
startEmphasisNewlinesCounting emphasisAllowedNewlines
|
||||
res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
|
||||
isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState
|
||||
when isTopLevelEmphasis
|
||||
resetEmphasisNewlines
|
||||
return res
|
||||
|
||||
verbatimBetween :: Char
|
||||
-> OrgParser String
|
||||
verbatimBetween c = try $
|
||||
emphasisStart c *>
|
||||
many1TillNOrLessNewlines 1 (noneOf "\n\r") (emphasisEnd c)
|
||||
|
||||
-- | Parses a raw string delimited by @c@ using Org's math rules
|
||||
mathStringBetween :: Char
|
||||
-> OrgParser String
|
||||
mathStringBetween c = try $ do
|
||||
mathStart c
|
||||
body <- many1TillNOrLessNewlines mathAllowedNewlines
|
||||
(noneOf (c:"\n\r"))
|
||||
(lookAhead $ mathEnd c)
|
||||
final <- mathEnd c
|
||||
return $ body ++ [final]
|
||||
|
||||
-- | Parse a single character between @c@ using math rules
|
||||
math1CharBetween :: Char
|
||||
-> OrgParser String
|
||||
math1CharBetween c = try $ do
|
||||
char c
|
||||
res <- noneOf $ c:mathForbiddenBorderChars
|
||||
char c
|
||||
eof <|> () <$ lookAhead (oneOf mathPostChars)
|
||||
return [res]
|
||||
|
||||
rawMathBetween :: String
|
||||
-> String
|
||||
-> OrgParser String
|
||||
rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)
|
||||
|
||||
-- | Parses the start (opening character) of emphasis
|
||||
emphasisStart :: Char -> OrgParser Char
|
||||
emphasisStart c = try $ do
|
||||
guard =<< afterEmphasisPreChar
|
||||
guard =<< notAfterString
|
||||
char c
|
||||
lookAhead (noneOf emphasisForbiddenBorderChars)
|
||||
pushToInlineCharStack c
|
||||
return c
|
||||
|
||||
-- | Parses the closing character of emphasis
|
||||
emphasisEnd :: Char -> OrgParser Char
|
||||
emphasisEnd c = try $ do
|
||||
guard =<< notAfterForbiddenBorderChar
|
||||
char c
|
||||
eof <|> () <$ lookAhead acceptablePostChars
|
||||
updateLastStrPos
|
||||
popInlineCharStack
|
||||
return c
|
||||
where acceptablePostChars =
|
||||
surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars)
|
||||
|
||||
mathStart :: Char -> OrgParser Char
|
||||
mathStart c = try $
|
||||
char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))
|
||||
|
||||
mathEnd :: Char -> OrgParser Char
|
||||
mathEnd c = try $ do
|
||||
res <- noneOf (c:mathForbiddenBorderChars)
|
||||
char c
|
||||
eof <|> () <$ lookAhead (oneOf mathPostChars)
|
||||
return res
|
||||
|
||||
|
||||
enclosedInlines :: OrgParser a
|
||||
-> OrgParser b
|
||||
-> OrgParser (F Inlines)
|
||||
enclosedInlines start end = try $
|
||||
trimInlinesF . mconcat <$> enclosed start end inline
|
||||
|
||||
enclosedRaw :: OrgParser a
|
||||
-> OrgParser b
|
||||
-> OrgParser String
|
||||
enclosedRaw start end = try $
|
||||
start *> (onSingleLine <|> spanningTwoLines)
|
||||
where onSingleLine = try $ many1Till (noneOf "\n\r") end
|
||||
spanningTwoLines = try $
|
||||
anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
|
||||
|
||||
-- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume
|
||||
-- newlines.
|
||||
many1TillNOrLessNewlines :: Int
|
||||
-> OrgParser Char
|
||||
-> OrgParser a
|
||||
-> OrgParser String
|
||||
many1TillNOrLessNewlines n p end = try $
|
||||
nMoreLines (Just n) mempty >>= oneOrMore
|
||||
where
|
||||
nMoreLines Nothing cs = return cs
|
||||
nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine
|
||||
nMoreLines k cs = try $ (final k cs <|> rest k cs)
|
||||
>>= uncurry nMoreLines
|
||||
final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine
|
||||
rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline)
|
||||
finalLine = try $ manyTill p end
|
||||
minus1 k = k - 1
|
||||
oneOrMore cs = guard (not $ null cs) *> return cs
|
||||
|
||||
-- Org allows customization of the way it reads emphasis. We use the defaults
|
||||
-- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
|
||||
-- for details).
|
||||
|
||||
-- | Chars allowed to occur before emphasis (spaces and newlines are ok, too)
|
||||
emphasisPreChars :: [Char]
|
||||
emphasisPreChars = "\t \"'({"
|
||||
|
||||
-- | Chars allowed at after emphasis
|
||||
emphasisPostChars :: [Char]
|
||||
emphasisPostChars = "\t\n !\"'),-.:;?\\}"
|
||||
|
||||
-- | Chars not allowed at the (inner) border of emphasis
|
||||
emphasisForbiddenBorderChars :: [Char]
|
||||
emphasisForbiddenBorderChars = "\t\n\r \"',"
|
||||
|
||||
-- | The maximum number of newlines within
|
||||
emphasisAllowedNewlines :: Int
|
||||
emphasisAllowedNewlines = 1
|
||||
|
||||
-- LaTeX-style math: see `org-latex-regexps` for details
|
||||
|
||||
-- | Chars allowed after an inline ($...$) math statement
|
||||
mathPostChars :: [Char]
|
||||
mathPostChars = "\t\n \"'),-.:;?"
|
||||
|
||||
-- | Chars not allowed at the (inner) border of math
|
||||
mathForbiddenBorderChars :: [Char]
|
||||
mathForbiddenBorderChars = "\t\n\r ,;.$"
|
||||
|
||||
-- | Maximum number of newlines in an inline math statement
|
||||
mathAllowedNewlines :: Int
|
||||
mathAllowedNewlines = 2
|
||||
|
||||
-- | Whether we are right behind a char allowed before emphasis
|
||||
afterEmphasisPreChar :: OrgParser Bool
|
||||
afterEmphasisPreChar = do
|
||||
pos <- getPosition
|
||||
lastPrePos <- orgStateLastPreCharPos <$> getState
|
||||
return . fromMaybe True $ (== pos) <$> lastPrePos
|
||||
|
||||
-- | Whether the parser is right after a forbidden border char
|
||||
notAfterForbiddenBorderChar :: OrgParser Bool
|
||||
notAfterForbiddenBorderChar = do
|
||||
pos <- getPosition
|
||||
lastFBCPos <- orgStateLastForbiddenCharPos <$> getState
|
||||
return $ lastFBCPos /= Just pos
|
||||
|
||||
-- | Read a sub- or superscript expression
|
||||
subOrSuperExpr :: OrgParser (F Inlines)
|
||||
subOrSuperExpr = try $
|
||||
choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r")
|
||||
, enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
|
||||
, simpleSubOrSuperString
|
||||
] >>= parseFromString (mconcat <$> many inline)
|
||||
where enclosing (left, right) s = left : s ++ [right]
|
||||
|
||||
simpleSubOrSuperString :: OrgParser String
|
||||
simpleSubOrSuperString = try $ do
|
||||
state <- getState
|
||||
guard . exportSubSuperscripts . orgStateExportSettings $ state
|
||||
choice [ string "*"
|
||||
, mappend <$> option [] ((:[]) <$> oneOf "+-")
|
||||
<*> many1 alphaNum
|
||||
]
|
||||
|
||||
inlineLaTeX :: OrgParser (F Inlines)
|
||||
inlineLaTeX = try $ do
|
||||
cmd <- inlineLaTeXCommand
|
||||
maybe mzero returnF $
|
||||
parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` parseAsInlineLaTeX cmd
|
||||
where
|
||||
parseAsMath :: String -> Maybe Inlines
|
||||
parseAsMath cs = B.fromList <$> texMathToPandoc cs
|
||||
|
||||
parseAsInlineLaTeX :: String -> Maybe Inlines
|
||||
parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs
|
||||
|
||||
parseAsMathMLSym :: String -> Maybe Inlines
|
||||
parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs)
|
||||
-- drop initial backslash and any trailing "{}"
|
||||
where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1
|
||||
|
||||
state :: ParserState
|
||||
state = def{ stateOptions = def{ readerParseRaw = True }}
|
||||
|
||||
texMathToPandoc :: String -> Maybe [Inline]
|
||||
texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline
|
||||
|
||||
maybeRight :: Either a b -> Maybe b
|
||||
maybeRight = either (const Nothing) Just
|
||||
|
||||
inlineLaTeXCommand :: OrgParser String
|
||||
inlineLaTeXCommand = try $ do
|
||||
rest <- getInput
|
||||
case runParser rawLaTeXInline def "source" rest of
|
||||
Right (RawInline _ cs) -> do
|
||||
-- drop any trailing whitespace, those are not be part of the command as
|
||||
-- far as org mode is concerned.
|
||||
let cmdNoSpc = dropWhileEnd isSpace cs
|
||||
let len = length cmdNoSpc
|
||||
count len anyChar
|
||||
return cmdNoSpc
|
||||
_ -> mzero
|
||||
|
||||
-- Taken from Data.OldList.
|
||||
dropWhileEnd :: (a -> Bool) -> [a] -> [a]
|
||||
dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) []
|
||||
|
||||
smart :: OrgParser (F Inlines)
|
||||
smart = do
|
||||
getOption readerSmart >>= guard
|
||||
doubleQuoted <|> singleQuoted <|>
|
||||
choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses])
|
||||
where
|
||||
orgDash = dash <* updatePositions '-'
|
||||
orgEllipses = ellipses <* updatePositions '.'
|
||||
orgApostrophe =
|
||||
(char '\'' <|> char '\8217') <* updateLastPreCharPos
|
||||
<* updateLastForbiddenCharPos
|
||||
*> return (B.str "\x2019")
|
||||
|
||||
singleQuoted :: OrgParser (F Inlines)
|
||||
singleQuoted = try $ do
|
||||
singleQuoteStart
|
||||
updatePositions '\''
|
||||
withQuoteContext InSingleQuote $
|
||||
fmap B.singleQuoted . trimInlinesF . mconcat <$>
|
||||
many1Till inline (singleQuoteEnd <* updatePositions '\'')
|
||||
|
||||
-- doubleQuoted will handle regular double-quoted sections, as well
|
||||
-- as dialogues with an open double-quote without a close double-quote
|
||||
-- in the same paragraph.
|
||||
doubleQuoted :: OrgParser (F Inlines)
|
||||
doubleQuoted = try $ do
|
||||
doubleQuoteStart
|
||||
updatePositions '"'
|
||||
contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
|
||||
(withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
|
||||
(fmap B.doubleQuoted . trimInlinesF $ contents))
|
||||
<|> (return $ return (B.str "\8220") <> contents)
|
|
@ -34,10 +34,14 @@ module Text.Pandoc.Readers.Org.Parsing
|
|||
, blanklines
|
||||
, newline
|
||||
, parseFromString
|
||||
, skipSpaces1
|
||||
, inList
|
||||
, withContext
|
||||
, updateLastForbiddenCharPos
|
||||
, updateLastPreCharPos
|
||||
, orgArgKey
|
||||
, orgArgWord
|
||||
, orgArgWordChar
|
||||
-- * Re-exports from Text.Pandoc.Parser
|
||||
, ParserContext (..)
|
||||
, many1Till
|
||||
|
@ -133,6 +137,10 @@ parseFromString parser str' = do
|
|||
updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
|
||||
return result
|
||||
|
||||
-- | Skip one or more tab or space characters.
|
||||
skipSpaces1 :: OrgParser ()
|
||||
skipSpaces1 = skipMany1 spaceChar
|
||||
|
||||
-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
|
||||
newline :: OrgParser Char
|
||||
newline =
|
||||
|
@ -180,3 +188,14 @@ updateLastForbiddenCharPos = getPosition >>= \p ->
|
|||
updateLastPreCharPos :: OrgParser ()
|
||||
updateLastPreCharPos = getPosition >>= \p ->
|
||||
updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
|
||||
|
||||
orgArgKey :: OrgParser String
|
||||
orgArgKey = try $
|
||||
skipSpaces *> char ':'
|
||||
*> many1 orgArgWordChar
|
||||
|
||||
orgArgWord :: OrgParser String
|
||||
orgArgWord = many1 orgArgWordChar
|
||||
|
||||
orgArgWordChar :: OrgParser Char
|
||||
orgArgWordChar = alphaNum <|> oneOf "-_"
|
||||
|
|
Loading…
Add table
Reference in a new issue