Merge pull request #1256 from tarleb/org-reader-improvements
Org reader improvements
This commit is contained in:
commit
6a2361c457
5 changed files with 641 additions and 252 deletions
|
@ -55,6 +55,13 @@ Copyright (C) 2010 Paul Rivier
|
|||
|
||||
Released under the GPL.
|
||||
|
||||
----------------------------------------------------------------------
|
||||
src/Text/Pandoc/Readers/Org.hs
|
||||
tests/Tests/Readers/Org.hs
|
||||
Copyright (C) 2014 Albert Krewinkel
|
||||
|
||||
Released under the GPL.
|
||||
|
||||
----------------------------------------------------------------------
|
||||
src/Text/Pandoc/Biblio.hs
|
||||
Copyright (C) 2008-2010 Andrea Rossato
|
||||
|
|
|
@ -861,22 +861,6 @@ definitionList = do
|
|||
items <- fmap sequence $ many1 definitionListItem
|
||||
return $ B.definitionList <$> fmap compactify'DL items
|
||||
|
||||
compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
|
||||
compactify'DL items =
|
||||
let defs = concatMap snd items
|
||||
defBlocks = reverse $ concatMap B.toList defs
|
||||
isPara (Para _) = True
|
||||
isPara _ = False
|
||||
in case defBlocks of
|
||||
(Para x:_) -> if not $ any isPara (drop 1 defBlocks)
|
||||
then let (t,ds) = last items
|
||||
lastDef = B.toList $ last ds
|
||||
ds' = init ds ++
|
||||
[B.fromList $ init lastDef ++ [Plain x]]
|
||||
in init items ++ [(t, ds')]
|
||||
else items
|
||||
_ -> items
|
||||
|
||||
--
|
||||
-- paragraph block
|
||||
--
|
||||
|
@ -1892,4 +1876,3 @@ doubleQuoted = try $ do
|
|||
(withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return
|
||||
(fmap B.doubleQuoted . trimInlinesF $ contents))
|
||||
<|> (return $ return (B.str "\8220") <> contents)
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||
{-
|
||||
Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de>
|
||||
|
||||
|
@ -24,26 +25,32 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|||
|
||||
Maintainer : Albert Krewinkel <tarleb@moltkeplatz.de>
|
||||
|
||||
Conversion of Org-Mode to 'Pandoc' document.
|
||||
Conversion of org-mode formatted plain text to 'Pandoc' document.
|
||||
-}
|
||||
module Text.Pandoc.Readers.Org ( readOrg ) where
|
||||
|
||||
import qualified Text.Pandoc.Builder as B
|
||||
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>), HasMeta(..))
|
||||
import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), (<>)
|
||||
, trimInlines )
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Options
|
||||
import qualified Text.Pandoc.Parsing as P
|
||||
import Text.Pandoc.Parsing hiding (newline, orderedListMarker, updateLastStrPos)
|
||||
import Text.Pandoc.Shared (compactify')
|
||||
import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
|
||||
, newline, orderedListMarker
|
||||
, parseFromString
|
||||
, updateLastStrPos )
|
||||
import Text.Pandoc.Shared (compactify', compactify'DL)
|
||||
|
||||
import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>))
|
||||
import Control.Arrow ((***))
|
||||
import Control.Monad (guard, when)
|
||||
import Control.Applicative ( Applicative, pure
|
||||
, (<$>), (<$), (<*>), (<*), (*>), (<**>) )
|
||||
import Control.Monad (foldM, guard, liftM, liftM2, when)
|
||||
import Control.Monad.Reader (Reader, runReader, ask, asks)
|
||||
import Data.Char (toLower)
|
||||
import Data.Default
|
||||
import Data.List (foldl', isPrefixOf, isSuffixOf)
|
||||
import Data.Maybe (listToMaybe, fromMaybe)
|
||||
import Data.Monoid (mconcat, mempty, mappend)
|
||||
import Data.List (intersperse, isPrefixOf, isSuffixOf)
|
||||
import qualified Data.Map as M
|
||||
import Data.Maybe (listToMaybe, fromMaybe, isJust)
|
||||
import Data.Monoid (Monoid, mconcat, mempty, mappend)
|
||||
|
||||
-- | Parse org-mode string and return a Pandoc document.
|
||||
readOrg :: ReaderOptions -- ^ Reader options
|
||||
|
@ -53,27 +60,35 @@ readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
|
|||
|
||||
type OrgParser = Parser [Char] OrgParserState
|
||||
|
||||
parseOrg:: OrgParser Pandoc
|
||||
parseOrg :: OrgParser Pandoc
|
||||
parseOrg = do
|
||||
blocks' <- B.toList <$> parseBlocks
|
||||
blocks' <- parseBlocks
|
||||
st <- getState
|
||||
let meta = orgStateMeta st
|
||||
return $ Pandoc meta $ filter (/= Null) blocks'
|
||||
let meta = runF (orgStateMeta' st) st
|
||||
return $ Pandoc meta $ filter (/= Null) (B.toList $ runF blocks' st)
|
||||
|
||||
--
|
||||
-- Parser State for Org
|
||||
--
|
||||
|
||||
type OrgNoteRecord = (String, F Blocks)
|
||||
type OrgNoteTable = [OrgNoteRecord]
|
||||
|
||||
type OrgBlockAttributes = M.Map String String
|
||||
|
||||
-- | Org-mode parser state
|
||||
data OrgParserState = OrgParserState
|
||||
{ orgStateOptions :: ReaderOptions
|
||||
, orgStateBlockAttributes :: OrgBlockAttributes
|
||||
, orgStateEmphasisCharStack :: [Char]
|
||||
, orgStateEmphasisNewlines :: Maybe Int
|
||||
, orgStateLastForbiddenCharPos :: Maybe SourcePos
|
||||
, orgStateLastPreCharPos :: Maybe SourcePos
|
||||
, orgStateLastStrPos :: Maybe SourcePos
|
||||
, orgStateMeta :: Meta
|
||||
} deriving (Show)
|
||||
, orgStateMeta' :: F Meta
|
||||
, orgStateNotes' :: OrgNoteTable
|
||||
}
|
||||
|
||||
instance HasReaderOptions OrgParserState where
|
||||
extractReaderOptions = orgStateOptions
|
||||
|
@ -90,14 +105,30 @@ instance Default OrgParserState where
|
|||
defaultOrgParserState :: OrgParserState
|
||||
defaultOrgParserState = OrgParserState
|
||||
{ orgStateOptions = def
|
||||
, orgStateBlockAttributes = M.empty
|
||||
, orgStateEmphasisCharStack = []
|
||||
, orgStateEmphasisNewlines = Nothing
|
||||
, orgStateLastForbiddenCharPos = Nothing
|
||||
, orgStateLastPreCharPos = Nothing
|
||||
, orgStateLastStrPos = Nothing
|
||||
, orgStateMeta = nullMeta
|
||||
, orgStateMeta' = return nullMeta
|
||||
, orgStateNotes' = []
|
||||
}
|
||||
|
||||
addBlockAttribute :: String -> String -> OrgParser ()
|
||||
addBlockAttribute key val = updateState $ \s ->
|
||||
let attrs = orgStateBlockAttributes s
|
||||
in s{ orgStateBlockAttributes = M.insert key val attrs }
|
||||
|
||||
lookupBlockAttribute :: String -> OrgParser (Maybe String)
|
||||
lookupBlockAttribute key =
|
||||
M.lookup key . orgStateBlockAttributes <$> getState
|
||||
|
||||
resetBlockAttributes :: OrgParser ()
|
||||
resetBlockAttributes = updateState $ \s ->
|
||||
s{ orgStateBlockAttributes = orgStateBlockAttributes def }
|
||||
|
||||
updateLastStrPos :: OrgParser ()
|
||||
updateLastStrPos = getPosition >>= \p ->
|
||||
updateState $ \s -> s{ orgStateLastStrPos = Just p }
|
||||
|
@ -111,19 +142,19 @@ updateLastPreCharPos = getPosition >>= \p ->
|
|||
updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
|
||||
|
||||
pushToInlineCharStack :: Char -> OrgParser ()
|
||||
pushToInlineCharStack c = updateState $ \st ->
|
||||
st { orgStateEmphasisCharStack = c:orgStateEmphasisCharStack st }
|
||||
pushToInlineCharStack c = updateState $ \s ->
|
||||
s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
|
||||
|
||||
popInlineCharStack :: OrgParser ()
|
||||
popInlineCharStack = updateState $ \st ->
|
||||
st { orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ st }
|
||||
popInlineCharStack = updateState $ \s ->
|
||||
s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }
|
||||
|
||||
surroundingEmphasisChar :: OrgParser [Char]
|
||||
surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
|
||||
|
||||
startEmphasisNewlinesCounting :: Int -> OrgParser ()
|
||||
startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
|
||||
s { orgStateEmphasisNewlines = Just maxNewlines }
|
||||
s{ orgStateEmphasisNewlines = Just maxNewlines }
|
||||
|
||||
decEmphasisNewlinesCount :: OrgParser ()
|
||||
decEmphasisNewlinesCount = updateState $ \s ->
|
||||
|
@ -138,6 +169,48 @@ resetEmphasisNewlines :: OrgParser ()
|
|||
resetEmphasisNewlines = updateState $ \s ->
|
||||
s{ orgStateEmphasisNewlines = Nothing }
|
||||
|
||||
addToNotesTable :: OrgNoteRecord -> OrgParser ()
|
||||
addToNotesTable note = do
|
||||
oldnotes <- orgStateNotes' <$> getState
|
||||
updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
|
||||
|
||||
-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
|
||||
-- of the state saved and restored.
|
||||
parseFromString :: OrgParser a -> String -> OrgParser a
|
||||
parseFromString parser str' = do
|
||||
oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
|
||||
updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
|
||||
result <- P.parseFromString parser str'
|
||||
updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
|
||||
return result
|
||||
|
||||
|
||||
--
|
||||
-- Adaptions and specializations of parsing utilities
|
||||
--
|
||||
|
||||
newtype F a = F { unF :: Reader OrgParserState a
|
||||
} deriving (Monad, Applicative, Functor)
|
||||
|
||||
runF :: F a -> OrgParserState -> a
|
||||
runF = runReader . unF
|
||||
|
||||
askF :: F OrgParserState
|
||||
askF = F ask
|
||||
|
||||
asksF :: (OrgParserState -> a) -> F a
|
||||
asksF f = F $ asks f
|
||||
|
||||
instance Monoid a => Monoid (F a) where
|
||||
mempty = return mempty
|
||||
mappend = liftM2 mappend
|
||||
mconcat = fmap mconcat . sequence
|
||||
|
||||
trimInlinesF :: F Inlines -> F Inlines
|
||||
trimInlinesF = liftM trimInlines
|
||||
|
||||
|
||||
-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
|
||||
newline :: OrgParser Char
|
||||
newline =
|
||||
P.newline
|
||||
|
@ -148,37 +221,83 @@ newline =
|
|||
-- parsing blocks
|
||||
--
|
||||
|
||||
parseBlocks :: OrgParser Blocks
|
||||
parseBlocks :: OrgParser (F Blocks)
|
||||
parseBlocks = mconcat <$> manyTill block eof
|
||||
|
||||
block :: OrgParser Blocks
|
||||
block :: OrgParser (F Blocks)
|
||||
block = choice [ mempty <$ blanklines
|
||||
, orgBlock
|
||||
, optionalAttributes $ choice
|
||||
[ orgBlock
|
||||
, figure
|
||||
, table
|
||||
]
|
||||
, example
|
||||
, drawer
|
||||
, figure
|
||||
, specialLine
|
||||
, header
|
||||
, hline
|
||||
, return <$> hline
|
||||
, list
|
||||
, table
|
||||
, latexFragment
|
||||
, noteBlock
|
||||
, paraOrPlain
|
||||
] <?> "block"
|
||||
|
||||
optionalAttributes :: OrgParser (F Blocks) -> OrgParser (F Blocks)
|
||||
optionalAttributes parser = try $
|
||||
resetBlockAttributes *> parseBlockAttributes *> parser
|
||||
|
||||
parseBlockAttributes :: OrgParser ()
|
||||
parseBlockAttributes = do
|
||||
attrs <- many attribute
|
||||
() <$ mapM (uncurry parseAndAddAttribute) attrs
|
||||
where
|
||||
attribute :: OrgParser (String, String)
|
||||
attribute = try $ do
|
||||
key <- metaLineStart *> many1Till (noneOf "\n\r") (char ':')
|
||||
val <- skipSpaces *> anyLine
|
||||
return (map toLower key, val)
|
||||
|
||||
parseAndAddAttribute :: String -> String -> OrgParser ()
|
||||
parseAndAddAttribute key value = do
|
||||
let key' = map toLower key
|
||||
() <$ addBlockAttribute key' value
|
||||
|
||||
lookupInlinesAttr :: String -> OrgParser (Maybe (F Inlines))
|
||||
lookupInlinesAttr attr = try $ do
|
||||
val <- lookupBlockAttribute attr
|
||||
maybe (return Nothing)
|
||||
(fmap Just . parseFromString parseInlines)
|
||||
val
|
||||
|
||||
|
||||
--
|
||||
-- Org Blocks (#+BEGIN_... / #+END_...)
|
||||
--
|
||||
|
||||
orgBlock :: OrgParser Blocks
|
||||
orgBlock :: OrgParser (F Blocks)
|
||||
orgBlock = try $ do
|
||||
(indent, blockType, args) <- blockHeader
|
||||
blockStr <- rawBlockContent indent blockType
|
||||
content <- rawBlockContent indent blockType
|
||||
contentBlocks <- parseFromString parseBlocks (content ++ "\n")
|
||||
let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ]
|
||||
case blockType of
|
||||
"comment" -> return mempty
|
||||
"src" -> return $ B.codeBlockWith ("", classArgs, []) blockStr
|
||||
_ -> B.divWith ("", [blockType], [])
|
||||
<$> parseFromString parseBlocks blockStr
|
||||
"html" -> returnF $ B.rawBlock "html" content
|
||||
"latex" -> returnF $ B.rawBlock "latex" content
|
||||
"ascii" -> returnF $ B.rawBlock "ascii" content
|
||||
"example" -> returnF $ exampleCode content
|
||||
"quote" -> return $ B.blockQuote <$> contentBlocks
|
||||
"verse" -> parseVerse content
|
||||
"src" -> codeBlockWithAttr classArgs content
|
||||
_ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks
|
||||
where
|
||||
returnF :: a -> OrgParser (F a)
|
||||
returnF = return . return
|
||||
|
||||
parseVerse :: String -> OrgParser (F Blocks)
|
||||
parseVerse cs =
|
||||
fmap B.para . mconcat . intersperse (pure B.linebreak)
|
||||
<$> mapM (parseFromString parseInlines) (lines cs)
|
||||
|
||||
blockHeader :: OrgParser (Int, String, [String])
|
||||
blockHeader = (,,) <$> blockIndent
|
||||
|
@ -188,6 +307,18 @@ blockHeader = (,,) <$> blockIndent
|
|||
blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter)
|
||||
blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline
|
||||
|
||||
codeBlockWithAttr :: [String] -> String -> OrgParser (F Blocks)
|
||||
codeBlockWithAttr classArgs content = do
|
||||
identifier <- fromMaybe "" <$> lookupBlockAttribute "name"
|
||||
caption <- lookupInlinesAttr "caption"
|
||||
let codeBlck = B.codeBlockWith (identifier, classArgs, []) content
|
||||
return $ maybe (pure codeBlck) (labelDiv codeBlck) caption
|
||||
where
|
||||
labelDiv blk value =
|
||||
B.divWith nullAttr <$> (mappend <$> labelledBlock value
|
||||
<*> pure blk)
|
||||
labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
|
||||
|
||||
rawBlockContent :: Int -> String -> OrgParser String
|
||||
rawBlockContent indent blockType =
|
||||
unlines . map commaEscaped <$> manyTill indentedLine blockEnder
|
||||
|
@ -222,15 +353,18 @@ commaEscaped (',':cs@('*':_)) = cs
|
|||
commaEscaped (',':cs@('#':'+':_)) = cs
|
||||
commaEscaped cs = cs
|
||||
|
||||
example :: OrgParser Blocks
|
||||
example = try $
|
||||
B.codeBlockWith ("", ["example"], []) . unlines <$> many1 exampleLine
|
||||
example :: OrgParser (F Blocks)
|
||||
example = try $ do
|
||||
return . return . exampleCode =<< unlines <$> many1 exampleLine
|
||||
|
||||
exampleCode :: String -> Blocks
|
||||
exampleCode = B.codeBlockWith ("", ["example"], [])
|
||||
|
||||
exampleLine :: OrgParser String
|
||||
exampleLine = try $ string ": " *> anyLine
|
||||
|
||||
-- Drawers for properties or a logbook
|
||||
drawer :: OrgParser Blocks
|
||||
drawer :: OrgParser (F Blocks)
|
||||
drawer = try $ do
|
||||
drawerStart
|
||||
manyTill drawerLine (try drawerEnd)
|
||||
|
@ -256,41 +390,31 @@ drawerEnd = try $
|
|||
--
|
||||
|
||||
-- Figures (Image on a line by itself, preceded by name and/or caption)
|
||||
figure :: OrgParser Blocks
|
||||
figure :: OrgParser (F Blocks)
|
||||
figure = try $ do
|
||||
(tit, cap) <- (maybe mempty withFigPrefix *** fromMaybe mempty)
|
||||
<$> nameAndOrCaption
|
||||
(cap, nam) <- nameAndCaption
|
||||
src <- skipSpaces *> selfTarget <* skipSpaces <* newline
|
||||
guard (isImageFilename src)
|
||||
return . B.para $ B.image src tit cap
|
||||
where withFigPrefix cs = if "fig:" `isPrefixOf` cs
|
||||
then cs
|
||||
else "fig:" ++ cs
|
||||
|
||||
nameAndOrCaption :: OrgParser (Maybe String, Maybe Inlines)
|
||||
nameAndOrCaption = try $ nameFirst <|> captionFirst
|
||||
return $ do
|
||||
cap' <- cap
|
||||
return $ B.para $ B.image src nam cap'
|
||||
where
|
||||
nameFirst = try $ do
|
||||
n <- name
|
||||
c <- optionMaybe caption
|
||||
return (Just n, c)
|
||||
captionFirst = try $ do
|
||||
c <- caption
|
||||
n <- optionMaybe name
|
||||
return (n, Just c)
|
||||
|
||||
caption :: OrgParser Inlines
|
||||
caption = try $ annotation "CAPTION" *> inlinesTillNewline
|
||||
|
||||
name :: OrgParser String
|
||||
name = try $ annotation "NAME" *> skipSpaces *> manyTill anyChar newline
|
||||
|
||||
annotation :: String -> OrgParser String
|
||||
annotation ann = try $ metaLineStart *> stringAnyCase ann <* char ':'
|
||||
nameAndCaption =
|
||||
do
|
||||
maybeCap <- lookupInlinesAttr "caption"
|
||||
maybeNam <- lookupBlockAttribute "name"
|
||||
guard $ isJust maybeCap || isJust maybeNam
|
||||
return ( fromMaybe mempty maybeCap
|
||||
, maybe mempty withFigPrefix maybeNam )
|
||||
withFigPrefix cs =
|
||||
if "fig:" `isPrefixOf` cs
|
||||
then cs
|
||||
else "fig:" ++ cs
|
||||
|
||||
--
|
||||
-- Comments, Options and Metadata
|
||||
specialLine :: OrgParser Blocks
|
||||
specialLine = try $ metaLine <|> commentLine
|
||||
specialLine :: OrgParser (F Blocks)
|
||||
specialLine = fmap return . try $ metaLine <|> commentLine
|
||||
|
||||
metaLine :: OrgParser Blocks
|
||||
metaLine = try $ metaLineStart *> declarationLine
|
||||
|
@ -308,29 +432,41 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# "
|
|||
|
||||
declarationLine :: OrgParser Blocks
|
||||
declarationLine = try $ do
|
||||
meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta
|
||||
updateState $ \st -> st { orgStateMeta = orgStateMeta st <> meta' }
|
||||
key <- metaKey
|
||||
inlinesF <- metaInlines
|
||||
updateState $ \st ->
|
||||
let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta
|
||||
in st { orgStateMeta' = orgStateMeta' st <> meta' }
|
||||
return mempty
|
||||
|
||||
metaValue :: OrgParser MetaValue
|
||||
metaValue = MetaInlines . B.toList <$> inlinesTillNewline
|
||||
metaInlines :: OrgParser (F MetaValue)
|
||||
metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
|
||||
|
||||
metaKey :: OrgParser String
|
||||
metaKey = map toLower <$> many1 (noneOf ": \n\r")
|
||||
<* char ':'
|
||||
<* skipSpaces
|
||||
|
||||
--
|
||||
-- Headers
|
||||
--
|
||||
|
||||
-- | Headers
|
||||
header :: OrgParser Blocks
|
||||
header = try $
|
||||
B.header <$> headerStart
|
||||
<*> inlinesTillNewline
|
||||
header :: OrgParser (F Blocks)
|
||||
header = try $ do
|
||||
level <- headerStart
|
||||
title <- inlinesTillNewline
|
||||
return $ B.header level <$> title
|
||||
|
||||
headerStart :: OrgParser Int
|
||||
headerStart = try $
|
||||
(length <$> many1 (char '*')) <* many1 (char ' ')
|
||||
|
||||
-- Horizontal Line (five dashes or more)
|
||||
|
||||
-- Don't use (or need) the reader wrapper here, we want hline to be
|
||||
-- @show@able. Otherwise we can't use it with @notFollowedBy'@.
|
||||
|
||||
-- | Horizontal Line (five -- dashes or more)
|
||||
hline :: OrgParser Blocks
|
||||
hline = try $ do
|
||||
skipSpaces
|
||||
|
@ -344,27 +480,30 @@ hline = try $ do
|
|||
-- Tables
|
||||
--
|
||||
|
||||
data OrgTableRow = OrgContentRow [Blocks]
|
||||
data OrgTableRow = OrgContentRow (F [Blocks])
|
||||
| OrgAlignRow [Alignment]
|
||||
| OrgHlineRow
|
||||
deriving (Eq, Show)
|
||||
|
||||
data OrgTable = OrgTable
|
||||
{ orgTableColumns :: Int
|
||||
, orgTableAlignments :: [Alignment]
|
||||
, orgTableHeader :: [Blocks]
|
||||
, orgTableRows :: [[Blocks]]
|
||||
} deriving (Eq, Show)
|
||||
}
|
||||
|
||||
table :: OrgParser Blocks
|
||||
table :: OrgParser (F Blocks)
|
||||
table = try $ do
|
||||
lookAhead tableStart
|
||||
orgToPandocTable . normalizeTable . rowsToTable <$> tableRows
|
||||
do
|
||||
rows <- tableRows
|
||||
cptn <- fromMaybe (pure "") <$> lookupInlinesAttr "caption"
|
||||
return $ (<$> cptn) . orgToPandocTable . normalizeTable =<< rowsToTable rows
|
||||
|
||||
orgToPandocTable :: OrgTable
|
||||
-> Inlines
|
||||
-> Blocks
|
||||
orgToPandocTable (OrgTable _ aligns heads lns) =
|
||||
B.table "" (zip aligns $ repeat 0) heads lns
|
||||
orgToPandocTable (OrgTable _ aligns heads lns) caption =
|
||||
B.table caption (zip aligns $ repeat 0) heads lns
|
||||
|
||||
tableStart :: OrgParser Char
|
||||
tableStart = try $ skipSpaces *> char '|'
|
||||
|
@ -374,11 +513,11 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
|
|||
|
||||
tableContentRow :: OrgParser OrgTableRow
|
||||
tableContentRow = try $
|
||||
OrgContentRow <$> (tableStart *> manyTill tableContentCell newline)
|
||||
OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline)
|
||||
|
||||
tableContentCell :: OrgParser Blocks
|
||||
tableContentCell :: OrgParser (F Blocks)
|
||||
tableContentCell = try $
|
||||
B.plain . trimInlines . mconcat <$> many1Till inline endOfCell
|
||||
fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell
|
||||
|
||||
endOfCell :: OrgParser Char
|
||||
endOfCell = try $ char '|' <|> lookAhead newline
|
||||
|
@ -410,8 +549,8 @@ tableHline = try $
|
|||
OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
|
||||
|
||||
rowsToTable :: [OrgTableRow]
|
||||
-> OrgTable
|
||||
rowsToTable = foldl' (flip rowToContent) zeroTable
|
||||
-> F OrgTable
|
||||
rowsToTable = foldM (flip rowToContent) zeroTable
|
||||
where zeroTable = OrgTable 0 mempty mempty mempty
|
||||
|
||||
normalizeTable :: OrgTable
|
||||
|
@ -430,57 +569,113 @@ normalizeTable (OrgTable cols aligns heads lns) =
|
|||
-- line as a header. All other horizontal lines are discarded.
|
||||
rowToContent :: OrgTableRow
|
||||
-> OrgTable
|
||||
-> OrgTable
|
||||
rowToContent OrgHlineRow = maybeBodyToHeader
|
||||
rowToContent (OrgContentRow rs) = setLongestRow rs . appendToBody rs
|
||||
rowToContent (OrgAlignRow as) = setLongestRow as . setAligns as
|
||||
-> F OrgTable
|
||||
rowToContent OrgHlineRow t = maybeBodyToHeader t
|
||||
rowToContent (OrgAlignRow as) t = setLongestRow as =<< setAligns as t
|
||||
rowToContent (OrgContentRow rf) t = do
|
||||
rs <- rf
|
||||
setLongestRow rs =<< appendToBody rs t
|
||||
|
||||
setLongestRow :: [a]
|
||||
-> OrgTable
|
||||
-> OrgTable
|
||||
setLongestRow rs t = t{ orgTableColumns = max (length rs) (orgTableColumns t) }
|
||||
-> F OrgTable
|
||||
setLongestRow rs t =
|
||||
return t{ orgTableColumns = max (length rs) (orgTableColumns t) }
|
||||
|
||||
maybeBodyToHeader :: OrgTable
|
||||
-> OrgTable
|
||||
-> F OrgTable
|
||||
maybeBodyToHeader t = case t of
|
||||
OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
|
||||
t{ orgTableHeader = b , orgTableRows = [] }
|
||||
_ -> t
|
||||
return t{ orgTableHeader = b , orgTableRows = [] }
|
||||
_ -> return t
|
||||
|
||||
appendToBody :: [Blocks]
|
||||
-> OrgTable
|
||||
-> OrgTable
|
||||
appendToBody r t = t{ orgTableRows = orgTableRows t ++ [r] }
|
||||
-> F OrgTable
|
||||
appendToBody r t = return t{ orgTableRows = orgTableRows t ++ [r] }
|
||||
|
||||
setAligns :: [Alignment]
|
||||
-> OrgTable
|
||||
-> OrgTable
|
||||
setAligns aligns t = t{ orgTableAlignments = aligns }
|
||||
-> F OrgTable
|
||||
setAligns aligns t = return $ t{ orgTableAlignments = aligns }
|
||||
|
||||
|
||||
--
|
||||
-- LaTeX fragments
|
||||
--
|
||||
latexFragment :: OrgParser (F Blocks)
|
||||
latexFragment = try $ do
|
||||
envName <- latexEnvStart
|
||||
content <- mconcat <$> manyTill anyLineNewline (latexEnd envName)
|
||||
return . return $ B.rawBlock "latex" (content `inLatexEnv` envName)
|
||||
where
|
||||
c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n"
|
||||
, c
|
||||
, "\\end{", e, "}\n"
|
||||
]
|
||||
|
||||
latexEnvStart :: OrgParser String
|
||||
latexEnvStart = try $ do
|
||||
skipSpaces *> string "\\begin{"
|
||||
*> latexEnvName
|
||||
<* string "}"
|
||||
<* blankline
|
||||
|
||||
latexEnd :: String -> OrgParser ()
|
||||
latexEnd envName = try $
|
||||
() <$ skipSpaces
|
||||
<* string ("\\end{" ++ envName ++ "}")
|
||||
<* blankline
|
||||
|
||||
-- | Parses a LaTeX environment name.
|
||||
latexEnvName :: OrgParser String
|
||||
latexEnvName = try $ do
|
||||
mappend <$> many1 alphaNum
|
||||
<*> option "" (string "*")
|
||||
|
||||
|
||||
--
|
||||
-- Footnote defintions
|
||||
--
|
||||
noteBlock :: OrgParser (F Blocks)
|
||||
noteBlock = try $ do
|
||||
ref <- noteMarker <* skipSpaces
|
||||
content <- mconcat <$> blocksTillHeaderOrNote
|
||||
addToNotesTable (ref, content)
|
||||
return mempty
|
||||
where
|
||||
blocksTillHeaderOrNote =
|
||||
many1Till block (eof <|> () <$ lookAhead noteMarker
|
||||
<|> () <$ lookAhead headerStart)
|
||||
|
||||
-- Paragraphs or Plain text
|
||||
paraOrPlain :: OrgParser Blocks
|
||||
paraOrPlain :: OrgParser (F Blocks)
|
||||
paraOrPlain = try $
|
||||
parseInlines <**> option B.plain (try $ newline *> pure B.para)
|
||||
parseInlines <**> (fmap <$> option B.plain (try $ newline *> pure B.para))
|
||||
|
||||
inlinesTillNewline :: OrgParser Inlines
|
||||
inlinesTillNewline = trimInlines . mconcat <$> manyTill inline newline
|
||||
inlinesTillNewline :: OrgParser (F Inlines)
|
||||
inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline
|
||||
|
||||
|
||||
--
|
||||
-- list blocks
|
||||
--
|
||||
|
||||
list :: OrgParser Blocks
|
||||
list :: OrgParser (F Blocks)
|
||||
list = choice [ definitionList, bulletList, orderedList ] <?> "list"
|
||||
|
||||
definitionList :: OrgParser Blocks
|
||||
definitionList = B.definitionList <$> many1 (definitionListItem bulletListStart)
|
||||
definitionList :: OrgParser (F Blocks)
|
||||
definitionList = fmap B.definitionList . fmap compactify'DL . sequence
|
||||
<$> many1 (definitionListItem bulletListStart)
|
||||
|
||||
bulletList :: OrgParser Blocks
|
||||
bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart)
|
||||
bulletList :: OrgParser (F Blocks)
|
||||
bulletList = fmap B.bulletList . fmap compactify' . sequence
|
||||
<$> many1 (listItem bulletListStart)
|
||||
|
||||
orderedList :: OrgParser Blocks
|
||||
orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart)
|
||||
orderedList :: OrgParser (F Blocks)
|
||||
-- orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart)
|
||||
orderedList = fmap B.orderedList . fmap compactify' . sequence
|
||||
<$> many1 (listItem orderedListStart)
|
||||
|
||||
genericListStart :: OrgParser String
|
||||
-> OrgParser Int
|
||||
|
@ -499,33 +694,36 @@ orderedListStart = genericListStart orderedListMarker
|
|||
where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
|
||||
|
||||
definitionListItem :: OrgParser Int
|
||||
-> OrgParser (Inlines, [Blocks])
|
||||
-> OrgParser (F (Inlines, [Blocks]))
|
||||
definitionListItem parseMarkerGetLength = try $ do
|
||||
markerLength <- parseMarkerGetLength
|
||||
term <- manyTill (noneOf "\n\r") (try $ string "::")
|
||||
first <- anyLineNewline
|
||||
blank <- option "" ("\n" <$ blankline)
|
||||
cont <- concat <$> many (listContinuation markerLength)
|
||||
term' <- parseFromString inline term
|
||||
contents' <- parseFromString parseBlocks $ first ++ cont
|
||||
return (term', [contents'])
|
||||
contents' <- parseFromString parseBlocks $ first ++ blank ++ cont
|
||||
return $ (,) <$> term' <*> fmap (:[]) contents'
|
||||
|
||||
|
||||
-- parse raw text for one list item, excluding start marker and continuations
|
||||
listItem :: OrgParser Int
|
||||
-> OrgParser Blocks
|
||||
-> OrgParser (F Blocks)
|
||||
listItem start = try $ do
|
||||
markerLength <- try start
|
||||
firstLine <- anyLineNewline
|
||||
blank <- option "" ("\n" <$ blankline)
|
||||
rest <- concat <$> many (listContinuation markerLength)
|
||||
parseFromString parseBlocks $ firstLine ++ rest
|
||||
parseFromString parseBlocks $ firstLine ++ blank ++ rest
|
||||
|
||||
-- continuation of a list item - indented and separated by blankline or endline.
|
||||
-- Note: nested lists are parsed as continuations.
|
||||
listContinuation :: Int
|
||||
-> OrgParser String
|
||||
listContinuation markerLength = try $
|
||||
mappend <$> many blankline
|
||||
<*> (concat <$> many1 listLine)
|
||||
notFollowedBy' blankline
|
||||
*> (mappend <$> (concat <$> many1 listLine)
|
||||
<*> many blankline)
|
||||
where listLine = try $ indentWith markerLength *> anyLineNewline
|
||||
|
||||
anyLineNewline :: OrgParser String
|
||||
|
@ -536,11 +734,12 @@ anyLineNewline = (++ "\n") <$> anyLine
|
|||
-- inline
|
||||
--
|
||||
|
||||
inline :: OrgParser Inlines
|
||||
inline :: OrgParser (F Inlines)
|
||||
inline =
|
||||
choice [ whitespace
|
||||
, linebreak
|
||||
, link
|
||||
, footnote
|
||||
, linkOrImage
|
||||
, str
|
||||
, endline
|
||||
, emph
|
||||
|
@ -557,67 +756,104 @@ inline =
|
|||
] <* (guard =<< newlinesCountWithinLimits)
|
||||
<?> "inline"
|
||||
|
||||
parseInlines :: OrgParser Inlines
|
||||
parseInlines = trimInlines . mconcat <$> many1 inline
|
||||
parseInlines :: OrgParser (F Inlines)
|
||||
parseInlines = trimInlinesF . mconcat <$> many1 inline
|
||||
|
||||
-- treat these as potentially non-text when parsing inline:
|
||||
specialChars :: [Char]
|
||||
specialChars = "\"$'()*+-./:<=>[\\]^_{|}~"
|
||||
|
||||
|
||||
whitespace :: OrgParser Inlines
|
||||
whitespace = B.space <$ skipMany1 spaceChar
|
||||
<* updateLastPreCharPos
|
||||
<* updateLastForbiddenCharPos
|
||||
whitespace :: OrgParser (F Inlines)
|
||||
whitespace = pure B.space <$ skipMany1 spaceChar
|
||||
<* updateLastPreCharPos
|
||||
<* updateLastForbiddenCharPos
|
||||
<?> "whitespace"
|
||||
|
||||
linebreak :: OrgParser Inlines
|
||||
linebreak = try $ B.linebreak <$ string "\\\\" <* skipSpaces <* newline
|
||||
linebreak :: OrgParser (F Inlines)
|
||||
linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
|
||||
|
||||
str :: OrgParser Inlines
|
||||
str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
|
||||
<* updateLastStrPos
|
||||
str :: OrgParser (F Inlines)
|
||||
str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
|
||||
<* updateLastStrPos
|
||||
|
||||
-- an endline character that can be treated as a space, not a structural break
|
||||
endline :: OrgParser Inlines
|
||||
-- | An endline character that can be treated as a space, not a structural
|
||||
-- break. This should reflect the values of the Emacs variable
|
||||
-- @org-element-pagaraph-separate@.
|
||||
endline :: OrgParser (F Inlines)
|
||||
endline = try $ do
|
||||
newline
|
||||
notFollowedBy blankline
|
||||
notFollowedBy' exampleLine
|
||||
notFollowedBy' hline
|
||||
notFollowedBy' noteMarker
|
||||
notFollowedBy' tableStart
|
||||
notFollowedBy' drawerStart
|
||||
notFollowedBy' headerStart
|
||||
notFollowedBy' metaLineStart
|
||||
notFollowedBy' latexEnvStart
|
||||
notFollowedBy' commentLineStart
|
||||
notFollowedBy' bulletListStart
|
||||
notFollowedBy' orderedListStart
|
||||
decEmphasisNewlinesCount
|
||||
guard =<< newlinesCountWithinLimits
|
||||
updateLastPreCharPos
|
||||
return B.space
|
||||
return . return $ B.space
|
||||
|
||||
link :: OrgParser Inlines
|
||||
link = explicitOrImageLink <|> selflinkOrImage <?> "link"
|
||||
footnote :: OrgParser (F Inlines)
|
||||
footnote = try $ inlineNote <|> referencedNote
|
||||
|
||||
explicitOrImageLink :: OrgParser Inlines
|
||||
inlineNote :: OrgParser (F Inlines)
|
||||
inlineNote = try $ do
|
||||
string "[fn:"
|
||||
ref <- many alphaNum
|
||||
char ':'
|
||||
note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
|
||||
when (not $ null ref) $
|
||||
addToNotesTable ("fn:" ++ ref, note)
|
||||
return $ B.note <$> note
|
||||
|
||||
referencedNote :: OrgParser (F Inlines)
|
||||
referencedNote = try $ do
|
||||
ref <- noteMarker
|
||||
return $ do
|
||||
notes <- asksF orgStateNotes'
|
||||
case lookup ref notes of
|
||||
Nothing -> return $ B.str $ "[" ++ ref ++ "]"
|
||||
Just contents -> do
|
||||
st <- askF
|
||||
let contents' = runF contents st{ orgStateNotes' = [] }
|
||||
return $ B.note contents'
|
||||
|
||||
noteMarker :: OrgParser String
|
||||
noteMarker = try $ do
|
||||
char '['
|
||||
choice [ many1Till digit (char ']')
|
||||
, (++) <$> string "fn:"
|
||||
<*> many1Till (noneOf "\n\r\t ") (char ']')
|
||||
]
|
||||
|
||||
linkOrImage :: OrgParser (F Inlines)
|
||||
linkOrImage = explicitOrImageLink <|> selflinkOrImage <?> "link or image"
|
||||
|
||||
explicitOrImageLink :: OrgParser (F Inlines)
|
||||
explicitOrImageLink = try $ do
|
||||
char '['
|
||||
src <- linkTarget
|
||||
title <- enclosedRaw (char '[') (char ']')
|
||||
title' <- parseFromString (mconcat <$> many inline) title
|
||||
char ']'
|
||||
return . B.link src ""
|
||||
$ if isImageFilename src && isImageFilename title
|
||||
then B.image title "" ""
|
||||
else title'
|
||||
return $ B.link src "" <$>
|
||||
if isImageFilename src && isImageFilename title
|
||||
then return $ B.image title mempty mempty
|
||||
else title'
|
||||
|
||||
selflinkOrImage :: OrgParser Inlines
|
||||
selflinkOrImage :: OrgParser (F Inlines)
|
||||
selflinkOrImage = try $ do
|
||||
src <- char '[' *> linkTarget <* char ']'
|
||||
return $ if isImageFilename src
|
||||
then B.image src "" ""
|
||||
else B.link src "" (B.str src)
|
||||
return . return $ if isImageFilename src
|
||||
then B.image src "" ""
|
||||
else B.link src "" (B.str src)
|
||||
|
||||
selfTarget :: OrgParser String
|
||||
selfTarget = try $ char '[' *> linkTarget <* char ']'
|
||||
|
@ -628,57 +864,56 @@ linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]")
|
|||
isImageFilename :: String -> Bool
|
||||
isImageFilename filename =
|
||||
any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
|
||||
any (\x -> (x++":") `isPrefixOf` filename) protocols ||
|
||||
':' `notElem` filename
|
||||
(any (\x -> (x++":") `isPrefixOf` filename) protocols ||
|
||||
':' `notElem` filename)
|
||||
where
|
||||
imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
|
||||
protocols = [ "file", "http", "https" ]
|
||||
|
||||
emph :: OrgParser Inlines
|
||||
emph = B.emph <$> emphasisBetween '/'
|
||||
emph :: OrgParser (F Inlines)
|
||||
emph = fmap B.emph <$> emphasisBetween '/'
|
||||
|
||||
strong :: OrgParser Inlines
|
||||
strong = B.strong <$> emphasisBetween '*'
|
||||
strong :: OrgParser (F Inlines)
|
||||
strong = fmap B.strong <$> emphasisBetween '*'
|
||||
|
||||
strikeout :: OrgParser Inlines
|
||||
strikeout = B.strikeout <$> emphasisBetween '+'
|
||||
strikeout :: OrgParser (F Inlines)
|
||||
strikeout = fmap B.strikeout <$> emphasisBetween '+'
|
||||
|
||||
-- There is no underline, so we use strong instead.
|
||||
underline :: OrgParser Inlines
|
||||
underline = B.strong <$> emphasisBetween '_'
|
||||
underline :: OrgParser (F Inlines)
|
||||
underline = fmap B.strong <$> emphasisBetween '_'
|
||||
|
||||
code :: OrgParser Inlines
|
||||
code = B.code <$> verbatimBetween '='
|
||||
code :: OrgParser (F Inlines)
|
||||
code = return . B.code <$> verbatimBetween '='
|
||||
|
||||
verbatim :: OrgParser Inlines
|
||||
verbatim = B.rawInline "" <$> verbatimBetween '~'
|
||||
verbatim :: OrgParser (F Inlines)
|
||||
verbatim = return . B.rawInline "" <$> verbatimBetween '~'
|
||||
|
||||
math :: OrgParser Inlines
|
||||
math = B.math <$> choice [ math1CharBetween '$'
|
||||
, mathStringBetween '$'
|
||||
, rawMathBetween "\\(" "\\)"
|
||||
]
|
||||
subscript :: OrgParser (F Inlines)
|
||||
subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr)
|
||||
|
||||
displayMath :: OrgParser Inlines
|
||||
displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
|
||||
, rawMathBetween "$$" "$$"
|
||||
]
|
||||
superscript :: OrgParser (F Inlines)
|
||||
superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
|
||||
|
||||
subscript :: OrgParser Inlines
|
||||
subscript = B.subscript <$> try (char '_' *> subOrSuperExpr)
|
||||
math :: OrgParser (F Inlines)
|
||||
math = return . B.math <$> choice [ math1CharBetween '$'
|
||||
, mathStringBetween '$'
|
||||
, rawMathBetween "\\(" "\\)"
|
||||
]
|
||||
|
||||
superscript :: OrgParser Inlines
|
||||
superscript = B.superscript <$> try (char '^' *> subOrSuperExpr)
|
||||
|
||||
symbol :: OrgParser Inlines
|
||||
symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
|
||||
displayMath :: OrgParser (F Inlines)
|
||||
displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
|
||||
, rawMathBetween "$$" "$$"
|
||||
]
|
||||
symbol :: OrgParser (F Inlines)
|
||||
symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
|
||||
where updatePositions c
|
||||
| c `elem` emphasisPreChars = c <$ updateLastPreCharPos
|
||||
| c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos
|
||||
| otherwise = return c
|
||||
|
||||
emphasisBetween :: Char
|
||||
-> OrgParser Inlines
|
||||
-> OrgParser (F Inlines)
|
||||
emphasisBetween c = try $ do
|
||||
startEmphasisNewlinesCounting emphasisAllowedNewlines
|
||||
res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
|
||||
|
@ -711,7 +946,7 @@ math1CharBetween c = try $ do
|
|||
char c
|
||||
res <- noneOf $ c:mathForbiddenBorderChars
|
||||
char c
|
||||
eof <|> lookAhead (oneOf mathPostChars) *> return ()
|
||||
eof <|> () <$ lookAhead (oneOf mathPostChars)
|
||||
return [res]
|
||||
|
||||
rawMathBetween :: String
|
||||
|
@ -734,12 +969,12 @@ emphasisEnd :: Char -> OrgParser Char
|
|||
emphasisEnd c = try $ do
|
||||
guard =<< notAfterForbiddenBorderChar
|
||||
char c
|
||||
eof <|> lookAhead (surroundingEmphasisChar >>= \x ->
|
||||
oneOf (x ++ emphasisPostChars))
|
||||
*> return ()
|
||||
eof <|> () <$ lookAhead acceptablePostChars
|
||||
updateLastStrPos
|
||||
popInlineCharStack
|
||||
return c
|
||||
where acceptablePostChars =
|
||||
surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars)
|
||||
|
||||
mathStart :: Char -> OrgParser Char
|
||||
mathStart c = try $
|
||||
|
@ -749,15 +984,15 @@ mathEnd :: Char -> OrgParser Char
|
|||
mathEnd c = try $ do
|
||||
res <- noneOf (c:mathForbiddenBorderChars)
|
||||
char c
|
||||
eof <|> lookAhead (oneOf mathPostChars *> pure ())
|
||||
eof <|> () <$ lookAhead (oneOf mathPostChars)
|
||||
return res
|
||||
|
||||
|
||||
enclosedInlines :: OrgParser a
|
||||
-> OrgParser b
|
||||
-> OrgParser Inlines
|
||||
-> OrgParser (F Inlines)
|
||||
enclosedInlines start end = try $
|
||||
trimInlines . mconcat <$> enclosed start end inline
|
||||
trimInlinesF . mconcat <$> enclosed start end inline
|
||||
|
||||
enclosedRaw :: OrgParser a
|
||||
-> OrgParser b
|
||||
|
@ -843,25 +1078,13 @@ notAfterForbiddenBorderChar = do
|
|||
return $ lastFBCPos /= Just pos
|
||||
|
||||
-- | Read a sub- or superscript expression
|
||||
subOrSuperExpr :: OrgParser Inlines
|
||||
subOrSuperExpr = try $ do
|
||||
choice [ balancedSexp '{' '}'
|
||||
, balancedSexp '(' ')' >>= return . enclosing ('(', ')')
|
||||
subOrSuperExpr :: OrgParser (F Inlines)
|
||||
subOrSuperExpr = try $
|
||||
choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r")
|
||||
, enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
|
||||
, simpleSubOrSuperString
|
||||
] >>= parseFromString (mconcat <$> many inline)
|
||||
|
||||
-- | Read a balanced sexp
|
||||
balancedSexp :: Char
|
||||
-> Char
|
||||
-> OrgParser String
|
||||
balancedSexp l r = try $ do
|
||||
char l
|
||||
res <- concat <$> many ( many1 (noneOf ([l, r] ++ "\n\r"))
|
||||
<|> try (string [l, r])
|
||||
<|> enclosing (l, r) <$> balancedSexp l r
|
||||
)
|
||||
char r
|
||||
return res
|
||||
where enclosing (left, right) s = left : s ++ [right]
|
||||
|
||||
simpleSubOrSuperString :: OrgParser String
|
||||
simpleSubOrSuperString = try $
|
||||
|
@ -869,8 +1092,3 @@ simpleSubOrSuperString = try $
|
|||
, mappend <$> option [] ((:[]) <$> oneOf "+-")
|
||||
<*> many1 alphaNum
|
||||
]
|
||||
|
||||
enclosing :: (a, a)
|
||||
-> [a]
|
||||
-> [a]
|
||||
enclosing (left, right) s = left : s ++ [right]
|
||||
|
|
|
@ -56,6 +56,7 @@ module Text.Pandoc.Shared (
|
|||
stringify,
|
||||
compactify,
|
||||
compactify',
|
||||
compactify'DL,
|
||||
Element (..),
|
||||
hierarchicalize,
|
||||
uniqueIdent,
|
||||
|
@ -82,7 +83,7 @@ module Text.Pandoc.Shared (
|
|||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Walk
|
||||
import Text.Pandoc.Generic
|
||||
import Text.Pandoc.Builder (Blocks, ToMetaValue(..))
|
||||
import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
|
||||
import qualified Text.Pandoc.Builder as B
|
||||
import qualified Text.Pandoc.UTF8 as UTF8
|
||||
import System.Environment (getProgName)
|
||||
|
@ -435,6 +436,21 @@ compactify' items =
|
|||
_ -> items
|
||||
_ -> items
|
||||
|
||||
-- | Like @compactify'@, but akts on items of definition lists.
|
||||
compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
|
||||
compactify'DL items =
|
||||
let defs = concatMap snd items
|
||||
defBlocks = reverse $ concatMap B.toList defs
|
||||
in case defBlocks of
|
||||
(Para x:_) -> if not $ any isPara (drop 1 defBlocks)
|
||||
then let (t,ds) = last items
|
||||
lastDef = B.toList $ last ds
|
||||
ds' = init ds ++
|
||||
[B.fromList $ init lastDef ++ [Plain x]]
|
||||
in init items ++ [(t, ds')]
|
||||
else items
|
||||
_ -> items
|
||||
|
||||
isPara :: Block -> Bool
|
||||
isPara (Para _) = True
|
||||
isPara _ = False
|
||||
|
@ -698,5 +714,3 @@ safeRead s = case reads s of
|
|||
(d,x):_
|
||||
| all isSpace x -> return d
|
||||
_ -> fail $ "Could not read `" ++ s ++ "'"
|
||||
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ import Tests.Arbitrary()
|
|||
import Text.Pandoc.Builder
|
||||
import Text.Pandoc
|
||||
import Data.List (intersperse)
|
||||
import Data.Monoid (mempty, mconcat)
|
||||
import Data.Monoid (mempty, mappend, mconcat)
|
||||
|
||||
org :: String -> Pandoc
|
||||
org = readOrg def
|
||||
|
@ -98,6 +98,10 @@ tests =
|
|||
"line \\\\ \nbreak" =?>
|
||||
para ("line" <> linebreak <> "break")
|
||||
|
||||
, "Inline note" =:
|
||||
"[fn::Schreib mir eine E-Mail]" =?>
|
||||
para (note $ para "Schreib mir eine E-Mail")
|
||||
|
||||
, "Markup-chars not occuring on word break are symbols" =:
|
||||
unlines [ "this+that+ +so+on"
|
||||
, "seven*eight* nine*"
|
||||
|
@ -359,29 +363,6 @@ tests =
|
|||
, "#+END_COMMENT"] =?>
|
||||
(mempty::Blocks)
|
||||
|
||||
, "Source Block in Text" =:
|
||||
unlines [ "Low German greeting"
|
||||
, " #+BEGIN_SRC haskell"
|
||||
, " main = putStrLn greeting"
|
||||
, " where greeting = \"moin\""
|
||||
, " #+END_SRC" ] =?>
|
||||
let attr' = ("", ["haskell"], [])
|
||||
code' = "main = putStrLn greeting\n" ++
|
||||
" where greeting = \"moin\"\n"
|
||||
in mconcat [ para $ spcSep [ "Low", "German", "greeting" ]
|
||||
, codeBlockWith attr' code'
|
||||
]
|
||||
|
||||
, "Source Block" =:
|
||||
unlines [ " #+BEGIN_SRC haskell"
|
||||
, " main = putStrLn greeting"
|
||||
, " where greeting = \"moin\""
|
||||
, " #+END_SRC" ] =?>
|
||||
let attr' = ("", ["haskell"], [])
|
||||
code' = "main = putStrLn greeting\n" ++
|
||||
" where greeting = \"moin\"\n"
|
||||
in codeBlockWith attr' code'
|
||||
|
||||
, "Figure" =:
|
||||
unlines [ "#+caption: A very courageous man."
|
||||
, "#+name: goodguy"
|
||||
|
@ -402,6 +383,48 @@ tests =
|
|||
] =?>
|
||||
para (image "the-red-queen.jpg" "fig:redqueen"
|
||||
"Used as a metapher in evolutionary biology.")
|
||||
|
||||
, "Footnote" =:
|
||||
unlines [ "A footnote[1]"
|
||||
, ""
|
||||
, "[1] First paragraph"
|
||||
, ""
|
||||
, "second paragraph"
|
||||
] =?>
|
||||
para (mconcat
|
||||
[ "A", space, "footnote"
|
||||
, note $ mconcat [ para ("First" <> space <> "paragraph")
|
||||
, para ("second" <> space <> "paragraph")
|
||||
]
|
||||
])
|
||||
|
||||
, "Two footnotes" =:
|
||||
unlines [ "Footnotes[fn:1][fn:2]"
|
||||
, ""
|
||||
, "[fn:1] First note."
|
||||
, ""
|
||||
, "[fn:2] Second note."
|
||||
] =?>
|
||||
para (mconcat
|
||||
[ "Footnotes"
|
||||
, note $ para ("First" <> space <> "note.")
|
||||
, note $ para ("Second" <> space <> "note.")
|
||||
])
|
||||
|
||||
, "Footnote followed by header" =:
|
||||
unlines [ "Another note[fn:yay]"
|
||||
, ""
|
||||
, "[fn:yay] This is great!"
|
||||
, ""
|
||||
, "** Headline"
|
||||
] =?>
|
||||
mconcat
|
||||
[ para (mconcat
|
||||
[ "Another", space, "note"
|
||||
, note $ para ("This" <> space <> "is" <> space <> "great!")
|
||||
])
|
||||
, header 2 "Headline"
|
||||
]
|
||||
]
|
||||
|
||||
, testGroup "Lists" $
|
||||
|
@ -537,13 +560,36 @@ tests =
|
|||
, ("TTL", [ plain $ "transistor-transistor" <> space <>
|
||||
"logic" ])
|
||||
, ("PSK", [ mconcat
|
||||
[ para $ "phase-shift" <> space <> "keying"
|
||||
, plain $ spcSep [ "a", "digital"
|
||||
, "modulation", "scheme" ]
|
||||
[ para $ "phase-shift" <> space <> "keying"
|
||||
, para $ spcSep [ "a", "digital"
|
||||
, "modulation", "scheme" ]
|
||||
]
|
||||
]
|
||||
)
|
||||
])
|
||||
]
|
||||
|
||||
, "Compact definition list" =:
|
||||
unlines [ "- ATP :: adenosine 5' triphosphate"
|
||||
, "- DNA :: deoxyribonucleic acid"
|
||||
, "- PCR :: polymerase chain reaction"
|
||||
, ""
|
||||
] =?>
|
||||
definitionList
|
||||
[ ("ATP", [ plain $ spcSep [ "adenosine", "5'", "triphosphate" ] ])
|
||||
, ("DNA", [ plain $ spcSep [ "deoxyribonucleic", "acid" ] ])
|
||||
, ("PCR", [ plain $ spcSep [ "polymerase", "chain", "reaction" ] ])
|
||||
]
|
||||
|
||||
, "Loose bullet list" =:
|
||||
unlines [ "- apple"
|
||||
, ""
|
||||
, "- orange"
|
||||
, ""
|
||||
, "- peach"
|
||||
] =?>
|
||||
bulletList [ para "apple"
|
||||
, para "orange"
|
||||
, para "peach"
|
||||
]
|
||||
]
|
||||
|
||||
, testGroup "Tables"
|
||||
|
@ -656,5 +702,126 @@ tests =
|
|||
[ [ plain "1" , plain "One" , plain "foo" ]
|
||||
, [ plain "2" , plain mempty , plain mempty ]
|
||||
]
|
||||
|
||||
, "Table with caption" =:
|
||||
unlines [ "#+CAPTION: Hitchhiker's Multiplication Table"
|
||||
, "| x | 6 |"
|
||||
, "| 9 | 42 |"
|
||||
] =?>
|
||||
table "Hitchhiker's Multiplication Table"
|
||||
[(AlignDefault, 0), (AlignDefault, 0)]
|
||||
[]
|
||||
[ [ plain "x", plain "6" ]
|
||||
, [ plain "9", plain "42" ]
|
||||
]
|
||||
]
|
||||
|
||||
, testGroup "Blocks and fragments"
|
||||
[ "Source block" =:
|
||||
unlines [ " #+BEGIN_SRC haskell"
|
||||
, " main = putStrLn greeting"
|
||||
, " where greeting = \"moin\""
|
||||
, " #+END_SRC" ] =?>
|
||||
let attr' = ("", ["haskell"], [])
|
||||
code' = "main = putStrLn greeting\n" ++
|
||||
" where greeting = \"moin\"\n"
|
||||
in codeBlockWith attr' code'
|
||||
|
||||
, "Source block between paragraphs" =:
|
||||
unlines [ "Low German greeting"
|
||||
, " #+BEGIN_SRC haskell"
|
||||
, " main = putStrLn greeting"
|
||||
, " where greeting = \"Moin!\""
|
||||
, " #+END_SRC" ] =?>
|
||||
let attr' = ("", ["haskell"], [])
|
||||
code' = "main = putStrLn greeting\n" ++
|
||||
" where greeting = \"Moin!\"\n"
|
||||
in mconcat [ para $ spcSep [ "Low", "German", "greeting" ]
|
||||
, codeBlockWith attr' code'
|
||||
]
|
||||
|
||||
, "Example block" =:
|
||||
unlines [ "#+begin_example"
|
||||
, "A chosen representation of"
|
||||
, "a rule."
|
||||
, "#+eND_exAMPle"
|
||||
] =?>
|
||||
codeBlockWith ("", ["example"], [])
|
||||
"A chosen representation of\na rule.\n"
|
||||
|
||||
, "HTML block" =:
|
||||
unlines [ "#+BEGIN_HTML"
|
||||
, "<aside>HTML5 is pretty nice.</aside>"
|
||||
, "#+END_HTML"
|
||||
] =?>
|
||||
rawBlock "html" "<aside>HTML5 is pretty nice.</aside>\n"
|
||||
|
||||
, "Quote block" =:
|
||||
unlines [ "#+BEGIN_QUOTE"
|
||||
, "/Niemand/ hat die Absicht, eine Mauer zu errichten!"
|
||||
, "#+END_QUOTE"
|
||||
] =?>
|
||||
blockQuote (para (spcSep [ emph "Niemand", "hat", "die", "Absicht,"
|
||||
, "eine", "Mauer", "zu", "errichten!"
|
||||
]))
|
||||
|
||||
, "Verse block" =:
|
||||
unlines [ "The first lines of Goethe's /Faust/:"
|
||||
, "#+begin_verse"
|
||||
, "Habe nun, ach! Philosophie,"
|
||||
, "Juristerei und Medizin,"
|
||||
, "Und leider auch Theologie!"
|
||||
, "Durchaus studiert, mit heißem Bemühn."
|
||||
, "#+end_verse"
|
||||
] =?>
|
||||
mconcat
|
||||
[ para $ spcSep [ "The", "first", "lines", "of"
|
||||
, "Goethe's", emph "Faust" <> ":"]
|
||||
, para $ mconcat
|
||||
[ spcSep [ "Habe", "nun,", "ach!", "Philosophie," ]
|
||||
, linebreak
|
||||
, spcSep [ "Juristerei", "und", "Medizin," ]
|
||||
, linebreak
|
||||
, spcSep [ "Und", "leider", "auch", "Theologie!" ]
|
||||
, linebreak
|
||||
, spcSep [ "Durchaus", "studiert,", "mit", "heißem", "Bemühn." ]
|
||||
]
|
||||
]
|
||||
|
||||
, "LaTeX fragment" =:
|
||||
unlines [ "\\begin{equation}"
|
||||
, "X_i = \\begin{cases}"
|
||||
, " G_{\\alpha(i)} & \\text{if }\\alpha(i-1) = \\alpha(i)\\\\"
|
||||
, " C_{\\alpha(i)} & \\text{otherwise}"
|
||||
, " \\end{cases}"
|
||||
, "\\end{equation}"
|
||||
] =?>
|
||||
rawBlock "latex"
|
||||
(unlines [ "\\begin{equation}"
|
||||
, "X_i = \\begin{cases}"
|
||||
, " G_{\\alpha(i)} & \\text{if }\\alpha(i-1) =" ++
|
||||
" \\alpha(i)\\\\"
|
||||
, " C_{\\alpha(i)} & \\text{otherwise}"
|
||||
, " \\end{cases}"
|
||||
, "\\end{equation}"
|
||||
])
|
||||
|
||||
, "Code block with caption" =:
|
||||
unlines [ "#+CAPTION: Functor laws in Haskell"
|
||||
, "#+NAME: functor-laws"
|
||||
, "#+BEGIN_SRC haskell"
|
||||
, "fmap id = id"
|
||||
, "fmap (p . q) = (fmap p) . (fmap q)"
|
||||
, "#+END_SRC"
|
||||
] =?>
|
||||
divWith
|
||||
nullAttr
|
||||
(mappend
|
||||
(plain $ spanWith ("", ["label"], [])
|
||||
(spcSep [ "Functor", "laws", "in", "Haskell" ]))
|
||||
(codeBlockWith ("functor-laws", ["haskell"], [])
|
||||
(unlines [ "fmap id = id"
|
||||
, "fmap (p . q) = (fmap p) . (fmap q)"
|
||||
])))
|
||||
]
|
||||
]
|
||||
|
|
Loading…
Add table
Reference in a new issue