pandoc/src/Text/Pandoc/Readers/Org/Blocks.hs
2017-06-03 13:35:19 +02:00

825 lines
29 KiB
Haskell

{-
Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RecordWildCards #-}
{- |
Module : Text.Pandoc.Readers.Org.Blocks
Copyright : Copyright (C) 2014-2017 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
Parsers for Org-mode block elements.
-}
module Text.Pandoc.Readers.Org.Blocks
( blockList
, meta
) where
import Text.Pandoc.Readers.Org.BlockStarts
import Text.Pandoc.Readers.Org.DocumentTree (documentTree, headlineToBlocks)
import Text.Pandoc.Readers.Org.Inlines
import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine)
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename,
originalLang, translateLang)
import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Shared (compactify, compactifyDL, safeRead)
import Control.Monad (foldM, guard, mzero, void)
import Data.Char (isSpace, toLower, toUpper)
import Data.Default (Default)
import Data.List (foldl', isPrefixOf)
import Data.Maybe (fromMaybe, isJust, isNothing)
import Data.Monoid ((<>))
--
-- parsing blocks
--
-- | Get a list of blocks.
blockList :: PandocMonad m => OrgParser m [Block]
blockList = do
headlines <- documentTree blocks inline
st <- getState
headlineBlocks <- headlineToBlocks $ runF headlines st
-- ignore first headline, it's the document's title
return . drop 1 . B.toList $ headlineBlocks
-- | Get the meta information saved in the state.
meta :: Monad m => OrgParser m Meta
meta = do
meta' <- metaExport
runF meta' <$> getState
blocks :: PandocMonad m => OrgParser m (F Blocks)
blocks = mconcat <$> manyTill block (void (lookAhead headerStart) <|> eof)
block :: PandocMonad m => OrgParser m (F Blocks)
block = choice [ mempty <$ blanklines
, table
, orgBlock
, figure
, example
, genericDrawer
, include
, specialLine
, horizontalRule
, list
, latexFragment
, noteBlock
, paraOrPlain
] <?> "block"
-- | Parse a horizontal rule into a block element
horizontalRule :: Monad m => OrgParser m (F Blocks)
horizontalRule = return B.horizontalRule <$ try hline
--
-- Block Attributes
--
-- | Attributes that may be added to figures (like a name or caption).
data BlockAttributes = BlockAttributes
{ blockAttrName :: Maybe String
, blockAttrLabel :: Maybe String
, blockAttrCaption :: Maybe (F Inlines)
, blockAttrKeyValues :: [(String, String)]
}
-- | Convert BlockAttributes into pandoc Attr
attrFromBlockAttributes :: BlockAttributes -> Attr
attrFromBlockAttributes BlockAttributes{..} =
let
ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues
classes = case lookup "class" blockAttrKeyValues of
Nothing -> []
Just clsStr -> words clsStr
kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues
in (ident, classes, kv)
stringyMetaAttribute :: Monad m => OrgParser m (String, String)
stringyMetaAttribute = try $ do
metaLineStart
attrName <- map toUpper <$> many1Till nonspaceChar (char ':')
skipSpaces
attrValue <- anyLine <|> ("" <$ newline)
return (attrName, attrValue)
blockAttributes :: PandocMonad m => OrgParser m BlockAttributes
blockAttributes = try $ do
kv <- many stringyMetaAttribute
guard $ all (attrCheck . fst) kv
let caption = foldl' (appendValues "CAPTION") Nothing kv
let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv
let name = lookup "NAME" kv
let label = lookup "LABEL" kv
caption' <- case caption of
Nothing -> return Nothing
Just s -> Just <$> parseFromString inlines (s ++ "\n")
kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs
return BlockAttributes
{ blockAttrName = name
, blockAttrLabel = label
, blockAttrCaption = caption'
, blockAttrKeyValues = kvAttrs'
}
where
attrCheck :: String -> Bool
attrCheck x = x `elem` ["NAME", "LABEL", "CAPTION", "ATTR_HTML", "RESULTS"]
appendValues :: String -> Maybe String -> (String, String) -> Maybe String
appendValues attrName accValue (key, value) =
if key /= attrName
then accValue
else case accValue of
Just acc -> Just $ acc ++ ' ':value
Nothing -> Just value
-- | Parse key-value pairs for HTML attributes
keyValues :: Monad m => OrgParser m [(String, String)]
keyValues = try $
manyTill ((,) <$> key <*> value) newline
where
key :: Monad m => OrgParser m String
key = try $ skipSpaces *> char ':' *> many1 nonspaceChar
value :: Monad m => OrgParser m String
value = skipSpaces *> manyTill anyChar endOfValue
endOfValue :: Monad m => OrgParser m ()
endOfValue =
lookAhead $ (() <$ try (many1 spaceChar <* key))
<|> () <$ newline
--
-- Org Blocks (#+BEGIN_... / #+END_...)
--
-- | Read an org-mode block delimited by #+BEGIN_TYPE and #+END_TYPE.
orgBlock :: PandocMonad m => OrgParser m (F Blocks)
orgBlock = try $ do
blockAttrs <- blockAttributes
blkType <- blockHeaderStart
($ blkType) $
case map toLower blkType of
"export" -> exportBlock
"comment" -> rawBlockLines (const mempty)
"html" -> rawBlockLines (return . B.rawBlock (lowercase blkType))
"latex" -> rawBlockLines (return . B.rawBlock (lowercase blkType))
"ascii" -> rawBlockLines (return . B.rawBlock (lowercase blkType))
"example" -> rawBlockLines (return . exampleCode)
"quote" -> parseBlockLines (fmap B.blockQuote)
"verse" -> verseBlock
"src" -> codeBlock blockAttrs
_ -> parseBlockLines $
let (ident, classes, kv) = attrFromBlockAttributes blockAttrs
in fmap $ B.divWith (ident, classes ++ [blkType], kv)
where
blockHeaderStart :: Monad m => OrgParser m String
blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord
lowercase :: String -> String
lowercase = map toLower
rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks)
rawBlockLines f blockType = ignHeaders *> (f <$> rawBlockContent blockType)
parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks)
parseBlockLines f blockType = ignHeaders *> (f <$> parsedBlockContent)
where
parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks)
parsedBlockContent = try $ do
raw <- rawBlockContent blockType
parseFromString blocks (raw ++ "\n")
-- | Read the raw string content of a block
rawBlockContent :: Monad m => String -> OrgParser m String
rawBlockContent blockType = try $ do
blkLines <- manyTill rawLine blockEnder
tabLen <- getOption readerTabStop
return
. unlines
. stripIndent
. map (tabsToSpaces tabLen . commaEscaped)
$ blkLines
where
rawLine :: Monad m => OrgParser m String
rawLine = try $ ("" <$ blankline) <|> anyLine
blockEnder :: Monad m => OrgParser m ()
blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType)
stripIndent :: [String] -> [String]
stripIndent strs = map (drop (shortestIndent strs)) strs
shortestIndent :: [String] -> Int
shortestIndent = foldr (min . length . takeWhile isSpace) maxBound
. filter (not . null)
tabsToSpaces :: Int -> String -> String
tabsToSpaces _ [] = []
tabsToSpaces tabLen cs'@(c:cs) =
case c of
' ' -> ' ':tabsToSpaces tabLen cs
'\t' -> replicate tabLen ' ' ++ tabsToSpaces tabLen cs
_ -> cs'
commaEscaped :: String -> String
commaEscaped (',':cs@('*':_)) = cs
commaEscaped (',':cs@('#':'+':_)) = cs
commaEscaped (' ':cs) = ' ':commaEscaped cs
commaEscaped ('\t':cs) = '\t':commaEscaped cs
commaEscaped cs = cs
-- | Read but ignore all remaining block headers.
ignHeaders :: Monad m => OrgParser m ()
ignHeaders = (() <$ newline) <|> (() <$ anyLine)
-- | Read a block containing code intended for export in specific backends
-- only.
exportBlock :: Monad m => String -> OrgParser m (F Blocks)
exportBlock blockType = try $ do
exportType <- skipSpaces *> orgArgWord <* ignHeaders
contents <- rawBlockContent blockType
returnF (B.rawBlock (map toLower exportType) contents)
verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks)
verseBlock blockType = try $ do
ignHeaders
content <- rawBlockContent blockType
fmap B.lineBlock . sequence
<$> mapM parseVerseLine (lines content)
where
-- replace initial spaces with nonbreaking spaces to preserve
-- indentation, parse the rest as normal inline
parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines)
parseVerseLine cs = do
let (initialSpaces, indentedLine) = span isSpace cs
let nbspIndent = if null initialSpaces
then mempty
else B.str $ map (const '\160') initialSpaces
line <- parseFromString inlines (indentedLine ++ "\n")
return (trimInlinesF $ pure nbspIndent <> line)
-- | Read a code block and the associated results block if present. Which of
-- boths blocks is included in the output is determined using the "exports"
-- argument in the block header.
codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks)
codeBlock blockAttrs blockType = do
skipSpaces
(classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
content <- rawBlockContent blockType
resultsContent <- option mempty babelResultsBlock
let id' = fromMaybe mempty $ blockAttrName blockAttrs
let codeBlck = B.codeBlockWith ( id', classes, kv ) content
let labelledBlck = maybe (pure codeBlck)
(labelDiv codeBlck)
(blockAttrCaption blockAttrs)
return $
(if exportsCode kv then labelledBlck else mempty) <>
(if exportsResults kv then resultsContent else mempty)
where
labelDiv :: Blocks -> F Inlines -> F Blocks
labelDiv blk value =
B.divWith nullAttr <$> (mappend <$> labelledBlock value <*> pure blk)
labelledBlock :: F Inlines -> F Blocks
labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
exportsCode :: [(String, String)] -> Bool
exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports"
exportsResults :: [(String, String)] -> Bool
exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports"
-- | Parse the result of an evaluated babel code block.
babelResultsBlock :: PandocMonad m => OrgParser m (F Blocks)
babelResultsBlock = try $ do
blanklines
resultsMarker <|>
(lookAhead . void . try $
manyTill (metaLineStart *> anyLineNewline) resultsMarker)
block
where
resultsMarker = try . void $ stringAnyCase "#+RESULTS:" *> blankline
-- | Parse code block arguments
codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)])
codeHeaderArgs = try $ do
language <- skipSpaces *> orgArgWord
(switchClasses, switchKv) <- switchesAsAttributes
parameters <- manyTill blockOption newline
return ( translateLang language : switchClasses
, originalLang language <> switchKv <> parameters
)
switchesAsAttributes :: Monad m => OrgParser m ([String], [(String, String)])
switchesAsAttributes = try $ do
switches <- skipSpaces *> try (switch `sepBy` many1 spaceChar)
return $ foldr addToAttr ([], []) switches
where
addToAttr :: (Char, Maybe String, SwitchPolarity)
-> ([String], [(String, String)])
-> ([String], [(String, String)])
addToAttr ('n', lineNum, pol) (cls, kv) =
let kv' = case lineNum of
Just num -> ("startFrom", num):kv
Nothing -> kv
cls' = case pol of
SwitchPlus -> "continuedSourceBlock":cls
SwitchMinus -> cls
in ("numberLines":cls', kv')
addToAttr _ x = x
-- | Whether a switch flag is specified with @+@ or @-@.
data SwitchPolarity = SwitchPlus | SwitchMinus
deriving (Show, Eq)
-- | Parses a switch's polarity.
switchPolarity :: Monad m => OrgParser m SwitchPolarity
switchPolarity = (SwitchMinus <$ char '-') <|> (SwitchPlus <$ char '+')
-- | Parses a source block switch option.
switch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity)
switch = try $ lineNumberSwitch <|> labelSwitch <|> simpleSwitch
where
simpleSwitch = (\pol c -> (c, Nothing, pol)) <$> switchPolarity <*> letter
labelSwitch = genericSwitch 'l' $
char '"' *> many1Till nonspaceChar (char '"')
-- | Generic source block switch-option parser.
genericSwitch :: Monad m
=> Char
-> OrgParser m String
-> OrgParser m (Char, Maybe String, SwitchPolarity)
genericSwitch c p = try $ do
polarity <- switchPolarity <* char c <* skipSpaces
arg <- optionMaybe p
return (c, arg, polarity)
-- | Reads a line number switch option. The line number switch can be used with
-- example and source blocks.
lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity)
lineNumberSwitch = genericSwitch 'n' (many digit)
blockOption :: Monad m => OrgParser m (String, String)
blockOption = try $ do
argKey <- orgArgKey
paramValue <- option "yes" orgParamValue
return (argKey, paramValue)
orgParamValue :: Monad m => OrgParser m String
orgParamValue = try $
skipSpaces
*> notFollowedBy orgArgKey
*> noneOf "\n\r" `many1Till` endOfValue
<* skipSpaces
where
endOfValue = lookAhead $ try (skipSpaces <* oneOf "\n\r")
<|> try (skipSpaces1 <* orgArgKey)
--
-- Drawers
--
-- | A generic drawer which has no special meaning for org-mode.
-- Whether or not this drawer is included in the output depends on the drawers
-- export setting.
genericDrawer :: PandocMonad m => OrgParser m (F Blocks)
genericDrawer = try $ do
name <- map toUpper <$> drawerStart
content <- manyTill drawerLine (try drawerEnd)
state <- getState
-- Include drawer if it is explicitly included in or not explicitly excluded
-- from the list of drawers that should be exported. PROPERTIES drawers are
-- never exported.
case exportDrawers . orgStateExportSettings $ state of
_ | name == "PROPERTIES" -> return mempty
Left names | name `elem` names -> return mempty
Right names | name `notElem` names -> return mempty
_ -> drawerDiv name <$> parseLines content
where
parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks)
parseLines = parseFromString blocks . (++ "\n") . unlines
drawerDiv :: String -> F Blocks -> F Blocks
drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty)
drawerLine :: Monad m => OrgParser m String
drawerLine = anyLine
drawerEnd :: Monad m => OrgParser m String
drawerEnd = try $
skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
--
-- Figures
--
-- | Figures or an image paragraph (i.e. an image on a line by itself). Only
-- images with a caption attribute are interpreted as figures.
figure :: PandocMonad m => OrgParser m (F Blocks)
figure = try $ do
figAttrs <- blockAttributes
src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph
case cleanLinkString src of
Nothing -> mzero
Just imgSrc -> do
guard (isImageFilename imgSrc)
let isFigure = isJust $ blockAttrCaption figAttrs
return $ imageBlock isFigure figAttrs imgSrc
where
selfTarget :: PandocMonad m => OrgParser m String
selfTarget = try $ char '[' *> linkTarget <* char ']'
imageBlock :: Bool -> BlockAttributes -> String -> F Blocks
imageBlock isFigure figAttrs imgSrc =
let
figName = fromMaybe mempty $ blockAttrName figAttrs
figLabel = fromMaybe mempty $ blockAttrLabel figAttrs
figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
figKeyVals = blockAttrKeyValues figAttrs
attr = (figLabel, mempty, figKeyVals)
figTitle = (if isFigure then withFigPrefix else id) figName
in
B.para . B.imageWith attr imgSrc figTitle <$> figCaption
withFigPrefix :: String -> String
withFigPrefix cs =
if "fig:" `isPrefixOf` cs
then cs
else "fig:" ++ cs
-- | Succeeds if looking at the end of the current paragraph
endOfParagraph :: Monad m => OrgParser m ()
endOfParagraph = try $ skipSpaces *> newline *> endOfBlock
--
-- Examples
--
-- | Example code marked up by a leading colon.
example :: Monad m => OrgParser m (F Blocks)
example = try $ returnF . exampleCode =<< unlines <$> many1 exampleLine
where
exampleLine :: Monad m => OrgParser m String
exampleLine = try $ exampleLineStart *> anyLine
exampleCode :: String -> Blocks
exampleCode = B.codeBlockWith ("", ["example"], [])
--
-- Comments, Options and Metadata
--
specialLine :: PandocMonad m => OrgParser m (F Blocks)
specialLine = fmap return . try $ rawExportLine <|> metaLine <|> commentLine
-- | Include the content of a file.
include :: PandocMonad m => OrgParser m (F Blocks)
include = try $ do
metaLineStart <* stringAnyCase "include:" <* skipSpaces
filename <- includeTarget
blockType <- optionMaybe $ skipSpaces *> many1 alphaNum
blocksParser <- case blockType of
Just "example" ->
return $ pure . B.codeBlock <$> parseRaw
Just "export" -> do
format <- skipSpaces *> many (noneOf "\n\r\t ")
return $ pure . B.rawBlock format <$> parseRaw
Just "src" -> do
language <- skipSpaces *> many (noneOf "\n\r\t ")
let attr = (mempty, [language], mempty)
return $ pure . B.codeBlockWith attr <$> parseRaw
_ -> return $ pure . B.fromList <$> blockList
anyLine
insertIncludedFileF blocksParser ["."] filename
where
includeTarget :: PandocMonad m => OrgParser m FilePath
includeTarget = do
char '"'
manyTill (noneOf "\n\r\t") (char '"')
parseRaw :: PandocMonad m => OrgParser m String
parseRaw = many anyChar
rawExportLine :: PandocMonad m => OrgParser m Blocks
rawExportLine = try $ do
metaLineStart
key <- metaKey
if key `elem` ["latex", "html", "texinfo", "beamer"]
then B.rawBlock key <$> anyLine
else mzero
commentLine :: Monad m => OrgParser m Blocks
commentLine = commentLineStart *> anyLine *> pure mempty
--
-- Tables
--
data ColumnProperty = ColumnProperty
{ columnAlignment :: Maybe Alignment
, columnRelWidth :: Maybe Int
} deriving (Show, Eq)
instance Default ColumnProperty where
def = ColumnProperty Nothing Nothing
data OrgTableRow = OrgContentRow (F [Blocks])
| OrgAlignRow [ColumnProperty]
| OrgHlineRow
-- OrgTable is strongly related to the pandoc table ADT. Using the same
-- (i.e. pandoc-global) ADT would mean that the reader would break if the
-- global structure was to be changed, which would be bad. The final table
-- should be generated using a builder function.
data OrgTable = OrgTable
{ orgTableColumnProperties :: [ColumnProperty]
, orgTableHeader :: [Blocks]
, orgTableRows :: [[Blocks]]
}
table :: PandocMonad m => OrgParser m (F Blocks)
table = gridTableWith blocks True <|> orgTable
-- | A normal org table
orgTable :: PandocMonad m => OrgParser m (F Blocks)
orgTable = try $ do
-- don't allow a table on the first line of a list item; org requires that
-- tables start at first non-space character on the line
let isFirstInListItem st = orgStateParserContext st == ListItemState &&
isNothing (orgStateLastPreCharPos st)
guard =<< not . isFirstInListItem <$> getState
blockAttrs <- blockAttributes
lookAhead tableStart
do
rows <- tableRows
let caption = fromMaybe (return mempty) $ blockAttrCaption blockAttrs
return $ (<$> caption) . orgToPandocTable . normalizeTable =<< rowsToTable rows
orgToPandocTable :: OrgTable
-> Inlines
-> Blocks
orgToPandocTable (OrgTable colProps heads lns) caption =
let totalWidth = if any isJust (map columnRelWidth colProps)
then Just . sum $ map (fromMaybe 1 . columnRelWidth) colProps
else Nothing
in B.table caption (map (convertColProp totalWidth) colProps) heads lns
where
convertColProp :: Maybe Int -> ColumnProperty -> (Alignment, Double)
convertColProp totalWidth colProp =
let
align' = fromMaybe AlignDefault $ columnAlignment colProp
width' = fromMaybe 0 $ (\w t -> (fromIntegral w / fromIntegral t))
<$> columnRelWidth colProp
<*> totalWidth
in (align', width')
tableRows :: PandocMonad m => OrgParser m [OrgTableRow]
tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
tableContentRow :: PandocMonad m => OrgParser m OrgTableRow
tableContentRow = try $
OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline)
tableContentCell :: PandocMonad m => OrgParser m (F Blocks)
tableContentCell = try $
fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell
tableAlignRow :: Monad m => OrgParser m OrgTableRow
tableAlignRow = try $ do
tableStart
colProps <- many1Till columnPropertyCell newline
-- Empty rows are regular (i.e. content) rows, not alignment rows.
guard $ any (/= def) colProps
return $ OrgAlignRow colProps
columnPropertyCell :: Monad m => OrgParser m ColumnProperty
columnPropertyCell = emptyCell <|> propCell <?> "alignment info"
where
emptyCell = ColumnProperty Nothing Nothing <$ try (skipSpaces *> endOfCell)
propCell = try $ ColumnProperty
<$> (skipSpaces
*> char '<'
*> optionMaybe tableAlignFromChar)
<*> (optionMaybe (many1 digit >>= safeRead)
<* char '>'
<* emptyCell)
tableAlignFromChar :: Monad m => OrgParser m Alignment
tableAlignFromChar = try $
choice [ char 'l' *> return AlignLeft
, char 'c' *> return AlignCenter
, char 'r' *> return AlignRight
]
tableHline :: Monad m => OrgParser m OrgTableRow
tableHline = try $
OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
endOfCell :: Monad m => OrgParser m Char
endOfCell = try $ char '|' <|> lookAhead newline
rowsToTable :: [OrgTableRow]
-> F OrgTable
rowsToTable = foldM rowToContent emptyTable
where emptyTable = OrgTable mempty mempty mempty
normalizeTable :: OrgTable -> OrgTable
normalizeTable (OrgTable colProps heads rows) =
OrgTable colProps' heads rows
where
refRow = if heads /= mempty
then heads
else case rows of
(r:_) -> r
_ -> mempty
cols = length refRow
fillColumns base padding = take cols $ base ++ repeat padding
colProps' = fillColumns colProps def
-- One or more horizontal rules after the first content line mark the previous
-- line as a header. All other horizontal lines are discarded.
rowToContent :: OrgTable
-> OrgTableRow
-> F OrgTable
rowToContent tbl row =
case row of
OrgHlineRow -> return singleRowPromotedToHeader
OrgAlignRow props -> return . setProperties $ props
OrgContentRow cs -> appendToBody cs
where
singleRowPromotedToHeader :: OrgTable
singleRowPromotedToHeader = case tbl of
OrgTable{ orgTableHeader = [], orgTableRows = [b] } ->
tbl{ orgTableHeader = b , orgTableRows = [] }
_ -> tbl
setProperties :: [ColumnProperty] -> OrgTable
setProperties ps = tbl{ orgTableColumnProperties = ps }
appendToBody :: F [Blocks] -> F OrgTable
appendToBody frow = do
newRow <- frow
let oldRows = orgTableRows tbl
-- NOTE: This is an inefficient O(n) operation. This should be changed
-- if performance ever becomes a problem.
return tbl{ orgTableRows = oldRows ++ [newRow] }
--
-- LaTeX fragments
--
latexFragment :: Monad m => OrgParser m (F Blocks)
latexFragment = try $ do
envName <- latexEnvStart
content <- mconcat <$> manyTill anyLineNewline (latexEnd envName)
returnF $ B.rawBlock "latex" (content `inLatexEnv` envName)
where
c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n"
, c
, "\\end{", e, "}\n"
]
latexEnd :: Monad m => String -> OrgParser m ()
latexEnd envName = try $
() <$ skipSpaces
<* string ("\\end{" ++ envName ++ "}")
<* blankline
--
-- Footnote defintions
--
noteBlock :: PandocMonad m => OrgParser m (F Blocks)
noteBlock = try $ do
ref <- noteMarker <* skipSpaces <* updateLastPreCharPos
content <- mconcat <$> blocksTillHeaderOrNote
addToNotesTable (ref, content)
return mempty
where
blocksTillHeaderOrNote =
many1Till block (eof <|> () <$ lookAhead noteMarker
<|> () <$ lookAhead headerStart)
-- Paragraphs or Plain text
paraOrPlain :: PandocMonad m => OrgParser m (F Blocks)
paraOrPlain = try $ do
-- Make sure we are not looking at a headline
notFollowedBy' (char '*' *> oneOf " *")
ils <- inlines
nl <- option False (newline *> return True)
-- Read block as paragraph, except if we are in a list context and the block
-- is directly followed by a list item, in which case the block is read as
-- plain text.
try (guard nl
*> notFollowedBy (inList *> (() <$ orderedListStart <|> bulletListStart))
*> return (B.para <$> ils))
<|> return (B.plain <$> ils)
--
-- list blocks
--
list :: PandocMonad m => OrgParser m (F Blocks)
list = choice [ definitionList, bulletList, orderedList ] <?> "list"
definitionList :: PandocMonad m => OrgParser m (F Blocks)
definitionList = try $ do n <- lookAhead (bulletListStart' Nothing)
fmap (B.definitionList . compactifyDL) . sequence
<$> many1 (definitionListItem $ bulletListStart' (Just n))
bulletList :: PandocMonad m => OrgParser m (F Blocks)
bulletList = try $ do n <- lookAhead (bulletListStart' Nothing)
fmap (B.bulletList . compactify) . sequence
<$> many1 (listItem (bulletListStart' $ Just n))
orderedList :: PandocMonad m => OrgParser m (F Blocks)
orderedList = fmap (B.orderedList . compactify) . sequence
<$> many1 (listItem orderedListStart)
bulletListStart' :: Monad m => Maybe Int -> OrgParser m Int
-- returns length of bulletList prefix, inclusive of marker
bulletListStart' Nothing = do ind <- length <$> many spaceChar
oneOf (bullets $ ind == 0)
skipSpaces1
return (ind + 1)
bulletListStart' (Just n) = do count (n-1) spaceChar
oneOf (bullets $ n == 1)
many1 spaceChar
return n
-- Unindented lists are legal, but they can't use '*' bullets.
-- We return n to maintain compatibility with the generic listItem.
bullets :: Bool -> String
bullets unindented = if unindented then "+-" else "*+-"
definitionListItem :: PandocMonad m
=> OrgParser m Int
-> OrgParser m (F (Inlines, [Blocks]))
definitionListItem parseMarkerGetLength = try $ do
markerLength <- parseMarkerGetLength
term <- manyTill (noneOf "\n\r") (try definitionMarker)
line1 <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
cont <- concat <$> many (listContinuation markerLength)
term' <- parseFromString inlines term
contents' <- parseFromString blocks $ line1 ++ blank ++ cont
return $ (,) <$> term' <*> fmap (:[]) contents'
where
definitionMarker =
spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)
-- parse raw text for one list item, excluding start marker and continuations
listItem :: PandocMonad m
=> OrgParser m Int
-> OrgParser m (F Blocks)
listItem start = try . withContext ListItemState $ do
markerLength <- try start
firstLine <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
rest <- concat <$> many (listContinuation markerLength)
parseFromString blocks $ firstLine ++ blank ++ rest
-- continuation of a list item - indented and separated by blankline or endline.
-- Note: nested lists are parsed as continuations.
listContinuation :: Monad m => Int
-> OrgParser m String
listContinuation markerLength = try $
notFollowedBy' blankline
*> (mappend <$> (concat <$> many1 listLine)
<*> many blankline)
where
listLine = try $ indentWith markerLength *> anyLineNewline