2017-03-04 13:03:41 +01:00
|
|
|
{-# LANGUAGE RelaxedPolyRec #-}
|
2015-02-18 13:04:08 +00:00
|
|
|
{-# LANGUAGE ScopedTypeVariables #-}
|
2016-11-28 17:13:46 -05:00
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
{-
|
2015-04-26 10:18:29 -07:00
|
|
|
Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
-}
|
|
|
|
|
|
|
|
{- |
|
|
|
|
Module : Text.Pandoc.Readers.Markdown
|
2015-04-26 10:18:29 -07:00
|
|
|
Copyright : Copyright (C) 2006-2015 John MacFarlane
|
2012-07-26 22:32:53 -07:00
|
|
|
License : GNU GPL, version 2 or above
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
|
|
|
Stability : alpha
|
|
|
|
Portability : portable
|
|
|
|
|
|
|
|
Conversion of markdown-formatted plain text to 'Pandoc' document.
|
|
|
|
-}
|
2016-12-05 11:38:36 +01:00
|
|
|
module Text.Pandoc.Readers.Markdown ( readMarkdown ) where
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2017-03-04 13:03:41 +01:00
|
|
|
import Control.Monad
|
2017-03-05 01:36:40 +01:00
|
|
|
import Control.Monad.Except (throwError)
|
2017-03-04 13:03:41 +01:00
|
|
|
import Data.Char (isAlphaNum, isPunctuation, isSpace, toLower)
|
|
|
|
import qualified Data.HashMap.Strict as H
|
|
|
|
import Data.List (findIndex, intercalate, sortBy, transpose)
|
2010-05-08 10:03:02 -07:00
|
|
|
import qualified Data.Map as M
|
2008-08-04 03:15:34 +00:00
|
|
|
import Data.Maybe
|
2017-03-04 13:03:41 +01:00
|
|
|
import Data.Monoid ((<>))
|
|
|
|
import Data.Ord (comparing)
|
|
|
|
import Data.Scientific (base10Exponent, coefficient)
|
2017-03-05 10:24:39 +01:00
|
|
|
import qualified Data.Set as Set
|
2013-05-10 22:53:35 -07:00
|
|
|
import Data.Text (Text)
|
2017-03-04 13:03:41 +01:00
|
|
|
import qualified Data.Text as T
|
|
|
|
import qualified Data.Vector as V
|
|
|
|
import Data.Yaml (ParseException (..), YamlException (..), YamlMark (..))
|
2013-05-10 22:53:35 -07:00
|
|
|
import qualified Data.Yaml as Yaml
|
2017-03-04 13:03:41 +01:00
|
|
|
import System.FilePath (addExtension, takeExtension)
|
|
|
|
import Text.HTML.TagSoup
|
|
|
|
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
import qualified Text.Pandoc.Builder as B
|
2017-03-04 13:03:41 +01:00
|
|
|
import Text.Pandoc.Class (PandocMonad, report)
|
|
|
|
import Text.Pandoc.Definition
|
|
|
|
import Text.Pandoc.Emoji (emojis)
|
|
|
|
import Text.Pandoc.Generic (bottomUp)
|
2017-02-10 23:59:47 +01:00
|
|
|
import Text.Pandoc.Logging
|
2017-03-04 13:03:41 +01:00
|
|
|
import Text.Pandoc.Options
|
|
|
|
import Text.Pandoc.Parsing hiding (tableWith)
|
2015-12-12 17:28:52 -08:00
|
|
|
import Text.Pandoc.Pretty (charWidth)
|
2017-03-04 13:03:41 +01:00
|
|
|
import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag,
|
|
|
|
isCommentTag, isInlineTag, isTextTag)
|
|
|
|
import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
|
|
|
|
import Text.Pandoc.Shared
|
|
|
|
import qualified Text.Pandoc.UTF8 as UTF8
|
2013-02-28 22:05:22 -08:00
|
|
|
import Text.Pandoc.XML (fromEntities)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
type MarkdownParser m = ParserT [Char] ParserState m
|
2013-01-03 20:32:15 -08:00
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
-- | Read markdown from an input string and return a Pandoc document.
|
2016-11-28 17:13:46 -05:00
|
|
|
readMarkdown :: PandocMonad m
|
|
|
|
=> ReaderOptions -- ^ Reader options
|
2012-07-25 22:35:41 -07:00
|
|
|
-> String -- ^ String to parse (assuming @'\n'@ line endings)
|
2016-11-28 17:13:46 -05:00
|
|
|
-> m Pandoc
|
|
|
|
readMarkdown opts s = do
|
|
|
|
parsed <- (readWithM parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
|
|
|
|
case parsed of
|
|
|
|
Right result -> return result
|
2017-03-04 13:03:41 +01:00
|
|
|
Left e -> throwError e
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
trimInlinesF :: F Inlines -> F Inlines
|
|
|
|
trimInlinesF = liftM trimInlines
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
--
|
|
|
|
-- Constants and data structure definitions
|
|
|
|
--
|
|
|
|
|
2011-01-19 15:06:56 -08:00
|
|
|
isBulletListMarker :: Char -> Bool
|
|
|
|
isBulletListMarker '*' = True
|
|
|
|
isBulletListMarker '+' = True
|
|
|
|
isBulletListMarker '-' = True
|
|
|
|
isBulletListMarker _ = False
|
|
|
|
|
|
|
|
isHruleChar :: Char -> Bool
|
|
|
|
isHruleChar '*' = True
|
|
|
|
isHruleChar '-' = True
|
|
|
|
isHruleChar '_' = True
|
|
|
|
isHruleChar _ = False
|
2008-07-11 16:33:21 +00:00
|
|
|
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
setextHChars :: String
|
2007-11-03 23:27:58 +00:00
|
|
|
setextHChars = "=-"
|
|
|
|
|
2011-01-19 15:14:23 -08:00
|
|
|
isBlank :: Char -> Bool
|
|
|
|
isBlank ' ' = True
|
|
|
|
isBlank '\t' = True
|
|
|
|
isBlank '\n' = True
|
|
|
|
isBlank _ = False
|
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
--
|
|
|
|
-- auxiliary functions
|
|
|
|
--
|
|
|
|
|
2014-09-26 13:32:08 +04:00
|
|
|
-- | Succeeds when we're in list context.
|
2016-11-28 17:13:46 -05:00
|
|
|
inList :: PandocMonad m => MarkdownParser m ()
|
2014-09-26 13:32:08 +04:00
|
|
|
inList = do
|
|
|
|
ctx <- stateParserContext <$> getState
|
|
|
|
guard (ctx == ListItemState)
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
spnl :: PandocMonad m => ParserT [Char] st m ()
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
spnl = try $ do
|
|
|
|
skipSpaces
|
|
|
|
optional newline
|
|
|
|
skipSpaces
|
|
|
|
notFollowedBy (char '\n')
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
indentSpaces :: PandocMonad m => MarkdownParser m String
|
2007-11-03 23:27:58 +00:00
|
|
|
indentSpaces = try $ do
|
2012-07-25 12:31:16 -07:00
|
|
|
tabStop <- getOption readerTabStop
|
2009-04-29 19:28:23 +00:00
|
|
|
count tabStop (char ' ') <|>
|
|
|
|
string "\t" <?> "indentation"
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
nonindentSpaces :: PandocMonad m => MarkdownParser m String
|
2007-11-03 23:27:58 +00:00
|
|
|
nonindentSpaces = do
|
2017-03-11 18:42:39 +01:00
|
|
|
n <- skipNonindentSpaces
|
|
|
|
return $ replicate n ' '
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2014-08-12 11:10:48 -07:00
|
|
|
-- returns number of spaces parsed
|
2016-11-28 17:13:46 -05:00
|
|
|
skipNonindentSpaces :: PandocMonad m => MarkdownParser m Int
|
2009-04-29 19:28:31 +00:00
|
|
|
skipNonindentSpaces = do
|
2012-07-25 12:31:16 -07:00
|
|
|
tabStop <- getOption readerTabStop
|
2017-03-11 18:42:39 +01:00
|
|
|
atMostSpaces (tabStop - 1) <* notFollowedBy spaceChar
|
2009-04-29 19:28:31 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
atMostSpaces :: PandocMonad m => Int -> MarkdownParser m Int
|
2014-08-12 11:10:48 -07:00
|
|
|
atMostSpaces n
|
|
|
|
| n > 0 = (char ' ' >> (+1) <$> atMostSpaces (n-1)) <|> return 0
|
|
|
|
| otherwise = return 0
|
2009-04-29 19:28:31 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
litChar :: PandocMonad m => MarkdownParser m Char
|
2011-12-05 20:55:23 -08:00
|
|
|
litChar = escapedChar'
|
2013-02-15 20:27:29 -08:00
|
|
|
<|> characterReference
|
2011-12-05 20:55:23 -08:00
|
|
|
<|> noneOf "\n"
|
2013-01-16 11:25:17 -08:00
|
|
|
<|> try (newline >> notFollowedBy blankline >> return ' ')
|
2011-12-05 20:55:23 -08:00
|
|
|
|
2007-12-24 04:22:31 +00:00
|
|
|
-- | Parse a sequence of inline elements between square brackets,
|
|
|
|
-- including inlines between balanced pairs of square brackets.
|
2016-11-28 17:13:46 -05:00
|
|
|
inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines)
|
2015-04-19 17:02:59 -07:00
|
|
|
inlinesInBalancedBrackets = do
|
2007-12-24 04:22:31 +00:00
|
|
|
char '['
|
2015-04-19 17:02:59 -07:00
|
|
|
(_, raw) <- withRaw $ charsInBalancedBrackets 1
|
|
|
|
guard $ not $ null raw
|
|
|
|
parseFromString (trimInlinesF . mconcat <$> many inline) (init raw)
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
charsInBalancedBrackets :: PandocMonad m => Int -> MarkdownParser m ()
|
2015-04-19 17:02:59 -07:00
|
|
|
charsInBalancedBrackets 0 = return ()
|
|
|
|
charsInBalancedBrackets openBrackets =
|
|
|
|
(char '[' >> charsInBalancedBrackets (openBrackets + 1))
|
|
|
|
<|> (char ']' >> charsInBalancedBrackets (openBrackets - 1))
|
|
|
|
<|> (( (() <$ code)
|
2015-05-13 09:50:16 -07:00
|
|
|
<|> (() <$ (escapedChar'))
|
2015-04-19 17:02:59 -07:00
|
|
|
<|> (newline >> notFollowedBy blankline)
|
|
|
|
<|> skipMany1 (noneOf "[]`\n\\")
|
2015-05-13 09:50:16 -07:00
|
|
|
<|> (() <$ count 1 (oneOf "`\\"))
|
2015-04-19 17:02:59 -07:00
|
|
|
) >> charsInBalancedBrackets openBrackets)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- document structure
|
|
|
|
--
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
rawTitleBlockLine :: PandocMonad m => MarkdownParser m String
|
2016-04-10 09:13:53 -07:00
|
|
|
rawTitleBlockLine = do
|
2010-02-28 11:21:19 +00:00
|
|
|
char '%'
|
|
|
|
skipSpaces
|
2016-04-10 09:13:53 -07:00
|
|
|
first <- anyLine
|
|
|
|
rest <- many $ try $ do spaceChar
|
|
|
|
notFollowedBy blankline
|
|
|
|
skipSpaces
|
|
|
|
anyLine
|
|
|
|
return $ trim $ unlines (first:rest)
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
titleLine :: PandocMonad m => MarkdownParser m (F Inlines)
|
2016-04-10 09:13:53 -07:00
|
|
|
titleLine = try $ do
|
|
|
|
raw <- rawTitleBlockLine
|
|
|
|
res <- parseFromString (many inline) raw
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ trimInlinesF $ mconcat res
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
authorsLine :: PandocMonad m => MarkdownParser m (F [Inlines])
|
2012-07-26 22:32:53 -07:00
|
|
|
authorsLine = try $ do
|
2016-04-10 09:13:53 -07:00
|
|
|
raw <- rawTitleBlockLine
|
|
|
|
let sep = (char ';' <* spaces) <|> newline
|
|
|
|
let pAuthors = sepEndBy
|
|
|
|
(trimInlinesF . mconcat <$> many
|
|
|
|
(try $ notFollowedBy sep >> inline))
|
|
|
|
sep
|
|
|
|
sequence <$> parseFromString pAuthors raw
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
dateLine :: PandocMonad m => MarkdownParser m (F Inlines)
|
2007-11-03 23:27:58 +00:00
|
|
|
dateLine = try $ do
|
2016-04-10 09:13:53 -07:00
|
|
|
raw <- rawTitleBlockLine
|
|
|
|
res <- parseFromString (many inline) raw
|
|
|
|
return $ trimInlinesF $ mconcat res
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
titleBlock :: PandocMonad m => MarkdownParser m ()
|
2013-08-18 18:39:04 -07:00
|
|
|
titleBlock = pandocTitleBlock <|> mmdTitleBlock
|
2012-08-12 19:27:13 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
pandocTitleBlock :: PandocMonad m => MarkdownParser m ()
|
2012-08-12 19:27:13 -07:00
|
|
|
pandocTitleBlock = try $ do
|
|
|
|
guardEnabled Ext_pandoc_title_block
|
2013-02-12 19:55:37 -08:00
|
|
|
lookAhead (char '%')
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
title <- option mempty titleLine
|
2015-04-18 18:34:55 -07:00
|
|
|
author <- option (return []) authorsLine
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
date <- option mempty dateLine
|
2007-11-03 23:27:58 +00:00
|
|
|
optional blanklines
|
2015-04-18 18:34:55 -07:00
|
|
|
let meta' = do title' <- title
|
|
|
|
author' <- author
|
|
|
|
date' <- date
|
|
|
|
return $
|
|
|
|
(if B.isNull title' then id else B.setMeta "title" title')
|
|
|
|
. (if null author' then id else B.setMeta "author" author')
|
|
|
|
. (if B.isNull date' then id else B.setMeta "date" date')
|
|
|
|
$ nullMeta
|
|
|
|
updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
|
|
|
|
|
2017-03-04 13:03:41 +01:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks)
|
2013-08-18 18:39:04 -07:00
|
|
|
yamlMetaBlock = try $ do
|
2013-07-02 20:54:30 -07:00
|
|
|
guardEnabled Ext_yaml_metadata_block
|
2013-07-01 22:29:04 -07:00
|
|
|
pos <- getPosition
|
2013-05-10 22:53:35 -07:00
|
|
|
string "---"
|
|
|
|
blankline
|
2013-10-29 10:57:48 -07:00
|
|
|
notFollowedBy blankline -- if --- is followed by a blank it's an HRULE
|
2013-08-07 08:43:42 -07:00
|
|
|
rawYamlLines <- manyTill anyLine stopLine
|
|
|
|
-- by including --- and ..., we allow yaml blocks with just comments:
|
|
|
|
let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."]))
|
2013-05-10 22:53:35 -07:00
|
|
|
optional blanklines
|
2017-03-05 01:36:40 +01:00
|
|
|
case Yaml.decodeEither' $ UTF8.fromString rawYaml of
|
|
|
|
Right (Yaml.Object hashmap) -> do
|
|
|
|
let alist = H.toList hashmap
|
|
|
|
mapM_ (\(k, v) -> do
|
|
|
|
if ignorable k
|
|
|
|
then return ()
|
|
|
|
else do
|
|
|
|
v' <- yamlToMeta v
|
2017-03-05 09:28:44 +01:00
|
|
|
let k' = T.unpack k
|
|
|
|
updateState $ \st -> st{ stateMeta' =
|
|
|
|
(do m <- stateMeta' st
|
|
|
|
-- if there's already a value, leave it unchanged
|
|
|
|
case lookupMeta k' m of
|
|
|
|
Just _ -> return m
|
|
|
|
Nothing -> do
|
|
|
|
v'' <- v'
|
|
|
|
return $ B.setMeta (T.unpack k) v'' m)}
|
2017-03-05 01:36:40 +01:00
|
|
|
) alist
|
|
|
|
Right Yaml.Null -> return ()
|
|
|
|
Right _ -> do
|
|
|
|
logMessage $
|
|
|
|
CouldNotParseYamlMetadata "not an object"
|
|
|
|
pos
|
|
|
|
return ()
|
|
|
|
Left err' -> do
|
|
|
|
case err' of
|
|
|
|
InvalidYaml (Just YamlParseException{
|
|
|
|
yamlProblem = problem
|
|
|
|
, yamlContext = _ctxt
|
|
|
|
, yamlProblemMark = Yaml.YamlMark {
|
|
|
|
yamlLine = yline
|
|
|
|
, yamlColumn = ycol
|
|
|
|
}}) ->
|
|
|
|
logMessage $ CouldNotParseYamlMetadata
|
|
|
|
problem (setSourceLine
|
|
|
|
(setSourceColumn pos
|
|
|
|
(sourceColumn pos + ycol))
|
|
|
|
(sourceLine pos + 1 + yline))
|
|
|
|
_ -> logMessage $ CouldNotParseYamlMetadata
|
|
|
|
(show err') pos
|
|
|
|
return ()
|
2013-08-18 18:39:04 -07:00
|
|
|
return mempty
|
2013-05-10 22:53:35 -07:00
|
|
|
|
2013-07-01 22:39:56 -07:00
|
|
|
-- ignore fields ending with _
|
2013-05-10 22:53:35 -07:00
|
|
|
ignorable :: Text -> Bool
|
2015-04-18 18:34:55 -07:00
|
|
|
ignorable t = (T.pack "_") `T.isSuffixOf` t
|
2013-05-10 22:53:35 -07:00
|
|
|
|
2017-03-05 01:36:40 +01:00
|
|
|
toMetaValue :: PandocMonad m
|
|
|
|
=> Text -> MarkdownParser m (F MetaValue)
|
|
|
|
toMetaValue x = toMeta <$> parseFromString parseBlocks (T.unpack x)
|
2015-02-18 13:04:08 +00:00
|
|
|
where
|
2017-03-05 01:36:40 +01:00
|
|
|
toMeta p = do
|
|
|
|
p' <- p
|
|
|
|
return $
|
|
|
|
case B.toList p' of
|
|
|
|
[Plain xs] -> MetaInlines xs
|
|
|
|
[Para xs]
|
|
|
|
| endsWithNewline x -> MetaBlocks [Para xs]
|
|
|
|
| otherwise -> MetaInlines xs
|
|
|
|
bs -> MetaBlocks bs
|
2015-02-18 13:04:08 +00:00
|
|
|
endsWithNewline t = T.pack "\n" `T.isSuffixOf` t
|
2017-03-05 01:36:40 +01:00
|
|
|
|
|
|
|
yamlToMeta :: PandocMonad m
|
|
|
|
=> Yaml.Value -> MarkdownParser m (F MetaValue)
|
|
|
|
yamlToMeta (Yaml.String t) = toMetaValue t
|
|
|
|
yamlToMeta (Yaml.Number n)
|
2014-04-24 11:09:07 -07:00
|
|
|
-- avoid decimal points for numbers that don't need them:
|
2017-03-05 01:36:40 +01:00
|
|
|
| base10Exponent n >= 0 = return $ return $ MetaString $ show
|
2014-04-24 11:09:07 -07:00
|
|
|
$ coefficient n * (10 ^ base10Exponent n)
|
2017-03-05 01:36:40 +01:00
|
|
|
| otherwise = return $ return $ MetaString $ show n
|
|
|
|
yamlToMeta (Yaml.Bool b) = return $ return $ MetaBool b
|
|
|
|
yamlToMeta (Yaml.Array xs) = do
|
|
|
|
xs' <- mapM yamlToMeta (V.toList xs)
|
|
|
|
return $ do
|
|
|
|
xs'' <- sequence xs'
|
|
|
|
return $ B.toMetaValue xs''
|
|
|
|
yamlToMeta (Yaml.Object o) = do
|
|
|
|
let alist = H.toList o
|
|
|
|
foldM (\m (k,v) -> do
|
|
|
|
if ignorable k
|
|
|
|
then return m
|
|
|
|
else do
|
|
|
|
v' <- yamlToMeta v
|
|
|
|
return $ do
|
|
|
|
MetaMap m' <- m
|
|
|
|
v'' <- v'
|
|
|
|
return (MetaMap $ M.insert (T.unpack k) v'' m'))
|
|
|
|
(return $ MetaMap M.empty)
|
|
|
|
alist
|
|
|
|
yamlToMeta _ = return $ return $ MetaString ""
|
2013-05-10 22:53:35 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
stopLine :: PandocMonad m => MarkdownParser m ()
|
2013-05-10 22:53:35 -07:00
|
|
|
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
mmdTitleBlock :: PandocMonad m => MarkdownParser m ()
|
2012-08-12 19:27:13 -07:00
|
|
|
mmdTitleBlock = try $ do
|
|
|
|
guardEnabled Ext_mmd_title_block
|
2015-10-26 21:34:06 -07:00
|
|
|
firstPair <- kvPair False
|
|
|
|
restPairs <- many (kvPair True)
|
|
|
|
let kvPairs = firstPair : restPairs
|
2012-08-12 19:27:13 -07:00
|
|
|
blanklines
|
2015-04-18 18:34:55 -07:00
|
|
|
updateState $ \st -> st{ stateMeta' = stateMeta' st <>
|
|
|
|
return (Meta $ M.fromList kvPairs) }
|
2012-08-12 19:27:13 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
kvPair :: PandocMonad m => Bool -> MarkdownParser m (String, MetaValue)
|
2015-10-26 21:34:06 -07:00
|
|
|
kvPair allowEmpty = try $ do
|
2012-08-12 19:27:13 -07:00
|
|
|
key <- many1Till (alphaNum <|> oneOf "_- ") (char ':')
|
2015-10-26 21:34:06 -07:00
|
|
|
val <- trim <$> manyTill anyChar
|
2012-08-12 19:27:13 -07:00
|
|
|
(try $ newline >> lookAhead (blankline <|> nonspaceChar))
|
2015-10-26 21:34:06 -07:00
|
|
|
guard $ allowEmpty || not (null val)
|
2012-08-12 19:27:13 -07:00
|
|
|
let key' = concat $ words $ map toLower key
|
2015-10-26 21:34:06 -07:00
|
|
|
let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ val
|
2012-08-12 19:27:13 -07:00
|
|
|
return (key',val')
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc
|
2007-11-03 23:27:58 +00:00
|
|
|
parseMarkdown = do
|
2013-08-18 18:39:04 -07:00
|
|
|
optional titleBlock
|
2008-08-04 03:15:34 +00:00
|
|
|
blocks <- parseBlocks
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
st <- getState
|
2017-03-05 01:36:40 +01:00
|
|
|
let doc = runF (do Pandoc _ bs <- B.doc <$> blocks
|
|
|
|
meta <- stateMeta' st
|
|
|
|
return $ Pandoc meta bs) st
|
2017-02-17 19:59:54 +01:00
|
|
|
reportLogMessages
|
2017-03-05 01:36:40 +01:00
|
|
|
(do guardEnabled Ext_east_asian_line_breaks
|
|
|
|
return $ bottomUp softBreakFilter doc) <|> return doc
|
2015-12-12 17:28:52 -08:00
|
|
|
|
|
|
|
softBreakFilter :: [Inline] -> [Inline]
|
|
|
|
softBreakFilter (x:SoftBreak:y:zs) =
|
|
|
|
case (stringify x, stringify y) of
|
|
|
|
(xs@(_:_), (c:_))
|
|
|
|
| charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs
|
|
|
|
_ -> x:SoftBreak:y:zs
|
|
|
|
softBreakFilter xs = xs
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
referenceKey :: PandocMonad m => MarkdownParser m (F Blocks)
|
2007-11-03 23:27:58 +00:00
|
|
|
referenceKey = try $ do
|
2013-01-04 12:01:09 -08:00
|
|
|
pos <- getPosition
|
2009-04-29 19:28:31 +00:00
|
|
|
skipNonindentSpaces
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
(_,raw) <- reference
|
2007-11-03 23:27:58 +00:00
|
|
|
char ':'
|
2008-08-10 23:26:32 +00:00
|
|
|
skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
|
2010-12-10 12:14:51 -08:00
|
|
|
let sourceURL = liftM unwords $ many $ try $ do
|
2011-01-19 15:06:56 -08:00
|
|
|
skipMany spaceChar
|
2014-05-02 22:58:47 -07:00
|
|
|
notFollowedBy' referenceTitle
|
2015-11-19 22:58:19 -08:00
|
|
|
notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
notFollowedBy' (() <$ reference)
|
2013-02-15 22:56:53 -08:00
|
|
|
many1 $ notFollowedBy space >> litChar
|
2014-07-16 07:54:44 -07:00
|
|
|
let betweenAngles = try $ char '<' >> manyTill litChar (char '>')
|
2010-12-10 12:14:51 -08:00
|
|
|
src <- try betweenAngles <|> sourceURL
|
2007-11-03 23:27:58 +00:00
|
|
|
tit <- option "" referenceTitle
|
2015-04-02 21:09:08 -07:00
|
|
|
attr <- option nullAttr $ try $
|
2015-11-19 22:58:19 -08:00
|
|
|
guardEnabled Ext_link_attributes >> skipSpaces >> attributes
|
|
|
|
addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes
|
2015-04-02 21:09:08 -07:00
|
|
|
>> many (try $ spnl >> keyValAttr)
|
2007-11-03 23:27:58 +00:00
|
|
|
blanklines
|
2015-04-02 21:09:08 -07:00
|
|
|
let attr' = extractIdClass $ foldl (\x f -> f x) attr addKvs
|
|
|
|
target = (escapeURI $ trimr src, tit)
|
2007-11-03 23:27:58 +00:00
|
|
|
st <- getState
|
2012-08-01 22:40:07 -07:00
|
|
|
let oldkeys = stateKeys st
|
2013-01-04 12:01:09 -08:00
|
|
|
let key = toKey raw
|
|
|
|
case M.lookup key oldkeys of
|
2017-02-17 19:59:54 +01:00
|
|
|
Just _ -> logMessage $ DuplicateLinkReference raw pos
|
2013-01-04 12:01:09 -08:00
|
|
|
Nothing -> return ()
|
2015-04-02 21:09:08 -07:00
|
|
|
updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys }
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return mempty
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
referenceTitle :: PandocMonad m => MarkdownParser m String
|
2011-12-05 20:55:23 -08:00
|
|
|
referenceTitle = try $ do
|
2008-08-10 23:26:32 +00:00
|
|
|
skipSpaces >> optional newline >> skipSpaces
|
2013-03-13 19:18:20 -07:00
|
|
|
quotedTitle '"' <|> quotedTitle '\'' <|> charsInBalanced '(' ')' litChar
|
2013-01-16 11:25:17 -08:00
|
|
|
|
|
|
|
-- A link title in quotes
|
2016-11-28 17:13:46 -05:00
|
|
|
quotedTitle :: PandocMonad m => Char -> MarkdownParser m String
|
2013-01-16 11:25:17 -08:00
|
|
|
quotedTitle c = try $ do
|
|
|
|
char c
|
|
|
|
notFollowedBy spaces
|
|
|
|
let pEnder = try $ char c >> notFollowedBy (satisfy isAlphaNum)
|
2013-02-15 20:27:29 -08:00
|
|
|
let regChunk = many1 (noneOf ['\\','\n','&',c]) <|> count 1 litChar
|
2013-01-16 11:25:17 -08:00
|
|
|
let nestedChunk = (\x -> [c] ++ x ++ [c]) <$> quotedTitle c
|
|
|
|
unwords . words . concat <$> manyTill (nestedChunk <|> regChunk) pEnder
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2012-08-19 11:12:18 -07:00
|
|
|
-- | PHP Markdown Extra style abbreviation key. Currently
|
|
|
|
-- we just skip them, since Pandoc doesn't have an element for
|
|
|
|
-- an abbreviation.
|
2016-11-28 17:13:46 -05:00
|
|
|
abbrevKey :: PandocMonad m => MarkdownParser m (F Blocks)
|
2012-08-19 11:12:18 -07:00
|
|
|
abbrevKey = do
|
|
|
|
guardEnabled Ext_abbreviations
|
|
|
|
try $ do
|
|
|
|
char '*'
|
|
|
|
reference
|
|
|
|
char ':'
|
|
|
|
skipMany (satisfy (/= '\n'))
|
|
|
|
blanklines
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return mempty
|
2012-08-19 11:12:18 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
noteMarker :: PandocMonad m => MarkdownParser m String
|
2011-01-19 15:14:23 -08:00
|
|
|
noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
rawLine :: PandocMonad m => MarkdownParser m String
|
2011-02-01 22:35:27 -08:00
|
|
|
rawLine = try $ do
|
2007-11-03 23:27:58 +00:00
|
|
|
notFollowedBy blankline
|
2010-12-08 08:17:16 -08:00
|
|
|
notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker
|
2011-02-01 22:35:27 -08:00
|
|
|
optional indentSpaces
|
|
|
|
anyLine
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
rawLines :: PandocMonad m => MarkdownParser m String
|
2011-02-01 22:35:27 -08:00
|
|
|
rawLines = do
|
|
|
|
first <- anyLine
|
|
|
|
rest <- many rawLine
|
|
|
|
return $ unlines (first:rest)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
noteBlock :: PandocMonad m => MarkdownParser m (F Blocks)
|
2007-11-03 23:27:58 +00:00
|
|
|
noteBlock = try $ do
|
2013-01-04 12:08:18 -08:00
|
|
|
pos <- getPosition
|
2010-12-08 08:17:16 -08:00
|
|
|
skipNonindentSpaces
|
2007-11-03 23:27:58 +00:00
|
|
|
ref <- noteMarker
|
|
|
|
char ':'
|
|
|
|
optional blankline
|
|
|
|
optional indentSpaces
|
2012-09-29 11:57:32 -04:00
|
|
|
first <- rawLines
|
|
|
|
rest <- many $ try $ blanklines >> indentSpaces >> rawLines
|
|
|
|
let raw = unlines (first:rest) ++ "\n"
|
2007-11-03 23:27:58 +00:00
|
|
|
optional blanklines
|
2015-04-18 18:34:55 -07:00
|
|
|
parsed <- parseFromString parseBlocks raw
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
let newnote = (ref, parsed)
|
2013-01-04 12:08:18 -08:00
|
|
|
oldnotes <- stateNotes' <$> getState
|
|
|
|
case lookup ref oldnotes of
|
2017-02-17 19:59:54 +01:00
|
|
|
Just _ -> logMessage $ DuplicateNoteReference ref pos
|
2013-01-04 12:08:18 -08:00
|
|
|
Nothing -> return ()
|
|
|
|
updateState $ \s -> s { stateNotes' = newnote : oldnotes }
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
return mempty
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- parsing blocks
|
|
|
|
--
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
parseBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
parseBlocks = mconcat <$> manyTill block eof
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
block :: PandocMonad m => MarkdownParser m (F Blocks)
|
2014-02-25 22:43:58 -08:00
|
|
|
block = do
|
|
|
|
pos <- getPosition
|
|
|
|
res <- choice [ mempty <$ blanklines
|
2013-06-19 09:22:34 -07:00
|
|
|
, codeBlockFenced
|
2013-08-18 18:39:04 -07:00
|
|
|
, yamlMetaBlock
|
2013-11-03 21:16:47 -08:00
|
|
|
-- note: bulletList needs to be before header because of
|
|
|
|
-- the possibility of empty list items: -
|
|
|
|
, bulletList
|
2012-07-26 20:29:08 -07:00
|
|
|
, header
|
2013-01-21 21:35:51 -08:00
|
|
|
, lhsCodeBlock
|
2013-08-15 22:39:14 -07:00
|
|
|
, divHtml
|
2012-08-04 10:49:05 -07:00
|
|
|
, htmlBlock
|
2012-07-26 20:29:08 -07:00
|
|
|
, table
|
|
|
|
, codeBlockIndented
|
2017-03-22 21:18:55 +01:00
|
|
|
, latexMacro
|
2015-02-25 08:33:42 -08:00
|
|
|
, rawTeXBlock
|
|
|
|
, lineBlock
|
2012-07-26 20:29:08 -07:00
|
|
|
, blockQuote
|
|
|
|
, hrule
|
|
|
|
, orderedList
|
|
|
|
, definitionList
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
, noteBlock
|
|
|
|
, referenceKey
|
2012-08-19 11:12:18 -07:00
|
|
|
, abbrevKey
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
, para
|
2012-07-26 20:29:08 -07:00
|
|
|
, plain
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
] <?> "block"
|
2017-02-10 23:59:47 +01:00
|
|
|
report $ ParsingTrace
|
|
|
|
(take 60 $ show $ B.toList $ runF res defaultParserState) pos
|
2014-02-25 22:43:58 -08:00
|
|
|
return res
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- header blocks
|
|
|
|
--
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
header :: PandocMonad m => MarkdownParser m (F Blocks)
|
2008-07-23 23:10:05 +00:00
|
|
|
header = setextHeader <|> atxHeader <?> "header"
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
atxChar :: PandocMonad m => MarkdownParser m Char
|
2014-10-14 13:28:28 +02:00
|
|
|
atxChar = do
|
|
|
|
exts <- getOption readerExtensions
|
2017-01-14 13:06:27 +01:00
|
|
|
return $ if extensionEnabled Ext_literate_haskell exts
|
|
|
|
then '='
|
|
|
|
else '#'
|
2014-10-14 13:28:28 +02:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
atxHeader :: PandocMonad m => MarkdownParser m (F Blocks)
|
2007-11-03 23:27:58 +00:00
|
|
|
atxHeader = try $ do
|
2014-10-14 13:28:28 +02:00
|
|
|
level <- atxChar >>= many1 . char >>= return . length
|
2013-07-29 08:38:46 -07:00
|
|
|
notFollowedBy $ guardEnabled Ext_fancy_lists >>
|
|
|
|
(char '.' <|> char ')') -- this would be a list
|
2017-03-20 21:51:29 +01:00
|
|
|
guardDisabled Ext_space_in_atx_header <|> notFollowedBy nonspaceChar
|
2007-11-03 23:27:58 +00:00
|
|
|
skipSpaces
|
2015-05-13 23:02:54 -07:00
|
|
|
(text, raw) <- withRaw $
|
|
|
|
trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline)
|
2013-01-10 20:22:18 -08:00
|
|
|
attr <- atxClosing
|
2015-04-02 21:09:08 -07:00
|
|
|
attr' <- registerHeader attr (runF text defaultParserState)
|
2015-05-13 23:02:54 -07:00
|
|
|
guardDisabled Ext_implicit_header_references
|
2015-04-02 21:09:08 -07:00
|
|
|
<|> registerImplicitHeader raw attr'
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ B.headerWith attr' level <$> text
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
atxClosing :: PandocMonad m => MarkdownParser m Attr
|
2013-01-16 09:17:20 -08:00
|
|
|
atxClosing = try $ do
|
|
|
|
attr' <- option nullAttr
|
|
|
|
(guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier)
|
2014-10-14 13:28:28 +02:00
|
|
|
skipMany . char =<< atxChar
|
2013-01-16 09:17:20 -08:00
|
|
|
skipSpaces
|
|
|
|
attr <- option attr'
|
|
|
|
(guardEnabled Ext_header_attributes >> attributes)
|
|
|
|
blanklines
|
|
|
|
return attr
|
2013-01-10 20:22:18 -08:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
setextHeaderEnd :: PandocMonad m => MarkdownParser m Attr
|
2013-01-16 09:17:20 -08:00
|
|
|
setextHeaderEnd = try $ do
|
|
|
|
attr <- option nullAttr
|
|
|
|
$ (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier)
|
|
|
|
<|> (guardEnabled Ext_header_attributes >> attributes)
|
|
|
|
blanklines
|
|
|
|
return attr
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
mmdHeaderIdentifier :: PandocMonad m => MarkdownParser m Attr
|
2013-01-16 09:17:20 -08:00
|
|
|
mmdHeaderIdentifier = do
|
2017-03-05 16:34:47 +01:00
|
|
|
(_, raw) <- reference
|
|
|
|
let raw' = trim $ stripFirstAndLast raw
|
|
|
|
let ident = concat $ words $ map toLower raw'
|
|
|
|
let attr = (ident, [], [])
|
|
|
|
guardDisabled Ext_implicit_header_references
|
|
|
|
<|> registerImplicitHeader raw' attr
|
2013-01-16 09:17:20 -08:00
|
|
|
skipSpaces
|
2017-03-05 16:34:47 +01:00
|
|
|
return attr
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
setextHeader :: PandocMonad m => MarkdownParser m (F Blocks)
|
2007-11-03 23:27:58 +00:00
|
|
|
setextHeader = try $ do
|
2010-11-28 20:19:32 -08:00
|
|
|
-- This lookahead prevents us from wasting time parsing Inlines
|
|
|
|
-- unless necessary -- it gives a significant performance boost.
|
|
|
|
lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline
|
2015-07-23 02:29:37 -04:00
|
|
|
skipSpaces
|
2015-05-13 23:02:54 -07:00
|
|
|
(text, raw) <- withRaw $
|
|
|
|
trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline)
|
2013-01-10 20:22:18 -08:00
|
|
|
attr <- setextHeaderEnd
|
2007-11-22 19:09:38 +00:00
|
|
|
underlineChar <- oneOf setextHChars
|
|
|
|
many (char underlineChar)
|
|
|
|
blanklines
|
2015-04-18 18:34:55 -07:00
|
|
|
let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
|
2015-04-02 21:09:08 -07:00
|
|
|
attr' <- registerHeader attr (runF text defaultParserState)
|
2015-05-13 23:02:54 -07:00
|
|
|
guardDisabled Ext_implicit_header_references
|
2015-04-02 21:09:08 -07:00
|
|
|
<|> registerImplicitHeader raw attr'
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ B.headerWith attr' level <$> text
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
registerImplicitHeader :: PandocMonad m => String -> Attr -> MarkdownParser m ()
|
2015-04-02 21:09:08 -07:00
|
|
|
registerImplicitHeader raw attr@(ident, _, _) = do
|
2015-05-13 23:02:54 -07:00
|
|
|
let key = toKey $ "[" ++ raw ++ "]"
|
|
|
|
updateState (\s -> s { stateHeaderKeys =
|
2015-04-02 21:09:08 -07:00
|
|
|
M.insert key (('#':ident,""), attr) (stateHeaderKeys s) })
|
2015-05-13 23:02:54 -07:00
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
--
|
|
|
|
-- hrule block
|
|
|
|
--
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
hrule :: PandocMonad m => ParserT [Char] st m (F Blocks)
|
2007-11-03 23:27:58 +00:00
|
|
|
hrule = try $ do
|
|
|
|
skipSpaces
|
2011-01-19 15:06:56 -08:00
|
|
|
start <- satisfy isHruleChar
|
2007-11-03 23:27:58 +00:00
|
|
|
count 2 (skipSpaces >> char start)
|
2009-04-29 19:28:39 +00:00
|
|
|
skipMany (spaceChar <|> char start)
|
2007-11-03 23:27:58 +00:00
|
|
|
newline
|
|
|
|
optional blanklines
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return B.horizontalRule
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- code blocks
|
|
|
|
--
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
indentedLine :: PandocMonad m => MarkdownParser m String
|
2015-04-18 18:34:55 -07:00
|
|
|
indentedLine = indentSpaces >> anyLine >>= return . (++ "\n")
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
blockDelimiter :: PandocMonad m
|
|
|
|
=> (Char -> Bool)
|
2012-01-28 12:25:24 -08:00
|
|
|
-> Maybe Int
|
2016-11-28 17:13:46 -05:00
|
|
|
-> ParserT [Char] st m Int
|
2012-01-28 12:25:24 -08:00
|
|
|
blockDelimiter f len = try $ do
|
|
|
|
c <- lookAhead (satisfy f)
|
2012-08-21 20:11:10 -07:00
|
|
|
case len of
|
|
|
|
Just l -> count l (char c) >> many (char c) >> return l
|
2015-04-18 18:34:55 -07:00
|
|
|
Nothing -> count 3 (char c) >> many (char c) >>=
|
|
|
|
return . (+ 3) . length
|
2008-02-09 03:18:54 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
attributes :: PandocMonad m => MarkdownParser m Attr
|
2008-02-09 03:19:43 +00:00
|
|
|
attributes = try $ do
|
2008-02-09 03:18:54 +00:00
|
|
|
char '{'
|
2012-02-08 17:18:08 -08:00
|
|
|
spnl
|
2014-07-11 12:51:26 +01:00
|
|
|
attrs <- many (attribute <* spnl)
|
2008-02-09 03:18:54 +00:00
|
|
|
char '}'
|
2013-03-02 21:08:33 -08:00
|
|
|
return $ foldl (\x f -> f x) nullAttr attrs
|
2008-02-09 03:19:43 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr)
|
2013-03-02 21:08:33 -08:00
|
|
|
attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr
|
2008-02-09 03:19:43 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
identifier :: PandocMonad m => MarkdownParser m String
|
2008-02-09 03:19:43 +00:00
|
|
|
identifier = do
|
|
|
|
first <- letter
|
2009-07-03 02:33:52 +00:00
|
|
|
rest <- many $ alphaNum <|> oneOf "-_:."
|
2008-02-09 03:19:43 +00:00
|
|
|
return (first:rest)
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
identifierAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
|
2008-02-09 03:19:43 +00:00
|
|
|
identifierAttr = try $ do
|
|
|
|
char '#'
|
|
|
|
result <- identifier
|
2013-03-02 21:08:33 -08:00
|
|
|
return $ \(_,cs,kvs) -> (result,cs,kvs)
|
2008-02-09 03:19:43 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
classAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
|
2008-02-09 03:19:43 +00:00
|
|
|
classAttr = try $ do
|
|
|
|
char '.'
|
2013-01-17 11:24:45 -08:00
|
|
|
result <- identifier
|
2013-03-02 21:08:33 -08:00
|
|
|
return $ \(id',cs,kvs) -> (id',cs ++ [result],kvs)
|
2008-02-09 03:19:43 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
keyValAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
|
2008-02-09 03:19:43 +00:00
|
|
|
keyValAttr = try $ do
|
|
|
|
key <- identifier
|
|
|
|
char '='
|
2013-02-12 11:27:42 -08:00
|
|
|
val <- enclosed (char '"') (char '"') litChar
|
|
|
|
<|> enclosed (char '\'') (char '\'') litChar
|
|
|
|
<|> many (escapedChar' <|> noneOf " \t\n\r}")
|
2015-09-25 23:01:34 -07:00
|
|
|
return $ \(id',cs,kvs) ->
|
|
|
|
case key of
|
|
|
|
"id" -> (val,cs,kvs)
|
|
|
|
"class" -> (id',cs ++ words val,kvs)
|
|
|
|
_ -> (id',cs,kvs ++ [(key,val)])
|
2013-03-02 21:08:33 -08:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
specialAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
|
2013-03-02 21:08:33 -08:00
|
|
|
specialAttr = do
|
|
|
|
char '-'
|
|
|
|
return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs)
|
2008-02-09 03:18:54 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks)
|
2012-08-21 19:21:51 -07:00
|
|
|
codeBlockFenced = try $ do
|
2013-01-23 13:07:07 -08:00
|
|
|
c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
|
|
|
|
<|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`'))
|
|
|
|
size <- blockDelimiter (== c) Nothing
|
2012-08-21 20:11:10 -07:00
|
|
|
skipMany spaceChar
|
|
|
|
attr <- option ([],[],[]) $
|
2013-01-23 13:07:07 -08:00
|
|
|
try (guardEnabled Ext_fenced_code_attributes >> attributes)
|
2014-05-27 12:44:39 -07:00
|
|
|
<|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar)
|
2012-08-21 20:11:10 -07:00
|
|
|
blankline
|
2013-01-23 13:07:07 -08:00
|
|
|
contents <- manyTill anyLine (blockDelimiter (== c) (Just size))
|
2008-02-09 03:19:17 +00:00
|
|
|
blanklines
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return $ B.codeBlockWith attr $ intercalate "\n" contents
|
2008-02-09 03:18:54 +00:00
|
|
|
|
2014-05-27 12:44:39 -07:00
|
|
|
-- correctly handle github language identifiers
|
|
|
|
toLanguageId :: String -> String
|
|
|
|
toLanguageId = map toLower . go
|
2017-03-04 13:03:41 +01:00
|
|
|
where go "c++" = "cpp"
|
2014-05-27 12:44:39 -07:00
|
|
|
go "objective-c" = "objectivec"
|
2017-03-04 13:03:41 +01:00
|
|
|
go x = x
|
2014-05-27 12:44:39 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
codeBlockIndented :: PandocMonad m => MarkdownParser m (F Blocks)
|
2008-02-09 03:18:54 +00:00
|
|
|
codeBlockIndented = do
|
2012-07-26 22:32:53 -07:00
|
|
|
contents <- many1 (indentedLine <|>
|
2007-11-03 23:27:58 +00:00
|
|
|
try (do b <- blanklines
|
|
|
|
l <- indentedLine
|
|
|
|
return $ b ++ l))
|
|
|
|
optional blanklines
|
2012-07-25 20:42:15 -07:00
|
|
|
classes <- getOption readerIndentedCodeClasses
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return $ B.codeBlockWith ("", classes, []) $
|
2009-12-05 04:46:57 +00:00
|
|
|
stripTrailingNewlines $ concat contents
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
lhsCodeBlock :: PandocMonad m => MarkdownParser m (F Blocks)
|
2008-12-02 22:41:51 +00:00
|
|
|
lhsCodeBlock = do
|
2012-08-08 23:18:19 -07:00
|
|
|
guardEnabled Ext_literate_haskell
|
2015-04-18 18:34:55 -07:00
|
|
|
(return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
(lhsCodeBlockBird <|> lhsCodeBlockLaTeX))
|
2015-04-18 18:34:55 -07:00
|
|
|
<|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$>
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
lhsCodeBlockInverseBird)
|
2008-12-02 22:42:55 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m String
|
2008-12-02 22:42:55 +00:00
|
|
|
lhsCodeBlockLaTeX = try $ do
|
|
|
|
string "\\begin{code}"
|
|
|
|
manyTill spaceChar newline
|
|
|
|
contents <- many1Till anyChar (try $ string "\\end{code}")
|
|
|
|
blanklines
|
|
|
|
return $ stripTrailingNewlines contents
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
lhsCodeBlockBird :: PandocMonad m => MarkdownParser m String
|
2010-03-14 23:23:20 +00:00
|
|
|
lhsCodeBlockBird = lhsCodeBlockBirdWith '>'
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m String
|
2010-03-14 23:23:20 +00:00
|
|
|
lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<'
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m String
|
2010-03-14 23:23:20 +00:00
|
|
|
lhsCodeBlockBirdWith c = try $ do
|
2008-12-02 22:41:51 +00:00
|
|
|
pos <- getPosition
|
|
|
|
when (sourceColumn pos /= 1) $ fail "Not in first column"
|
2010-03-14 23:23:20 +00:00
|
|
|
lns <- many1 $ birdTrackLine c
|
2008-12-02 22:41:51 +00:00
|
|
|
-- if (as is normal) there is always a space after >, drop it
|
|
|
|
let lns' = if all (\ln -> null ln || take 1 ln == " ") lns
|
|
|
|
then map (drop 1) lns
|
|
|
|
else lns
|
|
|
|
blanklines
|
2008-12-02 22:42:55 +00:00
|
|
|
return $ intercalate "\n" lns'
|
2008-12-02 22:41:51 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
birdTrackLine :: PandocMonad m => Char -> ParserT [Char] st m String
|
2011-03-02 11:18:38 -08:00
|
|
|
birdTrackLine c = try $ do
|
2010-03-14 23:23:20 +00:00
|
|
|
char c
|
2011-03-02 11:18:38 -08:00
|
|
|
-- allow html tags on left margin:
|
|
|
|
when (c == '<') $ notFollowedBy letter
|
2013-01-25 18:32:15 -08:00
|
|
|
anyLine
|
2008-12-02 22:41:51 +00:00
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
--
|
|
|
|
-- block quotes
|
|
|
|
--
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
emailBlockQuoteStart :: PandocMonad m => MarkdownParser m Char
|
2014-07-11 12:51:26 +01:00
|
|
|
emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ')
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
emailBlockQuote :: PandocMonad m => MarkdownParser m [String]
|
2007-11-03 23:27:58 +00:00
|
|
|
emailBlockQuote = try $ do
|
|
|
|
emailBlockQuoteStart
|
2012-09-29 11:57:32 -04:00
|
|
|
let emailLine = many $ nonEndline <|> try
|
|
|
|
(endline >> notFollowedBy emailBlockQuoteStart >>
|
|
|
|
return '\n')
|
|
|
|
let emailSep = try (newline >> emailBlockQuoteStart)
|
|
|
|
first <- emailLine
|
|
|
|
rest <- many $ try $ emailSep >> emailLine
|
|
|
|
let raw = first:rest
|
2007-11-03 23:27:58 +00:00
|
|
|
newline <|> (eof >> return '\n')
|
|
|
|
optional blanklines
|
|
|
|
return raw
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
blockQuote :: PandocMonad m => MarkdownParser m (F Blocks)
|
2012-07-26 22:32:53 -07:00
|
|
|
blockQuote = do
|
2007-12-08 19:32:18 +00:00
|
|
|
raw <- emailBlockQuote
|
2007-11-03 23:27:58 +00:00
|
|
|
-- parse the extracted block, which may contain various block elements:
|
2015-04-18 18:34:55 -07:00
|
|
|
contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
|
|
|
|
return $ B.blockQuote <$> contents
|
2012-07-26 22:32:53 -07:00
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
--
|
|
|
|
-- list blocks
|
|
|
|
--
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
bulletListStart :: PandocMonad m => MarkdownParser m ()
|
2007-11-03 23:27:58 +00:00
|
|
|
bulletListStart = try $ do
|
|
|
|
optional newline -- if preceded by a Plain block in a list context
|
2014-08-12 11:10:48 -07:00
|
|
|
startpos <- sourceColumn <$> getPosition
|
2009-04-29 19:28:31 +00:00
|
|
|
skipNonindentSpaces
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
notFollowedBy' (() <$ hrule) -- because hrules start out just like lists
|
2011-01-19 15:06:56 -08:00
|
|
|
satisfy isBulletListMarker
|
2014-08-12 11:10:48 -07:00
|
|
|
endpos <- sourceColumn <$> getPosition
|
|
|
|
tabStop <- getOption readerTabStop
|
|
|
|
lookAhead (newline <|> spaceChar)
|
|
|
|
() <$ atMostSpaces (tabStop - (endpos - startpos))
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
anyOrderedListStart :: PandocMonad m => MarkdownParser m (Int, ListNumberStyle, ListNumberDelim)
|
2007-11-03 23:27:58 +00:00
|
|
|
anyOrderedListStart = try $ do
|
|
|
|
optional newline -- if preceded by a Plain block in a list context
|
2014-08-12 11:10:48 -07:00
|
|
|
startpos <- sourceColumn <$> getPosition
|
2009-04-29 19:28:31 +00:00
|
|
|
skipNonindentSpaces
|
2007-11-03 23:27:58 +00:00
|
|
|
notFollowedBy $ string "p." >> spaceChar >> digit -- page number
|
2014-08-12 11:10:48 -07:00
|
|
|
res <- do guardDisabled Ext_fancy_lists
|
2014-10-18 13:57:48 -07:00
|
|
|
start <- many1 digit >>= safeRead
|
2014-08-12 11:10:48 -07:00
|
|
|
char '.'
|
2014-10-18 13:57:48 -07:00
|
|
|
return (start, DefaultStyle, DefaultDelim)
|
2014-08-12 11:10:48 -07:00
|
|
|
<|> do (num, style, delim) <- anyOrderedListMarker
|
|
|
|
-- if it could be an abbreviated first name,
|
|
|
|
-- insist on more than one space
|
|
|
|
when (delim == Period && (style == UpperAlpha ||
|
|
|
|
(style == UpperRoman &&
|
|
|
|
num `elem` [1, 5, 10, 50, 100, 500, 1000]))) $
|
|
|
|
() <$ spaceChar
|
|
|
|
return (num, style, delim)
|
|
|
|
endpos <- sourceColumn <$> getPosition
|
|
|
|
tabStop <- getOption readerTabStop
|
|
|
|
lookAhead (newline <|> spaceChar)
|
|
|
|
atMostSpaces (tabStop - (endpos - startpos))
|
|
|
|
return res
|
2008-09-12 00:05:32 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
listStart :: PandocMonad m => MarkdownParser m ()
|
2015-04-18 18:34:55 -07:00
|
|
|
listStart = bulletListStart <|> (anyOrderedListStart >> return ())
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
listLine :: PandocMonad m => MarkdownParser m String
|
2008-09-12 00:05:32 +00:00
|
|
|
listLine = try $ do
|
2007-11-03 23:27:58 +00:00
|
|
|
notFollowedBy' (do indentSpaces
|
2013-12-19 20:43:25 -05:00
|
|
|
many spaceChar
|
2008-09-12 00:05:32 +00:00
|
|
|
listStart)
|
2014-07-07 15:47:51 -06:00
|
|
|
notFollowedByHtmlCloser
|
2014-05-04 16:21:18 -07:00
|
|
|
optional (() <$ indentSpaces)
|
2014-08-12 11:10:48 -07:00
|
|
|
listLineCommon
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
listLineCommon :: PandocMonad m => MarkdownParser m String
|
2014-08-12 11:10:48 -07:00
|
|
|
listLineCommon = concat <$> manyTill
|
2013-12-07 19:56:54 -08:00
|
|
|
( many1 (satisfy $ \c -> c /= '\n' && c /= '<')
|
|
|
|
<|> liftM snd (htmlTag isCommentTag)
|
|
|
|
<|> count 1 anyChar
|
|
|
|
) newline
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- parse raw text for one list item, excluding start marker and continuations
|
2016-11-28 17:13:46 -05:00
|
|
|
rawListItem :: PandocMonad m
|
|
|
|
=> MarkdownParser m a
|
|
|
|
-> MarkdownParser m String
|
2012-01-02 17:04:59 -08:00
|
|
|
rawListItem start = try $ do
|
|
|
|
start
|
2014-08-12 11:10:48 -07:00
|
|
|
first <- listLineCommon
|
2013-11-03 21:16:47 -08:00
|
|
|
rest <- many (notFollowedBy listStart >> notFollowedBy blankline >> listLine)
|
2007-11-03 23:27:58 +00:00
|
|
|
blanks <- many blankline
|
2013-04-19 20:23:50 -07:00
|
|
|
return $ unlines (first:rest) ++ blanks
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2012-07-26 22:32:53 -07:00
|
|
|
-- continuation of a list item - indented and separated by blankline
|
2007-11-03 23:27:58 +00:00
|
|
|
-- or (in compact lists) endline.
|
|
|
|
-- note: nested lists are parsed as continuations
|
2016-11-28 17:13:46 -05:00
|
|
|
listContinuation :: PandocMonad m => MarkdownParser m String
|
2008-09-12 00:05:32 +00:00
|
|
|
listContinuation = try $ do
|
2007-11-03 23:27:58 +00:00
|
|
|
lookAhead indentSpaces
|
2008-09-12 00:05:32 +00:00
|
|
|
result <- many1 listContinuationLine
|
2007-11-03 23:27:58 +00:00
|
|
|
blanks <- many blankline
|
|
|
|
return $ concat result ++ blanks
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
notFollowedByHtmlCloser :: PandocMonad m => MarkdownParser m ()
|
2014-07-07 15:47:51 -06:00
|
|
|
notFollowedByHtmlCloser = do
|
|
|
|
inHtmlBlock <- stateInHtmlBlock <$> getState
|
|
|
|
case inHtmlBlock of
|
|
|
|
Just t -> notFollowedBy' $ htmlTag (~== TagClose t)
|
|
|
|
Nothing -> return ()
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
listContinuationLine :: PandocMonad m => MarkdownParser m String
|
2008-09-12 00:05:32 +00:00
|
|
|
listContinuationLine = try $ do
|
2007-11-03 23:27:58 +00:00
|
|
|
notFollowedBy blankline
|
2008-09-12 00:05:32 +00:00
|
|
|
notFollowedBy' listStart
|
2014-07-07 15:47:51 -06:00
|
|
|
notFollowedByHtmlCloser
|
2007-11-03 23:27:58 +00:00
|
|
|
optional indentSpaces
|
2013-01-25 18:32:15 -08:00
|
|
|
result <- anyLine
|
2007-11-03 23:27:58 +00:00
|
|
|
return $ result ++ "\n"
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
listItem :: PandocMonad m
|
|
|
|
=> MarkdownParser m a
|
|
|
|
-> MarkdownParser m (F Blocks)
|
2012-04-13 11:12:18 -07:00
|
|
|
listItem start = try $ do
|
2012-01-02 17:04:59 -08:00
|
|
|
first <- rawListItem start
|
2008-09-12 00:05:32 +00:00
|
|
|
continuations <- many listContinuation
|
2007-11-03 23:27:58 +00:00
|
|
|
-- parsing with ListItemState forces markers at beginning of lines to
|
|
|
|
-- count as list item markers, even if not separated by blank space.
|
|
|
|
-- see definition of "endline"
|
|
|
|
state <- getState
|
|
|
|
let oldContext = stateParserContext state
|
|
|
|
setState $ state {stateParserContext = ListItemState}
|
|
|
|
-- parse the extracted block, which may contain various block elements:
|
|
|
|
let raw = concat (first:continuations)
|
|
|
|
contents <- parseFromString parseBlocks raw
|
|
|
|
updateState (\st -> st {stateParserContext = oldContext})
|
|
|
|
return contents
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
orderedList :: PandocMonad m => MarkdownParser m (F Blocks)
|
2007-11-03 23:27:58 +00:00
|
|
|
orderedList = try $ do
|
|
|
|
(start, style, delim) <- lookAhead anyOrderedListStart
|
2013-12-19 20:19:24 -05:00
|
|
|
unless (style `elem` [DefaultStyle, Decimal, Example] &&
|
|
|
|
delim `elem` [DefaultDelim, Period]) $
|
2012-07-27 15:45:47 -07:00
|
|
|
guardEnabled Ext_fancy_lists
|
2012-07-27 16:00:27 -07:00
|
|
|
when (style == Example) $ guardEnabled Ext_example_lists
|
2015-04-18 18:34:55 -07:00
|
|
|
items <- fmap sequence $ many1 $ listItem
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
( try $ do
|
|
|
|
optional newline -- if preceded by Plain block in a list
|
2014-08-12 11:10:48 -07:00
|
|
|
startpos <- sourceColumn <$> getPosition
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
skipNonindentSpaces
|
2014-08-12 11:10:48 -07:00
|
|
|
res <- orderedListMarker style delim
|
|
|
|
endpos <- sourceColumn <$> getPosition
|
|
|
|
tabStop <- getOption readerTabStop
|
|
|
|
lookAhead (newline <|> spaceChar)
|
|
|
|
atMostSpaces (tabStop - (endpos - startpos))
|
|
|
|
return res )
|
2012-07-27 15:45:47 -07:00
|
|
|
start' <- option 1 $ guardEnabled Ext_startnum >> return start
|
2017-01-27 21:36:45 +01:00
|
|
|
return $ B.orderedListWith (start', style, delim) <$> fmap compactify items
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
bulletList :: PandocMonad m => MarkdownParser m (F Blocks)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
bulletList = do
|
2015-04-18 18:34:55 -07:00
|
|
|
items <- fmap sequence $ many1 $ listItem bulletListStart
|
2017-01-27 21:36:45 +01:00
|
|
|
return $ B.bulletList <$> fmap compactify items
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- definition lists
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
defListMarker :: PandocMonad m => MarkdownParser m ()
|
2009-12-07 08:26:53 +00:00
|
|
|
defListMarker = do
|
|
|
|
sps <- nonindentSpaces
|
|
|
|
char ':' <|> char '~'
|
2012-07-25 12:31:16 -07:00
|
|
|
tabStop <- getOption readerTabStop
|
2009-12-07 08:26:53 +00:00
|
|
|
let remaining = tabStop - (length sps + 1)
|
|
|
|
if remaining > 0
|
2015-04-18 10:13:32 -07:00
|
|
|
then try (count remaining (char ' ')) <|> string "\t" <|> many1 spaceChar
|
2012-07-20 14:19:06 -07:00
|
|
|
else mzero
|
2009-12-07 08:26:53 +00:00
|
|
|
return ()
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
definitionListItem :: PandocMonad m => Bool -> MarkdownParser m (F (Inlines, [Blocks]))
|
2014-07-20 16:33:59 -07:00
|
|
|
definitionListItem compact = try $ do
|
|
|
|
rawLine' <- anyLine
|
|
|
|
raw <- many1 $ defRawBlock compact
|
2015-04-18 18:34:55 -07:00
|
|
|
term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine'
|
2015-04-26 11:20:53 -07:00
|
|
|
contents <- mapM (parseFromString parseBlocks . (++"\n")) raw
|
2014-07-20 16:33:59 -07:00
|
|
|
optional blanklines
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ liftM2 (,) term (sequence contents)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
defRawBlock :: PandocMonad m => Bool -> MarkdownParser m String
|
2014-07-20 16:33:59 -07:00
|
|
|
defRawBlock compact = try $ do
|
|
|
|
hasBlank <- option False $ blankline >> return True
|
2009-12-07 08:26:53 +00:00
|
|
|
defListMarker
|
2007-11-03 23:27:58 +00:00
|
|
|
firstline <- anyLine
|
2014-07-20 16:33:59 -07:00
|
|
|
let dline = try
|
|
|
|
( do notFollowedBy blankline
|
2015-05-03 15:06:40 -07:00
|
|
|
notFollowedByHtmlCloser
|
2014-07-20 16:33:59 -07:00
|
|
|
if compact -- laziness not compatible with compact
|
|
|
|
then () <$ indentSpaces
|
|
|
|
else (() <$ indentSpaces)
|
|
|
|
<|> notFollowedBy defListMarker
|
|
|
|
anyLine )
|
|
|
|
rawlines <- many dline
|
|
|
|
cont <- liftM concat $ many $ try $ do
|
|
|
|
trailing <- option "" blanklines
|
|
|
|
ln <- indentSpaces >> notFollowedBy blankline >> anyLine
|
|
|
|
lns <- many dline
|
|
|
|
return $ trailing ++ unlines (ln:lns)
|
|
|
|
return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++
|
|
|
|
if hasBlank || not (null cont) then "\n\n" else ""
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
definitionList :: PandocMonad m => MarkdownParser m (F Blocks)
|
2014-07-20 16:33:59 -07:00
|
|
|
definitionList = try $ do
|
2015-04-18 18:34:55 -07:00
|
|
|
lookAhead (anyLine >>
|
|
|
|
optional (blankline >> notFollowedBy (table >> return ())) >>
|
2015-04-18 10:13:32 -07:00
|
|
|
-- don't capture table caption as def list!
|
|
|
|
defListMarker)
|
2014-07-20 16:33:59 -07:00
|
|
|
compactDefinitionList <|> normalDefinitionList
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
compactDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks)
|
2014-07-20 16:33:59 -07:00
|
|
|
compactDefinitionList = do
|
|
|
|
guardEnabled Ext_compact_definition_lists
|
2015-04-18 18:34:55 -07:00
|
|
|
items <- fmap sequence $ many1 $ definitionListItem True
|
2017-01-27 21:36:45 +01:00
|
|
|
return $ B.definitionList <$> fmap compactifyDL items
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
normalDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks)
|
2014-07-20 16:33:59 -07:00
|
|
|
normalDefinitionList = do
|
|
|
|
guardEnabled Ext_definition_lists
|
2015-04-18 18:34:55 -07:00
|
|
|
items <- fmap sequence $ many1 $ definitionListItem False
|
|
|
|
return $ B.definitionList <$> items
|
2014-07-20 16:33:59 -07:00
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
--
|
|
|
|
-- paragraph block
|
|
|
|
--
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
para :: PandocMonad m => MarkdownParser m (F Blocks)
|
2012-07-26 22:32:53 -07:00
|
|
|
para = try $ do
|
2013-01-14 20:53:08 -08:00
|
|
|
exts <- getOption readerExtensions
|
2015-04-18 18:34:55 -07:00
|
|
|
result <- trimInlinesF . mconcat <$> many1 inline
|
|
|
|
option (B.plain <$> result)
|
2013-01-14 20:53:08 -08:00
|
|
|
$ try $ do
|
|
|
|
newline
|
|
|
|
(blanklines >> return mempty)
|
2013-09-08 11:49:13 -07:00
|
|
|
<|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
|
|
|
|
<|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
|
|
|
|
<|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
|
|
|
|
<|> (guardEnabled Ext_lists_without_preceding_blankline >>
|
2014-09-26 13:32:08 +04:00
|
|
|
-- Avoid creating a paragraph in a nested list.
|
|
|
|
notFollowedBy' inList >>
|
2013-09-08 11:49:13 -07:00
|
|
|
() <$ lookAhead listStart)
|
2014-08-31 12:55:47 -07:00
|
|
|
<|> do guardEnabled Ext_native_divs
|
|
|
|
inHtmlBlock <- stateInHtmlBlock <$> getState
|
|
|
|
case inHtmlBlock of
|
|
|
|
Just "div" -> () <$
|
|
|
|
lookAhead (htmlTag (~== TagClose "div"))
|
|
|
|
_ -> mzero
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ do
|
|
|
|
result' <- result
|
|
|
|
case B.toList result' of
|
2015-04-02 21:09:08 -07:00
|
|
|
[Image attr alt (src,tit)]
|
2017-01-14 13:06:27 +01:00
|
|
|
| Ext_implicit_figures `extensionEnabled` exts ->
|
2013-01-15 08:45:46 -08:00
|
|
|
-- the fig: at beginning of title indicates a figure
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ B.para $ B.singleton
|
2015-04-02 21:09:08 -07:00
|
|
|
$ Image attr alt (src,'f':'i':'g':':':tit)
|
2015-04-18 18:34:55 -07:00
|
|
|
_ -> return $ B.para result'
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
plain :: PandocMonad m => MarkdownParser m (F Blocks)
|
2015-04-18 18:34:55 -07:00
|
|
|
plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2012-07-26 22:32:53 -07:00
|
|
|
--
|
2007-11-03 23:27:58 +00:00
|
|
|
-- raw html
|
|
|
|
--
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
htmlElement :: PandocMonad m => MarkdownParser m String
|
2013-12-15 12:27:29 -08:00
|
|
|
htmlElement = rawVerbatimBlock
|
|
|
|
<|> strictHtmlBlock
|
|
|
|
<|> liftM snd (htmlTag isBlockTag)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
htmlBlock :: PandocMonad m => MarkdownParser m (F Blocks)
|
2012-08-12 22:04:23 -07:00
|
|
|
htmlBlock = do
|
|
|
|
guardEnabled Ext_raw_html
|
2014-07-07 15:47:51 -06:00
|
|
|
try (do
|
|
|
|
(TagOpen t attrs) <- lookAhead $ fst <$> htmlTag isBlockTag
|
|
|
|
(guard (t `elem` ["pre","style","script"]) >>
|
2015-04-18 18:34:55 -07:00
|
|
|
(return . B.rawBlock "html") <$> rawVerbatimBlock)
|
2014-07-20 17:44:28 -07:00
|
|
|
<|> (do guardEnabled Ext_markdown_attribute
|
|
|
|
oldMarkdownAttribute <- stateMarkdownAttribute <$> getState
|
|
|
|
markdownAttribute <-
|
|
|
|
case lookup "markdown" attrs of
|
|
|
|
Just "0" -> False <$ updateState (\st -> st{
|
|
|
|
stateMarkdownAttribute = False })
|
|
|
|
Just _ -> True <$ updateState (\st -> st{
|
|
|
|
stateMarkdownAttribute = True })
|
|
|
|
Nothing -> return oldMarkdownAttribute
|
|
|
|
res <- if markdownAttribute
|
|
|
|
then rawHtmlBlocks
|
|
|
|
else htmlBlock'
|
|
|
|
updateState $ \st -> st{ stateMarkdownAttribute =
|
|
|
|
oldMarkdownAttribute }
|
|
|
|
return res)
|
2014-07-07 15:47:51 -06:00
|
|
|
<|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks))
|
|
|
|
<|> htmlBlock'
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
htmlBlock' :: PandocMonad m => MarkdownParser m (F Blocks)
|
2012-07-26 20:29:08 -07:00
|
|
|
htmlBlock' = try $ do
|
2008-01-03 21:32:32 +00:00
|
|
|
first <- htmlElement
|
2014-07-07 15:47:51 -06:00
|
|
|
skipMany spaceChar
|
|
|
|
optional blanklines
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return $ B.rawBlock "html" first
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
strictHtmlBlock :: PandocMonad m => MarkdownParser m String
|
2012-07-24 21:41:57 -07:00
|
|
|
strictHtmlBlock = htmlInBalanced (not . isInlineTag)
|
New HTML reader using tagsoup as a lexer.
* The new reader is faster and more accurate.
* API changes for Text.Pandoc.Readers.HTML:
- removed rawHtmlBlock, anyHtmlBlockTag, anyHtmlInlineTag,
anyHtmlTag, anyHtmlEndTag, htmlEndTag, extractTagType,
htmlBlockElement, htmlComment
- added htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag
* tagsoup is a new dependency.
* Text.Pandoc.Parsing: Generalized type on readWith.
* Benchmark.hs: Added length calculation to force full evaluation.
* Updated HTML reader tests.
* Updated markdown and textile readers to use the functions from
the HTML reader.
* Note: The markdown reader now correctly handles some cases it did not
before. For example:
<hr/>
is reproduced without adding a space.
<script>
a = '<b>';
</script>
is parsed correctly.
2010-12-22 20:25:15 -08:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
rawVerbatimBlock :: PandocMonad m => MarkdownParser m String
|
2016-02-21 07:55:35 -08:00
|
|
|
rawVerbatimBlock = htmlInBalanced isVerbTag
|
|
|
|
where isVerbTag (TagOpen "pre" _) = True
|
|
|
|
isVerbTag (TagOpen "style" _) = True
|
|
|
|
isVerbTag (TagOpen "script" _) = True
|
|
|
|
isVerbTag _ = False
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2017-03-22 21:18:55 +01:00
|
|
|
latexMacro :: PandocMonad m => MarkdownParser m (F Blocks)
|
|
|
|
latexMacro = try $ do
|
|
|
|
guardEnabled Ext_latex_macros
|
|
|
|
skipNonindentSpaces
|
|
|
|
res <- macro
|
|
|
|
return $ return res
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks)
|
2011-01-03 13:20:57 -08:00
|
|
|
rawTeXBlock = do
|
2012-07-26 20:29:08 -07:00
|
|
|
guardEnabled Ext_raw_tex
|
2014-06-23 12:41:47 -07:00
|
|
|
result <- (B.rawBlock "latex" . concat <$>
|
2015-04-18 18:34:55 -07:00
|
|
|
rawLaTeXBlock `sepEndBy1` blankline)
|
2014-06-23 12:41:47 -07:00
|
|
|
<|> (B.rawBlock "context" . concat <$>
|
|
|
|
rawConTeXtEnvironment `sepEndBy1` blankline)
|
2011-01-03 13:20:57 -08:00
|
|
|
spaces
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return result
|
2011-01-03 13:20:57 -08:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
|
2007-11-03 23:27:58 +00:00
|
|
|
rawHtmlBlocks = do
|
2014-07-07 15:47:51 -06:00
|
|
|
(TagOpen tagtype _, raw) <- htmlTag isBlockTag
|
|
|
|
-- try to find closing tag
|
|
|
|
-- we set stateInHtmlBlock so that closing tags that can be either block or
|
|
|
|
-- inline will not be parsed as inline tags
|
|
|
|
oldInHtmlBlock <- stateInHtmlBlock <$> getState
|
|
|
|
updateState $ \st -> st{ stateInHtmlBlock = Just tagtype }
|
|
|
|
let closer = htmlTag (\x -> x ~== TagClose tagtype)
|
|
|
|
contents <- mconcat <$> many (notFollowedBy' closer >> block)
|
|
|
|
result <-
|
|
|
|
(closer >>= \(_, rawcloser) -> return (
|
2015-04-18 18:34:55 -07:00
|
|
|
return (B.rawBlock "html" $ stripMarkdownAttribute raw) <>
|
2014-07-07 15:47:51 -06:00
|
|
|
contents <>
|
2015-04-18 18:34:55 -07:00
|
|
|
return (B.rawBlock "html" rawcloser)))
|
|
|
|
<|> return (return (B.rawBlock "html" raw) <> contents)
|
2014-07-07 15:47:51 -06:00
|
|
|
updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
|
|
|
|
return result
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2012-08-15 09:42:16 -07:00
|
|
|
-- remove markdown="1" attribute
|
|
|
|
stripMarkdownAttribute :: String -> String
|
|
|
|
stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s
|
|
|
|
where filterAttrib (TagOpen t as) = TagOpen t
|
|
|
|
[(k,v) | (k,v) <- as, k /= "markdown"]
|
|
|
|
filterAttrib x = x
|
|
|
|
|
2013-01-13 12:34:18 -08:00
|
|
|
--
|
|
|
|
-- line block
|
|
|
|
--
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
lineBlock :: PandocMonad m => MarkdownParser m (F Blocks)
|
2013-01-13 12:34:18 -08:00
|
|
|
lineBlock = try $ do
|
|
|
|
guardEnabled Ext_line_blocks
|
|
|
|
lines' <- lineBlockLines >>=
|
2015-04-18 18:34:55 -07:00
|
|
|
mapM (parseFromString (trimInlinesF . mconcat <$> many inline))
|
2016-10-13 08:46:44 +02:00
|
|
|
return $ B.lineBlock <$> sequence lines'
|
2013-01-13 12:34:18 -08:00
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
--
|
|
|
|
-- Tables
|
2012-07-26 22:32:53 -07:00
|
|
|
--
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- Parse a dashed line with optional trailing spaces; return its length
|
|
|
|
-- and the length including trailing space.
|
2016-11-28 17:13:46 -05:00
|
|
|
dashedLine :: PandocMonad m
|
|
|
|
=> Char
|
|
|
|
-> ParserT [Char] st m (Int, Int)
|
2007-11-03 23:27:58 +00:00
|
|
|
dashedLine ch = do
|
|
|
|
dashes <- many1 (char ch)
|
|
|
|
sp <- many spaceChar
|
2015-04-07 13:46:14 +01:00
|
|
|
let lengthDashes = length dashes
|
|
|
|
lengthSp = length sp
|
|
|
|
return (lengthDashes, lengthDashes + lengthSp)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2012-07-26 22:32:53 -07:00
|
|
|
-- Parse a table header with dashed lines of '-' preceded by
|
2009-12-05 21:34:46 +00:00
|
|
|
-- one (or zero) line of text.
|
2016-11-28 17:13:46 -05:00
|
|
|
simpleTableHeader :: PandocMonad m
|
|
|
|
=> Bool -- ^ Headerless table
|
|
|
|
-> MarkdownParser m (F [Blocks], [Alignment], [Int])
|
2009-12-05 21:34:46 +00:00
|
|
|
simpleTableHeader headless = try $ do
|
|
|
|
rawContent <- if headless
|
|
|
|
then return ""
|
|
|
|
else anyLine
|
2007-11-03 23:27:58 +00:00
|
|
|
initSp <- nonindentSpaces
|
|
|
|
dashes <- many1 (dashedLine '-')
|
|
|
|
newline
|
2008-07-11 16:33:21 +00:00
|
|
|
let (lengths, lines') = unzip dashes
|
|
|
|
let indices = scanl (+) (length initSp) lines'
|
2009-12-05 21:34:46 +00:00
|
|
|
-- If no header, calculate alignment on basis of first row of text
|
2012-01-27 00:39:00 -08:00
|
|
|
rawHeads <- liftM (tail . splitStringByIndices (init indices)) $
|
2009-12-05 21:34:46 +00:00
|
|
|
if headless
|
2012-07-26 22:32:53 -07:00
|
|
|
then lookAhead anyLine
|
2009-12-05 21:34:46 +00:00
|
|
|
else return rawContent
|
2007-11-03 23:27:58 +00:00
|
|
|
let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
|
2009-12-05 21:34:46 +00:00
|
|
|
let rawHeads' = if headless
|
|
|
|
then replicate (length dashes) ""
|
2012-07-26 22:32:53 -07:00
|
|
|
else rawHeads
|
2015-04-18 18:34:55 -07:00
|
|
|
heads <- fmap sequence
|
|
|
|
$ mapM (parseFromString (mconcat <$> many plain))
|
|
|
|
$ map trim rawHeads'
|
2010-07-05 23:43:07 -07:00
|
|
|
return (heads, aligns, indices)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
-- Returns an alignment type for a table, based on a list of strings
|
|
|
|
-- (the rows of the column header) and a number (the length of the
|
|
|
|
-- dashed line under the rows.
|
|
|
|
alignType :: [String]
|
|
|
|
-> Int
|
|
|
|
-> Alignment
|
|
|
|
alignType [] _ = AlignDefault
|
|
|
|
alignType strLst len =
|
2012-09-29 17:09:34 -04:00
|
|
|
let nonempties = filter (not . null) $ map trimr strLst
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
(leftSpace, rightSpace) =
|
|
|
|
case sortBy (comparing length) nonempties of
|
2017-03-04 13:03:41 +01:00
|
|
|
(x:_) -> (head x `elem` " \t", length x < len)
|
|
|
|
[] -> (False, False)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
in case (leftSpace, rightSpace) of
|
2017-03-04 13:03:41 +01:00
|
|
|
(True, False) -> AlignRight
|
|
|
|
(False, True) -> AlignLeft
|
|
|
|
(True, True) -> AlignCenter
|
|
|
|
(False, False) -> AlignDefault
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
-- Parse a table footer - dashed lines followed by blank line.
|
2016-11-28 17:13:46 -05:00
|
|
|
tableFooter :: PandocMonad m => MarkdownParser m String
|
2009-04-29 19:28:31 +00:00
|
|
|
tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- Parse a table separator - dashed line.
|
2016-11-28 17:13:46 -05:00
|
|
|
tableSep :: PandocMonad m => MarkdownParser m Char
|
2009-12-05 21:34:46 +00:00
|
|
|
tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n'
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- Parse a raw line and split it into chunks by indices.
|
2016-11-28 17:13:46 -05:00
|
|
|
rawTableLine :: PandocMonad m
|
|
|
|
=> [Int]
|
|
|
|
-> MarkdownParser m [String]
|
2007-11-03 23:27:58 +00:00
|
|
|
rawTableLine indices = do
|
|
|
|
notFollowedBy' (blanklines <|> tableFooter)
|
|
|
|
line <- many1Till anyChar newline
|
2012-09-29 17:09:34 -04:00
|
|
|
return $ map trim $ tail $
|
2012-01-27 00:39:00 -08:00
|
|
|
splitStringByIndices (init indices) line
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- Parse a table line and return a list of lists of blocks (columns).
|
2016-11-28 17:13:46 -05:00
|
|
|
tableLine :: PandocMonad m
|
|
|
|
=> [Int]
|
|
|
|
-> MarkdownParser m (F [Blocks])
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
tableLine indices = rawTableLine indices >>=
|
2015-04-18 18:34:55 -07:00
|
|
|
fmap sequence . mapM (parseFromString (mconcat <$> many plain))
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- Parse a multiline table row and return a list of blocks (columns).
|
2016-11-28 17:13:46 -05:00
|
|
|
multilineRow :: PandocMonad m
|
|
|
|
=> [Int]
|
|
|
|
-> MarkdownParser m (F [Blocks])
|
2007-11-03 23:27:58 +00:00
|
|
|
multilineRow indices = do
|
|
|
|
colLines <- many1 (rawTableLine indices)
|
|
|
|
let cols = map unlines $ transpose colLines
|
2015-04-18 18:34:55 -07:00
|
|
|
fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- Parses a table caption: inlines beginning with 'Table:'
|
|
|
|
-- and followed by blank lines.
|
2016-11-28 17:13:46 -05:00
|
|
|
tableCaption :: PandocMonad m => MarkdownParser m (F Inlines)
|
2007-11-03 23:27:58 +00:00
|
|
|
tableCaption = try $ do
|
2012-07-26 20:29:08 -07:00
|
|
|
guardEnabled Ext_table_captions
|
2009-04-29 19:28:31 +00:00
|
|
|
skipNonindentSpaces
|
2010-07-06 21:01:26 -07:00
|
|
|
string ":" <|> string "Table:"
|
2015-04-18 18:34:55 -07:00
|
|
|
trimInlinesF . mconcat <$> many1 inline <* blanklines
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- Parse a simple table with '---' header and one line per row.
|
2016-11-28 17:13:46 -05:00
|
|
|
simpleTable :: PandocMonad m
|
|
|
|
=> Bool -- ^ Headerless table
|
|
|
|
-> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
|
2009-12-05 21:34:46 +00:00
|
|
|
simpleTable headless = do
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
(aligns, _widths, heads', lines') <-
|
|
|
|
tableWith (simpleTableHeader headless) tableLine
|
2010-07-05 23:43:07 -07:00
|
|
|
(return ())
|
|
|
|
(if headless then tableFooter else tableFooter <|> blanklines)
|
2009-11-28 03:22:33 +00:00
|
|
|
-- Simple tables get 0s for relative column widths (i.e., use default)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
return (aligns, replicate (length aligns) 0, heads', lines')
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- Parse a multiline table: starts with row of '-' on top, then header
|
|
|
|
-- (which may be multiline), then the rows,
|
|
|
|
-- which may be multiline, separated by blank lines, and
|
|
|
|
-- ending with a footer (dashed line followed by blank line).
|
2016-11-28 17:13:46 -05:00
|
|
|
multilineTable :: PandocMonad m
|
|
|
|
=> Bool -- ^ Headerless table
|
|
|
|
-> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
|
2009-12-05 21:34:46 +00:00
|
|
|
multilineTable headless =
|
2012-07-24 09:06:13 -07:00
|
|
|
tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter
|
2009-12-05 21:34:46 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
multilineTableHeader :: PandocMonad m
|
|
|
|
=> Bool -- ^ Headerless table
|
|
|
|
-> MarkdownParser m (F [Blocks], [Alignment], [Int])
|
2009-12-05 21:34:46 +00:00
|
|
|
multilineTableHeader headless = try $ do
|
2014-06-16 20:48:55 -07:00
|
|
|
unless headless $
|
|
|
|
tableSep >> notFollowedBy blankline
|
2009-12-05 21:34:46 +00:00
|
|
|
rawContent <- if headless
|
2012-07-26 22:32:53 -07:00
|
|
|
then return $ repeat ""
|
2014-06-16 21:26:50 -07:00
|
|
|
else many1 $ notFollowedBy tableSep >> anyLine
|
2007-11-03 23:27:58 +00:00
|
|
|
initSp <- nonindentSpaces
|
|
|
|
dashes <- many1 (dashedLine '-')
|
|
|
|
newline
|
2008-07-11 16:33:21 +00:00
|
|
|
let (lengths, lines') = unzip dashes
|
|
|
|
let indices = scanl (+) (length initSp) lines'
|
2009-12-05 21:34:46 +00:00
|
|
|
rawHeadsList <- if headless
|
|
|
|
then liftM (map (:[]) . tail .
|
2012-01-27 00:39:00 -08:00
|
|
|
splitStringByIndices (init indices)) $ lookAhead anyLine
|
2012-07-26 22:32:53 -07:00
|
|
|
else return $ transpose $ map
|
2014-01-07 23:39:49 -08:00
|
|
|
(tail . splitStringByIndices (init indices))
|
2009-12-05 21:34:46 +00:00
|
|
|
rawContent
|
2007-11-03 23:27:58 +00:00
|
|
|
let aligns = zipWith alignType rawHeadsList lengths
|
2009-12-05 21:34:46 +00:00
|
|
|
let rawHeads = if headless
|
|
|
|
then replicate (length dashes) ""
|
2014-01-07 23:39:49 -08:00
|
|
|
else map (unlines . map trim) rawHeadsList
|
2015-04-18 18:34:55 -07:00
|
|
|
heads <- fmap sequence $
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
mapM (parseFromString (mconcat <$> many plain)) $
|
2012-09-29 17:09:34 -04:00
|
|
|
map trim rawHeads
|
2010-07-05 23:43:07 -07:00
|
|
|
return (heads, aligns, indices)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
-- Parse a grid table: starts with row of '-' on top, then header
|
|
|
|
-- (which may be grid), then the rows,
|
|
|
|
-- which may be grid, separated by blank lines, and
|
|
|
|
-- ending with a footer (dashed line followed by blank line).
|
2016-11-28 17:13:46 -05:00
|
|
|
gridTable :: PandocMonad m => Bool -- ^ Headerless table
|
|
|
|
-> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
gridTable headless =
|
|
|
|
tableWith (gridTableHeader headless) gridTableRow
|
|
|
|
(gridTableSep '-') gridTableFooter
|
|
|
|
|
|
|
|
gridTableSplitLine :: [Int] -> String -> [String]
|
|
|
|
gridTableSplitLine indices line = map removeFinalBar $ tail $
|
2012-09-29 17:09:34 -04:00
|
|
|
splitStringByIndices (init indices) $ trimr line
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
gridPart :: PandocMonad m => Char -> ParserT [Char] st m ((Int, Int), Alignment)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
gridPart ch = do
|
2016-11-15 16:41:54 +01:00
|
|
|
leftColon <- option False (True <$ char ':')
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
dashes <- many1 (char ch)
|
2016-11-15 16:41:54 +01:00
|
|
|
rightColon <- option False (True <$ char ':')
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
char '+'
|
2016-11-15 16:41:54 +01:00
|
|
|
let lengthDashes = length dashes + (if leftColon then 1 else 0) +
|
|
|
|
(if rightColon then 1 else 0)
|
|
|
|
let alignment = case (leftColon, rightColon) of
|
|
|
|
(True, True) -> AlignCenter
|
|
|
|
(True, False) -> AlignLeft
|
|
|
|
(False, True) -> AlignRight
|
|
|
|
(False, False) -> AlignDefault
|
|
|
|
return ((lengthDashes, lengthDashes + 1), alignment)
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
gridDashedLines :: PandocMonad m => Char -> ParserT [Char] st m [((Int, Int), Alignment)]
|
2014-07-11 12:51:26 +01:00
|
|
|
gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
|
|
|
removeFinalBar :: String -> String
|
|
|
|
removeFinalBar =
|
|
|
|
reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
|
|
|
|
|
|
|
|
-- | Separator between rows of grid table.
|
2016-11-28 17:13:46 -05:00
|
|
|
gridTableSep :: PandocMonad m => Char -> MarkdownParser m Char
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
gridTableSep ch = try $ gridDashedLines ch >> return '\n'
|
|
|
|
|
|
|
|
-- | Parse header for a grid table.
|
2016-11-28 17:13:46 -05:00
|
|
|
gridTableHeader :: PandocMonad m => Bool -- ^ Headerless table
|
|
|
|
-> MarkdownParser m (F [Blocks], [Alignment], [Int])
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
gridTableHeader headless = try $ do
|
|
|
|
optional blanklines
|
|
|
|
dashes <- gridDashedLines '-'
|
|
|
|
rawContent <- if headless
|
2016-11-15 16:41:54 +01:00
|
|
|
then return []
|
|
|
|
else many1 (try (char '|' >> anyLine))
|
|
|
|
underDashes <- if headless
|
|
|
|
then return dashes
|
|
|
|
else gridDashedLines '='
|
|
|
|
guard $ length dashes == length underDashes
|
|
|
|
let lines' = map (snd . fst) underDashes
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
let indices = scanl (+) 0 lines'
|
2016-11-15 16:41:54 +01:00
|
|
|
let aligns = map snd underDashes
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
let rawHeads = if headless
|
2016-11-15 16:41:54 +01:00
|
|
|
then replicate (length underDashes) ""
|
2014-01-07 23:39:49 -08:00
|
|
|
else map (unlines . map trim) $ transpose
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
$ map (gridTableSplitLine indices) rawContent
|
2015-04-18 18:34:55 -07:00
|
|
|
heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
return (heads, aligns, indices)
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
gridTableRawLine :: PandocMonad m => [Int] -> MarkdownParser m [String]
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
gridTableRawLine indices = do
|
|
|
|
char '|'
|
2016-11-15 16:41:54 +01:00
|
|
|
line <- anyLine
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
return (gridTableSplitLine indices line)
|
|
|
|
|
|
|
|
-- | Parse row of grid table.
|
2016-11-28 17:13:46 -05:00
|
|
|
gridTableRow :: PandocMonad m => [Int]
|
|
|
|
-> MarkdownParser m (F [Blocks])
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
gridTableRow indices = do
|
|
|
|
colLines <- many1 (gridTableRawLine indices)
|
|
|
|
let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
|
|
|
|
transpose colLines
|
2017-01-27 21:36:45 +01:00
|
|
|
fmap compactify <$> fmap sequence (mapM (parseFromString parseBlocks) cols)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
|
|
|
removeOneLeadingSpace :: [String] -> [String]
|
|
|
|
removeOneLeadingSpace xs =
|
|
|
|
if all startsWithSpace xs
|
|
|
|
then map (drop 1) xs
|
|
|
|
else xs
|
2017-03-04 13:03:41 +01:00
|
|
|
where startsWithSpace "" = True
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
startsWithSpace (y:_) = y == ' '
|
|
|
|
|
|
|
|
-- | Parse footer for a grid table.
|
2016-11-28 17:13:46 -05:00
|
|
|
gridTableFooter :: PandocMonad m => MarkdownParser m [Char]
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
gridTableFooter = blanklines
|
2010-07-05 23:43:07 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int])
|
2014-05-01 09:23:21 -07:00
|
|
|
pipeBreak = try $ do
|
|
|
|
nonindentSpaces
|
|
|
|
openPipe <- (True <$ char '|') <|> return False
|
|
|
|
first <- pipeTableHeaderPart
|
|
|
|
rest <- many $ sepPipe *> pipeTableHeaderPart
|
|
|
|
-- surrounding pipes needed for a one-column table:
|
|
|
|
guard $ not (null rest && not openPipe)
|
|
|
|
optional (char '|')
|
|
|
|
blankline
|
2015-10-30 12:37:08 -07:00
|
|
|
return $ unzip (first:rest)
|
2014-05-01 09:23:21 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
pipeTable :: PandocMonad m => MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
|
2012-08-01 23:34:48 -07:00
|
|
|
pipeTable = try $ do
|
2015-07-27 10:24:06 -07:00
|
|
|
nonindentSpaces
|
|
|
|
lookAhead nonspaceChar
|
2015-12-03 11:02:45 -08:00
|
|
|
(heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak
|
2016-03-09 10:11:32 -08:00
|
|
|
let heads' = take (length aligns) <$> heads
|
2015-12-03 11:02:45 -08:00
|
|
|
lines' <- many pipeTableRow
|
2016-03-09 10:11:32 -08:00
|
|
|
let lines'' = map (take (length aligns) <$>) lines'
|
2015-12-03 11:02:45 -08:00
|
|
|
let maxlength = maximum $
|
2016-03-09 10:11:32 -08:00
|
|
|
map (\x -> length . stringify $ runF x def) (heads' : lines'')
|
2015-10-30 12:37:08 -07:00
|
|
|
numColumns <- getOption readerColumns
|
|
|
|
let widths = if maxlength > numColumns
|
|
|
|
then map (\len ->
|
|
|
|
fromIntegral (len + 1) / fromIntegral numColumns)
|
|
|
|
seplengths
|
|
|
|
else replicate (length aligns) 0.0
|
2016-03-09 10:11:32 -08:00
|
|
|
return $ (aligns, widths, heads', sequence lines'')
|
2012-07-22 22:09:15 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
sepPipe :: PandocMonad m => MarkdownParser m ()
|
2012-08-10 13:29:37 -07:00
|
|
|
sepPipe = try $ do
|
|
|
|
char '|' <|> char '+'
|
|
|
|
notFollowedBy blankline
|
2012-07-22 22:09:15 -07:00
|
|
|
|
2012-08-10 13:29:37 -07:00
|
|
|
-- parse a row, also returning probable alignments for org-table cells
|
2016-11-28 17:13:46 -05:00
|
|
|
pipeTableRow :: PandocMonad m => MarkdownParser m (F [Blocks])
|
2016-03-09 08:33:13 -08:00
|
|
|
pipeTableRow = try $ do
|
|
|
|
scanForPipe
|
2015-07-27 10:24:06 -07:00
|
|
|
skipMany spaceChar
|
2014-05-01 09:23:21 -07:00
|
|
|
openPipe <- (True <$ char '|') <|> return False
|
2016-03-09 11:46:00 -08:00
|
|
|
-- split into cells
|
2017-03-02 10:25:12 +01:00
|
|
|
let chunk = void (code <|> math <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline')
|
2016-03-09 11:46:00 -08:00
|
|
|
<|> void (noneOf "|\n\r")
|
|
|
|
let cellContents = ((trim . snd) <$> withRaw (many chunk)) >>=
|
|
|
|
parseFromString pipeTableCell
|
|
|
|
cells <- cellContents `sepEndBy1` (char '|')
|
2014-05-01 09:23:21 -07:00
|
|
|
-- surrounding pipes needed for a one-column table:
|
2016-03-09 08:44:31 -08:00
|
|
|
guard $ not (length cells == 1 && not openPipe)
|
2016-03-09 11:46:00 -08:00
|
|
|
blankline
|
|
|
|
return $ sequence cells
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
pipeTableCell :: PandocMonad m => MarkdownParser m (F Blocks)
|
2016-03-09 11:46:00 -08:00
|
|
|
pipeTableCell = do
|
|
|
|
result <- many inline
|
|
|
|
if null result
|
|
|
|
then return mempty
|
|
|
|
else return $ B.plain . mconcat <$> sequence result
|
2012-07-22 22:09:15 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
pipeTableHeaderPart :: PandocMonad m => ParserT [Char] st m (Alignment, Int)
|
2013-03-02 18:59:50 -08:00
|
|
|
pipeTableHeaderPart = try $ do
|
|
|
|
skipMany spaceChar
|
2012-07-22 22:09:15 -07:00
|
|
|
left <- optionMaybe (char ':')
|
2015-10-30 12:37:08 -07:00
|
|
|
pipe <- many1 (char '-')
|
2012-07-22 22:09:15 -07:00
|
|
|
right <- optionMaybe (char ':')
|
2013-03-02 18:59:50 -08:00
|
|
|
skipMany spaceChar
|
2015-10-30 12:37:08 -07:00
|
|
|
let len = length pipe + maybe 0 (const 1) left + maybe 0 (const 1) right
|
2012-07-22 22:09:15 -07:00
|
|
|
return $
|
2015-10-30 12:37:08 -07:00
|
|
|
((case (left,right) of
|
|
|
|
(Nothing,Nothing) -> AlignDefault
|
|
|
|
(Just _,Nothing) -> AlignLeft
|
|
|
|
(Nothing,Just _) -> AlignRight
|
|
|
|
(Just _,Just _) -> AlignCenter), len)
|
2012-07-22 22:09:15 -07:00
|
|
|
|
2012-07-24 07:13:49 -07:00
|
|
|
-- Succeed only if current line contains a pipe.
|
2016-11-28 17:13:46 -05:00
|
|
|
scanForPipe :: PandocMonad m => ParserT [Char] st m ()
|
2013-01-25 16:13:58 -08:00
|
|
|
scanForPipe = do
|
|
|
|
inp <- getInput
|
|
|
|
case break (\c -> c == '\n' || c == '|') inp of
|
|
|
|
(_,'|':_) -> return ()
|
|
|
|
_ -> mzero
|
2012-07-24 07:13:49 -07:00
|
|
|
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
-- | Parse a table using 'headerParser', 'rowParser',
|
|
|
|
-- 'lineParser', and 'footerParser'. Variant of the version in
|
|
|
|
-- Text.Pandoc.Parsing.
|
2016-11-28 17:13:46 -05:00
|
|
|
tableWith :: PandocMonad m
|
|
|
|
=> MarkdownParser m (F [Blocks], [Alignment], [Int])
|
|
|
|
-> ([Int] -> MarkdownParser m (F [Blocks]))
|
|
|
|
-> MarkdownParser m sep
|
|
|
|
-> MarkdownParser m end
|
|
|
|
-> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
tableWith headerParser rowParser lineParser footerParser = try $ do
|
|
|
|
(heads, aligns, indices) <- headerParser
|
2015-04-18 18:34:55 -07:00
|
|
|
lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
footerParser
|
|
|
|
numColumns <- getOption readerColumns
|
2015-04-18 18:34:55 -07:00
|
|
|
let widths = if (indices == [])
|
|
|
|
then replicate (length aligns) 0.0
|
|
|
|
else widthsFromIndices numColumns indices
|
|
|
|
return $ (aligns, widths, heads, lines')
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
table :: PandocMonad m => MarkdownParser m (F Blocks)
|
2012-07-24 09:06:13 -07:00
|
|
|
table = try $ do
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
frontCaption <- option Nothing (Just <$> tableCaption)
|
|
|
|
(aligns, widths, heads, lns) <-
|
2012-08-01 23:34:48 -07:00
|
|
|
try (guardEnabled Ext_pipe_tables >> scanForPipe >> pipeTable) <|>
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
try (guardEnabled Ext_multiline_tables >>
|
|
|
|
multilineTable False) <|>
|
|
|
|
try (guardEnabled Ext_simple_tables >>
|
|
|
|
(simpleTable True <|> simpleTable False)) <|>
|
|
|
|
try (guardEnabled Ext_multiline_tables >>
|
|
|
|
multilineTable True) <|>
|
|
|
|
try (guardEnabled Ext_grid_tables >>
|
2012-07-26 20:29:08 -07:00
|
|
|
(gridTable False <|> gridTable True)) <?> "table"
|
2012-07-24 09:24:28 -07:00
|
|
|
optional blanklines
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
caption <- case frontCaption of
|
2017-03-04 13:03:41 +01:00
|
|
|
Nothing -> option (return mempty) tableCaption
|
|
|
|
Just c -> return c
|
2016-01-07 10:40:30 -08:00
|
|
|
-- renormalize widths if greater than 100%:
|
|
|
|
let totalWidth = sum widths
|
|
|
|
let widths' = if totalWidth < 1
|
|
|
|
then widths
|
|
|
|
else map (/ totalWidth) widths
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ do
|
|
|
|
caption' <- caption
|
|
|
|
heads' <- heads
|
|
|
|
lns' <- lns
|
2016-01-07 10:40:30 -08:00
|
|
|
return $ B.table caption' (zip aligns widths') heads' lns'
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2012-07-24 09:06:13 -07:00
|
|
|
--
|
2007-11-03 23:27:58 +00:00
|
|
|
-- inline
|
|
|
|
--
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
inline :: PandocMonad m => MarkdownParser m (F Inlines)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
inline = choice [ whitespace
|
2012-09-27 13:43:48 -07:00
|
|
|
, bareURL
|
2011-01-22 16:04:32 -08:00
|
|
|
, str
|
2007-11-03 23:27:58 +00:00
|
|
|
, endline
|
|
|
|
, code
|
2013-07-20 21:14:38 -07:00
|
|
|
, strongOrEmph
|
2007-11-03 23:27:58 +00:00
|
|
|
, note
|
2010-11-12 00:37:44 -08:00
|
|
|
, cite
|
2016-09-28 12:33:05 +02:00
|
|
|
, bracketedSpan
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
, link
|
2007-11-03 23:27:58 +00:00
|
|
|
, image
|
|
|
|
, math
|
|
|
|
, strikeout
|
|
|
|
, subscript
|
2013-07-20 21:14:38 -07:00
|
|
|
, superscript
|
2010-12-12 20:30:55 -08:00
|
|
|
, inlineNote -- after superscript because of ^[link](/foo)^
|
2007-11-03 23:27:58 +00:00
|
|
|
, autoLink
|
2013-08-15 22:39:14 -07:00
|
|
|
, spanHtml
|
New HTML reader using tagsoup as a lexer.
* The new reader is faster and more accurate.
* API changes for Text.Pandoc.Readers.HTML:
- removed rawHtmlBlock, anyHtmlBlockTag, anyHtmlInlineTag,
anyHtmlTag, anyHtmlEndTag, htmlEndTag, extractTagType,
htmlBlockElement, htmlComment
- added htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag
* tagsoup is a new dependency.
* Text.Pandoc.Parsing: Generalized type on readWith.
* Benchmark.hs: Added length calculation to force full evaluation.
* Updated HTML reader tests.
* Updated markdown and textile readers to use the functions from
the HTML reader.
* Note: The markdown reader now correctly handles some cases it did not
before. For example:
<hr/>
is reproduced without adding a space.
<script>
a = '<b>';
</script>
is parsed correctly.
2010-12-22 20:25:15 -08:00
|
|
|
, rawHtmlInline
|
2007-11-03 23:27:58 +00:00
|
|
|
, escapedChar
|
2012-01-29 23:54:00 -08:00
|
|
|
, rawLaTeXInline'
|
2010-03-24 10:51:38 -07:00
|
|
|
, exampleRef
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
, smart
|
2015-04-18 18:34:55 -07:00
|
|
|
, return . B.singleton <$> charRef
|
2015-11-13 12:06:39 -08:00
|
|
|
, emoji
|
2007-11-03 23:27:58 +00:00
|
|
|
, symbol
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
, ltSign
|
|
|
|
] <?> "inline"
|
2007-12-24 04:22:31 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
escapedChar' :: PandocMonad m => MarkdownParser m Char
|
2011-12-05 20:27:10 -08:00
|
|
|
escapedChar' = try $ do
|
2007-11-03 23:27:58 +00:00
|
|
|
char '\\'
|
2012-07-26 20:29:08 -07:00
|
|
|
(guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum))
|
2016-10-22 23:41:55 +02:00
|
|
|
<|> (guardEnabled Ext_angle_brackets_escapable >>
|
|
|
|
oneOf "\\`*_{}[]()>#+-.!~\"<>")
|
2017-01-08 10:01:19 +01:00
|
|
|
<|> (guardEnabled Ext_escaped_line_breaks >> char '\n')
|
2013-01-16 11:25:17 -08:00
|
|
|
<|> oneOf "\\`*_{}[]()>#+-.!~\""
|
2011-12-05 20:27:10 -08:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
escapedChar :: PandocMonad m => MarkdownParser m (F Inlines)
|
2011-12-05 20:27:10 -08:00
|
|
|
escapedChar = do
|
|
|
|
result <- escapedChar'
|
2012-07-27 09:18:51 -07:00
|
|
|
case result of
|
2015-04-18 18:34:55 -07:00
|
|
|
' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space
|
2012-07-27 09:18:51 -07:00
|
|
|
'\n' -> guardEnabled Ext_escaped_line_breaks >>
|
2015-04-18 18:34:55 -07:00
|
|
|
return (return B.linebreak) -- "\[newline]" is a linebreak
|
|
|
|
_ -> return $ return $ B.str [result]
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
ltSign :: PandocMonad m => MarkdownParser m (F Inlines)
|
2007-11-03 23:27:58 +00:00
|
|
|
ltSign = do
|
2012-08-13 20:04:17 -07:00
|
|
|
guardDisabled Ext_raw_html
|
2014-07-20 17:22:29 -07:00
|
|
|
<|> (notFollowedByHtmlCloser >> notFollowedBy' (htmlTag isBlockTag))
|
2012-07-26 20:29:08 -07:00
|
|
|
char '<'
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return $ B.str "<"
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
exampleRef :: PandocMonad m => MarkdownParser m (F Inlines)
|
2010-03-24 10:51:38 -07:00
|
|
|
exampleRef = try $ do
|
2012-07-27 16:00:27 -07:00
|
|
|
guardEnabled Ext_example_lists
|
2010-03-24 10:51:38 -07:00
|
|
|
char '@'
|
|
|
|
lab <- many1 (alphaNum <|> oneOf "-_")
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ do
|
|
|
|
st <- askF
|
|
|
|
return $ case M.lookup lab (stateExamples st) of
|
2017-03-04 13:03:41 +01:00
|
|
|
Just n -> B.str (show n)
|
|
|
|
Nothing -> B.str ('@':lab)
|
2010-03-24 10:51:38 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
symbol :: PandocMonad m => MarkdownParser m (F Inlines)
|
2012-07-26 22:32:53 -07:00
|
|
|
symbol = do
|
2011-01-24 22:12:42 -08:00
|
|
|
result <- noneOf "<\\\n\t "
|
|
|
|
<|> try (do lookAhead $ char '\\'
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
notFollowedBy' (() <$ rawTeXBlock)
|
2011-01-24 22:12:42 -08:00
|
|
|
char '\\')
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return $ B.str [result]
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- parses inline code, between n `s and n `s
|
2016-11-28 17:13:46 -05:00
|
|
|
code :: PandocMonad m => MarkdownParser m (F Inlines)
|
2012-07-26 22:32:53 -07:00
|
|
|
code = try $ do
|
2007-11-03 23:27:58 +00:00
|
|
|
starts <- many1 (char '`')
|
|
|
|
skipSpaces
|
|
|
|
result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
|
2010-03-06 02:42:15 +00:00
|
|
|
(char '\n' >> notFollowedBy' blankline >> return " "))
|
2012-07-26 22:32:53 -07:00
|
|
|
(try (skipSpaces >> count (length starts) (char '`') >>
|
2007-11-03 23:27:58 +00:00
|
|
|
notFollowedBy (char '`')))
|
2016-12-24 15:34:07 +01:00
|
|
|
attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes
|
|
|
|
>> attributes)
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return $ B.codeWith attr $ trim $ concat result
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
math :: PandocMonad m => MarkdownParser m (F Inlines)
|
2015-04-18 18:34:55 -07:00
|
|
|
math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
|
|
|
|
<|> (return . B.math <$> (mathInline >>= applyMacros')) <+?>
|
2017-01-14 18:27:06 +01:00
|
|
|
(guardEnabled Ext_smart *> (return <$> apostrophe)
|
2016-02-04 12:06:32 -08:00
|
|
|
<* notFollowedBy (space <|> satisfy isPunctuation))
|
2008-08-13 03:02:42 +00:00
|
|
|
|
2013-07-20 21:14:38 -07:00
|
|
|
-- Parses material enclosed in *s, **s, _s, or __s.
|
|
|
|
-- Designed to avoid backtracking.
|
2016-11-28 17:13:46 -05:00
|
|
|
enclosure :: PandocMonad m
|
|
|
|
=> Char
|
|
|
|
-> MarkdownParser m (F Inlines)
|
2013-07-20 21:14:38 -07:00
|
|
|
enclosure c = do
|
2014-07-10 14:37:10 -07:00
|
|
|
-- we can't start an enclosure with _ if after a string and
|
|
|
|
-- the intraword_underscores extension is enabled:
|
|
|
|
guardDisabled Ext_intraword_underscores
|
|
|
|
<|> guard (c == '*')
|
|
|
|
<|> (guard =<< notAfterString)
|
2013-07-20 21:14:38 -07:00
|
|
|
cs <- many1 (char c)
|
2015-04-18 18:34:55 -07:00
|
|
|
(return (B.str cs) <>) <$> whitespace
|
|
|
|
<|> do
|
2014-07-10 14:37:10 -07:00
|
|
|
case length cs of
|
2017-03-04 13:03:41 +01:00
|
|
|
3 -> three c
|
|
|
|
2 -> two c mempty
|
|
|
|
1 -> one c mempty
|
|
|
|
_ -> return (return $ B.str cs)
|
2013-07-20 21:14:38 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
ender :: PandocMonad m => Char -> Int -> MarkdownParser m ()
|
2014-07-10 14:51:08 -07:00
|
|
|
ender c n = try $ do
|
|
|
|
count n (char c)
|
|
|
|
guard (c == '*')
|
|
|
|
<|> guardDisabled Ext_intraword_underscores
|
|
|
|
<|> notFollowedBy alphaNum
|
|
|
|
|
2013-07-20 21:14:38 -07:00
|
|
|
-- Parse inlines til you hit one c or a sequence of two cs.
|
|
|
|
-- If one c, emit emph and then parse two.
|
|
|
|
-- If two cs, emit strong and then parse one.
|
2013-11-22 19:41:08 -08:00
|
|
|
-- Otherwise, emit ccc then the results.
|
2016-11-28 17:13:46 -05:00
|
|
|
three :: PandocMonad m => Char -> MarkdownParser m (F Inlines)
|
2013-07-20 21:14:38 -07:00
|
|
|
three c = do
|
2014-07-10 14:51:08 -07:00
|
|
|
contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline)
|
2015-04-18 18:34:55 -07:00
|
|
|
(ender c 3 >> return ((B.strong . B.emph) <$> contents))
|
|
|
|
<|> (ender c 2 >> one c (B.strong <$> contents))
|
|
|
|
<|> (ender c 1 >> two c (B.emph <$> contents))
|
|
|
|
<|> return (return (B.str [c,c,c]) <> contents)
|
2013-07-20 21:14:38 -07:00
|
|
|
|
|
|
|
-- Parse inlines til you hit two c's, and emit strong.
|
|
|
|
-- If you never do hit two cs, emit ** plus inlines parsed.
|
2016-11-28 17:13:46 -05:00
|
|
|
two :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines)
|
2013-07-20 21:14:38 -07:00
|
|
|
two c prefix' = do
|
2014-07-10 14:51:08 -07:00
|
|
|
contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline)
|
2015-04-18 18:34:55 -07:00
|
|
|
(ender c 2 >> return (B.strong <$> (prefix' <> contents)))
|
|
|
|
<|> return (return (B.str [c,c]) <> (prefix' <> contents))
|
2013-07-20 21:14:38 -07:00
|
|
|
|
|
|
|
-- Parse inlines til you hit a c, and emit emph.
|
|
|
|
-- If you never hit a c, emit * plus inlines parsed.
|
2016-11-28 17:13:46 -05:00
|
|
|
one :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines)
|
2013-07-20 21:14:38 -07:00
|
|
|
one c prefix' = do
|
2014-07-10 14:51:08 -07:00
|
|
|
contents <- mconcat <$> many ( (notFollowedBy (ender c 1) >> inline)
|
2013-07-20 21:14:38 -07:00
|
|
|
<|> try (string [c,c] >>
|
2014-07-10 14:51:08 -07:00
|
|
|
notFollowedBy (ender c 1) >>
|
2013-11-22 19:41:08 -08:00
|
|
|
two c mempty) )
|
2015-04-18 18:34:55 -07:00
|
|
|
(ender c 1 >> return (B.emph <$> (prefix' <> contents)))
|
|
|
|
<|> return (return (B.str [c]) <> (prefix' <> contents))
|
2013-07-20 21:14:38 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
strongOrEmph :: PandocMonad m => MarkdownParser m (F Inlines)
|
2014-07-10 14:37:10 -07:00
|
|
|
strongOrEmph = enclosure '*' <|> enclosure '_'
|
2011-07-30 18:08:49 -07:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
-- | Parses a list of inlines between start and end delimiters.
|
2016-11-28 17:13:46 -05:00
|
|
|
inlinesBetween :: PandocMonad m
|
|
|
|
=> (Show b)
|
|
|
|
=> MarkdownParser m a
|
|
|
|
-> MarkdownParser m b
|
|
|
|
-> MarkdownParser m (F Inlines)
|
2011-07-30 18:08:49 -07:00
|
|
|
inlinesBetween start end =
|
2015-04-18 18:34:55 -07:00
|
|
|
(trimInlinesF . mconcat) <$> try (start >> many1Till inner end)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
|
2014-07-11 12:51:26 +01:00
|
|
|
innerSpace = try $ whitespace <* notFollowedBy' end
|
2009-11-29 19:31:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
strikeout :: PandocMonad m => MarkdownParser m (F Inlines)
|
2015-04-18 18:34:55 -07:00
|
|
|
strikeout = fmap B.strikeout <$>
|
2012-07-26 20:29:08 -07:00
|
|
|
(guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd)
|
2011-07-30 19:43:20 -07:00
|
|
|
where strikeStart = string "~~" >> lookAhead nonspaceChar
|
|
|
|
>> notFollowedBy (char '~')
|
|
|
|
strikeEnd = try $ string "~~"
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
superscript :: PandocMonad m => MarkdownParser m (F Inlines)
|
2015-04-18 18:34:55 -07:00
|
|
|
superscript = fmap B.superscript <$> try (do
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
guardEnabled Ext_superscript
|
|
|
|
char '^'
|
|
|
|
mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^'))
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
subscript :: PandocMonad m => MarkdownParser m (F Inlines)
|
2015-04-18 18:34:55 -07:00
|
|
|
subscript = fmap B.subscript <$> try (do
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
guardEnabled Ext_subscript
|
|
|
|
char '~'
|
|
|
|
mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~'))
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
whitespace :: PandocMonad m => MarkdownParser m (F Inlines)
|
2015-04-18 18:34:55 -07:00
|
|
|
whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
|
|
|
|
regsp = skipMany spaceChar >> return B.space
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
nonEndline :: PandocMonad m => ParserT [Char] st m Char
|
2007-11-03 23:27:58 +00:00
|
|
|
nonEndline = satisfy (/='\n')
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
str :: PandocMonad m => MarkdownParser m (F Inlines)
|
2009-04-30 04:39:45 +00:00
|
|
|
str = do
|
2017-03-05 10:24:39 +01:00
|
|
|
result <- many1 (alphaNum <|> try (char '.' <* notFollowedBy (char '.')))
|
2014-05-14 14:45:37 +02:00
|
|
|
updateLastStrPos
|
2017-03-05 10:24:39 +01:00
|
|
|
(do guardEnabled Ext_smart
|
|
|
|
abbrevs <- getOption readerAbbreviations
|
|
|
|
if not (null result) && last result == '.' && result `Set.member` abbrevs
|
|
|
|
then try (do ils <- whitespace <|> endline
|
|
|
|
lookAhead alphaNum
|
|
|
|
return $ do
|
|
|
|
ils' <- ils
|
|
|
|
if ils' == B.space
|
|
|
|
then return (B.str result <> B.str "\160")
|
|
|
|
else -- linebreak or softbreak
|
|
|
|
return (ils' <> B.str result <> B.str "\160"))
|
|
|
|
<|> return (return (B.str result))
|
|
|
|
else return (return (B.str result)))
|
|
|
|
<|> return (return (B.str result))
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- an endline character that can be treated as a space, not a structural break
|
2016-11-28 17:13:46 -05:00
|
|
|
endline :: PandocMonad m => MarkdownParser m (F Inlines)
|
2007-11-03 23:27:58 +00:00
|
|
|
endline = try $ do
|
|
|
|
newline
|
|
|
|
notFollowedBy blankline
|
2014-02-04 10:05:52 -08:00
|
|
|
-- parse potential list-starts differently if in a list:
|
2014-09-26 13:32:08 +04:00
|
|
|
notFollowedBy (inList >> listStart)
|
2013-09-07 09:36:37 -07:00
|
|
|
guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart
|
2012-07-26 20:29:08 -07:00
|
|
|
guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
|
2014-10-14 13:28:28 +02:00
|
|
|
guardEnabled Ext_blank_before_header <|> (notFollowedBy . char =<< atxChar) -- atx header
|
2014-03-24 09:56:16 -07:00
|
|
|
guardDisabled Ext_backtick_code_blocks <|>
|
2013-09-08 11:43:46 -07:00
|
|
|
notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
|
2014-08-31 12:55:47 -07:00
|
|
|
notFollowedByHtmlCloser
|
2014-06-20 11:10:35 -07:00
|
|
|
(eof >> return mempty)
|
2015-04-18 18:34:55 -07:00
|
|
|
<|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
|
2013-07-17 15:38:56 -07:00
|
|
|
<|> (guardEnabled Ext_ignore_line_breaks >> return mempty)
|
2015-12-11 15:58:11 -08:00
|
|
|
<|> (skipMany spaceChar >> return (return B.softbreak))
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- links
|
|
|
|
--
|
|
|
|
|
|
|
|
-- a reference label for a link
|
2016-11-28 17:13:46 -05:00
|
|
|
reference :: PandocMonad m => MarkdownParser m (F Inlines, String)
|
2007-12-24 04:22:31 +00:00
|
|
|
reference = do notFollowedBy' (string "[^") -- footnote reference
|
2015-04-18 18:34:55 -07:00
|
|
|
withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
parenthesizedChars :: PandocMonad m => MarkdownParser m [Char]
|
2013-03-13 19:18:20 -07:00
|
|
|
parenthesizedChars = do
|
|
|
|
result <- charsInBalanced '(' ')' litChar
|
|
|
|
return $ '(' : result ++ ")"
|
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
-- source for a link, with optional title
|
2016-11-28 17:13:46 -05:00
|
|
|
source :: PandocMonad m => MarkdownParser m (String, String)
|
2013-01-16 11:25:17 -08:00
|
|
|
source = do
|
|
|
|
char '('
|
2007-12-24 04:22:41 +00:00
|
|
|
skipSpaces
|
2014-08-14 10:56:41 -07:00
|
|
|
let urlChunk =
|
|
|
|
try parenthesizedChars
|
|
|
|
<|> (notFollowedBy (oneOf " )") >> (count 1 litChar))
|
|
|
|
<|> try (many1 spaceChar <* notFollowedBy (oneOf "\"')"))
|
2013-01-16 11:25:17 -08:00
|
|
|
let sourceURL = (unwords . words . concat) <$> many urlChunk
|
2011-12-05 21:13:24 -08:00
|
|
|
let betweenAngles = try $
|
2013-01-16 11:25:17 -08:00
|
|
|
char '<' >> manyTill litChar (char '>')
|
2010-12-10 12:14:51 -08:00
|
|
|
src <- try betweenAngles <|> sourceURL
|
2013-01-16 11:25:17 -08:00
|
|
|
tit <- option "" $ try $ spnl >> linkTitle
|
2007-11-03 23:27:58 +00:00
|
|
|
skipSpaces
|
2013-01-16 11:25:17 -08:00
|
|
|
char ')'
|
2012-09-29 17:09:34 -04:00
|
|
|
return (escapeURI $ trimr src, tit)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
linkTitle :: PandocMonad m => MarkdownParser m String
|
2013-02-15 20:27:29 -08:00
|
|
|
linkTitle = quotedTitle '"' <|> quotedTitle '\''
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
link :: PandocMonad m => MarkdownParser m (F Inlines)
|
2007-11-03 23:27:58 +00:00
|
|
|
link = try $ do
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
st <- getState
|
|
|
|
guard $ stateAllowLinks st
|
|
|
|
setState $ st{ stateAllowLinks = False }
|
|
|
|
(lab,raw) <- reference
|
|
|
|
setState $ st{ stateAllowLinks = True }
|
2015-04-02 21:09:08 -07:00
|
|
|
regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
bracketedSpan :: PandocMonad m => MarkdownParser m (F Inlines)
|
2016-09-28 12:33:05 +02:00
|
|
|
bracketedSpan = try $ do
|
|
|
|
guardEnabled Ext_bracketed_spans
|
|
|
|
(lab,_) <- reference
|
|
|
|
attr <- attributes
|
2017-03-04 14:38:09 +01:00
|
|
|
return $ if isSmallCaps attr
|
|
|
|
then B.smallcaps <$> lab
|
|
|
|
else B.spanWith attr <$> lab
|
|
|
|
|
|
|
|
-- | We treat a span as SmallCaps if class is "smallcaps" (and
|
|
|
|
-- no other attributes are set or if style is "font-variant:small-caps"
|
|
|
|
-- (and no other attributes are set).
|
|
|
|
isSmallCaps :: Attr -> Bool
|
|
|
|
isSmallCaps ("",["smallcaps"],[]) = True
|
|
|
|
isSmallCaps ("",[],kvs) =
|
|
|
|
case lookup "style" kvs of
|
|
|
|
Just s -> map toLower (filter (`notElem` " \t;") s) ==
|
|
|
|
"font-variant:small-caps"
|
|
|
|
Nothing -> False
|
|
|
|
isSmallCaps _ = False
|
2016-09-28 12:33:05 +02:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
regLink :: PandocMonad m
|
|
|
|
=> (Attr -> String -> String -> Inlines -> Inlines)
|
|
|
|
-> F Inlines
|
|
|
|
-> MarkdownParser m (F Inlines)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
regLink constructor lab = try $ do
|
|
|
|
(src, tit) <- source
|
2015-04-02 21:09:08 -07:00
|
|
|
attr <- option nullAttr $
|
2015-11-19 22:58:19 -08:00
|
|
|
guardEnabled Ext_link_attributes >> attributes
|
2015-11-19 22:41:12 -08:00
|
|
|
return $ constructor attr src tit <$> lab
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- a link like [this][ref] or [this][] or [this]
|
2016-11-28 17:13:46 -05:00
|
|
|
referenceLink :: PandocMonad m
|
|
|
|
=> (Attr -> String -> String -> Inlines -> Inlines)
|
|
|
|
-> (F Inlines, String)
|
|
|
|
-> MarkdownParser m (F Inlines)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
referenceLink constructor (lab, raw) = do
|
2013-06-19 08:56:11 -07:00
|
|
|
sp <- (True <$ lookAhead (char ' ')) <|> return False
|
2015-05-13 23:02:54 -07:00
|
|
|
(_,raw') <- option (mempty, "") $
|
2016-11-05 21:14:20 +01:00
|
|
|
lookAhead (try (guardEnabled Ext_citations >>
|
|
|
|
spnl >> normalCite >> return (mempty, "")))
|
2014-12-15 10:50:10 -08:00
|
|
|
<|>
|
|
|
|
try (spnl >> reference)
|
2015-03-03 03:28:56 +02:00
|
|
|
when (raw' == "") $ guardEnabled Ext_shortcut_reference_links
|
2013-01-03 20:32:15 -08:00
|
|
|
let labIsRef = raw' == "" || raw' == "[]"
|
|
|
|
let key = toKey $ if labIsRef then raw else raw'
|
2013-06-19 08:56:11 -07:00
|
|
|
parsedRaw <- parseFromString (mconcat <$> many inline) raw'
|
2015-04-02 21:09:08 -07:00
|
|
|
fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw
|
2013-01-03 20:32:15 -08:00
|
|
|
implicitHeaderRefs <- option False $
|
|
|
|
True <$ guardEnabled Ext_implicit_header_references
|
2015-04-18 18:34:55 -07:00
|
|
|
let makeFallback = do
|
|
|
|
parsedRaw' <- parsedRaw
|
|
|
|
fallback' <- fallback
|
|
|
|
return $ B.str "[" <> fallback' <> B.str "]" <>
|
2013-06-19 09:05:30 -07:00
|
|
|
(if sp && not (null raw) then B.space else mempty) <>
|
2015-04-18 18:34:55 -07:00
|
|
|
parsedRaw'
|
|
|
|
return $ do
|
|
|
|
keys <- asksF stateKeys
|
|
|
|
case M.lookup key keys of
|
2015-05-13 23:02:54 -07:00
|
|
|
Nothing ->
|
2015-04-18 18:34:55 -07:00
|
|
|
if implicitHeaderRefs
|
2015-05-13 23:02:54 -07:00
|
|
|
then do
|
|
|
|
headerKeys <- asksF stateHeaderKeys
|
|
|
|
case M.lookup key headerKeys of
|
2015-11-19 22:41:12 -08:00
|
|
|
Just ((src, tit), _) -> constructor nullAttr src tit <$> lab
|
2015-04-02 21:09:08 -07:00
|
|
|
Nothing -> makeFallback
|
2015-04-18 18:34:55 -07:00
|
|
|
else makeFallback
|
2015-11-19 22:41:12 -08:00
|
|
|
Just ((src,tit), attr) -> constructor attr src tit <$> lab
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
2013-06-19 09:05:30 -07:00
|
|
|
dropBrackets :: String -> String
|
|
|
|
dropBrackets = reverse . dropRB . reverse . dropLB
|
|
|
|
where dropRB (']':xs) = xs
|
2017-03-04 13:03:41 +01:00
|
|
|
dropRB xs = xs
|
2013-06-19 09:05:30 -07:00
|
|
|
dropLB ('[':xs) = xs
|
2017-03-04 13:03:41 +01:00
|
|
|
dropLB xs = xs
|
2013-06-19 09:05:30 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
bareURL :: PandocMonad m => MarkdownParser m (F Inlines)
|
2012-09-27 13:43:48 -07:00
|
|
|
bareURL = try $ do
|
2013-01-15 12:44:50 -08:00
|
|
|
guardEnabled Ext_autolink_bare_uris
|
2015-07-14 13:16:20 -07:00
|
|
|
getState >>= guard . stateAllowLinks
|
2012-09-27 13:43:48 -07:00
|
|
|
(orig, src) <- uri <|> emailAddress
|
2013-09-01 15:18:56 -07:00
|
|
|
notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a")
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return $ B.link src "" (B.str orig)
|
2012-09-27 13:43:48 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
autoLink :: PandocMonad m => MarkdownParser m (F Inlines)
|
2007-11-03 23:27:58 +00:00
|
|
|
autoLink = try $ do
|
2015-07-14 13:16:20 -07:00
|
|
|
getState >>= guard . stateAllowLinks
|
2007-11-03 23:27:58 +00:00
|
|
|
char '<'
|
2010-03-23 15:07:48 -07:00
|
|
|
(orig, src) <- uri <|> emailAddress
|
2013-02-28 22:05:22 -08:00
|
|
|
-- in rare cases, something may remain after the uri parser
|
|
|
|
-- is finished, because the uri parser tries to avoid parsing
|
|
|
|
-- final punctuation. for example: in `<http://hi---there>`,
|
|
|
|
-- the URI parser will stop before the dashes.
|
|
|
|
extra <- fromEntities <$> manyTill nonspaceChar (char '>')
|
2016-10-26 12:18:58 +02:00
|
|
|
attr <- option nullAttr $ try $
|
|
|
|
guardEnabled Ext_link_attributes >> attributes
|
|
|
|
return $ return $ B.linkWith attr (src ++ escapeURI extra) "" (B.str $ orig ++ extra)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
image :: PandocMonad m => MarkdownParser m (F Inlines)
|
2007-11-03 23:27:58 +00:00
|
|
|
image = try $ do
|
|
|
|
char '!'
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
(lab,raw) <- reference
|
2013-02-05 20:08:00 -08:00
|
|
|
defaultExt <- getOption readerDefaultImageExtension
|
2015-11-19 22:41:12 -08:00
|
|
|
let constructor attr' src = case takeExtension src of
|
|
|
|
"" -> B.imageWith attr' (addExtension src defaultExt)
|
|
|
|
_ -> B.imageWith attr' src
|
2013-02-05 20:08:00 -08:00
|
|
|
regLink constructor lab <|> referenceLink constructor (lab,raw)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
note :: PandocMonad m => MarkdownParser m (F Inlines)
|
2007-11-03 23:27:58 +00:00
|
|
|
note = try $ do
|
2012-07-26 20:29:08 -07:00
|
|
|
guardEnabled Ext_footnotes
|
2007-11-03 23:27:58 +00:00
|
|
|
ref <- noteMarker
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ do
|
|
|
|
notes <- asksF stateNotes'
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
case lookup ref notes of
|
2015-04-18 18:34:55 -07:00
|
|
|
Nothing -> return $ B.str $ "[^" ++ ref ++ "]"
|
|
|
|
Just contents -> do
|
|
|
|
st <- askF
|
|
|
|
-- process the note in a context that doesn't resolve
|
|
|
|
-- notes, to avoid infinite looping with notes inside
|
|
|
|
-- notes:
|
|
|
|
let contents' = runF contents st{ stateNotes' = [] }
|
|
|
|
return $ B.note contents'
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
inlineNote :: PandocMonad m => MarkdownParser m (F Inlines)
|
2007-11-03 23:27:58 +00:00
|
|
|
inlineNote = try $ do
|
2012-07-26 20:29:08 -07:00
|
|
|
guardEnabled Ext_inline_notes
|
2007-11-03 23:27:58 +00:00
|
|
|
char '^'
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
contents <- inlinesInBalancedBrackets
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ B.note . B.para <$> contents
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
rawLaTeXInline' :: PandocMonad m => MarkdownParser m (F Inlines)
|
2011-01-24 22:12:42 -08:00
|
|
|
rawLaTeXInline' = try $ do
|
2012-07-26 20:29:08 -07:00
|
|
|
guardEnabled Ext_raw_tex
|
2012-01-29 23:54:00 -08:00
|
|
|
lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env
|
2015-04-18 18:34:55 -07:00
|
|
|
RawInline _ s <- rawLaTeXInline
|
|
|
|
return $ return $ B.rawInline "tex" s
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
-- "tex" because it might be context or latex
|
2008-08-11 07:04:36 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String
|
2012-01-29 23:54:00 -08:00
|
|
|
rawConTeXtEnvironment = try $ do
|
2008-08-11 07:04:36 +00:00
|
|
|
string "\\start"
|
|
|
|
completion <- inBrackets (letter <|> digit <|> spaceChar)
|
|
|
|
<|> (many1 letter)
|
2012-01-29 23:54:00 -08:00
|
|
|
contents <- manyTill (rawConTeXtEnvironment <|> (count 1 anyChar))
|
2008-08-11 07:04:36 +00:00
|
|
|
(try $ string "\\stop" >> string completion)
|
|
|
|
return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
inBrackets :: PandocMonad m => (ParserT [Char] st m Char) -> ParserT [Char] st m String
|
2008-08-11 07:04:36 +00:00
|
|
|
inBrackets parser = do
|
|
|
|
char '['
|
|
|
|
contents <- many parser
|
|
|
|
char ']'
|
|
|
|
return $ "[" ++ contents ++ "]"
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
spanHtml :: PandocMonad m => MarkdownParser m (F Inlines)
|
2013-08-15 22:39:14 -07:00
|
|
|
spanHtml = try $ do
|
2014-08-08 21:04:25 -07:00
|
|
|
guardEnabled Ext_native_spans
|
2013-08-15 22:39:14 -07:00
|
|
|
(TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
|
|
|
|
contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span"))
|
2013-12-19 21:07:09 -05:00
|
|
|
let ident = fromMaybe "" $ lookup "id" attrs
|
2013-08-15 22:39:14 -07:00
|
|
|
let classes = maybe [] words $ lookup "class" attrs
|
|
|
|
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
|
2017-03-04 14:38:09 +01:00
|
|
|
return $ if isSmallCaps (ident, classes, keyvals)
|
|
|
|
then B.smallcaps <$> contents
|
|
|
|
else B.spanWith (ident, classes, keyvals) <$> contents
|
2013-08-15 22:39:14 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
divHtml :: PandocMonad m => MarkdownParser m (F Blocks)
|
2013-08-15 22:39:14 -07:00
|
|
|
divHtml = try $ do
|
2014-08-08 21:04:25 -07:00
|
|
|
guardEnabled Ext_native_divs
|
2014-02-26 22:46:38 -08:00
|
|
|
(TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" [])
|
2014-07-10 15:04:18 -07:00
|
|
|
-- we set stateInHtmlBlock so that closing tags that can be either block or
|
|
|
|
-- inline will not be parsed as inline tags
|
|
|
|
oldInHtmlBlock <- stateInHtmlBlock <$> getState
|
|
|
|
updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
|
2014-02-26 22:46:38 -08:00
|
|
|
bls <- option "" (blankline >> option "" blanklines)
|
|
|
|
contents <- mconcat <$>
|
|
|
|
many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block)
|
|
|
|
closed <- option False (True <$ htmlTag (~== TagClose "div"))
|
|
|
|
if closed
|
|
|
|
then do
|
2014-07-10 15:04:18 -07:00
|
|
|
updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
|
2014-02-26 22:46:38 -08:00
|
|
|
let ident = fromMaybe "" $ lookup "id" attrs
|
|
|
|
let classes = maybe [] words $ lookup "class" attrs
|
|
|
|
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ B.divWith (ident, classes, keyvals) <$> contents
|
2014-02-26 22:46:38 -08:00
|
|
|
else -- avoid backtracing
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
|
2013-08-15 22:39:14 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
rawHtmlInline :: PandocMonad m => MarkdownParser m (F Inlines)
|
New HTML reader using tagsoup as a lexer.
* The new reader is faster and more accurate.
* API changes for Text.Pandoc.Readers.HTML:
- removed rawHtmlBlock, anyHtmlBlockTag, anyHtmlInlineTag,
anyHtmlTag, anyHtmlEndTag, htmlEndTag, extractTagType,
htmlBlockElement, htmlComment
- added htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag
* tagsoup is a new dependency.
* Text.Pandoc.Parsing: Generalized type on readWith.
* Benchmark.hs: Added length calculation to force full evaluation.
* Updated HTML reader tests.
* Updated markdown and textile readers to use the functions from
the HTML reader.
* Note: The markdown reader now correctly handles some cases it did not
before. For example:
<hr/>
is reproduced without adding a space.
<script>
a = '<b>';
</script>
is parsed correctly.
2010-12-22 20:25:15 -08:00
|
|
|
rawHtmlInline = do
|
2012-08-12 22:04:23 -07:00
|
|
|
guardEnabled Ext_raw_html
|
2014-07-07 15:47:51 -06:00
|
|
|
inHtmlBlock <- stateInHtmlBlock <$> getState
|
|
|
|
let isCloseBlockTag t = case inHtmlBlock of
|
|
|
|
Just t' -> t ~== TagClose t'
|
|
|
|
Nothing -> False
|
2012-07-26 20:29:08 -07:00
|
|
|
mdInHtml <- option False $
|
2014-07-20 17:22:29 -07:00
|
|
|
( guardEnabled Ext_markdown_in_html_blocks
|
|
|
|
<|> guardEnabled Ext_markdown_attribute
|
|
|
|
) >> return True
|
2013-01-25 19:50:34 -08:00
|
|
|
(_,result) <- htmlTag $ if mdInHtml
|
2014-07-07 15:47:51 -06:00
|
|
|
then (\x -> isInlineTag x &&
|
|
|
|
not (isCloseBlockTag x))
|
2013-01-25 19:50:34 -08:00
|
|
|
else not . isTextTag
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return $ B.rawInline "html" result
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-11-13 12:06:39 -08:00
|
|
|
-- Emoji
|
|
|
|
|
|
|
|
emojiChars :: [Char]
|
|
|
|
emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-']
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
emoji :: PandocMonad m => MarkdownParser m (F Inlines)
|
2015-11-13 12:06:39 -08:00
|
|
|
emoji = try $ do
|
|
|
|
guardEnabled Ext_emoji
|
|
|
|
char ':'
|
|
|
|
emojikey <- many1 (oneOf emojiChars)
|
|
|
|
char ':'
|
|
|
|
case M.lookup emojikey emojis of
|
|
|
|
Just s -> return (return (B.str s))
|
|
|
|
Nothing -> mzero
|
|
|
|
|
2010-11-12 00:37:44 -08:00
|
|
|
-- Citations
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
cite :: PandocMonad m => MarkdownParser m (F Inlines)
|
2010-11-12 00:37:44 -08:00
|
|
|
cite = do
|
2012-07-26 20:29:08 -07:00
|
|
|
guardEnabled Ext_citations
|
2015-04-18 18:34:55 -07:00
|
|
|
citations <- textualCite
|
|
|
|
<|> do (cs, raw) <- withRaw normalCite
|
|
|
|
return $ (flip B.cite (B.text raw)) <$> cs
|
|
|
|
return citations
|
2013-08-18 23:01:23 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
textualCite :: PandocMonad m => MarkdownParser m (F Inlines)
|
2013-02-06 13:14:37 -08:00
|
|
|
textualCite = try $ do
|
2010-11-18 12:38:45 -08:00
|
|
|
(_, key) <- citeKey
|
2012-10-21 23:16:23 -07:00
|
|
|
let first = Citation{ citationId = key
|
|
|
|
, citationPrefix = []
|
|
|
|
, citationSuffix = []
|
|
|
|
, citationMode = AuthorInText
|
|
|
|
, citationNoteNum = 0
|
|
|
|
, citationHash = 0
|
2010-11-12 00:37:44 -08:00
|
|
|
}
|
2013-09-14 22:27:25 -07:00
|
|
|
mbrest <- option Nothing $ try $ spnl >> Just <$> withRaw normalCite
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
case mbrest of
|
2013-09-14 22:27:25 -07:00
|
|
|
Just (rest, raw) ->
|
|
|
|
return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:))
|
2015-04-18 18:34:55 -07:00
|
|
|
<$> rest
|
2013-09-14 22:27:25 -07:00
|
|
|
Nothing ->
|
2015-04-17 17:33:55 +03:00
|
|
|
(do
|
|
|
|
(cs, raw) <- withRaw $ bareloc first
|
|
|
|
let (spaces',raw') = span isSpace raw
|
|
|
|
spc | null spaces' = mempty
|
|
|
|
| otherwise = B.space
|
|
|
|
lab <- parseFromString (mconcat <$> many inline) $ dropBrackets raw'
|
2015-04-02 21:09:08 -07:00
|
|
|
fallback <- referenceLink B.linkWith (lab,raw')
|
2015-04-17 17:33:55 +03:00
|
|
|
return $ do
|
|
|
|
fallback' <- fallback
|
|
|
|
cs' <- cs
|
|
|
|
return $
|
|
|
|
case B.toList fallback' of
|
|
|
|
Link{}:_ -> B.cite [first] (B.str $ '@':key) <> spc <> fallback'
|
|
|
|
_ -> B.cite cs' (B.text $ '@':key ++ " " ++ raw))
|
2015-04-18 18:34:55 -07:00
|
|
|
<|> return (do st <- askF
|
|
|
|
return $ case M.lookup key (stateExamples st) of
|
|
|
|
Just n -> B.str (show n)
|
|
|
|
_ -> B.cite [first] $ B.str $ '@':key)
|
2010-11-18 12:38:45 -08:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
bareloc :: PandocMonad m => Citation -> MarkdownParser m (F [Citation])
|
2010-11-18 12:38:45 -08:00
|
|
|
bareloc c = try $ do
|
|
|
|
spnl
|
|
|
|
char '['
|
2015-04-17 17:33:55 +03:00
|
|
|
notFollowedBy $ char '^'
|
2010-11-18 13:22:20 -08:00
|
|
|
suff <- suffix
|
2015-04-18 18:34:55 -07:00
|
|
|
rest <- option (return []) $ try $ char ';' >> citeList
|
2010-11-18 12:38:45 -08:00
|
|
|
spnl
|
|
|
|
char ']'
|
2015-04-17 17:33:55 +03:00
|
|
|
notFollowedBy $ oneOf "[("
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ do
|
|
|
|
suff' <- suff
|
|
|
|
rest' <- rest
|
|
|
|
return $ c{ citationSuffix = B.toList suff' } : rest'
|
2010-11-18 12:38:45 -08:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
normalCite :: PandocMonad m => MarkdownParser m (F [Citation])
|
2010-11-12 00:37:44 -08:00
|
|
|
normalCite = try $ do
|
2010-11-18 12:38:45 -08:00
|
|
|
char '['
|
|
|
|
spnl
|
|
|
|
citations <- citeList
|
|
|
|
spnl
|
|
|
|
char ']'
|
|
|
|
return citations
|
2010-11-12 00:37:44 -08:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
suffix :: PandocMonad m => MarkdownParser m (F Inlines)
|
2010-11-18 12:38:45 -08:00
|
|
|
suffix = try $ do
|
2011-11-09 13:18:01 -08:00
|
|
|
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
|
2010-11-12 00:37:44 -08:00
|
|
|
spnl
|
2015-04-18 18:34:55 -07:00
|
|
|
rest <- trimInlinesF . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline)
|
2011-11-09 13:18:01 -08:00
|
|
|
return $ if hasSpace
|
2015-04-18 18:34:55 -07:00
|
|
|
then (B.space <>) <$> rest
|
2011-11-09 13:18:01 -08:00
|
|
|
else rest
|
2010-11-18 12:38:45 -08:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
prefix :: PandocMonad m => MarkdownParser m (F Inlines)
|
2015-04-18 18:34:55 -07:00
|
|
|
prefix = trimInlinesF . mconcat <$>
|
2010-11-18 13:22:20 -08:00
|
|
|
manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey))
|
2010-11-18 12:38:45 -08:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
citeList :: PandocMonad m => MarkdownParser m (F [Citation])
|
2015-04-18 18:34:55 -07:00
|
|
|
citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl)
|
2010-11-12 00:37:44 -08:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
citation :: PandocMonad m => MarkdownParser m (F Citation)
|
2010-11-12 00:37:44 -08:00
|
|
|
citation = try $ do
|
|
|
|
pref <- prefix
|
2010-11-18 12:38:45 -08:00
|
|
|
(suppress_author, key) <- citeKey
|
2010-11-18 13:22:20 -08:00
|
|
|
suff <- suffix
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ do
|
|
|
|
x <- pref
|
|
|
|
y <- suff
|
|
|
|
return $ Citation{ citationId = key
|
|
|
|
, citationPrefix = B.toList x
|
|
|
|
, citationSuffix = B.toList y
|
|
|
|
, citationMode = if suppress_author
|
|
|
|
then SuppressAuthor
|
|
|
|
else NormalCitation
|
|
|
|
, citationNoteNum = 0
|
|
|
|
, citationHash = 0
|
|
|
|
}
|
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
smart :: PandocMonad m => MarkdownParser m (F Inlines)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
smart = do
|
2017-01-14 18:27:06 +01:00
|
|
|
guardEnabled Ext_smart
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
doubleQuoted <|> singleQuoted <|>
|
2015-04-18 18:34:55 -07:00
|
|
|
choice (map (return <$>) [apostrophe, dash, ellipses])
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
2016-11-28 17:13:46 -05:00
|
|
|
singleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
singleQuoted = try $ do
|
|
|
|
singleQuoteStart
|
|
|
|
withQuoteContext InSingleQuote $
|
2015-04-18 18:34:55 -07:00
|
|
|
fmap B.singleQuoted . trimInlinesF . mconcat <$>
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
many1Till inline singleQuoteEnd
|
|
|
|
|
2013-03-12 19:18:14 -07:00
|
|
|
-- doubleQuoted will handle regular double-quoted sections, as well
|
|
|
|
-- as dialogues with an open double-quote without a close double-quote
|
|
|
|
-- in the same paragraph.
|
2016-11-28 17:13:46 -05:00
|
|
|
doubleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
doubleQuoted = try $ do
|
|
|
|
doubleQuoteStart
|
2013-03-12 19:18:14 -07:00
|
|
|
contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
|
2015-04-18 18:34:55 -07:00
|
|
|
(withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return
|
|
|
|
(fmap B.doubleQuoted . trimInlinesF $ contents))
|
|
|
|
<|> (return $ return (B.str "\8220") <> contents)
|