2011-07-30 19:58:46 -07:00
|
|
|
{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
|
2015-02-18 13:04:08 +00:00
|
|
|
{-# LANGUAGE ScopedTypeVariables #-}
|
2007-11-03 23:27:58 +00:00
|
|
|
{-
|
2015-04-26 10:18:29 -07:00
|
|
|
Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
-}
|
|
|
|
|
|
|
|
{- |
|
|
|
|
Module : Text.Pandoc.Readers.Markdown
|
2015-04-26 10:18:29 -07:00
|
|
|
Copyright : Copyright (C) 2006-2015 John MacFarlane
|
2012-07-26 22:32:53 -07:00
|
|
|
License : GNU GPL, version 2 or above
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
|
|
|
Stability : alpha
|
|
|
|
Portability : portable
|
|
|
|
|
|
|
|
Conversion of markdown-formatted plain text to 'Pandoc' document.
|
|
|
|
-}
|
2013-01-04 11:32:17 -08:00
|
|
|
module Text.Pandoc.Readers.Markdown ( readMarkdown,
|
|
|
|
readMarkdownWithWarnings ) where
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2016-10-13 08:46:44 +02:00
|
|
|
import Data.List ( transpose, sortBy, findIndex, intercalate )
|
2010-05-08 10:03:02 -07:00
|
|
|
import qualified Data.Map as M
|
2014-04-24 11:09:07 -07:00
|
|
|
import Data.Scientific (coefficient, base10Exponent)
|
2007-11-03 23:27:58 +00:00
|
|
|
import Data.Ord ( comparing )
|
2016-02-04 12:06:32 -08:00
|
|
|
import Data.Char ( isSpace, isAlphaNum, toLower, isPunctuation )
|
2008-08-04 03:15:34 +00:00
|
|
|
import Data.Maybe
|
2007-11-03 23:27:58 +00:00
|
|
|
import Text.Pandoc.Definition
|
2015-11-13 12:06:39 -08:00
|
|
|
import Text.Pandoc.Emoji (emojis)
|
2015-12-12 17:28:52 -08:00
|
|
|
import Text.Pandoc.Generic (bottomUp)
|
2013-05-10 22:53:35 -07:00
|
|
|
import qualified Data.Text as T
|
|
|
|
import Data.Text (Text)
|
|
|
|
import qualified Data.Yaml as Yaml
|
2013-07-02 09:23:43 -07:00
|
|
|
import Data.Yaml (ParseException(..), YamlException(..), YamlMark(..))
|
2013-05-10 22:53:35 -07:00
|
|
|
import qualified Data.HashMap.Strict as H
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
import qualified Text.Pandoc.Builder as B
|
2013-05-10 22:53:35 -07:00
|
|
|
import qualified Text.Pandoc.UTF8 as UTF8
|
|
|
|
import qualified Data.Vector as V
|
2015-10-14 09:09:10 -07:00
|
|
|
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
|
2012-07-25 10:45:45 -07:00
|
|
|
import Text.Pandoc.Options
|
2012-09-27 17:22:17 -07:00
|
|
|
import Text.Pandoc.Shared
|
2015-12-12 17:28:52 -08:00
|
|
|
import Text.Pandoc.Pretty (charWidth)
|
2013-02-28 22:05:22 -08:00
|
|
|
import Text.Pandoc.XML (fromEntities)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
import Text.Pandoc.Parsing hiding (tableWith)
|
2012-01-29 23:54:00 -08:00
|
|
|
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
|
New HTML reader using tagsoup as a lexer.
* The new reader is faster and more accurate.
* API changes for Text.Pandoc.Readers.HTML:
- removed rawHtmlBlock, anyHtmlBlockTag, anyHtmlInlineTag,
anyHtmlTag, anyHtmlEndTag, htmlEndTag, extractTagType,
htmlBlockElement, htmlComment
- added htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag
* tagsoup is a new dependency.
* Text.Pandoc.Parsing: Generalized type on readWith.
* Benchmark.hs: Added length calculation to force full evaluation.
* Updated HTML reader tests.
* Updated markdown and textile readers to use the functions from
the HTML reader.
* Note: The markdown reader now correctly handles some cases it did not
before. For example:
<hr/>
is reproduced without adding a space.
<script>
a = '<b>';
</script>
is parsed correctly.
2010-12-22 20:25:15 -08:00
|
|
|
import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
|
|
|
|
isTextTag, isCommentTag )
|
2012-08-02 17:12:20 -07:00
|
|
|
import Control.Monad
|
2013-02-05 20:08:00 -08:00
|
|
|
import System.FilePath (takeExtension, addExtension)
|
New HTML reader using tagsoup as a lexer.
* The new reader is faster and more accurate.
* API changes for Text.Pandoc.Readers.HTML:
- removed rawHtmlBlock, anyHtmlBlockTag, anyHtmlInlineTag,
anyHtmlTag, anyHtmlEndTag, htmlEndTag, extractTagType,
htmlBlockElement, htmlComment
- added htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag
* tagsoup is a new dependency.
* Text.Pandoc.Parsing: Generalized type on readWith.
* Benchmark.hs: Added length calculation to force full evaluation.
* Updated HTML reader tests.
* Updated markdown and textile readers to use the functions from
the HTML reader.
* Note: The markdown reader now correctly handles some cases it did not
before. For example:
<hr/>
is reproduced without adding a space.
<script>
a = '<b>';
</script>
is parsed correctly.
2010-12-22 20:25:15 -08:00
|
|
|
import Text.HTML.TagSoup
|
2012-08-11 19:44:23 -07:00
|
|
|
import qualified Data.Set as Set
|
2014-02-25 22:43:58 -08:00
|
|
|
import Text.Printf (printf)
|
|
|
|
import Debug.Trace (trace)
|
2016-08-30 13:43:50 -04:00
|
|
|
import Data.Monoid ((<>))
|
2015-02-18 13:04:08 +00:00
|
|
|
import Text.Pandoc.Error
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
type MarkdownParser = Parser [Char] ParserState
|
2013-01-03 20:32:15 -08:00
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
-- | Read markdown from an input string and return a Pandoc document.
|
2012-07-25 22:35:41 -07:00
|
|
|
readMarkdown :: ReaderOptions -- ^ Reader options
|
|
|
|
-> String -- ^ String to parse (assuming @'\n'@ line endings)
|
2015-02-18 13:04:08 +00:00
|
|
|
-> Either PandocError Pandoc
|
2012-07-25 22:35:41 -07:00
|
|
|
readMarkdown opts s =
|
2015-04-18 18:34:55 -07:00
|
|
|
(readWith parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2013-01-04 11:32:17 -08:00
|
|
|
-- | Read markdown from an input string and return a pair of a Pandoc document
|
|
|
|
-- and a list of warnings.
|
|
|
|
readMarkdownWithWarnings :: ReaderOptions -- ^ Reader options
|
|
|
|
-> String -- ^ String to parse (assuming @'\n'@ line endings)
|
2015-04-18 18:34:55 -07:00
|
|
|
-> Either PandocError (Pandoc, [String])
|
|
|
|
readMarkdownWithWarnings opts s =
|
|
|
|
(readWithWarnings parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
|
2013-01-04 11:32:17 -08:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
trimInlinesF :: F Inlines -> F Inlines
|
|
|
|
trimInlinesF = liftM trimInlines
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
--
|
|
|
|
-- Constants and data structure definitions
|
|
|
|
--
|
|
|
|
|
2011-01-19 15:06:56 -08:00
|
|
|
isBulletListMarker :: Char -> Bool
|
|
|
|
isBulletListMarker '*' = True
|
|
|
|
isBulletListMarker '+' = True
|
|
|
|
isBulletListMarker '-' = True
|
|
|
|
isBulletListMarker _ = False
|
|
|
|
|
|
|
|
isHruleChar :: Char -> Bool
|
|
|
|
isHruleChar '*' = True
|
|
|
|
isHruleChar '-' = True
|
|
|
|
isHruleChar '_' = True
|
|
|
|
isHruleChar _ = False
|
2008-07-11 16:33:21 +00:00
|
|
|
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
setextHChars :: String
|
2007-11-03 23:27:58 +00:00
|
|
|
setextHChars = "=-"
|
|
|
|
|
2011-01-19 15:14:23 -08:00
|
|
|
isBlank :: Char -> Bool
|
|
|
|
isBlank ' ' = True
|
|
|
|
isBlank '\t' = True
|
|
|
|
isBlank '\n' = True
|
|
|
|
isBlank _ = False
|
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
--
|
|
|
|
-- auxiliary functions
|
|
|
|
--
|
|
|
|
|
2014-09-26 13:32:08 +04:00
|
|
|
-- | Succeeds when we're in list context.
|
|
|
|
inList :: MarkdownParser ()
|
|
|
|
inList = do
|
|
|
|
ctx <- stateParserContext <$> getState
|
|
|
|
guard (ctx == ListItemState)
|
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
spnl :: Parser [Char] st ()
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
spnl = try $ do
|
|
|
|
skipSpaces
|
|
|
|
optional newline
|
|
|
|
skipSpaces
|
|
|
|
notFollowedBy (char '\n')
|
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
indentSpaces :: MarkdownParser String
|
2007-11-03 23:27:58 +00:00
|
|
|
indentSpaces = try $ do
|
2012-07-25 12:31:16 -07:00
|
|
|
tabStop <- getOption readerTabStop
|
2009-04-29 19:28:23 +00:00
|
|
|
count tabStop (char ' ') <|>
|
|
|
|
string "\t" <?> "indentation"
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
nonindentSpaces :: MarkdownParser String
|
2007-11-03 23:27:58 +00:00
|
|
|
nonindentSpaces = do
|
2012-07-25 12:31:16 -07:00
|
|
|
tabStop <- getOption readerTabStop
|
2007-11-03 23:27:58 +00:00
|
|
|
sps <- many (char ' ')
|
2012-07-26 22:32:53 -07:00
|
|
|
if length sps < tabStop
|
2007-11-03 23:27:58 +00:00
|
|
|
then return sps
|
|
|
|
else unexpected "indented line"
|
|
|
|
|
2014-08-12 11:10:48 -07:00
|
|
|
-- returns number of spaces parsed
|
|
|
|
skipNonindentSpaces :: MarkdownParser Int
|
2009-04-29 19:28:31 +00:00
|
|
|
skipNonindentSpaces = do
|
2012-07-25 12:31:16 -07:00
|
|
|
tabStop <- getOption readerTabStop
|
2014-08-12 11:10:48 -07:00
|
|
|
atMostSpaces (tabStop - 1) <* notFollowedBy (char ' ')
|
2009-04-29 19:28:31 +00:00
|
|
|
|
2014-08-12 11:10:48 -07:00
|
|
|
atMostSpaces :: Int -> MarkdownParser Int
|
|
|
|
atMostSpaces n
|
|
|
|
| n > 0 = (char ' ' >> (+1) <$> atMostSpaces (n-1)) <|> return 0
|
|
|
|
| otherwise = return 0
|
2009-04-29 19:28:31 +00:00
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
litChar :: MarkdownParser Char
|
2011-12-05 20:55:23 -08:00
|
|
|
litChar = escapedChar'
|
2013-02-15 20:27:29 -08:00
|
|
|
<|> characterReference
|
2011-12-05 20:55:23 -08:00
|
|
|
<|> noneOf "\n"
|
2013-01-16 11:25:17 -08:00
|
|
|
<|> try (newline >> notFollowedBy blankline >> return ' ')
|
2011-12-05 20:55:23 -08:00
|
|
|
|
2007-12-24 04:22:31 +00:00
|
|
|
-- | Parse a sequence of inline elements between square brackets,
|
|
|
|
-- including inlines between balanced pairs of square brackets.
|
2015-04-18 18:34:55 -07:00
|
|
|
inlinesInBalancedBrackets :: MarkdownParser (F Inlines)
|
2015-04-19 17:02:59 -07:00
|
|
|
inlinesInBalancedBrackets = do
|
2007-12-24 04:22:31 +00:00
|
|
|
char '['
|
2015-04-19 17:02:59 -07:00
|
|
|
(_, raw) <- withRaw $ charsInBalancedBrackets 1
|
|
|
|
guard $ not $ null raw
|
|
|
|
parseFromString (trimInlinesF . mconcat <$> many inline) (init raw)
|
|
|
|
|
|
|
|
charsInBalancedBrackets :: Int -> MarkdownParser ()
|
|
|
|
charsInBalancedBrackets 0 = return ()
|
|
|
|
charsInBalancedBrackets openBrackets =
|
|
|
|
(char '[' >> charsInBalancedBrackets (openBrackets + 1))
|
|
|
|
<|> (char ']' >> charsInBalancedBrackets (openBrackets - 1))
|
|
|
|
<|> (( (() <$ code)
|
2015-05-13 09:50:16 -07:00
|
|
|
<|> (() <$ (escapedChar'))
|
2015-04-19 17:02:59 -07:00
|
|
|
<|> (newline >> notFollowedBy blankline)
|
|
|
|
<|> skipMany1 (noneOf "[]`\n\\")
|
2015-05-13 09:50:16 -07:00
|
|
|
<|> (() <$ count 1 (oneOf "`\\"))
|
2015-04-19 17:02:59 -07:00
|
|
|
) >> charsInBalancedBrackets openBrackets)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- document structure
|
|
|
|
--
|
|
|
|
|
2016-04-10 09:13:53 -07:00
|
|
|
rawTitleBlockLine :: MarkdownParser String
|
|
|
|
rawTitleBlockLine = do
|
2010-02-28 11:21:19 +00:00
|
|
|
char '%'
|
|
|
|
skipSpaces
|
2016-04-10 09:13:53 -07:00
|
|
|
first <- anyLine
|
|
|
|
rest <- many $ try $ do spaceChar
|
|
|
|
notFollowedBy blankline
|
|
|
|
skipSpaces
|
|
|
|
anyLine
|
|
|
|
return $ trim $ unlines (first:rest)
|
|
|
|
|
|
|
|
titleLine :: MarkdownParser (F Inlines)
|
|
|
|
titleLine = try $ do
|
|
|
|
raw <- rawTitleBlockLine
|
|
|
|
res <- parseFromString (many inline) raw
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ trimInlinesF $ mconcat res
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
authorsLine :: MarkdownParser (F [Inlines])
|
2012-07-26 22:32:53 -07:00
|
|
|
authorsLine = try $ do
|
2016-04-10 09:13:53 -07:00
|
|
|
raw <- rawTitleBlockLine
|
|
|
|
let sep = (char ';' <* spaces) <|> newline
|
|
|
|
let pAuthors = sepEndBy
|
|
|
|
(trimInlinesF . mconcat <$> many
|
|
|
|
(try $ notFollowedBy sep >> inline))
|
|
|
|
sep
|
|
|
|
sequence <$> parseFromString pAuthors raw
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
dateLine :: MarkdownParser (F Inlines)
|
2007-11-03 23:27:58 +00:00
|
|
|
dateLine = try $ do
|
2016-04-10 09:13:53 -07:00
|
|
|
raw <- rawTitleBlockLine
|
|
|
|
res <- parseFromString (many inline) raw
|
|
|
|
return $ trimInlinesF $ mconcat res
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2013-08-18 18:39:04 -07:00
|
|
|
titleBlock :: MarkdownParser ()
|
|
|
|
titleBlock = pandocTitleBlock <|> mmdTitleBlock
|
2012-08-12 19:27:13 -07:00
|
|
|
|
2013-08-18 18:39:04 -07:00
|
|
|
pandocTitleBlock :: MarkdownParser ()
|
2012-08-12 19:27:13 -07:00
|
|
|
pandocTitleBlock = try $ do
|
|
|
|
guardEnabled Ext_pandoc_title_block
|
2013-02-12 19:55:37 -08:00
|
|
|
lookAhead (char '%')
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
title <- option mempty titleLine
|
2015-04-18 18:34:55 -07:00
|
|
|
author <- option (return []) authorsLine
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
date <- option mempty dateLine
|
2007-11-03 23:27:58 +00:00
|
|
|
optional blanklines
|
2015-04-18 18:34:55 -07:00
|
|
|
let meta' = do title' <- title
|
|
|
|
author' <- author
|
|
|
|
date' <- date
|
|
|
|
return $
|
|
|
|
(if B.isNull title' then id else B.setMeta "title" title')
|
|
|
|
. (if null author' then id else B.setMeta "author" author')
|
|
|
|
. (if B.isNull date' then id else B.setMeta "date" date')
|
|
|
|
$ nullMeta
|
|
|
|
updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
|
|
|
|
|
|
|
|
yamlMetaBlock :: MarkdownParser (F Blocks)
|
2013-08-18 18:39:04 -07:00
|
|
|
yamlMetaBlock = try $ do
|
2013-07-02 20:54:30 -07:00
|
|
|
guardEnabled Ext_yaml_metadata_block
|
2013-07-01 22:29:04 -07:00
|
|
|
pos <- getPosition
|
2013-05-10 22:53:35 -07:00
|
|
|
string "---"
|
|
|
|
blankline
|
2013-10-29 10:57:48 -07:00
|
|
|
notFollowedBy blankline -- if --- is followed by a blank it's an HRULE
|
2013-08-07 08:43:42 -07:00
|
|
|
rawYamlLines <- manyTill anyLine stopLine
|
|
|
|
-- by including --- and ..., we allow yaml blocks with just comments:
|
|
|
|
let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."]))
|
2013-05-10 22:53:35 -07:00
|
|
|
optional blanklines
|
|
|
|
opts <- stateOptions <$> getState
|
2013-08-18 18:39:04 -07:00
|
|
|
meta' <- case Yaml.decodeEither' $ UTF8.fromString rawYaml of
|
2015-04-18 18:34:55 -07:00
|
|
|
Right (Yaml.Object hashmap) -> return $ return $
|
2013-08-18 18:39:04 -07:00
|
|
|
H.foldrWithKey (\k v m ->
|
|
|
|
if ignorable k
|
|
|
|
then m
|
2015-02-18 13:04:08 +00:00
|
|
|
else case yamlToMeta opts v of
|
|
|
|
Left _ -> m
|
|
|
|
Right v' -> B.setMeta (T.unpack k) v' m)
|
2013-08-18 18:39:04 -07:00
|
|
|
nullMeta hashmap
|
2015-04-18 18:34:55 -07:00
|
|
|
Right Yaml.Null -> return $ return nullMeta
|
2013-08-18 18:39:04 -07:00
|
|
|
Right _ -> do
|
|
|
|
addWarning (Just pos) "YAML header is not an object"
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return nullMeta
|
2013-08-18 18:39:04 -07:00
|
|
|
Left err' -> do
|
|
|
|
case err' of
|
|
|
|
InvalidYaml (Just YamlParseException{
|
|
|
|
yamlProblem = problem
|
|
|
|
, yamlContext = _ctxt
|
|
|
|
, yamlProblemMark = Yaml.YamlMark {
|
|
|
|
yamlLine = yline
|
|
|
|
, yamlColumn = ycol
|
|
|
|
}}) ->
|
|
|
|
addWarning (Just $ setSourceLine
|
|
|
|
(setSourceColumn pos
|
|
|
|
(sourceColumn pos + ycol))
|
|
|
|
(sourceLine pos + 1 + yline))
|
|
|
|
$ "Could not parse YAML header: " ++
|
|
|
|
problem
|
|
|
|
_ -> addWarning (Just pos)
|
|
|
|
$ "Could not parse YAML header: " ++
|
|
|
|
show err'
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return nullMeta
|
|
|
|
updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
|
2013-08-18 18:39:04 -07:00
|
|
|
return mempty
|
2013-05-10 22:53:35 -07:00
|
|
|
|
2013-07-01 22:39:56 -07:00
|
|
|
-- ignore fields ending with _
|
2013-05-10 22:53:35 -07:00
|
|
|
ignorable :: Text -> Bool
|
2015-04-18 18:34:55 -07:00
|
|
|
ignorable t = (T.pack "_") `T.isSuffixOf` t
|
2013-05-10 22:53:35 -07:00
|
|
|
|
2015-02-18 13:04:08 +00:00
|
|
|
toMetaValue :: ReaderOptions -> Text -> Either PandocError MetaValue
|
2015-03-28 12:12:48 -07:00
|
|
|
toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x)
|
2015-02-18 13:04:08 +00:00
|
|
|
where
|
|
|
|
toMeta p =
|
|
|
|
case p of
|
|
|
|
Pandoc _ [Plain xs] -> MetaInlines xs
|
|
|
|
Pandoc _ [Para xs]
|
2013-05-10 22:53:35 -07:00
|
|
|
| endsWithNewline x -> MetaBlocks [Para xs]
|
|
|
|
| otherwise -> MetaInlines xs
|
2015-02-18 13:04:08 +00:00
|
|
|
Pandoc _ bs -> MetaBlocks bs
|
|
|
|
endsWithNewline t = T.pack "\n" `T.isSuffixOf` t
|
2015-03-28 12:12:48 -07:00
|
|
|
opts' = opts{readerExtensions=readerExtensions opts `Set.difference` meta_exts}
|
|
|
|
meta_exts = Set.fromList [ Ext_pandoc_title_block
|
|
|
|
, Ext_mmd_title_block
|
|
|
|
, Ext_yaml_metadata_block
|
|
|
|
]
|
2013-05-10 22:53:35 -07:00
|
|
|
|
2015-02-18 13:04:08 +00:00
|
|
|
yamlToMeta :: ReaderOptions -> Yaml.Value -> Either PandocError MetaValue
|
2013-05-10 22:53:35 -07:00
|
|
|
yamlToMeta opts (Yaml.String t) = toMetaValue opts t
|
2014-04-24 11:09:07 -07:00
|
|
|
yamlToMeta _ (Yaml.Number n)
|
|
|
|
-- avoid decimal points for numbers that don't need them:
|
2015-02-18 13:04:08 +00:00
|
|
|
| base10Exponent n >= 0 = return $ MetaString $ show
|
2014-04-24 11:09:07 -07:00
|
|
|
$ coefficient n * (10 ^ base10Exponent n)
|
2015-02-18 13:04:08 +00:00
|
|
|
| otherwise = return $ MetaString $ show n
|
|
|
|
yamlToMeta _ (Yaml.Bool b) = return $ MetaBool b
|
|
|
|
yamlToMeta opts (Yaml.Array xs) = B.toMetaValue <$> mapM (yamlToMeta opts)
|
|
|
|
(V.toList xs)
|
|
|
|
yamlToMeta opts (Yaml.Object o) = MetaMap <$> H.foldrWithKey (\k v m ->
|
2013-05-10 22:53:35 -07:00
|
|
|
if ignorable k
|
|
|
|
then m
|
2015-02-18 13:04:08 +00:00
|
|
|
else (do
|
|
|
|
v' <- yamlToMeta opts v
|
|
|
|
m' <- m
|
|
|
|
return (M.insert (T.unpack k) v' m')))
|
|
|
|
(return M.empty) o
|
|
|
|
yamlToMeta _ _ = return $ MetaString ""
|
2013-05-10 22:53:35 -07:00
|
|
|
|
|
|
|
stopLine :: MarkdownParser ()
|
|
|
|
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
|
|
|
|
|
2013-08-18 18:39:04 -07:00
|
|
|
mmdTitleBlock :: MarkdownParser ()
|
2012-08-12 19:27:13 -07:00
|
|
|
mmdTitleBlock = try $ do
|
|
|
|
guardEnabled Ext_mmd_title_block
|
2015-10-26 21:34:06 -07:00
|
|
|
firstPair <- kvPair False
|
|
|
|
restPairs <- many (kvPair True)
|
|
|
|
let kvPairs = firstPair : restPairs
|
2012-08-12 19:27:13 -07:00
|
|
|
blanklines
|
2015-04-18 18:34:55 -07:00
|
|
|
updateState $ \st -> st{ stateMeta' = stateMeta' st <>
|
|
|
|
return (Meta $ M.fromList kvPairs) }
|
2012-08-12 19:27:13 -07:00
|
|
|
|
2015-10-26 21:34:06 -07:00
|
|
|
kvPair :: Bool -> MarkdownParser (String, MetaValue)
|
|
|
|
kvPair allowEmpty = try $ do
|
2012-08-12 19:27:13 -07:00
|
|
|
key <- many1Till (alphaNum <|> oneOf "_- ") (char ':')
|
2015-10-26 21:34:06 -07:00
|
|
|
val <- trim <$> manyTill anyChar
|
2012-08-12 19:27:13 -07:00
|
|
|
(try $ newline >> lookAhead (blankline <|> nonspaceChar))
|
2015-10-26 21:34:06 -07:00
|
|
|
guard $ allowEmpty || not (null val)
|
2012-08-12 19:27:13 -07:00
|
|
|
let key' = concat $ words $ map toLower key
|
2015-10-26 21:34:06 -07:00
|
|
|
let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ val
|
2012-08-12 19:27:13 -07:00
|
|
|
return (key',val')
|
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
parseMarkdown :: MarkdownParser Pandoc
|
2007-11-03 23:27:58 +00:00
|
|
|
parseMarkdown = do
|
|
|
|
-- markdown allows raw HTML
|
2012-07-25 11:43:56 -07:00
|
|
|
updateState $ \state -> state { stateOptions =
|
|
|
|
let oldOpts = stateOptions state in
|
|
|
|
oldOpts{ readerParseRaw = True } }
|
2013-08-18 18:39:04 -07:00
|
|
|
optional titleBlock
|
2008-08-04 03:15:34 +00:00
|
|
|
blocks <- parseBlocks
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
st <- getState
|
2015-04-18 18:34:55 -07:00
|
|
|
let meta = runF (stateMeta' st) st
|
|
|
|
let Pandoc _ bs = B.doc $ runF blocks st
|
2015-12-12 17:28:52 -08:00
|
|
|
eastAsianLineBreaks <- option False $
|
|
|
|
True <$ guardEnabled Ext_east_asian_line_breaks
|
|
|
|
return $ (if eastAsianLineBreaks
|
|
|
|
then bottomUp softBreakFilter
|
|
|
|
else id) $ Pandoc meta bs
|
|
|
|
|
|
|
|
softBreakFilter :: [Inline] -> [Inline]
|
|
|
|
softBreakFilter (x:SoftBreak:y:zs) =
|
|
|
|
case (stringify x, stringify y) of
|
|
|
|
(xs@(_:_), (c:_))
|
|
|
|
| charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs
|
|
|
|
_ -> x:SoftBreak:y:zs
|
|
|
|
softBreakFilter xs = xs
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
referenceKey :: MarkdownParser (F Blocks)
|
2007-11-03 23:27:58 +00:00
|
|
|
referenceKey = try $ do
|
2013-01-04 12:01:09 -08:00
|
|
|
pos <- getPosition
|
2009-04-29 19:28:31 +00:00
|
|
|
skipNonindentSpaces
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
(_,raw) <- reference
|
2007-11-03 23:27:58 +00:00
|
|
|
char ':'
|
2008-08-10 23:26:32 +00:00
|
|
|
skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
|
2010-12-10 12:14:51 -08:00
|
|
|
let sourceURL = liftM unwords $ many $ try $ do
|
2011-01-19 15:06:56 -08:00
|
|
|
skipMany spaceChar
|
2014-05-02 22:58:47 -07:00
|
|
|
notFollowedBy' referenceTitle
|
2015-11-19 22:58:19 -08:00
|
|
|
notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
notFollowedBy' (() <$ reference)
|
2013-02-15 22:56:53 -08:00
|
|
|
many1 $ notFollowedBy space >> litChar
|
2014-07-16 07:54:44 -07:00
|
|
|
let betweenAngles = try $ char '<' >> manyTill litChar (char '>')
|
2010-12-10 12:14:51 -08:00
|
|
|
src <- try betweenAngles <|> sourceURL
|
2007-11-03 23:27:58 +00:00
|
|
|
tit <- option "" referenceTitle
|
2015-04-02 21:09:08 -07:00
|
|
|
attr <- option nullAttr $ try $
|
2015-11-19 22:58:19 -08:00
|
|
|
guardEnabled Ext_link_attributes >> skipSpaces >> attributes
|
|
|
|
addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes
|
2015-04-02 21:09:08 -07:00
|
|
|
>> many (try $ spnl >> keyValAttr)
|
2007-11-03 23:27:58 +00:00
|
|
|
blanklines
|
2015-04-02 21:09:08 -07:00
|
|
|
let attr' = extractIdClass $ foldl (\x f -> f x) attr addKvs
|
|
|
|
target = (escapeURI $ trimr src, tit)
|
2007-11-03 23:27:58 +00:00
|
|
|
st <- getState
|
2012-08-01 22:40:07 -07:00
|
|
|
let oldkeys = stateKeys st
|
2013-01-04 12:01:09 -08:00
|
|
|
let key = toKey raw
|
|
|
|
case M.lookup key oldkeys of
|
|
|
|
Just _ -> addWarning (Just pos) $ "Duplicate link reference `" ++ raw ++ "'"
|
|
|
|
Nothing -> return ()
|
2015-04-02 21:09:08 -07:00
|
|
|
updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys }
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return mempty
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
referenceTitle :: MarkdownParser String
|
2011-12-05 20:55:23 -08:00
|
|
|
referenceTitle = try $ do
|
2008-08-10 23:26:32 +00:00
|
|
|
skipSpaces >> optional newline >> skipSpaces
|
2013-03-13 19:18:20 -07:00
|
|
|
quotedTitle '"' <|> quotedTitle '\'' <|> charsInBalanced '(' ')' litChar
|
2013-01-16 11:25:17 -08:00
|
|
|
|
|
|
|
-- A link title in quotes
|
|
|
|
quotedTitle :: Char -> MarkdownParser String
|
|
|
|
quotedTitle c = try $ do
|
|
|
|
char c
|
|
|
|
notFollowedBy spaces
|
|
|
|
let pEnder = try $ char c >> notFollowedBy (satisfy isAlphaNum)
|
2013-02-15 20:27:29 -08:00
|
|
|
let regChunk = many1 (noneOf ['\\','\n','&',c]) <|> count 1 litChar
|
2013-01-16 11:25:17 -08:00
|
|
|
let nestedChunk = (\x -> [c] ++ x ++ [c]) <$> quotedTitle c
|
|
|
|
unwords . words . concat <$> manyTill (nestedChunk <|> regChunk) pEnder
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2012-08-19 11:12:18 -07:00
|
|
|
-- | PHP Markdown Extra style abbreviation key. Currently
|
|
|
|
-- we just skip them, since Pandoc doesn't have an element for
|
|
|
|
-- an abbreviation.
|
2015-04-18 18:34:55 -07:00
|
|
|
abbrevKey :: MarkdownParser (F Blocks)
|
2012-08-19 11:12:18 -07:00
|
|
|
abbrevKey = do
|
|
|
|
guardEnabled Ext_abbreviations
|
|
|
|
try $ do
|
|
|
|
char '*'
|
|
|
|
reference
|
|
|
|
char ':'
|
|
|
|
skipMany (satisfy (/= '\n'))
|
|
|
|
blanklines
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return mempty
|
2012-08-19 11:12:18 -07:00
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
noteMarker :: MarkdownParser String
|
2011-01-19 15:14:23 -08:00
|
|
|
noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
rawLine :: MarkdownParser String
|
2011-02-01 22:35:27 -08:00
|
|
|
rawLine = try $ do
|
2007-11-03 23:27:58 +00:00
|
|
|
notFollowedBy blankline
|
2010-12-08 08:17:16 -08:00
|
|
|
notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker
|
2011-02-01 22:35:27 -08:00
|
|
|
optional indentSpaces
|
|
|
|
anyLine
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
rawLines :: MarkdownParser String
|
2011-02-01 22:35:27 -08:00
|
|
|
rawLines = do
|
|
|
|
first <- anyLine
|
|
|
|
rest <- many rawLine
|
|
|
|
return $ unlines (first:rest)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
noteBlock :: MarkdownParser (F Blocks)
|
2007-11-03 23:27:58 +00:00
|
|
|
noteBlock = try $ do
|
2013-01-04 12:08:18 -08:00
|
|
|
pos <- getPosition
|
2010-12-08 08:17:16 -08:00
|
|
|
skipNonindentSpaces
|
2007-11-03 23:27:58 +00:00
|
|
|
ref <- noteMarker
|
|
|
|
char ':'
|
|
|
|
optional blankline
|
|
|
|
optional indentSpaces
|
2012-09-29 11:57:32 -04:00
|
|
|
first <- rawLines
|
|
|
|
rest <- many $ try $ blanklines >> indentSpaces >> rawLines
|
|
|
|
let raw = unlines (first:rest) ++ "\n"
|
2007-11-03 23:27:58 +00:00
|
|
|
optional blanklines
|
2015-04-18 18:34:55 -07:00
|
|
|
parsed <- parseFromString parseBlocks raw
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
let newnote = (ref, parsed)
|
2013-01-04 12:08:18 -08:00
|
|
|
oldnotes <- stateNotes' <$> getState
|
|
|
|
case lookup ref oldnotes of
|
|
|
|
Just _ -> addWarning (Just pos) $ "Duplicate note reference `" ++ ref ++ "'"
|
|
|
|
Nothing -> return ()
|
|
|
|
updateState $ \s -> s { stateNotes' = newnote : oldnotes }
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
return mempty
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- parsing blocks
|
|
|
|
--
|
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
parseBlocks :: MarkdownParser (F Blocks)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
parseBlocks = mconcat <$> manyTill block eof
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
block :: MarkdownParser (F Blocks)
|
2014-02-25 22:43:58 -08:00
|
|
|
block = do
|
|
|
|
tr <- getOption readerTrace
|
|
|
|
pos <- getPosition
|
|
|
|
res <- choice [ mempty <$ blanklines
|
2013-06-19 09:22:34 -07:00
|
|
|
, codeBlockFenced
|
2013-08-18 18:39:04 -07:00
|
|
|
, yamlMetaBlock
|
2013-11-03 21:16:47 -08:00
|
|
|
-- note: bulletList needs to be before header because of
|
|
|
|
-- the possibility of empty list items: -
|
|
|
|
, bulletList
|
2012-07-26 20:29:08 -07:00
|
|
|
, header
|
2013-01-21 21:35:51 -08:00
|
|
|
, lhsCodeBlock
|
2013-08-15 22:39:14 -07:00
|
|
|
, divHtml
|
2012-08-04 10:49:05 -07:00
|
|
|
, htmlBlock
|
2012-07-26 20:29:08 -07:00
|
|
|
, table
|
|
|
|
, codeBlockIndented
|
2015-12-18 23:59:20 -08:00
|
|
|
, guardEnabled Ext_latex_macros *> (macro >>= return . return)
|
2015-02-25 08:33:42 -08:00
|
|
|
, rawTeXBlock
|
|
|
|
, lineBlock
|
2012-07-26 20:29:08 -07:00
|
|
|
, blockQuote
|
|
|
|
, hrule
|
|
|
|
, orderedList
|
|
|
|
, definitionList
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
, noteBlock
|
|
|
|
, referenceKey
|
2012-08-19 11:12:18 -07:00
|
|
|
, abbrevKey
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
, para
|
2012-07-26 20:29:08 -07:00
|
|
|
, plain
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
] <?> "block"
|
2015-04-18 18:34:55 -07:00
|
|
|
when tr $ do
|
|
|
|
st <- getState
|
2014-02-25 22:43:58 -08:00
|
|
|
trace (printf "line %d: %s" (sourceLine pos)
|
2015-04-18 18:34:55 -07:00
|
|
|
(take 60 $ show $ B.toList $ runF res st)) (return ())
|
2014-02-25 22:43:58 -08:00
|
|
|
return res
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- header blocks
|
|
|
|
--
|
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
header :: MarkdownParser (F Blocks)
|
2008-07-23 23:10:05 +00:00
|
|
|
header = setextHeader <|> atxHeader <?> "header"
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2014-10-14 13:28:28 +02:00
|
|
|
atxChar :: MarkdownParser Char
|
|
|
|
atxChar = do
|
|
|
|
exts <- getOption readerExtensions
|
|
|
|
return $ if Set.member Ext_literate_haskell exts
|
|
|
|
then '=' else '#'
|
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
atxHeader :: MarkdownParser (F Blocks)
|
2007-11-03 23:27:58 +00:00
|
|
|
atxHeader = try $ do
|
2014-10-14 13:28:28 +02:00
|
|
|
level <- atxChar >>= many1 . char >>= return . length
|
2013-07-29 08:38:46 -07:00
|
|
|
notFollowedBy $ guardEnabled Ext_fancy_lists >>
|
|
|
|
(char '.' <|> char ')') -- this would be a list
|
2007-11-03 23:27:58 +00:00
|
|
|
skipSpaces
|
2015-05-13 23:02:54 -07:00
|
|
|
(text, raw) <- withRaw $
|
|
|
|
trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline)
|
2013-01-10 20:22:18 -08:00
|
|
|
attr <- atxClosing
|
2015-04-02 21:09:08 -07:00
|
|
|
attr' <- registerHeader attr (runF text defaultParserState)
|
2015-05-13 23:02:54 -07:00
|
|
|
guardDisabled Ext_implicit_header_references
|
2015-04-02 21:09:08 -07:00
|
|
|
<|> registerImplicitHeader raw attr'
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ B.headerWith attr' level <$> text
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2013-01-16 09:17:20 -08:00
|
|
|
atxClosing :: MarkdownParser Attr
|
|
|
|
atxClosing = try $ do
|
|
|
|
attr' <- option nullAttr
|
|
|
|
(guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier)
|
2014-10-14 13:28:28 +02:00
|
|
|
skipMany . char =<< atxChar
|
2013-01-16 09:17:20 -08:00
|
|
|
skipSpaces
|
|
|
|
attr <- option attr'
|
|
|
|
(guardEnabled Ext_header_attributes >> attributes)
|
|
|
|
blanklines
|
|
|
|
return attr
|
2013-01-10 20:22:18 -08:00
|
|
|
|
|
|
|
setextHeaderEnd :: MarkdownParser Attr
|
2013-01-16 09:17:20 -08:00
|
|
|
setextHeaderEnd = try $ do
|
|
|
|
attr <- option nullAttr
|
|
|
|
$ (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier)
|
|
|
|
<|> (guardEnabled Ext_header_attributes >> attributes)
|
|
|
|
blanklines
|
|
|
|
return attr
|
|
|
|
|
|
|
|
mmdHeaderIdentifier :: MarkdownParser Attr
|
|
|
|
mmdHeaderIdentifier = do
|
|
|
|
ident <- stripFirstAndLast . snd <$> reference
|
|
|
|
skipSpaces
|
|
|
|
return (ident,[],[])
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
setextHeader :: MarkdownParser (F Blocks)
|
2007-11-03 23:27:58 +00:00
|
|
|
setextHeader = try $ do
|
2010-11-28 20:19:32 -08:00
|
|
|
-- This lookahead prevents us from wasting time parsing Inlines
|
|
|
|
-- unless necessary -- it gives a significant performance boost.
|
|
|
|
lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline
|
2015-07-23 02:29:37 -04:00
|
|
|
skipSpaces
|
2015-05-13 23:02:54 -07:00
|
|
|
(text, raw) <- withRaw $
|
|
|
|
trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline)
|
2013-01-10 20:22:18 -08:00
|
|
|
attr <- setextHeaderEnd
|
2007-11-22 19:09:38 +00:00
|
|
|
underlineChar <- oneOf setextHChars
|
|
|
|
many (char underlineChar)
|
|
|
|
blanklines
|
2015-04-18 18:34:55 -07:00
|
|
|
let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
|
2015-04-02 21:09:08 -07:00
|
|
|
attr' <- registerHeader attr (runF text defaultParserState)
|
2015-05-13 23:02:54 -07:00
|
|
|
guardDisabled Ext_implicit_header_references
|
2015-04-02 21:09:08 -07:00
|
|
|
<|> registerImplicitHeader raw attr'
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ B.headerWith attr' level <$> text
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-02 21:09:08 -07:00
|
|
|
registerImplicitHeader :: String -> Attr -> MarkdownParser ()
|
|
|
|
registerImplicitHeader raw attr@(ident, _, _) = do
|
2015-05-13 23:02:54 -07:00
|
|
|
let key = toKey $ "[" ++ raw ++ "]"
|
|
|
|
updateState (\s -> s { stateHeaderKeys =
|
2015-04-02 21:09:08 -07:00
|
|
|
M.insert key (('#':ident,""), attr) (stateHeaderKeys s) })
|
2015-05-13 23:02:54 -07:00
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
--
|
|
|
|
-- hrule block
|
|
|
|
--
|
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
hrule :: Parser [Char] st (F Blocks)
|
2007-11-03 23:27:58 +00:00
|
|
|
hrule = try $ do
|
|
|
|
skipSpaces
|
2011-01-19 15:06:56 -08:00
|
|
|
start <- satisfy isHruleChar
|
2007-11-03 23:27:58 +00:00
|
|
|
count 2 (skipSpaces >> char start)
|
2009-04-29 19:28:39 +00:00
|
|
|
skipMany (spaceChar <|> char start)
|
2007-11-03 23:27:58 +00:00
|
|
|
newline
|
|
|
|
optional blanklines
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return B.horizontalRule
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- code blocks
|
|
|
|
--
|
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
indentedLine :: MarkdownParser String
|
2015-04-18 18:34:55 -07:00
|
|
|
indentedLine = indentSpaces >> anyLine >>= return . (++ "\n")
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
blockDelimiter :: (Char -> Bool)
|
2012-01-28 12:25:24 -08:00
|
|
|
-> Maybe Int
|
2015-04-18 18:34:55 -07:00
|
|
|
-> Parser [Char] st Int
|
2012-01-28 12:25:24 -08:00
|
|
|
blockDelimiter f len = try $ do
|
|
|
|
c <- lookAhead (satisfy f)
|
2012-08-21 20:11:10 -07:00
|
|
|
case len of
|
|
|
|
Just l -> count l (char c) >> many (char c) >> return l
|
2015-04-18 18:34:55 -07:00
|
|
|
Nothing -> count 3 (char c) >> many (char c) >>=
|
|
|
|
return . (+ 3) . length
|
2008-02-09 03:18:54 +00:00
|
|
|
|
2013-03-02 21:08:33 -08:00
|
|
|
attributes :: MarkdownParser Attr
|
2008-02-09 03:19:43 +00:00
|
|
|
attributes = try $ do
|
2008-02-09 03:18:54 +00:00
|
|
|
char '{'
|
2012-02-08 17:18:08 -08:00
|
|
|
spnl
|
2014-07-11 12:51:26 +01:00
|
|
|
attrs <- many (attribute <* spnl)
|
2008-02-09 03:18:54 +00:00
|
|
|
char '}'
|
2013-03-02 21:08:33 -08:00
|
|
|
return $ foldl (\x f -> f x) nullAttr attrs
|
2008-02-09 03:19:43 +00:00
|
|
|
|
2013-03-02 21:08:33 -08:00
|
|
|
attribute :: MarkdownParser (Attr -> Attr)
|
|
|
|
attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr
|
2008-02-09 03:19:43 +00:00
|
|
|
|
2013-02-12 11:27:42 -08:00
|
|
|
identifier :: MarkdownParser String
|
2008-02-09 03:19:43 +00:00
|
|
|
identifier = do
|
|
|
|
first <- letter
|
2009-07-03 02:33:52 +00:00
|
|
|
rest <- many $ alphaNum <|> oneOf "-_:."
|
2008-02-09 03:19:43 +00:00
|
|
|
return (first:rest)
|
|
|
|
|
2013-03-02 21:08:33 -08:00
|
|
|
identifierAttr :: MarkdownParser (Attr -> Attr)
|
2008-02-09 03:19:43 +00:00
|
|
|
identifierAttr = try $ do
|
|
|
|
char '#'
|
|
|
|
result <- identifier
|
2013-03-02 21:08:33 -08:00
|
|
|
return $ \(_,cs,kvs) -> (result,cs,kvs)
|
2008-02-09 03:19:43 +00:00
|
|
|
|
2013-03-02 21:08:33 -08:00
|
|
|
classAttr :: MarkdownParser (Attr -> Attr)
|
2008-02-09 03:19:43 +00:00
|
|
|
classAttr = try $ do
|
|
|
|
char '.'
|
2013-01-17 11:24:45 -08:00
|
|
|
result <- identifier
|
2013-03-02 21:08:33 -08:00
|
|
|
return $ \(id',cs,kvs) -> (id',cs ++ [result],kvs)
|
2008-02-09 03:19:43 +00:00
|
|
|
|
2013-03-02 21:08:33 -08:00
|
|
|
keyValAttr :: MarkdownParser (Attr -> Attr)
|
2008-02-09 03:19:43 +00:00
|
|
|
keyValAttr = try $ do
|
|
|
|
key <- identifier
|
|
|
|
char '='
|
2013-02-12 11:27:42 -08:00
|
|
|
val <- enclosed (char '"') (char '"') litChar
|
|
|
|
<|> enclosed (char '\'') (char '\'') litChar
|
|
|
|
<|> many (escapedChar' <|> noneOf " \t\n\r}")
|
2015-09-25 23:01:34 -07:00
|
|
|
return $ \(id',cs,kvs) ->
|
|
|
|
case key of
|
|
|
|
"id" -> (val,cs,kvs)
|
|
|
|
"class" -> (id',cs ++ words val,kvs)
|
|
|
|
_ -> (id',cs,kvs ++ [(key,val)])
|
2013-03-02 21:08:33 -08:00
|
|
|
|
|
|
|
specialAttr :: MarkdownParser (Attr -> Attr)
|
|
|
|
specialAttr = do
|
|
|
|
char '-'
|
|
|
|
return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs)
|
2008-02-09 03:18:54 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
codeBlockFenced :: MarkdownParser (F Blocks)
|
2012-08-21 19:21:51 -07:00
|
|
|
codeBlockFenced = try $ do
|
2013-01-23 13:07:07 -08:00
|
|
|
c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
|
|
|
|
<|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`'))
|
|
|
|
size <- blockDelimiter (== c) Nothing
|
2012-08-21 20:11:10 -07:00
|
|
|
skipMany spaceChar
|
|
|
|
attr <- option ([],[],[]) $
|
2013-01-23 13:07:07 -08:00
|
|
|
try (guardEnabled Ext_fenced_code_attributes >> attributes)
|
2014-05-27 12:44:39 -07:00
|
|
|
<|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar)
|
2012-08-21 20:11:10 -07:00
|
|
|
blankline
|
2013-01-23 13:07:07 -08:00
|
|
|
contents <- manyTill anyLine (blockDelimiter (== c) (Just size))
|
2008-02-09 03:19:17 +00:00
|
|
|
blanklines
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return $ B.codeBlockWith attr $ intercalate "\n" contents
|
2008-02-09 03:18:54 +00:00
|
|
|
|
2014-05-27 12:44:39 -07:00
|
|
|
-- correctly handle github language identifiers
|
|
|
|
toLanguageId :: String -> String
|
|
|
|
toLanguageId = map toLower . go
|
|
|
|
where go "c++" = "cpp"
|
|
|
|
go "objective-c" = "objectivec"
|
|
|
|
go x = x
|
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
codeBlockIndented :: MarkdownParser (F Blocks)
|
2008-02-09 03:18:54 +00:00
|
|
|
codeBlockIndented = do
|
2012-07-26 22:32:53 -07:00
|
|
|
contents <- many1 (indentedLine <|>
|
2007-11-03 23:27:58 +00:00
|
|
|
try (do b <- blanklines
|
|
|
|
l <- indentedLine
|
|
|
|
return $ b ++ l))
|
|
|
|
optional blanklines
|
2012-07-25 20:42:15 -07:00
|
|
|
classes <- getOption readerIndentedCodeClasses
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return $ B.codeBlockWith ("", classes, []) $
|
2009-12-05 04:46:57 +00:00
|
|
|
stripTrailingNewlines $ concat contents
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
lhsCodeBlock :: MarkdownParser (F Blocks)
|
2008-12-02 22:41:51 +00:00
|
|
|
lhsCodeBlock = do
|
2012-08-08 23:18:19 -07:00
|
|
|
guardEnabled Ext_literate_haskell
|
2015-04-18 18:34:55 -07:00
|
|
|
(return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
(lhsCodeBlockBird <|> lhsCodeBlockLaTeX))
|
2015-04-18 18:34:55 -07:00
|
|
|
<|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$>
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
lhsCodeBlockInverseBird)
|
2008-12-02 22:42:55 +00:00
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
lhsCodeBlockLaTeX :: MarkdownParser String
|
2008-12-02 22:42:55 +00:00
|
|
|
lhsCodeBlockLaTeX = try $ do
|
|
|
|
string "\\begin{code}"
|
|
|
|
manyTill spaceChar newline
|
|
|
|
contents <- many1Till anyChar (try $ string "\\end{code}")
|
|
|
|
blanklines
|
|
|
|
return $ stripTrailingNewlines contents
|
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
lhsCodeBlockBird :: MarkdownParser String
|
2010-03-14 23:23:20 +00:00
|
|
|
lhsCodeBlockBird = lhsCodeBlockBirdWith '>'
|
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
lhsCodeBlockInverseBird :: MarkdownParser String
|
2010-03-14 23:23:20 +00:00
|
|
|
lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<'
|
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
lhsCodeBlockBirdWith :: Char -> MarkdownParser String
|
2010-03-14 23:23:20 +00:00
|
|
|
lhsCodeBlockBirdWith c = try $ do
|
2008-12-02 22:41:51 +00:00
|
|
|
pos <- getPosition
|
|
|
|
when (sourceColumn pos /= 1) $ fail "Not in first column"
|
2010-03-14 23:23:20 +00:00
|
|
|
lns <- many1 $ birdTrackLine c
|
2008-12-02 22:41:51 +00:00
|
|
|
-- if (as is normal) there is always a space after >, drop it
|
|
|
|
let lns' = if all (\ln -> null ln || take 1 ln == " ") lns
|
|
|
|
then map (drop 1) lns
|
|
|
|
else lns
|
|
|
|
blanklines
|
2008-12-02 22:42:55 +00:00
|
|
|
return $ intercalate "\n" lns'
|
2008-12-02 22:41:51 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
birdTrackLine :: Char -> Parser [Char] st String
|
2011-03-02 11:18:38 -08:00
|
|
|
birdTrackLine c = try $ do
|
2010-03-14 23:23:20 +00:00
|
|
|
char c
|
2011-03-02 11:18:38 -08:00
|
|
|
-- allow html tags on left margin:
|
|
|
|
when (c == '<') $ notFollowedBy letter
|
2013-01-25 18:32:15 -08:00
|
|
|
anyLine
|
2008-12-02 22:41:51 +00:00
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
--
|
|
|
|
-- block quotes
|
|
|
|
--
|
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
emailBlockQuoteStart :: MarkdownParser Char
|
2014-07-11 12:51:26 +01:00
|
|
|
emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ')
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
emailBlockQuote :: MarkdownParser [String]
|
2007-11-03 23:27:58 +00:00
|
|
|
emailBlockQuote = try $ do
|
|
|
|
emailBlockQuoteStart
|
2012-09-29 11:57:32 -04:00
|
|
|
let emailLine = many $ nonEndline <|> try
|
|
|
|
(endline >> notFollowedBy emailBlockQuoteStart >>
|
|
|
|
return '\n')
|
|
|
|
let emailSep = try (newline >> emailBlockQuoteStart)
|
|
|
|
first <- emailLine
|
|
|
|
rest <- many $ try $ emailSep >> emailLine
|
|
|
|
let raw = first:rest
|
2007-11-03 23:27:58 +00:00
|
|
|
newline <|> (eof >> return '\n')
|
|
|
|
optional blanklines
|
|
|
|
return raw
|
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
blockQuote :: MarkdownParser (F Blocks)
|
2012-07-26 22:32:53 -07:00
|
|
|
blockQuote = do
|
2007-12-08 19:32:18 +00:00
|
|
|
raw <- emailBlockQuote
|
2007-11-03 23:27:58 +00:00
|
|
|
-- parse the extracted block, which may contain various block elements:
|
2015-04-18 18:34:55 -07:00
|
|
|
contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
|
|
|
|
return $ B.blockQuote <$> contents
|
2012-07-26 22:32:53 -07:00
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
--
|
|
|
|
-- list blocks
|
|
|
|
--
|
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
bulletListStart :: MarkdownParser ()
|
2007-11-03 23:27:58 +00:00
|
|
|
bulletListStart = try $ do
|
|
|
|
optional newline -- if preceded by a Plain block in a list context
|
2014-08-12 11:10:48 -07:00
|
|
|
startpos <- sourceColumn <$> getPosition
|
2009-04-29 19:28:31 +00:00
|
|
|
skipNonindentSpaces
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
notFollowedBy' (() <$ hrule) -- because hrules start out just like lists
|
2011-01-19 15:06:56 -08:00
|
|
|
satisfy isBulletListMarker
|
2014-08-12 11:10:48 -07:00
|
|
|
endpos <- sourceColumn <$> getPosition
|
|
|
|
tabStop <- getOption readerTabStop
|
|
|
|
lookAhead (newline <|> spaceChar)
|
|
|
|
() <$ atMostSpaces (tabStop - (endpos - startpos))
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
anyOrderedListStart :: MarkdownParser (Int, ListNumberStyle, ListNumberDelim)
|
2007-11-03 23:27:58 +00:00
|
|
|
anyOrderedListStart = try $ do
|
|
|
|
optional newline -- if preceded by a Plain block in a list context
|
2014-08-12 11:10:48 -07:00
|
|
|
startpos <- sourceColumn <$> getPosition
|
2009-04-29 19:28:31 +00:00
|
|
|
skipNonindentSpaces
|
2007-11-03 23:27:58 +00:00
|
|
|
notFollowedBy $ string "p." >> spaceChar >> digit -- page number
|
2014-08-12 11:10:48 -07:00
|
|
|
res <- do guardDisabled Ext_fancy_lists
|
2014-10-18 13:57:48 -07:00
|
|
|
start <- many1 digit >>= safeRead
|
2014-08-12 11:10:48 -07:00
|
|
|
char '.'
|
2014-10-18 13:57:48 -07:00
|
|
|
return (start, DefaultStyle, DefaultDelim)
|
2014-08-12 11:10:48 -07:00
|
|
|
<|> do (num, style, delim) <- anyOrderedListMarker
|
|
|
|
-- if it could be an abbreviated first name,
|
|
|
|
-- insist on more than one space
|
|
|
|
when (delim == Period && (style == UpperAlpha ||
|
|
|
|
(style == UpperRoman &&
|
|
|
|
num `elem` [1, 5, 10, 50, 100, 500, 1000]))) $
|
|
|
|
() <$ spaceChar
|
|
|
|
return (num, style, delim)
|
|
|
|
endpos <- sourceColumn <$> getPosition
|
|
|
|
tabStop <- getOption readerTabStop
|
|
|
|
lookAhead (newline <|> spaceChar)
|
|
|
|
atMostSpaces (tabStop - (endpos - startpos))
|
|
|
|
return res
|
2008-09-12 00:05:32 +00:00
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
listStart :: MarkdownParser ()
|
2015-04-18 18:34:55 -07:00
|
|
|
listStart = bulletListStart <|> (anyOrderedListStart >> return ())
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
listLine :: MarkdownParser String
|
2008-09-12 00:05:32 +00:00
|
|
|
listLine = try $ do
|
2007-11-03 23:27:58 +00:00
|
|
|
notFollowedBy' (do indentSpaces
|
2013-12-19 20:43:25 -05:00
|
|
|
many spaceChar
|
2008-09-12 00:05:32 +00:00
|
|
|
listStart)
|
2014-07-07 15:47:51 -06:00
|
|
|
notFollowedByHtmlCloser
|
2014-05-04 16:21:18 -07:00
|
|
|
optional (() <$ indentSpaces)
|
2014-08-12 11:10:48 -07:00
|
|
|
listLineCommon
|
|
|
|
|
|
|
|
listLineCommon :: MarkdownParser String
|
|
|
|
listLineCommon = concat <$> manyTill
|
2013-12-07 19:56:54 -08:00
|
|
|
( many1 (satisfy $ \c -> c /= '\n' && c /= '<')
|
|
|
|
<|> liftM snd (htmlTag isCommentTag)
|
|
|
|
<|> count 1 anyChar
|
|
|
|
) newline
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- parse raw text for one list item, excluding start marker and continuations
|
2013-01-03 20:32:15 -08:00
|
|
|
rawListItem :: MarkdownParser a
|
|
|
|
-> MarkdownParser String
|
2012-01-02 17:04:59 -08:00
|
|
|
rawListItem start = try $ do
|
|
|
|
start
|
2014-08-12 11:10:48 -07:00
|
|
|
first <- listLineCommon
|
2013-11-03 21:16:47 -08:00
|
|
|
rest <- many (notFollowedBy listStart >> notFollowedBy blankline >> listLine)
|
2007-11-03 23:27:58 +00:00
|
|
|
blanks <- many blankline
|
2013-04-19 20:23:50 -07:00
|
|
|
return $ unlines (first:rest) ++ blanks
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2012-07-26 22:32:53 -07:00
|
|
|
-- continuation of a list item - indented and separated by blankline
|
2007-11-03 23:27:58 +00:00
|
|
|
-- or (in compact lists) endline.
|
|
|
|
-- note: nested lists are parsed as continuations
|
2013-01-03 20:32:15 -08:00
|
|
|
listContinuation :: MarkdownParser String
|
2008-09-12 00:05:32 +00:00
|
|
|
listContinuation = try $ do
|
2007-11-03 23:27:58 +00:00
|
|
|
lookAhead indentSpaces
|
2008-09-12 00:05:32 +00:00
|
|
|
result <- many1 listContinuationLine
|
2007-11-03 23:27:58 +00:00
|
|
|
blanks <- many blankline
|
|
|
|
return $ concat result ++ blanks
|
|
|
|
|
2014-07-07 15:47:51 -06:00
|
|
|
notFollowedByHtmlCloser :: MarkdownParser ()
|
|
|
|
notFollowedByHtmlCloser = do
|
|
|
|
inHtmlBlock <- stateInHtmlBlock <$> getState
|
|
|
|
case inHtmlBlock of
|
|
|
|
Just t -> notFollowedBy' $ htmlTag (~== TagClose t)
|
|
|
|
Nothing -> return ()
|
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
listContinuationLine :: MarkdownParser String
|
2008-09-12 00:05:32 +00:00
|
|
|
listContinuationLine = try $ do
|
2007-11-03 23:27:58 +00:00
|
|
|
notFollowedBy blankline
|
2008-09-12 00:05:32 +00:00
|
|
|
notFollowedBy' listStart
|
2014-07-07 15:47:51 -06:00
|
|
|
notFollowedByHtmlCloser
|
2007-11-03 23:27:58 +00:00
|
|
|
optional indentSpaces
|
2013-01-25 18:32:15 -08:00
|
|
|
result <- anyLine
|
2007-11-03 23:27:58 +00:00
|
|
|
return $ result ++ "\n"
|
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
listItem :: MarkdownParser a
|
2015-04-18 18:34:55 -07:00
|
|
|
-> MarkdownParser (F Blocks)
|
2012-04-13 11:12:18 -07:00
|
|
|
listItem start = try $ do
|
2012-01-02 17:04:59 -08:00
|
|
|
first <- rawListItem start
|
2008-09-12 00:05:32 +00:00
|
|
|
continuations <- many listContinuation
|
2007-11-03 23:27:58 +00:00
|
|
|
-- parsing with ListItemState forces markers at beginning of lines to
|
|
|
|
-- count as list item markers, even if not separated by blank space.
|
|
|
|
-- see definition of "endline"
|
|
|
|
state <- getState
|
|
|
|
let oldContext = stateParserContext state
|
|
|
|
setState $ state {stateParserContext = ListItemState}
|
|
|
|
-- parse the extracted block, which may contain various block elements:
|
|
|
|
let raw = concat (first:continuations)
|
|
|
|
contents <- parseFromString parseBlocks raw
|
|
|
|
updateState (\st -> st {stateParserContext = oldContext})
|
|
|
|
return contents
|
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
orderedList :: MarkdownParser (F Blocks)
|
2007-11-03 23:27:58 +00:00
|
|
|
orderedList = try $ do
|
|
|
|
(start, style, delim) <- lookAhead anyOrderedListStart
|
2013-12-19 20:19:24 -05:00
|
|
|
unless (style `elem` [DefaultStyle, Decimal, Example] &&
|
|
|
|
delim `elem` [DefaultDelim, Period]) $
|
2012-07-27 15:45:47 -07:00
|
|
|
guardEnabled Ext_fancy_lists
|
2012-07-27 16:00:27 -07:00
|
|
|
when (style == Example) $ guardEnabled Ext_example_lists
|
2015-04-18 18:34:55 -07:00
|
|
|
items <- fmap sequence $ many1 $ listItem
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
( try $ do
|
|
|
|
optional newline -- if preceded by Plain block in a list
|
2014-08-12 11:10:48 -07:00
|
|
|
startpos <- sourceColumn <$> getPosition
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
skipNonindentSpaces
|
2014-08-12 11:10:48 -07:00
|
|
|
res <- orderedListMarker style delim
|
|
|
|
endpos <- sourceColumn <$> getPosition
|
|
|
|
tabStop <- getOption readerTabStop
|
|
|
|
lookAhead (newline <|> spaceChar)
|
|
|
|
atMostSpaces (tabStop - (endpos - startpos))
|
|
|
|
return res )
|
2012-07-27 15:45:47 -07:00
|
|
|
start' <- option 1 $ guardEnabled Ext_startnum >> return start
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ B.orderedListWith (start', style, delim) <$> fmap compactify' items
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
bulletList :: MarkdownParser (F Blocks)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
bulletList = do
|
2015-04-18 18:34:55 -07:00
|
|
|
items <- fmap sequence $ many1 $ listItem bulletListStart
|
|
|
|
return $ B.bulletList <$> fmap compactify' items
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- definition lists
|
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
defListMarker :: MarkdownParser ()
|
2009-12-07 08:26:53 +00:00
|
|
|
defListMarker = do
|
|
|
|
sps <- nonindentSpaces
|
|
|
|
char ':' <|> char '~'
|
2012-07-25 12:31:16 -07:00
|
|
|
tabStop <- getOption readerTabStop
|
2009-12-07 08:26:53 +00:00
|
|
|
let remaining = tabStop - (length sps + 1)
|
|
|
|
if remaining > 0
|
2015-04-18 10:13:32 -07:00
|
|
|
then try (count remaining (char ' ')) <|> string "\t" <|> many1 spaceChar
|
2012-07-20 14:19:06 -07:00
|
|
|
else mzero
|
2009-12-07 08:26:53 +00:00
|
|
|
return ()
|
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
definitionListItem :: Bool -> MarkdownParser (F (Inlines, [Blocks]))
|
2014-07-20 16:33:59 -07:00
|
|
|
definitionListItem compact = try $ do
|
|
|
|
rawLine' <- anyLine
|
|
|
|
raw <- many1 $ defRawBlock compact
|
2015-04-18 18:34:55 -07:00
|
|
|
term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine'
|
2015-04-26 11:20:53 -07:00
|
|
|
contents <- mapM (parseFromString parseBlocks . (++"\n")) raw
|
2014-07-20 16:33:59 -07:00
|
|
|
optional blanklines
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ liftM2 (,) term (sequence contents)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2014-07-20 16:33:59 -07:00
|
|
|
defRawBlock :: Bool -> MarkdownParser String
|
|
|
|
defRawBlock compact = try $ do
|
|
|
|
hasBlank <- option False $ blankline >> return True
|
2009-12-07 08:26:53 +00:00
|
|
|
defListMarker
|
2007-11-03 23:27:58 +00:00
|
|
|
firstline <- anyLine
|
2014-07-20 16:33:59 -07:00
|
|
|
let dline = try
|
|
|
|
( do notFollowedBy blankline
|
2015-05-03 15:06:40 -07:00
|
|
|
notFollowedByHtmlCloser
|
2014-07-20 16:33:59 -07:00
|
|
|
if compact -- laziness not compatible with compact
|
|
|
|
then () <$ indentSpaces
|
|
|
|
else (() <$ indentSpaces)
|
|
|
|
<|> notFollowedBy defListMarker
|
|
|
|
anyLine )
|
|
|
|
rawlines <- many dline
|
|
|
|
cont <- liftM concat $ many $ try $ do
|
|
|
|
trailing <- option "" blanklines
|
|
|
|
ln <- indentSpaces >> notFollowedBy blankline >> anyLine
|
|
|
|
lns <- many dline
|
|
|
|
return $ trailing ++ unlines (ln:lns)
|
|
|
|
return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++
|
|
|
|
if hasBlank || not (null cont) then "\n\n" else ""
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
definitionList :: MarkdownParser (F Blocks)
|
2014-07-20 16:33:59 -07:00
|
|
|
definitionList = try $ do
|
2015-04-18 18:34:55 -07:00
|
|
|
lookAhead (anyLine >>
|
|
|
|
optional (blankline >> notFollowedBy (table >> return ())) >>
|
2015-04-18 10:13:32 -07:00
|
|
|
-- don't capture table caption as def list!
|
|
|
|
defListMarker)
|
2014-07-20 16:33:59 -07:00
|
|
|
compactDefinitionList <|> normalDefinitionList
|
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
compactDefinitionList :: MarkdownParser (F Blocks)
|
2014-07-20 16:33:59 -07:00
|
|
|
compactDefinitionList = do
|
|
|
|
guardEnabled Ext_compact_definition_lists
|
2015-04-18 18:34:55 -07:00
|
|
|
items <- fmap sequence $ many1 $ definitionListItem True
|
|
|
|
return $ B.definitionList <$> fmap compactify'DL items
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
normalDefinitionList :: MarkdownParser (F Blocks)
|
2014-07-20 16:33:59 -07:00
|
|
|
normalDefinitionList = do
|
|
|
|
guardEnabled Ext_definition_lists
|
2015-04-18 18:34:55 -07:00
|
|
|
items <- fmap sequence $ many1 $ definitionListItem False
|
|
|
|
return $ B.definitionList <$> items
|
2014-07-20 16:33:59 -07:00
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
--
|
|
|
|
-- paragraph block
|
|
|
|
--
|
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
para :: MarkdownParser (F Blocks)
|
2012-07-26 22:32:53 -07:00
|
|
|
para = try $ do
|
2013-01-14 20:53:08 -08:00
|
|
|
exts <- getOption readerExtensions
|
2015-04-18 18:34:55 -07:00
|
|
|
result <- trimInlinesF . mconcat <$> many1 inline
|
|
|
|
option (B.plain <$> result)
|
2013-01-14 20:53:08 -08:00
|
|
|
$ try $ do
|
|
|
|
newline
|
|
|
|
(blanklines >> return mempty)
|
2013-09-08 11:49:13 -07:00
|
|
|
<|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
|
|
|
|
<|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
|
|
|
|
<|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
|
|
|
|
<|> (guardEnabled Ext_lists_without_preceding_blankline >>
|
2014-09-26 13:32:08 +04:00
|
|
|
-- Avoid creating a paragraph in a nested list.
|
|
|
|
notFollowedBy' inList >>
|
2013-09-08 11:49:13 -07:00
|
|
|
() <$ lookAhead listStart)
|
2014-08-31 12:55:47 -07:00
|
|
|
<|> do guardEnabled Ext_native_divs
|
|
|
|
inHtmlBlock <- stateInHtmlBlock <$> getState
|
|
|
|
case inHtmlBlock of
|
|
|
|
Just "div" -> () <$
|
|
|
|
lookAhead (htmlTag (~== TagClose "div"))
|
|
|
|
_ -> mzero
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ do
|
|
|
|
result' <- result
|
|
|
|
case B.toList result' of
|
2015-04-02 21:09:08 -07:00
|
|
|
[Image attr alt (src,tit)]
|
2013-01-14 20:53:08 -08:00
|
|
|
| Ext_implicit_figures `Set.member` exts ->
|
2013-01-15 08:45:46 -08:00
|
|
|
-- the fig: at beginning of title indicates a figure
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ B.para $ B.singleton
|
2015-04-02 21:09:08 -07:00
|
|
|
$ Image attr alt (src,'f':'i':'g':':':tit)
|
2015-04-18 18:34:55 -07:00
|
|
|
_ -> return $ B.para result'
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
plain :: MarkdownParser (F Blocks)
|
|
|
|
plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2012-07-26 22:32:53 -07:00
|
|
|
--
|
2007-11-03 23:27:58 +00:00
|
|
|
-- raw html
|
|
|
|
--
|
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
htmlElement :: MarkdownParser String
|
2013-12-15 12:27:29 -08:00
|
|
|
htmlElement = rawVerbatimBlock
|
|
|
|
<|> strictHtmlBlock
|
|
|
|
<|> liftM snd (htmlTag isBlockTag)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
htmlBlock :: MarkdownParser (F Blocks)
|
2012-08-12 22:04:23 -07:00
|
|
|
htmlBlock = do
|
|
|
|
guardEnabled Ext_raw_html
|
2014-07-07 15:47:51 -06:00
|
|
|
try (do
|
|
|
|
(TagOpen t attrs) <- lookAhead $ fst <$> htmlTag isBlockTag
|
|
|
|
(guard (t `elem` ["pre","style","script"]) >>
|
2015-04-18 18:34:55 -07:00
|
|
|
(return . B.rawBlock "html") <$> rawVerbatimBlock)
|
2014-07-20 17:44:28 -07:00
|
|
|
<|> (do guardEnabled Ext_markdown_attribute
|
|
|
|
oldMarkdownAttribute <- stateMarkdownAttribute <$> getState
|
|
|
|
markdownAttribute <-
|
|
|
|
case lookup "markdown" attrs of
|
|
|
|
Just "0" -> False <$ updateState (\st -> st{
|
|
|
|
stateMarkdownAttribute = False })
|
|
|
|
Just _ -> True <$ updateState (\st -> st{
|
|
|
|
stateMarkdownAttribute = True })
|
|
|
|
Nothing -> return oldMarkdownAttribute
|
|
|
|
res <- if markdownAttribute
|
|
|
|
then rawHtmlBlocks
|
|
|
|
else htmlBlock'
|
|
|
|
updateState $ \st -> st{ stateMarkdownAttribute =
|
|
|
|
oldMarkdownAttribute }
|
|
|
|
return res)
|
2014-07-07 15:47:51 -06:00
|
|
|
<|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks))
|
|
|
|
<|> htmlBlock'
|
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
htmlBlock' :: MarkdownParser (F Blocks)
|
2012-07-26 20:29:08 -07:00
|
|
|
htmlBlock' = try $ do
|
2008-01-03 21:32:32 +00:00
|
|
|
first <- htmlElement
|
2014-07-07 15:47:51 -06:00
|
|
|
skipMany spaceChar
|
|
|
|
optional blanklines
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return $ B.rawBlock "html" first
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
strictHtmlBlock :: MarkdownParser String
|
2012-07-24 21:41:57 -07:00
|
|
|
strictHtmlBlock = htmlInBalanced (not . isInlineTag)
|
New HTML reader using tagsoup as a lexer.
* The new reader is faster and more accurate.
* API changes for Text.Pandoc.Readers.HTML:
- removed rawHtmlBlock, anyHtmlBlockTag, anyHtmlInlineTag,
anyHtmlTag, anyHtmlEndTag, htmlEndTag, extractTagType,
htmlBlockElement, htmlComment
- added htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag
* tagsoup is a new dependency.
* Text.Pandoc.Parsing: Generalized type on readWith.
* Benchmark.hs: Added length calculation to force full evaluation.
* Updated HTML reader tests.
* Updated markdown and textile readers to use the functions from
the HTML reader.
* Note: The markdown reader now correctly handles some cases it did not
before. For example:
<hr/>
is reproduced without adding a space.
<script>
a = '<b>';
</script>
is parsed correctly.
2010-12-22 20:25:15 -08:00
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
rawVerbatimBlock :: MarkdownParser String
|
2016-02-21 07:55:35 -08:00
|
|
|
rawVerbatimBlock = htmlInBalanced isVerbTag
|
|
|
|
where isVerbTag (TagOpen "pre" _) = True
|
|
|
|
isVerbTag (TagOpen "style" _) = True
|
|
|
|
isVerbTag (TagOpen "script" _) = True
|
|
|
|
isVerbTag _ = False
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
rawTeXBlock :: MarkdownParser (F Blocks)
|
2011-01-03 13:20:57 -08:00
|
|
|
rawTeXBlock = do
|
2012-07-26 20:29:08 -07:00
|
|
|
guardEnabled Ext_raw_tex
|
2014-06-23 12:41:47 -07:00
|
|
|
result <- (B.rawBlock "latex" . concat <$>
|
2015-04-18 18:34:55 -07:00
|
|
|
rawLaTeXBlock `sepEndBy1` blankline)
|
2014-06-23 12:41:47 -07:00
|
|
|
<|> (B.rawBlock "context" . concat <$>
|
|
|
|
rawConTeXtEnvironment `sepEndBy1` blankline)
|
2011-01-03 13:20:57 -08:00
|
|
|
spaces
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return result
|
2011-01-03 13:20:57 -08:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
rawHtmlBlocks :: MarkdownParser (F Blocks)
|
2007-11-03 23:27:58 +00:00
|
|
|
rawHtmlBlocks = do
|
2014-07-07 15:47:51 -06:00
|
|
|
(TagOpen tagtype _, raw) <- htmlTag isBlockTag
|
|
|
|
-- try to find closing tag
|
|
|
|
-- we set stateInHtmlBlock so that closing tags that can be either block or
|
|
|
|
-- inline will not be parsed as inline tags
|
|
|
|
oldInHtmlBlock <- stateInHtmlBlock <$> getState
|
|
|
|
updateState $ \st -> st{ stateInHtmlBlock = Just tagtype }
|
|
|
|
let closer = htmlTag (\x -> x ~== TagClose tagtype)
|
|
|
|
contents <- mconcat <$> many (notFollowedBy' closer >> block)
|
|
|
|
result <-
|
|
|
|
(closer >>= \(_, rawcloser) -> return (
|
2015-04-18 18:34:55 -07:00
|
|
|
return (B.rawBlock "html" $ stripMarkdownAttribute raw) <>
|
2014-07-07 15:47:51 -06:00
|
|
|
contents <>
|
2015-04-18 18:34:55 -07:00
|
|
|
return (B.rawBlock "html" rawcloser)))
|
|
|
|
<|> return (return (B.rawBlock "html" raw) <> contents)
|
2014-07-07 15:47:51 -06:00
|
|
|
updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
|
|
|
|
return result
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2012-08-15 09:42:16 -07:00
|
|
|
-- remove markdown="1" attribute
|
|
|
|
stripMarkdownAttribute :: String -> String
|
|
|
|
stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s
|
|
|
|
where filterAttrib (TagOpen t as) = TagOpen t
|
|
|
|
[(k,v) | (k,v) <- as, k /= "markdown"]
|
|
|
|
filterAttrib x = x
|
|
|
|
|
2013-01-13 12:34:18 -08:00
|
|
|
--
|
|
|
|
-- line block
|
|
|
|
--
|
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
lineBlock :: MarkdownParser (F Blocks)
|
2013-01-13 12:34:18 -08:00
|
|
|
lineBlock = try $ do
|
|
|
|
guardEnabled Ext_line_blocks
|
|
|
|
lines' <- lineBlockLines >>=
|
2015-04-18 18:34:55 -07:00
|
|
|
mapM (parseFromString (trimInlinesF . mconcat <$> many inline))
|
2016-10-13 08:46:44 +02:00
|
|
|
return $ B.lineBlock <$> sequence lines'
|
2013-01-13 12:34:18 -08:00
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
--
|
|
|
|
-- Tables
|
2012-07-26 22:32:53 -07:00
|
|
|
--
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- Parse a dashed line with optional trailing spaces; return its length
|
|
|
|
-- and the length including trailing space.
|
2015-04-18 18:34:55 -07:00
|
|
|
dashedLine :: Char
|
|
|
|
-> Parser [Char] st (Int, Int)
|
2007-11-03 23:27:58 +00:00
|
|
|
dashedLine ch = do
|
|
|
|
dashes <- many1 (char ch)
|
|
|
|
sp <- many spaceChar
|
2015-04-07 13:46:14 +01:00
|
|
|
let lengthDashes = length dashes
|
|
|
|
lengthSp = length sp
|
|
|
|
return (lengthDashes, lengthDashes + lengthSp)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2012-07-26 22:32:53 -07:00
|
|
|
-- Parse a table header with dashed lines of '-' preceded by
|
2009-12-05 21:34:46 +00:00
|
|
|
-- one (or zero) line of text.
|
2012-07-26 22:32:53 -07:00
|
|
|
simpleTableHeader :: Bool -- ^ Headerless table
|
2015-04-18 18:34:55 -07:00
|
|
|
-> MarkdownParser (F [Blocks], [Alignment], [Int])
|
2009-12-05 21:34:46 +00:00
|
|
|
simpleTableHeader headless = try $ do
|
|
|
|
rawContent <- if headless
|
|
|
|
then return ""
|
|
|
|
else anyLine
|
2007-11-03 23:27:58 +00:00
|
|
|
initSp <- nonindentSpaces
|
|
|
|
dashes <- many1 (dashedLine '-')
|
|
|
|
newline
|
2008-07-11 16:33:21 +00:00
|
|
|
let (lengths, lines') = unzip dashes
|
|
|
|
let indices = scanl (+) (length initSp) lines'
|
2009-12-05 21:34:46 +00:00
|
|
|
-- If no header, calculate alignment on basis of first row of text
|
2012-01-27 00:39:00 -08:00
|
|
|
rawHeads <- liftM (tail . splitStringByIndices (init indices)) $
|
2009-12-05 21:34:46 +00:00
|
|
|
if headless
|
2012-07-26 22:32:53 -07:00
|
|
|
then lookAhead anyLine
|
2009-12-05 21:34:46 +00:00
|
|
|
else return rawContent
|
2007-11-03 23:27:58 +00:00
|
|
|
let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
|
2009-12-05 21:34:46 +00:00
|
|
|
let rawHeads' = if headless
|
|
|
|
then replicate (length dashes) ""
|
2012-07-26 22:32:53 -07:00
|
|
|
else rawHeads
|
2015-04-18 18:34:55 -07:00
|
|
|
heads <- fmap sequence
|
|
|
|
$ mapM (parseFromString (mconcat <$> many plain))
|
|
|
|
$ map trim rawHeads'
|
2010-07-05 23:43:07 -07:00
|
|
|
return (heads, aligns, indices)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
-- Returns an alignment type for a table, based on a list of strings
|
|
|
|
-- (the rows of the column header) and a number (the length of the
|
|
|
|
-- dashed line under the rows.
|
|
|
|
alignType :: [String]
|
|
|
|
-> Int
|
|
|
|
-> Alignment
|
|
|
|
alignType [] _ = AlignDefault
|
|
|
|
alignType strLst len =
|
2012-09-29 17:09:34 -04:00
|
|
|
let nonempties = filter (not . null) $ map trimr strLst
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
(leftSpace, rightSpace) =
|
|
|
|
case sortBy (comparing length) nonempties of
|
|
|
|
(x:_) -> (head x `elem` " \t", length x < len)
|
|
|
|
[] -> (False, False)
|
|
|
|
in case (leftSpace, rightSpace) of
|
|
|
|
(True, False) -> AlignRight
|
|
|
|
(False, True) -> AlignLeft
|
|
|
|
(True, True) -> AlignCenter
|
|
|
|
(False, False) -> AlignDefault
|
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
-- Parse a table footer - dashed lines followed by blank line.
|
2013-01-03 20:32:15 -08:00
|
|
|
tableFooter :: MarkdownParser String
|
2009-04-29 19:28:31 +00:00
|
|
|
tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- Parse a table separator - dashed line.
|
2013-01-03 20:32:15 -08:00
|
|
|
tableSep :: MarkdownParser Char
|
2009-12-05 21:34:46 +00:00
|
|
|
tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n'
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- Parse a raw line and split it into chunks by indices.
|
2008-07-11 16:33:21 +00:00
|
|
|
rawTableLine :: [Int]
|
2013-01-03 20:32:15 -08:00
|
|
|
-> MarkdownParser [String]
|
2007-11-03 23:27:58 +00:00
|
|
|
rawTableLine indices = do
|
|
|
|
notFollowedBy' (blanklines <|> tableFooter)
|
|
|
|
line <- many1Till anyChar newline
|
2012-09-29 17:09:34 -04:00
|
|
|
return $ map trim $ tail $
|
2012-01-27 00:39:00 -08:00
|
|
|
splitStringByIndices (init indices) line
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- Parse a table line and return a list of lists of blocks (columns).
|
2008-07-11 16:33:21 +00:00
|
|
|
tableLine :: [Int]
|
2015-04-18 18:34:55 -07:00
|
|
|
-> MarkdownParser (F [Blocks])
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
tableLine indices = rawTableLine indices >>=
|
2015-04-18 18:34:55 -07:00
|
|
|
fmap sequence . mapM (parseFromString (mconcat <$> many plain))
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- Parse a multiline table row and return a list of blocks (columns).
|
2008-07-11 16:33:21 +00:00
|
|
|
multilineRow :: [Int]
|
2015-04-18 18:34:55 -07:00
|
|
|
-> MarkdownParser (F [Blocks])
|
2007-11-03 23:27:58 +00:00
|
|
|
multilineRow indices = do
|
|
|
|
colLines <- many1 (rawTableLine indices)
|
|
|
|
let cols = map unlines $ transpose colLines
|
2015-04-18 18:34:55 -07:00
|
|
|
fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- Parses a table caption: inlines beginning with 'Table:'
|
|
|
|
-- and followed by blank lines.
|
2015-04-18 18:34:55 -07:00
|
|
|
tableCaption :: MarkdownParser (F Inlines)
|
2007-11-03 23:27:58 +00:00
|
|
|
tableCaption = try $ do
|
2012-07-26 20:29:08 -07:00
|
|
|
guardEnabled Ext_table_captions
|
2009-04-29 19:28:31 +00:00
|
|
|
skipNonindentSpaces
|
2010-07-06 21:01:26 -07:00
|
|
|
string ":" <|> string "Table:"
|
2015-04-18 18:34:55 -07:00
|
|
|
trimInlinesF . mconcat <$> many1 inline <* blanklines
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- Parse a simple table with '---' header and one line per row.
|
2009-12-05 21:34:46 +00:00
|
|
|
simpleTable :: Bool -- ^ Headerless table
|
2015-04-18 18:34:55 -07:00
|
|
|
-> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
|
2009-12-05 21:34:46 +00:00
|
|
|
simpleTable headless = do
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
(aligns, _widths, heads', lines') <-
|
|
|
|
tableWith (simpleTableHeader headless) tableLine
|
2010-07-05 23:43:07 -07:00
|
|
|
(return ())
|
|
|
|
(if headless then tableFooter else tableFooter <|> blanklines)
|
2009-11-28 03:22:33 +00:00
|
|
|
-- Simple tables get 0s for relative column widths (i.e., use default)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
return (aligns, replicate (length aligns) 0, heads', lines')
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- Parse a multiline table: starts with row of '-' on top, then header
|
|
|
|
-- (which may be multiline), then the rows,
|
|
|
|
-- which may be multiline, separated by blank lines, and
|
|
|
|
-- ending with a footer (dashed line followed by blank line).
|
2009-12-05 21:34:46 +00:00
|
|
|
multilineTable :: Bool -- ^ Headerless table
|
2015-04-18 18:34:55 -07:00
|
|
|
-> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
|
2009-12-05 21:34:46 +00:00
|
|
|
multilineTable headless =
|
2012-07-24 09:06:13 -07:00
|
|
|
tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter
|
2009-12-05 21:34:46 +00:00
|
|
|
|
|
|
|
multilineTableHeader :: Bool -- ^ Headerless table
|
2015-04-18 18:34:55 -07:00
|
|
|
-> MarkdownParser (F [Blocks], [Alignment], [Int])
|
2009-12-05 21:34:46 +00:00
|
|
|
multilineTableHeader headless = try $ do
|
2014-06-16 20:48:55 -07:00
|
|
|
unless headless $
|
|
|
|
tableSep >> notFollowedBy blankline
|
2009-12-05 21:34:46 +00:00
|
|
|
rawContent <- if headless
|
2012-07-26 22:32:53 -07:00
|
|
|
then return $ repeat ""
|
2014-06-16 21:26:50 -07:00
|
|
|
else many1 $ notFollowedBy tableSep >> anyLine
|
2007-11-03 23:27:58 +00:00
|
|
|
initSp <- nonindentSpaces
|
|
|
|
dashes <- many1 (dashedLine '-')
|
|
|
|
newline
|
2008-07-11 16:33:21 +00:00
|
|
|
let (lengths, lines') = unzip dashes
|
|
|
|
let indices = scanl (+) (length initSp) lines'
|
2009-12-05 21:34:46 +00:00
|
|
|
rawHeadsList <- if headless
|
|
|
|
then liftM (map (:[]) . tail .
|
2012-01-27 00:39:00 -08:00
|
|
|
splitStringByIndices (init indices)) $ lookAhead anyLine
|
2012-07-26 22:32:53 -07:00
|
|
|
else return $ transpose $ map
|
2014-01-07 23:39:49 -08:00
|
|
|
(tail . splitStringByIndices (init indices))
|
2009-12-05 21:34:46 +00:00
|
|
|
rawContent
|
2007-11-03 23:27:58 +00:00
|
|
|
let aligns = zipWith alignType rawHeadsList lengths
|
2009-12-05 21:34:46 +00:00
|
|
|
let rawHeads = if headless
|
|
|
|
then replicate (length dashes) ""
|
2014-01-07 23:39:49 -08:00
|
|
|
else map (unlines . map trim) rawHeadsList
|
2015-04-18 18:34:55 -07:00
|
|
|
heads <- fmap sequence $
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
mapM (parseFromString (mconcat <$> many plain)) $
|
2012-09-29 17:09:34 -04:00
|
|
|
map trim rawHeads
|
2010-07-05 23:43:07 -07:00
|
|
|
return (heads, aligns, indices)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
-- Parse a grid table: starts with row of '-' on top, then header
|
|
|
|
-- (which may be grid), then the rows,
|
|
|
|
-- which may be grid, separated by blank lines, and
|
|
|
|
-- ending with a footer (dashed line followed by blank line).
|
2010-07-05 23:43:07 -07:00
|
|
|
gridTable :: Bool -- ^ Headerless table
|
2015-04-18 18:34:55 -07:00
|
|
|
-> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
gridTable headless =
|
|
|
|
tableWith (gridTableHeader headless) gridTableRow
|
|
|
|
(gridTableSep '-') gridTableFooter
|
|
|
|
|
|
|
|
gridTableSplitLine :: [Int] -> String -> [String]
|
|
|
|
gridTableSplitLine indices line = map removeFinalBar $ tail $
|
2012-09-29 17:09:34 -04:00
|
|
|
splitStringByIndices (init indices) $ trimr line
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
gridPart :: Char -> Parser [Char] st (Int, Int)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
gridPart ch = do
|
|
|
|
dashes <- many1 (char ch)
|
|
|
|
char '+'
|
2015-04-07 13:46:14 +01:00
|
|
|
let lengthDashes = length dashes
|
|
|
|
return (lengthDashes, lengthDashes + 1)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
gridDashedLines :: Char -> Parser [Char] st [(Int,Int)]
|
2014-07-11 12:51:26 +01:00
|
|
|
gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
|
|
|
removeFinalBar :: String -> String
|
|
|
|
removeFinalBar =
|
|
|
|
reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
|
|
|
|
|
|
|
|
-- | Separator between rows of grid table.
|
2013-01-03 20:32:15 -08:00
|
|
|
gridTableSep :: Char -> MarkdownParser Char
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
gridTableSep ch = try $ gridDashedLines ch >> return '\n'
|
|
|
|
|
|
|
|
-- | Parse header for a grid table.
|
|
|
|
gridTableHeader :: Bool -- ^ Headerless table
|
2015-04-18 18:34:55 -07:00
|
|
|
-> MarkdownParser (F [Blocks], [Alignment], [Int])
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
gridTableHeader headless = try $ do
|
|
|
|
optional blanklines
|
|
|
|
dashes <- gridDashedLines '-'
|
|
|
|
rawContent <- if headless
|
|
|
|
then return $ repeat ""
|
|
|
|
else many1
|
|
|
|
(notFollowedBy (gridTableSep '=') >> char '|' >>
|
|
|
|
many1Till anyChar newline)
|
2015-04-18 18:34:55 -07:00
|
|
|
if headless
|
|
|
|
then return ()
|
|
|
|
else gridTableSep '=' >> return ()
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
let lines' = map snd dashes
|
|
|
|
let indices = scanl (+) 0 lines'
|
|
|
|
let aligns = replicate (length lines') AlignDefault
|
|
|
|
-- RST does not have a notion of alignments
|
|
|
|
let rawHeads = if headless
|
|
|
|
then replicate (length dashes) ""
|
2014-01-07 23:39:49 -08:00
|
|
|
else map (unlines . map trim) $ transpose
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
$ map (gridTableSplitLine indices) rawContent
|
2015-04-18 18:34:55 -07:00
|
|
|
heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
return (heads, aligns, indices)
|
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
gridTableRawLine :: [Int] -> MarkdownParser [String]
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
gridTableRawLine indices = do
|
|
|
|
char '|'
|
|
|
|
line <- many1Till anyChar newline
|
|
|
|
return (gridTableSplitLine indices line)
|
|
|
|
|
|
|
|
-- | Parse row of grid table.
|
|
|
|
gridTableRow :: [Int]
|
2015-04-18 18:34:55 -07:00
|
|
|
-> MarkdownParser (F [Blocks])
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
gridTableRow indices = do
|
|
|
|
colLines <- many1 (gridTableRawLine indices)
|
|
|
|
let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
|
|
|
|
transpose colLines
|
2015-04-18 18:34:55 -07:00
|
|
|
fmap compactify' <$> fmap sequence (mapM (parseFromString parseBlocks) cols)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
|
|
|
removeOneLeadingSpace :: [String] -> [String]
|
|
|
|
removeOneLeadingSpace xs =
|
|
|
|
if all startsWithSpace xs
|
|
|
|
then map (drop 1) xs
|
|
|
|
else xs
|
|
|
|
where startsWithSpace "" = True
|
|
|
|
startsWithSpace (y:_) = y == ' '
|
|
|
|
|
|
|
|
-- | Parse footer for a grid table.
|
2013-01-03 20:32:15 -08:00
|
|
|
gridTableFooter :: MarkdownParser [Char]
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
gridTableFooter = blanklines
|
2010-07-05 23:43:07 -07:00
|
|
|
|
2015-10-30 12:37:08 -07:00
|
|
|
pipeBreak :: MarkdownParser ([Alignment], [Int])
|
2014-05-01 09:23:21 -07:00
|
|
|
pipeBreak = try $ do
|
|
|
|
nonindentSpaces
|
|
|
|
openPipe <- (True <$ char '|') <|> return False
|
|
|
|
first <- pipeTableHeaderPart
|
|
|
|
rest <- many $ sepPipe *> pipeTableHeaderPart
|
|
|
|
-- surrounding pipes needed for a one-column table:
|
|
|
|
guard $ not (null rest && not openPipe)
|
|
|
|
optional (char '|')
|
|
|
|
blankline
|
2015-10-30 12:37:08 -07:00
|
|
|
return $ unzip (first:rest)
|
2014-05-01 09:23:21 -07:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
|
2012-08-01 23:34:48 -07:00
|
|
|
pipeTable = try $ do
|
2015-07-27 10:24:06 -07:00
|
|
|
nonindentSpaces
|
|
|
|
lookAhead nonspaceChar
|
2015-12-03 11:02:45 -08:00
|
|
|
(heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak
|
2016-03-09 10:11:32 -08:00
|
|
|
let heads' = take (length aligns) <$> heads
|
2015-12-03 11:02:45 -08:00
|
|
|
lines' <- many pipeTableRow
|
2016-03-09 10:11:32 -08:00
|
|
|
let lines'' = map (take (length aligns) <$>) lines'
|
2015-12-03 11:02:45 -08:00
|
|
|
let maxlength = maximum $
|
2016-03-09 10:11:32 -08:00
|
|
|
map (\x -> length . stringify $ runF x def) (heads' : lines'')
|
2015-10-30 12:37:08 -07:00
|
|
|
numColumns <- getOption readerColumns
|
|
|
|
let widths = if maxlength > numColumns
|
|
|
|
then map (\len ->
|
|
|
|
fromIntegral (len + 1) / fromIntegral numColumns)
|
|
|
|
seplengths
|
|
|
|
else replicate (length aligns) 0.0
|
2016-03-09 10:11:32 -08:00
|
|
|
return $ (aligns, widths, heads', sequence lines'')
|
2012-07-22 22:09:15 -07:00
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
sepPipe :: MarkdownParser ()
|
2012-08-10 13:29:37 -07:00
|
|
|
sepPipe = try $ do
|
|
|
|
char '|' <|> char '+'
|
|
|
|
notFollowedBy blankline
|
2012-07-22 22:09:15 -07:00
|
|
|
|
2012-08-10 13:29:37 -07:00
|
|
|
-- parse a row, also returning probable alignments for org-table cells
|
2015-04-18 18:34:55 -07:00
|
|
|
pipeTableRow :: MarkdownParser (F [Blocks])
|
2016-03-09 08:33:13 -08:00
|
|
|
pipeTableRow = try $ do
|
|
|
|
scanForPipe
|
2015-07-27 10:24:06 -07:00
|
|
|
skipMany spaceChar
|
2014-05-01 09:23:21 -07:00
|
|
|
openPipe <- (True <$ char '|') <|> return False
|
2016-03-09 11:46:00 -08:00
|
|
|
-- split into cells
|
|
|
|
let chunk = void (code <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline')
|
|
|
|
<|> void (noneOf "|\n\r")
|
|
|
|
let cellContents = ((trim . snd) <$> withRaw (many chunk)) >>=
|
|
|
|
parseFromString pipeTableCell
|
|
|
|
cells <- cellContents `sepEndBy1` (char '|')
|
2014-05-01 09:23:21 -07:00
|
|
|
-- surrounding pipes needed for a one-column table:
|
2016-03-09 08:44:31 -08:00
|
|
|
guard $ not (length cells == 1 && not openPipe)
|
2016-03-09 11:46:00 -08:00
|
|
|
blankline
|
|
|
|
return $ sequence cells
|
|
|
|
|
|
|
|
pipeTableCell :: MarkdownParser (F Blocks)
|
|
|
|
pipeTableCell = do
|
|
|
|
result <- many inline
|
|
|
|
if null result
|
|
|
|
then return mempty
|
|
|
|
else return $ B.plain . mconcat <$> sequence result
|
2012-07-22 22:09:15 -07:00
|
|
|
|
2015-10-30 12:37:08 -07:00
|
|
|
pipeTableHeaderPart :: Parser [Char] st (Alignment, Int)
|
2013-03-02 18:59:50 -08:00
|
|
|
pipeTableHeaderPart = try $ do
|
|
|
|
skipMany spaceChar
|
2012-07-22 22:09:15 -07:00
|
|
|
left <- optionMaybe (char ':')
|
2015-10-30 12:37:08 -07:00
|
|
|
pipe <- many1 (char '-')
|
2012-07-22 22:09:15 -07:00
|
|
|
right <- optionMaybe (char ':')
|
2013-03-02 18:59:50 -08:00
|
|
|
skipMany spaceChar
|
2015-10-30 12:37:08 -07:00
|
|
|
let len = length pipe + maybe 0 (const 1) left + maybe 0 (const 1) right
|
2012-07-22 22:09:15 -07:00
|
|
|
return $
|
2015-10-30 12:37:08 -07:00
|
|
|
((case (left,right) of
|
|
|
|
(Nothing,Nothing) -> AlignDefault
|
|
|
|
(Just _,Nothing) -> AlignLeft
|
|
|
|
(Nothing,Just _) -> AlignRight
|
|
|
|
(Just _,Just _) -> AlignCenter), len)
|
2012-07-22 22:09:15 -07:00
|
|
|
|
2012-07-24 07:13:49 -07:00
|
|
|
-- Succeed only if current line contains a pipe.
|
2015-04-18 18:34:55 -07:00
|
|
|
scanForPipe :: Parser [Char] st ()
|
2013-01-25 16:13:58 -08:00
|
|
|
scanForPipe = do
|
|
|
|
inp <- getInput
|
|
|
|
case break (\c -> c == '\n' || c == '|') inp of
|
|
|
|
(_,'|':_) -> return ()
|
|
|
|
_ -> mzero
|
2012-07-24 07:13:49 -07:00
|
|
|
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
-- | Parse a table using 'headerParser', 'rowParser',
|
|
|
|
-- 'lineParser', and 'footerParser'. Variant of the version in
|
|
|
|
-- Text.Pandoc.Parsing.
|
2015-04-18 18:34:55 -07:00
|
|
|
tableWith :: MarkdownParser (F [Blocks], [Alignment], [Int])
|
|
|
|
-> ([Int] -> MarkdownParser (F [Blocks]))
|
2013-01-03 20:32:15 -08:00
|
|
|
-> MarkdownParser sep
|
|
|
|
-> MarkdownParser end
|
2015-04-18 18:34:55 -07:00
|
|
|
-> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
tableWith headerParser rowParser lineParser footerParser = try $ do
|
|
|
|
(heads, aligns, indices) <- headerParser
|
2015-04-18 18:34:55 -07:00
|
|
|
lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
footerParser
|
|
|
|
numColumns <- getOption readerColumns
|
2015-04-18 18:34:55 -07:00
|
|
|
let widths = if (indices == [])
|
|
|
|
then replicate (length aligns) 0.0
|
|
|
|
else widthsFromIndices numColumns indices
|
|
|
|
return $ (aligns, widths, heads, lines')
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
table :: MarkdownParser (F Blocks)
|
2012-07-24 09:06:13 -07:00
|
|
|
table = try $ do
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
frontCaption <- option Nothing (Just <$> tableCaption)
|
|
|
|
(aligns, widths, heads, lns) <-
|
2012-08-01 23:34:48 -07:00
|
|
|
try (guardEnabled Ext_pipe_tables >> scanForPipe >> pipeTable) <|>
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
try (guardEnabled Ext_multiline_tables >>
|
|
|
|
multilineTable False) <|>
|
|
|
|
try (guardEnabled Ext_simple_tables >>
|
|
|
|
(simpleTable True <|> simpleTable False)) <|>
|
|
|
|
try (guardEnabled Ext_multiline_tables >>
|
|
|
|
multilineTable True) <|>
|
|
|
|
try (guardEnabled Ext_grid_tables >>
|
2012-07-26 20:29:08 -07:00
|
|
|
(gridTable False <|> gridTable True)) <?> "table"
|
2012-07-24 09:24:28 -07:00
|
|
|
optional blanklines
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
caption <- case frontCaption of
|
2015-04-18 18:34:55 -07:00
|
|
|
Nothing -> option (return mempty) tableCaption
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
Just c -> return c
|
2016-01-07 10:40:30 -08:00
|
|
|
-- renormalize widths if greater than 100%:
|
|
|
|
let totalWidth = sum widths
|
|
|
|
let widths' = if totalWidth < 1
|
|
|
|
then widths
|
|
|
|
else map (/ totalWidth) widths
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ do
|
|
|
|
caption' <- caption
|
|
|
|
heads' <- heads
|
|
|
|
lns' <- lns
|
2016-01-07 10:40:30 -08:00
|
|
|
return $ B.table caption' (zip aligns widths') heads' lns'
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2012-07-24 09:06:13 -07:00
|
|
|
--
|
2007-11-03 23:27:58 +00:00
|
|
|
-- inline
|
|
|
|
--
|
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
inline :: MarkdownParser (F Inlines)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
inline = choice [ whitespace
|
2012-09-27 13:43:48 -07:00
|
|
|
, bareURL
|
2011-01-22 16:04:32 -08:00
|
|
|
, str
|
2007-11-03 23:27:58 +00:00
|
|
|
, endline
|
|
|
|
, code
|
2013-07-20 21:14:38 -07:00
|
|
|
, strongOrEmph
|
2007-11-03 23:27:58 +00:00
|
|
|
, note
|
2010-11-12 00:37:44 -08:00
|
|
|
, cite
|
2016-09-28 12:33:05 +02:00
|
|
|
, bracketedSpan
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
, link
|
2007-11-03 23:27:58 +00:00
|
|
|
, image
|
|
|
|
, math
|
|
|
|
, strikeout
|
|
|
|
, subscript
|
2013-07-20 21:14:38 -07:00
|
|
|
, superscript
|
2010-12-12 20:30:55 -08:00
|
|
|
, inlineNote -- after superscript because of ^[link](/foo)^
|
2007-11-03 23:27:58 +00:00
|
|
|
, autoLink
|
2013-08-15 22:39:14 -07:00
|
|
|
, spanHtml
|
New HTML reader using tagsoup as a lexer.
* The new reader is faster and more accurate.
* API changes for Text.Pandoc.Readers.HTML:
- removed rawHtmlBlock, anyHtmlBlockTag, anyHtmlInlineTag,
anyHtmlTag, anyHtmlEndTag, htmlEndTag, extractTagType,
htmlBlockElement, htmlComment
- added htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag
* tagsoup is a new dependency.
* Text.Pandoc.Parsing: Generalized type on readWith.
* Benchmark.hs: Added length calculation to force full evaluation.
* Updated HTML reader tests.
* Updated markdown and textile readers to use the functions from
the HTML reader.
* Note: The markdown reader now correctly handles some cases it did not
before. For example:
<hr/>
is reproduced without adding a space.
<script>
a = '<b>';
</script>
is parsed correctly.
2010-12-22 20:25:15 -08:00
|
|
|
, rawHtmlInline
|
2007-11-03 23:27:58 +00:00
|
|
|
, escapedChar
|
2012-01-29 23:54:00 -08:00
|
|
|
, rawLaTeXInline'
|
2010-03-24 10:51:38 -07:00
|
|
|
, exampleRef
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
, smart
|
2015-04-18 18:34:55 -07:00
|
|
|
, return . B.singleton <$> charRef
|
2015-11-13 12:06:39 -08:00
|
|
|
, emoji
|
2007-11-03 23:27:58 +00:00
|
|
|
, symbol
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
, ltSign
|
|
|
|
] <?> "inline"
|
2007-12-24 04:22:31 +00:00
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
escapedChar' :: MarkdownParser Char
|
2011-12-05 20:27:10 -08:00
|
|
|
escapedChar' = try $ do
|
2007-11-03 23:27:58 +00:00
|
|
|
char '\\'
|
2012-07-26 20:29:08 -07:00
|
|
|
(guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum))
|
2016-10-22 23:41:55 +02:00
|
|
|
<|> (guardEnabled Ext_angle_brackets_escapable >>
|
|
|
|
oneOf "\\`*_{}[]()>#+-.!~\"<>")
|
2013-01-16 11:25:17 -08:00
|
|
|
<|> oneOf "\\`*_{}[]()>#+-.!~\""
|
2011-12-05 20:27:10 -08:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
escapedChar :: MarkdownParser (F Inlines)
|
2011-12-05 20:27:10 -08:00
|
|
|
escapedChar = do
|
|
|
|
result <- escapedChar'
|
2012-07-27 09:18:51 -07:00
|
|
|
case result of
|
2015-04-18 18:34:55 -07:00
|
|
|
' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space
|
2012-07-27 09:18:51 -07:00
|
|
|
'\n' -> guardEnabled Ext_escaped_line_breaks >>
|
2015-04-18 18:34:55 -07:00
|
|
|
return (return B.linebreak) -- "\[newline]" is a linebreak
|
|
|
|
_ -> return $ return $ B.str [result]
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
ltSign :: MarkdownParser (F Inlines)
|
2007-11-03 23:27:58 +00:00
|
|
|
ltSign = do
|
2012-08-13 20:04:17 -07:00
|
|
|
guardDisabled Ext_raw_html
|
2014-07-20 17:22:29 -07:00
|
|
|
<|> (notFollowedByHtmlCloser >> notFollowedBy' (htmlTag isBlockTag))
|
2012-07-26 20:29:08 -07:00
|
|
|
char '<'
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return $ B.str "<"
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
exampleRef :: MarkdownParser (F Inlines)
|
2010-03-24 10:51:38 -07:00
|
|
|
exampleRef = try $ do
|
2012-07-27 16:00:27 -07:00
|
|
|
guardEnabled Ext_example_lists
|
2010-03-24 10:51:38 -07:00
|
|
|
char '@'
|
|
|
|
lab <- many1 (alphaNum <|> oneOf "-_")
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ do
|
|
|
|
st <- askF
|
|
|
|
return $ case M.lookup lab (stateExamples st) of
|
|
|
|
Just n -> B.str (show n)
|
|
|
|
Nothing -> B.str ('@':lab)
|
2010-03-24 10:51:38 -07:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
symbol :: MarkdownParser (F Inlines)
|
2012-07-26 22:32:53 -07:00
|
|
|
symbol = do
|
2011-01-24 22:12:42 -08:00
|
|
|
result <- noneOf "<\\\n\t "
|
|
|
|
<|> try (do lookAhead $ char '\\'
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
notFollowedBy' (() <$ rawTeXBlock)
|
2011-01-24 22:12:42 -08:00
|
|
|
char '\\')
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return $ B.str [result]
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- parses inline code, between n `s and n `s
|
2015-04-18 18:34:55 -07:00
|
|
|
code :: MarkdownParser (F Inlines)
|
2012-07-26 22:32:53 -07:00
|
|
|
code = try $ do
|
2007-11-03 23:27:58 +00:00
|
|
|
starts <- many1 (char '`')
|
|
|
|
skipSpaces
|
|
|
|
result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
|
2010-03-06 02:42:15 +00:00
|
|
|
(char '\n' >> notFollowedBy' blankline >> return " "))
|
2012-07-26 22:32:53 -07:00
|
|
|
(try (skipSpaces >> count (length starts) (char '`') >>
|
2007-11-03 23:27:58 +00:00
|
|
|
notFollowedBy (char '`')))
|
2012-07-27 09:18:51 -07:00
|
|
|
attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes >>
|
|
|
|
optional whitespace >> attributes)
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return $ B.codeWith attr $ trim $ concat result
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
math :: MarkdownParser (F Inlines)
|
|
|
|
math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
|
|
|
|
<|> (return . B.math <$> (mathInline >>= applyMacros')) <+?>
|
|
|
|
((getOption readerSmart >>= guard) *> (return <$> apostrophe)
|
2016-02-04 12:06:32 -08:00
|
|
|
<* notFollowedBy (space <|> satisfy isPunctuation))
|
2008-08-13 03:02:42 +00:00
|
|
|
|
2013-07-20 21:14:38 -07:00
|
|
|
-- Parses material enclosed in *s, **s, _s, or __s.
|
|
|
|
-- Designed to avoid backtracking.
|
|
|
|
enclosure :: Char
|
2015-04-18 18:34:55 -07:00
|
|
|
-> MarkdownParser (F Inlines)
|
2013-07-20 21:14:38 -07:00
|
|
|
enclosure c = do
|
2014-07-10 14:37:10 -07:00
|
|
|
-- we can't start an enclosure with _ if after a string and
|
|
|
|
-- the intraword_underscores extension is enabled:
|
|
|
|
guardDisabled Ext_intraword_underscores
|
|
|
|
<|> guard (c == '*')
|
|
|
|
<|> (guard =<< notAfterString)
|
2013-07-20 21:14:38 -07:00
|
|
|
cs <- many1 (char c)
|
2015-04-18 18:34:55 -07:00
|
|
|
(return (B.str cs) <>) <$> whitespace
|
|
|
|
<|> do
|
2014-07-10 14:37:10 -07:00
|
|
|
case length cs of
|
2013-07-20 21:14:38 -07:00
|
|
|
3 -> three c
|
|
|
|
2 -> two c mempty
|
|
|
|
1 -> one c mempty
|
2015-04-18 18:34:55 -07:00
|
|
|
_ -> return (return $ B.str cs)
|
2013-07-20 21:14:38 -07:00
|
|
|
|
2014-07-10 14:51:08 -07:00
|
|
|
ender :: Char -> Int -> MarkdownParser ()
|
|
|
|
ender c n = try $ do
|
|
|
|
count n (char c)
|
|
|
|
guard (c == '*')
|
|
|
|
<|> guardDisabled Ext_intraword_underscores
|
|
|
|
<|> notFollowedBy alphaNum
|
|
|
|
|
2013-07-20 21:14:38 -07:00
|
|
|
-- Parse inlines til you hit one c or a sequence of two cs.
|
|
|
|
-- If one c, emit emph and then parse two.
|
|
|
|
-- If two cs, emit strong and then parse one.
|
2013-11-22 19:41:08 -08:00
|
|
|
-- Otherwise, emit ccc then the results.
|
2015-04-18 18:34:55 -07:00
|
|
|
three :: Char -> MarkdownParser (F Inlines)
|
2013-07-20 21:14:38 -07:00
|
|
|
three c = do
|
2014-07-10 14:51:08 -07:00
|
|
|
contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline)
|
2015-04-18 18:34:55 -07:00
|
|
|
(ender c 3 >> return ((B.strong . B.emph) <$> contents))
|
|
|
|
<|> (ender c 2 >> one c (B.strong <$> contents))
|
|
|
|
<|> (ender c 1 >> two c (B.emph <$> contents))
|
|
|
|
<|> return (return (B.str [c,c,c]) <> contents)
|
2013-07-20 21:14:38 -07:00
|
|
|
|
|
|
|
-- Parse inlines til you hit two c's, and emit strong.
|
|
|
|
-- If you never do hit two cs, emit ** plus inlines parsed.
|
2015-04-18 18:34:55 -07:00
|
|
|
two :: Char -> F Inlines -> MarkdownParser (F Inlines)
|
2013-07-20 21:14:38 -07:00
|
|
|
two c prefix' = do
|
2014-07-10 14:51:08 -07:00
|
|
|
contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline)
|
2015-04-18 18:34:55 -07:00
|
|
|
(ender c 2 >> return (B.strong <$> (prefix' <> contents)))
|
|
|
|
<|> return (return (B.str [c,c]) <> (prefix' <> contents))
|
2013-07-20 21:14:38 -07:00
|
|
|
|
|
|
|
-- Parse inlines til you hit a c, and emit emph.
|
|
|
|
-- If you never hit a c, emit * plus inlines parsed.
|
2015-04-18 18:34:55 -07:00
|
|
|
one :: Char -> F Inlines -> MarkdownParser (F Inlines)
|
2013-07-20 21:14:38 -07:00
|
|
|
one c prefix' = do
|
2014-07-10 14:51:08 -07:00
|
|
|
contents <- mconcat <$> many ( (notFollowedBy (ender c 1) >> inline)
|
2013-07-20 21:14:38 -07:00
|
|
|
<|> try (string [c,c] >>
|
2014-07-10 14:51:08 -07:00
|
|
|
notFollowedBy (ender c 1) >>
|
2013-11-22 19:41:08 -08:00
|
|
|
two c mempty) )
|
2015-04-18 18:34:55 -07:00
|
|
|
(ender c 1 >> return (B.emph <$> (prefix' <> contents)))
|
|
|
|
<|> return (return (B.str [c]) <> (prefix' <> contents))
|
2013-07-20 21:14:38 -07:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
strongOrEmph :: MarkdownParser (F Inlines)
|
2014-07-10 14:37:10 -07:00
|
|
|
strongOrEmph = enclosure '*' <|> enclosure '_'
|
2011-07-30 18:08:49 -07:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
-- | Parses a list of inlines between start and end delimiters.
|
2011-07-30 18:08:49 -07:00
|
|
|
inlinesBetween :: (Show b)
|
2013-01-03 20:32:15 -08:00
|
|
|
=> MarkdownParser a
|
|
|
|
-> MarkdownParser b
|
2015-04-18 18:34:55 -07:00
|
|
|
-> MarkdownParser (F Inlines)
|
2011-07-30 18:08:49 -07:00
|
|
|
inlinesBetween start end =
|
2015-04-18 18:34:55 -07:00
|
|
|
(trimInlinesF . mconcat) <$> try (start >> many1Till inner end)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
|
2014-07-11 12:51:26 +01:00
|
|
|
innerSpace = try $ whitespace <* notFollowedBy' end
|
2009-11-29 19:31:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
strikeout :: MarkdownParser (F Inlines)
|
|
|
|
strikeout = fmap B.strikeout <$>
|
2012-07-26 20:29:08 -07:00
|
|
|
(guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd)
|
2011-07-30 19:43:20 -07:00
|
|
|
where strikeStart = string "~~" >> lookAhead nonspaceChar
|
|
|
|
>> notFollowedBy (char '~')
|
|
|
|
strikeEnd = try $ string "~~"
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
superscript :: MarkdownParser (F Inlines)
|
|
|
|
superscript = fmap B.superscript <$> try (do
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
guardEnabled Ext_superscript
|
|
|
|
char '^'
|
|
|
|
mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^'))
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
subscript :: MarkdownParser (F Inlines)
|
|
|
|
subscript = fmap B.subscript <$> try (do
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
guardEnabled Ext_subscript
|
|
|
|
char '~'
|
|
|
|
mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~'))
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
whitespace :: MarkdownParser (F Inlines)
|
|
|
|
whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
|
|
|
|
regsp = skipMany spaceChar >> return B.space
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
nonEndline :: Parser [Char] st Char
|
2007-11-03 23:27:58 +00:00
|
|
|
nonEndline = satisfy (/='\n')
|
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
str :: MarkdownParser (F Inlines)
|
2009-04-30 04:39:45 +00:00
|
|
|
str = do
|
2013-01-25 23:28:57 -08:00
|
|
|
result <- many1 alphaNum
|
2014-05-14 14:45:37 +02:00
|
|
|
updateLastStrPos
|
2009-05-01 04:17:54 +00:00
|
|
|
let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
|
2013-01-25 23:28:57 -08:00
|
|
|
isSmart <- getOption readerSmart
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
if isSmart
|
2009-04-30 04:39:45 +00:00
|
|
|
then case likelyAbbrev result of
|
2015-04-18 18:34:55 -07:00
|
|
|
[] -> return $ return $ B.str result
|
2009-04-30 04:39:45 +00:00
|
|
|
xs -> choice (map (\x ->
|
2010-12-13 19:25:22 -08:00
|
|
|
try (string x >> oneOf " \n" >>
|
2010-12-13 19:34:29 -08:00
|
|
|
lookAhead alphaNum >>
|
2015-04-18 18:34:55 -07:00
|
|
|
return (return $ B.str
|
|
|
|
$ result ++ spacesToNbr x ++ "\160"))) xs)
|
|
|
|
<|> (return $ return $ B.str result)
|
|
|
|
else return $ return $ B.str result
|
2009-04-30 04:39:45 +00:00
|
|
|
|
|
|
|
-- | if the string matches the beginning of an abbreviation (before
|
|
|
|
-- the first period, return strings that would finish the abbreviation.
|
|
|
|
likelyAbbrev :: String -> [String]
|
|
|
|
likelyAbbrev x =
|
|
|
|
let abbrevs = [ "Mr.", "Mrs.", "Ms.", "Capt.", "Dr.", "Prof.",
|
|
|
|
"Gen.", "Gov.", "e.g.", "i.e.", "Sgt.", "St.",
|
|
|
|
"vol.", "vs.", "Sen.", "Rep.", "Pres.", "Hon.",
|
2010-03-06 04:08:34 +00:00
|
|
|
"Rev.", "Ph.D.", "M.D.", "M.A.", "p.", "pp.",
|
2012-06-04 11:13:41 -07:00
|
|
|
"ch.", "sec.", "cf.", "cp."]
|
2009-04-30 04:39:45 +00:00
|
|
|
abbrPairs = map (break (=='.')) abbrevs
|
|
|
|
in map snd $ filter (\(y,_) -> y == x) abbrPairs
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- an endline character that can be treated as a space, not a structural break
|
2015-04-18 18:34:55 -07:00
|
|
|
endline :: MarkdownParser (F Inlines)
|
2007-11-03 23:27:58 +00:00
|
|
|
endline = try $ do
|
|
|
|
newline
|
|
|
|
notFollowedBy blankline
|
2014-02-04 10:05:52 -08:00
|
|
|
-- parse potential list-starts differently if in a list:
|
2014-09-26 13:32:08 +04:00
|
|
|
notFollowedBy (inList >> listStart)
|
2013-09-07 09:36:37 -07:00
|
|
|
guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart
|
2012-07-26 20:29:08 -07:00
|
|
|
guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
|
2014-10-14 13:28:28 +02:00
|
|
|
guardEnabled Ext_blank_before_header <|> (notFollowedBy . char =<< atxChar) -- atx header
|
2014-03-24 09:56:16 -07:00
|
|
|
guardDisabled Ext_backtick_code_blocks <|>
|
2013-09-08 11:43:46 -07:00
|
|
|
notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
|
2014-08-31 12:55:47 -07:00
|
|
|
notFollowedByHtmlCloser
|
2014-06-20 11:10:35 -07:00
|
|
|
(eof >> return mempty)
|
2015-04-18 18:34:55 -07:00
|
|
|
<|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
|
2013-07-17 15:38:56 -07:00
|
|
|
<|> (guardEnabled Ext_ignore_line_breaks >> return mempty)
|
2015-12-11 15:58:11 -08:00
|
|
|
<|> (skipMany spaceChar >> return (return B.softbreak))
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- links
|
|
|
|
--
|
|
|
|
|
|
|
|
-- a reference label for a link
|
2015-04-18 18:34:55 -07:00
|
|
|
reference :: MarkdownParser (F Inlines, String)
|
2007-12-24 04:22:31 +00:00
|
|
|
reference = do notFollowedBy' (string "[^") -- footnote reference
|
2015-04-18 18:34:55 -07:00
|
|
|
withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2013-03-13 19:18:20 -07:00
|
|
|
parenthesizedChars :: MarkdownParser [Char]
|
|
|
|
parenthesizedChars = do
|
|
|
|
result <- charsInBalanced '(' ')' litChar
|
|
|
|
return $ '(' : result ++ ")"
|
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
-- source for a link, with optional title
|
2013-01-03 20:32:15 -08:00
|
|
|
source :: MarkdownParser (String, String)
|
2013-01-16 11:25:17 -08:00
|
|
|
source = do
|
|
|
|
char '('
|
2007-12-24 04:22:41 +00:00
|
|
|
skipSpaces
|
2014-08-14 10:56:41 -07:00
|
|
|
let urlChunk =
|
|
|
|
try parenthesizedChars
|
|
|
|
<|> (notFollowedBy (oneOf " )") >> (count 1 litChar))
|
|
|
|
<|> try (many1 spaceChar <* notFollowedBy (oneOf "\"')"))
|
2013-01-16 11:25:17 -08:00
|
|
|
let sourceURL = (unwords . words . concat) <$> many urlChunk
|
2011-12-05 21:13:24 -08:00
|
|
|
let betweenAngles = try $
|
2013-01-16 11:25:17 -08:00
|
|
|
char '<' >> manyTill litChar (char '>')
|
2010-12-10 12:14:51 -08:00
|
|
|
src <- try betweenAngles <|> sourceURL
|
2013-01-16 11:25:17 -08:00
|
|
|
tit <- option "" $ try $ spnl >> linkTitle
|
2007-11-03 23:27:58 +00:00
|
|
|
skipSpaces
|
2013-01-16 11:25:17 -08:00
|
|
|
char ')'
|
2012-09-29 17:09:34 -04:00
|
|
|
return (escapeURI $ trimr src, tit)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2013-01-03 20:32:15 -08:00
|
|
|
linkTitle :: MarkdownParser String
|
2013-02-15 20:27:29 -08:00
|
|
|
linkTitle = quotedTitle '"' <|> quotedTitle '\''
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
link :: MarkdownParser (F Inlines)
|
2007-11-03 23:27:58 +00:00
|
|
|
link = try $ do
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
st <- getState
|
|
|
|
guard $ stateAllowLinks st
|
|
|
|
setState $ st{ stateAllowLinks = False }
|
|
|
|
(lab,raw) <- reference
|
|
|
|
setState $ st{ stateAllowLinks = True }
|
2015-04-02 21:09:08 -07:00
|
|
|
regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
2016-09-28 12:33:05 +02:00
|
|
|
bracketedSpan :: MarkdownParser (F Inlines)
|
|
|
|
bracketedSpan = try $ do
|
|
|
|
guardEnabled Ext_bracketed_spans
|
|
|
|
(lab,_) <- reference
|
|
|
|
attr <- attributes
|
|
|
|
return $ B.spanWith attr <$> lab
|
|
|
|
|
2015-11-19 22:41:12 -08:00
|
|
|
regLink :: (Attr -> String -> String -> Inlines -> Inlines)
|
2015-04-18 18:34:55 -07:00
|
|
|
-> F Inlines -> MarkdownParser (F Inlines)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
regLink constructor lab = try $ do
|
|
|
|
(src, tit) <- source
|
2015-04-02 21:09:08 -07:00
|
|
|
attr <- option nullAttr $
|
2015-11-19 22:58:19 -08:00
|
|
|
guardEnabled Ext_link_attributes >> attributes
|
2015-11-19 22:41:12 -08:00
|
|
|
return $ constructor attr src tit <$> lab
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- a link like [this][ref] or [this][] or [this]
|
2015-11-19 22:41:12 -08:00
|
|
|
referenceLink :: (Attr -> String -> String -> Inlines -> Inlines)
|
2015-04-18 18:34:55 -07:00
|
|
|
-> (F Inlines, String) -> MarkdownParser (F Inlines)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
referenceLink constructor (lab, raw) = do
|
2013-06-19 08:56:11 -07:00
|
|
|
sp <- (True <$ lookAhead (char ' ')) <|> return False
|
2015-05-13 23:02:54 -07:00
|
|
|
(_,raw') <- option (mempty, "") $
|
2014-12-15 10:50:10 -08:00
|
|
|
lookAhead (try (spnl >> normalCite >> return (mempty, "")))
|
|
|
|
<|>
|
|
|
|
try (spnl >> reference)
|
2015-03-03 03:28:56 +02:00
|
|
|
when (raw' == "") $ guardEnabled Ext_shortcut_reference_links
|
2013-01-03 20:32:15 -08:00
|
|
|
let labIsRef = raw' == "" || raw' == "[]"
|
|
|
|
let key = toKey $ if labIsRef then raw else raw'
|
2013-06-19 08:56:11 -07:00
|
|
|
parsedRaw <- parseFromString (mconcat <$> many inline) raw'
|
2015-04-02 21:09:08 -07:00
|
|
|
fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw
|
2013-01-03 20:32:15 -08:00
|
|
|
implicitHeaderRefs <- option False $
|
|
|
|
True <$ guardEnabled Ext_implicit_header_references
|
2015-04-18 18:34:55 -07:00
|
|
|
let makeFallback = do
|
|
|
|
parsedRaw' <- parsedRaw
|
|
|
|
fallback' <- fallback
|
|
|
|
return $ B.str "[" <> fallback' <> B.str "]" <>
|
2013-06-19 09:05:30 -07:00
|
|
|
(if sp && not (null raw) then B.space else mempty) <>
|
2015-04-18 18:34:55 -07:00
|
|
|
parsedRaw'
|
|
|
|
return $ do
|
|
|
|
keys <- asksF stateKeys
|
|
|
|
case M.lookup key keys of
|
2015-05-13 23:02:54 -07:00
|
|
|
Nothing ->
|
2015-04-18 18:34:55 -07:00
|
|
|
if implicitHeaderRefs
|
2015-05-13 23:02:54 -07:00
|
|
|
then do
|
|
|
|
headerKeys <- asksF stateHeaderKeys
|
|
|
|
case M.lookup key headerKeys of
|
2015-11-19 22:41:12 -08:00
|
|
|
Just ((src, tit), _) -> constructor nullAttr src tit <$> lab
|
2015-04-02 21:09:08 -07:00
|
|
|
Nothing -> makeFallback
|
2015-04-18 18:34:55 -07:00
|
|
|
else makeFallback
|
2015-11-19 22:41:12 -08:00
|
|
|
Just ((src,tit), attr) -> constructor attr src tit <$> lab
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
2013-06-19 09:05:30 -07:00
|
|
|
dropBrackets :: String -> String
|
|
|
|
dropBrackets = reverse . dropRB . reverse . dropLB
|
|
|
|
where dropRB (']':xs) = xs
|
|
|
|
dropRB xs = xs
|
|
|
|
dropLB ('[':xs) = xs
|
|
|
|
dropLB xs = xs
|
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
bareURL :: MarkdownParser (F Inlines)
|
2012-09-27 13:43:48 -07:00
|
|
|
bareURL = try $ do
|
2013-01-15 12:44:50 -08:00
|
|
|
guardEnabled Ext_autolink_bare_uris
|
2015-07-14 13:16:20 -07:00
|
|
|
getState >>= guard . stateAllowLinks
|
2012-09-27 13:43:48 -07:00
|
|
|
(orig, src) <- uri <|> emailAddress
|
2013-09-01 15:18:56 -07:00
|
|
|
notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a")
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return $ B.link src "" (B.str orig)
|
2012-09-27 13:43:48 -07:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
autoLink :: MarkdownParser (F Inlines)
|
2007-11-03 23:27:58 +00:00
|
|
|
autoLink = try $ do
|
2015-07-14 13:16:20 -07:00
|
|
|
getState >>= guard . stateAllowLinks
|
2007-11-03 23:27:58 +00:00
|
|
|
char '<'
|
2010-03-23 15:07:48 -07:00
|
|
|
(orig, src) <- uri <|> emailAddress
|
2013-02-28 22:05:22 -08:00
|
|
|
-- in rare cases, something may remain after the uri parser
|
|
|
|
-- is finished, because the uri parser tries to avoid parsing
|
|
|
|
-- final punctuation. for example: in `<http://hi---there>`,
|
|
|
|
-- the URI parser will stop before the dashes.
|
|
|
|
extra <- fromEntities <$> manyTill nonspaceChar (char '>')
|
2016-10-26 12:18:58 +02:00
|
|
|
attr <- option nullAttr $ try $
|
|
|
|
guardEnabled Ext_link_attributes >> attributes
|
|
|
|
return $ return $ B.linkWith attr (src ++ escapeURI extra) "" (B.str $ orig ++ extra)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
image :: MarkdownParser (F Inlines)
|
2007-11-03 23:27:58 +00:00
|
|
|
image = try $ do
|
|
|
|
char '!'
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
(lab,raw) <- reference
|
2013-02-05 20:08:00 -08:00
|
|
|
defaultExt <- getOption readerDefaultImageExtension
|
2015-11-19 22:41:12 -08:00
|
|
|
let constructor attr' src = case takeExtension src of
|
|
|
|
"" -> B.imageWith attr' (addExtension src defaultExt)
|
|
|
|
_ -> B.imageWith attr' src
|
2013-02-05 20:08:00 -08:00
|
|
|
regLink constructor lab <|> referenceLink constructor (lab,raw)
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
note :: MarkdownParser (F Inlines)
|
2007-11-03 23:27:58 +00:00
|
|
|
note = try $ do
|
2012-07-26 20:29:08 -07:00
|
|
|
guardEnabled Ext_footnotes
|
2007-11-03 23:27:58 +00:00
|
|
|
ref <- noteMarker
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ do
|
|
|
|
notes <- asksF stateNotes'
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
case lookup ref notes of
|
2015-04-18 18:34:55 -07:00
|
|
|
Nothing -> return $ B.str $ "[^" ++ ref ++ "]"
|
|
|
|
Just contents -> do
|
|
|
|
st <- askF
|
|
|
|
-- process the note in a context that doesn't resolve
|
|
|
|
-- notes, to avoid infinite looping with notes inside
|
|
|
|
-- notes:
|
|
|
|
let contents' = runF contents st{ stateNotes' = [] }
|
|
|
|
return $ B.note contents'
|
|
|
|
|
|
|
|
inlineNote :: MarkdownParser (F Inlines)
|
2007-11-03 23:27:58 +00:00
|
|
|
inlineNote = try $ do
|
2012-07-26 20:29:08 -07:00
|
|
|
guardEnabled Ext_inline_notes
|
2007-11-03 23:27:58 +00:00
|
|
|
char '^'
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
contents <- inlinesInBalancedBrackets
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ B.note . B.para <$> contents
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
rawLaTeXInline' :: MarkdownParser (F Inlines)
|
2011-01-24 22:12:42 -08:00
|
|
|
rawLaTeXInline' = try $ do
|
2012-07-26 20:29:08 -07:00
|
|
|
guardEnabled Ext_raw_tex
|
2012-01-29 23:54:00 -08:00
|
|
|
lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env
|
2015-04-18 18:34:55 -07:00
|
|
|
RawInline _ s <- rawLaTeXInline
|
|
|
|
return $ return $ B.rawInline "tex" s
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
-- "tex" because it might be context or latex
|
2008-08-11 07:04:36 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
rawConTeXtEnvironment :: Parser [Char] st String
|
2012-01-29 23:54:00 -08:00
|
|
|
rawConTeXtEnvironment = try $ do
|
2008-08-11 07:04:36 +00:00
|
|
|
string "\\start"
|
|
|
|
completion <- inBrackets (letter <|> digit <|> spaceChar)
|
|
|
|
<|> (many1 letter)
|
2012-01-29 23:54:00 -08:00
|
|
|
contents <- manyTill (rawConTeXtEnvironment <|> (count 1 anyChar))
|
2008-08-11 07:04:36 +00:00
|
|
|
(try $ string "\\stop" >> string completion)
|
|
|
|
return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion
|
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
inBrackets :: (Parser [Char] st Char) -> Parser [Char] st String
|
2008-08-11 07:04:36 +00:00
|
|
|
inBrackets parser = do
|
|
|
|
char '['
|
|
|
|
contents <- many parser
|
|
|
|
char ']'
|
|
|
|
return $ "[" ++ contents ++ "]"
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
spanHtml :: MarkdownParser (F Inlines)
|
2013-08-15 22:39:14 -07:00
|
|
|
spanHtml = try $ do
|
2014-08-08 21:04:25 -07:00
|
|
|
guardEnabled Ext_native_spans
|
2013-08-15 22:39:14 -07:00
|
|
|
(TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
|
|
|
|
contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span"))
|
2013-12-19 21:07:09 -05:00
|
|
|
let ident = fromMaybe "" $ lookup "id" attrs
|
2013-08-15 22:39:14 -07:00
|
|
|
let classes = maybe [] words $ lookup "class" attrs
|
|
|
|
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
|
2014-06-20 15:24:24 -07:00
|
|
|
case lookup "style" keyvals of
|
|
|
|
Just s | null ident && null classes &&
|
|
|
|
map toLower (filter (`notElem` " \t;") s) ==
|
|
|
|
"font-variant:small-caps"
|
2015-04-18 18:34:55 -07:00
|
|
|
-> return $ B.smallcaps <$> contents
|
|
|
|
_ -> return $ B.spanWith (ident, classes, keyvals) <$> contents
|
2013-08-15 22:39:14 -07:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
divHtml :: MarkdownParser (F Blocks)
|
2013-08-15 22:39:14 -07:00
|
|
|
divHtml = try $ do
|
2014-08-08 21:04:25 -07:00
|
|
|
guardEnabled Ext_native_divs
|
2014-02-26 22:46:38 -08:00
|
|
|
(TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" [])
|
2014-07-10 15:04:18 -07:00
|
|
|
-- we set stateInHtmlBlock so that closing tags that can be either block or
|
|
|
|
-- inline will not be parsed as inline tags
|
|
|
|
oldInHtmlBlock <- stateInHtmlBlock <$> getState
|
|
|
|
updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
|
2014-02-26 22:46:38 -08:00
|
|
|
bls <- option "" (blankline >> option "" blanklines)
|
|
|
|
contents <- mconcat <$>
|
|
|
|
many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block)
|
|
|
|
closed <- option False (True <$ htmlTag (~== TagClose "div"))
|
|
|
|
if closed
|
|
|
|
then do
|
2014-07-10 15:04:18 -07:00
|
|
|
updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
|
2014-02-26 22:46:38 -08:00
|
|
|
let ident = fromMaybe "" $ lookup "id" attrs
|
|
|
|
let classes = maybe [] words $ lookup "class" attrs
|
|
|
|
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ B.divWith (ident, classes, keyvals) <$> contents
|
2014-02-26 22:46:38 -08:00
|
|
|
else -- avoid backtracing
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
|
2013-08-15 22:39:14 -07:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
rawHtmlInline :: MarkdownParser (F Inlines)
|
New HTML reader using tagsoup as a lexer.
* The new reader is faster and more accurate.
* API changes for Text.Pandoc.Readers.HTML:
- removed rawHtmlBlock, anyHtmlBlockTag, anyHtmlInlineTag,
anyHtmlTag, anyHtmlEndTag, htmlEndTag, extractTagType,
htmlBlockElement, htmlComment
- added htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag
* tagsoup is a new dependency.
* Text.Pandoc.Parsing: Generalized type on readWith.
* Benchmark.hs: Added length calculation to force full evaluation.
* Updated HTML reader tests.
* Updated markdown and textile readers to use the functions from
the HTML reader.
* Note: The markdown reader now correctly handles some cases it did not
before. For example:
<hr/>
is reproduced without adding a space.
<script>
a = '<b>';
</script>
is parsed correctly.
2010-12-22 20:25:15 -08:00
|
|
|
rawHtmlInline = do
|
2012-08-12 22:04:23 -07:00
|
|
|
guardEnabled Ext_raw_html
|
2014-07-07 15:47:51 -06:00
|
|
|
inHtmlBlock <- stateInHtmlBlock <$> getState
|
|
|
|
let isCloseBlockTag t = case inHtmlBlock of
|
|
|
|
Just t' -> t ~== TagClose t'
|
|
|
|
Nothing -> False
|
2012-07-26 20:29:08 -07:00
|
|
|
mdInHtml <- option False $
|
2014-07-20 17:22:29 -07:00
|
|
|
( guardEnabled Ext_markdown_in_html_blocks
|
|
|
|
<|> guardEnabled Ext_markdown_attribute
|
|
|
|
) >> return True
|
2013-01-25 19:50:34 -08:00
|
|
|
(_,result) <- htmlTag $ if mdInHtml
|
2014-07-07 15:47:51 -06:00
|
|
|
then (\x -> isInlineTag x &&
|
|
|
|
not (isCloseBlockTag x))
|
2013-01-25 19:50:34 -08:00
|
|
|
else not . isTextTag
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ return $ B.rawInline "html" result
|
2007-11-03 23:27:58 +00:00
|
|
|
|
2015-11-13 12:06:39 -08:00
|
|
|
-- Emoji
|
|
|
|
|
|
|
|
emojiChars :: [Char]
|
|
|
|
emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-']
|
|
|
|
|
|
|
|
emoji :: MarkdownParser (F Inlines)
|
|
|
|
emoji = try $ do
|
|
|
|
guardEnabled Ext_emoji
|
|
|
|
char ':'
|
|
|
|
emojikey <- many1 (oneOf emojiChars)
|
|
|
|
char ':'
|
|
|
|
case M.lookup emojikey emojis of
|
|
|
|
Just s -> return (return (B.str s))
|
|
|
|
Nothing -> mzero
|
|
|
|
|
2010-11-12 00:37:44 -08:00
|
|
|
-- Citations
|
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
cite :: MarkdownParser (F Inlines)
|
2010-11-12 00:37:44 -08:00
|
|
|
cite = do
|
2012-07-26 20:29:08 -07:00
|
|
|
guardEnabled Ext_citations
|
2015-04-18 18:34:55 -07:00
|
|
|
citations <- textualCite
|
|
|
|
<|> do (cs, raw) <- withRaw normalCite
|
|
|
|
return $ (flip B.cite (B.text raw)) <$> cs
|
|
|
|
return citations
|
2013-08-18 23:01:23 -07:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
textualCite :: MarkdownParser (F Inlines)
|
2013-02-06 13:14:37 -08:00
|
|
|
textualCite = try $ do
|
2010-11-18 12:38:45 -08:00
|
|
|
(_, key) <- citeKey
|
2012-10-21 23:16:23 -07:00
|
|
|
let first = Citation{ citationId = key
|
|
|
|
, citationPrefix = []
|
|
|
|
, citationSuffix = []
|
|
|
|
, citationMode = AuthorInText
|
|
|
|
, citationNoteNum = 0
|
|
|
|
, citationHash = 0
|
2010-11-12 00:37:44 -08:00
|
|
|
}
|
2013-09-14 22:27:25 -07:00
|
|
|
mbrest <- option Nothing $ try $ spnl >> Just <$> withRaw normalCite
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
case mbrest of
|
2013-09-14 22:27:25 -07:00
|
|
|
Just (rest, raw) ->
|
|
|
|
return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:))
|
2015-04-18 18:34:55 -07:00
|
|
|
<$> rest
|
2013-09-14 22:27:25 -07:00
|
|
|
Nothing ->
|
2015-04-17 17:33:55 +03:00
|
|
|
(do
|
|
|
|
(cs, raw) <- withRaw $ bareloc first
|
|
|
|
let (spaces',raw') = span isSpace raw
|
|
|
|
spc | null spaces' = mempty
|
|
|
|
| otherwise = B.space
|
|
|
|
lab <- parseFromString (mconcat <$> many inline) $ dropBrackets raw'
|
2015-04-02 21:09:08 -07:00
|
|
|
fallback <- referenceLink B.linkWith (lab,raw')
|
2015-04-17 17:33:55 +03:00
|
|
|
return $ do
|
|
|
|
fallback' <- fallback
|
|
|
|
cs' <- cs
|
|
|
|
return $
|
|
|
|
case B.toList fallback' of
|
|
|
|
Link{}:_ -> B.cite [first] (B.str $ '@':key) <> spc <> fallback'
|
|
|
|
_ -> B.cite cs' (B.text $ '@':key ++ " " ++ raw))
|
2015-04-18 18:34:55 -07:00
|
|
|
<|> return (do st <- askF
|
|
|
|
return $ case M.lookup key (stateExamples st) of
|
|
|
|
Just n -> B.str (show n)
|
|
|
|
_ -> B.cite [first] $ B.str $ '@':key)
|
2010-11-18 12:38:45 -08:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
bareloc :: Citation -> MarkdownParser (F [Citation])
|
2010-11-18 12:38:45 -08:00
|
|
|
bareloc c = try $ do
|
|
|
|
spnl
|
|
|
|
char '['
|
2015-04-17 17:33:55 +03:00
|
|
|
notFollowedBy $ char '^'
|
2010-11-18 13:22:20 -08:00
|
|
|
suff <- suffix
|
2015-04-18 18:34:55 -07:00
|
|
|
rest <- option (return []) $ try $ char ';' >> citeList
|
2010-11-18 12:38:45 -08:00
|
|
|
spnl
|
|
|
|
char ']'
|
2015-04-17 17:33:55 +03:00
|
|
|
notFollowedBy $ oneOf "[("
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ do
|
|
|
|
suff' <- suff
|
|
|
|
rest' <- rest
|
|
|
|
return $ c{ citationSuffix = B.toList suff' } : rest'
|
2010-11-18 12:38:45 -08:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
normalCite :: MarkdownParser (F [Citation])
|
2010-11-12 00:37:44 -08:00
|
|
|
normalCite = try $ do
|
2010-11-18 12:38:45 -08:00
|
|
|
char '['
|
|
|
|
spnl
|
|
|
|
citations <- citeList
|
|
|
|
spnl
|
|
|
|
char ']'
|
|
|
|
return citations
|
2010-11-12 00:37:44 -08:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
suffix :: MarkdownParser (F Inlines)
|
2010-11-18 12:38:45 -08:00
|
|
|
suffix = try $ do
|
2011-11-09 13:18:01 -08:00
|
|
|
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
|
2010-11-12 00:37:44 -08:00
|
|
|
spnl
|
2015-04-18 18:34:55 -07:00
|
|
|
rest <- trimInlinesF . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline)
|
2011-11-09 13:18:01 -08:00
|
|
|
return $ if hasSpace
|
2015-04-18 18:34:55 -07:00
|
|
|
then (B.space <>) <$> rest
|
2011-11-09 13:18:01 -08:00
|
|
|
else rest
|
2010-11-18 12:38:45 -08:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
prefix :: MarkdownParser (F Inlines)
|
|
|
|
prefix = trimInlinesF . mconcat <$>
|
2010-11-18 13:22:20 -08:00
|
|
|
manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey))
|
2010-11-18 12:38:45 -08:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
citeList :: MarkdownParser (F [Citation])
|
|
|
|
citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl)
|
2010-11-12 00:37:44 -08:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
citation :: MarkdownParser (F Citation)
|
2010-11-12 00:37:44 -08:00
|
|
|
citation = try $ do
|
|
|
|
pref <- prefix
|
2010-11-18 12:38:45 -08:00
|
|
|
(suppress_author, key) <- citeKey
|
2010-11-18 13:22:20 -08:00
|
|
|
suff <- suffix
|
2015-04-18 18:34:55 -07:00
|
|
|
return $ do
|
|
|
|
x <- pref
|
|
|
|
y <- suff
|
|
|
|
return $ Citation{ citationId = key
|
|
|
|
, citationPrefix = B.toList x
|
|
|
|
, citationSuffix = B.toList y
|
|
|
|
, citationMode = if suppress_author
|
|
|
|
then SuppressAuthor
|
|
|
|
else NormalCitation
|
|
|
|
, citationNoteNum = 0
|
|
|
|
, citationHash = 0
|
|
|
|
}
|
|
|
|
|
|
|
|
smart :: MarkdownParser (F Inlines)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
smart = do
|
|
|
|
getOption readerSmart >>= guard
|
|
|
|
doubleQuoted <|> singleQuoted <|>
|
2015-04-18 18:34:55 -07:00
|
|
|
choice (map (return <$>) [apostrophe, dash, ellipses])
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
|
2015-04-18 18:34:55 -07:00
|
|
|
singleQuoted :: MarkdownParser (F Inlines)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
singleQuoted = try $ do
|
|
|
|
singleQuoteStart
|
|
|
|
withQuoteContext InSingleQuote $
|
2015-04-18 18:34:55 -07:00
|
|
|
fmap B.singleQuoted . trimInlinesF . mconcat <$>
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
many1Till inline singleQuoteEnd
|
|
|
|
|
2013-03-12 19:18:14 -07:00
|
|
|
-- doubleQuoted will handle regular double-quoted sections, as well
|
|
|
|
-- as dialogues with an open double-quote without a close double-quote
|
|
|
|
-- in the same paragraph.
|
2015-04-18 18:34:55 -07:00
|
|
|
doubleQuoted :: MarkdownParser (F Inlines)
|
Major rewrite of markdown reader.
* Use Builder's Inlines/Blocks instead of lists.
* Return values in the reader monad, which are then
run (at the end of parsing) against the final
parser state. This allows links, notes, and
example numbers to be resolved without a second
parser pass.
* An effect of using Builder is that everything is
normalized automatically.
* New exports from Text.Pandoc.Parsing:
widthsFromIndices, NoteTable', KeyTable', Key', toKey',
withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart,
doubleQuoteEnd, ellipses, apostrophe, dash
* Updated opendocument tests.
* Don't derive Show for ParserState.
* Benchmarks: markdown reader takes 82% of the time it took before.
Markdown writer takes 92% of the time (here the speedup is probably
due to the fact that everything is normalized by default).
2012-07-27 21:04:02 -07:00
|
|
|
doubleQuoted = try $ do
|
|
|
|
doubleQuoteStart
|
2013-03-12 19:18:14 -07:00
|
|
|
contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
|
2015-04-18 18:34:55 -07:00
|
|
|
(withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return
|
|
|
|
(fmap B.doubleQuoted . trimInlinesF $ contents))
|
|
|
|
<|> (return $ return (B.str "\8220") <> contents)
|