Textile reader: list and HTML block parsing improvements.
Closes #1513. Lists can now start without an intervening blank line. Also, HTML block-level tags that do not start a line are now parsed as RawInline and no longer interrupt paragraphs, matching RedCloth's behavior.
This commit is contained in:
parent
4a535211d8
commit
6fae136cbb
3 changed files with 26 additions and 22 deletions
|
@ -56,7 +56,7 @@ import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
|
|||
import qualified Text.Pandoc.Builder as B
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Parsing
|
||||
import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
|
||||
import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag )
|
||||
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
|
||||
import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
|
||||
import Text.HTML.TagSoup.Match
|
||||
|
@ -133,12 +133,9 @@ blockParsers = [ codeBlock
|
|||
, rawLaTeXBlock'
|
||||
, maybeExplicitBlock "table" table
|
||||
, maybeExplicitBlock "p" para
|
||||
, endBlock
|
||||
, mempty <$ blanklines
|
||||
]
|
||||
|
||||
endBlock :: Parser [Char] ParserState Blocks
|
||||
endBlock = string "\n\n" >> return mempty
|
||||
|
||||
-- | Any block in the order of definition of blockParsers
|
||||
block :: Parser [Char] ParserState Blocks
|
||||
block = do
|
||||
|
@ -193,7 +190,7 @@ header = try $ do
|
|||
attr <- attributes
|
||||
char '.'
|
||||
lookAhead whitespace
|
||||
name <- trimInlines . mconcat <$> manyTill inline blockBreak
|
||||
name <- trimInlines . mconcat <$> many inline
|
||||
attr' <- registerHeader attr name
|
||||
return $ B.headerWith attr' level name
|
||||
|
||||
|
@ -304,17 +301,12 @@ definitionListItem = try $ do
|
|||
ds <- parseFromString parseBlocks (s ++ "\n\n")
|
||||
return [ds]
|
||||
|
||||
-- | This terminates a block such as a paragraph. Because of raw html
|
||||
-- blocks support, we have to lookAhead for a rawHtmlBlock.
|
||||
blockBreak :: Parser [Char] ParserState ()
|
||||
blockBreak = try (newline >> blanklines >> return ()) <|>
|
||||
try (optional spaces >> lookAhead rawHtmlBlock >> return ())
|
||||
|
||||
-- raw content
|
||||
|
||||
-- | A raw Html Block, optionally followed by blanklines
|
||||
rawHtmlBlock :: Parser [Char] ParserState Blocks
|
||||
rawHtmlBlock = try $ do
|
||||
skipMany spaceChar
|
||||
(_,b) <- htmlTag isBlockTag
|
||||
optional blanklines
|
||||
return $ B.rawBlock "html" b
|
||||
|
@ -328,7 +320,7 @@ rawLaTeXBlock' = do
|
|||
|
||||
-- | In textile, paragraphs are separated by blank lines.
|
||||
para :: Parser [Char] ParserState Blocks
|
||||
para = B.para . trimInlines . mconcat <$> manyTill inline blockBreak
|
||||
para = B.para . trimInlines . mconcat <$> many1 inline
|
||||
|
||||
-- Tables
|
||||
|
||||
|
@ -505,11 +497,14 @@ whitespace = many1 spaceChar >> return B.space <?> "whitespace"
|
|||
-- | In Textile, an isolated endline character is a line break
|
||||
endline :: Parser [Char] ParserState Inlines
|
||||
endline = try $ do
|
||||
newline >> notFollowedBy blankline
|
||||
newline
|
||||
notFollowedBy blankline
|
||||
notFollowedBy listStart
|
||||
notFollowedBy rawHtmlBlock
|
||||
return B.linebreak
|
||||
|
||||
rawHtmlInline :: Parser [Char] ParserState Inlines
|
||||
rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
|
||||
rawHtmlInline = B.rawInline "html" . snd <$> htmlTag (const True)
|
||||
|
||||
-- | Raw LaTeX Inline
|
||||
rawLaTeXInline' :: Parser [Char] ParserState Inlines
|
||||
|
@ -561,7 +556,9 @@ escapedTag = B.str <$>
|
|||
|
||||
-- | Any special symbol defined in wordBoundaries
|
||||
symbol :: Parser [Char] ParserState Inlines
|
||||
symbol = B.str . singleton <$> (oneOf wordBoundaries <|> oneOf markupChars)
|
||||
symbol = B.str . singleton <$> (notFollowedBy newline *>
|
||||
notFollowedBy rawHtmlBlock *>
|
||||
oneOf wordBoundaries)
|
||||
|
||||
-- | Inline code
|
||||
code :: Parser [Char] ParserState Inlines
|
||||
|
|
|
@ -67,6 +67,11 @@ Pandoc (Meta {unMeta = fromList []})
|
|||
,BulletList
|
||||
[[Plain [Str "one"]]
|
||||
,[Plain [Str "two",LineBreak,Str "->",Space,Str "and",Space,Str "more"]]]
|
||||
,Header 2 ("issue-1513",[],[]) [Str "Issue",Space,Str "#1513"]
|
||||
,Para [Str "List:"]
|
||||
,BulletList
|
||||
[[Plain [Str "one"]]
|
||||
,[Plain [Str "two"]]]
|
||||
,Header 2 ("definition-list",[],[]) [Str "Definition",Space,Str "List"]
|
||||
,DefinitionList
|
||||
[([Str "coffee"],
|
||||
|
@ -145,13 +150,9 @@ Pandoc (Meta {unMeta = fromList []})
|
|||
,RawBlock (Format "html") "<div class=\"foobar\">"
|
||||
,Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold"]
|
||||
,RawBlock (Format "html") "</div>"
|
||||
,Para [Str "Html",Space,Str "blocks",Space,Str "can",Space,Str "be"]
|
||||
,RawBlock (Format "html") "<div>"
|
||||
,Para [Str "inlined"]
|
||||
,RawBlock (Format "html") "</div>"
|
||||
,Para [Str "as",Space,Str "well."]
|
||||
,Para [Str "Html",Space,Str "blocks",Space,Str "can",Space,Str "be",Space,RawInline (Format "html") "<div>",Str "inlined",RawInline (Format "html") "</div>",Space,Str "as",Space,Str "well."]
|
||||
,BulletList
|
||||
[[Plain [Str "this",Space,Str "<div>",Space,Str "won\8217t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "</div>"]]
|
||||
[[Plain [Str "this",Space,RawInline (Format "html") "<div>",Space,Str "won\8217t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,RawInline (Format "html") "</div>"]]
|
||||
,[Plain [Str "but",Space,Str "this",Space,RawInline (Format "html") "<strong>",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,RawInline (Format "html") "</strong>"]]]
|
||||
,Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"]
|
||||
,Header 1 ("raw-latex",[],[]) [Str "Raw",Space,Str "LaTeX"]
|
||||
|
|
|
@ -123,6 +123,12 @@ h2. Issue #1500
|
|||
* two
|
||||
-> and more
|
||||
|
||||
h2. Issue #1513
|
||||
|
||||
List:
|
||||
* one
|
||||
* two
|
||||
|
||||
h2. Definition List
|
||||
|
||||
- coffee := Hot and black
|
||||
|
|
Loading…
Add table
Reference in a new issue