html inlines and html blocks handling in textile reader

This commit is contained in:
Paul Rivier 2010-11-23 12:09:04 +01:00 committed by John MacFarlane
parent fa0866886b
commit d724c6b568
3 changed files with 56 additions and 18 deletions

View file

@ -28,7 +28,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion from Textile to 'Pandoc' document, based on the spec Conversion from Textile to 'Pandoc' document, based on the spec
available at ahttp://redcloth.org/hobix.com/textile/ available at ahttp://redcloth.org/hobix.com/textile/
Implemented and parsed : Implemented and parsed:
- Paragraphs - Paragraphs
- Code blocks - Code blocks
- Lists - Lists
@ -36,8 +36,14 @@ Implemented and parsed :
- Inlines : strong, emph, cite, code, deleted, superscript, - Inlines : strong, emph, cite, code, deleted, superscript,
subscript, links subscript, links
Implemented but discarded : Implemented but discarded:
- HTML-specific and CSS-specific attributes - HTML-specific and CSS-specific attributes
Left to be implemented:
- Pandoc Meta Information (title, author, date)
- footnotes
- should autolink be shared through Parsing.hs ?
-} -}
@ -47,7 +53,8 @@ module Text.Pandoc.Readers.Textile (
import Text.Pandoc.Definition import Text.Pandoc.Definition
import Text.Pandoc.Shared import Text.Pandoc.Shared
import Text.Pandoc.Parsing import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML ( htmlTag, htmlEndTag ) import Text.Pandoc.Readers.HTML ( htmlTag, htmlEndTag, -- find code blocks
rawHtmlBlock, rawHtmlInline )
import Text.ParserCombinators.Parsec import Text.ParserCombinators.Parsec
import Data.Char ( digitToInt ) import Data.Char ( digitToInt )
@ -83,6 +90,7 @@ blockParsers = [ codeBlock
, header , header
, blockQuote , blockQuote
, anyList , anyList
, rawHtmlBlock'
, maybeExplicitBlock "table" table , maybeExplicitBlock "table" table
, maybeExplicitBlock "p" para , maybeExplicitBlock "p" para
, nullBlock ] , nullBlock ]
@ -170,10 +178,19 @@ orderedListItemAtDepth depth = try $ do
sublist <- option [] (anyListAtDepth (depth + 1) >>= return . (:[])) sublist <- option [] (anyListAtDepth (depth + 1) >>= return . (:[]))
return (p:sublist) return (p:sublist)
-- | This terminates a block such as a paragraph. -- | This terminates a block such as a paragraph. Because of raw html
-- blocks support, we have to lookAhead for a rawHtmlBlock.
blockBreak :: GenParser Char ParserState () blockBreak :: GenParser Char ParserState ()
blockBreak = try $ newline >> blanklines >> return () blockBreak = try $ choice
[newline >> blanklines >> return (),
lookAhead rawHtmlBlock' >> return ()]
-- | A raw Html Block, optionally followed by blanklines
rawHtmlBlock' :: GenParser Char ParserState Block
rawHtmlBlock' = try $ do
b <- rawHtmlBlock
optional blanklines
return b
-- | In textile, paragraphs are separated by blank lines. -- | In textile, paragraphs are separated by blank lines.
para :: GenParser Char ParserState Block para :: GenParser Char ParserState Block
@ -260,6 +277,7 @@ inlineParsers = [ autoLink
, str , str
, whitespace , whitespace
, endline , endline
, rawHtmlInline
, code , code
, simpleInline (string "??") (Cite []) , simpleInline (string "??") (Cite [])
, simpleInline (string "**") Strong , simpleInline (string "**") Strong
@ -322,7 +340,6 @@ code :: GenParser Char ParserState Inline
code = surrounded (char '@') anyChar >>= code = surrounded (char '@') anyChar >>=
return . Code return . Code
-- | Html / CSS attributes -- | Html / CSS attributes
attributes :: GenParser Char ParserState String attributes :: GenParser Char ParserState String
attributes = choice [ enclosed (char '(') (char ')') anyChar, attributes = choice [ enclosed (char '(') (char ')') anyChar,
@ -342,11 +359,3 @@ simpleInline :: GenParser Char ParserState t -- ^ surrounding parser
simpleInline border construct = surrounded border (inlineWithAttribute) >>= simpleInline border construct = surrounded border (inlineWithAttribute) >>=
return . construct . normalizeSpaces return . construct . normalizeSpaces
where inlineWithAttribute = (try $ optional attributes) >> inline where inlineWithAttribute = (try $ optional attributes) >> inline
-- TODO
--
-- - Pandoc Meta Information (title, author, date)
-- - footnotes
-- - should autolink be shared through Parsing.hs ?
-- - embeded HTML, both inlines and blocks

View file

@ -113,4 +113,18 @@ Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]})
, [ Plain [Str "sex"] ] ], , [ Plain [Str "sex"] ] ],
[ [ Plain [Str "joan"] ] [ [ Plain [Str "joan"] ]
, [ Plain [Str "24"] ] , [ Plain [Str "24"] ]
, [ Plain [Str "f"] ] ] ] ] , [ Plain [Str "f"] ] ] ]
, Header 1 [Str "Raw",Space,Str "HTML"]
, Para [Str "However,",Space,HtmlInline "<strong>",Space,Str "raw",Space,Str "HTML",Space,Str "inlines",Space,HtmlInline "</strong>",Space,Str "are",Space,Str "accepted,",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str ":"]
, RawHtml "<div class=\"foobar\">"
, Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold"]
, RawHtml "</div>"
, Para [Str "Html",Space,Str "blocks",Space,Str "can",Space,Str "be"]
, RawHtml "<div>"
, Para [Str "inlined"]
, RawHtml "</div>"
, Para [Str "as",Space,Str "well."]
, BulletList
[ [ Plain [Str "this",Space,Str "<",Str "div",Str ">",Space,Str "won",Str "'",Str "t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "<",Str "/div",Str ">"] ]
, [ Plain [Str "but",Space,Str "this",Space,HtmlInline "<strong>",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,HtmlInline "</strong>"] ] ]
, Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"] ]

View file

@ -161,3 +161,18 @@ p{color:green}. and paragraph attributes, and table attributes.
table{foo:bar}. table{foo:bar}.
| name | age | sex | | name | age | sex |
| joan | 24 | f | | joan | 24 | f |
h1. Raw HTML
However, <strong> raw HTML inlines </strong> are accepted, as well as :
<div class="foobar">
any *Raw HTML Block* with bold
</div>
Html blocks can be <div>inlined</div> as well.
* this <div> won't produce raw html blocks </div>
* but this <strong> will produce inline html </strong>
Can you prove that 2 < 3 ?