html inlines and html blocks handling in textile reader
This commit is contained in:
parent
fa0866886b
commit
d724c6b568
3 changed files with 56 additions and 18 deletions
|
@ -28,7 +28,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
Conversion from Textile to 'Pandoc' document, based on the spec
|
Conversion from Textile to 'Pandoc' document, based on the spec
|
||||||
available at ahttp://redcloth.org/hobix.com/textile/
|
available at ahttp://redcloth.org/hobix.com/textile/
|
||||||
|
|
||||||
Implemented and parsed :
|
Implemented and parsed:
|
||||||
- Paragraphs
|
- Paragraphs
|
||||||
- Code blocks
|
- Code blocks
|
||||||
- Lists
|
- Lists
|
||||||
|
@ -36,8 +36,14 @@ Implemented and parsed :
|
||||||
- Inlines : strong, emph, cite, code, deleted, superscript,
|
- Inlines : strong, emph, cite, code, deleted, superscript,
|
||||||
subscript, links
|
subscript, links
|
||||||
|
|
||||||
Implemented but discarded :
|
Implemented but discarded:
|
||||||
- HTML-specific and CSS-specific attributes
|
- HTML-specific and CSS-specific attributes
|
||||||
|
|
||||||
|
Left to be implemented:
|
||||||
|
- Pandoc Meta Information (title, author, date)
|
||||||
|
- footnotes
|
||||||
|
- should autolink be shared through Parsing.hs ?
|
||||||
|
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
|
@ -47,7 +53,8 @@ module Text.Pandoc.Readers.Textile (
|
||||||
import Text.Pandoc.Definition
|
import Text.Pandoc.Definition
|
||||||
import Text.Pandoc.Shared
|
import Text.Pandoc.Shared
|
||||||
import Text.Pandoc.Parsing
|
import Text.Pandoc.Parsing
|
||||||
import Text.Pandoc.Readers.HTML ( htmlTag, htmlEndTag )
|
import Text.Pandoc.Readers.HTML ( htmlTag, htmlEndTag, -- find code blocks
|
||||||
|
rawHtmlBlock, rawHtmlInline )
|
||||||
import Text.ParserCombinators.Parsec
|
import Text.ParserCombinators.Parsec
|
||||||
import Data.Char ( digitToInt )
|
import Data.Char ( digitToInt )
|
||||||
|
|
||||||
|
@ -83,6 +90,7 @@ blockParsers = [ codeBlock
|
||||||
, header
|
, header
|
||||||
, blockQuote
|
, blockQuote
|
||||||
, anyList
|
, anyList
|
||||||
|
, rawHtmlBlock'
|
||||||
, maybeExplicitBlock "table" table
|
, maybeExplicitBlock "table" table
|
||||||
, maybeExplicitBlock "p" para
|
, maybeExplicitBlock "p" para
|
||||||
, nullBlock ]
|
, nullBlock ]
|
||||||
|
@ -170,17 +178,26 @@ orderedListItemAtDepth depth = try $ do
|
||||||
sublist <- option [] (anyListAtDepth (depth + 1) >>= return . (:[]))
|
sublist <- option [] (anyListAtDepth (depth + 1) >>= return . (:[]))
|
||||||
return (p:sublist)
|
return (p:sublist)
|
||||||
|
|
||||||
-- | This terminates a block such as a paragraph.
|
-- | This terminates a block such as a paragraph. Because of raw html
|
||||||
|
-- blocks support, we have to lookAhead for a rawHtmlBlock.
|
||||||
blockBreak :: GenParser Char ParserState ()
|
blockBreak :: GenParser Char ParserState ()
|
||||||
blockBreak = try $ newline >> blanklines >> return ()
|
blockBreak = try $ choice
|
||||||
|
[newline >> blanklines >> return (),
|
||||||
|
lookAhead rawHtmlBlock' >> return ()]
|
||||||
|
|
||||||
|
-- | A raw Html Block, optionally followed by blanklines
|
||||||
|
rawHtmlBlock' :: GenParser Char ParserState Block
|
||||||
|
rawHtmlBlock' = try $ do
|
||||||
|
b <- rawHtmlBlock
|
||||||
|
optional blanklines
|
||||||
|
return b
|
||||||
|
|
||||||
-- | In textile, paragraphs are separated by blank lines.
|
-- | In textile, paragraphs are separated by blank lines.
|
||||||
para :: GenParser Char ParserState Block
|
para :: GenParser Char ParserState Block
|
||||||
para = try $ do
|
para = try $ do
|
||||||
content <- manyTill inline blockBreak
|
content <- manyTill inline blockBreak
|
||||||
return $ Para $ normalizeSpaces content
|
return $ Para $ normalizeSpaces content
|
||||||
|
|
||||||
|
|
||||||
-- Tables
|
-- Tables
|
||||||
|
|
||||||
|
@ -260,6 +277,7 @@ inlineParsers = [ autoLink
|
||||||
, str
|
, str
|
||||||
, whitespace
|
, whitespace
|
||||||
, endline
|
, endline
|
||||||
|
, rawHtmlInline
|
||||||
, code
|
, code
|
||||||
, simpleInline (string "??") (Cite [])
|
, simpleInline (string "??") (Cite [])
|
||||||
, simpleInline (string "**") Strong
|
, simpleInline (string "**") Strong
|
||||||
|
@ -313,7 +331,7 @@ image = try $ do
|
||||||
|
|
||||||
-- | Any special symbol defined in specialChars
|
-- | Any special symbol defined in specialChars
|
||||||
symbol :: GenParser Char ParserState Inline
|
symbol :: GenParser Char ParserState Inline
|
||||||
symbol = do
|
symbol = do
|
||||||
result <- oneOf specialChars
|
result <- oneOf specialChars
|
||||||
return $ Str [result]
|
return $ Str [result]
|
||||||
|
|
||||||
|
@ -322,7 +340,6 @@ code :: GenParser Char ParserState Inline
|
||||||
code = surrounded (char '@') anyChar >>=
|
code = surrounded (char '@') anyChar >>=
|
||||||
return . Code
|
return . Code
|
||||||
|
|
||||||
|
|
||||||
-- | Html / CSS attributes
|
-- | Html / CSS attributes
|
||||||
attributes :: GenParser Char ParserState String
|
attributes :: GenParser Char ParserState String
|
||||||
attributes = choice [ enclosed (char '(') (char ')') anyChar,
|
attributes = choice [ enclosed (char '(') (char ')') anyChar,
|
||||||
|
@ -341,12 +358,4 @@ simpleInline :: GenParser Char ParserState t -- ^ surrounding parser
|
||||||
-> GenParser Char ParserState Inline -- ^ content parser (to be used repeatedly)
|
-> GenParser Char ParserState Inline -- ^ content parser (to be used repeatedly)
|
||||||
simpleInline border construct = surrounded border (inlineWithAttribute) >>=
|
simpleInline border construct = surrounded border (inlineWithAttribute) >>=
|
||||||
return . construct . normalizeSpaces
|
return . construct . normalizeSpaces
|
||||||
where inlineWithAttribute = (try $ optional attributes) >> inline
|
where inlineWithAttribute = (try $ optional attributes) >> inline
|
||||||
|
|
||||||
|
|
||||||
-- TODO
|
|
||||||
--
|
|
||||||
-- - Pandoc Meta Information (title, author, date)
|
|
||||||
-- - footnotes
|
|
||||||
-- - should autolink be shared through Parsing.hs ?
|
|
||||||
-- - embeded HTML, both inlines and blocks
|
|
|
@ -113,4 +113,18 @@ Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]})
|
||||||
, [ Plain [Str "sex"] ] ],
|
, [ Plain [Str "sex"] ] ],
|
||||||
[ [ Plain [Str "joan"] ]
|
[ [ Plain [Str "joan"] ]
|
||||||
, [ Plain [Str "24"] ]
|
, [ Plain [Str "24"] ]
|
||||||
, [ Plain [Str "f"] ] ] ] ]
|
, [ Plain [Str "f"] ] ] ]
|
||||||
|
, Header 1 [Str "Raw",Space,Str "HTML"]
|
||||||
|
, Para [Str "However,",Space,HtmlInline "<strong>",Space,Str "raw",Space,Str "HTML",Space,Str "inlines",Space,HtmlInline "</strong>",Space,Str "are",Space,Str "accepted,",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str ":"]
|
||||||
|
, RawHtml "<div class=\"foobar\">"
|
||||||
|
, Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold"]
|
||||||
|
, RawHtml "</div>"
|
||||||
|
, Para [Str "Html",Space,Str "blocks",Space,Str "can",Space,Str "be"]
|
||||||
|
, RawHtml "<div>"
|
||||||
|
, Para [Str "inlined"]
|
||||||
|
, RawHtml "</div>"
|
||||||
|
, Para [Str "as",Space,Str "well."]
|
||||||
|
, BulletList
|
||||||
|
[ [ Plain [Str "this",Space,Str "<",Str "div",Str ">",Space,Str "won",Str "'",Str "t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "<",Str "/div",Str ">"] ]
|
||||||
|
, [ Plain [Str "but",Space,Str "this",Space,HtmlInline "<strong>",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,HtmlInline "</strong>"] ] ]
|
||||||
|
, Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"] ]
|
||||||
|
|
|
@ -161,3 +161,18 @@ p{color:green}. and paragraph attributes, and table attributes.
|
||||||
table{foo:bar}.
|
table{foo:bar}.
|
||||||
| name | age | sex |
|
| name | age | sex |
|
||||||
| joan | 24 | f |
|
| joan | 24 | f |
|
||||||
|
|
||||||
|
h1. Raw HTML
|
||||||
|
|
||||||
|
However, <strong> raw HTML inlines </strong> are accepted, as well as :
|
||||||
|
|
||||||
|
<div class="foobar">
|
||||||
|
any *Raw HTML Block* with bold
|
||||||
|
</div>
|
||||||
|
|
||||||
|
Html blocks can be <div>inlined</div> as well.
|
||||||
|
|
||||||
|
* this <div> won't produce raw html blocks </div>
|
||||||
|
* but this <strong> will produce inline html </strong>
|
||||||
|
|
||||||
|
Can you prove that 2 < 3 ?
|
||||||
|
|
Loading…
Reference in a new issue