Textile reader: implemented acronyms, (tm), (r), (c).
This commit is contained in:
parent
6ef8a363dc
commit
f917b46500
3 changed files with 48 additions and 7 deletions
|
@ -43,8 +43,6 @@ Left to be implemented:
|
||||||
- Pandoc Meta Information (title, author, date)
|
- Pandoc Meta Information (title, author, date)
|
||||||
- footnotes
|
- footnotes
|
||||||
- dimension sign
|
- dimension sign
|
||||||
- registered, trademark, and copyright symbols
|
|
||||||
- acronyms
|
|
||||||
- uppercase
|
- uppercase
|
||||||
- definition lists
|
- definition lists
|
||||||
- continued blocks (ex bq..)
|
- continued blocks (ex bq..)
|
||||||
|
@ -86,7 +84,7 @@ readTextile state s = (readWith parseTextile) state (s ++ "\n\n")
|
||||||
|
|
||||||
-- | Special chars border strings parsing
|
-- | Special chars border strings parsing
|
||||||
specialChars :: [Char]
|
specialChars :: [Char]
|
||||||
specialChars = "\\[]<>*#_@~-+^&,.;:!?|\"'%"
|
specialChars = "\\[]<>*#_@~-+^&,.;:!?|\"'%()"
|
||||||
|
|
||||||
-- | Generate a Pandoc ADT from a textile document
|
-- | Generate a Pandoc ADT from a textile document
|
||||||
parseTextile :: GenParser Char ParserState Pandoc
|
parseTextile :: GenParser Char ParserState Pandoc
|
||||||
|
@ -312,6 +310,7 @@ inlines = manyTill inline newline
|
||||||
-- | Inline parsers tried in order
|
-- | Inline parsers tried in order
|
||||||
inlineParsers :: [GenParser Char ParserState Inline]
|
inlineParsers :: [GenParser Char ParserState Inline]
|
||||||
inlineParsers = [ autoLink
|
inlineParsers = [ autoLink
|
||||||
|
, mark
|
||||||
, str
|
, str
|
||||||
, htmlSpan
|
, htmlSpan
|
||||||
-- , smartPunctuation -- from markdown reader
|
-- , smartPunctuation -- from markdown reader
|
||||||
|
@ -332,16 +331,40 @@ inlineParsers = [ autoLink
|
||||||
, symbol
|
, symbol
|
||||||
]
|
]
|
||||||
|
|
||||||
|
-- | Parse a parenthesised mark -- @(tm)@, @(r)@ or @(c)@, in either
-- letter case -- into the inline symbol produced by 'tm', 'reg' or 'copy'.
mark :: GenParser Char st Inline
mark = try $ do
  _ <- char '('
  -- The inner 'try's rewind a partially matched alternative (e.g. the
  -- @t@ of @(t)@) so the next one starts right after the paren; the outer
  -- 'try' gives back the paren itself when no alternative matches.
  try tm <|> try reg <|> copy
|
||||||
|
|
||||||
|
-- | Registered sign: matches @r)@ or @R)@ and yields the character with
-- code point 174 (U+00AE). The opening paren is consumed by 'mark'.
reg :: GenParser Char st Inline
reg = oneOf "Rr" >> char ')' >> return (Str "\174")
|
||||||
|
|
||||||
|
-- | Trademark sign: matches @tm)@ with each letter in either case and
-- yields the character with code point 8482 (U+2122). The opening paren
-- is consumed by 'mark'.
tm :: GenParser Char st Inline
tm = oneOf "Tt" >> oneOf "Mm" >> char ')' >> return (Str "\8482")
|
||||||
|
|
||||||
|
-- | Copyright sign: matches @c)@ or @C)@ and yields the character with
-- code point 169 (U+00A9). The opening paren is consumed by 'mark'.
copy :: GenParser Char st Inline
copy = oneOf "Cc" >> char ')' >> return (Str "\169")
|
||||||
|
|
||||||
-- | Any string
|
-- | Any string
|
||||||
str :: GenParser Char ParserState Inline
|
str :: GenParser Char ParserState Inline
|
||||||
str = do
|
str = do
|
||||||
xs <- many1 (noneOf (specialChars ++ "\t\n "))
|
xs <- many1 (noneOf (specialChars ++ "\t\n "))
|
||||||
optional $ charsInBalanced '(' ')' -- drop acronym explanation
|
optional $ try $ do
|
||||||
-- e.g. PBS(Public Broadcasting Service)
|
lookAhead (char '(')
|
||||||
|
notFollowedBy' mark
|
||||||
|
charsInBalanced '(' ')' -- drop acronym explanation
|
||||||
-- parse a following hyphen if followed by a letter
|
-- parse a following hyphen if followed by a letter
|
||||||
-- (this prevents unwanted interpretation as starting a strikeout section)
|
-- (this prevents unwanted interpretation as starting a strikeout section)
|
||||||
result <- option xs $ try $ do
|
result <- option xs $ try $ do
|
||||||
guard $ not . null $ xs
|
|
||||||
char '-'
|
char '-'
|
||||||
next <- lookAhead letter
|
next <- lookAhead letter
|
||||||
guard $ isLetter (last xs) || isLetter next
|
guard $ isLetter (last xs) || isLetter next
|
||||||
|
|
|
@ -125,4 +125,10 @@ Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]})
|
||||||
, BulletList
|
, BulletList
|
||||||
[ [ Plain [Str "this",Space,Str "<",Str "div",Str ">",Space,Str "won",Str "'",Str "t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "<",Str "/div",Str ">"] ]
|
[ [ Plain [Str "this",Space,Str "<",Str "div",Str ">",Space,Str "won",Str "'",Str "t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "<",Str "/div",Str ">"] ]
|
||||||
, [ Plain [Str "but",Space,Str "this",Space,HtmlInline "<strong>",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,HtmlInline "</strong>"] ] ]
|
, [ Plain [Str "but",Space,Str "this",Space,HtmlInline "<strong>",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,HtmlInline "</strong>"] ] ]
|
||||||
, Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"] ]
|
, Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"]
|
||||||
|
, Header 1 [Str "Acronyms",Space,Str "and",Space,Str "marks"]
|
||||||
|
, Para [Str "PBS"]
|
||||||
|
, Para [Str "Hi",Str "\8482"]
|
||||||
|
, Para [Str "Hi",Space,Str "\8482"]
|
||||||
|
, Para [Str "\174",Space,Str "Hi",Str "\174"]
|
||||||
|
, Para [Str "Hi",Str "\169",Str "2008",Space,Str "\169",Space,Str "2008"] ]
|
||||||
|
|
|
@ -178,3 +178,15 @@ Html blocks can be <div>inlined</div> as well.
|
||||||
* but this <strong> will produce inline html </strong>
|
* but this <strong> will produce inline html </strong>
|
||||||
|
|
||||||
Can you prove that 2 < 3 ?
|
Can you prove that 2 < 3 ?
|
||||||
|
|
||||||
|
h1. Acronyms and marks
|
||||||
|
|
||||||
|
PBS(Public Broadcasting System)
|
||||||
|
|
||||||
|
Hi(tm)
|
||||||
|
|
||||||
|
Hi (TM)
|
||||||
|
|
||||||
|
(r) Hi(r)
|
||||||
|
|
||||||
|
Hi(c)2008 (C) 2008
|
||||||
|
|
Loading…
Add table
Reference in a new issue