Textile reader: implemented acronyms, (tm), (r), (c).

This commit is contained in:
John MacFarlane 2010-12-07 18:28:36 -08:00
parent 6ef8a363dc
commit f917b46500
3 changed files with 48 additions and 7 deletions

View file

@ -43,8 +43,6 @@ Left to be implemented:
- Pandoc Meta Information (title, author, date)
- footnotes
- dimension sign
- registered, trademark, and copyright symbols
- acronyms
- uppercase
- definition lists
- continued blocks (ex bq..)
@ -86,7 +84,7 @@ readTextile state s = (readWith parseTextile) state (s ++ "\n\n")
-- | Special characters that mark the boundary of a plain string while parsing
specialChars :: [Char]
specialChars = "\\[]<>*#_@~-+^&,.;:!?|\"'%"
specialChars = "\\[]<>*#_@~-+^&,.;:!?|\"'%()"
-- | Generate a Pandoc ADT from a textile document
parseTextile :: GenParser Char ParserState Pandoc
@ -312,6 +310,7 @@ inlines = manyTill inline newline
-- | Inline parsers tried in order
inlineParsers :: [GenParser Char ParserState Inline]
inlineParsers = [ autoLink
, mark
, str
, htmlSpan
-- , smartPunctuation -- from markdown reader
@ -332,16 +331,40 @@ inlineParsers = [ autoLink
, symbol
]
-- | Parenthesised mark: @(tm)@, @(r)@ or @(c)@, in either letter case.
-- The opening parenthesis is consumed here and the rest is delegated to
-- 'tm', 'reg' and 'copy', tried in that order.  The outer 'try' makes
-- the whole parser backtrack, so a failed match consumes no input.
mark :: GenParser Char st Inline
mark = try $ do
  char '('
  try tm <|> try reg <|> copy
-- | Tail of a registered mark: an @r@ or @R@ followed by a closing
-- parenthesis.  Produces the registered sign (code point 174).
-- 'mark' consumes the opening parenthesis before calling this.
reg :: GenParser Char st Inline
reg = oneOf "Rr" >> char ')' >> return (Str "\174")
-- | Tail of a trademark mark: @t@ or @T@, then @m@ or @M@, then a
-- closing parenthesis.  Produces the trade mark sign (code point 8482).
-- 'mark' consumes the opening parenthesis before calling this.
tm :: GenParser Char st Inline
tm = oneOf "Tt" >> oneOf "Mm" >> char ')' >> return (Str "\8482")
-- | Tail of a copyright mark: a @c@ or @C@ followed by a closing
-- parenthesis.  Produces the copyright sign (code point 169).
-- 'mark' consumes the opening parenthesis before calling this.
copy :: GenParser Char st Inline
copy = oneOf "Cc" >> char ')' >> return (Str "\169")
-- | Any string
str :: GenParser Char ParserState Inline
str = do
xs <- many1 (noneOf (specialChars ++ "\t\n "))
optional $ charsInBalanced '(' ')' -- drop acronym explanation
-- e.g. PBS(Public Broadcasting Service)
optional $ try $ do
lookAhead (char '(')
notFollowedBy' mark
charsInBalanced '(' ')' -- drop acronym explanation
-- parse a following hyphen if followed by a letter
-- (this prevents unwanted interpretation as starting a strikeout section)
result <- option xs $ try $ do
guard $ not . null $ xs
char '-'
next <- lookAhead letter
guard $ isLetter (last xs) || isLetter next

View file

@ -125,4 +125,10 @@ Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]})
, BulletList
[ [ Plain [Str "this",Space,Str "<",Str "div",Str ">",Space,Str "won",Str "'",Str "t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "<",Str "/div",Str ">"] ]
, [ Plain [Str "but",Space,Str "this",Space,HtmlInline "<strong>",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,HtmlInline "</strong>"] ] ]
, Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"] ]
, Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"]
, Header 1 [Str "Acronyms",Space,Str "and",Space,Str "marks"]
, Para [Str "PBS"]
, Para [Str "Hi",Str "\8482"]
, Para [Str "Hi",Space,Str "\8482"]
, Para [Str "\174",Space,Str "Hi",Str "\174"]
, Para [Str "Hi",Str "\169",Str "2008",Space,Str "\169",Space,Str "2008"] ]

View file

@ -178,3 +178,15 @@ Html blocks can be <div>inlined</div> as well.
* but this <strong> will produce inline html </strong>
Can you prove that 2 < 3 ?
h1. Acronyms and marks
PBS(Public Broadcasting System)
Hi(tm)
Hi (TM)
(r) Hi(r)
Hi(c)2008 (C) 2008