Textile reader: implemented acronyms, (tm), (r), (c).
This commit is contained in:
parent
6ef8a363dc
commit
f917b46500
3 changed files with 48 additions and 7 deletions
|
@ -43,8 +43,6 @@ Left to be implemented:
|
|||
- Pandoc Meta Information (title, author, date)
|
||||
- footnotes
|
||||
- dimension sign
|
||||
- registered, trademark, and copyright symbols
|
||||
- acronyms
|
||||
- uppercase
|
||||
- definition lists
|
||||
- continued blocks (ex bq..)
|
||||
|
@ -86,7 +84,7 @@ readTextile state s = (readWith parseTextile) state (s ++ "\n\n")
|
|||
|
||||
-- | Special characters that delimit plain strings during parsing
|
||||
specialChars :: [Char]
|
||||
specialChars = "\\[]<>*#_@~-+^&,.;:!?|\"'%"
|
||||
specialChars = "\\[]<>*#_@~-+^&,.;:!?|\"'%()"
|
||||
|
||||
-- | Generate a Pandoc ADT from a textile document
|
||||
parseTextile :: GenParser Char ParserState Pandoc
|
||||
|
@ -312,6 +310,7 @@ inlines = manyTill inline newline
|
|||
-- | Inline parsers tried in order
|
||||
inlineParsers :: [GenParser Char ParserState Inline]
|
||||
inlineParsers = [ autoLink
|
||||
, mark
|
||||
, str
|
||||
, htmlSpan
|
||||
-- , smartPunctuation -- from markdown reader
|
||||
|
@ -332,16 +331,40 @@ inlineParsers = [ autoLink
|
|||
, symbol
|
||||
]
|
||||
|
||||
-- | Parse a parenthesised symbol shortcut: trademark @(tm)@,
-- registered @(r)@ or copyright @(c)@.
--
-- The opening parenthesis is consumed here; 'tm', 'reg' and 'copy' each
-- match the remaining letters and the closing parenthesis.  The outer
-- 'try' makes the whole parser backtrack (consuming nothing) when the
-- parenthesis is not followed by one of the three shortcuts.
mark :: GenParser Char st Inline
mark = try $ do
  char '('
  try tm <|> try reg <|> copy
|
||||
|
||||
-- | Parse the tail of a registered-sign shortcut: an @r@ or @R@ followed
-- by a closing parenthesis.  Yields a 'Str' holding U+00AE (registered
-- sign).  The opening parenthesis is expected to have been consumed by
-- the caller (see 'mark').
reg :: GenParser Char st Inline
reg = oneOf "Rr" >> char ')' >> return (Str "\174")
|
||||
|
||||
-- | Parse the tail of a trademark shortcut: @t@ then @m@ (each in either
-- case) followed by a closing parenthesis.  Yields a 'Str' holding
-- U+2122 (trade mark sign).  The opening parenthesis is expected to have
-- been consumed by the caller (see 'mark').
tm :: GenParser Char st Inline
tm = oneOf "Tt" >> oneOf "Mm" >> char ')' >> return (Str "\8482")
|
||||
|
||||
-- | Parse the tail of a copyright shortcut: a @c@ or @C@ followed by a
-- closing parenthesis.  Yields a 'Str' holding U+00A9 (copyright sign).
-- The opening parenthesis is expected to have been consumed by the
-- caller (see 'mark').
copy :: GenParser Char st Inline
copy = oneOf "Cc" >> char ')' >> return (Str "\169")
|
||||
|
||||
-- | Any string
|
||||
str :: GenParser Char ParserState Inline
|
||||
str = do
|
||||
xs <- many1 (noneOf (specialChars ++ "\t\n "))
|
||||
optional $ charsInBalanced '(' ')' -- drop acronym explanation
|
||||
-- e.g. PBS(Public Broadcasting Service)
|
||||
optional $ try $ do
|
||||
lookAhead (char '(')
|
||||
notFollowedBy' mark
|
||||
charsInBalanced '(' ')' -- drop acronym explanation
|
||||
-- parse a following hyphen if followed by a letter
|
||||
-- (this prevents unwanted interpretation as starting a strikeout section)
|
||||
result <- option xs $ try $ do
|
||||
guard $ not . null $ xs
|
||||
char '-'
|
||||
next <- lookAhead letter
|
||||
guard $ isLetter (last xs) || isLetter next
|
||||
|
|
|
@ -125,4 +125,10 @@ Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]})
|
|||
, BulletList
|
||||
[ [ Plain [Str "this",Space,Str "<",Str "div",Str ">",Space,Str "won",Str "'",Str "t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "<",Str "/div",Str ">"] ]
|
||||
, [ Plain [Str "but",Space,Str "this",Space,HtmlInline "<strong>",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,HtmlInline "</strong>"] ] ]
|
||||
, Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"] ]
|
||||
, Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"]
|
||||
, Header 1 [Str "Acronyms",Space,Str "and",Space,Str "marks"]
|
||||
, Para [Str "PBS"]
|
||||
, Para [Str "Hi",Str "\8482"]
|
||||
, Para [Str "Hi",Space,Str "\8482"]
|
||||
, Para [Str "\174",Space,Str "Hi",Str "\174"]
|
||||
, Para [Str "Hi",Str "\169",Str "2008",Space,Str "\169",Space,Str "2008"] ]
|
||||
|
|
|
@ -178,3 +178,15 @@ Html blocks can be <div>inlined</div> as well.
|
|||
* but this <strong> will produce inline html </strong>
|
||||
|
||||
Can you prove that 2 < 3 ?
|
||||
|
||||
h1. Acronyms and marks
|
||||
|
||||
PBS(Public Broadcasting System)
|
||||
|
||||
Hi(tm)
|
||||
|
||||
Hi (TM)
|
||||
|
||||
(r) Hi(r)
|
||||
|
||||
Hi(c)2008 (C) 2008
|
||||
|
|
Loading…
Reference in a new issue