diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 7044cbad4..e1d608eed 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -43,8 +43,6 @@ Left to be implemented:
  - Pandoc Meta Information (title, author, date)
  - footnotes
  - dimension sign
- - registered, trademark, and copyright symbols
- - acronyms
  - uppercase
  - definition lists
  - continued blocks (ex bq..)
@@ -86,7 +84,7 @@ readTextile state s = (readWith parseTextile) state (s ++ "\n\n")
 
 -- | Special chars border strings parsing
 specialChars :: [Char]
-specialChars = "\\[]<>*#_@~-+^&,.;:!?|\"'%"
+specialChars = "\\[]<>*#_@~-+^&,.;:!?|\"'%()"
 
 -- | Generate a Pandoc ADT from a textile document
 parseTextile :: GenParser Char ParserState Pandoc
@@ -312,6 +310,7 @@ inlines = manyTill inline newline
 -- | Inline parsers tried in order
 inlineParsers :: [GenParser Char ParserState Inline]
 inlineParsers = [ autoLink
+                , mark
                 , str
                 , htmlSpan
 --                , smartPunctuation -- from markdown reader
@@ -332,16 +331,40 @@ inlineParsers = [ autoLink
                 , symbol
                 ]
 
+-- | Trademark, registered, copyright
+mark :: GenParser Char st Inline
+mark = try $ char '(' >> (try tm <|> try reg <|> copy)
+
+reg :: GenParser Char st Inline
+reg = do
+  oneOf "Rr"
+  char ')'
+  return $ Str "\174"
+
+tm :: GenParser Char st Inline
+tm = do
+  oneOf "Tt"
+  oneOf "Mm"
+  char ')'
+  return $ Str "\8482"
+
+copy :: GenParser Char st Inline
+copy = do
+  oneOf "Cc"
+  char ')'
+  return $ Str "\169"
+
 -- | Any string
 str :: GenParser Char ParserState Inline
 str = do
   xs <- many1 (noneOf (specialChars ++ "\t\n "))
-  optional $ charsInBalanced '(' ')' -- drop acronym explanation
-                                     -- e.g. PBS(Public Broadcasting Service)
+  optional $ try $ do
+    lookAhead (char '(')
+    notFollowedBy' mark
+    charsInBalanced '(' ')' -- drop acronym explanation
   -- parse a following hyphen if followed by a letter
   -- (this prevents unwanted interpretation as starting a strikeout section)
   result <- option xs $ try $ do
-    guard $ not . null $ xs
     char '-'
     next <- lookAhead letter
     guard $ isLetter (last xs) || isLetter next
diff --git a/tests/textile-reader.native b/tests/textile-reader.native
index d657521e6..613c6ac6b 100644
--- a/tests/textile-reader.native
+++ b/tests/textile-reader.native
@@ -125,4 +125,10 @@ Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]})
 , BulletList
   [ [ Plain [Str "this",Space,Str "<",Str "div",Str ">",Space,Str "won",Str "'",Str "t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "<",Str "/div",Str ">"] ]
   , [ Plain [Str "but",Space,Str "this",Space,HtmlInline "",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,HtmlInline ""] ] ]
-, Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"] ]
+, Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"]
+, Header 1 [Str "Acronyms",Space,Str "and",Space,Str "marks"]
+, Para [Str "PBS"]
+, Para [Str "Hi",Str "\8482"]
+, Para [Str "Hi",Space,Str "\8482"]
+, Para [Str "\174",Space,Str "Hi",Str "\174"]
+, Para [Str "Hi",Str "\169",Str "2008",Space,Str "\169",Space,Str "2008"] ]
diff --git a/tests/textile-reader.textile b/tests/textile-reader.textile
index 0b65e11bb..ed3b02bfe 100644
--- a/tests/textile-reader.textile
+++ b/tests/textile-reader.textile
@@ -178,3 +178,15 @@ Html blocks can be