Textile reader: modified str to handle acronyms, hyphens.

* A single hyphen between two word characters is no longer a
  potential strikeout-starter.
* Acronym explanations are dropped.
This commit is contained in:
John MacFarlane 2010-12-03 22:30:08 -08:00
parent 55e43c4991
commit 36d4aa4a09

View file

@ -70,7 +70,8 @@ import Text.Pandoc.Readers.HTML ( htmlTag, htmlEndTag, -- find code blocks
rawHtmlBlock, rawHtmlInline )
import Text.Pandoc.Readers.Markdown (smartPunctuation)
import Text.ParserCombinators.Parsec
import Data.Char ( digitToInt )
import Data.Char ( digitToInt, isLetter )
import Control.Monad ( guard )
-- | Parse a Textile text and return a Pandoc document.
readTextile :: ParserState -- ^ Parser state, including options for parser
@ -312,7 +313,19 @@ inlineParsers = [ autoLink
-- | Any string
str :: GenParser Char ParserState Inline
str = many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
str = do
xs <- many1 (noneOf (specialChars ++ "\t\n "))
optional $ charsInBalanced '(' ')' -- drop acronym explanation
-- e.g. PBS(Public Broadcasting Service)
-- parse a following hyphen if followed by a letter
-- (this prevents unwanted interpretation as starting a strikeout section)
result <- option xs $ try $ do
guard $ not . null $ xs
char '-'
next <- lookAhead letter
guard $ isLetter (last xs) || isLetter next
return $ xs ++ "-"
return $ Str result
-- | Textile allows HTML span infos, we discard them
htmlSpan :: GenParser Char ParserState Inline
@ -384,4 +397,4 @@ simpleInline :: GenParser Char ParserState t -- ^ surrounding parser
-> GenParser Char ParserState Inline -- ^ content parser (to be used repeatedly)
simpleInline border construct = surrounded border (inlineWithAttribute) >>=
return . construct . normalizeSpaces
where inlineWithAttribute = (try $ optional attributes) >> inline
where inlineWithAttribute = (try $ optional attributes) >> inline