Textile reader: modified str to handle acronyms, hyphens.
* A single hyphen between two word characters is no longer a potential strikeout-starter. * Acronym explanations are dropped.
This commit is contained in:
parent
55e43c4991
commit
36d4aa4a09
1 changed files with 16 additions and 3 deletions
|
@ -70,7 +70,8 @@ import Text.Pandoc.Readers.HTML ( htmlTag, htmlEndTag, -- find code blocks
|
||||||
rawHtmlBlock, rawHtmlInline )
|
rawHtmlBlock, rawHtmlInline )
|
||||||
import Text.Pandoc.Readers.Markdown (smartPunctuation)
|
import Text.Pandoc.Readers.Markdown (smartPunctuation)
|
||||||
import Text.ParserCombinators.Parsec
|
import Text.ParserCombinators.Parsec
|
||||||
import Data.Char ( digitToInt )
|
import Data.Char ( digitToInt, isLetter )
|
||||||
|
import Control.Monad ( guard )
|
||||||
|
|
||||||
-- | Parse a Textile text and return a Pandoc document.
|
-- | Parse a Textile text and return a Pandoc document.
|
||||||
readTextile :: ParserState -- ^ Parser state, including options for parser
|
readTextile :: ParserState -- ^ Parser state, including options for parser
|
||||||
|
@ -312,7 +313,19 @@ inlineParsers = [ autoLink
|
||||||
|
|
||||||
-- | Any string
|
-- | Any string
|
||||||
str :: GenParser Char ParserState Inline
|
str :: GenParser Char ParserState Inline
|
||||||
str = many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
|
str = do
|
||||||
|
xs <- many1 (noneOf (specialChars ++ "\t\n "))
|
||||||
|
optional $ charsInBalanced '(' ')' -- drop acronym explanation
|
||||||
|
-- e.g. PBS(Public Broadcasting Service)
|
||||||
|
-- parse a following hyphen if followed by a letter
|
||||||
|
-- (this prevents unwanted interpretation as starting a strikeout section)
|
||||||
|
result <- option xs $ try $ do
|
||||||
|
guard $ not . null $ xs
|
||||||
|
char '-'
|
||||||
|
next <- lookAhead letter
|
||||||
|
guard $ isLetter (last xs) || isLetter next
|
||||||
|
return $ xs ++ "-"
|
||||||
|
return $ Str result
|
||||||
|
|
||||||
-- | Textile allows HTML span infos, we discard them
|
-- | Textile allows HTML span infos, we discard them
|
||||||
htmlSpan :: GenParser Char ParserState Inline
|
htmlSpan :: GenParser Char ParserState Inline
|
||||||
|
|
Loading…
Reference in a new issue