Textile reader: modified str to handle acronyms, hyphens.
* A single hyphen between two word characters is no longer a potential strikeout-starter. * Acronym explanations are dropped.
This commit is contained in:
parent
55e43c4991
commit
36d4aa4a09
1 changed files with 16 additions and 3 deletions
|
@ -70,7 +70,8 @@ import Text.Pandoc.Readers.HTML ( htmlTag, htmlEndTag, -- find code blocks
|
||||||
rawHtmlBlock, rawHtmlInline )
|
rawHtmlBlock, rawHtmlInline )
|
||||||
import Text.Pandoc.Readers.Markdown (smartPunctuation)
|
import Text.Pandoc.Readers.Markdown (smartPunctuation)
|
||||||
import Text.ParserCombinators.Parsec
|
import Text.ParserCombinators.Parsec
|
||||||
import Data.Char ( digitToInt )
|
import Data.Char ( digitToInt, isLetter )
|
||||||
|
import Control.Monad ( guard )
|
||||||
|
|
||||||
-- | Parse a Textile text and return a Pandoc document.
|
-- | Parse a Textile text and return a Pandoc document.
|
||||||
readTextile :: ParserState -- ^ Parser state, including options for parser
|
readTextile :: ParserState -- ^ Parser state, including options for parser
|
||||||
|
@ -312,7 +313,19 @@ inlineParsers = [ autoLink
|
||||||
|
|
||||||
-- | Any string
|
-- | Any string
|
||||||
str :: GenParser Char ParserState Inline
|
str :: GenParser Char ParserState Inline
|
||||||
str = many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
|
str = do
|
||||||
|
xs <- many1 (noneOf (specialChars ++ "\t\n "))
|
||||||
|
optional $ charsInBalanced '(' ')' -- drop acronym explanation
|
||||||
|
-- e.g. PBS(Public Broadcasting Service)
|
||||||
|
-- parse a following hyphen if followed by a letter
|
||||||
|
-- (this prevents unwanted interpretation as starting a strikeout section)
|
||||||
|
result <- option xs $ try $ do
|
||||||
|
guard $ not . null $ xs
|
||||||
|
char '-'
|
||||||
|
next <- lookAhead letter
|
||||||
|
guard $ isLetter (last xs) || isLetter next
|
||||||
|
return $ xs ++ "-"
|
||||||
|
return $ Str result
|
||||||
|
|
||||||
-- | Textile allows HTML span infos, we discard them
|
-- | Textile allows HTML span infos, we discard them
|
||||||
htmlSpan :: GenParser Char ParserState Inline
|
htmlSpan :: GenParser Char ParserState Inline
|
||||||
|
@ -384,4 +397,4 @@ simpleInline :: GenParser Char ParserState t -- ^ surrounding parser
|
||||||
-> GenParser Char ParserState Inline -- ^ content parser (to be used repeatedly)
|
-> GenParser Char ParserState Inline -- ^ content parser (to be used repeatedly)
|
||||||
simpleInline border construct = surrounded border (inlineWithAttribute) >>=
|
simpleInline border construct = surrounded border (inlineWithAttribute) >>=
|
||||||
return . construct . normalizeSpaces
|
return . construct . normalizeSpaces
|
||||||
where inlineWithAttribute = (try $ optional attributes) >> inline
|
where inlineWithAttribute = (try $ optional attributes) >> inline
|
||||||
|
|
Loading…
Add table
Reference in a new issue