Refactored markdown reader so that Text.Regex is not used.
Replaced email regex test with a custom email autolink parser (autoLinkEmail). Also replaced 'selfClosingTag' with a custom function 'isSelfClosingTag'. git-svn-id: https://pandoc.googlecode.com/svn/trunk@511 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
parent
8f0cfe9bd0
commit
890fbe97ec
1 changed files with 19 additions and 14 deletions
|
@ -31,7 +31,7 @@ module Text.Pandoc.Readers.Markdown (
|
||||||
readMarkdown
|
readMarkdown
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Data.List ( findIndex, sortBy, transpose )
|
import Data.List ( findIndex, sortBy, transpose, isSuffixOf, intersect )
|
||||||
import Data.Char ( isAlphaNum )
|
import Data.Char ( isAlphaNum )
|
||||||
import Text.ParserCombinators.Pandoc
|
import Text.ParserCombinators.Pandoc
|
||||||
import Text.Pandoc.Definition
|
import Text.Pandoc.Definition
|
||||||
|
@ -43,7 +43,6 @@ import Text.Pandoc.Readers.HTML ( rawHtmlBlock,
|
||||||
htmlEndTag, extractTagType,
|
htmlEndTag, extractTagType,
|
||||||
htmlBlockElement )
|
htmlBlockElement )
|
||||||
import Text.Pandoc.Entities ( decodeEntities )
|
import Text.Pandoc.Entities ( decodeEntities )
|
||||||
import Text.Regex ( matchRegex, mkRegex )
|
|
||||||
import Text.ParserCombinators.Parsec
|
import Text.ParserCombinators.Parsec
|
||||||
|
|
||||||
-- | Read markdown from an input string and return a Pandoc document.
|
-- | Read markdown from an input string and return a Pandoc document.
|
||||||
|
@ -474,14 +473,13 @@ htmlBlock = do
|
||||||
else rawHtmlBlocks
|
else rawHtmlBlocks
|
||||||
|
|
||||||
-- True if tag is self-closing
|
-- True if tag is self-closing
|
||||||
selfClosing tag = case (matchRegex (mkRegex "\\/[[:space:]]*>$") tag) of
|
isSelfClosing tag =
|
||||||
Just _ -> True
|
isSuffixOf "/>" $ filter (\c -> (not (c `elem` " \n\t"))) tag
|
||||||
Nothing -> False
|
|
||||||
|
|
||||||
strictHtmlBlock = try (do
|
strictHtmlBlock = try (do
|
||||||
tag <- anyHtmlBlockTag
|
tag <- anyHtmlBlockTag
|
||||||
let tag' = extractTagType tag
|
let tag' = extractTagType tag
|
||||||
if selfClosing tag || tag' == "hr"
|
if isSelfClosing tag || tag' == "hr"
|
||||||
then return tag
|
then return tag
|
||||||
else do
|
else do
|
||||||
contents <- many (do{notFollowedBy' (htmlEndTag tag');
|
contents <- many (do{notFollowedBy' (htmlEndTag tag');
|
||||||
|
@ -930,17 +928,24 @@ referenceLinkSingle = try (do
|
||||||
else fail "no corresponding key"
|
else fail "no corresponding key"
|
||||||
return (Link label (Ref label)))
|
return (Link label (Ref label)))
|
||||||
|
|
||||||
-- a link <like.this.com>
|
autoLink = do
|
||||||
autoLink = try (do
|
|
||||||
notFollowedBy' (anyHtmlTag <|> anyHtmlEndTag)
|
notFollowedBy' (anyHtmlTag <|> anyHtmlEndTag)
|
||||||
|
autoLinkEmail <|> autoLinkRegular
|
||||||
|
|
||||||
|
-- a link <like@this.com>
|
||||||
|
autoLinkEmail = try $ do
|
||||||
|
char autoLinkStart
|
||||||
|
name <- many1Till (noneOf "/:<> \t\n") (char '@')
|
||||||
|
domain <- sepBy1 (many1 (noneOf "/:.@<> \t\n")) (char '.')
|
||||||
|
let src = name ++ "@" ++ (joinWithSep "." domain)
|
||||||
|
char autoLinkEnd
|
||||||
|
return $ Link [Str src] (Src ("mailto:" ++ src) "")
|
||||||
|
|
||||||
|
-- a link <like.this.com>
|
||||||
|
autoLinkRegular = try $ do
|
||||||
src <- between (char autoLinkStart) (char autoLinkEnd)
|
src <- between (char autoLinkStart) (char autoLinkEnd)
|
||||||
(many (noneOf (spaceChars ++ endLineChars ++ [autoLinkEnd])))
|
(many (noneOf (spaceChars ++ endLineChars ++ [autoLinkEnd])))
|
||||||
case (matchRegex emailAddress src) of
|
return $ Link [Str src] (Src src "")
|
||||||
Just _ -> return (Link [Str src] (Src ("mailto:" ++ src) ""))
|
|
||||||
Nothing -> return (Link [Str src] (Src src "")))
|
|
||||||
|
|
||||||
emailAddress =
|
|
||||||
mkRegex "([^@:/]+)@(([^.]+[.]?)*([^.]+))" -- presupposes no whitespace
|
|
||||||
|
|
||||||
image = try (do
|
image = try (do
|
||||||
char imageStart
|
char imageStart
|
||||||
|
|
Loading…
Reference in a new issue