Changes in entity handling:
+ Entities are parsed (and unicode characters returned) in both Markdown and HTML readers. + Parsers characterEntity, namedEntity, decimalEntity, hexEntity added to Entities.hs; these parse a string and return a unicode character. + Changed 'entity' parser in HTML reader to use the 'characterEntity' parser from Entities.hs. + Added new 'entity' parser to Markdown reader, and added '&' as a special character. Adjusted test suite accordingly since now we get 'Str "AT",Str "&",Str "T"' instead of 'Str "AT&T".. + stringToSGML moved to Entities.hs. escapeSGML removed as redundant, given encodeEntities. + stringToSGML, encodeEntities, and specialCharToEntity are given a boolean parameter that causes only numerical entities to be used. This is used in the docbook writer. The HTML writer uses named entities where possible, but not all docbook-consumers know about the named entities without special instructions, so it seems safer to use numerical entities there. + decodeEntities is rewritten in a way that avoids Text.Regex, using the new parsers. + charToEntity and charToNumericalEntity added to Entities.hs. + Moved specialCharToEntity from Shared.hs to Entities.hs. + Removed unneeded 'decodeEntities' from 'str' parser in HTML and Markdown readers. + Removed sgmlHexEntity, sgmlDecimalEntity, sgmlNamedEntity, and sgmlCharacterEntity from Shared.hs. + Modified Docbook writer so that it doesn't rely on Text.Regex for detecting "mailto" links. git-svn-id: https://pandoc.googlecode.com/svn/trunk@515 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
parent
f2de08864e
commit
d06417125d
10 changed files with 171 additions and 179 deletions
|
@ -29,62 +29,109 @@ Functions for encoding unicode characters as entity references,
|
|||
and vice versa.
|
||||
-}
|
||||
module Text.Pandoc.Entities (
|
||||
entityToChar,
|
||||
charToEntity,
|
||||
decodeEntities,
|
||||
charToNumericalEntity,
|
||||
specialCharToEntity,
|
||||
encodeEntities,
|
||||
decodeEntities,
|
||||
stringToSGML,
|
||||
characterEntity
|
||||
) where
|
||||
import Data.Char ( chr, ord )
|
||||
import Text.Regex ( mkRegex, matchRegexAll, Regex )
|
||||
import Maybe ( fromMaybe )
|
||||
|
||||
-- | Regular expression for numerical coded entity.
|
||||
numericalEntity :: Text.Regex.Regex
|
||||
numericalEntity = mkRegex "&#([0-9]+|[xX][0-9A-Fa-f]+);"
|
||||
|
||||
-- | Regular expression for character entity.
|
||||
characterEntity :: Text.Regex.Regex
|
||||
characterEntity = mkRegex "&#[0-9]+;|&#[xX][0-9A-Fa-f]+;|&[A-Za-z0-9]+;"
|
||||
|
||||
-- | Return a string with all entity references decoded to unicode characters
|
||||
-- where possible.
|
||||
decodeEntities :: String -> String
|
||||
decodeEntities str =
|
||||
case (matchRegexAll characterEntity str) of
|
||||
Nothing -> str
|
||||
Just (before, match, rest, _) -> before ++ replacement ++
|
||||
(decodeEntities rest)
|
||||
where replacement = case (entityToChar match) of
|
||||
Just ch -> [ch]
|
||||
Nothing -> match
|
||||
|
||||
-- | Returns a string with characters replaced with entity references where
|
||||
-- possible.
|
||||
encodeEntities :: String -> String
|
||||
encodeEntities [] = []
|
||||
encodeEntities (c:cs) = if ord c < 128
|
||||
then c:(encodeEntities cs)
|
||||
else (charToEntity c) ++ (encodeEntities cs)
|
||||
|
||||
-- | If the string is a valid entity reference, returns @Just@ the character,
|
||||
-- otherwise @Nothing@.
|
||||
entityToChar :: String -> Maybe Char
|
||||
entityToChar entity =
|
||||
case (lookup entity entityTable) of
|
||||
Just ch -> Just ch
|
||||
Nothing -> case (matchRegexAll numericalEntity entity) of
|
||||
Just (_, _, _, [sub]) -> Just (chr (read ('0':sub)))
|
||||
Nothing -> Nothing
|
||||
import Text.ParserCombinators.Parsec
|
||||
import Data.Maybe ( fromMaybe )
|
||||
|
||||
-- | Returns a string containing an entity reference for the character.
|
||||
charToEntity :: Char -> String
|
||||
charToEntity char =
|
||||
let matches = filter (\(entity, character) -> (character == char))
|
||||
entityTable in
|
||||
if (length matches) == 0
|
||||
then "&#" ++ show (ord char) ++ ";"
|
||||
else fst (head matches)
|
||||
let matches = filter (\(entity, character) -> (character == char))
|
||||
entityTable in
|
||||
if (length matches) == 0
|
||||
then charToNumericalEntity char
|
||||
else fst (head matches)
|
||||
|
||||
-- | Returns a string containing a numerical entity reference for the char.
|
||||
charToNumericalEntity :: Char -> String
|
||||
charToNumericalEntity ch = "&#" ++ show (ord ch) ++ ";"
|
||||
|
||||
-- | Escape special character to SGML entity.
|
||||
specialCharToEntity :: Bool -- ^ Use numerical entities only.
|
||||
-> Char -- ^ Character to convert.
|
||||
-> [Char]
|
||||
specialCharToEntity numericalEntities c =
|
||||
if (c `elem` "&<>\"") || (ord c > 127)
|
||||
then if numericalEntities
|
||||
then charToNumericalEntity c
|
||||
else charToEntity c
|
||||
else [c]
|
||||
|
||||
-- | Parse SGML character entity.
|
||||
characterEntity :: GenParser Char st Char
|
||||
characterEntity = namedEntity <|> hexEntity <|> decimalEntity <?> "SGML entity"
|
||||
|
||||
-- | Parse SGML character entity.
|
||||
namedEntity :: GenParser Char st Char
|
||||
namedEntity = try $ do
|
||||
st <- char '&'
|
||||
body <- many1 alphaNum
|
||||
end <- char ';'
|
||||
let entity = "&" ++ body ++ ";"
|
||||
return $ case (lookup entity entityTable) of
|
||||
Just ch -> ch
|
||||
Nothing -> '?'
|
||||
|
||||
-- | Parse SGML hexadecimal entity.
|
||||
hexEntity :: GenParser Char st Char
|
||||
hexEntity = try $ do
|
||||
st <- string "&#"
|
||||
hex <- oneOf "Xx"
|
||||
body <- many1 (oneOf "0123456789ABCDEFabcdef")
|
||||
end <- char ';'
|
||||
return $ chr $ read ('0':'x':body)
|
||||
|
||||
-- | Parse SGML decimal entity.
|
||||
decimalEntity :: GenParser Char st Char
|
||||
decimalEntity = try $ do
|
||||
st <- string "&#"
|
||||
body <- many1 digit
|
||||
end <- char ';'
|
||||
return $ chr $ read body
|
||||
|
||||
-- | Escape string as needed for SGML. Entity references are not preserved.
|
||||
encodeEntities :: Bool -- ^ Use only numerical entities.
|
||||
-> String -- ^ String to convert.
|
||||
-> String
|
||||
encodeEntities numericalEntities =
|
||||
concatMap (specialCharToEntity numericalEntities)
|
||||
|
||||
-- | Escape string as needed for SGML, using only numerical entities.
|
||||
-- Entity references are not preserved.
|
||||
encodeEntitiesNumerical :: String -> String
|
||||
encodeEntitiesNumerical =
|
||||
concatMap (\c -> "&#" ++ show (ord c) ++ ";")
|
||||
|
||||
-- | Convert entities in a string to characters.
|
||||
decodeEntities :: String -> String
|
||||
decodeEntities str =
|
||||
case parse (many (characterEntity <|> anyChar)) str str of
|
||||
Left err -> error $ "\nError: " ++ show err
|
||||
Right result -> result
|
||||
|
||||
-- | Escape string for SGML, preserving entity references.
|
||||
stringToSGML :: Bool -- ^ Use only numerical entities.
|
||||
-> String -- ^ String to convert.
|
||||
-> String
|
||||
stringToSGML numericalEntities str =
|
||||
let nonentity = do
|
||||
notFollowedBy characterEntity
|
||||
c <- anyChar
|
||||
return $ specialCharToEntity numericalEntities c
|
||||
entity = do
|
||||
ent <- manyTill anyChar (char ';')
|
||||
return (ent ++ ";") in
|
||||
case parse (many (nonentity <|> entity)) str str of
|
||||
Left err -> error $ "\nError: " ++ show err
|
||||
Right result -> concat result
|
||||
|
||||
entityTable :: [(String, Char)]
|
||||
entityTable = [
|
||||
|
|
|
@ -44,7 +44,7 @@ import Text.ParserCombinators.Parsec
|
|||
import Text.ParserCombinators.Pandoc
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Pandoc.Entities ( decodeEntities, entityToChar )
|
||||
import Text.Pandoc.Entities ( characterEntity, decodeEntities )
|
||||
import Maybe ( fromMaybe )
|
||||
import Data.List ( intersect, takeWhile, dropWhile )
|
||||
import Data.Char ( toUpper, toLower, isAlphaNum )
|
||||
|
@ -391,14 +391,9 @@ text = choice [ entity, strong, emph, code, str, linebreak, whitespace ] <?> "t
|
|||
special = choice [ link, image, rawHtmlInline ] <?>
|
||||
"link, inline html, or image"
|
||||
|
||||
entity = try (do
|
||||
char '&'
|
||||
body <- choice [(many1 letter), (try (do
|
||||
char '#'
|
||||
num <- many1 digit
|
||||
return ("#" ++ num)))]
|
||||
char ';'
|
||||
return (Str [fromMaybe '?' (entityToChar ("&" ++ body ++ ";"))]))
|
||||
entity = do
|
||||
ent <- characterEntity
|
||||
return $ Str [ent]
|
||||
|
||||
code = try (do
|
||||
htmlTag "code"
|
||||
|
@ -439,7 +434,7 @@ linebreak = do
|
|||
|
||||
str = do
|
||||
result <- many1 (noneOf "<& \t\n")
|
||||
return (Str (decodeEntities result))
|
||||
return (Str result)
|
||||
|
||||
--
|
||||
-- links and images
|
||||
|
|
|
@ -42,7 +42,7 @@ import Text.Pandoc.Readers.HTML ( rawHtmlBlock,
|
|||
anyHtmlTag, anyHtmlEndTag,
|
||||
htmlEndTag, extractTagType,
|
||||
htmlBlockElement )
|
||||
import Text.Pandoc.Entities ( decodeEntities )
|
||||
import Text.Pandoc.Entities ( characterEntity )
|
||||
import Text.ParserCombinators.Parsec
|
||||
|
||||
-- | Read markdown from an input string and return a Pandoc document.
|
||||
|
@ -88,12 +88,13 @@ blockQuoteChar = '>'
|
|||
hyphenChar = '-'
|
||||
ellipsesChar = '.'
|
||||
listColSepChar = '|'
|
||||
entityStart = '&'
|
||||
|
||||
-- treat these as potentially non-text when parsing inline:
|
||||
specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd,
|
||||
emphStartAlt, emphEndAlt, codeStart, codeEnd, autoLinkEnd,
|
||||
autoLinkStart, mathStart, mathEnd, imageStart, noteStart,
|
||||
hyphenChar, ellipsesChar] ++ quoteChars
|
||||
hyphenChar, ellipsesChar, entityStart] ++ quoteChars
|
||||
|
||||
--
|
||||
-- auxiliary functions
|
||||
|
@ -674,7 +675,7 @@ text = choice [ escapedChar, math, strong, emph, smartPunctuation,
|
|||
code, ltSign, symbol,
|
||||
str, linebreak, tabchar, whitespace, endline ] <?> "text"
|
||||
|
||||
inline = choice [ rawLaTeXInline', escapedChar, special, text ] <?> "inline"
|
||||
inline = choice [ rawLaTeXInline', escapedChar, entity, special, text ] <?> "inline"
|
||||
|
||||
special = choice [ noteRef, inlineNote, link, referenceLink, rawHtmlInline',
|
||||
autoLink, image ] <?> "link, inline html, note, or image"
|
||||
|
@ -827,9 +828,13 @@ linebreak = try (do
|
|||
|
||||
nonEndline = noneOf endLineChars
|
||||
|
||||
entity = do
|
||||
ent <- characterEntity
|
||||
return $ Str [ent]
|
||||
|
||||
str = do
|
||||
result <- many1 ((noneOf (specialChars ++ spaceChars ++ endLineChars)))
|
||||
return (Str (decodeEntities result))
|
||||
return (Str result)
|
||||
|
||||
-- an endline character that can be treated as a space, not a structural break
|
||||
endline = try (do
|
||||
|
|
|
@ -65,8 +65,6 @@ module Text.Pandoc.Shared (
|
|||
replaceReferenceLinks,
|
||||
replaceRefLinksBlockList,
|
||||
-- * SGML
|
||||
escapeSGML,
|
||||
stringToSGML,
|
||||
inTags,
|
||||
selfClosingTag,
|
||||
inTagsSimple,
|
||||
|
@ -74,7 +72,7 @@ module Text.Pandoc.Shared (
|
|||
) where
|
||||
import Text.Pandoc.Definition
|
||||
import Text.ParserCombinators.Parsec as Parsec
|
||||
import Text.Pandoc.Entities ( decodeEntities, charToEntity )
|
||||
import Text.Pandoc.Entities ( decodeEntities, encodeEntities, stringToSGML )
|
||||
import Text.PrettyPrint.HughesPJ as PP ( text, char, (<>),
|
||||
($$), nest, Doc, isEmpty )
|
||||
import Data.Char ( toLower, ord )
|
||||
|
@ -523,61 +521,11 @@ replaceRefLinksInline keytable (Quoted t lst) =
|
|||
Quoted t (map (replaceRefLinksInline keytable) lst)
|
||||
replaceRefLinksInline keytable other = other
|
||||
|
||||
-- | Parse SGML character entity.
|
||||
sgmlCharacterEntity :: GenParser Char st [Char]
|
||||
sgmlCharacterEntity = sgmlNamedEntity <|> sgmlDecimalEntity <|>
|
||||
sgmlHexEntity <?> "SGML entity"
|
||||
|
||||
-- | Parse SGML character entity.
|
||||
sgmlNamedEntity :: GenParser Char st [Char]
|
||||
sgmlNamedEntity = try $ do
|
||||
st <- Parsec.char '&'
|
||||
body <- many1 alphaNum
|
||||
end <- Parsec.char ';'
|
||||
return $ (st:body) ++ [end]
|
||||
|
||||
-- | Parse SGML decimal entity.
|
||||
sgmlDecimalEntity :: GenParser Char st [Char]
|
||||
sgmlDecimalEntity = try $ do
|
||||
st <- string "&#"
|
||||
body <- many1 (oneOf "0123456789")
|
||||
end <- Parsec.char ';'
|
||||
return $ st ++ body ++ [end]
|
||||
|
||||
-- | Parse SGML hexadecimal entity.
|
||||
sgmlHexEntity :: GenParser Char st [Char]
|
||||
sgmlHexEntity = try $ do
|
||||
st <- string "&#"
|
||||
hex <- oneOf "Xx"
|
||||
body <- many1 (oneOf "0123456789ABCDEFabcdef")
|
||||
end <- Parsec.char ';'
|
||||
return $ st ++ (hex:body) ++ [end]
|
||||
|
||||
-- | Escape special character to SGML entity.
|
||||
specialCharToEntity :: Char -> [Char]
|
||||
specialCharToEntity c = if (c `elem` "&<>\"") || (ord c > 127)
|
||||
then charToEntity c
|
||||
else [c]
|
||||
|
||||
-- | Escape string, preserving character entities.
|
||||
stringToSGML :: String -> String
|
||||
stringToSGML str =
|
||||
let segment = sgmlCharacterEntity <|>
|
||||
(do{c <- anyChar;
|
||||
return $ specialCharToEntity c})
|
||||
sgmlString = (do{segs <- many segment; return $ concat segs}) in
|
||||
case parse sgmlString str str of
|
||||
Left err -> error $ "\nError:\n" ++ show err
|
||||
Right result -> result
|
||||
|
||||
-- | Escape string as needed for SGML. Entity references are not preserved.
|
||||
escapeSGML :: String -> String
|
||||
escapeSGML = concatMap specialCharToEntity
|
||||
|
||||
-- | Return a text object with a string of formatted SGML attributes.
|
||||
attributeList :: [(String, String)] -> Doc
|
||||
attributeList = text . concatMap
|
||||
(\(a, b) -> " " ++ stringToSGML a ++ "=\"" ++ stringToSGML b ++ "\"")
|
||||
(\(a, b) -> " " ++ stringToSGML True a ++ "=\"" ++
|
||||
stringToSGML True b ++ "\"")
|
||||
|
||||
-- | Put the supplied contents between start and end tags of tagType,
|
||||
-- with specified attributes and (if specified) indentation.
|
||||
|
|
|
@ -32,10 +32,9 @@ module Text.Pandoc.Writers.Docbook (
|
|||
) where
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Pandoc.Entities ( encodeEntities )
|
||||
import Text.Regex ( mkRegex, matchRegex )
|
||||
import Text.Pandoc.Entities ( encodeEntities, stringToSGML )
|
||||
import Data.Char ( toLower, ord )
|
||||
import Data.List ( isPrefixOf, partition )
|
||||
import Data.List ( isPrefixOf, partition, drop )
|
||||
import Text.PrettyPrint.HughesPJ hiding ( Str )
|
||||
|
||||
-- | Data structure for defining hierarchical Pandoc documents
|
||||
|
@ -65,8 +64,8 @@ authorToDocbook name = inTagsIndented "author" $
|
|||
then -- last name first
|
||||
let (lastname, rest) = break (==',') name
|
||||
firstname = removeLeadingSpace rest in
|
||||
inTagsSimple "firstname" (text $ stringToSGML firstname) <>
|
||||
inTagsSimple "surname" (text $ stringToSGML lastname)
|
||||
inTagsSimple "firstname" (text $ stringToSGML True firstname) <>
|
||||
inTagsSimple "surname" (text $ stringToSGML True lastname)
|
||||
else -- last name last
|
||||
let namewords = words name
|
||||
lengthname = length namewords
|
||||
|
@ -74,8 +73,8 @@ authorToDocbook name = inTagsIndented "author" $
|
|||
0 -> ("","")
|
||||
1 -> ("", name)
|
||||
n -> (joinWithSep " " (take (n-1) namewords), last namewords) in
|
||||
inTagsSimple "firstname" (text $ stringToSGML firstname) $$
|
||||
inTagsSimple "surname" (text $ stringToSGML lastname)
|
||||
inTagsSimple "firstname" (text $ stringToSGML True firstname) $$
|
||||
inTagsSimple "surname" (text $ stringToSGML True lastname)
|
||||
|
||||
-- | Convert Pandoc document to string in Docbook format.
|
||||
writeDocbook :: WriterOptions -> Pandoc -> String
|
||||
|
@ -87,7 +86,7 @@ writeDocbook opts (Pandoc (Meta title authors date) blocks) =
|
|||
then inTagsIndented "articleinfo" $
|
||||
(inTagsSimple "title" (wrap opts title)) $$
|
||||
(vcat (map authorToDocbook authors)) $$
|
||||
(inTagsSimple "date" (text $ stringToSGML date))
|
||||
(inTagsSimple "date" (text $ stringToSGML True date))
|
||||
else empty
|
||||
blocks' = replaceReferenceLinks blocks
|
||||
(noteBlocks, blocks'') = partition isNoteBlock blocks'
|
||||
|
@ -142,7 +141,7 @@ blockToDocbook opts (Para lst) =
|
|||
blockToDocbook opts (BlockQuote blocks) =
|
||||
inTagsIndented "blockquote" (blocksToDocbook opts blocks)
|
||||
blockToDocbook opts (CodeBlock str) =
|
||||
text "<screen>\n" <> text (escapeSGML str) <> text "\n</screen>"
|
||||
text "<screen>\n" <> text (encodeEntities True str) <> text "\n</screen>"
|
||||
blockToDocbook opts (BulletList lst) =
|
||||
inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst
|
||||
blockToDocbook opts (OrderedList lst) =
|
||||
|
@ -199,7 +198,7 @@ inlinesToDocbook opts lst = hcat (map (inlineToDocbook opts) lst)
|
|||
|
||||
-- | Convert an inline element to Docbook.
|
||||
inlineToDocbook :: WriterOptions -> Inline -> Doc
|
||||
inlineToDocbook opts (Str str) = text $ stringToSGML str
|
||||
inlineToDocbook opts (Str str) = text $ stringToSGML True str
|
||||
inlineToDocbook opts (Emph lst) =
|
||||
inTagsSimple "emphasis" (inlinesToDocbook opts lst)
|
||||
inlineToDocbook opts (Strong lst) =
|
||||
|
@ -212,24 +211,23 @@ inlineToDocbook opts Ellipses = text "…"
|
|||
inlineToDocbook opts EmDash = text "—"
|
||||
inlineToDocbook opts EnDash = text "–"
|
||||
inlineToDocbook opts (Code str) =
|
||||
inTagsSimple "literal" $ text (escapeSGML str)
|
||||
inTagsSimple "literal" $ text (encodeEntities True str)
|
||||
inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str)
|
||||
inlineToDocbook opts (HtmlInline str) = empty
|
||||
inlineToDocbook opts LineBreak =
|
||||
text $ "<literallayout></literallayout>"
|
||||
inlineToDocbook opts Space = char ' '
|
||||
inlineToDocbook opts (Link txt (Src src tit)) =
|
||||
case (matchRegex (mkRegex "mailto:(.*)") src) of
|
||||
Just [addr] -> inTagsSimple "email" $ text (escapeSGML addr)
|
||||
Nothing -> inTags False "ulink" [("url", src)] $
|
||||
inlinesToDocbook opts txt
|
||||
if isPrefixOf "mailto:" src
|
||||
then inTagsSimple "email" $ text (encodeEntities True $ drop 7 src)
|
||||
else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt
|
||||
inlineToDocbook opts (Link text (Ref ref)) = empty -- shouldn't occur
|
||||
inlineToDocbook opts (Image alt (Src src tit)) =
|
||||
let titleDoc = if null tit
|
||||
then empty
|
||||
else inTagsIndented "objectinfo" $
|
||||
inTagsIndented "title"
|
||||
(text $ stringToSGML tit) in
|
||||
(text $ stringToSGML True tit) in
|
||||
inTagsIndented "inlinemediaobject" $
|
||||
inTagsIndented "imageobject" $
|
||||
titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
|
||||
|
|
|
@ -32,7 +32,7 @@ module Text.Pandoc.Writers.HTML (
|
|||
) where
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Pandoc.Entities ( encodeEntities )
|
||||
import Text.Pandoc.Entities ( encodeEntities, stringToSGML )
|
||||
import Text.Regex ( mkRegex, matchRegex )
|
||||
import Numeric ( showHex )
|
||||
import Data.Char ( ord, toLower )
|
||||
|
@ -127,11 +127,11 @@ htmlHeader opts (Meta title authors date) =
|
|||
then empty
|
||||
else selfClosingTag "meta" [("name", "author"),
|
||||
("content",
|
||||
joinWithSep ", " (map stringToSGML authors))]
|
||||
joinWithSep ", " (map (stringToSGML False) authors))]
|
||||
datetext = if (date == "")
|
||||
then empty
|
||||
else selfClosingTag "meta" [("name", "date"),
|
||||
("content", stringToSGML date)] in
|
||||
("content", stringToSGML False date)] in
|
||||
text (writerHeader opts) $$ authortext $$ datetext $$ titletext $$
|
||||
text "</head>\n<body>"
|
||||
|
||||
|
@ -168,7 +168,7 @@ blockToHtml opts (Note ref lst) =
|
|||
(text "↩")
|
||||
blockToHtml opts (Key _ _) = empty
|
||||
blockToHtml opts (CodeBlock str) =
|
||||
text "<pre><code>" <> text (escapeSGML str) <> text "\n</code></pre>"
|
||||
text "<pre><code>" <> text (encodeEntities False str) <> text "\n</code></pre>"
|
||||
blockToHtml opts (RawHtml str) = text str
|
||||
blockToHtml opts (BulletList lst) =
|
||||
let attribs = if (writerIncremental opts)
|
||||
|
@ -234,7 +234,7 @@ inlineToHtml opts (Emph lst) =
|
|||
inlineToHtml opts (Strong lst) =
|
||||
inTagsSimple "strong" (inlineListToHtml opts lst)
|
||||
inlineToHtml opts (Code str) =
|
||||
inTagsSimple "code" $ text (escapeSGML str)
|
||||
inTagsSimple "code" $ text (encodeEntities False str)
|
||||
inlineToHtml opts (Quoted SingleQuote lst) =
|
||||
text "‘" <> (inlineListToHtml opts lst) <> text "’"
|
||||
inlineToHtml opts (Quoted DoubleQuote lst) =
|
||||
|
@ -243,16 +243,16 @@ inlineToHtml opts EmDash = text "—"
|
|||
inlineToHtml opts EnDash = text "–"
|
||||
inlineToHtml opts Ellipses = text "…"
|
||||
inlineToHtml opts Apostrophe = text "’"
|
||||
inlineToHtml opts (Str str) = text $ stringToSGML str
|
||||
inlineToHtml opts (TeX str) = text $ escapeSGML str
|
||||
inlineToHtml opts (Str str) = text $ stringToSGML False str
|
||||
inlineToHtml opts (TeX str) = text $ encodeEntities False str
|
||||
inlineToHtml opts (HtmlInline str) = text str
|
||||
inlineToHtml opts (LineBreak) = selfClosingTag "br" []
|
||||
inlineToHtml opts Space = space
|
||||
inlineToHtml opts (Link txt (Src src tit)) =
|
||||
let title = stringToSGML tit in
|
||||
let title = stringToSGML False tit in
|
||||
if (isPrefixOf "mailto:" src)
|
||||
then obfuscateLink opts txt src
|
||||
else inTags False "a" ([("href", escapeSGML src)] ++
|
||||
else inTags False "a" ([("href", encodeEntities False src)] ++
|
||||
if null tit then [] else [("title", title)])
|
||||
(inlineListToHtml opts txt)
|
||||
inlineToHtml opts (Link txt (Ref ref)) =
|
||||
|
@ -260,7 +260,7 @@ inlineToHtml opts (Link txt (Ref ref)) =
|
|||
(inlineListToHtml opts ref) <> char ']'
|
||||
-- this is what markdown does, for better or worse
|
||||
inlineToHtml opts (Image alt (Src source tit)) =
|
||||
let title = stringToSGML tit
|
||||
let title = stringToSGML False tit
|
||||
alternate = render $ inlineListToHtml opts alt in
|
||||
selfClosingTag "img" $ [("src", source)] ++
|
||||
(if null alternate then [] else [("alt", alternate)]) ++
|
||||
|
|
|
@ -32,7 +32,6 @@ Markdown: <http://daringfireball.net/projects/markdown/>
|
|||
module Text.Pandoc.Writers.Markdown (
|
||||
writeMarkdown
|
||||
) where
|
||||
import Text.Regex ( matchRegex, mkRegex )
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Shared
|
||||
import Data.List ( group, isPrefixOf, drop )
|
||||
|
|
|
@ -240,8 +240,8 @@ Pandoc (Meta [Str "Pandoc",Space,Str "Test",Space,Str "Suite"] ["John MacFarlane
|
|||
, [ Plain [Str "section:",Space,Str "\167"] ]
|
||||
, [ Plain [Str "set",Space,Str "membership:",Space,Str "\8712"] ]
|
||||
, [ Plain [Str "copyright:",Space,Str "\169"] ] ]
|
||||
, Para [Str "AT&T",Space,Str "has",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "their",Space,Str "name",Str "."]
|
||||
, Para [Str "AT&T",Space,Str "is",Space,Str "another",Space,Str "way",Space,Str "to",Space,Str "write",Space,Str "it",Str "."]
|
||||
, Para [Str "AT",Str "&",Str "T",Space,Str "has",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "their",Space,Str "name",Str "."]
|
||||
, Para [Str "AT",Str "&",Str "T",Space,Str "is",Space,Str "another",Space,Str "way",Space,Str "to",Space,Str "write",Space,Str "it",Str "."]
|
||||
, Para [Str "This",Space,Str "&",Space,Str "that",Str "."]
|
||||
, Para [Str "4",Space,Str "<",Space,Str "5",Str "."]
|
||||
, Para [Str "6",Space,Str ">",Space,Str "5",Str "."]
|
||||
|
@ -294,7 +294,7 @@ Pandoc (Meta [Str "Pandoc",Space,Str "Test",Space,Str "Suite"] ["John MacFarlane
|
|||
, Key [Str "bar"] (Src "/url/" "Title with "quotes" inside")
|
||||
, Header 2 [Str "With",Space,Str "ampersands"]
|
||||
, Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Link [Str "link",Space,Str "with",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "the",Space,Str "URL"] (Ref [Str "1"]),Str "."]
|
||||
, Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Str "link",Space,Str "with",Space,Str "an",Space,Str "amersand",Space,Str "in",Space,Str "the",Space,Str "link",Space,Str "text:",Space,Link [Str "AT&T"] (Ref [Str "2"]),Str "."]
|
||||
, Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Str "link",Space,Str "with",Space,Str "an",Space,Str "amersand",Space,Str "in",Space,Str "the",Space,Str "link",Space,Str "text:",Space,Link [Str "AT",Str "&",Str "T"] (Ref [Str "2"]),Str "."]
|
||||
, Para [Str "Here",Apostrophe,Str "s",Space,Str "an",Space,Link [Str "inline",Space,Str "link"] (Src "/script?foo=1&bar=2" ""),Str "."]
|
||||
, Para [Str "Here",Apostrophe,Str "s",Space,Str "an",Space,Link [Str "inline",Space,Str "link",Space,Str "in",Space,Str "pointy",Space,Str "braces"] (Src "/script?foo=1&bar=2" ""),Str "."]
|
||||
, Key [Str "1"] (Src "http://example.com/?foo=1&bar=2" "")
|
||||
|
|
|
@ -89,7 +89,7 @@
|
|||
</para>
|
||||
<screen>
|
||||
sub status {
|
||||
print "working";
|
||||
print "working";
|
||||
}
|
||||
</screen>
|
||||
<para>
|
||||
|
@ -122,7 +122,7 @@ sub status {
|
|||
</blockquote>
|
||||
</blockquote>
|
||||
<para>
|
||||
This should not be a block quote: 2 > 1.
|
||||
This should not be a block quote: 2 > 1.
|
||||
</para>
|
||||
<para>
|
||||
Box-style:
|
||||
|
@ -133,7 +133,7 @@ sub status {
|
|||
</para>
|
||||
<screen>
|
||||
sub status {
|
||||
print "working";
|
||||
print "working";
|
||||
}
|
||||
</screen>
|
||||
</blockquote>
|
||||
|
@ -177,7 +177,7 @@ sub status {
|
|||
---- (should be four hyphens)
|
||||
|
||||
sub status {
|
||||
print "working";
|
||||
print "working";
|
||||
}
|
||||
|
||||
this code block is indented by one tab
|
||||
|
@ -188,7 +188,7 @@ this code block is indented by one tab
|
|||
<screen>
|
||||
this code block is indented by two tabs
|
||||
|
||||
These should not be escaped: \$ \\ \> \[ \{
|
||||
These should not be escaped: \$ \\ \> \[ \{
|
||||
</screen>
|
||||
</section>
|
||||
<section>
|
||||
|
@ -577,9 +577,9 @@ These should not be escaped: \$ \\ \> \[ \{
|
|||
word.
|
||||
</para>
|
||||
<para>
|
||||
This is code: <literal>></literal>, <literal>$</literal>,
|
||||
This is code: <literal>></literal>, <literal>$</literal>,
|
||||
<literal>\</literal>, <literal>\$</literal>,
|
||||
<literal><html></literal>.
|
||||
<literal><html></literal>.
|
||||
</para>
|
||||
</section>
|
||||
<section>
|
||||
|
@ -602,7 +602,7 @@ These should not be escaped: \$ \\ \> \[ \{
|
|||
</para>
|
||||
<para>
|
||||
Here is some quoted <quote><literal>code</literal></quote> and a
|
||||
<quote><ulink url="http://example.com/?foo=1&bar=2">quoted link</ulink></quote>.
|
||||
<quote><ulink url="http://example.com/?foo=1&bar=2">quoted link</ulink></quote>.
|
||||
</para>
|
||||
<para>
|
||||
Some dashes: one—two—three—four—five.
|
||||
|
@ -691,9 +691,9 @@ These should not be escaped: \$ \\ \> \[ \{
|
|||
</para>
|
||||
<para>
|
||||
<literal>\begin{tabular}{|l|l|}\hline
|
||||
Animal & Number \\ \hline
|
||||
Dog & 2 \\
|
||||
Cat & 1 \\ \hline
|
||||
Animal & Number \\ \hline
|
||||
Dog & 2 \\
|
||||
Cat & 1 \\ \hline
|
||||
\end{tabular}</literal>
|
||||
</para>
|
||||
</section>
|
||||
|
@ -705,44 +705,44 @@ Cat & 1 \\ \hline
|
|||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
I hat: Î
|
||||
I hat: Î
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
o umlaut: ö
|
||||
o umlaut: ö
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
section: §
|
||||
section: §
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
set membership: ∈
|
||||
set membership: ∈
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
copyright: ©
|
||||
copyright: ©
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
<para>
|
||||
AT&T has an ampersand in their name.
|
||||
AT&T has an ampersand in their name.
|
||||
</para>
|
||||
<para>
|
||||
AT&T is another way to write it.
|
||||
AT&T is another way to write it.
|
||||
</para>
|
||||
<para>
|
||||
This & that.
|
||||
This & that.
|
||||
</para>
|
||||
<para>
|
||||
4 < 5.
|
||||
4 < 5.
|
||||
</para>
|
||||
<para>
|
||||
6 > 5.
|
||||
6 > 5.
|
||||
</para>
|
||||
<para>
|
||||
Backslash: \
|
||||
|
@ -775,7 +775,7 @@ Cat & 1 \\ \hline
|
|||
Right paren: )
|
||||
</para>
|
||||
<para>
|
||||
Greater-than: >
|
||||
Greater-than: >
|
||||
</para>
|
||||
<para>
|
||||
Hash: #
|
||||
|
@ -868,25 +868,25 @@ Cat & 1 \\ \hline
|
|||
<title>With ampersands</title>
|
||||
<para>
|
||||
Here's a
|
||||
<ulink url="http://example.com/?foo=1&bar=2">link with an ampersand in the URL</ulink>.
|
||||
<ulink url="http://example.com/?foo=1&bar=2">link with an ampersand in the URL</ulink>.
|
||||
</para>
|
||||
<para>
|
||||
Here's a link with an amersand in the link text:
|
||||
<ulink url="http://att.com/">AT&T</ulink>.
|
||||
<ulink url="http://att.com/">AT&T</ulink>.
|
||||
</para>
|
||||
<para>
|
||||
Here's an <ulink url="/script?foo=1&bar=2">inline link</ulink>.
|
||||
Here's an <ulink url="/script?foo=1&bar=2">inline link</ulink>.
|
||||
</para>
|
||||
<para>
|
||||
Here's an
|
||||
<ulink url="/script?foo=1&bar=2">inline link in pointy braces</ulink>.
|
||||
<ulink url="/script?foo=1&bar=2">inline link in pointy braces</ulink>.
|
||||
</para>
|
||||
</section>
|
||||
<section>
|
||||
<title>Autolinks</title>
|
||||
<para>
|
||||
With an ampersand:
|
||||
<ulink url="http://example.com/?foo=1&bar=2">http://example.com/?foo=1&bar=2</ulink>
|
||||
<ulink url="http://example.com/?foo=1&bar=2">http://example.com/?foo=1&bar=2</ulink>
|
||||
</para>
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
|
@ -916,10 +916,10 @@ Cat & 1 \\ \hline
|
|||
</blockquote>
|
||||
<para>
|
||||
Auto-links should not occur here:
|
||||
<literal><http://example.com/></literal>
|
||||
<literal><http://example.com/></literal>
|
||||
</para>
|
||||
<screen>
|
||||
or here: <http://example.com/>
|
||||
or here: <http://example.com/>
|
||||
</screen>
|
||||
</section>
|
||||
</section>
|
||||
|
@ -970,7 +970,7 @@ or here: <http://example.com/>
|
|||
footnote (as with list items).
|
||||
</para>
|
||||
<screen>
|
||||
{ <code> }
|
||||
{ <code> }
|
||||
</screen>
|
||||
<para>
|
||||
If you want, you can indent every line, but you can also be lazy
|
||||
|
|
|
@ -240,8 +240,8 @@ Pandoc (Meta [Str "Pandoc",Space,Str "Test",Space,Str "Suite"] ["John MacFarlane
|
|||
, [ Plain [Str "section:",Space,Str "\167"] ]
|
||||
, [ Plain [Str "set",Space,Str "membership:",Space,Str "\8712"] ]
|
||||
, [ Plain [Str "copyright:",Space,Str "\169"] ] ]
|
||||
, Para [Str "AT&T",Space,Str "has",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "their",Space,Str "name",Str "."]
|
||||
, Para [Str "AT&T",Space,Str "is",Space,Str "another",Space,Str "way",Space,Str "to",Space,Str "write",Space,Str "it",Str "."]
|
||||
, Para [Str "AT",Str "&",Str "T",Space,Str "has",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "their",Space,Str "name",Str "."]
|
||||
, Para [Str "AT",Str "&",Str "T",Space,Str "is",Space,Str "another",Space,Str "way",Space,Str "to",Space,Str "write",Space,Str "it",Str "."]
|
||||
, Para [Str "This",Space,Str "&",Space,Str "that",Str "."]
|
||||
, Para [Str "4",Space,Str "<",Space,Str "5",Str "."]
|
||||
, Para [Str "6",Space,Str ">",Space,Str "5",Str "."]
|
||||
|
@ -294,7 +294,7 @@ Pandoc (Meta [Str "Pandoc",Space,Str "Test",Space,Str "Suite"] ["John MacFarlane
|
|||
, Key [Str "bar"] (Src "/url/" "Title with "quotes" inside")
|
||||
, Header 2 [Str "With",Space,Str "ampersands"]
|
||||
, Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Link [Str "link",Space,Str "with",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "the",Space,Str "URL"] (Ref [Str "1"]),Str "."]
|
||||
, Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Str "link",Space,Str "with",Space,Str "an",Space,Str "amersand",Space,Str "in",Space,Str "the",Space,Str "link",Space,Str "text:",Space,Link [Str "AT&T"] (Ref [Str "2"]),Str "."]
|
||||
, Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Str "link",Space,Str "with",Space,Str "an",Space,Str "amersand",Space,Str "in",Space,Str "the",Space,Str "link",Space,Str "text:",Space,Link [Str "AT",Str "&",Str "T"] (Ref [Str "2"]),Str "."]
|
||||
, Para [Str "Here",Apostrophe,Str "s",Space,Str "an",Space,Link [Str "inline",Space,Str "link"] (Src "/script?foo=1&bar=2" ""),Str "."]
|
||||
, Para [Str "Here",Apostrophe,Str "s",Space,Str "an",Space,Link [Str "inline",Space,Str "link",Space,Str "in",Space,Str "pointy",Space,Str "braces"] (Src "/script?foo=1&bar=2" ""),Str "."]
|
||||
, Key [Str "1"] (Src "http://example.com/?foo=1&bar=2" "")
|
||||
|
|
Loading…
Add table
Reference in a new issue