Changes in entity handling:

+ Entities are parsed (and unicode characters returned) in both
  Markdown and HTML readers.
+ Parsers characterEntity, namedEntity, decimalEntity, hexEntity added
  to Entities.hs; these parse a string and return a unicode character.
+ Changed 'entity' parser in HTML reader to use the 'characterEntity'
  parser from Entities.hs.  
+ Added new 'entity' parser to Markdown reader, and added '&' as a 
  special character.  Adjusted test suite accordingly since now we 
  get 'Str "AT",Str "&",Str "T"' instead of 'Str "AT&T"..
+ stringToSGML moved to Entities.hs.  escapeSGML removed as redundant,
  given encodeEntities.
+ stringToSGML, encodeEntities, and specialCharToEntity are given a
  boolean parameter that causes only numerical entities to be used.
  This is used in the docbook writer.  The HTML writer uses named
  entities where possible, but not all docbook-consumers know about
  the named entities without special instructions, so it seems safer
  to use numerical entities there.
+ decodeEntities is rewritten in a way that avoids Text.Regex, using
  the new parsers.
+ charToEntity and charToNumericalEntity added to Entities.hs.
+ Moved specialCharToEntity from Shared.hs to Entities.hs.
+ Removed unneeded 'decodeEntities' from 'str' parser in HTML and
  Markdown readers.
+ Removed sgmlHexEntity, sgmlDecimalEntity, sgmlNamedEntity, and
  sgmlCharacterEntity from Shared.hs.
+ Modified Docbook writer so that it doesn't rely on Text.Regex for
  detecting "mailto" links.



git-svn-id: https://pandoc.googlecode.com/svn/trunk@515 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
fiddlosopher 2007-01-27 03:04:40 +00:00
parent f2de08864e
commit d06417125d
10 changed files with 171 additions and 179 deletions

View file

@ -29,62 +29,109 @@ Functions for encoding unicode characters as entity references,
and vice versa.
-}
module Text.Pandoc.Entities (
entityToChar,
charToEntity,
decodeEntities,
charToNumericalEntity,
specialCharToEntity,
encodeEntities,
decodeEntities,
stringToSGML,
characterEntity
) where
import Data.Char ( chr, ord )
import Text.Regex ( mkRegex, matchRegexAll, Regex )
import Maybe ( fromMaybe )
-- | Regular expression for numerical coded entity.
numericalEntity :: Text.Regex.Regex
numericalEntity = mkRegex "&#([0-9]+|[xX][0-9A-Fa-f]+);"
-- | Regular expression for character entity.
characterEntity :: Text.Regex.Regex
characterEntity = mkRegex "&#[0-9]+;|&#[xX][0-9A-Fa-f]+;|&[A-Za-z0-9]+;"
-- | Return a string with all entity references decoded to unicode characters
-- where possible.
decodeEntities :: String -> String
decodeEntities str =
case (matchRegexAll characterEntity str) of
Nothing -> str
Just (before, match, rest, _) -> before ++ replacement ++
(decodeEntities rest)
where replacement = case (entityToChar match) of
Just ch -> [ch]
Nothing -> match
-- | Returns a string with characters replaced with entity references where
-- possible.
encodeEntities :: String -> String
encodeEntities [] = []
encodeEntities (c:cs) = if ord c < 128
then c:(encodeEntities cs)
else (charToEntity c) ++ (encodeEntities cs)
-- | If the string is a valid entity reference, returns @Just@ the character,
-- otherwise @Nothing@.
entityToChar :: String -> Maybe Char
entityToChar entity =
case (lookup entity entityTable) of
Just ch -> Just ch
Nothing -> case (matchRegexAll numericalEntity entity) of
Just (_, _, _, [sub]) -> Just (chr (read ('0':sub)))
Nothing -> Nothing
import Text.ParserCombinators.Parsec
import Data.Maybe ( fromMaybe )
-- | Returns a string containing an entity reference for the character.
charToEntity :: Char -> String
charToEntity char =
let matches = filter (\(entity, character) -> (character == char))
entityTable in
if (length matches) == 0
then "&#" ++ show (ord char) ++ ";"
else fst (head matches)
let matches = filter (\(entity, character) -> (character == char))
entityTable in
if (length matches) == 0
then charToNumericalEntity char
else fst (head matches)
-- | Returns a string containing a numerical entity reference for the char.
charToNumericalEntity :: Char -> String
charToNumericalEntity ch = "&#" ++ show (ord ch) ++ ";"
-- | Escape special character to SGML entity.
specialCharToEntity :: Bool -- ^ Use numerical entities only.
-> Char -- ^ Character to convert.
-> [Char]
specialCharToEntity numericalEntities c =
if (c `elem` "&<>\"") || (ord c > 127)
then if numericalEntities
then charToNumericalEntity c
else charToEntity c
else [c]
-- | Parse SGML character entity.
characterEntity :: GenParser Char st Char
characterEntity = namedEntity <|> hexEntity <|> decimalEntity <?> "SGML entity"
-- | Parse SGML character entity.
namedEntity :: GenParser Char st Char
namedEntity = try $ do
st <- char '&'
body <- many1 alphaNum
end <- char ';'
let entity = "&" ++ body ++ ";"
return $ case (lookup entity entityTable) of
Just ch -> ch
Nothing -> '?'
-- | Parse SGML hexadecimal entity.
hexEntity :: GenParser Char st Char
hexEntity = try $ do
st <- string "&#"
hex <- oneOf "Xx"
body <- many1 (oneOf "0123456789ABCDEFabcdef")
end <- char ';'
return $ chr $ read ('0':'x':body)
-- | Parse SGML decimal entity.
decimalEntity :: GenParser Char st Char
decimalEntity = try $ do
st <- string "&#"
body <- many1 digit
end <- char ';'
return $ chr $ read body
-- | Escape string as needed for SGML. Entity references are not preserved.
encodeEntities :: Bool -- ^ Use only numerical entities.
-> String -- ^ String to convert.
-> String
encodeEntities numericalEntities =
concatMap (specialCharToEntity numericalEntities)
-- | Escape string as needed for SGML, using only numerical entities.
-- Entity references are not preserved.
encodeEntitiesNumerical :: String -> String
encodeEntitiesNumerical =
concatMap (\c -> "&#" ++ show (ord c) ++ ";")
-- | Convert entities in a string to characters.
decodeEntities :: String -> String
decodeEntities str =
case parse (many (characterEntity <|> anyChar)) str str of
Left err -> error $ "\nError: " ++ show err
Right result -> result
-- | Escape string for SGML, preserving entity references.
stringToSGML :: Bool -- ^ Use only numerical entities.
-> String -- ^ String to convert.
-> String
stringToSGML numericalEntities str =
let nonentity = do
notFollowedBy characterEntity
c <- anyChar
return $ specialCharToEntity numericalEntities c
entity = do
ent <- manyTill anyChar (char ';')
return (ent ++ ";") in
case parse (many (nonentity <|> entity)) str str of
Left err -> error $ "\nError: " ++ show err
Right result -> concat result
entityTable :: [(String, Char)]
entityTable = [

View file

@ -44,7 +44,7 @@ import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Pandoc
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Entities ( decodeEntities, entityToChar )
import Text.Pandoc.Entities ( characterEntity, decodeEntities )
import Maybe ( fromMaybe )
import Data.List ( intersect, takeWhile, dropWhile )
import Data.Char ( toUpper, toLower, isAlphaNum )
@ -391,14 +391,9 @@ text = choice [ entity, strong, emph, code, str, linebreak, whitespace ] <?> "t
special = choice [ link, image, rawHtmlInline ] <?>
"link, inline html, or image"
entity = try (do
char '&'
body <- choice [(many1 letter), (try (do
char '#'
num <- many1 digit
return ("#" ++ num)))]
char ';'
return (Str [fromMaybe '?' (entityToChar ("&" ++ body ++ ";"))]))
entity = do
ent <- characterEntity
return $ Str [ent]
code = try (do
htmlTag "code"
@ -439,7 +434,7 @@ linebreak = do
str = do
result <- many1 (noneOf "<& \t\n")
return (Str (decodeEntities result))
return (Str result)
--
-- links and images

View file

@ -42,7 +42,7 @@ import Text.Pandoc.Readers.HTML ( rawHtmlBlock,
anyHtmlTag, anyHtmlEndTag,
htmlEndTag, extractTagType,
htmlBlockElement )
import Text.Pandoc.Entities ( decodeEntities )
import Text.Pandoc.Entities ( characterEntity )
import Text.ParserCombinators.Parsec
-- | Read markdown from an input string and return a Pandoc document.
@ -88,12 +88,13 @@ blockQuoteChar = '>'
hyphenChar = '-'
ellipsesChar = '.'
listColSepChar = '|'
entityStart = '&'
-- treat these as potentially non-text when parsing inline:
specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd,
emphStartAlt, emphEndAlt, codeStart, codeEnd, autoLinkEnd,
autoLinkStart, mathStart, mathEnd, imageStart, noteStart,
hyphenChar, ellipsesChar] ++ quoteChars
hyphenChar, ellipsesChar, entityStart] ++ quoteChars
--
-- auxiliary functions
@ -674,7 +675,7 @@ text = choice [ escapedChar, math, strong, emph, smartPunctuation,
code, ltSign, symbol,
str, linebreak, tabchar, whitespace, endline ] <?> "text"
inline = choice [ rawLaTeXInline', escapedChar, special, text ] <?> "inline"
inline = choice [ rawLaTeXInline', escapedChar, entity, special, text ] <?> "inline"
special = choice [ noteRef, inlineNote, link, referenceLink, rawHtmlInline',
autoLink, image ] <?> "link, inline html, note, or image"
@ -827,9 +828,13 @@ linebreak = try (do
nonEndline = noneOf endLineChars
entity = do
ent <- characterEntity
return $ Str [ent]
str = do
result <- many1 ((noneOf (specialChars ++ spaceChars ++ endLineChars)))
return (Str (decodeEntities result))
return (Str result)
-- an endline character that can be treated as a space, not a structural break
endline = try (do

View file

@ -65,8 +65,6 @@ module Text.Pandoc.Shared (
replaceReferenceLinks,
replaceRefLinksBlockList,
-- * SGML
escapeSGML,
stringToSGML,
inTags,
selfClosingTag,
inTagsSimple,
@ -74,7 +72,7 @@ module Text.Pandoc.Shared (
) where
import Text.Pandoc.Definition
import Text.ParserCombinators.Parsec as Parsec
import Text.Pandoc.Entities ( decodeEntities, charToEntity )
import Text.Pandoc.Entities ( decodeEntities, encodeEntities, stringToSGML )
import Text.PrettyPrint.HughesPJ as PP ( text, char, (<>),
($$), nest, Doc, isEmpty )
import Data.Char ( toLower, ord )
@ -523,61 +521,11 @@ replaceRefLinksInline keytable (Quoted t lst) =
Quoted t (map (replaceRefLinksInline keytable) lst)
replaceRefLinksInline keytable other = other
-- | Parse SGML character entity.
sgmlCharacterEntity :: GenParser Char st [Char]
sgmlCharacterEntity = sgmlNamedEntity <|> sgmlDecimalEntity <|>
sgmlHexEntity <?> "SGML entity"
-- | Parse SGML character entity.
sgmlNamedEntity :: GenParser Char st [Char]
sgmlNamedEntity = try $ do
st <- Parsec.char '&'
body <- many1 alphaNum
end <- Parsec.char ';'
return $ (st:body) ++ [end]
-- | Parse SGML decimal entity.
sgmlDecimalEntity :: GenParser Char st [Char]
sgmlDecimalEntity = try $ do
st <- string "&#"
body <- many1 (oneOf "0123456789")
end <- Parsec.char ';'
return $ st ++ body ++ [end]
-- | Parse SGML hexadecimal entity.
sgmlHexEntity :: GenParser Char st [Char]
sgmlHexEntity = try $ do
st <- string "&#"
hex <- oneOf "Xx"
body <- many1 (oneOf "0123456789ABCDEFabcdef")
end <- Parsec.char ';'
return $ st ++ (hex:body) ++ [end]
-- | Escape special character to SGML entity.
specialCharToEntity :: Char -> [Char]
specialCharToEntity c = if (c `elem` "&<>\"") || (ord c > 127)
then charToEntity c
else [c]
-- | Escape string, preserving character entities.
stringToSGML :: String -> String
stringToSGML str =
let segment = sgmlCharacterEntity <|>
(do{c <- anyChar;
return $ specialCharToEntity c})
sgmlString = (do{segs <- many segment; return $ concat segs}) in
case parse sgmlString str str of
Left err -> error $ "\nError:\n" ++ show err
Right result -> result
-- | Escape string as needed for SGML. Entity references are not preserved.
escapeSGML :: String -> String
escapeSGML = concatMap specialCharToEntity
-- | Return a text object with a string of formatted SGML attributes.
attributeList :: [(String, String)] -> Doc
attributeList = text . concatMap
(\(a, b) -> " " ++ stringToSGML a ++ "=\"" ++ stringToSGML b ++ "\"")
(\(a, b) -> " " ++ stringToSGML True a ++ "=\"" ++
stringToSGML True b ++ "\"")
-- | Put the supplied contents between start and end tags of tagType,
-- with specified attributes and (if specified) indentation.

View file

@ -32,10 +32,9 @@ module Text.Pandoc.Writers.Docbook (
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Entities ( encodeEntities )
import Text.Regex ( mkRegex, matchRegex )
import Text.Pandoc.Entities ( encodeEntities, stringToSGML )
import Data.Char ( toLower, ord )
import Data.List ( isPrefixOf, partition )
import Data.List ( isPrefixOf, partition, drop )
import Text.PrettyPrint.HughesPJ hiding ( Str )
-- | Data structure for defining hierarchical Pandoc documents
@ -65,8 +64,8 @@ authorToDocbook name = inTagsIndented "author" $
then -- last name first
let (lastname, rest) = break (==',') name
firstname = removeLeadingSpace rest in
inTagsSimple "firstname" (text $ stringToSGML firstname) <>
inTagsSimple "surname" (text $ stringToSGML lastname)
inTagsSimple "firstname" (text $ stringToSGML True firstname) <>
inTagsSimple "surname" (text $ stringToSGML True lastname)
else -- last name last
let namewords = words name
lengthname = length namewords
@ -74,8 +73,8 @@ authorToDocbook name = inTagsIndented "author" $
0 -> ("","")
1 -> ("", name)
n -> (joinWithSep " " (take (n-1) namewords), last namewords) in
inTagsSimple "firstname" (text $ stringToSGML firstname) $$
inTagsSimple "surname" (text $ stringToSGML lastname)
inTagsSimple "firstname" (text $ stringToSGML True firstname) $$
inTagsSimple "surname" (text $ stringToSGML True lastname)
-- | Convert Pandoc document to string in Docbook format.
writeDocbook :: WriterOptions -> Pandoc -> String
@ -87,7 +86,7 @@ writeDocbook opts (Pandoc (Meta title authors date) blocks) =
then inTagsIndented "articleinfo" $
(inTagsSimple "title" (wrap opts title)) $$
(vcat (map authorToDocbook authors)) $$
(inTagsSimple "date" (text $ stringToSGML date))
(inTagsSimple "date" (text $ stringToSGML True date))
else empty
blocks' = replaceReferenceLinks blocks
(noteBlocks, blocks'') = partition isNoteBlock blocks'
@ -142,7 +141,7 @@ blockToDocbook opts (Para lst) =
blockToDocbook opts (BlockQuote blocks) =
inTagsIndented "blockquote" (blocksToDocbook opts blocks)
blockToDocbook opts (CodeBlock str) =
text "<screen>\n" <> text (escapeSGML str) <> text "\n</screen>"
text "<screen>\n" <> text (encodeEntities True str) <> text "\n</screen>"
blockToDocbook opts (BulletList lst) =
inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst
blockToDocbook opts (OrderedList lst) =
@ -199,7 +198,7 @@ inlinesToDocbook opts lst = hcat (map (inlineToDocbook opts) lst)
-- | Convert an inline element to Docbook.
inlineToDocbook :: WriterOptions -> Inline -> Doc
inlineToDocbook opts (Str str) = text $ stringToSGML str
inlineToDocbook opts (Str str) = text $ stringToSGML True str
inlineToDocbook opts (Emph lst) =
inTagsSimple "emphasis" (inlinesToDocbook opts lst)
inlineToDocbook opts (Strong lst) =
@ -212,24 +211,23 @@ inlineToDocbook opts Ellipses = text "&#8230;"
inlineToDocbook opts EmDash = text "&#8212;"
inlineToDocbook opts EnDash = text "&#8211;"
inlineToDocbook opts (Code str) =
inTagsSimple "literal" $ text (escapeSGML str)
inTagsSimple "literal" $ text (encodeEntities True str)
inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str)
inlineToDocbook opts (HtmlInline str) = empty
inlineToDocbook opts LineBreak =
text $ "<literallayout></literallayout>"
inlineToDocbook opts Space = char ' '
inlineToDocbook opts (Link txt (Src src tit)) =
case (matchRegex (mkRegex "mailto:(.*)") src) of
Just [addr] -> inTagsSimple "email" $ text (escapeSGML addr)
Nothing -> inTags False "ulink" [("url", src)] $
inlinesToDocbook opts txt
if isPrefixOf "mailto:" src
then inTagsSimple "email" $ text (encodeEntities True $ drop 7 src)
else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt
inlineToDocbook opts (Link text (Ref ref)) = empty -- shouldn't occur
inlineToDocbook opts (Image alt (Src src tit)) =
let titleDoc = if null tit
then empty
else inTagsIndented "objectinfo" $
inTagsIndented "title"
(text $ stringToSGML tit) in
(text $ stringToSGML True tit) in
inTagsIndented "inlinemediaobject" $
inTagsIndented "imageobject" $
titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]

View file

@ -32,7 +32,7 @@ module Text.Pandoc.Writers.HTML (
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Entities ( encodeEntities )
import Text.Pandoc.Entities ( encodeEntities, stringToSGML )
import Text.Regex ( mkRegex, matchRegex )
import Numeric ( showHex )
import Data.Char ( ord, toLower )
@ -127,11 +127,11 @@ htmlHeader opts (Meta title authors date) =
then empty
else selfClosingTag "meta" [("name", "author"),
("content",
joinWithSep ", " (map stringToSGML authors))]
joinWithSep ", " (map (stringToSGML False) authors))]
datetext = if (date == "")
then empty
else selfClosingTag "meta" [("name", "date"),
("content", stringToSGML date)] in
("content", stringToSGML False date)] in
text (writerHeader opts) $$ authortext $$ datetext $$ titletext $$
text "</head>\n<body>"
@ -168,7 +168,7 @@ blockToHtml opts (Note ref lst) =
(text "&#8617;")
blockToHtml opts (Key _ _) = empty
blockToHtml opts (CodeBlock str) =
text "<pre><code>" <> text (escapeSGML str) <> text "\n</code></pre>"
text "<pre><code>" <> text (encodeEntities False str) <> text "\n</code></pre>"
blockToHtml opts (RawHtml str) = text str
blockToHtml opts (BulletList lst) =
let attribs = if (writerIncremental opts)
@ -234,7 +234,7 @@ inlineToHtml opts (Emph lst) =
inlineToHtml opts (Strong lst) =
inTagsSimple "strong" (inlineListToHtml opts lst)
inlineToHtml opts (Code str) =
inTagsSimple "code" $ text (escapeSGML str)
inTagsSimple "code" $ text (encodeEntities False str)
inlineToHtml opts (Quoted SingleQuote lst) =
text "&lsquo;" <> (inlineListToHtml opts lst) <> text "&rsquo;"
inlineToHtml opts (Quoted DoubleQuote lst) =
@ -243,16 +243,16 @@ inlineToHtml opts EmDash = text "&mdash;"
inlineToHtml opts EnDash = text "&ndash;"
inlineToHtml opts Ellipses = text "&hellip;"
inlineToHtml opts Apostrophe = text "&rsquo;"
inlineToHtml opts (Str str) = text $ stringToSGML str
inlineToHtml opts (TeX str) = text $ escapeSGML str
inlineToHtml opts (Str str) = text $ stringToSGML False str
inlineToHtml opts (TeX str) = text $ encodeEntities False str
inlineToHtml opts (HtmlInline str) = text str
inlineToHtml opts (LineBreak) = selfClosingTag "br" []
inlineToHtml opts Space = space
inlineToHtml opts (Link txt (Src src tit)) =
let title = stringToSGML tit in
let title = stringToSGML False tit in
if (isPrefixOf "mailto:" src)
then obfuscateLink opts txt src
else inTags False "a" ([("href", escapeSGML src)] ++
else inTags False "a" ([("href", encodeEntities False src)] ++
if null tit then [] else [("title", title)])
(inlineListToHtml opts txt)
inlineToHtml opts (Link txt (Ref ref)) =
@ -260,7 +260,7 @@ inlineToHtml opts (Link txt (Ref ref)) =
(inlineListToHtml opts ref) <> char ']'
-- this is what markdown does, for better or worse
inlineToHtml opts (Image alt (Src source tit)) =
let title = stringToSGML tit
let title = stringToSGML False tit
alternate = render $ inlineListToHtml opts alt in
selfClosingTag "img" $ [("src", source)] ++
(if null alternate then [] else [("alt", alternate)]) ++

View file

@ -32,7 +32,6 @@ Markdown: <http://daringfireball.net/projects/markdown/>
module Text.Pandoc.Writers.Markdown (
writeMarkdown
) where
import Text.Regex ( matchRegex, mkRegex )
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Data.List ( group, isPrefixOf, drop )

View file

@ -240,8 +240,8 @@ Pandoc (Meta [Str "Pandoc",Space,Str "Test",Space,Str "Suite"] ["John MacFarlane
, [ Plain [Str "section:",Space,Str "\167"] ]
, [ Plain [Str "set",Space,Str "membership:",Space,Str "\8712"] ]
, [ Plain [Str "copyright:",Space,Str "\169"] ] ]
, Para [Str "AT&T",Space,Str "has",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "their",Space,Str "name",Str "."]
, Para [Str "AT&T",Space,Str "is",Space,Str "another",Space,Str "way",Space,Str "to",Space,Str "write",Space,Str "it",Str "."]
, Para [Str "AT",Str "&",Str "T",Space,Str "has",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "their",Space,Str "name",Str "."]
, Para [Str "AT",Str "&",Str "T",Space,Str "is",Space,Str "another",Space,Str "way",Space,Str "to",Space,Str "write",Space,Str "it",Str "."]
, Para [Str "This",Space,Str "&",Space,Str "that",Str "."]
, Para [Str "4",Space,Str "<",Space,Str "5",Str "."]
, Para [Str "6",Space,Str ">",Space,Str "5",Str "."]
@ -294,7 +294,7 @@ Pandoc (Meta [Str "Pandoc",Space,Str "Test",Space,Str "Suite"] ["John MacFarlane
, Key [Str "bar"] (Src "/url/" "Title with &quot;quotes&quot; inside")
, Header 2 [Str "With",Space,Str "ampersands"]
, Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Link [Str "link",Space,Str "with",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "the",Space,Str "URL"] (Ref [Str "1"]),Str "."]
, Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Str "link",Space,Str "with",Space,Str "an",Space,Str "amersand",Space,Str "in",Space,Str "the",Space,Str "link",Space,Str "text:",Space,Link [Str "AT&T"] (Ref [Str "2"]),Str "."]
, Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Str "link",Space,Str "with",Space,Str "an",Space,Str "amersand",Space,Str "in",Space,Str "the",Space,Str "link",Space,Str "text:",Space,Link [Str "AT",Str "&",Str "T"] (Ref [Str "2"]),Str "."]
, Para [Str "Here",Apostrophe,Str "s",Space,Str "an",Space,Link [Str "inline",Space,Str "link"] (Src "/script?foo=1&bar=2" ""),Str "."]
, Para [Str "Here",Apostrophe,Str "s",Space,Str "an",Space,Link [Str "inline",Space,Str "link",Space,Str "in",Space,Str "pointy",Space,Str "braces"] (Src "/script?foo=1&bar=2" ""),Str "."]
, Key [Str "1"] (Src "http://example.com/?foo=1&bar=2" "")

View file

@ -89,7 +89,7 @@
</para>
<screen>
sub status {
print &quot;working&quot;;
print &#34;working&#34;;
}
</screen>
<para>
@ -122,7 +122,7 @@ sub status {
</blockquote>
</blockquote>
<para>
This should not be a block quote: 2 &gt; 1.
This should not be a block quote: 2 &#62; 1.
</para>
<para>
Box-style:
@ -133,7 +133,7 @@ sub status {
</para>
<screen>
sub status {
print &quot;working&quot;;
print &#34;working&#34;;
}
</screen>
</blockquote>
@ -177,7 +177,7 @@ sub status {
---- (should be four hyphens)
sub status {
print &quot;working&quot;;
print &#34;working&#34;;
}
this code block is indented by one tab
@ -188,7 +188,7 @@ this code block is indented by one tab
<screen>
this code block is indented by two tabs
These should not be escaped: \$ \\ \&gt; \[ \{
These should not be escaped: \$ \\ \&#62; \[ \{
</screen>
</section>
<section>
@ -577,9 +577,9 @@ These should not be escaped: \$ \\ \&gt; \[ \{
word.
</para>
<para>
This is code: <literal>&gt;</literal>, <literal>$</literal>,
This is code: <literal>&#62;</literal>, <literal>$</literal>,
<literal>\</literal>, <literal>\$</literal>,
<literal>&lt;html&gt;</literal>.
<literal>&#60;html&#62;</literal>.
</para>
</section>
<section>
@ -602,7 +602,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{
</para>
<para>
Here is some quoted <quote><literal>code</literal></quote> and a
<quote><ulink url="http://example.com/?foo=1&amp;bar=2">quoted link</ulink></quote>.
<quote><ulink url="http://example.com/?foo=1&#38;bar=2">quoted link</ulink></quote>.
</para>
<para>
Some dashes: one&#8212;two&#8212;three&#8212;four&#8212;five.
@ -691,9 +691,9 @@ These should not be escaped: \$ \\ \&gt; \[ \{
</para>
<para>
<literal>\begin{tabular}{|l|l|}\hline
Animal &amp; Number \\ \hline
Dog &amp; 2 \\
Cat &amp; 1 \\ \hline
Animal &#38; Number \\ \hline
Dog &#38; 2 \\
Cat &#38; 1 \\ \hline
\end{tabular}</literal>
</para>
</section>
@ -705,44 +705,44 @@ Cat &amp; 1 \\ \hline
<itemizedlist>
<listitem>
<para>
I hat: &Icirc;
I hat: &#206;
</para>
</listitem>
<listitem>
<para>
o umlaut: &ouml;
o umlaut: &#246;
</para>
</listitem>
<listitem>
<para>
section: &sect;
section: &#167;
</para>
</listitem>
<listitem>
<para>
set membership: &isin;
set membership: &#8712;
</para>
</listitem>
<listitem>
<para>
copyright: &copy;
copyright: &#169;
</para>
</listitem>
</itemizedlist>
<para>
AT&amp;T has an ampersand in their name.
AT&#38;T has an ampersand in their name.
</para>
<para>
AT&amp;T is another way to write it.
AT&#38;T is another way to write it.
</para>
<para>
This &amp; that.
This &#38; that.
</para>
<para>
4 &lt; 5.
4 &#60; 5.
</para>
<para>
6 &gt; 5.
6 &#62; 5.
</para>
<para>
Backslash: \
@ -775,7 +775,7 @@ Cat &amp; 1 \\ \hline
Right paren: )
</para>
<para>
Greater-than: &gt;
Greater-than: &#62;
</para>
<para>
Hash: #
@ -868,25 +868,25 @@ Cat &amp; 1 \\ \hline
<title>With ampersands</title>
<para>
Here's a
<ulink url="http://example.com/?foo=1&amp;bar=2">link with an ampersand in the URL</ulink>.
<ulink url="http://example.com/?foo=1&#38;bar=2">link with an ampersand in the URL</ulink>.
</para>
<para>
Here's a link with an amersand in the link text:
<ulink url="http://att.com/">AT&amp;T</ulink>.
<ulink url="http://att.com/">AT&#38;T</ulink>.
</para>
<para>
Here's an <ulink url="/script?foo=1&amp;bar=2">inline link</ulink>.
Here's an <ulink url="/script?foo=1&#38;bar=2">inline link</ulink>.
</para>
<para>
Here's an
<ulink url="/script?foo=1&amp;bar=2">inline link in pointy braces</ulink>.
<ulink url="/script?foo=1&#38;bar=2">inline link in pointy braces</ulink>.
</para>
</section>
<section>
<title>Autolinks</title>
<para>
With an ampersand:
<ulink url="http://example.com/?foo=1&amp;bar=2">http://example.com/?foo=1&amp;bar=2</ulink>
<ulink url="http://example.com/?foo=1&#38;bar=2">http://example.com/?foo=1&#38;bar=2</ulink>
</para>
<itemizedlist>
<listitem>
@ -916,10 +916,10 @@ Cat &amp; 1 \\ \hline
</blockquote>
<para>
Auto-links should not occur here:
<literal>&lt;http://example.com/&gt;</literal>
<literal>&#60;http://example.com/&#62;</literal>
</para>
<screen>
or here: &lt;http://example.com/&gt;
or here: &#60;http://example.com/&#62;
</screen>
</section>
</section>
@ -970,7 +970,7 @@ or here: &lt;http://example.com/&gt;
footnote (as with list items).
</para>
<screen>
{ &lt;code&gt; }
{ &#60;code&#62; }
</screen>
<para>
If you want, you can indent every line, but you can also be lazy

View file

@ -240,8 +240,8 @@ Pandoc (Meta [Str "Pandoc",Space,Str "Test",Space,Str "Suite"] ["John MacFarlane
, [ Plain [Str "section:",Space,Str "\167"] ]
, [ Plain [Str "set",Space,Str "membership:",Space,Str "\8712"] ]
, [ Plain [Str "copyright:",Space,Str "\169"] ] ]
, Para [Str "AT&T",Space,Str "has",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "their",Space,Str "name",Str "."]
, Para [Str "AT&T",Space,Str "is",Space,Str "another",Space,Str "way",Space,Str "to",Space,Str "write",Space,Str "it",Str "."]
, Para [Str "AT",Str "&",Str "T",Space,Str "has",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "their",Space,Str "name",Str "."]
, Para [Str "AT",Str "&",Str "T",Space,Str "is",Space,Str "another",Space,Str "way",Space,Str "to",Space,Str "write",Space,Str "it",Str "."]
, Para [Str "This",Space,Str "&",Space,Str "that",Str "."]
, Para [Str "4",Space,Str "<",Space,Str "5",Str "."]
, Para [Str "6",Space,Str ">",Space,Str "5",Str "."]
@ -294,7 +294,7 @@ Pandoc (Meta [Str "Pandoc",Space,Str "Test",Space,Str "Suite"] ["John MacFarlane
, Key [Str "bar"] (Src "/url/" "Title with &quot;quotes&quot; inside")
, Header 2 [Str "With",Space,Str "ampersands"]
, Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Link [Str "link",Space,Str "with",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "the",Space,Str "URL"] (Ref [Str "1"]),Str "."]
, Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Str "link",Space,Str "with",Space,Str "an",Space,Str "amersand",Space,Str "in",Space,Str "the",Space,Str "link",Space,Str "text:",Space,Link [Str "AT&T"] (Ref [Str "2"]),Str "."]
, Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Str "link",Space,Str "with",Space,Str "an",Space,Str "amersand",Space,Str "in",Space,Str "the",Space,Str "link",Space,Str "text:",Space,Link [Str "AT",Str "&",Str "T"] (Ref [Str "2"]),Str "."]
, Para [Str "Here",Apostrophe,Str "s",Space,Str "an",Space,Link [Str "inline",Space,Str "link"] (Src "/script?foo=1&bar=2" ""),Str "."]
, Para [Str "Here",Apostrophe,Str "s",Space,Str "an",Space,Link [Str "inline",Space,Str "link",Space,Str "in",Space,Str "pointy",Space,Str "braces"] (Src "/script?foo=1&bar=2" ""),Str "."]
, Key [Str "1"] (Src "http://example.com/?foo=1&bar=2" "")