diff --git a/debian/changelog b/debian/changelog index 3fa92832f..0b8db6e8c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -186,6 +186,8 @@ pandoc (0.3) unstable; urgency=low + Renamed 'Text/Pandoc/HtmlEntities' module to 'Text/Pandoc/Entities'. Also changed function names so as not to be HTML-specific. + + Refactored SGML string escaping functions from HTML and Docbook + writers into Text/Pandoc/Shared. (escapeSGML, stringToSGML) + Removed 'BlockQuoteContext' from ParserContext, as it isn't used anywhere. + Removed splitBySpace and replaced it with a general, polymorphic diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index d4687b10e..d1d40ac23 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -28,6 +28,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Utility functions and definitions used by the various Pandoc modules. -} module Text.Pandoc.Shared ( + -- * List processing + splitBy, -- * Text processing gsub, joinWithSep, @@ -52,7 +54,6 @@ module Text.Pandoc.Shared ( -- * Pandoc block list processing consolidateList, isNoteBlock, - splitBy, normalizeSpaces, compactify, generateReference, @@ -62,12 +63,21 @@ module Text.Pandoc.Shared ( lookupKeySrc, refsMatch, replaceReferenceLinks, - replaceRefLinksBlockList + replaceRefLinksBlockList, + -- * SGML + escapeSGML, + stringToSGML, + inTags, + selfClosingTag, + inTagsSimple, + inTagsIndented ) where import Text.Pandoc.Definition import Text.ParserCombinators.Parsec -import Text.Pandoc.Entities ( decodeEntities ) +import Text.Pandoc.Entities ( decodeEntities, encodeEntities ) import Text.Regex ( matchRegexAll, mkRegex, subRegex, Regex ) +import Text.PrettyPrint.HughesPJ as PP ( text, char, (<>), ($$), nest, Doc, + isEmpty ) import Char ( toLower ) import List ( find, groupBy ) @@ -507,3 +517,85 @@ replaceRefLinksInline keytable (Emph lst) = replaceRefLinksInline keytable (Strong lst) = Strong (map (replaceRefLinksInline keytable) lst) replaceRefLinksInline keytable other = other + +-- | Escape string, preserving character entities and quote, and adding +-- smart typography if specified. +stringToSGML :: WriterOptions -> String -> String +stringToSGML options = + let escapeDoubleQuotes = + gsub "(\"|")" "”" . -- rest are right quotes + gsub "(\"|")(&r[sd]quo;)" "”\\2" . + -- never left quo before right quo + gsub "(&l[sd]quo;)(\"|")" "\\2“" . + -- never right quo after left quo + gsub "([ \t])(\"|")" "\\1“" . + -- never right quo after space + gsub "(\"|")([^,.;:!?^) \t-])" "“\\2" . -- "word left + gsub "(\"|")('|`|‘)" "”’" . + -- right if it got through last filter + gsub "(\"|")('|`|‘)([^,.;:!?^) \t-])" "“‘\\3" . + -- "'word left + gsub "``" "“" . + gsub "''" "”" + escapeSingleQuotes = + gsub "'" "’" . -- otherwise right + gsub "'(&r[sd]quo;)" "’\\1" . -- never left quo before right quo + gsub "(&l[sd]quo;)'" "\\1‘" . -- never right quo after left quo + gsub "([ \t])'" "\\1‘" . -- never right quo after space + gsub "`" "‘" . -- ` is left + gsub "([^,.;:!?^) \t-])'" "\\1’" . -- word' right + gsub "^('|`)([^,.;:!?^) \t-])" "‘\\2" . -- 'word left + gsub "('|`)(\"|"|“|``)" "‘“" . -- '"word left + gsub "([^,.;:!?^) \t-])'(s|S)" "\\1’\\2" . -- possessive + gsub "([[:space:]])'([^,.;:!?^) \t-])" "\\1‘\\2" . -- 'word left + gsub "'([0-9][0-9](s|S))" "’\\1" -- '80s - decade abbrevs. + escapeDashes = + gsub " ?-- ?" "—" . + gsub " ?--- ?" "—" . + gsub "([0-9])--?([0-9])" "\\1–\\2" + escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "…" + smartFilter = escapeSingleQuotes . escapeDoubleQuotes . escapeDashes . + escapeEllipses in + encodeEntities . (if (writerSmart options) then smartFilter else id) . + (escapePreservingRegex escapeSGML (mkRegex "&[[:alnum:]]*;")) + +-- | Escape string as needed for HTML. Entity references are not preserved. +escapeSGML :: String -> String +escapeSGML [] = [] +escapeSGML (x:xs) = case x of + '&' -> "&" ++ escapeSGML xs + '<' -> "<" ++ escapeSGML xs + '>' -> ">" ++ escapeSGML xs + '"' -> """ ++ escapeSGML xs + _ -> x:(escapeSGML xs) + +-- | Return a text object with a string of formatted SGML attributes. +attributeList :: WriterOptions -> [(String, String)] -> Doc +attributeList options = + text . concatMap (\(a, b) -> " " ++ stringToSGML options a ++ "=\"" ++ + stringToSGML options b ++ "\"") + +-- | Put the supplied contents between start and end tags of tagType, +-- with specified attributes and (if specified) indentation. +inTags:: Bool -> WriterOptions -> String -> [(String, String)] -> Doc -> Doc +inTags isIndented options tagType attribs contents = + let openTag = PP.char '<' <> text tagType <> attributeList options attribs <> + PP.char '>' + closeTag = text " text tagType <> PP.char '>' in + if isIndented + then openTag $$ nest 2 contents $$ closeTag + else openTag <> contents <> closeTag + +-- | Return a self-closing tag of tagType with specified attributes +selfClosingTag :: WriterOptions -> String -> [(String, String)] -> Doc +selfClosingTag options tagType attribs = + PP.char '<' <> text tagType <> attributeList options attribs <> text " />" + +-- | Put the supplied contents between start and end tags of tagType. +inTagsSimple :: WriterOptions -> String -> Doc -> Doc +inTagsSimple options tagType = inTags False options tagType [] + +-- | Put the supplied contents in indented block btw start and end tags. +inTagsIndented :: WriterOptions -> String -> Doc -> Doc +inTagsIndented options tagType = inTags True options tagType [] + diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 86dbbf6db..0fa4a1d98 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -32,9 +32,7 @@ module Text.Pandoc.Writers.Docbook ( ) where import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Pandoc.Writers.HTML ( stringToSmartHtml, stringToHtml ) import Text.Pandoc.Entities ( encodeEntities ) -import Text.Html ( stringToHtmlString ) import Text.Regex ( mkRegex, matchRegex ) import Data.Char ( toLower, ord ) import Data.List ( isPrefixOf, partition ) @@ -54,19 +52,21 @@ hierarchicalize :: [Block] -> [Element] hierarchicalize [] = [] hierarchicalize (block:rest) = case block of - (Header level title) -> let (thisSection, rest') = break (headerAtLeast level) rest in - (Sec title (hierarchicalize thisSection)):(hierarchicalize rest') + (Header level title) -> let (thisSection, rest') = break (headerAtLeast + level) rest in + (Sec title (hierarchicalize thisSection)): + (hierarchicalize rest') x -> (Blk x):(hierarchicalize rest) -- | Convert list of authors to a docbook section authorToDocbook :: WriterOptions -> [Char] -> Doc -authorToDocbook options name = indentedInTags "author" $ +authorToDocbook opts name = inTagsIndented opts "author" $ if ',' `elem` name then -- last name first let (lastname, rest) = break (==',') name firstname = removeLeadingSpace rest in - inTags "firstname" (text $ stringToXML options firstname) <> - inTags "surname" (text $ stringToXML options lastname) + inTagsSimple opts "firstname" (text $ stringToSGML opts firstname) <> + inTagsSimple opts "surname" (text $ stringToSGML opts lastname) else -- last name last let namewords = words name lengthname = length namewords @@ -74,101 +74,82 @@ authorToDocbook options name = indentedInTags "author" $ 0 -> ("","") 1 -> ("", name) n -> (joinWithSep " " (take (n-1) namewords), last namewords) in - inTags "firstname" (text $ stringToXML options firstname) $$ - inTags "surname" (text $ stringToXML options lastname) + inTagsSimple opts "firstname" (text $ stringToSGML opts firstname) $$ + inTagsSimple opts "surname" (text $ stringToSGML opts lastname) -- | Convert Pandoc document to string in Docbook format. writeDocbook :: WriterOptions -> Pandoc -> String -writeDocbook options (Pandoc (Meta title authors date) blocks) = - let head = if (writerStandalone options) - then text (writerHeader options) +writeDocbook opts (Pandoc (Meta title authors date) blocks) = + let head = if (writerStandalone opts) + then text (writerHeader opts) else empty - meta = if (writerStandalone options) - then indentedInTags "articleinfo" $ - (inTags "title" (inlinesToDocbook options title)) $$ - (vcat (map (authorToDocbook options) authors)) $$ - (inTags "date" (text date)) + meta = if (writerStandalone opts) + then inTagsIndented opts "articleinfo" $ + (inTagsSimple opts "title" (inlinesToDocbook opts title)) $$ + (vcat (map (authorToDocbook opts) authors)) $$ + (inTagsSimple opts "date" (text $ stringToSGML opts date)) else empty blocks' = replaceReferenceLinks blocks (noteBlocks, blocks'') = partition isNoteBlock blocks' - options' = options {writerNotes = noteBlocks} + opts' = opts {writerNotes = noteBlocks} elements = hierarchicalize blocks'' - body = text (writerIncludeBefore options') <> - vcat (map (elementToDocbook options') elements) $$ - text (writerIncludeAfter options') - body' = if writerStandalone options' - then indentedInTags "article" (meta $$ body) + body = text (writerIncludeBefore opts') <> + vcat (map (elementToDocbook opts') elements) $$ + text (writerIncludeAfter opts') + body' = if writerStandalone opts' + then inTagsIndented opts "article" (meta $$ body) else body in render $ head $$ body' <> text "\n" --- | Put the supplied contents between start and end tags of tagType, --- with specified attributes. -inTagsWithAttrib :: String -> [(String, String)] -> Doc -> Doc -inTagsWithAttrib tagType attribs contents = text ("<" ++ tagType ++ - (concatMap (\(a, b) -> " " ++ attributeStringToXML a ++ - "=\"" ++ attributeStringToXML b ++ "\"") attribs)) <> - if isEmpty contents - then text " />" -- self-closing tag - else text ">" <> contents <> text ("") - --- | Put the supplied contents between start and end tags of tagType. -inTags :: String -> Doc -> Doc -inTags tagType contents = inTagsWithAttrib tagType [] contents - --- | Put the supplied contents in indented block btw start and end tags. -indentedInTags :: [Char] -> Doc -> Doc -indentedInTags tagType contents = text ("<" ++ tagType ++ ">") $$ - nest 2 contents $$ text ("") - -- | Convert an Element to Docbook. elementToDocbook :: WriterOptions -> Element -> Doc -elementToDocbook options (Blk block) = blockToDocbook options block -elementToDocbook options (Sec title elements) = +elementToDocbook opts (Blk block) = blockToDocbook opts block +elementToDocbook opts (Sec title elements) = -- Docbook doesn't allow sections with no content, so insert some if needed let elements' = if null elements then [Blk (Para [])] else elements in - indentedInTags "section" $ - inTags "title" (wrap options title) $$ - vcat (map (elementToDocbook options) elements') + inTagsIndented opts "section" $ + inTagsSimple opts "title" (wrap opts title) $$ + vcat (map (elementToDocbook opts) elements') -- | Convert a list of Pandoc blocks to Docbook. blocksToDocbook :: WriterOptions -> [Block] -> Doc -blocksToDocbook options = vcat . map (blockToDocbook options) +blocksToDocbook opts = vcat . map (blockToDocbook opts) -- | Convert a list of lists of blocks to a list of Docbook list items. listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc -listItemsToDocbook options items = - vcat $ map (listItemToDocbook options) items +listItemsToDocbook opts items = + vcat $ map (listItemToDocbook opts) items -- | Convert a list of blocks into a Docbook list item. listItemToDocbook :: WriterOptions -> [Block] -> Doc -listItemToDocbook options item = +listItemToDocbook opts item = let plainToPara (Plain x) = Para x plainToPara y = y in let item' = map plainToPara item in - indentedInTags "listitem" (blocksToDocbook options item') + inTagsIndented opts "listitem" (blocksToDocbook opts item') -- | Convert a Pandoc block element to Docbook. blockToDocbook :: WriterOptions -> Block -> Doc -blockToDocbook options Blank = text "" -blockToDocbook options Null = empty -blockToDocbook options (Plain lst) = wrap options lst -blockToDocbook options (Para lst) = - indentedInTags "para" (wrap options lst) -blockToDocbook options (BlockQuote blocks) = - indentedInTags "blockquote" (blocksToDocbook options blocks) -blockToDocbook options (CodeBlock str) = - text "\n" <> text (codeStringToXML str) <> text "\n" -blockToDocbook options (BulletList lst) = - indentedInTags "itemizedlist" $ listItemsToDocbook options lst -blockToDocbook options (OrderedList lst) = - indentedInTags "orderedlist" $ listItemsToDocbook options lst -blockToDocbook options (RawHtml str) = text str -- raw XML block -blockToDocbook options HorizontalRule = empty -- not semantic -blockToDocbook options (Note _ _) = empty -- shouldn't occur -blockToDocbook options (Key _ _) = empty -- shouldn't occur -blockToDocbook options _ = indentedInTags "para" (text "Unknown block type") +blockToDocbook opts Blank = text "" +blockToDocbook opts Null = empty +blockToDocbook opts (Plain lst) = wrap opts lst +blockToDocbook opts (Para lst) = + inTagsIndented opts "para" (wrap opts lst) +blockToDocbook opts (BlockQuote blocks) = + inTagsIndented opts "blockquote" (blocksToDocbook opts blocks) +blockToDocbook opts (CodeBlock str) = + text "\n" <> text (escapeSGML str) <> text "\n" +blockToDocbook opts (BulletList lst) = + inTagsIndented opts "itemizedlist" $ listItemsToDocbook opts lst +blockToDocbook opts (OrderedList lst) = + inTagsIndented opts "orderedlist" $ listItemsToDocbook opts lst +blockToDocbook opts (RawHtml str) = text str -- raw XML block +blockToDocbook opts HorizontalRule = empty -- not semantic +blockToDocbook opts (Note _ _) = empty -- shouldn't occur +blockToDocbook opts (Key _ _) = empty -- shouldn't occur +blockToDocbook opts _ = inTagsIndented opts "para" (text "Unknown block type") -- | Put string in CDATA section cdata :: String -> Doc @@ -176,62 +157,47 @@ cdata str = text $ "" -- | Take list of inline elements and return wrapped doc. wrap :: WriterOptions -> [Inline] -> Doc -wrap options lst = fsep $ map (inlinesToDocbook options) (splitBy Space lst) - --- | Escape a string for XML (with "smart" option if specified). -stringToXML :: WriterOptions -> String -> String -stringToXML options = encodeEntities . - (if writerSmart options - then stringToSmartHtml - else stringToHtml) - --- | Escape string to XML appropriate for attributes -attributeStringToXML :: String -> String -attributeStringToXML = gsub "\"" """ . codeStringToXML - --- | Escape a literal string for XML. -codeStringToXML :: String -> String -codeStringToXML = encodeEntities . gsub "<" "<" . gsub "&" "&" +wrap opts lst = fsep $ map (inlinesToDocbook opts) (splitBy Space lst) -- | Convert a list of inline elements to Docbook. inlinesToDocbook :: WriterOptions -> [Inline] -> Doc -inlinesToDocbook options lst = hcat (map (inlineToDocbook options) lst) +inlinesToDocbook opts lst = hcat (map (inlineToDocbook opts) lst) -- | Convert an inline element to Docbook. inlineToDocbook :: WriterOptions -> Inline -> Doc -inlineToDocbook options (Str str) = text $ stringToXML options str -inlineToDocbook options (Emph lst) = - inTags "emphasis" (inlinesToDocbook options lst) -inlineToDocbook options (Strong lst) = - inTagsWithAttrib "emphasis" [("role", "strong")] - (inlinesToDocbook options lst) -inlineToDocbook options (Code str) = - inTags "literal" $ text (codeStringToXML str) -inlineToDocbook options (TeX str) = inlineToDocbook options (Code str) -inlineToDocbook options (HtmlInline str) = empty -inlineToDocbook options LineBreak = +inlineToDocbook opts (Str str) = text $ stringToSGML opts str +inlineToDocbook opts (Emph lst) = + inTagsSimple opts "emphasis" (inlinesToDocbook opts lst) +inlineToDocbook opts (Strong lst) = + inTags False opts "emphasis" [("role", "strong")] + (inlinesToDocbook opts lst) +inlineToDocbook opts (Code str) = + inTagsSimple opts "literal" $ text (escapeSGML str) +inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str) +inlineToDocbook opts (HtmlInline str) = empty +inlineToDocbook opts LineBreak = text $ "" -inlineToDocbook options Space = char ' ' -inlineToDocbook options (Link txt (Src src tit)) = +inlineToDocbook opts Space = char ' ' +inlineToDocbook opts (Link txt (Src src tit)) = case (matchRegex (mkRegex "mailto:(.*)") src) of - Just [addr] -> inTags "email" $ text (codeStringToXML addr) - Nothing -> inTagsWithAttrib "ulink" [("url", src)] $ - inlinesToDocbook options txt -inlineToDocbook options (Link text (Ref ref)) = empty -- shouldn't occur -inlineToDocbook options (Image alt (Src src tit)) = + Just [addr] -> inTagsSimple opts "email" $ text (escapeSGML addr) + Nothing -> inTags False opts "ulink" [("url", src)] $ + inlinesToDocbook opts txt +inlineToDocbook opts (Link text (Ref ref)) = empty -- shouldn't occur +inlineToDocbook opts (Image alt (Src src tit)) = let titleDoc = if null tit then empty - else indentedInTags "objectinfo" $ - indentedInTags "title" - (text $ stringToXML options tit) in - indentedInTags "inlinemediaobject" $ - indentedInTags "imageobject" $ - titleDoc $$ inTagsWithAttrib "imagedata" [("fileref", src)] empty -inlineToDocbook options (Image alternate (Ref ref)) = empty --shouldn't occur -inlineToDocbook options (NoteRef ref) = - let notes = writerNotes options + else inTagsIndented opts "objectinfo" $ + inTagsIndented opts "title" + (text $ stringToSGML opts tit) in + inTagsIndented opts "inlinemediaobject" $ + inTagsIndented opts "imageobject" $ + titleDoc $$ selfClosingTag opts "imagedata" [("fileref", src)] +inlineToDocbook opts (Image alternate (Ref ref)) = empty --shouldn't occur +inlineToDocbook opts (NoteRef ref) = + let notes = writerNotes opts hits = filter (\(Note r _) -> r == ref) notes in if null hits then empty else let (Note _ contents) = head hits in - indentedInTags "footnote" $ blocksToDocbook options contents + inTagsIndented opts "footnote" $ blocksToDocbook opts contents diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 8de1de43f..b42d78eb0 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -29,12 +29,10 @@ Conversion of 'Pandoc' documents to HTML. -} module Text.Pandoc.Writers.HTML ( writeHtml, - stringToSmartHtml, - stringToHtml ) where import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Html ( stringToHtmlString ) +import Text.Pandoc.Entities ( encodeEntities ) import Text.Regex ( mkRegex, matchRegex ) import Numeric ( showHex ) import Data.Char ( ord, toLower ) @@ -115,61 +113,6 @@ obfuscateChar char = obfuscateString :: String -> String obfuscateString = concatMap obfuscateChar --- | Escape string, preserving character entities and quote. -stringToHtml :: String -> String -stringToHtml str = escapePreservingRegex stringToHtmlString - (mkRegex "\"|(&[[:alnum:]]*;)") str - --- | Escape string as in 'stringToHtml' but add smart typography filter. -stringToSmartHtml :: String -> String -stringToSmartHtml = - let escapeDoubleQuotes = - gsub "(\"|")" "”" . -- rest are right quotes - gsub "(\"|")(&r[sd]quo;)" "”\\2" . - -- never left quo before right quo - gsub "(&l[sd]quo;)(\"|")" "\\2“" . - -- never right quo after left quo - gsub "([ \t])(\"|")" "\\1“" . - -- never right quo after space - gsub "(\"|")([^,.;:!?^) \t-])" "“\\2" . -- "word left - gsub "(\"|")('|`|‘)" "”’" . - -- right if it got through last filter - gsub "(\"|")('|`|‘)([^,.;:!?^) \t-])" "“‘\\3" . - -- "'word left - gsub "``" "“" . - gsub "''" "”" - escapeSingleQuotes = - gsub "'" "’" . -- otherwise right - gsub "'(&r[sd]quo;)" "’\\1" . -- never left quo before right quo - gsub "(&l[sd]quo;)'" "\\1‘" . -- never right quo after left quo - gsub "([ \t])'" "\\1‘" . -- never right quo after space - gsub "`" "‘" . -- ` is left - gsub "([^,.;:!?^) \t-])'" "\\1’" . -- word' right - gsub "^('|`)([^,.;:!?^) \t-])" "‘\\2" . -- 'word left - gsub "('|`)(\"|"|“|``)" "‘“" . -- '"word left - gsub "([^,.;:!?^) \t-])'(s|S)" "\\1’\\2" . -- possessive - gsub "([[:space:]])'([^,.;:!?^) \t-])" "\\1‘\\2" . -- 'word left - gsub "'([0-9][0-9](s|S))" "’\\1" -- '80s - decade abbrevs. - escapeDashes = - gsub " ?-- ?" "—" . - gsub " ?--- ?" "—" . - gsub "([0-9])--?([0-9])" "\\1–\\2" - escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "…" in - escapeSingleQuotes . escapeDoubleQuotes . escapeDashes . - escapeEllipses . stringToHtml - --- | Escape code string as needed for HTML. -codeStringToHtml :: String -> String -codeStringToHtml [] = [] -codeStringToHtml (x:xs) = case x of - '&' -> "&" ++ codeStringToHtml xs - '<' -> "<" ++ codeStringToHtml xs - _ -> x:(codeStringToHtml xs) - --- | Escape string to HTML appropriate for attributes -attributeStringToHtml :: String -> String -attributeStringToHtml = gsub "\"" """ - -- | Returns an HTML header with appropriate bibliographic information. htmlHeader :: WriterOptions -> Meta -> String htmlHeader options (Meta title authors date) = @@ -178,12 +121,12 @@ htmlHeader options (Meta title authors date) = authortext = if (null authors) then "" else "\n" datetext = if (date == "") then "" else "\n" in + (stringToSGML options date) ++ "\" />\n" in (writerHeader options) ++ authortext ++ datetext ++ titletext ++ "\n\n" @@ -216,7 +159,7 @@ blockToHtml options (Note ref lst) = "\">↩\n" blockToHtml options (Key _ _) = "" blockToHtml options (CodeBlock str) = - "
" ++ (codeStringToHtml str) ++ "\n
\n" + "
" ++ (escapeSGML str) ++ "\n
\n" blockToHtml options (RawHtml str) = str blockToHtml options (BulletList lst) = let attribs = if (writerIncremental options) @@ -255,18 +198,17 @@ inlineToHtml options (Emph lst) = inlineToHtml options (Strong lst) = "" ++ (inlineListToHtml options lst) ++ "" inlineToHtml options (Code str) = - "" ++ (codeStringToHtml str) ++ "" -inlineToHtml options (Str str) = - if (writerSmart options) then stringToSmartHtml str else stringToHtml str -inlineToHtml options (TeX str) = (codeStringToHtml str) + "" ++ (escapeSGML str) ++ "" +inlineToHtml options (Str str) = stringToSGML options str +inlineToHtml options (TeX str) = (escapeSGML str) inlineToHtml options (HtmlInline str) = str inlineToHtml options (LineBreak) = "
\n" inlineToHtml options Space = " " inlineToHtml options (Link text (Src src tit)) = - let title = attributeStringToHtml tit in + let title = stringToSGML options tit in if (isPrefixOf "mailto:" src) then obfuscateLink options text src - else "" else ">") ++ (inlineListToHtml options text) ++ "" inlineToHtml options (Link text (Ref ref)) = @@ -274,7 +216,7 @@ inlineToHtml options (Link text (Ref ref)) = (inlineListToHtml options ref) ++ "]" -- this is what markdown does, for better or worse inlineToHtml options (Image alt (Src source tit)) = - let title = attributeStringToHtml tit + let title = stringToSGML options tit alternate = inlineListToHtml options alt in "

Smarty

diff --git a/tests/s5.fragment.html b/tests/s5.fragment.html index 370b9c111..c40f2514e 100644 --- a/tests/s5.fragment.html +++ b/tests/s5.fragment.html @@ -6,7 +6,7 @@

Smarty

    -
  • "Hello there"
  • +
  • "Hello there"
  • Here's a -- dash
  • And 'ellipses'...
diff --git a/tests/s5.inserts.html b/tests/s5.inserts.html index 7be33a2c8..c7e544409 100644 --- a/tests/s5.inserts.html +++ b/tests/s5.inserts.html @@ -21,7 +21,7 @@ STUFF INSERTED

Smarty

    -
  • "Hello there"
  • +
  • "Hello there"
  • Here's a -- dash
  • And 'ellipses'...
diff --git a/tests/writer.docbook b/tests/writer.docbook index 3cf7b6bcf..150b63bac 100644 --- a/tests/writer.docbook +++ b/tests/writer.docbook @@ -89,7 +89,7 @@ sub status { - print "working"; + print "working"; } @@ -133,7 +133,7 @@ sub status { sub status { - print "working"; + print "working"; }
@@ -177,7 +177,7 @@ sub status { ---- (should be four hyphens) sub status { - print "working"; + print "working"; } this code block is indented by one tab @@ -188,7 +188,7 @@ this code block is indented by one tab this code block is indented by two tabs -These should not be escaped: \$ \\ \> \[ \{ +These should not be escaped: \$ \\ \> \[ \{
@@ -577,15 +577,16 @@ These should not be escaped: \$ \\ \> \[ \{ word. - This is code: >, $, + This is code: >, $, \, \$, - <html>. + <html>.
Smart quotes, ellipses, dashes - "Hello," said the spider. "'Shelob' is my name." + "Hello," said the spider. "'Shelob' is my + name." 'A', 'B', and 'C' are letters. @@ -594,11 +595,11 @@ These should not be escaped: \$ \\ \> \[ \{ 'Oak,' 'elm,' and 'beech' are names of trees. So is 'pine.' - 'He said, "I want to go."' Were you alive in the 70's? + 'He said, "I want to go."' Were you alive in the 70's? Here is some quoted 'code' and a - "quoted link". + "quoted link". Some dashes: one---two --- three--four -- five. @@ -672,7 +673,7 @@ These should not be escaped: \$ \\ \> \[ \{ $22,000 is a lot of money. So is $34,000. (It - worked if "lot" is emphasized.) + worked if "lot" is emphasized.) @@ -909,17 +910,17 @@ Cat & 1 \\ \hline
Auto-links should not occur here: - <http://example.com/> + <http://example.com/> -or here: <http://example.com/> +or here: <http://example.com/>
Images - From "Voyage dans la Lune" by Georges Melies (1902): + From "Voyage dans la Lune" by Georges Melies (1902): @@ -963,7 +964,7 @@ or here: <http://example.com/> footnote (as with list items). - { <code> } + { <code> } If you want, you can indent every line, but you can also be lazy diff --git a/tests/writer.html b/tests/writer.html index 191b1982e..8915a172c 100644 --- a/tests/writer.html +++ b/tests/writer.html @@ -39,7 +39,7 @@ here.

Code in a block quote:

sub status {
-    print "working";
+    print "working";
 }
 

A list:

@@ -60,7 +60,7 @@ here.

Example:

sub status {
-    print "working";
+    print "working";
 }
 
@@ -84,7 +84,7 @@ here.

---- (should be four hyphens)
 
 sub status {
-    print "working";
+    print "working";
 }
 
 this code block is indented by one tab
@@ -92,7 +92,7 @@ this code block is indented by one tab
 

And:

    this code block is indented by two tabs
 
-These should not be escaped:  \$ \\ \> \[ \{
+These should not be escaped:  \$ \\ \> \[ \{
 

Lists

@@ -255,12 +255,12 @@ These should not be escaped: \$ \\ \> \[ \{
foo

This should be a code block, though:

-
<div>
+
<div>
     foo
-</div>
+</div>
 

As should this:

-
<div>foo</div>
+
<div>foo</div>
 

Now, nested:

@@ -281,12 +281,12 @@ Blah This is another comment. -->

Code block:

-
<!-- Comment -->
+
<!-- Comment -->
 

Just plain comment, with trailing spaces on the line:

Code:

-
<hr />
+
<hr />
 

Hr's:


@@ -315,14 +315,14 @@ Blah

So is this word.

This is strong and em.

So is this word.

-

This is code: >, $, \, \$, <html>.

+

This is code: >, $, \, \$, <html>.


Smart quotes, ellipses, dashes

-

"Hello," said the spider. "'Shelob' is my name."

+

"Hello," said the spider. "'Shelob' is my name."

'A', 'B', and 'C' are letters.

'Oak,' 'elm,' and 'beech' are names of trees. So is 'pine.'

-

'He said, "I want to go."' Were you alive in the 70's?

-

Here is some quoted 'code' and a "quoted link".

+

'He said, "I want to go."' Were you alive in the 70's?

+

Here is some quoted 'code' and a "quoted link".

Some dashes: one---two --- three--four -- five.

Dashes between numbers: 5-7, 255-66, 1987-1999.

Ellipses...and. . .and . . . .

@@ -342,7 +342,7 @@ Blah

These shouldn't be math:

  • To get the famous equation, write $e = mc^2$.
  • -
  • $22,000 is a lot of money. So is $34,000. (It worked if "lot" is emphasized.)
  • +
  • $22,000 is a lot of money. So is $34,000. (It worked if "lot" is emphasized.)
  • Escaped $: $73 this should be emphasized 23$.

Here's a LaTeX table:

@@ -355,11 +355,11 @@ Cat & 1 \\ \hline

Special Characters

Here is some unicode:

    -
  • I hat: Î
  • -
  • o umlaut: ö
  • -
  • section: §
  • -
  • set membership: ∈
  • -
  • copyright: ©
  • +
  • I hat: Î
  • +
  • o umlaut: ö
  • +
  • section: §
  • +
  • set membership: ∈
  • +
  • copyright: ©

AT&T has an ampersand in their name.

AT&T is another way to write it.

@@ -414,7 +414,7 @@ document.write(''+'Email link'+'<\/'+'a'+'>')

Foo biz.

With ampersands

Here's a link with an ampersand in the URL.

-

Here's a link with an amersand in the link text: AT&T.

+

Here's a link with an amersand in the link text: AT&T.

Here's an inline link.

Here's an inline link in pointy braces.

Autolinks

@@ -433,12 +433,12 @@ document.write(''+e+'<\/'+'a'+'>');

Blockquoted: http://example.com/

-

Auto-links should not occur here: <http://example.com/>

-
or here: <http://example.com/>
+

Auto-links should not occur here: <http://example.com/>

+
or here: <http://example.com/>
 

Images

-

From "Voyage dans la Lune" by Georges Melies (1902):

+

From "Voyage dans la Lune" by Georges Melies (1902):

lalune

Here is a movie movie icon.


@@ -458,7 +458,7 @@ document.write(''+e+'<\/'+'a'+'>');
  • Here's the long note. This one contains multiple blocks.

    Subsequent blocks are indented to show that they belong to the footnote (as with list items).

    -
      { <code> }
    +
      { <code> }
     

    If you want, you can indent every line, but you can also be lazy and just indent the first line of each block.

  • diff --git a/tests/writer.smart.html b/tests/writer.smart.html index c14a6de54..14b70e2fe 100644 --- a/tests/writer.smart.html +++ b/tests/writer.smart.html @@ -39,7 +39,7 @@ here.

    Code in a block quote:

    sub status {
    -    print "working";
    +    print "working";
     }
     

    A list:

    @@ -60,7 +60,7 @@ here.

    Example:

    sub status {
    -    print "working";
    +    print "working";
     }
     
    @@ -84,7 +84,7 @@ here.

    ---- (should be four hyphens)
     
     sub status {
    -    print "working";
    +    print "working";
     }
     
     this code block is indented by one tab
    @@ -92,7 +92,7 @@ this code block is indented by one tab
     

    And:

        this code block is indented by two tabs
     
    -These should not be escaped:  \$ \\ \> \[ \{
    +These should not be escaped:  \$ \\ \> \[ \{
     

    Lists

    @@ -255,12 +255,12 @@ These should not be escaped: \$ \\ \> \[ \{
    foo

    This should be a code block, though:

    -
    <div>
    +
    <div>
         foo
    -</div>
    +</div>
     

    As should this:

    -
    <div>foo</div>
    +
    <div>foo</div>
     

    Now, nested:

    @@ -281,12 +281,12 @@ Blah This is another comment. -->

    Code block:

    -
    <!-- Comment -->
    +
    <!-- Comment -->
     

    Just plain comment, with trailing spaces on the line:

    Code:

    -
    <hr />
    +
    <hr />
     

    Hr’s:


    @@ -315,7 +315,7 @@ Blah

    So is this word.

    This is strong and em.

    So is this word.

    -

    This is code: >, $, \, \$, <html>.

    +

    This is code: >, $, \, \$, <html>.


    Smart quotes, ellipses, dashes

    “Hello,” said the spider. “‘Shelob’ is my name.”

    @@ -355,11 +355,11 @@ Cat & 1 \\ \hline

    Special Characters

    Here is some unicode:

      -
    • I hat: Î
    • -
    • o umlaut: ö
    • -
    • section: §
    • -
    • set membership: ∈
    • -
    • copyright: ©
    • +
    • I hat: Î
    • +
    • o umlaut: ö
    • +
    • section: §
    • +
    • set membership: ∈
    • +
    • copyright: ©

    AT&T has an ampersand in their name.

    AT&T is another way to write it.

    @@ -389,7 +389,7 @@ Cat & 1 \\ \hline

    URL and title.

    URL and title.

    URL and title.

    -

    URL and title

    +

    URL and title

    URL and title