Strip out illegal XML characters in escapeStringForXML.

Closes #5119.
This commit is contained in:
John MacFarlane 2018-12-04 09:24:15 -08:00
parent 48115fcf1a
commit 38200c0291
2 changed files with 15 additions and 1 deletion

View file

@@ -57,7 +57,12 @@ escapeCharForXML x = case x of
-- | Escape string as needed for XML. Entity references are not preserved.
-- Characters that fail the @isLegalXMLChar@ test below are dropped first;
-- every remaining character is then passed through 'escapeCharForXML'.
escapeStringForXML :: String -> String
escapeStringForXML = concatMap escapeCharForXML
escapeStringForXML = concatMap escapeCharForXML . filter isLegalXMLChar
-- Tab, line feed and carriage return are the only characters below
-- U+0020 that are kept.
where isLegalXMLChar c = c == '\t' || c == '\n' || c == '\r' ||
-- U+D800..U+DFFF (the surrogate code points) fall in the gap between
-- this range and the next, so they are filtered out.
(c >= '\x20' && c <= '\xD7FF') ||
-- The upper bound stops at U+FFFD, so U+FFFE and U+FFFF are filtered out.
(c >= '\xE000' && c <= '\xFFFD') ||
(c >= '\x10000' && c <= '\x10FFFF')
-- see https://www.w3.org/TR/xml/#charsets
-- | Escape newline characters as &#10;
escapeNls :: String -> String

9
test/command/5119.md Normal file
View file

@@ -0,0 +1,9 @@
```
% pandoc -t docbook
h&#x4;i
^D
<para>
hi
</para>
```