Modified fromUTF8 to strip out the BOM (byte order mark)
wherever it is present. See http://en.wikipedia.org/wiki/Byte_Order_Mark and http://six.pairlist.net/pipermail/markdown-discuss/2007-October/000874.html.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1054 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
parent
447b99e35d
commit
65a5db2d41
1 changed file with 1 addition and 0 deletions
|
@@ -16,6 +16,7 @@ module Text.Pandoc.UTF8 (
|
|||
-- | Take a UTF-8 string and decode it into a Unicode string.
|
||||
fromUTF8 :: String -> String
|
||||
fromUTF8 "" = ""
|
||||
fromUTF8 ('\xef':'\xbb':'\xbf':cs) = fromUTF8 cs -- skip BOM (byte order marker)
|
||||
fromUTF8 (c:c':cs) | '\xc0' <= c && c <= '\xdf' &&
|
||||
'\x80' <= c' && c' <= '\xbf' =
|
||||
toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : fromUTF8 cs
|
||||
|
|
Loading…
Add table
Reference in a new issue