UTF8 module: Remove \r when reading.

This should prevent problems with extra CRs (from CRLF line endings) on Windows.
2013-01-06 16:35:41 -08:00 · 2013-01-06 16:35:41 -08:00 · c7f4333f83
commit c7f4333f83
parent f779411fe2
1 changed files with 7 additions and 4 deletions
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@ -56,7 +56,6 @@ import qualified Data.Text.Encoding as T
 import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Lazy.Encoding as TL
-import Data.Text.Encoding.Error

 readFile :: FilePath -> IO String
 readFile f = do
@ -82,19 +81,23 @@ hPutStrLn :: Handle -> String -> IO ()
 hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s

 hGetContents :: Handle -> IO String
-hGetContents h = fmap (TL.unpack . TL.decodeUtf8) $ BL.hGetContents h
+hGetContents = fmap toStringLazy . BL.hGetContents
 -- hGetContents h = hSetEncoding h utf8_bom
 --                   >> hSetNewlineMode h universalNewlineMode
 --                   >> IO.hGetContents h

+-- | Convert UTF8-encoded ByteString to String, also
+-- removing '\r' characters.
 toString :: B.ByteString -> String
-toString = T.unpack . T.decodeUtf8With lenientDecode
+toString = filter (/='\r') . T.unpack . T.decodeUtf8

 fromString :: String -> B.ByteString
 fromString = T.encodeUtf8 . T.pack

+-- | Convert UTF8-encoded lazy ByteString to String, also
+-- removing '\r' characters.
 toStringLazy :: BL.ByteString -> String
-toStringLazy = TL.unpack . TL.decodeUtf8With lenientDecode
+toStringLazy = filter (/='\r') . TL.unpack . TL.decodeUtf8

 fromStringLazy :: String -> BL.ByteString
 fromStringLazy = TL.encodeUtf8 . TL.pack