From c7f4333f83b7441ad9a3a43c798b8f029670ffa3 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Sun, 6 Jan 2013 16:35:41 -0800
Subject: [PATCH] UTF8 module: Remove `\r` when reading.

This should prevent problems with extra CRs on windows.
---
NOTE(review): in addition to stripping '\r', this patch replaces
`decodeUtf8With lenientDecode` with plain `decodeUtf8` in toString and
toStringLazy and drops the Data.Text.Encoding.Error import. Presumably
invalid UTF-8 input is then no longer decoded leniently; please confirm
that this is intended, since the commit message does not mention it.

 src/Text/Pandoc/UTF8.hs | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index 46be536d0..582afb6dc 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -56,7 +56,6 @@ import qualified Data.Text.Encoding as T
 import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Lazy.Encoding as TL
-import Data.Text.Encoding.Error
 
 readFile :: FilePath -> IO String
 readFile f = do
@@ -82,19 +81,23 @@ hPutStrLn :: Handle -> String -> IO ()
 hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s
 
 hGetContents :: Handle -> IO String
-hGetContents h = fmap (TL.unpack . TL.decodeUtf8) $ BL.hGetContents h
+hGetContents = fmap toStringLazy . BL.hGetContents
 -- hGetContents h = hSetEncoding h utf8_bom
 --                   >> hSetNewlineMode h universalNewlineMode
 --                   >> IO.hGetContents h
 
+-- | Decode a strict UTF8-encoded ByteString to a String,
+-- dropping every '\r' character from the decoded result.
 toString :: B.ByteString -> String
-toString = T.unpack . T.decodeUtf8With lenientDecode
+toString = filter (/='\r') . T.unpack . T.decodeUtf8
 
 fromString :: String -> B.ByteString
 fromString = T.encodeUtf8 . T.pack
 
+-- | Decode a lazy UTF8-encoded ByteString to a String,
+-- dropping every '\r' character from the decoded result.
 toStringLazy :: BL.ByteString -> String
-toStringLazy = TL.unpack . TL.decodeUtf8With lenientDecode
+toStringLazy = filter (/='\r') . TL.unpack . TL.decodeUtf8
 
 fromStringLazy :: String -> BL.ByteString
 fromStringLazy = TL.encodeUtf8 . TL.pack