pandoc/System/IO/UTF8.hs
fiddlosopher d00da31622 Simplified and improved UTF8 handling:
+ Removed utf8-string cabal configuration flag.
+ Instead, we just include System.IO.UTF8 and Codec.Binary.UTF8.String
  from utf8-string package in the source tree, avoiding a dependency
  on utf8-string and avoiding crufty custom UTF8 code. (The old
  Text.Pandoc.UTF8 had problems with the getContents function.)
+ Removed lots of CPP directives that are no longer needed.
+ In Setup.hs, use '-i..' in running RunTests.hs, so the local UTF8
  code will be found.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1411 788f1e2b-df1e-0410-8736-df70ead52e1b
2008-08-14 01:32:54 +00:00

118 lines
3.5 KiB
Haskell

-----------------------------------------------------------------------------
-- |
-- Module : System.IO.UTF8
-- Copyright : (c) Eric Mertens 2007
-- License : BSD3-style (see LICENSE)
--
-- Maintainer: emertens@galois.com
-- Stability : experimental
-- Portability : portable
--
-- String IO preserving UTF8 encoding.
--
module System.IO.UTF8 (
print
, putStr
, putStrLn
, getLine
, readLn
, readFile
, writeFile
, appendFile
, getContents
, hGetLine
, hGetContents
, hPutStr
, hPutStrLn
) where
import Control.Monad (liftM)
import Data.Char (ord, chr)
import Data.Word (Word8)
import Prelude (String, ($), (=<<), (>>=), (.), map, toEnum, fromEnum, Read,
Show(..))
import System.IO (Handle, IO, FilePath)
import qualified System.IO as IO
import Codec.Binary.UTF8.String (encode, decode)
-- | Encode a string in UTF8 form.
encodeString :: String -> String
encodeString xs = bytesToString (encode xs)
-- | Decode a string from UTF8
decodeString :: String -> String
decodeString xs = decode (stringToBytes xs)
-- | Convert a list of bytes to a String
bytesToString :: [Word8] -> String
bytesToString xs = map (chr . fromEnum) xs
-- | String to list of bytes.
stringToBytes :: String -> [Word8]
stringToBytes xs = map (toEnum . ord) xs
-- | The 'print' function outputs a value of any printable type to the
-- standard output device. This function differs from the
-- System.IO.print in that it preserves any UTF8 encoding of the shown value.
--
print :: Show a => a -> IO ()
print x = putStrLn (show x)
-- | Write a UTF8 string to the standard output device
putStr :: String -> IO ()
putStr x = IO.putStr (encodeString x)
-- | The same as 'putStr', but adds a newline character.
putStrLn :: String -> IO ()
putStrLn x = IO.putStrLn (encodeString x)
-- | Read a UTF8 line from the standard input device
getLine :: IO String
getLine = liftM decodeString IO.getLine
-- | The 'readLn' function combines 'getLine' and 'readIO', preserving UTF8
readLn :: Read a => IO a
readLn = IO.readIO =<< getLine
-- | The 'readFile' function reads a file and
-- returns the contents of the file as a UTF8 string.
-- The file is read lazily, on demand, as with 'getContents'.
readFile :: FilePath -> IO String
readFile n = liftM decodeString (IO.openBinaryFile n IO.ReadMode >>=
IO.hGetContents)
-- | The computation 'writeFile' @file str@ function writes the UTF8 string @str@,
-- to the file @file@.
writeFile :: FilePath -> String -> IO ()
writeFile n c = IO.withBinaryFile n IO.WriteMode $ \ h ->
IO.hPutStr h $ encodeString c
-- | The computation 'appendFile' @file str@ function appends the UTF8 string @str@,
-- to the file @file@.
appendFile :: FilePath -> String -> IO ()
appendFile n c = IO.withBinaryFile n IO.AppendMode $ \h ->
IO.hPutStr h $ encodeString c
-- | Read a UTF8 line from a Handle
hGetLine :: Handle -> IO String
hGetLine h = liftM decodeString $ IO.hGetLine h
-- | Lazily read a UTF8 string from a Handle
hGetContents :: Handle -> IO String
hGetContents h = liftM decodeString (IO.hGetContents h)
-- | Write a UTF8 string to a Handle.
hPutStr :: Handle -> String -> IO ()
hPutStr h s = IO.hPutStr h (encodeString s)
-- | Write a UTF8 string to a Handle, appending a newline.
hPutStrLn :: Handle -> String -> IO ()
hPutStrLn h s = IO.hPutStrLn h (encodeString s)
-- | Lazily read stdin as a UTF8 string.
getContents :: IO String
getContents = liftM decodeString IO.getContents