2013-08-04 14:12:13 -07:00
|
|
|
{-# LANGUAGE CPP #-}
|
2010-05-06 20:27:10 -07:00
|
|
|
{-
|
2016-03-22 17:20:39 -07:00
|
|
|
Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu>
|
2010-05-06 20:27:10 -07:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
-}
|
|
|
|
|
|
|
|
{- |
|
|
|
|
Module : Text.Pandoc.UTF8
|
2016-03-22 17:20:39 -07:00
|
|
|
Copyright : Copyright (C) 2010-2016 John MacFarlane
|
2012-07-26 22:32:53 -07:00
|
|
|
License : GNU GPL, version 2 or above
|
2010-05-06 20:27:10 -07:00
|
|
|
|
|
|
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
|
|
|
Stability : alpha
|
|
|
|
Portability : portable
|
|
|
|
|
2012-09-23 22:53:34 -07:00
|
|
|
UTF-8 aware string IO functions that will work with GHC 6.10, 6.12, or 7.
|
2010-05-06 20:27:10 -07:00
|
|
|
-}
|
|
|
|
module Text.Pandoc.UTF8 ( readFile
|
|
|
|
, writeFile
|
|
|
|
, getContents
|
|
|
|
, putStr
|
|
|
|
, putStrLn
|
|
|
|
, hPutStr
|
|
|
|
, hPutStrLn
|
2011-01-30 16:01:31 -08:00
|
|
|
, hGetContents
|
2012-09-25 19:54:21 -07:00
|
|
|
, toString
|
|
|
|
, fromString
|
|
|
|
, toStringLazy
|
|
|
|
, fromStringLazy
|
2012-09-23 10:43:03 -07:00
|
|
|
, encodePath
|
2012-09-23 11:01:09 -07:00
|
|
|
, decodeArg
|
2010-05-06 20:27:10 -07:00
|
|
|
)
|
|
|
|
|
|
|
|
where
|
2011-01-30 16:01:31 -08:00
|
|
|
|
|
|
|
import System.IO hiding (readFile, writeFile, getContents,
|
|
|
|
putStr, putStrLn, hPutStr, hPutStrLn, hGetContents)
|
2013-05-29 09:11:01 -07:00
|
|
|
import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn)
|
2011-01-30 16:01:31 -08:00
|
|
|
import qualified System.IO as IO
|
2012-09-25 19:54:21 -07:00
|
|
|
import qualified Data.ByteString.Char8 as B
|
|
|
|
import qualified Data.ByteString.Lazy as BL
|
|
|
|
import qualified Data.Text.Encoding as T
|
|
|
|
import qualified Data.Text as T
|
|
|
|
import qualified Data.Text.Lazy as TL
|
|
|
|
import qualified Data.Text.Lazy.Encoding as TL
|
2011-01-30 16:01:31 -08:00
|
|
|
|
|
|
|
readFile :: FilePath -> IO String
|
|
|
|
readFile f = do
|
2012-06-22 21:24:02 +02:00
|
|
|
h <- openFile (encodePath f) ReadMode
|
2011-01-30 16:01:31 -08:00
|
|
|
hGetContents h
|
|
|
|
|
|
|
|
writeFile :: FilePath -> String -> IO ()
|
2012-06-22 21:24:02 +02:00
|
|
|
writeFile f s = withFile (encodePath f) WriteMode $ \h -> hPutStr h s
|
2011-01-30 16:01:31 -08:00
|
|
|
|
|
|
|
getContents :: IO String
|
|
|
|
getContents = hGetContents stdin
|
|
|
|
|
|
|
|
putStr :: String -> IO ()
|
|
|
|
putStr s = hPutStr stdout s
|
|
|
|
|
|
|
|
putStrLn :: String -> IO ()
|
|
|
|
putStrLn s = hPutStrLn stdout s
|
|
|
|
|
|
|
|
hPutStr :: Handle -> String -> IO ()
|
|
|
|
hPutStr h s = hSetEncoding h utf8 >> IO.hPutStr h s
|
|
|
|
|
|
|
|
hPutStrLn :: Handle -> String -> IO ()
|
|
|
|
hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s
|
|
|
|
|
|
|
|
hGetContents :: Handle -> IO String
|
2013-07-04 15:43:42 -07:00
|
|
|
hGetContents = fmap toString . B.hGetContents
|
2012-09-26 09:04:21 -07:00
|
|
|
-- hGetContents h = hSetEncoding h utf8_bom
|
|
|
|
-- >> hSetNewlineMode h universalNewlineMode
|
|
|
|
-- >> IO.hGetContents h
|
2011-01-30 16:01:31 -08:00
|
|
|
|
2013-02-08 09:45:15 -08:00
|
|
|
-- | Drop BOM (byte order marker) if present at beginning of string.
|
|
|
|
-- Note that Data.Text converts the BOM to code point FEFF, zero-width
|
|
|
|
-- no-break space, so if the string begins with this we strip it off.
|
|
|
|
dropBOM :: String -> String
|
|
|
|
dropBOM ('\xFEFF':xs) = xs
|
|
|
|
dropBOM xs = xs
|
|
|
|
|
2015-05-05 12:41:35 -07:00
|
|
|
filterCRs :: String -> String
|
|
|
|
filterCRs ('\r':'\n':xs) = '\n': filterCRs xs
|
|
|
|
filterCRs ('\r':xs) = '\n' : filterCRs xs
|
|
|
|
filterCRs (x:xs) = x : filterCRs xs
|
|
|
|
filterCRs [] = []
|
|
|
|
|
2013-01-06 16:35:41 -08:00
|
|
|
-- | Convert UTF8-encoded ByteString to String, also
|
|
|
|
-- removing '\r' characters.
|
2012-09-25 19:54:21 -07:00
|
|
|
toString :: B.ByteString -> String
|
2015-05-05 12:41:35 -07:00
|
|
|
toString = filterCRs . dropBOM . T.unpack . T.decodeUtf8
|
2012-09-25 19:54:21 -07:00
|
|
|
|
|
|
|
fromString :: String -> B.ByteString
|
|
|
|
fromString = T.encodeUtf8 . T.pack
|
|
|
|
|
2013-01-06 16:35:41 -08:00
|
|
|
-- | Convert UTF8-encoded ByteString to String, also
|
|
|
|
-- removing '\r' characters.
|
2012-09-25 19:54:21 -07:00
|
|
|
toStringLazy :: BL.ByteString -> String
|
2015-05-05 12:41:35 -07:00
|
|
|
toStringLazy = filterCRs . dropBOM . TL.unpack . TL.decodeUtf8
|
2012-09-25 19:54:21 -07:00
|
|
|
|
|
|
|
fromStringLazy :: String -> BL.ByteString
|
|
|
|
fromStringLazy = TL.encodeUtf8 . TL.pack
|
|
|
|
|
2012-06-22 21:24:02 +02:00
|
|
|
encodePath :: FilePath -> FilePath
|
2012-09-23 11:01:09 -07:00
|
|
|
decodeArg :: String -> String
|
2012-06-25 14:27:22 -07:00
|
|
|
#if MIN_VERSION_base(4,4,0)
|
2012-06-22 21:24:02 +02:00
|
|
|
encodePath = id
|
2012-09-23 11:01:09 -07:00
|
|
|
decodeArg = id
|
2012-06-22 21:24:02 +02:00
|
|
|
#else
|
2012-09-25 19:54:21 -07:00
|
|
|
encodePath = B.unpack . fromString
|
|
|
|
decodeArg = toString . B.pack
|
2012-06-22 21:24:02 +02:00
|
|
|
#endif
|