2017-06-10 15:54:35 +02:00
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
2010-05-06 20:27:10 -07:00
|
|
|
{-
|
2018-01-05 20:19:47 +01:00
|
|
|
Copyright (C) 2010-2018 John MacFarlane <jgm@berkeley.edu>
|
2010-05-06 20:27:10 -07:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
-}
|
|
|
|
|
|
|
|
{- |
|
|
|
|
Module : Text.Pandoc.UTF8
|
2018-01-05 20:19:47 +01:00
|
|
|
Copyright : Copyright (C) 2010-2018 John MacFarlane
|
2012-07-26 22:32:53 -07:00
|
|
|
License : GNU GPL, version 2 or above
|
2010-05-06 20:27:10 -07:00
|
|
|
|
|
|
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
|
|
|
Stability : alpha
|
|
|
|
Portability : portable
|
|
|
|
|
2012-09-23 22:53:34 -07:00
|
|
|
UTF-8 aware string IO functions that will work with GHC 6.10, 6.12, or 7.
|
2010-05-06 20:27:10 -07:00
|
|
|
-}
|
|
|
|
module Text.Pandoc.UTF8 ( readFile
|
|
|
|
, getContents
|
2017-05-17 15:13:35 +02:00
|
|
|
, writeFileWith
|
|
|
|
, writeFile
|
|
|
|
, putStrWith
|
2010-05-06 20:27:10 -07:00
|
|
|
, putStr
|
2017-05-17 15:13:35 +02:00
|
|
|
, putStrLnWith
|
2010-05-06 20:27:10 -07:00
|
|
|
, putStrLn
|
2017-05-17 15:13:35 +02:00
|
|
|
, hPutStrWith
|
2010-05-06 20:27:10 -07:00
|
|
|
, hPutStr
|
2017-05-17 15:13:35 +02:00
|
|
|
, hPutStrLnWith
|
2010-05-06 20:27:10 -07:00
|
|
|
, hPutStrLn
|
2011-01-30 16:01:31 -08:00
|
|
|
, hGetContents
|
2012-09-25 19:54:21 -07:00
|
|
|
, toString
|
2017-06-10 15:54:35 +02:00
|
|
|
, toText
|
2012-09-25 19:54:21 -07:00
|
|
|
, fromString
|
2017-06-10 16:05:56 +02:00
|
|
|
, fromText
|
2012-09-25 19:54:21 -07:00
|
|
|
, toStringLazy
|
2017-06-10 16:05:56 +02:00
|
|
|
, fromTextLazy
|
2017-06-10 15:54:35 +02:00
|
|
|
, toTextLazy
|
2012-09-25 19:54:21 -07:00
|
|
|
, fromStringLazy
|
2012-09-23 10:43:03 -07:00
|
|
|
, encodePath
|
2012-09-23 11:01:09 -07:00
|
|
|
, decodeArg
|
2010-05-06 20:27:10 -07:00
|
|
|
)
|
|
|
|
|
|
|
|
where
|
2011-01-30 16:01:31 -08:00
|
|
|
|
2012-09-25 19:54:21 -07:00
|
|
|
import qualified Data.ByteString.Char8 as B
|
2017-06-10 15:54:35 +02:00
|
|
|
import qualified Data.ByteString.Lazy.Char8 as BL
|
2012-09-25 19:54:21 -07:00
|
|
|
import qualified Data.Text as T
|
2017-03-04 13:03:41 +01:00
|
|
|
import qualified Data.Text.Encoding as T
|
2012-09-25 19:54:21 -07:00
|
|
|
import qualified Data.Text.Lazy as TL
|
|
|
|
import qualified Data.Text.Lazy.Encoding as TL
|
2017-03-04 13:03:41 +01:00
|
|
|
import Prelude hiding (getContents, putStr, putStrLn, readFile, writeFile)
|
|
|
|
import System.IO hiding (getContents, hGetContents, hPutStr, hPutStrLn, putStr,
|
|
|
|
putStrLn, readFile, writeFile)
|
|
|
|
import qualified System.IO as IO
|
2011-01-30 16:01:31 -08:00
|
|
|
|
|
|
|
-- | Read the file at the given path and return its contents as a 'String'.
-- The path is passed through 'encodePath', the file is opened in 'ReadMode',
-- and the resulting handle is handed to this module's 'hGetContents'.
readFile :: FilePath -> IO String
readFile path = openFile (encodePath path) ReadMode >>= hGetContents
|
|
|
|
|
|
|
|
-- | Read everything from 'stdin' using this module's 'hGetContents'.
getContents :: IO String
getContents = do
  input <- hGetContents stdin
  return input
|
|
|
|
|
2017-05-17 15:13:35 +02:00
|
|
|
-- | Write a 'String' to the file at the given path, using the supplied
-- 'Newline' convention. The path is passed through 'encodePath' and the
-- file is opened in 'WriteMode' via 'withFile'; the actual writing is
-- delegated to 'hPutStrWith'.
writeFileWith :: Newline -> FilePath -> String -> IO ()
writeFileWith newline path contents =
  withFile (encodePath path) WriteMode writeAll
  where
    -- Write the whole string to the freshly opened handle.
    writeAll handle = hPutStrWith newline handle contents
|
|
|
|
|
|
|
|
-- | Like 'writeFileWith', using 'nativeNewline' as the newline convention.
writeFile :: FilePath -> String -> IO ()
writeFile path contents = writeFileWith nativeNewline path contents
|
|
|
|
|
|
|
|
-- | Write a 'String' to 'stdout' with the given 'Newline' convention,
-- by way of 'hPutStrWith'.
putStrWith :: Newline -> String -> IO ()
putStrWith newline = hPutStrWith newline stdout
|
|
|
|
|
2011-01-30 16:01:31 -08:00
|
|
|
-- | Like 'putStrWith', using 'nativeNewline' as the newline convention.
putStr :: String -> IO ()
putStr str = putStrWith nativeNewline str
|
|
|
|
|
|
|
|
-- | Write a 'String' followed by a line ending to 'stdout' with the given
-- 'Newline' convention, by way of 'hPutStrLnWith'.
putStrLnWith :: Newline -> String -> IO ()
putStrLnWith newline = hPutStrLnWith newline stdout
|
2011-01-30 16:01:31 -08:00
|
|
|
|
|
|
|
-- | Like 'putStrLnWith', using 'nativeNewline' as the newline convention.
putStrLn :: String -> IO ()
putStrLn str = putStrLnWith nativeNewline str
|
|
|
|
|
|
|
|
-- | Write a 'String' to a handle as UTF-8.
--
-- Before writing, the handle is reconfigured: its input and output newline
-- modes are both set to the given 'Newline', and its encoding is set to
-- 'utf8'.
hPutStrWith :: Newline -> Handle -> String -> IO ()
hPutStrWith newline handle str = do
  hSetNewlineMode handle NewlineMode { inputNL = newline, outputNL = newline }
  hSetEncoding handle utf8
  IO.hPutStr handle str
|
2011-01-30 16:01:31 -08:00
|
|
|
|
|
|
|
-- | Like 'hPutStrWith', using 'nativeNewline' as the newline convention.
hPutStr :: Handle -> String -> IO ()
hPutStr handle str = hPutStrWith nativeNewline handle str
|
|
|
|
|
|
|
|
-- | Write a 'String' followed by a line ending to a handle as UTF-8.
--
-- Before writing, the handle is reconfigured: its input and output newline
-- modes are both set to the given 'Newline', and its encoding is set to
-- 'utf8'.
hPutStrLnWith :: Newline -> Handle -> String -> IO ()
hPutStrLnWith newline handle str = do
  hSetNewlineMode handle NewlineMode { inputNL = newline, outputNL = newline }
  hSetEncoding handle utf8
  IO.hPutStrLn handle str
|
2011-01-30 16:01:31 -08:00
|
|
|
|
|
|
|
-- | Like 'hPutStrLnWith', using 'nativeNewline' as the newline convention.
hPutStrLn :: Handle -> String -> IO ()
hPutStrLn handle str = hPutStrLnWith nativeNewline handle str
|
2011-01-30 16:01:31 -08:00
|
|
|
|
|
|
|
-- | Read the contents of a handle as a strict 'B.ByteString' and convert
-- the bytes to a 'String' with 'toString'.
hGetContents :: Handle -> IO String
hGetContents handle = fmap toString (B.hGetContents handle)
|
2012-09-26 09:04:21 -07:00
|
|
|
-- hGetContents h = hSetEncoding h utf8_bom
|
|
|
|
-- >> hSetNewlineMode h universalNewlineMode
|
|
|
|
-- >> IO.hGetContents h
|
2011-01-30 16:01:31 -08:00
|
|
|
|
2017-06-10 15:54:35 +02:00
|
|
|
-- | Decode a strict UTF-8 encoded 'B.ByteString' into strict 'T.Text'.
--
-- A leading UTF-8 byte order mark (@EF BB BF@) is dropped if present, and
-- every @\'\\r\'@ byte is removed before decoding.
toText :: B.ByteString -> T.Text
toText = T.decodeUtf8 . B.filter (/= '\r') . stripBOM
  where
    -- The three bytes of the UTF-8 byte order mark.
    bom = B.pack "\xEF\xBB\xBF"
    -- Remove the byte order mark when the input starts with it.
    stripBOM bytes
      | bom `B.isPrefixOf` bytes = B.drop (B.length bom) bytes
      | otherwise                = bytes
|
2015-05-05 12:41:35 -07:00
|
|
|
|
2013-01-06 16:35:41 -08:00
|
|
|
-- | Decode a strict UTF-8 encoded 'B.ByteString' into a 'String'.
-- The bytes are first converted with 'toText' and the result is unpacked.
toString :: B.ByteString -> String
toString bytes = T.unpack (toText bytes)
|
2012-09-25 19:54:21 -07:00
|
|
|
|
2017-06-10 15:54:35 +02:00
|
|
|
-- | Decode a lazy UTF-8 encoded 'BL.ByteString' into lazy 'TL.Text'.
--
-- A leading UTF-8 byte order mark (@EF BB BF@) is dropped if present, and
-- every @\'\\r\'@ byte is removed before decoding.
toTextLazy :: BL.ByteString -> TL.Text
toTextLazy = TL.decodeUtf8 . BL.filter (/= '\r') . stripBOM
  where
    -- The three bytes of the UTF-8 byte order mark.
    bom = BL.pack "\xEF\xBB\xBF"
    -- Remove the byte order mark when the input starts with it.
    stripBOM bytes
      | bom `BL.isPrefixOf` bytes = BL.drop (BL.length bom) bytes
      | otherwise                 = bytes
|
2012-09-25 19:54:21 -07:00
|
|
|
|
2013-01-06 16:35:41 -08:00
|
|
|
-- | Decode a lazy UTF-8 encoded 'BL.ByteString' into a 'String'.
-- The bytes are first converted with 'toTextLazy' and the result is unpacked.
toStringLazy :: BL.ByteString -> String
toStringLazy bytes = TL.unpack (toTextLazy bytes)
|
|
|
|
|
2017-06-10 16:05:56 +02:00
|
|
|
-- | Encode strict 'T.Text' as a strict UTF-8 'B.ByteString'.
fromText :: T.Text -> B.ByteString
fromText txt = T.encodeUtf8 txt
|
|
|
|
|
|
|
|
-- | Encode lazy 'TL.Text' as a lazy UTF-8 'BL.ByteString'.
fromTextLazy :: TL.Text -> BL.ByteString
fromTextLazy txt = TL.encodeUtf8 txt
|
|
|
|
|
2017-06-10 15:54:35 +02:00
|
|
|
-- | Encode a 'String' as a strict UTF-8 'B.ByteString', by packing it into
-- 'T.Text' and passing the result to 'fromText'.
fromString :: String -> B.ByteString
fromString str = fromText (T.pack str)
|
2012-09-25 19:54:21 -07:00
|
|
|
|
|
|
|
-- | Encode a 'String' as a lazy UTF-8 'BL.ByteString', by packing it into
-- lazy 'TL.Text' and passing the result to 'fromTextLazy'.
fromStringLazy :: String -> BL.ByteString
fromStringLazy str = fromTextLazy (TL.pack str)
|
2012-09-25 19:54:21 -07:00
|
|
|
|
2012-06-22 21:24:02 +02:00
|
|
|
-- | Prepare a 'FilePath' for use with the file-opening functions in this
-- module. Currently this returns the path unchanged.
encodePath :: FilePath -> FilePath
encodePath path = path
|
2016-09-01 07:07:03 -04:00
|
|
|
|
|
|
|
-- | Decode a command-line argument. Currently this returns the argument
-- unchanged.
decodeArg :: String -> String
decodeArg arg = arg
|