From d00da316227c26bad857998d0398a23917ee0a7b Mon Sep 17 00:00:00 2001
From: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>
Date: Thu, 14 Aug 2008 01:32:54 +0000
Subject: [PATCH] Simplified and improved UTF8 handling:

+ Removed utf8-string cabal configuration flag.
+ Instead, we just include System.IO.UTF8 and Codec.Binary.UTF8.String
  from utf8-string package in the source tree, avoiding a dependency
  on utf8-string and avoiding crufty custom UTF8 code. (The old
  Text.Pandoc.UTF8 had problems with the getContents function.)
+ Removed lots of CPP directives that are no longer needed.
+ In Setup.hs, use '-i..' in running RunTests.hs, so the local UTF8
  code will be found.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1411 788f1e2b-df1e-0410-8736-df70ead52e1b
---
 Codec/Binary/UTF8/String.hs |  97 +++++++++++++++++++++++++++++
 INSTALL                     |   5 --
 Main.hs                     |   9 ---
 Setup.hs                    |   2 +-
 System/IO/UTF8.hs           | 118 ++++++++++++++++++++++++++++++++++++
 Text/Pandoc/ODT.hs          |   6 +-
 Text/Pandoc/PDF.hs          |   5 --
 Text/Pandoc/Shared.hs       |   4 --
 Text/Pandoc/TH.hs           |   5 --
 Text/Pandoc/UTF8.hs         |  76 -----------------------
 debian/copyright            |  52 +++++++---------
 pandoc.cabal                |  19 ++----
 12 files changed, 245 insertions(+), 153 deletions(-)
 create mode 100644 Codec/Binary/UTF8/String.hs
 create mode 100644 System/IO/UTF8.hs
 delete mode 100644 Text/Pandoc/UTF8.hs

diff --git a/Codec/Binary/UTF8/String.hs b/Codec/Binary/UTF8/String.hs
new file mode 100644
index 000000000..27c003f00
--- /dev/null
+++ b/Codec/Binary/UTF8/String.hs
@@ -0,0 +1,97 @@
+--
+-- |
+-- Module      :  Codec.Binary.UTF8.String
+-- Copyright   :  (c) Eric Mertens 2007
+-- License     :  BSD3-style (see LICENSE)
+-- 
+-- Maintainer:    emertens@galois.com
+-- Stability   :  experimental
+-- Portability :  portable
+--
+-- Support for encoding UTF8 Strings to and from @[Word8]@
+--
+
+module Codec.Binary.UTF8.String (
+      encode
+    , decode
+    , encodeString
+    , decodeString
+  ) where
+
+import Data.Word        (Word8)
+import Data.Bits        ((.|.),(.&.),shiftL,shiftR)
+import Data.Char        (chr,ord)
+
+default(Int)
+
+-- | Encode a string using 'encode' and store the result in a 'String'.
+encodeString :: String -> String
+encodeString xs = map (toEnum . fromEnum) (encode xs)
+
+-- | Decode a string using 'decode' using a 'String' as input.
+-- | This is not safe but it is necessary if UTF-8 encoded text
+-- | has been loaded into a 'String' prior to being decoded.
+decodeString :: String -> String
+decodeString xs = decode (map (toEnum . fromEnum) xs)
+
+replacement_character :: Char
+replacement_character = '\xfffd'
+
+-- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
+encode :: String -> [Word8]
+encode = concatMap (map fromIntegral . go . ord)
+ where
+  go oc
+   | oc <= 0x7f       = [oc]
+
+   | oc <= 0x7ff      = [ 0xc0 + (oc `shiftR` 6)
+                        , 0x80 + oc .&. 0x3f
+                        ]
+
+   | oc <= 0xffff     = [ 0xe0 + (oc `shiftR` 12)
+                        , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
+                        , 0x80 + oc .&. 0x3f
+                        ]
+   | otherwise        = [ 0xf0 + (oc `shiftR` 18)
+                        , 0x80 + ((oc `shiftR` 12) .&. 0x3f)
+                        , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
+                        , 0x80 + oc .&. 0x3f
+                        ]
+
+--
+-- | Decode a UTF8 string packed into a list of Word8 values, directly to String
+--
+decode :: [Word8] -> String
+decode [    ] = ""
+decode (c:cs)
+  | c < 0x80  = chr (fromEnum c) : decode cs
+  | c < 0xc0  = replacement_character : decode cs
+  | c < 0xe0  = multi1
+  | c < 0xf0  = multi_byte 2 0xf  0x800
+  | c < 0xf8  = multi_byte 3 0x7  0x10000
+  | c < 0xfc  = multi_byte 4 0x3  0x200000
+  | c < 0xfe  = multi_byte 5 0x1  0x4000000
+  | otherwise = replacement_character : decode cs
+  where
+    multi1 = case cs of
+      c1 : ds | c1 .&. 0xc0 == 0x80 ->
+        let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|.  fromEnum (c1 .&. 0x3f)
+        in if d >= 0x000080 then toEnum d : decode ds
+                            else replacement_character : decode ds
+      _ -> replacement_character : decode cs
+
+    multi_byte :: Int -> Word8 -> Int -> [Char]
+    multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask))
+      where
+        aux 0 rs acc
+          | overlong <= acc && acc <= 0x10ffff &&
+            (acc < 0xd800 || 0xdfff < acc)     &&
+            (acc < 0xfffe || 0xffff < acc)      = chr acc : decode rs
+          | otherwise = replacement_character : decode rs
+
+        aux n (r:rs) acc
+          | r .&. 0xc0 == 0x80 = aux (n-1) rs
+                               $ shiftL acc 6 .|. fromEnum (r .&. 0x3f)
+
+        aux _ rs     _ = replacement_character : decode rs
+
diff --git a/INSTALL b/INSTALL
index 589de9ee4..9707683ae 100644
--- a/INSTALL
+++ b/INSTALL
@@ -24,10 +24,6 @@ GHC and the required libraries using `apt-get`:
     
     sudo apt-get install ghc6 libghc6-xhtml-dev libghc6-mtl-dev libghc6-network-dev
 
-Pandoc will use the [utf8-string] library if it is installed; otherwise, it
-will use its own internal module for UTF-8 I/O.  The utf8-string library is
-not a required dependency, but it may improve performance slightly.
-
 [GHC]: http://www.haskell.org/ghc/
 [GHC Download]: http://www.haskell.org/ghc/download.html
 [Cabal]: http://www.haskell.org/cabal/
@@ -36,7 +32,6 @@ not a required dependency, but it may improve performance slightly.
 [Fink]: http://finkproject.org
 [Ubuntu]: http://www.ubuntu.com
 [debian]: http://www.debian.org/
-[utf8-string]: http://hackage.haskell.org/cgi-bin/hackage-scripts/package/utf8-string
 
 Getting the source
 ------------------
diff --git a/Main.hs b/Main.hs
index 67ab68b99..635b2f790 100644
--- a/Main.hs
+++ b/Main.hs
@@ -43,11 +43,7 @@ import Data.Maybe ( fromMaybe )
 import Data.Char ( toLower )
 import Prelude hiding ( putStrLn, writeFile, readFile, getContents )
 import System.IO ( stdout, stderr )
-#ifdef _UTF8STRING
 import System.IO.UTF8
-#else
-import Text.Pandoc.UTF8
-#endif
 #ifdef _CITEPROC
 import Text.CSL
 import Text.Pandoc.Biblio
@@ -61,11 +57,6 @@ copyrightMessage = "\nCopyright (C) 2006-7 John MacFarlane\n" ++
 
 compileInfo :: String
 compileInfo =
-#ifdef _UTF8STRING
-  " +utf8-string" ++
-#else
-  " -utf8-string" ++
-#endif
 #ifdef _CITEPROC
   " +citeproc" ++
 #else
diff --git a/Setup.hs b/Setup.hs
index 67d9ae7ad..d1c3c5c90 100644
--- a/Setup.hs
+++ b/Setup.hs
@@ -17,7 +17,7 @@ main = defaultMainWithHooks $
 
 -- | Run test suite.
 runTestSuite _ _ _ _ = do
-  inDirectory "tests" $ runCommand "runhaskell RunTests.hs" >>= waitForProcess
+  inDirectory "tests" $ runCommand "runhaskell -i.. RunTests.hs" >>= waitForProcess
   return ()
 
 -- | If reference.odt needs rebuilding, build it.
diff --git a/System/IO/UTF8.hs b/System/IO/UTF8.hs
new file mode 100644
index 000000000..d0af4c38e
--- /dev/null
+++ b/System/IO/UTF8.hs
@@ -0,0 +1,118 @@
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  System.IO.UTF8
+-- Copyright   :  (c) Eric Mertens 2007
+-- License     :  BSD3-style (see LICENSE)
+-- 
+-- Maintainer:    emertens@galois.com
+-- Stability   :  experimental
+-- Portability :  portable
+--
+-- String IO preserving UTF8 encoding.
+--
+
+module System.IO.UTF8 (
+      print
+    , putStr
+    , putStrLn
+    , getLine
+    , readLn
+    , readFile
+    , writeFile
+    , appendFile
+    , getContents
+    , hGetLine
+    , hGetContents
+    , hPutStr
+    , hPutStrLn
+  ) where
+
+import Control.Monad (liftM)
+import Data.Char (ord, chr)
+import Data.Word (Word8)
+import Prelude (String, ($), (=<<), (>>=), (.), map, toEnum, fromEnum, Read,
+                Show(..))
+import System.IO (Handle, IO, FilePath)
+import qualified System.IO as IO
+
+import Codec.Binary.UTF8.String (encode, decode)
+
+
+-- | Encode a string in UTF8 form.
+encodeString :: String -> String
+encodeString xs = bytesToString (encode xs)
+
+-- | Decode a string from UTF8
+decodeString :: String -> String
+decodeString xs = decode (stringToBytes xs)
+
+-- | Convert a list of bytes to a String
+bytesToString :: [Word8] -> String
+bytesToString xs = map (chr . fromEnum) xs
+
+-- | String to list of bytes.
+stringToBytes :: String -> [Word8]
+stringToBytes xs = map (toEnum . ord) xs
+
+-- | The 'print' function outputs a value of any printable type to the
+-- standard output device. This function differs from the
+-- System.IO.print in that it preserves any UTF8 encoding of the shown value.
+--
+print :: Show a => a -> IO ()
+print x = putStrLn (show x)
+
+-- | Write a UTF8 string to the standard output device
+putStr :: String -> IO ()
+putStr x = IO.putStr (encodeString x)
+
+-- | The same as 'putStr', but adds a newline character.
+putStrLn :: String -> IO ()
+putStrLn x = IO.putStrLn (encodeString x)
+
+-- | Read a UTF8 line from the standard input device
+getLine :: IO String
+getLine = liftM decodeString IO.getLine
+
+-- | The 'readLn' function combines 'getLine' and 'readIO', preserving UTF8
+readLn :: Read a => IO a
+readLn = IO.readIO =<< getLine
+
+-- | The 'readFile' function reads a file and
+-- returns the contents of the file as a UTF8 string.
+-- The file is read lazily, on demand, as with 'getContents'.
+readFile :: FilePath -> IO String
+readFile n = liftM decodeString (IO.openBinaryFile n IO.ReadMode >>=
+                                 IO.hGetContents)
+
+-- | The computation 'writeFile' @file str@ function writes the UTF8 string @str@,
+-- to the file @file@.
+writeFile :: FilePath -> String -> IO ()
+writeFile n c = IO.withBinaryFile n IO.WriteMode $ \ h ->
+                    IO.hPutStr h $ encodeString c
+
+-- | The computation 'appendFile' @file str@ function appends the UTF8 string @str@,
+-- to the file @file@.
+appendFile :: FilePath -> String -> IO ()
+appendFile n c = IO.withBinaryFile n IO.AppendMode $ \h ->
+                    IO.hPutStr h $ encodeString c
+
+-- | Read a UTF8 line from a Handle
+hGetLine :: Handle -> IO String
+hGetLine h = liftM decodeString $ IO.hGetLine h
+
+-- | Lazily read a UTF8 string from a Handle
+hGetContents :: Handle -> IO String
+hGetContents h = liftM decodeString (IO.hGetContents h)
+
+-- | Write a UTF8 string to a Handle.
+hPutStr :: Handle -> String -> IO ()
+hPutStr h s = IO.hPutStr h (encodeString s)
+
+-- | Write a UTF8 string to a Handle, appending a newline.
+hPutStrLn :: Handle -> String -> IO ()
+hPutStrLn h s = IO.hPutStrLn h (encodeString s)
+
+-- | Lazily read stdin as a UTF8 string.
+getContents :: IO String
+getContents = liftM decodeString IO.getContents
+
diff --git a/Text/Pandoc/ODT.hs b/Text/Pandoc/ODT.hs
index 763625e24..8c3b1b45f 100644
--- a/Text/Pandoc/ODT.hs
+++ b/Text/Pandoc/ODT.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE TemplateHaskell, CPP #-}
+{-# LANGUAGE TemplateHaskell #-}
 {-
 Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu>
 
@@ -44,11 +44,7 @@ import qualified Data.ByteString as B ( writeFile, pack )
 import Data.ByteString.Internal ( c2w )
 import Prelude hiding ( writeFile, readFile )
 import System.IO ( stderr )
-#ifdef _UTF8STRING
 import System.IO.UTF8
-#else
-import Text.Pandoc.UTF8
-#endif
 
 -- | Produce an ODT file from OpenDocument XML.
 saveOpenDocumentAsODT :: FilePath    -- ^ Pathname of ODT file to be produced.
diff --git a/Text/Pandoc/PDF.hs b/Text/Pandoc/PDF.hs
index 4257e4712..1e2d5e9b5 100644
--- a/Text/Pandoc/PDF.hs
+++ b/Text/Pandoc/PDF.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE CPP #-}
 {-
 Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu>
 
@@ -38,11 +37,7 @@ import System.Environment ( getEnvironment )
 import Text.Pandoc.Shared ( withTempDir )
 import Prelude hiding ( writeFile, readFile, putStrLn )
 import System.IO ( stderr, openFile, IOMode (..), hClose )
-#ifdef _UTF8STRING
 import System.IO.UTF8
-#else
-import Text.Pandoc.UTF8
-#endif
 
 -- | Produce an PDF file from LaTeX.
 saveLaTeXAsPDF :: FilePath    -- ^ Pathname of PDF file to be produced.
diff --git a/Text/Pandoc/Shared.hs b/Text/Pandoc/Shared.hs
index 38c1cf6b4..2c53ffa7a 100644
--- a/Text/Pandoc/Shared.hs
+++ b/Text/Pandoc/Shared.hs
@@ -118,11 +118,7 @@ import System.FilePath ( (</>), (<.>) )
 import System.IO.Error ( catch, ioError, isAlreadyExistsError )
 import System.Directory
 import Prelude hiding ( putStrLn, writeFile, readFile, getContents )
-#ifdef _UTF8STRING
 import System.IO.UTF8
-#else
-import Text.Pandoc.UTF8
-#endif
 
 --
 -- List processing
diff --git a/Text/Pandoc/TH.hs b/Text/Pandoc/TH.hs
index 376dfa00d..dfd6be28b 100644
--- a/Text/Pandoc/TH.hs
+++ b/Text/Pandoc/TH.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE CPP #-}
 {-# OPTIONS_GHC -fno-warn-orphans #-}
 {-
 Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu>
@@ -39,11 +38,7 @@ import Language.Haskell.TH.Syntax (Lift (..))
 import qualified Data.ByteString as B
 import Data.ByteString.Internal ( w2c )
 import Prelude hiding ( readFile )
-#ifdef _UTF8STRING
 import System.IO.UTF8
-#else
-import Text.Pandoc.UTF8
-#endif
 
 -- | Insert contents of text file into a template.
 contentsOf :: FilePath -> ExpQ
diff --git a/Text/Pandoc/UTF8.hs b/Text/Pandoc/UTF8.hs
deleted file mode 100644
index f8d041db7..000000000
--- a/Text/Pandoc/UTF8.hs
+++ /dev/null
@@ -1,76 +0,0 @@
--- | Functions for IO using UTF-8 encoding.
--- 
--- The basic encoding and decoding functions are taken from
--- <http://www.cse.ogi.edu/~hallgren/Talks/LHiH/base/lib/UTF8.hs>.
--- (c) 2003, OGI School of Science & Engineering, Oregon Health and
--- Science University.  
---
--- From the Char module supplied with HBC.
--- Modified by Martin Norbaeck to pass illegal UTF-8 sequences unchanged.
--- Modified by John MacFarlane to use [Word8] and export IO functions.
-
-module Text.Pandoc.UTF8 ( 
-              putStrLn
-            , putStr
-            , hPutStrLn
-            , hPutStr
-            , getContents
-            , readFile
-            , writeFile
-            ) where
-import Data.Word
-import System.IO ( Handle )
-import qualified Data.ByteString.Lazy as BS
-import Prelude hiding ( putStrLn, putStr, getContents, readFile, writeFile )
-
-putStrLn :: String -> IO ()
-putStrLn =  BS.putStrLn . BS.pack . toUTF8
-
-putStr :: String -> IO ()
-putStr = BS.putStr . BS.pack . toUTF8
-
-hPutStrLn :: Handle -> String -> IO ()
-hPutStrLn h =  BS.hPut h . BS.pack . toUTF8 . (++ "\n")
-
-hPutStr :: Handle -> String -> IO ()
-hPutStr h = BS.hPut h . BS.pack . toUTF8
-
-readFile :: FilePath -> IO String 
-readFile p = BS.readFile p >>= return . fromUTF8 . BS.unpack
-
-writeFile :: FilePath -> String -> IO ()
-writeFile p = BS.writeFile p . BS.pack . toUTF8
-
-getContents :: IO String
-getContents = BS.getContents >>= return . fromUTF8 . BS.unpack
-
--- | Take a list of bytes in UTF-8 encoding and decode it into a Unicode string.
-fromUTF8 :: [Word8] -> String
-fromUTF8 [] = ""
-fromUTF8 (0xef : 0xbb : 0xbf :cs) = fromUTF8 cs -- skip BOM (byte order marker)
-fromUTF8 (c:c':cs) | 0xc0 <= c  && c  <= 0xdf && 
-                       0x80 <= c' && c' <= 0xbf =
-	toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : fromUTF8 cs
-fromUTF8 (c:c':c'':cs) | 0xe0 <= c   && c   <= 0xef && 
-		                   0x80 <= c'  && c'  <= 0xbf &&
-                           0x80 <= c'' && c'' <= 0xbf =
-	toEnum ((fromEnum c `mod` 0x10 * 0x1000) + (fromEnum c' `mod` 0x40) * 0x40 + fromEnum c'' `mod` 0x40) : fromUTF8 cs
-fromUTF8 (c:cs) = toEnum (fromEnum c) : fromUTF8 cs
-
--- | Take a Unicode string and encode it as a list of bytes in UTF-8 encoding.
-toUTF8 :: String -> [Word8]
-toUTF8 "" = []
-toUTF8 (c:cs) =
-	if c > '\x0000' && c < '\x0080' then
-	    toEnum (fromEnum c) : toUTF8 cs
-	else if c < toEnum 0x0800 then
-	    let i = fromEnum c
-	    in  toEnum (0xc0 + i `div` 0x40) : 
-	        toEnum (0x80 + i `mod` 0x40) : 
-		toUTF8 cs
-	else
-	    let i = fromEnum c
-	    in  toEnum (0xe0 + i `div` 0x1000) : 
-	        toEnum (0x80 + (i `mod` 0x1000) `div` 0x40) : 
-		toEnum (0x80 + i `mod` 0x40) : 
-		toUTF8 cs
diff --git a/debian/copyright b/debian/copyright
index 533981520..444f73796 100644
--- a/debian/copyright
+++ b/debian/copyright
@@ -58,42 +58,36 @@ by Eric A. Meyer
 Released under an explicit Public Domain License
 
 ----------------------------------------------------------------------
-UTF8.hs
-Copyright (c) 2003, OGI School of Science & Engineering, Oregon Health &
-Science University, All rights reserved.
-Modified by Martin Norbäck, to pass illegal utf-8 sequences through unchanged.
-Modified 2006-8 John MacFarlane.
+System/IO/UTF8.hs and Codec/Binary/UTF8/String.hs
+from the utf8-string package on HackageDB
+Copyright (c) 2007, Galois Inc.
+All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Galois Inc. nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
 
-- Redistributions of source code must retain the above copyright
-  notice, this list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above
-  copyright notice, this list of conditions and the following
-  disclaimer in the documentation and/or other materials provided
-  with the distribution.
-
-- Neither the name of OGI or OHSU nor the names of its
-  contributors may be used to endorse or promote products derived
-  from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+THIS SOFTWARE IS PROVIDED BY Galois Inc. ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL Galois Inc. BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 ----------------------------------------------------------------------
 Text/XML/Light/*
+from the xml package on HackageDB
 (c) 2007 Galois Inc.
 
 All rights reserved.
diff --git a/pandoc.cabal b/pandoc.cabal
index d52bb0d63..a58eb1ddf 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -134,9 +134,6 @@ Flag executable
 Flag library
   Description:   Build the pandoc library.
   Default:       True
-Flag utf8-string
-  Description:   Use utf8-string library for UTF-8 I/O.
-  Default:       True
 Flag citeproc
   Description:   Compile in support for citeproc-hs bibliographic formatting.
   Default:       False
@@ -149,11 +146,6 @@ Library
   if flag(highlighting)
     Build-depends: highlighting-kate
     cpp-options:   -D_HIGHLIGHTING
-  if flag(utf8-string)
-    Build-depends: utf8-string
-    cpp-options:   -D_UTF8STRING
-  else
-    Other-Modules: Text.Pandoc.UTF8
   if flag(citeproc)
     Build-depends: citeproc-hs
     Exposed-Modules: Text.Pandoc.Biblio
@@ -190,12 +182,16 @@ Library
                    Text.Pandoc.Writers.S5
   Other-Modules:   Text.Pandoc.XML,
                    Text.Pandoc.TH,
+                   -- from xml package
                    Text.XML.Light,
                    Text.XML.Light.Types,
                    Text.XML.Light.Output,
                    Text.XML.Light.Input,
                    Text.XML.Light.Proc,
-                   Text.XML.Light.Cursor
+                   Text.XML.Light.Cursor,
+                   -- from utf8-string package
+                   System.IO.UTF8,
+                   Codec.Binary.UTF8.String
   Extensions:      CPP, TemplateHaskell, FlexibleInstances
   Ghc-Options:     -O2 -Wall -threaded
   Ghc-Prof-Options: -auto-all
@@ -214,11 +210,6 @@ Executable pandoc
 
   if flag(highlighting)
     cpp-options:   -D_HIGHLIGHTING
-  if flag(utf8-string)
-    Build-depends: utf8-string
-    cpp-options:   -D_UTF8STRING
-  else
-    Other-Modules: Text.Pandoc.UTF8
   if flag(citeproc)
     Build-depends: citeproc-hs
     cpp-options:   -D_CITEPROC