More intelligent handling of text encodings.
Previously, UTF-8 was enforced for both input and output. The new system:
* For input, UTF-8 is tried first; if an error is raised, the locale encoding is tried.
* For output, the locale encoding is always used.
5 changed files with 30 additions and 20 deletions
@ -146,7 +146,6 @@ where
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Builder (Blocks)
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
import Text.Parsec
import Text.Parsec.Pos (newPos)
import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isPunctuation )
@ -708,7 +707,7 @@ readWith parser state input =
-- Runs @parser@ on @str@ through 'readWith', starting from
-- 'defaultParserState', and prints the 'show'n result with a trailing newline.
-- NOTE(review): the two consecutive @testStringWith parser str = ...@ lines
-- below look like the removed and the added version of a single line in a
-- diff (UTF8.putStrLn replaced by plain putStrLn); only one of them should be
-- present in the real module -- confirm against the repository.
testStringWith :: (Show a) => Parser [Char] ParserState a
-> String
-> IO ()
testStringWith parser str = UTF8.putStrLn $ show $
testStringWith parser str = putStrLn $ show $
readWith parser defaultParserState str
-- | Parsing options.
@ -90,7 +90,7 @@ import Paths_pandoc (getDataFileName)
import Text.Pandoc.Pretty (charWidth)
import System.Locale (defaultTimeLocale)
import Data.Time
import System.IO (stderr)
import System.IO (stderr, hPutStrLn)
import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
@ -503,14 +503,14 @@ readDataFile userDir fname = findDataFile userDir fname >>= UTF8.readFile
-- Writes "<program name>: <msg>" to stderr and then exits with
-- @ExitFailure exitCode@.
-- NOTE(review): the two stderr lines below look like the removed and the
-- added version of one diff line (UTF8.hPutStrLn replaced by System.IO's
-- hPutStrLn); kept together they would emit the message twice -- confirm.
err :: Int -> String -> IO a
err exitCode msg = do
name <- getProgName
UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
hPutStrLn stderr $ name ++ ": " ++ msg
exitWith $ ExitFailure exitCode
-- presumably never reached once exitWith has run; it only makes the final
-- statement of the do-block have type IO a -- TODO confirm it is still needed
return undefined
-- Writes "<program name>: <msg>" to stderr and returns; unlike 'err' it does
-- not exit the program.
-- NOTE(review): the two stderr lines below look like the removed and the
-- added version of one diff line; kept together they would emit the warning
-- twice -- confirm against the repository.
warn :: String -> IO ()
warn msg = do
name <- getProgName
UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
hPutStrLn stderr $ name ++ ": " ++ msg
-- Safe read
@ -25,7 +25,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Stability : alpha
Portability : portable
UTF-8 aware string IO functions that will work with GHC 6.10, 6.12, or 7.
UTF-8 aware string IO functions that will work with GHC 6.12 or 7.
The reading functions first attempt to read UTF-8; if an encoding
error is encountered, the locale encoding is used instead. This
should work well in practice because text in other encodings
is usually not valid UTF-8.
module Text.Pandoc.UTF8 ( readFile
, writeFile
@ -45,10 +49,11 @@ where
import Codec.Binary.UTF8.String (encodeString, decodeString)
import Control.Exception (catch, throwIO)
import GHC.IO.Exception (IOException(..), IOErrorType(..))
import System.IO hiding (readFile, writeFile, getContents,
putStr, putStrLn, hPutStr, hPutStrLn, hGetContents)
import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn )
import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn, catch )
import qualified System.IO as IO
readFile :: FilePath -> IO String
@ -75,7 +80,14 @@ hPutStrLn :: Handle -> String -> IO ()
-- Sets the encoding of handle @h@ to 'utf8', then writes @s@ followed by a
-- newline using System.IO's hPutStrLn (imported qualified as IO).
hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s
-- Reads the contents of a handle as a String.
-- The handle is first set to 'utf8_bom'. If the read raises an IOException
-- whose 'ioe_type' is InvalidArgument, the handle is switched to
-- 'localeEncoding' and read again; any other IOException is rethrown.
-- NOTE(review): the one-line definition directly under the signature looks
-- like the pre-change version from a diff and the do-block like its
-- replacement; only one should exist in the real module -- confirm.
-- NOTE(review): System.IO.hGetContents reads lazily, so a decoding error may
-- only be raised when the returned string is consumed, i.e. after this catch
-- has already returned, and the handle may no longer accept hSetEncoding by
-- then -- confirm that the locale fallback can actually trigger.
hGetContents :: Handle -> IO String
hGetContents h = hSetEncoding h utf8_bom >> IO.hGetContents h
hGetContents h = do
hSetEncoding h utf8_bom
catch (IO.hGetContents h) $ \e ->
case ioe_type e of
-- presumably the error type GHC reports for an invalid byte sequence -- confirm
InvalidArgument -> do
hSetEncoding h localeEncoding
IO.hGetContents h
_ -> throwIO e
encodePath :: FilePath -> FilePath
decodeArg :: String -> String
@ -33,8 +33,7 @@ import System.FilePath ( (</>) )
import qualified Data.ByteString.Lazy as B
import qualified Data.Map as M
import Data.ByteString.Lazy.UTF8 ( fromString, toString )
import Text.Pandoc.UTF8 as UTF8
import System.IO ( stderr )
import System.IO ( stderr, hPutStrLn )
import Codec.Archive.Zip
import Data.Time.Clock.POSIX
import Paths_pandoc ( getDataFileName )
@ -663,7 +662,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do
, mknode "wp:docPr" [("descr",tit),("id","1"),("name","Picture")] ()
, graphic ] ]
else do
liftIO $ UTF8.hPutStrLn stderr $
liftIO $ hPutStrLn stderr $
"Could not find image `" ++ src ++ "', skipping..."
inlinesToOpenXML opts alt
@ -46,7 +46,7 @@ import System.Console.GetOpt
import Data.Char ( toLower )
import Data.List ( intercalate, isPrefixOf )
import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable )
import System.IO ( stdout )
import System.IO ( stdout, hPutStr, hPutStrLn )
import System.IO.Error ( isDoesNotExistError )
import qualified Control.Exception as E
import Control.Exception.Extensible ( throwIO )
@ -312,7 +312,7 @@ options =
(\arg _ -> do
templ <- getDefaultTemplate Nothing arg
case templ of
Right t -> UTF8.hPutStr stdout t
Right t -> hPutStr stdout t
Left e -> error $ show e
exitWith ExitSuccess)
@ -663,7 +663,7 @@ options =
(\_ -> do
prg <- getProgName
UTF8.hPutStrLn stdout (prg ++ " " ++ pandocVersion ++ compileInfo ++
hPutStrLn stdout (prg ++ " " ++ pandocVersion ++ compileInfo ++
exitWith ExitSuccess ))
"" -- "Print version"
@ -672,7 +672,7 @@ options =
(\_ -> do
prg <- getProgName
UTF8.hPutStr stdout (usageMessage prg options)
hPutStr stdout (usageMessage prg options)
exitWith ExitSuccess ))
"" -- "Show help"
@ -827,8 +827,8 @@ main = do
} = opts
when dumpArgs $
do UTF8.hPutStrLn stdout outputFile
mapM_ (\arg -> UTF8.hPutStrLn stdout arg) args
do hPutStrLn stdout outputFile
mapM_ (\arg -> hPutStrLn stdout arg) args
exitWith ExitSuccess
let sources = if ignoreArgs then [] else args
@ -1026,8 +1026,8 @@ main = do
writeBinary = B.writeFile (UTF8.encodePath outputFile)
let writerFn :: FilePath -> String -> IO ()
writerFn "-" = UTF8.putStr
writerFn f = UTF8.writeFile f
writerFn "-" = putStr
writerFn f = writeFile f
case getWriter writerName' of
Left e -> err 9 e
