More intelligent handling of text encodings.
Previously, UTF-8 was enforced for both input and output. The new system:

* For input, UTF-8 is tried first; if an error is raised, the locale encoding is tried.
* For output, the locale encoding is always used.
This commit is contained in:
parent
31107741f0
commit
7272735b3d
5 changed files with 30 additions and 20 deletions
|
@ -146,7 +146,6 @@ where
|
|||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Builder (Blocks)
|
||||
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
|
||||
import Text.Parsec
|
||||
import Text.Parsec.Pos (newPos)
|
||||
import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isPunctuation )
|
||||
|
@ -708,7 +707,7 @@ readWith parser state input =
|
|||
testStringWith :: (Show a) => Parser [Char] ParserState a
|
||||
-> String
|
||||
-> IO ()
|
||||
testStringWith parser str = UTF8.putStrLn $ show $
|
||||
testStringWith parser str = putStrLn $ show $
|
||||
readWith parser defaultParserState str
|
||||
|
||||
-- | Parsing options.
|
||||
|
|
|
@ -90,7 +90,7 @@ import Paths_pandoc (getDataFileName)
|
|||
import Text.Pandoc.Pretty (charWidth)
|
||||
import System.Locale (defaultTimeLocale)
|
||||
import Data.Time
|
||||
import System.IO (stderr)
|
||||
import System.IO (stderr, hPutStrLn)
|
||||
import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
|
||||
renderOptions)
|
||||
|
||||
|
@ -503,14 +503,14 @@ readDataFile userDir fname = findDataFile userDir fname >>= UTF8.readFile
|
|||
err :: Int -> String -> IO a
|
||||
err exitCode msg = do
|
||||
name <- getProgName
|
||||
UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
|
||||
hPutStrLn stderr $ name ++ ": " ++ msg
|
||||
exitWith $ ExitFailure exitCode
|
||||
return undefined
|
||||
|
||||
warn :: String -> IO ()
|
||||
warn msg = do
|
||||
name <- getProgName
|
||||
UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
|
||||
hPutStrLn stderr $ name ++ ": " ++ msg
|
||||
|
||||
--
|
||||
-- Safe read
|
||||
|
|
|
@ -25,7 +25,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|||
Stability : alpha
|
||||
Portability : portable
|
||||
|
||||
UTF-8 aware string IO functions that will work with GHC 6.10, 6.12, or 7.
|
||||
UTF-8 aware string IO functions that will work with GHC 6.12 or 7.
|
||||
The reading functions first attempt to read UTF-8; if an encoding
|
||||
error is encountered, the locale encoding is used instead. This
|
||||
should work well in practice because text in other encodings
|
||||
is usually not valid UTF-8.
|
||||
-}
|
||||
module Text.Pandoc.UTF8 ( readFile
|
||||
, writeFile
|
||||
|
@ -45,10 +49,11 @@ where
|
|||
#else
|
||||
import Codec.Binary.UTF8.String (encodeString, decodeString)
|
||||
#endif
|
||||
|
||||
import Control.Exception (catch, throwIO)
|
||||
import GHC.IO.Exception (IOException(..), IOErrorType(..))
|
||||
import System.IO hiding (readFile, writeFile, getContents,
|
||||
putStr, putStrLn, hPutStr, hPutStrLn, hGetContents)
|
||||
import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn )
|
||||
import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn, catch )
|
||||
import qualified System.IO as IO
|
||||
|
||||
readFile :: FilePath -> IO String
|
||||
|
@ -75,7 +80,14 @@ hPutStrLn :: Handle -> String -> IO ()
|
|||
hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s
|
||||
|
||||
hGetContents :: Handle -> IO String
|
||||
hGetContents h = hSetEncoding h utf8_bom >> IO.hGetContents h
|
||||
hGetContents h = do
|
||||
hSetEncoding h utf8_bom
|
||||
catch (IO.hGetContents h) $ \e ->
|
||||
case ioe_type e of
|
||||
InvalidArgument -> do
|
||||
hSetEncoding h localeEncoding
|
||||
IO.hGetContents h
|
||||
_ -> throwIO e
|
||||
|
||||
encodePath :: FilePath -> FilePath
|
||||
decodeArg :: String -> String
|
||||
|
|
|
@ -33,8 +33,7 @@ import System.FilePath ( (</>) )
|
|||
import qualified Data.ByteString.Lazy as B
|
||||
import qualified Data.Map as M
|
||||
import Data.ByteString.Lazy.UTF8 ( fromString, toString )
|
||||
import Text.Pandoc.UTF8 as UTF8
|
||||
import System.IO ( stderr )
|
||||
import System.IO ( stderr, hPutStrLn )
|
||||
import Codec.Archive.Zip
|
||||
import Data.Time.Clock.POSIX
|
||||
import Paths_pandoc ( getDataFileName )
|
||||
|
@ -663,7 +662,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do
|
|||
, mknode "wp:docPr" [("descr",tit),("id","1"),("name","Picture")] ()
|
||||
, graphic ] ]
|
||||
else do
|
||||
liftIO $ UTF8.hPutStrLn stderr $
|
||||
liftIO $ hPutStrLn stderr $
|
||||
"Could not find image `" ++ src ++ "', skipping..."
|
||||
inlinesToOpenXML opts alt
|
||||
|
||||
|
|
|
@ -46,7 +46,7 @@ import System.Console.GetOpt
|
|||
import Data.Char ( toLower )
|
||||
import Data.List ( intercalate, isPrefixOf )
|
||||
import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable )
|
||||
import System.IO ( stdout )
|
||||
import System.IO ( stdout, hPutStr, hPutStrLn )
|
||||
import System.IO.Error ( isDoesNotExistError )
|
||||
import qualified Control.Exception as E
|
||||
import Control.Exception.Extensible ( throwIO )
|
||||
|
@ -312,7 +312,7 @@ options =
|
|||
(\arg _ -> do
|
||||
templ <- getDefaultTemplate Nothing arg
|
||||
case templ of
|
||||
Right t -> UTF8.hPutStr stdout t
|
||||
Right t -> hPutStr stdout t
|
||||
Left e -> error $ show e
|
||||
exitWith ExitSuccess)
|
||||
"FORMAT")
|
||||
|
@ -663,7 +663,7 @@ options =
|
|||
(NoArg
|
||||
(\_ -> do
|
||||
prg <- getProgName
|
||||
UTF8.hPutStrLn stdout (prg ++ " " ++ pandocVersion ++ compileInfo ++
|
||||
hPutStrLn stdout (prg ++ " " ++ pandocVersion ++ compileInfo ++
|
||||
copyrightMessage)
|
||||
exitWith ExitSuccess ))
|
||||
"" -- "Print version"
|
||||
|
@ -672,7 +672,7 @@ options =
|
|||
(NoArg
|
||||
(\_ -> do
|
||||
prg <- getProgName
|
||||
UTF8.hPutStr stdout (usageMessage prg options)
|
||||
hPutStr stdout (usageMessage prg options)
|
||||
exitWith ExitSuccess ))
|
||||
"" -- "Show help"
|
||||
|
||||
|
@ -827,8 +827,8 @@ main = do
|
|||
} = opts
|
||||
|
||||
when dumpArgs $
|
||||
do UTF8.hPutStrLn stdout outputFile
|
||||
mapM_ (\arg -> UTF8.hPutStrLn stdout arg) args
|
||||
do hPutStrLn stdout outputFile
|
||||
mapM_ (\arg -> hPutStrLn stdout arg) args
|
||||
exitWith ExitSuccess
|
||||
|
||||
let sources = if ignoreArgs then [] else args
|
||||
|
@ -1026,8 +1026,8 @@ main = do
|
|||
writeBinary = B.writeFile (UTF8.encodePath outputFile)
|
||||
|
||||
let writerFn :: FilePath -> String -> IO ()
|
||||
writerFn "-" = UTF8.putStr
|
||||
writerFn f = UTF8.writeFile f
|
||||
writerFn "-" = putStr
|
||||
writerFn f = writeFile f
|
||||
|
||||
case getWriter writerName' of
|
||||
Left e -> err 9 e
|
||||
|
|
Loading…
Add table
Reference in a new issue