Text.Pandoc.UTF8: change IO functions to return Text, not String.
[API change] This affects `readFile`, `getContents`, `writeFileWith`, `writeFile`, `putStrWith`, `putStr`, `putStrLnWith`, `putStrLn`. `hPutStrWith`, `hPutStr`, `hPutStrLnWith`, `hPutStrLn`, `hGetContents`. This avoids the need to uselessly create a linked list of characters when emiting output.
This commit is contained in:
parent
607c014e9d
commit
80fde18fb1
11 changed files with 70 additions and 68 deletions
|
@ -73,8 +73,9 @@ convertWithOpts opts = do
|
|||
let verbosity = optVerbosity opts
|
||||
|
||||
when (optDumpArgs opts) $
|
||||
do UTF8.hPutStrLn stdout outputFile
|
||||
mapM_ (UTF8.hPutStrLn stdout) (fromMaybe ["-"] $ optInputFiles opts)
|
||||
do UTF8.hPutStrLn stdout (T.pack outputFile)
|
||||
mapM_ (UTF8.hPutStrLn stdout . T.pack)
|
||||
(fromMaybe ["-"] $ optInputFiles opts)
|
||||
exitSuccess
|
||||
|
||||
let sources = case optInputFiles opts of
|
||||
|
@ -354,6 +355,5 @@ writeFnBinary "-" = liftIO . BL.putStr
|
|||
writeFnBinary f = liftIO . BL.writeFile (UTF8.encodePath f)
|
||||
|
||||
writerFn :: MonadIO m => IO.Newline -> FilePath -> Text -> m ()
|
||||
-- TODO this implementation isn't maximally efficient:
|
||||
writerFn eol "-" = liftIO . UTF8.putStrWith eol . T.unpack
|
||||
writerFn eol f = liftIO . UTF8.writeFileWith eol f . T.unpack
|
||||
writerFn eol "-" = liftIO . UTF8.putStrWith eol
|
||||
writerFn eol f = liftIO . UTF8.writeFileWith eol f
|
||||
|
|
|
@ -812,10 +812,10 @@ options =
|
|||
map (\c -> ['-',c]) shorts ++
|
||||
map ("--" ++) longs
|
||||
let allopts = unwords (concatMap optnames options)
|
||||
UTF8.hPutStrLn stdout $ printf tpl allopts
|
||||
(unwords readersNames)
|
||||
(unwords writersNames)
|
||||
(unwords $ map (T.unpack . fst) highlightingStyles)
|
||||
UTF8.hPutStrLn stdout $ T.pack $ printf tpl allopts
|
||||
(T.unpack $ T.unwords readersNames)
|
||||
(T.unpack $ T.unwords writersNames)
|
||||
(T.unpack $ T.unwords $ map fst highlightingStyles)
|
||||
(unwords datafiles)
|
||||
exitSuccess ))
|
||||
"" -- "Print bash completion script"
|
||||
|
@ -854,7 +854,7 @@ options =
|
|||
else if extensionEnabled x allExts
|
||||
then '-'
|
||||
else ' ') : drop 4 (show x)
|
||||
mapM_ (UTF8.hPutStrLn stdout . showExt)
|
||||
mapM_ (UTF8.hPutStrLn stdout . T.pack . showExt)
|
||||
[ex | ex <- extList, extensionEnabled ex allExts]
|
||||
exitSuccess )
|
||||
"FORMAT")
|
||||
|
@ -868,14 +868,14 @@ options =
|
|||
, sShortname s `notElem`
|
||||
[T.pack "Alert", T.pack "Alert_indent"]
|
||||
]
|
||||
mapM_ (UTF8.hPutStrLn stdout) (sort langs)
|
||||
mapM_ (UTF8.hPutStrLn stdout . T.pack) (sort langs)
|
||||
exitSuccess ))
|
||||
""
|
||||
|
||||
, Option "" ["list-highlight-styles"]
|
||||
(NoArg
|
||||
(\_ -> do
|
||||
mapM_ (UTF8.hPutStrLn stdout . T.unpack . fst) highlightingStyles
|
||||
mapM_ (UTF8.hPutStrLn stdout . fst) highlightingStyles
|
||||
exitSuccess ))
|
||||
""
|
||||
|
||||
|
@ -893,7 +893,7 @@ options =
|
|||
| T.null t -> -- e.g. for docx, odt, json:
|
||||
E.throwIO $ PandocCouldNotFindDataFileError $ T.pack
|
||||
("templates/default." ++ arg)
|
||||
| otherwise -> write . T.unpack $ t
|
||||
| otherwise -> write t
|
||||
Left e -> E.throwIO e
|
||||
exitSuccess)
|
||||
"FORMAT")
|
||||
|
@ -940,11 +940,13 @@ options =
|
|||
(\_ -> do
|
||||
prg <- getProgName
|
||||
defaultDatadirs <- defaultUserDataDirs
|
||||
UTF8.hPutStrLn stdout (prg ++ " " ++ T.unpack pandocVersion ++
|
||||
compileInfo ++
|
||||
"\nUser data directory: " ++
|
||||
intercalate " or " defaultDatadirs ++
|
||||
('\n':copyrightMessage))
|
||||
UTF8.hPutStrLn stdout
|
||||
$ T.pack
|
||||
$ prg ++ " " ++ T.unpack pandocVersion ++
|
||||
compileInfo ++
|
||||
"\nUser data directory: " ++
|
||||
intercalate " or " defaultDatadirs ++
|
||||
('\n':copyrightMessage)
|
||||
exitSuccess ))
|
||||
"" -- "Print version"
|
||||
|
||||
|
@ -952,7 +954,7 @@ options =
|
|||
(NoArg
|
||||
(\_ -> do
|
||||
prg <- getProgName
|
||||
UTF8.hPutStr stdout (usageMessage prg options)
|
||||
UTF8.hPutStr stdout (T.pack $ usageMessage prg options)
|
||||
exitSuccess ))
|
||||
"" -- "Show help"
|
||||
]
|
||||
|
@ -1013,12 +1015,12 @@ handleUnrecognizedOption "-R" = handleUnrecognizedOption "--parse-raw"
|
|||
handleUnrecognizedOption x =
|
||||
(("Unknown option " ++ x ++ ".") :)
|
||||
|
||||
readersNames :: [String]
|
||||
readersNames = sort (map (T.unpack . fst) (readers :: [(Text, Reader PandocIO)]))
|
||||
readersNames :: [Text]
|
||||
readersNames = sort (map fst (readers :: [(Text, Reader PandocIO)]))
|
||||
|
||||
writersNames :: [String]
|
||||
writersNames :: [Text]
|
||||
writersNames = sort
|
||||
("pdf" : map (T.unpack . fst) (writers :: [(Text, Writer PandocIO)]))
|
||||
("pdf" : map fst (writers :: [(Text, Writer PandocIO)]))
|
||||
|
||||
splitField :: String -> (String, String)
|
||||
splitField = second (tailDef "true") . break (`elemText` ":=")
|
||||
|
|
|
@ -59,8 +59,8 @@ optToOutputSettings opts = do
|
|||
let outputFile = fromMaybe "-" (optOutputFile opts)
|
||||
|
||||
when (optDumpArgs opts) . liftIO $ do
|
||||
UTF8.hPutStrLn stdout outputFile
|
||||
mapM_ (UTF8.hPutStrLn stdout) (fromMaybe [] $ optInputFiles opts)
|
||||
UTF8.hPutStrLn stdout (T.pack outputFile)
|
||||
mapM_ (UTF8.hPutStrLn stdout . T.pack) (fromMaybe [] $ optInputFiles opts)
|
||||
exitSuccess
|
||||
|
||||
epubMetadata <- traverse readUtf8File $ optEpubMetadata opts
|
||||
|
|
|
@ -183,7 +183,7 @@ getModificationTime = liftIOError System.Directory.getModificationTime
|
|||
logOutput :: (PandocMonad m, MonadIO m) => LogMessage -> m ()
|
||||
logOutput msg = liftIO $ do
|
||||
UTF8.hPutStr stderr $
|
||||
"[" ++ show (messageVerbosity msg) ++ "] "
|
||||
"[" <> T.pack (show (messageVerbosity msg)) <> "] "
|
||||
alertIndent $ T.lines $ showLogMessage msg
|
||||
|
||||
-- | Prints the list of lines to @stderr@, indenting every but the first
|
||||
|
@ -191,10 +191,10 @@ logOutput msg = liftIO $ do
|
|||
alertIndent :: [Text] -> IO ()
|
||||
alertIndent [] = return ()
|
||||
alertIndent (l:ls) = do
|
||||
UTF8.hPutStrLn stderr $ unpack l
|
||||
UTF8.hPutStrLn stderr l
|
||||
mapM_ go ls
|
||||
where go l' = do UTF8.hPutStr stderr " "
|
||||
UTF8.hPutStrLn stderr $ unpack l'
|
||||
UTF8.hPutStrLn stderr l'
|
||||
|
||||
-- | Extract media from the mediabag into a directory.
|
||||
extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc
|
||||
|
|
|
@ -191,6 +191,6 @@ handleError (Left e) =
|
|||
|
||||
err :: Int -> Text -> IO a
|
||||
err exitCode msg = do
|
||||
UTF8.hPutStrLn stderr (T.unpack msg)
|
||||
UTF8.hPutStrLn stderr msg
|
||||
exitWith $ ExitFailure exitCode
|
||||
return undefined
|
||||
|
|
|
@ -270,7 +270,7 @@ missingCharacterWarnings verbosity log' = do
|
|||
| isAscii c = T.singleton c
|
||||
| otherwise = T.pack $ c : " (U+" ++ printf "%04X" (ord c) ++ ")"
|
||||
let addCodePoint = T.concatMap toCodePoint
|
||||
let warnings = [ addCodePoint (T.pack $ utf8ToString (BC.drop 19 l))
|
||||
let warnings = [ addCodePoint (utf8ToText (BC.drop 19 l))
|
||||
| l <- ls
|
||||
, isMissingCharacterWarning l
|
||||
]
|
||||
|
@ -314,7 +314,7 @@ runTectonic verbosity program args' tmpDir' source = do
|
|||
env <- liftIO getEnvironment
|
||||
when (verbosity >= INFO) $ liftIO $
|
||||
showVerboseInfo (Just tmpDir) program programArgs env
|
||||
(utf8ToString sourceBL)
|
||||
(utf8ToText sourceBL)
|
||||
(exit, out) <- liftIO $ E.catch
|
||||
(pipeProcess (Just env) program programArgs sourceBL)
|
||||
(handlePDFProgramNotFound program)
|
||||
|
@ -385,7 +385,7 @@ runTeXProgram verbosity program args numRuns tmpDir' source = do
|
|||
(pipeProcess (Just env'') program programArgs BL.empty)
|
||||
(handlePDFProgramNotFound program)
|
||||
when (verbosity >= INFO) $ liftIO $ do
|
||||
UTF8.hPutStrLn stderr $ "[makePDF] Run #" ++ show runNumber
|
||||
UTF8.hPutStrLn stderr $ "[makePDF] Run #" <> tshow runNumber
|
||||
BL.hPutStr stderr out
|
||||
UTF8.hPutStr stderr "\n"
|
||||
if runNumber < numRuns
|
||||
|
@ -405,7 +405,7 @@ generic2pdf :: Verbosity
|
|||
generic2pdf verbosity program args source = do
|
||||
env' <- getEnvironment
|
||||
when (verbosity >= INFO) $
|
||||
showVerboseInfo Nothing program args env' (T.unpack source)
|
||||
showVerboseInfo Nothing program args env' source
|
||||
(exit, out) <- E.catch
|
||||
(pipeProcess (Just env') program args
|
||||
(BL.fromStrict $ UTF8.fromText source))
|
||||
|
@ -494,19 +494,20 @@ showVerboseInfo :: Maybe FilePath
|
|||
-> String
|
||||
-> [String]
|
||||
-> [(String, String)]
|
||||
-> String
|
||||
-> Text
|
||||
-> IO ()
|
||||
showVerboseInfo mbTmpDir program programArgs env source = do
|
||||
case mbTmpDir of
|
||||
Just tmpDir -> do
|
||||
UTF8.hPutStrLn stderr "[makePDF] temp dir:"
|
||||
UTF8.hPutStrLn stderr tmpDir
|
||||
UTF8.hPutStrLn stderr (T.pack tmpDir)
|
||||
Nothing -> return ()
|
||||
UTF8.hPutStrLn stderr "[makePDF] Command line:"
|
||||
UTF8.hPutStrLn stderr $ program ++ " " ++ unwords (map show programArgs)
|
||||
UTF8.hPutStrLn stderr $
|
||||
T.pack program <> " " <> T.pack (unwords (map show programArgs))
|
||||
UTF8.hPutStr stderr "\n"
|
||||
UTF8.hPutStrLn stderr "[makePDF] Environment:"
|
||||
mapM_ (UTF8.hPutStrLn stderr . show) env
|
||||
mapM_ (UTF8.hPutStrLn stderr . tshow) env
|
||||
UTF8.hPutStr stderr "\n"
|
||||
UTF8.hPutStrLn stderr "[makePDF] Source:"
|
||||
UTF8.hPutStrLn stderr source
|
||||
|
@ -517,8 +518,8 @@ handlePDFProgramNotFound program e
|
|||
E.throwIO $ PandocPDFProgramNotFoundError $ T.pack program
|
||||
| otherwise = E.throwIO e
|
||||
|
||||
utf8ToString :: ByteString -> String
|
||||
utf8ToString lbs =
|
||||
utf8ToText :: ByteString -> Text
|
||||
utf8ToText lbs =
|
||||
case decodeUtf8' lbs of
|
||||
Left _ -> BC.unpack lbs -- if decoding fails, treat as latin1
|
||||
Right t -> TL.unpack t
|
||||
Left _ -> T.pack $ BC.unpack lbs -- if decoding fails, treat as latin1
|
||||
Right t -> TL.toStrict t
|
||||
|
|
|
@ -1148,7 +1148,7 @@ testStringWith :: Show a
|
|||
=> ParserT Text ParserState Identity a
|
||||
-> Text
|
||||
-> IO ()
|
||||
testStringWith parser str = UTF8.putStrLn $ show $
|
||||
testStringWith parser str = UTF8.putStrLn $ tshow $
|
||||
readWith parser defaultParserState str
|
||||
|
||||
-- | Parsing options.
|
||||
|
|
|
@ -39,67 +39,65 @@ where
|
|||
|
||||
import qualified Data.ByteString.Char8 as B
|
||||
import qualified Data.ByteString.Lazy.Char8 as BL
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.IO as TIO
|
||||
import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.Lazy as TL
|
||||
import qualified Data.Text.Lazy.Encoding as TL
|
||||
import Prelude hiding (getContents, putStr, putStrLn, readFile, writeFile)
|
||||
import System.IO hiding (getContents, hGetContents, hPutStr, hPutStrLn, putStr,
|
||||
putStrLn, readFile, writeFile)
|
||||
import qualified System.IO as IO
|
||||
|
||||
readFile :: FilePath -> IO String
|
||||
readFile :: FilePath -> IO Text
|
||||
readFile f = do
|
||||
h <- openFile (encodePath f) ReadMode
|
||||
hGetContents h
|
||||
|
||||
getContents :: IO String
|
||||
getContents :: IO Text
|
||||
getContents = hGetContents stdin
|
||||
|
||||
writeFileWith :: Newline -> FilePath -> String -> IO ()
|
||||
writeFileWith :: Newline -> FilePath -> Text -> IO ()
|
||||
writeFileWith eol f s =
|
||||
withFile (encodePath f) WriteMode $ \h -> hPutStrWith eol h s
|
||||
|
||||
writeFile :: FilePath -> String -> IO ()
|
||||
writeFile :: FilePath -> Text -> IO ()
|
||||
writeFile = writeFileWith nativeNewline
|
||||
|
||||
putStrWith :: Newline -> String -> IO ()
|
||||
putStrWith :: Newline -> Text -> IO ()
|
||||
putStrWith eol s = hPutStrWith eol stdout s
|
||||
|
||||
putStr :: String -> IO ()
|
||||
putStr :: Text -> IO ()
|
||||
putStr = putStrWith nativeNewline
|
||||
|
||||
putStrLnWith :: Newline -> String -> IO ()
|
||||
putStrLnWith :: Newline -> Text -> IO ()
|
||||
putStrLnWith eol s = hPutStrLnWith eol stdout s
|
||||
|
||||
putStrLn :: String -> IO ()
|
||||
putStrLn :: Text -> IO ()
|
||||
putStrLn = putStrLnWith nativeNewline
|
||||
|
||||
hPutStrWith :: Newline -> Handle -> String -> IO ()
|
||||
hPutStrWith :: Newline -> Handle -> Text -> IO ()
|
||||
hPutStrWith eol h s =
|
||||
hSetNewlineMode h (NewlineMode eol eol) >>
|
||||
hSetEncoding h utf8 >> IO.hPutStr h s
|
||||
hSetEncoding h utf8 >> TIO.hPutStr h s
|
||||
|
||||
hPutStr :: Handle -> String -> IO ()
|
||||
hPutStr :: Handle -> Text -> IO ()
|
||||
hPutStr = hPutStrWith nativeNewline
|
||||
|
||||
hPutStrLnWith :: Newline -> Handle -> String -> IO ()
|
||||
hPutStrLnWith :: Newline -> Handle -> Text -> IO ()
|
||||
hPutStrLnWith eol h s =
|
||||
hSetNewlineMode h (NewlineMode eol eol) >>
|
||||
hSetEncoding h utf8 >> IO.hPutStrLn h s
|
||||
hSetEncoding h utf8 >> TIO.hPutStrLn h s
|
||||
|
||||
hPutStrLn :: Handle -> String -> IO ()
|
||||
hPutStrLn :: Handle -> Text -> IO ()
|
||||
hPutStrLn = hPutStrLnWith nativeNewline
|
||||
|
||||
hGetContents :: Handle -> IO String
|
||||
hGetContents = fmap toString . B.hGetContents
|
||||
-- hGetContents h = hSetEncoding h utf8_bom
|
||||
-- >> hSetNewlineMode h universalNewlineMode
|
||||
-- >> IO.hGetContents h
|
||||
hGetContents :: Handle -> IO Text
|
||||
hGetContents = fmap toText . B.hGetContents
|
||||
|
||||
-- | Convert UTF8-encoded ByteString to Text, also
|
||||
-- removing '\r' characters.
|
||||
toText :: B.ByteString -> T.Text
|
||||
toText :: B.ByteString -> Text
|
||||
toText = T.decodeUtf8 . filterCRs . dropBOM
|
||||
where dropBOM bs =
|
||||
if "\xEF\xBB\xBF" `B.isPrefixOf` bs
|
||||
|
@ -127,7 +125,7 @@ toTextLazy = TL.decodeUtf8 . filterCRs . dropBOM
|
|||
toStringLazy :: BL.ByteString -> String
|
||||
toStringLazy = TL.unpack . toTextLazy
|
||||
|
||||
fromText :: T.Text -> B.ByteString
|
||||
fromText :: Text -> B.ByteString
|
||||
fromText = T.encodeUtf8
|
||||
|
||||
fromTextLazy :: TL.Text -> BL.ByteString
|
||||
|
|
|
@ -130,7 +130,7 @@ runCommandTest testExePath fp num code =
|
|||
let cmdline = "% " <> cmd
|
||||
let x = cmdline <> "\n" <> input <> "^D\n" <> norm
|
||||
let y = cmdline <> "\n" <> input <> "^D\n" <> newnorm
|
||||
let updated = T.unpack $ T.replace (T.pack x) (T.pack y) (T.pack raw)
|
||||
let updated = T.replace (T.pack x) (T.pack y) raw
|
||||
UTF8.writeFile fp' updated
|
||||
|
||||
extractCommandTest :: FilePath -> FilePath -> TestTree
|
||||
|
|
|
@ -22,6 +22,7 @@ import Test.Tasty (TestTree, testGroup)
|
|||
import Test.Tasty.Golden.Advanced (goldenTest)
|
||||
import Tests.Helpers hiding (test)
|
||||
import qualified Text.Pandoc.UTF8 as UTF8
|
||||
import qualified Data.Text as T
|
||||
|
||||
tests :: FilePath -> [TestTree]
|
||||
tests pandocPath =
|
||||
|
@ -231,7 +232,7 @@ tests pandocPath =
|
|||
-- makes sure file is fully closed after reading
|
||||
readFile' :: FilePath -> IO String
|
||||
readFile' f = do s <- UTF8.readFile f
|
||||
return $! (length s `seq` s)
|
||||
return $! (T.length s `seq` T.unpack s)
|
||||
|
||||
lhsWriterTests :: FilePath -> String -> [TestTree]
|
||||
lhsWriterTests pandocPath format
|
||||
|
@ -333,7 +334,7 @@ testWithNormalize normalizer pandocPath testname opts inp norm =
|
|||
$ UTF8.toStringLazy out
|
||||
-- filter \r so the tests will work on Windows machines
|
||||
else fail $ "Pandoc failed with error code " ++ show ec
|
||||
updateGolden = UTF8.writeFile norm
|
||||
updateGolden = UTF8.writeFile norm . T.pack
|
||||
options = ["--data-dir=../data","--quiet"] ++ [inp] ++ opts
|
||||
|
||||
compareValues :: FilePath -> [String] -> String -> String -> IO (Maybe String)
|
||||
|
|
|
@ -54,7 +54,7 @@ tokUntokRt s = untokenize (tokenize "random" t) == t
|
|||
tests :: [TestTree]
|
||||
tests = [ testGroup "tokenization"
|
||||
[ testCase "tokenizer round trip on test case" $ do
|
||||
orig <- T.pack <$> UTF8.readFile "../test/latex-reader.latex"
|
||||
orig <- UTF8.readFile "../test/latex-reader.latex"
|
||||
let new = untokenize $ tokenize "../test/latex-reader.latex"
|
||||
orig
|
||||
assertEqual "untokenize . tokenize is identity" orig new
|
||||
|
|
Loading…
Add table
Reference in a new issue