Text.Pandoc.UTF8: change IO functions to use Text, not String.

[API change] This affects `readFile`, `getContents`, `writeFileWith`,
`writeFile`, `putStrWith`, `putStr`, `putStrLnWith`, `putStrLn`,
`hPutStrWith`, `hPutStr`, `hPutStrLnWith`, `hPutStrLn`, and `hGetContents`.

This avoids needlessly building a linked list of characters (a `String`)
when emitting output.
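
For illustration only (a minimal sketch against the new API, not code from
this commit): call sites keep `String`/`FilePath` values for paths, pack them
with `T.pack` where a `Text` argument is now expected, and consume the `Text`
results directly, with no `unpack`/`pack` round trip.

    import qualified Data.Text as T
    import qualified Text.Pandoc.UTF8 as UTF8
    import System.IO (stdout)

    main :: IO ()
    main = do
      let outputFile = "out.md" :: FilePath      -- paths are still Strings
      UTF8.hPutStrLn stdout (T.pack outputFile)  -- pack at the boundary
      contents <- UTF8.readFile outputFile       -- now returns Text
      UTF8.putStr contents                       -- consumed as Text, no unpack
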
John MacFarlane, 2021-02-22 11:30:07 -08:00
commit 80fde18fb1 (parent 607c014e9d)
11 changed files with 70 additions and 68 deletions


@ -73,8 +73,9 @@ convertWithOpts opts = do
let verbosity = optVerbosity opts
when (optDumpArgs opts) $
do UTF8.hPutStrLn stdout outputFile
mapM_ (UTF8.hPutStrLn stdout) (fromMaybe ["-"] $ optInputFiles opts)
do UTF8.hPutStrLn stdout (T.pack outputFile)
mapM_ (UTF8.hPutStrLn stdout . T.pack)
(fromMaybe ["-"] $ optInputFiles opts)
exitSuccess
let sources = case optInputFiles opts of
@ -354,6 +355,5 @@ writeFnBinary "-" = liftIO . BL.putStr
writeFnBinary f = liftIO . BL.writeFile (UTF8.encodePath f)
writerFn :: MonadIO m => IO.Newline -> FilePath -> Text -> m ()
-- TODO this implementation isn't maximally efficient:
writerFn eol "-" = liftIO . UTF8.putStrWith eol . T.unpack
writerFn eol f = liftIO . UTF8.writeFileWith eol f . T.unpack
writerFn eol "-" = liftIO . UTF8.putStrWith eol
writerFn eol f = liftIO . UTF8.writeFileWith eol f


@ -812,10 +812,10 @@ options =
map (\c -> ['-',c]) shorts ++
map ("--" ++) longs
let allopts = unwords (concatMap optnames options)
UTF8.hPutStrLn stdout $ printf tpl allopts
(unwords readersNames)
(unwords writersNames)
(unwords $ map (T.unpack . fst) highlightingStyles)
UTF8.hPutStrLn stdout $ T.pack $ printf tpl allopts
(T.unpack $ T.unwords readersNames)
(T.unpack $ T.unwords writersNames)
(T.unpack $ T.unwords $ map fst highlightingStyles)
(unwords datafiles)
exitSuccess ))
"" -- "Print bash completion script"
@ -854,7 +854,7 @@ options =
else if extensionEnabled x allExts
then '-'
else ' ') : drop 4 (show x)
mapM_ (UTF8.hPutStrLn stdout . showExt)
mapM_ (UTF8.hPutStrLn stdout . T.pack . showExt)
[ex | ex <- extList, extensionEnabled ex allExts]
exitSuccess )
"FORMAT")
@ -868,14 +868,14 @@ options =
, sShortname s `notElem`
[T.pack "Alert", T.pack "Alert_indent"]
]
mapM_ (UTF8.hPutStrLn stdout) (sort langs)
mapM_ (UTF8.hPutStrLn stdout . T.pack) (sort langs)
exitSuccess ))
""
, Option "" ["list-highlight-styles"]
(NoArg
(\_ -> do
mapM_ (UTF8.hPutStrLn stdout . T.unpack . fst) highlightingStyles
mapM_ (UTF8.hPutStrLn stdout . fst) highlightingStyles
exitSuccess ))
""
@ -893,7 +893,7 @@ options =
| T.null t -> -- e.g. for docx, odt, json:
E.throwIO $ PandocCouldNotFindDataFileError $ T.pack
("templates/default." ++ arg)
| otherwise -> write . T.unpack $ t
| otherwise -> write t
Left e -> E.throwIO e
exitSuccess)
"FORMAT")
@ -940,11 +940,13 @@ options =
(\_ -> do
prg <- getProgName
defaultDatadirs <- defaultUserDataDirs
UTF8.hPutStrLn stdout (prg ++ " " ++ T.unpack pandocVersion ++
compileInfo ++
"\nUser data directory: " ++
intercalate " or " defaultDatadirs ++
('\n':copyrightMessage))
UTF8.hPutStrLn stdout
$ T.pack
$ prg ++ " " ++ T.unpack pandocVersion ++
compileInfo ++
"\nUser data directory: " ++
intercalate " or " defaultDatadirs ++
('\n':copyrightMessage)
exitSuccess ))
"" -- "Print version"
@ -952,7 +954,7 @@ options =
(NoArg
(\_ -> do
prg <- getProgName
UTF8.hPutStr stdout (usageMessage prg options)
UTF8.hPutStr stdout (T.pack $ usageMessage prg options)
exitSuccess ))
"" -- "Show help"
]
@ -1013,12 +1015,12 @@ handleUnrecognizedOption "-R" = handleUnrecognizedOption "--parse-raw"
handleUnrecognizedOption x =
(("Unknown option " ++ x ++ ".") :)
readersNames :: [String]
readersNames = sort (map (T.unpack . fst) (readers :: [(Text, Reader PandocIO)]))
readersNames :: [Text]
readersNames = sort (map fst (readers :: [(Text, Reader PandocIO)]))
writersNames :: [String]
writersNames :: [Text]
writersNames = sort
("pdf" : map (T.unpack . fst) (writers :: [(Text, Writer PandocIO)]))
("pdf" : map fst (writers :: [(Text, Writer PandocIO)]))
splitField :: String -> (String, String)
splitField = second (tailDef "true") . break (`elemText` ":=")


@ -59,8 +59,8 @@ optToOutputSettings opts = do
let outputFile = fromMaybe "-" (optOutputFile opts)
when (optDumpArgs opts) . liftIO $ do
UTF8.hPutStrLn stdout outputFile
mapM_ (UTF8.hPutStrLn stdout) (fromMaybe [] $ optInputFiles opts)
UTF8.hPutStrLn stdout (T.pack outputFile)
mapM_ (UTF8.hPutStrLn stdout . T.pack) (fromMaybe [] $ optInputFiles opts)
exitSuccess
epubMetadata <- traverse readUtf8File $ optEpubMetadata opts


@ -183,7 +183,7 @@ getModificationTime = liftIOError System.Directory.getModificationTime
logOutput :: (PandocMonad m, MonadIO m) => LogMessage -> m ()
logOutput msg = liftIO $ do
UTF8.hPutStr stderr $
"[" ++ show (messageVerbosity msg) ++ "] "
"[" <> T.pack (show (messageVerbosity msg)) <> "] "
alertIndent $ T.lines $ showLogMessage msg
-- | Prints the list of lines to @stderr@, indenting every but the first
@ -191,10 +191,10 @@ logOutput msg = liftIO $ do
alertIndent :: [Text] -> IO ()
alertIndent [] = return ()
alertIndent (l:ls) = do
UTF8.hPutStrLn stderr $ unpack l
UTF8.hPutStrLn stderr l
mapM_ go ls
where go l' = do UTF8.hPutStr stderr " "
UTF8.hPutStrLn stderr $ unpack l'
UTF8.hPutStrLn stderr l'
-- | Extract media from the mediabag into a directory.
extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc


@ -191,6 +191,6 @@ handleError (Left e) =
err :: Int -> Text -> IO a
err exitCode msg = do
UTF8.hPutStrLn stderr (T.unpack msg)
UTF8.hPutStrLn stderr msg
exitWith $ ExitFailure exitCode
return undefined


@ -270,7 +270,7 @@ missingCharacterWarnings verbosity log' = do
| isAscii c = T.singleton c
| otherwise = T.pack $ c : " (U+" ++ printf "%04X" (ord c) ++ ")"
let addCodePoint = T.concatMap toCodePoint
let warnings = [ addCodePoint (T.pack $ utf8ToString (BC.drop 19 l))
let warnings = [ addCodePoint (utf8ToText (BC.drop 19 l))
| l <- ls
, isMissingCharacterWarning l
]
@ -314,7 +314,7 @@ runTectonic verbosity program args' tmpDir' source = do
env <- liftIO getEnvironment
when (verbosity >= INFO) $ liftIO $
showVerboseInfo (Just tmpDir) program programArgs env
(utf8ToString sourceBL)
(utf8ToText sourceBL)
(exit, out) <- liftIO $ E.catch
(pipeProcess (Just env) program programArgs sourceBL)
(handlePDFProgramNotFound program)
@ -385,7 +385,7 @@ runTeXProgram verbosity program args numRuns tmpDir' source = do
(pipeProcess (Just env'') program programArgs BL.empty)
(handlePDFProgramNotFound program)
when (verbosity >= INFO) $ liftIO $ do
UTF8.hPutStrLn stderr $ "[makePDF] Run #" ++ show runNumber
UTF8.hPutStrLn stderr $ "[makePDF] Run #" <> tshow runNumber
BL.hPutStr stderr out
UTF8.hPutStr stderr "\n"
if runNumber < numRuns
@ -405,7 +405,7 @@ generic2pdf :: Verbosity
generic2pdf verbosity program args source = do
env' <- getEnvironment
when (verbosity >= INFO) $
showVerboseInfo Nothing program args env' (T.unpack source)
showVerboseInfo Nothing program args env' source
(exit, out) <- E.catch
(pipeProcess (Just env') program args
(BL.fromStrict $ UTF8.fromText source))
@ -494,19 +494,20 @@ showVerboseInfo :: Maybe FilePath
-> String
-> [String]
-> [(String, String)]
-> String
-> Text
-> IO ()
showVerboseInfo mbTmpDir program programArgs env source = do
case mbTmpDir of
Just tmpDir -> do
UTF8.hPutStrLn stderr "[makePDF] temp dir:"
UTF8.hPutStrLn stderr tmpDir
UTF8.hPutStrLn stderr (T.pack tmpDir)
Nothing -> return ()
UTF8.hPutStrLn stderr "[makePDF] Command line:"
UTF8.hPutStrLn stderr $ program ++ " " ++ unwords (map show programArgs)
UTF8.hPutStrLn stderr $
T.pack program <> " " <> T.pack (unwords (map show programArgs))
UTF8.hPutStr stderr "\n"
UTF8.hPutStrLn stderr "[makePDF] Environment:"
mapM_ (UTF8.hPutStrLn stderr . show) env
mapM_ (UTF8.hPutStrLn stderr . tshow) env
UTF8.hPutStr stderr "\n"
UTF8.hPutStrLn stderr "[makePDF] Source:"
UTF8.hPutStrLn stderr source
@ -517,8 +518,8 @@ handlePDFProgramNotFound program e
E.throwIO $ PandocPDFProgramNotFoundError $ T.pack program
| otherwise = E.throwIO e
utf8ToString :: ByteString -> String
utf8ToString lbs =
utf8ToText :: ByteString -> Text
utf8ToText lbs =
case decodeUtf8' lbs of
Left _ -> BC.unpack lbs -- if decoding fails, treat as latin1
Right t -> TL.unpack t
Left _ -> T.pack $ BC.unpack lbs -- if decoding fails, treat as latin1
Right t -> TL.toStrict t
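
Read on its own, the new helper amounts to lenient decoding of the (lazy)
process output; a standalone sketch with the imports it would need, assuming
the same names as the diff:

    import qualified Data.ByteString.Lazy.Char8 as BC
    import Data.Text (Text)
    import qualified Data.Text as T
    import qualified Data.Text.Lazy as TL
    import Data.Text.Lazy.Encoding (decodeUtf8')

    -- Decode lazy UTF-8 output from the TeX run; if strict decoding fails,
    -- fall back to reading each byte as a character (effectively latin1).
    utf8ToText :: BC.ByteString -> Text
    utf8ToText lbs =
      case decodeUtf8' lbs of
        Left _  -> T.pack (BC.unpack lbs)
        Right t -> TL.toStrict t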


@ -1148,7 +1148,7 @@ testStringWith :: Show a
=> ParserT Text ParserState Identity a
-> Text
-> IO ()
testStringWith parser str = UTF8.putStrLn $ show $
testStringWith parser str = UTF8.putStrLn $ tshow $
readWith parser defaultParserState str
-- | Parsing options.


@ -39,67 +39,65 @@ where
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import Prelude hiding (getContents, putStr, putStrLn, readFile, writeFile)
import System.IO hiding (getContents, hGetContents, hPutStr, hPutStrLn, putStr,
putStrLn, readFile, writeFile)
import qualified System.IO as IO
readFile :: FilePath -> IO String
readFile :: FilePath -> IO Text
readFile f = do
h <- openFile (encodePath f) ReadMode
hGetContents h
getContents :: IO String
getContents :: IO Text
getContents = hGetContents stdin
writeFileWith :: Newline -> FilePath -> String -> IO ()
writeFileWith :: Newline -> FilePath -> Text -> IO ()
writeFileWith eol f s =
withFile (encodePath f) WriteMode $ \h -> hPutStrWith eol h s
writeFile :: FilePath -> String -> IO ()
writeFile :: FilePath -> Text -> IO ()
writeFile = writeFileWith nativeNewline
putStrWith :: Newline -> String -> IO ()
putStrWith :: Newline -> Text -> IO ()
putStrWith eol s = hPutStrWith eol stdout s
putStr :: String -> IO ()
putStr :: Text -> IO ()
putStr = putStrWith nativeNewline
putStrLnWith :: Newline -> String -> IO ()
putStrLnWith :: Newline -> Text -> IO ()
putStrLnWith eol s = hPutStrLnWith eol stdout s
putStrLn :: String -> IO ()
putStrLn :: Text -> IO ()
putStrLn = putStrLnWith nativeNewline
hPutStrWith :: Newline -> Handle -> String -> IO ()
hPutStrWith :: Newline -> Handle -> Text -> IO ()
hPutStrWith eol h s =
hSetNewlineMode h (NewlineMode eol eol) >>
hSetEncoding h utf8 >> IO.hPutStr h s
hSetEncoding h utf8 >> TIO.hPutStr h s
hPutStr :: Handle -> String -> IO ()
hPutStr :: Handle -> Text -> IO ()
hPutStr = hPutStrWith nativeNewline
hPutStrLnWith :: Newline -> Handle -> String -> IO ()
hPutStrLnWith :: Newline -> Handle -> Text -> IO ()
hPutStrLnWith eol h s =
hSetNewlineMode h (NewlineMode eol eol) >>
hSetEncoding h utf8 >> IO.hPutStrLn h s
hSetEncoding h utf8 >> TIO.hPutStrLn h s
hPutStrLn :: Handle -> String -> IO ()
hPutStrLn :: Handle -> Text -> IO ()
hPutStrLn = hPutStrLnWith nativeNewline
hGetContents :: Handle -> IO String
hGetContents = fmap toString . B.hGetContents
-- hGetContents h = hSetEncoding h utf8_bom
-- >> hSetNewlineMode h universalNewlineMode
-- >> IO.hGetContents h
hGetContents :: Handle -> IO Text
hGetContents = fmap toText . B.hGetContents
-- | Convert UTF8-encoded ByteString to Text, also
-- removing '\r' characters.
toText :: B.ByteString -> T.Text
toText :: B.ByteString -> Text
toText = T.decodeUtf8 . filterCRs . dropBOM
where dropBOM bs =
if "\xEF\xBB\xBF" `B.isPrefixOf` bs
@ -127,7 +125,7 @@ toTextLazy = TL.decodeUtf8 . filterCRs . dropBOM
toStringLazy :: BL.ByteString -> String
toStringLazy = TL.unpack . toTextLazy
fromText :: T.Text -> B.ByteString
fromText :: Text -> B.ByteString
fromText = T.encodeUtf8
fromTextLazy :: TL.Text -> BL.ByteString


@ -130,7 +130,7 @@ runCommandTest testExePath fp num code =
let cmdline = "% " <> cmd
let x = cmdline <> "\n" <> input <> "^D\n" <> norm
let y = cmdline <> "\n" <> input <> "^D\n" <> newnorm
let updated = T.unpack $ T.replace (T.pack x) (T.pack y) (T.pack raw)
let updated = T.replace (T.pack x) (T.pack y) raw
UTF8.writeFile fp' updated
extractCommandTest :: FilePath -> FilePath -> TestTree


@ -22,6 +22,7 @@ import Test.Tasty (TestTree, testGroup)
import Test.Tasty.Golden.Advanced (goldenTest)
import Tests.Helpers hiding (test)
import qualified Text.Pandoc.UTF8 as UTF8
import qualified Data.Text as T
tests :: FilePath -> [TestTree]
tests pandocPath =
@ -231,7 +232,7 @@ tests pandocPath =
-- makes sure file is fully closed after reading
readFile' :: FilePath -> IO String
readFile' f = do s <- UTF8.readFile f
return $! (length s `seq` s)
return $! (T.length s `seq` T.unpack s)
lhsWriterTests :: FilePath -> String -> [TestTree]
lhsWriterTests pandocPath format
@ -333,7 +334,7 @@ testWithNormalize normalizer pandocPath testname opts inp norm =
$ UTF8.toStringLazy out
-- filter \r so the tests will work on Windows machines
else fail $ "Pandoc failed with error code " ++ show ec
updateGolden = UTF8.writeFile norm
updateGolden = UTF8.writeFile norm . T.pack
options = ["--data-dir=../data","--quiet"] ++ [inp] ++ opts
compareValues :: FilePath -> [String] -> String -> String -> IO (Maybe String)


@ -54,7 +54,7 @@ tokUntokRt s = untokenize (tokenize "random" t) == t
tests :: [TestTree]
tests = [ testGroup "tokenization"
[ testCase "tokenizer round trip on test case" $ do
orig <- T.pack <$> UTF8.readFile "../test/latex-reader.latex"
orig <- UTF8.readFile "../test/latex-reader.latex"
let new = untokenize $ tokenize "../test/latex-reader.latex"
orig
assertEqual "untokenize . tokenize is identity" orig new