Text.Pandoc.UTF8: change IO functions to use Text, not String.

[API change] This affects `readFile`, `getContents`, `writeFileWith`,
`writeFile`, `putStrWith`, `putStr`, `putStrLnWith`, `putStrLn`,
`hPutStrWith`, `hPutStr`, `hPutStrLnWith`, `hPutStrLn`, and `hGetContents`.

This avoids needlessly building a linked list of characters (a `String`)
when emitting output.
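
For illustration only (a minimal sketch against the new API, not code from
this commit): call sites keep `String`/`FilePath` values for paths, pack them
with `T.pack` where a `Text` argument is now expected, and consume the `Text`
results directly, with no `unpack`/`pack` round trip.

    import qualified Data.Text as T
    import qualified Text.Pandoc.UTF8 as UTF8
    import System.IO (stdout)

    main :: IO ()
    main = do
      let outputFile = "out.md" :: FilePath      -- paths are still Strings
      UTF8.hPutStrLn stdout (T.pack outputFile)  -- pack at the boundary
      contents <- UTF8.readFile outputFile       -- now returns Text
      UTF8.putStr contents                       -- consumed as Text, no unpack
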
John MacFarlane, 2021-02-22 11:30:07 -08:00
commit 80fde18fb1 (parent 607c014e9d)
11 changed files with 70 additions and 68 deletions


@ -73,8 +73,9 @@ convertWithOpts opts = do
let verbosity = optVerbosity opts
when (optDumpArgs opts) $
do UTF8.hPutStrLn stdout outputFile
mapM_ (UTF8.hPutStrLn stdout) (fromMaybe ["-"] $ optInputFiles opts)
do UTF8.hPutStrLn stdout (T.pack outputFile)
mapM_ (UTF8.hPutStrLn stdout . T.pack)
(fromMaybe ["-"] $ optInputFiles opts)
exitSuccess
let sources = case optInputFiles opts of
@ -354,6 +355,5 @@ writeFnBinary "-" = liftIO . BL.putStr
writeFnBinary f = liftIO . BL.writeFile (UTF8.encodePath f)
writerFn :: MonadIO m => IO.Newline -> FilePath -> Text -> m ()
-- TODO this implementation isn't maximally efficient:
writerFn eol "-" = liftIO . UTF8.putStrWith eol . T.unpack
writerFn eol f = liftIO . UTF8.writeFileWith eol f . T.unpack
writerFn eol "-" = liftIO . UTF8.putStrWith eol
writerFn eol f = liftIO . UTF8.writeFileWith eol f


@ -812,10 +812,10 @@ options =
map (\c -> ['-',c]) shorts ++
map ("--" ++) longs
let allopts = unwords (concatMap optnames options)
UTF8.hPutStrLn stdout $ printf tpl allopts
(unwords readersNames)
(unwords writersNames)
(unwords $ map (T.unpack . fst) highlightingStyles)
UTF8.hPutStrLn stdout $ T.pack $ printf tpl allopts
(T.unpack $ T.unwords readersNames)
(T.unpack $ T.unwords writersNames)
(T.unpack $ T.unwords $ map fst highlightingStyles)
(unwords datafiles)
exitSuccess ))
"" -- "Print bash completion script"
@ -854,7 +854,7 @@ options =
else if extensionEnabled x allExts
then '-'
else ' ') : drop 4 (show x)
mapM_ (UTF8.hPutStrLn stdout . showExt)
mapM_ (UTF8.hPutStrLn stdout . T.pack . showExt)
[ex | ex <- extList, extensionEnabled ex allExts]
exitSuccess )
"FORMAT")
@ -868,14 +868,14 @@ options =
, sShortname s `notElem`
[T.pack "Alert", T.pack "Alert_indent"]
]
mapM_ (UTF8.hPutStrLn stdout) (sort langs)
mapM_ (UTF8.hPutStrLn stdout . T.pack) (sort langs)
exitSuccess ))
""
, Option "" ["list-highlight-styles"]
(NoArg
(\_ -> do
mapM_ (UTF8.hPutStrLn stdout . T.unpack . fst) highlightingStyles
mapM_ (UTF8.hPutStrLn stdout . fst) highlightingStyles
exitSuccess ))
""
@ -893,7 +893,7 @@ options =
| T.null t -> -- e.g. for docx, odt, json:
E.throwIO $ PandocCouldNotFindDataFileError $ T.pack
("templates/default." ++ arg)
| otherwise -> write . T.unpack $ t
| otherwise -> write t
Left e -> E.throwIO e
exitSuccess)
"FORMAT")
@ -940,11 +940,13 @@ options =
(\_ -> do
prg <- getProgName
defaultDatadirs <- defaultUserDataDirs
UTF8.hPutStrLn stdout (prg ++ " " ++ T.unpack pandocVersion ++
compileInfo ++
"\nUser data directory: " ++
intercalate " or " defaultDatadirs ++
('\n':copyrightMessage))
UTF8.hPutStrLn stdout
$ T.pack
$ prg ++ " " ++ T.unpack pandocVersion ++
compileInfo ++
"\nUser data directory: " ++
intercalate " or " defaultDatadirs ++
('\n':copyrightMessage)
exitSuccess ))
"" -- "Print version"
@ -952,7 +954,7 @@ options =
(NoArg
(\_ -> do
prg <- getProgName
UTF8.hPutStr stdout (usageMessage prg options)
UTF8.hPutStr stdout (T.pack $ usageMessage prg options)
exitSuccess ))
"" -- "Show help"
]
@ -1013,12 +1015,12 @@ handleUnrecognizedOption "-R" = handleUnrecognizedOption "--parse-raw"
handleUnrecognizedOption x =
(("Unknown option " ++ x ++ ".") :)
readersNames :: [String]
readersNames = sort (map (T.unpack . fst) (readers :: [(Text, Reader PandocIO)]))
readersNames :: [Text]
readersNames = sort (map fst (readers :: [(Text, Reader PandocIO)]))
writersNames :: [String]
writersNames :: [Text]
writersNames = sort
("pdf" : map (T.unpack . fst) (writers :: [(Text, Writer PandocIO)]))
("pdf" : map fst (writers :: [(Text, Writer PandocIO)]))
splitField :: String -> (String, String)
splitField = second (tailDef "true") . break (`elemText` ":=")


@ -59,8 +59,8 @@ optToOutputSettings opts = do
let outputFile = fromMaybe "-" (optOutputFile opts)
when (optDumpArgs opts) . liftIO $ do
UTF8.hPutStrLn stdout outputFile
mapM_ (UTF8.hPutStrLn stdout) (fromMaybe [] $ optInputFiles opts)
UTF8.hPutStrLn stdout (T.pack outputFile)
mapM_ (UTF8.hPutStrLn stdout . T.pack) (fromMaybe [] $ optInputFiles opts)
exitSuccess
epubMetadata <- traverse readUtf8File $ optEpubMetadata opts


@ -183,7 +183,7 @@ getModificationTime = liftIOError System.Directory.getModificationTime
logOutput :: (PandocMonad m, MonadIO m) => LogMessage -> m ()
logOutput msg = liftIO $ do
UTF8.hPutStr stderr $
"[" ++ show (messageVerbosity msg) ++ "] "
"[" <> T.pack (show (messageVerbosity msg)) <> "] "
alertIndent $ T.lines $ showLogMessage msg
-- | Prints the list of lines to @stderr@, indenting every but the first
@ -191,10 +191,10 @@ logOutput msg = liftIO $ do
alertIndent :: [Text] -> IO ()
alertIndent [] = return ()
alertIndent (l:ls) = do
UTF8.hPutStrLn stderr $ unpack l
UTF8.hPutStrLn stderr l
mapM_ go ls
where go l' = do UTF8.hPutStr stderr " "
UTF8.hPutStrLn stderr $ unpack l'
UTF8.hPutStrLn stderr l'
-- | Extract media from the mediabag into a directory.
extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc


@ -191,6 +191,6 @@ handleError (Left e) =
err :: Int -> Text -> IO a
err exitCode msg = do
UTF8.hPutStrLn stderr (T.unpack msg)
UTF8.hPutStrLn stderr msg
exitWith $ ExitFailure exitCode
return undefined


@ -270,7 +270,7 @@ missingCharacterWarnings verbosity log' = do
| isAscii c = T.singleton c
| otherwise = T.pack $ c : " (U+" ++ printf "%04X" (ord c) ++ ")"
let addCodePoint = T.concatMap toCodePoint
let warnings = [ addCodePoint (T.pack $ utf8ToString (BC.drop 19 l))
let warnings = [ addCodePoint (utf8ToText (BC.drop 19 l))
| l <- ls
, isMissingCharacterWarning l
]
@ -314,7 +314,7 @@ runTectonic verbosity program args' tmpDir' source = do
env <- liftIO getEnvironment
when (verbosity >= INFO) $ liftIO $
showVerboseInfo (Just tmpDir) program programArgs env
(utf8ToString sourceBL)
(utf8ToText sourceBL)
(exit, out) <- liftIO $ E.catch
(pipeProcess (Just env) program programArgs sourceBL)
(handlePDFProgramNotFound program)
@ -385,7 +385,7 @@ runTeXProgram verbosity program args numRuns tmpDir' source = do
(pipeProcess (Just env'') program programArgs BL.empty)
(handlePDFProgramNotFound program)
when (verbosity >= INFO) $ liftIO $ do
UTF8.hPutStrLn stderr $ "[makePDF] Run #" ++ show runNumber
UTF8.hPutStrLn stderr $ "[makePDF] Run #" <> tshow runNumber
BL.hPutStr stderr out
UTF8.hPutStr stderr "\n"
if runNumber < numRuns
@ -405,7 +405,7 @@ generic2pdf :: Verbosity
generic2pdf verbosity program args source = do
env' <- getEnvironment
when (verbosity >= INFO) $
showVerboseInfo Nothing program args env' (T.unpack source)
showVerboseInfo Nothing program args env' source
(exit, out) <- E.catch
(pipeProcess (Just env') program args
(BL.fromStrict $ UTF8.fromText source))
@ -494,19 +494,20 @@ showVerboseInfo :: Maybe FilePath
-> String
-> [String]
-> [(String, String)]
-> String
-> Text
-> IO ()
showVerboseInfo mbTmpDir program programArgs env source = do
case mbTmpDir of
Just tmpDir -> do
UTF8.hPutStrLn stderr "[makePDF] temp dir:"
UTF8.hPutStrLn stderr tmpDir
UTF8.hPutStrLn stderr (T.pack tmpDir)
Nothing -> return ()
UTF8.hPutStrLn stderr "[makePDF] Command line:"
UTF8.hPutStrLn stderr $ program ++ " " ++ unwords (map show programArgs)
UTF8.hPutStrLn stderr $
T.pack program <> " " <> T.pack (unwords (map show programArgs))
UTF8.hPutStr stderr "\n"
UTF8.hPutStrLn stderr "[makePDF] Environment:"
mapM_ (UTF8.hPutStrLn stderr . show) env
mapM_ (UTF8.hPutStrLn stderr . tshow) env
UTF8.hPutStr stderr "\n"
UTF8.hPutStrLn stderr "[makePDF] Source:"
UTF8.hPutStrLn stderr source
@ -517,8 +518,8 @@ handlePDFProgramNotFound program e
E.throwIO $ PandocPDFProgramNotFoundError $ T.pack program
| otherwise = E.throwIO e
utf8ToString :: ByteString -> String
utf8ToString lbs =
utf8ToText :: ByteString -> Text
utf8ToText lbs =
case decodeUtf8' lbs of
Left _ -> BC.unpack lbs -- if decoding fails, treat as latin1
Right t -> TL.unpack t
Left _ -> T.pack $ BC.unpack lbs -- if decoding fails, treat as latin1
Right t -> TL.toStrict t
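
Read on its own, the new helper amounts to lenient decoding of the (lazy)
process output; a standalone sketch with the imports it would need, assuming
the same names as the diff:

    import qualified Data.ByteString.Lazy.Char8 as BC
    import Data.Text (Text)
    import qualified Data.Text as T
    import qualified Data.Text.Lazy as TL
    import Data.Text.Lazy.Encoding (decodeUtf8')

    -- Decode lazy UTF-8 output from the TeX run; if strict decoding fails,
    -- fall back to reading each byte as a character (effectively latin1).
    utf8ToText :: BC.ByteString -> Text
    utf8ToText lbs =
      case decodeUtf8' lbs of
        Left _  -> T.pack (BC.unpack lbs)
        Right t -> TL.toStrict t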


@ -1148,7 +1148,7 @@ testStringWith :: Show a
=> ParserT Text ParserState Identity a
-> Text
-> IO ()
testStringWith parser str = UTF8.putStrLn $ show $
testStringWith parser str = UTF8.putStrLn $ tshow $
readWith parser defaultParserState str
-- | Parsing options.


@ -39,67 +39,65 @@ where
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import Prelude hiding (getContents, putStr, putStrLn, readFile, writeFile)
import System.IO hiding (getContents, hGetContents, hPutStr, hPutStrLn, putStr,
putStrLn, readFile, writeFile)
import qualified System.IO as IO
readFile :: FilePath -> IO String
readFile :: FilePath -> IO Text
readFile f = do
h <- openFile (encodePath f) ReadMode
hGetContents h
getContents :: IO String
getContents :: IO Text
getContents = hGetContents stdin
writeFileWith :: Newline -> FilePath -> String -> IO ()
writeFileWith :: Newline -> FilePath -> Text -> IO ()
writeFileWith eol f s =
withFile (encodePath f) WriteMode $ \h -> hPutStrWith eol h s
writeFile :: FilePath -> String -> IO ()
writeFile :: FilePath -> Text -> IO ()
writeFile = writeFileWith nativeNewline
putStrWith :: Newline -> String -> IO ()
putStrWith :: Newline -> Text -> IO ()
putStrWith eol s = hPutStrWith eol stdout s
putStr :: String -> IO ()
putStr :: Text -> IO ()
putStr = putStrWith nativeNewline
putStrLnWith :: Newline -> String -> IO ()
putStrLnWith :: Newline -> Text -> IO ()
putStrLnWith eol s = hPutStrLnWith eol stdout s
putStrLn :: String -> IO ()
putStrLn :: Text -> IO ()
putStrLn = putStrLnWith nativeNewline
hPutStrWith :: Newline -> Handle -> String -> IO ()
hPutStrWith :: Newline -> Handle -> Text -> IO ()
hPutStrWith eol h s =
hSetNewlineMode h (NewlineMode eol eol) >>
hSetEncoding h utf8 >> IO.hPutStr h s
hSetEncoding h utf8 >> TIO.hPutStr h s
hPutStr :: Handle -> String -> IO ()
hPutStr :: Handle -> Text -> IO ()
hPutStr = hPutStrWith nativeNewline
hPutStrLnWith :: Newline -> Handle -> String -> IO ()
hPutStrLnWith :: Newline -> Handle -> Text -> IO ()
hPutStrLnWith eol h s =
hSetNewlineMode h (NewlineMode eol eol) >>
hSetEncoding h utf8 >> IO.hPutStrLn h s
hSetEncoding h utf8 >> TIO.hPutStrLn h s
hPutStrLn :: Handle -> String -> IO ()
hPutStrLn :: Handle -> Text -> IO ()
hPutStrLn = hPutStrLnWith nativeNewline
hGetContents :: Handle -> IO String
hGetContents = fmap toString . B.hGetContents
-- hGetContents h = hSetEncoding h utf8_bom
-- >> hSetNewlineMode h universalNewlineMode
-- >> IO.hGetContents h
hGetContents :: Handle -> IO Text
hGetContents = fmap toText . B.hGetContents
-- | Convert UTF8-encoded ByteString to Text, also
-- removing '\r' characters.
toText :: B.ByteString -> T.Text
toText :: B.ByteString -> Text
toText = T.decodeUtf8 . filterCRs . dropBOM
where dropBOM bs =
if "\xEF\xBB\xBF" `B.isPrefixOf` bs
@ -127,7 +125,7 @@ toTextLazy = TL.decodeUtf8 . filterCRs . dropBOM
toStringLazy :: BL.ByteString -> String
toStringLazy = TL.unpack . toTextLazy
fromText :: T.Text -> B.ByteString
fromText :: Text -> B.ByteString
fromText = T.encodeUtf8
fromTextLazy :: TL.Text -> BL.ByteString


@ -130,7 +130,7 @@ runCommandTest testExePath fp num code =
let cmdline = "% " <> cmd
let x = cmdline <> "\n" <> input <> "^D\n" <> norm
let y = cmdline <> "\n" <> input <> "^D\n" <> newnorm
let updated = T.unpack $ T.replace (T.pack x) (T.pack y) (T.pack raw)
let updated = T.replace (T.pack x) (T.pack y) raw
UTF8.writeFile fp' updated
extractCommandTest :: FilePath -> FilePath -> TestTree


@ -22,6 +22,7 @@ import Test.Tasty (TestTree, testGroup)
import Test.Tasty.Golden.Advanced (goldenTest)
import Tests.Helpers hiding (test)
import qualified Text.Pandoc.UTF8 as UTF8
import qualified Data.Text as T
tests :: FilePath -> [TestTree]
tests pandocPath =
@ -231,7 +232,7 @@ tests pandocPath =
-- makes sure file is fully closed after reading
readFile' :: FilePath -> IO String
readFile' f = do s <- UTF8.readFile f
return $! (length s `seq` s)
return $! (T.length s `seq` T.unpack s)
lhsWriterTests :: FilePath -> String -> [TestTree]
lhsWriterTests pandocPath format
@ -333,7 +334,7 @@ testWithNormalize normalizer pandocPath testname opts inp norm =
$ UTF8.toStringLazy out
-- filter \r so the tests will work on Windows machines
else fail $ "Pandoc failed with error code " ++ show ec
updateGolden = UTF8.writeFile norm
updateGolden = UTF8.writeFile norm . T.pack
options = ["--data-dir=../data","--quiet"] ++ [inp] ++ opts
compareValues :: FilePath -> [String] -> String -> String -> IO (Maybe String)


@ -54,7 +54,7 @@ tokUntokRt s = untokenize (tokenize "random" t) == t
tests :: [TestTree]
tests = [ testGroup "tokenization"
[ testCase "tokenizer round trip on test case" $ do
orig <- T.pack <$> UTF8.readFile "../test/latex-reader.latex"
orig <- UTF8.readFile "../test/latex-reader.latex"
let new = untokenize $ tokenize "../test/latex-reader.latex"
orig
assertEqual "untokenize . tokenize is identity" orig new