From 6ad7ac1239cb384bf830b1692991030735f43e89 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 25 Sep 2012 19:54:21 -0700 Subject: [PATCH] Removed need for utf8-string package. * Depend on text. * Expose Text.Pandoc.UTF8. * Text.Pandoc.UTF8 now exports toString, fromString, toStringLazy, fromStringLazy. * These are used instead of the old utf8-string functions. --- pandoc.cabal | 8 ++++---- src/Text/Pandoc/SelfContained.hs | 2 +- src/Text/Pandoc/Templates.hs | 5 +++-- src/Text/Pandoc/UTF8.hs | 30 +++++++++++++++++++++++++++--- src/Text/Pandoc/Writers/Docx.hs | 21 ++++++++++----------- src/Text/Pandoc/Writers/EPUB.hs | 20 ++++++++++---------- src/Text/Pandoc/Writers/ODT.hs | 8 ++++---- src/pandoc.hs | 5 ++--- tests/Tests/Old.hs | 4 ++-- 9 files changed, 63 insertions(+), 40 deletions(-) diff --git a/pandoc.cabal b/pandoc.cabal index 038c9fc06..075b33a6b 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -209,8 +209,8 @@ Library process >= 1 && < 1.2, directory >= 1 && < 1.3, bytestring >= 0.9 && < 1.0, + text >= 0.11 && < 0.12, zip-archive >= 0.1.1.7 && < 0.2, - utf8-string >= 0.3 && < 0.4, old-locale >= 1 && < 1.1, time >= 1.2 && < 1.5, HTTP >= 4000.0.5 && < 4000.3, @@ -289,11 +289,11 @@ Library Text.Pandoc.Writers.EPUB, Text.Pandoc.Writers.FB2, Text.Pandoc.PDF, + Text.Pandoc.UTF8, Text.Pandoc.Templates, Text.Pandoc.Biblio, Text.Pandoc.SelfContained Other-Modules: Text.Pandoc.XML, - Text.Pandoc.UTF8, Text.Pandoc.MIME, Text.Pandoc.UUID, Text.Pandoc.ImageSize, @@ -319,8 +319,8 @@ Executable pandoc process >= 1 && < 1.2, directory >= 1 && < 1.3, bytestring >= 0.9 && < 1.0, + text >= 0.11 && < 0.12, zip-archive >= 0.1.1.7 && < 0.2, - utf8-string >= 0.3 && < 0.4, old-locale >= 1 && < 1.1, time >= 1.2 && < 1.5, HTTP >= 4000.0.5 && < 4000.3, @@ -378,8 +378,8 @@ Test-Suite test-pandoc syb >= 0.1 && < 0.4, pandoc, pandoc-types >= 1.10 && < 1.11, - utf8-string >= 0.3 && < 0.4, bytestring >= 0.9 && < 1.0, + text >= 0.11 && < 0.12, directory >= 1 && < 1.3, filepath >= 1.1 && < 1.4, process >= 1 && < 1.2, diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 7a21f6f3a..e468d504d 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -37,7 +37,6 @@ import Network.HTTP import Data.ByteString.Base64 import qualified Data.ByteString.Char8 as B import Data.ByteString (ByteString) -import Data.ByteString.UTF8 (toString, fromString) import System.FilePath (takeExtension, dropExtension, takeDirectory, ()) import Data.Char (toLower, isAscii, isAlphaNum) import Codec.Compression.GZip as Gzip @@ -45,6 +44,7 @@ import qualified Data.ByteString.Lazy as L import Text.Pandoc.Shared (findDataFile, renderTags') import Text.Pandoc.MIME (getMimeType) import System.Directory (doesFileExist) +import Text.Pandoc.UTF8 (toString, fromString) getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String) getItem userdata f = diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 4f5ad54bd..e81fd9d14 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -78,7 +78,8 @@ import Text.Blaze.Internal (preEscapedString) #else import Text.Blaze (preEscapedString, Html) #endif -import Data.ByteString.Lazy.UTF8 (ByteString, fromString) +import Text.Pandoc.UTF8 (fromStringLazy) +import Data.ByteString.Lazy (ByteString) import Text.Pandoc.Shared (readDataFile) import qualified Control.Exception.Extensible as E (try, IOException) @@ -118,7 +119,7 @@ instance TemplateTarget String where toTarget = id instance TemplateTarget ByteString where - toTarget = fromString + toTarget = fromStringLazy instance TemplateTarget Html where toTarget = preEscapedString diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index 5be7d3c8e..1f2b4b695 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -35,6 +35,10 @@ module Text.Pandoc.UTF8 ( readFile , hPutStr , hPutStrLn , hGetContents + , toString + , fromString + , toStringLazy + , fromStringLazy , encodePath , decodeArg ) @@ -50,6 +54,13 @@ import System.IO hiding (readFile, writeFile, getContents, putStr, putStrLn, hPutStr, hPutStrLn, hGetContents) import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn ) import qualified System.IO as IO +import qualified Data.ByteString.Char8 as B +import qualified Data.ByteString.Lazy as BL +import qualified Data.Text.Encoding as T +import qualified Data.Text as T +import qualified Data.Text.Lazy as TL +import qualified Data.Text.Lazy.Encoding as TL +import Data.Text.Encoding.Error readFile :: FilePath -> IO String readFile f = do @@ -75,15 +86,28 @@ hPutStrLn :: Handle -> String -> IO () hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s hGetContents :: Handle -> IO String -hGetContents h = hSetEncoding h utf8_bom >> hSetNewlineMode h universalNewlineMode +hGetContents h = hSetEncoding h utf8_bom + >> hSetNewlineMode h universalNewlineMode >> IO.hGetContents h +toString :: B.ByteString -> String +toString = T.unpack . T.decodeUtf8With lenientDecode + +fromString :: String -> B.ByteString +fromString = T.encodeUtf8 . T.pack + +toStringLazy :: BL.ByteString -> String +toStringLazy = TL.unpack . TL.decodeUtf8With lenientDecode + +fromStringLazy :: String -> BL.ByteString +fromStringLazy = TL.encodeUtf8 . TL.pack + encodePath :: FilePath -> FilePath decodeArg :: String -> String #if MIN_VERSION_base(4,4,0) encodePath = id decodeArg = id #else -encodePath = encodeString -decodeArg = decodeString +encodePath = B.unpack . fromString +decodeArg = toString . B.pack #endif diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 84bf95dfb..f8e3370e4 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -32,8 +32,7 @@ import Data.List ( intercalate ) import System.FilePath ( () ) import qualified Data.ByteString.Lazy as B import qualified Data.Map as M -import Data.ByteString.Lazy.UTF8 ( fromString, toString ) -import Text.Pandoc.UTF8 as UTF8 +import qualified Text.Pandoc.UTF8 as UTF8 import System.IO ( stderr ) import Codec.Archive.Zip import Data.Time.Clock.POSIX @@ -126,7 +125,7 @@ writeDocx opts doc@(Pandoc (Meta tit auths date) _) = do let newrels = map toImgRel imgs let relpath = "word/_rels/document.xml.rels" let reldoc = case findEntryByPath relpath refArchive >>= - parseXMLDoc . toString . fromEntry of + parseXMLDoc . UTF8.toStringLazy . fromEntry of Just d -> d Nothing -> error $ relpath ++ "missing in reference docx" let reldoc' = reldoc{ elContent = elContent reldoc ++ map Elem newrels } @@ -138,21 +137,21 @@ writeDocx opts doc@(Pandoc (Meta tit auths date) _) = do let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] () let newrels' = map toLinkRel $ M.toList $ stExternalLinks st let reldoc'' = reldoc' { elContent = elContent reldoc' ++ map Elem newrels' } - let relEntry = toEntry relpath epochtime $ fromString $ showTopElement' reldoc'' - let contentEntry = toEntry "word/document.xml" epochtime $ fromString $ showTopElement' newContents + let relEntry = toEntry relpath epochtime $ UTF8.fromStringLazy $ showTopElement' reldoc'' + let contentEntry = toEntry "word/document.xml" epochtime $ UTF8.fromStringLazy $ showTopElement' newContents -- styles let newstyles = styleToOpenXml $ writerHighlightStyle opts let stylepath = "word/styles.xml" let styledoc = case findEntryByPath stylepath refArchive >>= - parseXMLDoc . toString . fromEntry of + parseXMLDoc . UTF8.toStringLazy . fromEntry of Just d -> d Nothing -> error $ "Unable to parse " ++ stylepath ++ " from reference.docx" let styledoc' = styledoc{ elContent = elContent styledoc ++ map Elem newstyles } - let styleEntry = toEntry stylepath epochtime $ fromString $ showTopElement' styledoc' + let styleEntry = toEntry stylepath epochtime $ UTF8.fromStringLazy $ showTopElement' styledoc' -- construct word/numbering.xml let numpath = "word/numbering.xml" - let numEntry = toEntry numpath epochtime $ fromString $ showTopElement' + let numEntry = toEntry numpath epochtime $ UTF8.fromStringLazy $ showTopElement' $ mkNumbering (stNumStyles st) (stLists st) let docPropsPath = "docProps/core.xml" let docProps = mknode "cp:coreProperties" @@ -166,16 +165,16 @@ writeDocx opts doc@(Pandoc (Meta tit auths date) _) = do (maybe "" id $ normalizeDate $ stringify date) : mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] () -- put current time here : map (mknode "dc:creator" [] . stringify) auths - let docPropsEntry = toEntry docPropsPath epochtime $ fromString $ showTopElement' docProps + let docPropsEntry = toEntry docPropsPath epochtime $ UTF8.fromStringLazy $ showTopElement' docProps let relsPath = "_rels/.rels" rels <- case findEntryByPath relsPath refArchive of - Just e -> return $ toString $ fromEntry e + Just e -> return $ UTF8.toStringLazy $ fromEntry e Nothing -> err 57 "could not find .rels/_rels in reference docx" -- fix .rels/_rels, which can get screwed up when reference.docx is edited by Word let rels' = substitute "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" "http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties" rels - let relsEntry = toEntry relsPath epochtime $ fromString rels' + let relsEntry = toEntry relsPath epochtime $ UTF8.fromStringLazy rels' let archive = foldr addEntryToArchive refArchive $ relsEntry : contentEntry : relEntry : numEntry : styleEntry : docPropsEntry : imageEntries return $ fromArchive archive diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 6f8931caa..3fac93c05 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -35,7 +35,7 @@ import System.Environment ( getEnv ) import Text.Printf (printf) import System.FilePath ( (), (<.>), takeBaseName, takeExtension, takeFileName ) import qualified Data.ByteString.Lazy as B -import Data.ByteString.Lazy.UTF8 ( fromString ) +import Text.Pandoc.UTF8 ( fromStringLazy ) import Codec.Archive.Zip import Data.Time.Clock.POSIX import Text.Pandoc.Shared hiding ( Element ) @@ -82,7 +82,7 @@ writeEPUB opts doc@(Pandoc meta _) = do Nothing -> return ([],[]) Just img -> do let coverImage = "cover-image" ++ takeExtension img - let cpContent = fromString $ writeHtmlString + let cpContent = fromStringLazy $ writeHtmlString opts'{writerTemplate = coverImageTemplate, writerVariables = ("coverimage",coverImage):vars} (Pandoc meta []) @@ -91,7 +91,7 @@ writeEPUB opts doc@(Pandoc meta _) = do , [mkEntry coverImage imgContent] ) -- title page - let tpContent = fromString $ writeHtmlString + let tpContent = fromStringLazy $ writeHtmlString opts'{writerTemplate = titlePageTemplate} (Pandoc meta []) let tpEntry = mkEntry "title_page.xhtml" tpContent @@ -125,7 +125,7 @@ writeEPUB opts doc@(Pandoc meta _) = do let chapterToEntry :: Int -> Pandoc -> Entry chapterToEntry num chap = mkEntry (showChapter num) $ - fromString $ chapToHtml chap + fromStringLazy $ chapToHtml chap let chapterEntries = zipWith chapterToEntry [1..] chapters -- contents.opf @@ -157,7 +157,7 @@ writeEPUB opts doc@(Pandoc meta _) = do let plainTitle = plainify $ docTitle meta let plainAuthors = map plainify $ docAuthors meta let plainDate = maybe "" id $ normalizeDate $ stringify $ docDate meta - let contentsData = fromString $ ppTopElement $ + let contentsData = fromStringLazy $ ppTopElement $ unode "package" ! [("version","2.0") ,("xmlns","http://www.idpf.org/2007/opf") ,("unique-identifier","BookId")] $ @@ -189,7 +189,7 @@ writeEPUB opts doc@(Pandoc meta _) = do , unode "content" ! [("src", eRelativePath ent)] $ () ] - let tocData = fromString $ ppTopElement $ + let tocData = fromStringLazy $ ppTopElement $ unode "ncx" ! [("version","2005-1") ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ [ unode "head" $ @@ -214,10 +214,10 @@ writeEPUB opts doc@(Pandoc meta _) = do let tocEntry = mkEntry "toc.ncx" tocData -- mimetype - let mimetypeEntry = mkEntry "mimetype" $ fromString "application/epub+zip" + let mimetypeEntry = mkEntry "mimetype" $ fromStringLazy "application/epub+zip" -- container.xml - let containerData = fromString $ ppTopElement $ + let containerData = fromStringLazy $ ppTopElement $ unode "container" ! [("version","1.0") ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ unode "rootfiles" $ @@ -226,7 +226,7 @@ writeEPUB opts doc@(Pandoc meta _) = do let containerEntry = mkEntry "META-INF/container.xml" containerData -- com.apple.ibooks.display-options.xml - let apple = fromString $ ppTopElement $ + let apple = fromStringLazy $ ppTopElement $ unode "display_options" $ unode "platform" ! [("name","*")] $ unode "option" ! [("name","specified-fonts")] $ "true" @@ -236,7 +236,7 @@ writeEPUB opts doc@(Pandoc meta _) = do stylesheet <- case writerEpubStylesheet opts of Just s -> return s Nothing -> readDataFile (writerUserDataDir opts) "epub.css" - let stylesheetEntry = mkEntry "stylesheet.css" $ fromString stylesheet + let stylesheetEntry = mkEntry "stylesheet.css" $ fromStringLazy stylesheet -- construct archive let archive = foldr addEntryToArchive emptyArchive diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index f43d0a087..cbff88be5 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -32,7 +32,7 @@ import Data.IORef import Data.List ( isPrefixOf ) import System.FilePath ( (), takeExtension ) import qualified Data.ByteString.Lazy as B -import Data.ByteString.Lazy.UTF8 ( fromString ) +import Text.Pandoc.UTF8 ( fromStringLazy ) import Codec.Archive.Zip import Data.Time.Clock.POSIX import Paths_pandoc ( getDataFileName ) @@ -74,7 +74,7 @@ writeODT opts doc@(Pandoc (Meta title _ _) _) = do doc' <- bottomUpM (transformPic sourceDir picEntriesRef) doc let newContents = writeOpenDocument opts{writerWrapText = False} doc' epochtime <- floor `fmap` getPOSIXTime - let contentEntry = toEntry "content.xml" epochtime $ fromString newContents + let contentEntry = toEntry "content.xml" epochtime $ fromStringLazy newContents picEntries <- readIORef picEntriesRef let archive = foldr addEntryToArchive refArchive $ contentEntry : picEntries -- construct META-INF/manifest.xml based on archive @@ -86,7 +86,7 @@ writeODT opts doc@(Pandoc (Meta title _ _) _) = do ] let files = [ ent | ent <- filesInArchive archive, not ("META-INF" `isPrefixOf` ent) ] let manifestEntry = toEntry "META-INF/manifest.xml" epochtime - $ fromString $ show + $ fromStringLazy $ show $ text "" $$ ( inTags True "manifest:manifest" @@ -100,7 +100,7 @@ writeODT opts doc@(Pandoc (Meta title _ _) _) = do ) let archive' = addEntryToArchive manifestEntry archive let metaEntry = toEntry "meta.xml" epochtime - $ fromString $ show + $ fromStringLazy $ show $ text "" $$ ( inTags True "office:document-meta" diff --git a/src/pandoc.hs b/src/pandoc.hs index 305557f6a..484f7ca44 100644 --- a/src/pandoc.hs +++ b/src/pandoc.hs @@ -57,7 +57,6 @@ import Control.Monad (when, unless, liftM) import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..)) import Network.URI (parseURI, isURI, URI(..)) import qualified Data.ByteString.Lazy as B -import Data.ByteString.Lazy.UTF8 (toString) import Text.CSL.Reference (Reference(..)) copyrightMessage :: String @@ -988,7 +987,7 @@ main = do readURI u _ -> UTF8.readFile src readURI uri = simpleHTTP (mkRequest GET uri) >>= getResponseBody >>= - return . toString -- treat all as UTF8 + return . UTF8.toStringLazy -- treat all as UTF8 let convertTabs = tabFilter (if preserveTabs then 0 else tabStop) @@ -1038,7 +1037,7 @@ main = do res <- tex2pdf latexEngine $ f writerOptions doc2 case res of Right pdf -> writeBinary pdf - Left err' -> err 43 $ toString err' + Left err' -> err 43 $ UTF8.toStringLazy err' | otherwise -> selfcontain (f writerOptions doc2 ++ ['\n' | not standalone']) >>= writerFn outputFile . handleEntities diff --git a/tests/Tests/Old.hs b/tests/Tests/Old.hs index 15b6b354e..484d61561 100644 --- a/tests/Tests/Old.hs +++ b/tests/Tests/Old.hs @@ -16,11 +16,11 @@ import Text.Pandoc.Writers.Native ( writeNative ) import Text.Pandoc.Readers.Native ( readNative ) import Prelude hiding ( readFile ) import qualified Data.ByteString.Lazy as B -import Data.ByteString.Lazy.UTF8 (toString) +import Text.Pandoc.UTF8 (toStringLazy) import Text.Printf readFileUTF8 :: FilePath -> IO String -readFileUTF8 f = B.readFile f >>= return . toString +readFileUTF8 f = B.readFile f >>= return . toStringLazy pandocPath :: FilePath pandocPath = ".." "dist" "build" "pandoc" "pandoc"