Removed need for utf8-string package.

* Depend on text.
* Expose Text.Pandoc.UTF8.
* Text.Pandoc.UTF8 now exports toString, fromString,
  toStringLazy, fromStringLazy.
* These are used instead of the old utf8-string functions.
This commit is contained in:
John MacFarlane 2012-09-25 19:54:21 -07:00
parent 833977416f
commit 6ad7ac1239
9 changed files with 63 additions and 40 deletions

View file

@ -209,8 +209,8 @@ Library
process >= 1 && < 1.2,
directory >= 1 && < 1.3,
bytestring >= 0.9 && < 1.0,
text >= 0.11 && < 0.12,
zip-archive >= 0.1.1.7 && < 0.2,
utf8-string >= 0.3 && < 0.4,
old-locale >= 1 && < 1.1,
time >= 1.2 && < 1.5,
HTTP >= 4000.0.5 && < 4000.3,
@ -289,11 +289,11 @@ Library
Text.Pandoc.Writers.EPUB,
Text.Pandoc.Writers.FB2,
Text.Pandoc.PDF,
Text.Pandoc.UTF8,
Text.Pandoc.Templates,
Text.Pandoc.Biblio,
Text.Pandoc.SelfContained
Other-Modules: Text.Pandoc.XML,
Text.Pandoc.UTF8,
Text.Pandoc.MIME,
Text.Pandoc.UUID,
Text.Pandoc.ImageSize,
@ -319,8 +319,8 @@ Executable pandoc
process >= 1 && < 1.2,
directory >= 1 && < 1.3,
bytestring >= 0.9 && < 1.0,
text >= 0.11 && < 0.12,
zip-archive >= 0.1.1.7 && < 0.2,
utf8-string >= 0.3 && < 0.4,
old-locale >= 1 && < 1.1,
time >= 1.2 && < 1.5,
HTTP >= 4000.0.5 && < 4000.3,
@ -378,8 +378,8 @@ Test-Suite test-pandoc
syb >= 0.1 && < 0.4,
pandoc,
pandoc-types >= 1.10 && < 1.11,
utf8-string >= 0.3 && < 0.4,
bytestring >= 0.9 && < 1.0,
text >= 0.11 && < 0.12,
directory >= 1 && < 1.3,
filepath >= 1.1 && < 1.4,
process >= 1 && < 1.2,

View file

@ -37,7 +37,6 @@ import Network.HTTP
import Data.ByteString.Base64
import qualified Data.ByteString.Char8 as B
import Data.ByteString (ByteString)
import Data.ByteString.UTF8 (toString, fromString)
import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
import Data.Char (toLower, isAscii, isAlphaNum)
import Codec.Compression.GZip as Gzip
@ -45,6 +44,7 @@ import qualified Data.ByteString.Lazy as L
import Text.Pandoc.Shared (findDataFile, renderTags')
import Text.Pandoc.MIME (getMimeType)
import System.Directory (doesFileExist)
import Text.Pandoc.UTF8 (toString, fromString)
getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String)
getItem userdata f =

View file

@ -78,7 +78,8 @@ import Text.Blaze.Internal (preEscapedString)
#else
import Text.Blaze (preEscapedString, Html)
#endif
import Data.ByteString.Lazy.UTF8 (ByteString, fromString)
import Text.Pandoc.UTF8 (fromStringLazy)
import Data.ByteString.Lazy (ByteString)
import Text.Pandoc.Shared (readDataFile)
import qualified Control.Exception.Extensible as E (try, IOException)
@ -118,7 +119,7 @@ instance TemplateTarget String where
toTarget = id
instance TemplateTarget ByteString where
toTarget = fromString
toTarget = fromStringLazy
instance TemplateTarget Html where
toTarget = preEscapedString

View file

@ -35,6 +35,10 @@ module Text.Pandoc.UTF8 ( readFile
, hPutStr
, hPutStrLn
, hGetContents
, toString
, fromString
, toStringLazy
, fromStringLazy
, encodePath
, decodeArg
)
@ -50,6 +54,13 @@ import System.IO hiding (readFile, writeFile, getContents,
putStr, putStrLn, hPutStr, hPutStrLn, hGetContents)
import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn )
import qualified System.IO as IO
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text.Encoding as T
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import Data.Text.Encoding.Error
readFile :: FilePath -> IO String
readFile f = do
@ -75,15 +86,28 @@ hPutStrLn :: Handle -> String -> IO ()
hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s
hGetContents :: Handle -> IO String
hGetContents h = hSetEncoding h utf8_bom >> hSetNewlineMode h universalNewlineMode
hGetContents h = hSetEncoding h utf8_bom
>> hSetNewlineMode h universalNewlineMode
>> IO.hGetContents h
toString :: B.ByteString -> String
toString = T.unpack . T.decodeUtf8With lenientDecode
fromString :: String -> B.ByteString
fromString = T.encodeUtf8 . T.pack
toStringLazy :: BL.ByteString -> String
toStringLazy = TL.unpack . TL.decodeUtf8With lenientDecode
fromStringLazy :: String -> BL.ByteString
fromStringLazy = TL.encodeUtf8 . TL.pack
encodePath :: FilePath -> FilePath
decodeArg :: String -> String
#if MIN_VERSION_base(4,4,0)
encodePath = id
decodeArg = id
#else
encodePath = encodeString
decodeArg = decodeString
encodePath = B.unpack . fromString
decodeArg = toString . B.pack
#endif

View file

@ -32,8 +32,7 @@ import Data.List ( intercalate )
import System.FilePath ( (</>) )
import qualified Data.ByteString.Lazy as B
import qualified Data.Map as M
import Data.ByteString.Lazy.UTF8 ( fromString, toString )
import Text.Pandoc.UTF8 as UTF8
import qualified Text.Pandoc.UTF8 as UTF8
import System.IO ( stderr )
import Codec.Archive.Zip
import Data.Time.Clock.POSIX
@ -126,7 +125,7 @@ writeDocx opts doc@(Pandoc (Meta tit auths date) _) = do
let newrels = map toImgRel imgs
let relpath = "word/_rels/document.xml.rels"
let reldoc = case findEntryByPath relpath refArchive >>=
parseXMLDoc . toString . fromEntry of
parseXMLDoc . UTF8.toStringLazy . fromEntry of
Just d -> d
Nothing -> error $ relpath ++ "missing in reference docx"
let reldoc' = reldoc{ elContent = elContent reldoc ++ map Elem newrels }
@ -138,21 +137,21 @@ writeDocx opts doc@(Pandoc (Meta tit auths date) _) = do
let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] ()
let newrels' = map toLinkRel $ M.toList $ stExternalLinks st
let reldoc'' = reldoc' { elContent = elContent reldoc' ++ map Elem newrels' }
let relEntry = toEntry relpath epochtime $ fromString $ showTopElement' reldoc''
let contentEntry = toEntry "word/document.xml" epochtime $ fromString $ showTopElement' newContents
let relEntry = toEntry relpath epochtime $ UTF8.fromStringLazy $ showTopElement' reldoc''
let contentEntry = toEntry "word/document.xml" epochtime $ UTF8.fromStringLazy $ showTopElement' newContents
-- styles
let newstyles = styleToOpenXml $ writerHighlightStyle opts
let stylepath = "word/styles.xml"
let styledoc = case findEntryByPath stylepath refArchive >>=
parseXMLDoc . toString . fromEntry of
parseXMLDoc . UTF8.toStringLazy . fromEntry of
Just d -> d
Nothing -> error $ "Unable to parse " ++ stylepath ++
" from reference.docx"
let styledoc' = styledoc{ elContent = elContent styledoc ++ map Elem newstyles }
let styleEntry = toEntry stylepath epochtime $ fromString $ showTopElement' styledoc'
let styleEntry = toEntry stylepath epochtime $ UTF8.fromStringLazy $ showTopElement' styledoc'
-- construct word/numbering.xml
let numpath = "word/numbering.xml"
let numEntry = toEntry numpath epochtime $ fromString $ showTopElement'
let numEntry = toEntry numpath epochtime $ UTF8.fromStringLazy $ showTopElement'
$ mkNumbering (stNumStyles st) (stLists st)
let docPropsPath = "docProps/core.xml"
let docProps = mknode "cp:coreProperties"
@ -166,16 +165,16 @@ writeDocx opts doc@(Pandoc (Meta tit auths date) _) = do
(maybe "" id $ normalizeDate $ stringify date)
: mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] () -- put current time here
: map (mknode "dc:creator" [] . stringify) auths
let docPropsEntry = toEntry docPropsPath epochtime $ fromString $ showTopElement' docProps
let docPropsEntry = toEntry docPropsPath epochtime $ UTF8.fromStringLazy $ showTopElement' docProps
let relsPath = "_rels/.rels"
rels <- case findEntryByPath relsPath refArchive of
Just e -> return $ toString $ fromEntry e
Just e -> return $ UTF8.toStringLazy $ fromEntry e
Nothing -> err 57 "could not find .rels/_rels in reference docx"
-- fix .rels/_rels, which can get screwed up when reference.docx is edited by Word
let rels' = substitute "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties"
"http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties"
rels
let relsEntry = toEntry relsPath epochtime $ fromString rels'
let relsEntry = toEntry relsPath epochtime $ UTF8.fromStringLazy rels'
let archive = foldr addEntryToArchive refArchive $
relsEntry : contentEntry : relEntry : numEntry : styleEntry : docPropsEntry : imageEntries
return $ fromArchive archive

View file

@ -35,7 +35,7 @@ import System.Environment ( getEnv )
import Text.Printf (printf)
import System.FilePath ( (</>), (<.>), takeBaseName, takeExtension, takeFileName )
import qualified Data.ByteString.Lazy as B
import Data.ByteString.Lazy.UTF8 ( fromString )
import Text.Pandoc.UTF8 ( fromStringLazy )
import Codec.Archive.Zip
import Data.Time.Clock.POSIX
import Text.Pandoc.Shared hiding ( Element )
@ -82,7 +82,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
Nothing -> return ([],[])
Just img -> do
let coverImage = "cover-image" ++ takeExtension img
let cpContent = fromString $ writeHtmlString
let cpContent = fromStringLazy $ writeHtmlString
opts'{writerTemplate = coverImageTemplate,
writerVariables = ("coverimage",coverImage):vars}
(Pandoc meta [])
@ -91,7 +91,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
, [mkEntry coverImage imgContent] )
-- title page
let tpContent = fromString $ writeHtmlString
let tpContent = fromStringLazy $ writeHtmlString
opts'{writerTemplate = titlePageTemplate}
(Pandoc meta [])
let tpEntry = mkEntry "title_page.xhtml" tpContent
@ -125,7 +125,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
let chapterToEntry :: Int -> Pandoc -> Entry
chapterToEntry num chap = mkEntry
(showChapter num) $
fromString $ chapToHtml chap
fromStringLazy $ chapToHtml chap
let chapterEntries = zipWith chapterToEntry [1..] chapters
-- contents.opf
@ -157,7 +157,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
let plainTitle = plainify $ docTitle meta
let plainAuthors = map plainify $ docAuthors meta
let plainDate = maybe "" id $ normalizeDate $ stringify $ docDate meta
let contentsData = fromString $ ppTopElement $
let contentsData = fromStringLazy $ ppTopElement $
unode "package" ! [("version","2.0")
,("xmlns","http://www.idpf.org/2007/opf")
,("unique-identifier","BookId")] $
@ -189,7 +189,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
, unode "content" ! [("src",
eRelativePath ent)] $ ()
]
let tocData = fromString $ ppTopElement $
let tocData = fromStringLazy $ ppTopElement $
unode "ncx" ! [("version","2005-1")
,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $
[ unode "head" $
@ -214,10 +214,10 @@ writeEPUB opts doc@(Pandoc meta _) = do
let tocEntry = mkEntry "toc.ncx" tocData
-- mimetype
let mimetypeEntry = mkEntry "mimetype" $ fromString "application/epub+zip"
let mimetypeEntry = mkEntry "mimetype" $ fromStringLazy "application/epub+zip"
-- container.xml
let containerData = fromString $ ppTopElement $
let containerData = fromStringLazy $ ppTopElement $
unode "container" ! [("version","1.0")
,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $
unode "rootfiles" $
@ -226,7 +226,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
let containerEntry = mkEntry "META-INF/container.xml" containerData
-- com.apple.ibooks.display-options.xml
let apple = fromString $ ppTopElement $
let apple = fromStringLazy $ ppTopElement $
unode "display_options" $
unode "platform" ! [("name","*")] $
unode "option" ! [("name","specified-fonts")] $ "true"
@ -236,7 +236,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
stylesheet <- case writerEpubStylesheet opts of
Just s -> return s
Nothing -> readDataFile (writerUserDataDir opts) "epub.css"
let stylesheetEntry = mkEntry "stylesheet.css" $ fromString stylesheet
let stylesheetEntry = mkEntry "stylesheet.css" $ fromStringLazy stylesheet
-- construct archive
let archive = foldr addEntryToArchive emptyArchive

View file

@ -32,7 +32,7 @@ import Data.IORef
import Data.List ( isPrefixOf )
import System.FilePath ( (</>), takeExtension )
import qualified Data.ByteString.Lazy as B
import Data.ByteString.Lazy.UTF8 ( fromString )
import Text.Pandoc.UTF8 ( fromStringLazy )
import Codec.Archive.Zip
import Data.Time.Clock.POSIX
import Paths_pandoc ( getDataFileName )
@ -74,7 +74,7 @@ writeODT opts doc@(Pandoc (Meta title _ _) _) = do
doc' <- bottomUpM (transformPic sourceDir picEntriesRef) doc
let newContents = writeOpenDocument opts{writerWrapText = False} doc'
epochtime <- floor `fmap` getPOSIXTime
let contentEntry = toEntry "content.xml" epochtime $ fromString newContents
let contentEntry = toEntry "content.xml" epochtime $ fromStringLazy newContents
picEntries <- readIORef picEntriesRef
let archive = foldr addEntryToArchive refArchive $ contentEntry : picEntries
-- construct META-INF/manifest.xml based on archive
@ -86,7 +86,7 @@ writeODT opts doc@(Pandoc (Meta title _ _) _) = do
]
let files = [ ent | ent <- filesInArchive archive, not ("META-INF" `isPrefixOf` ent) ]
let manifestEntry = toEntry "META-INF/manifest.xml" epochtime
$ fromString $ show
$ fromStringLazy $ show
$ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
$$
( inTags True "manifest:manifest"
@ -100,7 +100,7 @@ writeODT opts doc@(Pandoc (Meta title _ _) _) = do
)
let archive' = addEntryToArchive manifestEntry archive
let metaEntry = toEntry "meta.xml" epochtime
$ fromString $ show
$ fromStringLazy $ show
$ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
$$
( inTags True "office:document-meta"

View file

@ -57,7 +57,6 @@ import Control.Monad (when, unless, liftM)
import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..))
import Network.URI (parseURI, isURI, URI(..))
import qualified Data.ByteString.Lazy as B
import Data.ByteString.Lazy.UTF8 (toString)
import Text.CSL.Reference (Reference(..))
copyrightMessage :: String
@ -988,7 +987,7 @@ main = do
readURI u
_ -> UTF8.readFile src
readURI uri = simpleHTTP (mkRequest GET uri) >>= getResponseBody >>=
return . toString -- treat all as UTF8
return . UTF8.toStringLazy -- treat all as UTF8
let convertTabs = tabFilter (if preserveTabs then 0 else tabStop)
@ -1038,7 +1037,7 @@ main = do
res <- tex2pdf latexEngine $ f writerOptions doc2
case res of
Right pdf -> writeBinary pdf
Left err' -> err 43 $ toString err'
Left err' -> err 43 $ UTF8.toStringLazy err'
| otherwise -> selfcontain (f writerOptions doc2 ++
['\n' | not standalone'])
>>= writerFn outputFile . handleEntities

View file

@ -16,11 +16,11 @@ import Text.Pandoc.Writers.Native ( writeNative )
import Text.Pandoc.Readers.Native ( readNative )
import Prelude hiding ( readFile )
import qualified Data.ByteString.Lazy as B
import Data.ByteString.Lazy.UTF8 (toString)
import Text.Pandoc.UTF8 (toStringLazy)
import Text.Printf
readFileUTF8 :: FilePath -> IO String
readFileUTF8 f = B.readFile f >>= return . toString
readFileUTF8 f = B.readFile f >>= return . toStringLazy
pandocPath :: FilePath
pandocPath = ".." </> "dist" </> "build" </> "pandoc" </> "pandoc"