Data files changes.

* Added `embed_data_files` flag. (not yet used) * Shared no longer exports `findDataFile`. * `readDataFile` now returns a strict bytestring. * Shared now exports `readDataFileUTF8` which returns a string like the old `readDataFile`. * Rewrote modules to use new data file functions and to avoid using functions from Paths_pandoc directly.
2012-12-29 17:44:02 -08:00 · 2012-12-29 17:44:02 -08:00 · 1864bb0994
commit 1864bb0994
parent 32c5a8e2dc
9 changed files with 64 additions and 67 deletions
--- a/benchmark/benchmark-pandoc.hs
+++ b/benchmark/benchmark-pandoc.hs
@ -1,5 +1,5 @@
 import Text.Pandoc
-import Text.Pandoc.Shared (readDataFile, normalize)
+import Text.Pandoc.Shared (readDataFileUTF8, normalize)
 import Criterion.Main
 import Criterion.Config
 import Text.JSON.Generic
@ -35,8 +35,8 @@ main :: IO ()
 main = do
  args <- getArgs
  (conf,_) <- parseArgs defaultConfig{ cfgSamples = Last $ Just 20 }  defaultOptions args
-  inp <- readDataFile (Just ".") "README"
-  inp2 <- readDataFile (Just ".") "tests/testsuite.txt"
+  inp  <- readDataFileUTF8 (Just ".") "README"
+  inp2 <- readDataFileUTF8 (Just ".") "tests/testsuite.txt"
  let opts = def{ readerSmart = True }
  let doc = readMarkdown opts $ inp ++ unlines (drop 3 $ lines inp2)
  let readerBs = map (readerBench doc) readers
--- a/pandoc.cabal
+++ b/pandoc.cabal
@ -187,6 +187,9 @@ Source-repository head
 Flag blaze_html_0_5
  Description:   Use blaze-html 0.5 and blaze-markup 0.5
  Default:       True
+Flag embed_data_files
+  Description:   Embed data files in binary for relocatable executable.
+  Default:       False

 Library
  Build-Depends: base >= 4.2 && <5,
@ -224,6 +227,10 @@ Library
  else
    build-depends:
                 blaze-html >= 0.4.3.0 && < 0.5
+  if flag(embed_data_files)
+     build-depends: file-embed >= 0.0.4 && < 0.1,
+                    template-haskell >= 2.4 && < 2.9
+     cpp-options:   -DEMBED_DATA_FILES
  if impl(ghc >= 7.0.1)
    Ghc-Options:   -O2 -rtsopts -Wall -fno-warn-unused-do-bind -dno-debug-output
  else
--- a/pandoc.hs
+++ b/pandoc.hs
@ -33,7 +33,7 @@ module Main where
 import Text.Pandoc
 import Text.Pandoc.PDF (tex2pdf)
 import Text.Pandoc.Readers.LaTeX (handleIncludes)
-import Text.Pandoc.Shared ( tabFilter, readDataFile, safeRead,
+import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, safeRead,
                            headerShift, normalize, err, warn )
 import Text.Pandoc.XML ( toEntities, fromEntities )
 import Text.Pandoc.SelfContained ( makeSelfContained )
@ -889,24 +889,27 @@ main = do
                           E.catch (UTF8.readFile tp')
                             (\e -> if isDoesNotExistError e
                                       then E.catch
-                                             (readDataFile datadir $
-                                               "templates" </> tp')
+                                             (readDataFileUTF8 datadir
+                                                ("templates" </> tp'))
                                             (\e' -> let _ = (e' :: E.SomeException)
                                                     in throwIO e')
                                       else throwIO e)

  variables' <- case mathMethod of
                      LaTeXMathML Nothing -> do
-                         s <- readDataFile datadir $ "data" </> "LaTeXMathML.js"
+                         s <- readDataFileUTF8 datadir
+                                 ("data" </> "LaTeXMathML.js")
                         return $ ("mathml-script", s) : variables
                      MathML Nothing -> do
-                         s <- readDataFile datadir $ "data"</>"MathMLinHTML.js"
+                         s <- readDataFileUTF8 datadir
+                                 ("data"</>"MathMLinHTML.js")
                         return $ ("mathml-script", s) : variables
                      _ -> return variables

  variables'' <- if "dzslides" `isPrefixOf` writerName'
                    then do
-                        dztempl <- readDataFile datadir $ "dzslides" </> "template.html"
+                        dztempl <- readDataFileUTF8 datadir
+                                     ("dzslides" </> "template.html")
                        let dzcore = unlines $ dropWhile (not . isPrefixOf "<!-- {{{{ dzslides core")
                                             $ lines dztempl
                        return $ ("dzslides-core", dzcore) : variables'
@ -927,15 +930,16 @@ main = do
              then do
                csl <- CSL.parseCSL =<<
                        case mbCsl of
-                            Nothing      -> readDataFile datadir "default.csl"
+                            Nothing      -> readDataFileUTF8 datadir
+                                              "default.csl"
                            Just cslfile -> do
                                  exists <- doesFileExist cslfile
                                  if exists
                                     then UTF8.readFile cslfile
                                     else do
                                       csldir <- getAppUserDataDirectory "csl"
-                                       readDataFile (Just csldir)
-                                          (replaceExtension cslfile "csl")
+                                       readDataFileUTF8 (Just csldir)
+                                           (replaceExtension cslfile "csl")
                abbrevs <- maybe (return []) CSL.readJsonAbbrevFile cslabbrevs
                return $ Just csl { CSL.styleAbbrevs = abbrevs }
              else return Nothing
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@ -41,7 +41,7 @@ import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
 import Data.Char (toLower, isAscii, isAlphaNum)
 import Codec.Compression.GZip as Gzip
 import qualified Data.ByteString.Lazy as L
-import Text.Pandoc.Shared (findDataFile, renderTags')
+import Text.Pandoc.Shared (readDataFile, renderTags')
 import Text.Pandoc.MIME (getMimeType)
 import System.Directory (doesFileExist)
 import Text.Pandoc.UTF8 (toString,  fromString)
@ -55,18 +55,8 @@ getItem userdata f =
                      ".gz" -> getMimeType $ dropExtension f
                      x     -> getMimeType x
       exists <- doesFileExist f
-       if exists
-          then do
-            cont <- B.readFile f
-            return (cont, mime)
-          else do
-            res <- findDataFile userdata f
-            exists' <- doesFileExist res
-            if exists'
-               then do
-                 cont <- B.readFile res
-                 return (cont, mime)
-               else error $ "Could not find `" ++ f ++ "'"
+       cont <- if exists then B.readFile f else readDataFile userdata f
+       return (cont, mime)

 -- TODO - have this return mime type too - then it can work for google
 -- chart API, e.g.
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@ -1,4 +1,4 @@
-{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE DeriveDataTypeable, CPP #-}
 {-
 Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>

@ -64,8 +64,8 @@ module Text.Pandoc.Shared (
                     renderTags',
                     -- * File handling
                     inDirectory,
-                     findDataFile,
                     readDataFile,
+                     readDataFileUTF8,
                     -- * Error handling
                     err,
                     warn,
@ -89,13 +89,18 @@ import System.FilePath ( (</>) )
 import Data.Generics (Typeable, Data)
 import qualified Control.Monad.State as S
 import Control.Monad (msum)
-import Paths_pandoc (getDataFileName)
 import Text.Pandoc.Pretty (charWidth)
 import System.Locale (defaultTimeLocale)
 import Data.Time
 import System.IO (stderr)
 import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
         renderOptions)
+import qualified Data.ByteString as B
+#ifdef EMBED_DATA_FILES
+import Data.FileEmbed
+#else
+import Paths_pandoc (getDataFileName)
+#endif

 --
 -- List processing
@ -499,20 +504,28 @@ inDirectory path action = do
  setCurrentDirectory oldDir
  return result

-- | Get file path for data file, either from specified user data directory,
-- or, if not found there, from Cabal data directory.
-findDataFile :: Maybe FilePath -> FilePath -> IO FilePath
-findDataFile Nothing f = getDataFileName f
-findDataFile (Just u) f = do
-  ex <- doesFileExist (u </> f)
-  if ex
-     then return (u </> f)
-     else getDataFileName f
+readDefaultDataFile :: FilePath -> IO B.ByteString
+readDefaultDataFile fname =
+#ifdef EMBED_DATA_FILES
+  TODO
+#else
+  getDataFileName fname >>= B.readFile
+#endif

 -- | Read file from specified user data directory or, if not found there, from
 -- Cabal data directory.
-readDataFile :: Maybe FilePath -> FilePath -> IO String
-readDataFile userDir fname = findDataFile userDir fname >>= UTF8.readFile
+readDataFile :: Maybe FilePath -> FilePath -> IO B.ByteString
+readDataFile Nothing fname = readDefaultDataFile fname
+readDataFile (Just userDir) fname = do
+  exists <- doesFileExist (userDir </> fname)
+  if exists
+     then B.readFile (userDir </> fname)
+     else readDefaultDataFile fname
+
+-- | Same as 'readDataFile' but returns a String instead of a ByteString.
+readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String
+readDataFileUTF8 userDir fname =
+  UTF8.toString `fmap` readDataFile userDir fname

 --
 -- Error reporting
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@ -80,7 +80,7 @@ import Text.Blaze (preEscapedString, Html)
 #endif
 import Text.Pandoc.UTF8 (fromStringLazy)
 import Data.ByteString.Lazy (ByteString)
-import Text.Pandoc.Shared (readDataFile)
+import Text.Pandoc.Shared (readDataFileUTF8)
 import qualified Control.Exception.Extensible as E (try, IOException)

 -- | Get default template for the specified writer.
@ -98,7 +98,7 @@ getDefaultTemplate user writer = do
       "multimarkdown"   -> getDefaultTemplate user "markdown"
       "markdown_github" -> getDefaultTemplate user "markdown"
       _        -> let fname = "templates" </> "default" <.> format
-                   in  E.try $ readDataFile user fname
+                   in  E.try $ readDataFileUTF8 user fname

 data TemplateState = TemplateState Int [(String,String)]

--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@ -29,14 +29,12 @@ Conversion of 'Pandoc' documents to docx.
 -}
 module Text.Pandoc.Writers.Docx ( writeDocx ) where
 import Data.List ( intercalate )
-import System.FilePath ( (</>) )
 import qualified Data.ByteString.Lazy as B
 import qualified Data.Map as M
 import qualified Text.Pandoc.UTF8 as UTF8
 import System.IO ( stderr )
 import Codec.Archive.Zip
 import Data.Time.Clock.POSIX
-import Paths_pandoc ( getDataFileName )
 import Text.Pandoc.Definition
 import Text.Pandoc.Generic
 import System.Directory
@ -104,15 +102,8 @@ writeDocx opts doc@(Pandoc (Meta tit auths date) _) = do
  refArchive <- liftM toArchive $
       case writerReferenceDocx opts of
             Just f -> B.readFile f
-             Nothing -> do
-               let defaultDocx = getDataFileName "reference.docx" >>= B.readFile
-               case datadir of
-                     Nothing  -> defaultDocx
-                     Just d   -> do
-                        exists <- doesFileExist (d </> "reference.docx")
-                        if exists
-                           then B.readFile (d </> "reference.docx")
-                           else defaultDocx
+             Nothing -> (B.fromChunks . (:[])) `fmap`
+                          readDataFile datadir "reference.docx"

  (newContents, st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc)
                       defaultWriterState
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@ -30,13 +30,13 @@ Conversion of 'Pandoc' documents to EPUB.
 module Text.Pandoc.Writers.EPUB ( writeEPUB2, writeEPUB3 ) where
 import Data.IORef
 import Data.Maybe ( fromMaybe, isNothing )
-import Data.List ( isPrefixOf, isInfixOf, intercalate )
+import Data.List ( isInfixOf, intercalate )
 import System.Environment ( getEnv )
 import Text.Printf (printf)
 import System.FilePath ( (</>), takeBaseName, takeExtension, takeFileName )
 import qualified Data.ByteString.Lazy as B
 import qualified Data.ByteString.Lazy.Char8 as B8
-import Text.Pandoc.UTF8 ( fromStringLazy )
+import Text.Pandoc.UTF8 ( fromStringLazy, toString )
 import Codec.Archive.Zip
 import Data.Time.Clock.POSIX
 import Data.Time
@ -321,7 +321,8 @@ writeEPUB version opts doc@(Pandoc meta _) = do
  -- stylesheet
  stylesheet <- case writerEpubStylesheet opts of
                   Just s  -> return s
-                   Nothing -> readDataFile (writerUserDataDir opts) "epub.css"
+                   Nothing -> toString `fmap`
+                              readDataFile (writerUserDataDir opts) "epub.css"
  let stylesheetEntry = mkEntry "stylesheet.css" $ fromStringLazy stylesheet

  -- construct archive
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@ -35,15 +35,13 @@ import qualified Data.ByteString.Lazy as B
 import Text.Pandoc.UTF8 ( fromStringLazy )
 import Codec.Archive.Zip
 import Data.Time.Clock.POSIX
-import Paths_pandoc ( getDataFileName )
 import Text.Pandoc.Options ( WriterOptions(..) )
-import Text.Pandoc.Shared ( stringify )
+import Text.Pandoc.Shared ( stringify, readDataFile )
 import Text.Pandoc.ImageSize ( readImageSize, sizeInPoints )
 import Text.Pandoc.MIME ( getMimeType )
 import Text.Pandoc.Definition
 import Text.Pandoc.Generic
 import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument )
-import System.Directory
 import Control.Monad (liftM)
 import Network.URI ( unEscapeString )
 import Text.Pandoc.XML
@ -59,15 +57,8 @@ writeODT opts doc@(Pandoc (Meta title _ _) _) = do
  refArchive <- liftM toArchive $
       case writerReferenceODT opts of
             Just f -> B.readFile f
-             Nothing -> do
-               let defaultODT = getDataFileName "reference.odt" >>= B.readFile
-               case datadir of
-                     Nothing  -> defaultODT
-                     Just d   -> do
-                        exists <- doesFileExist (d </> "reference.odt")
-                        if exists
-                           then B.readFile (d </> "reference.odt")
-                           else defaultODT
+             Nothing -> (B.fromChunks . (:[])) `fmap`
+                           readDataFile datadir "reference.odt"
  -- handle pictures
  picEntriesRef <- newIORef ([] :: [Entry])
  let sourceDir = writerSourceDirectory opts