Added --extract-media option.
This has been documented to affect the epub and docx readers, so we should either add the epub reader before the next release or change the documentation.
This commit is contained in:
parent
b12d2ea20a
commit
b5210bc175
2 changed files with 54 additions and 17 deletions
8
README
8
README
|
@ -318,7 +318,13 @@ Reader options
|
|||
classes, respectively. The author and time of change is
|
||||
included. *all* is useful for scripting: only accepting changes
|
||||
from a certain reviewer, say, or before a certain date. This
|
||||
option only affects the Docx reader.
|
||||
option only affects the docx reader.
|
||||
|
||||
`--extract-media=`*DIR*
|
||||
: Extract images and other media contained in a docx or epub container
|
||||
to the path *DIR*, creating it if necessary, and adjust the images
|
||||
references in the document so they point to the extracted files.
|
||||
This option only affects the docx and epub readers.
|
||||
|
||||
General writer options
|
||||
----------------------
|
||||
|
|
63
pandoc.hs
63
pandoc.hs
|
@ -33,6 +33,7 @@ module Main where
|
|||
import Text.Pandoc
|
||||
import Text.Pandoc.Builder (setMeta)
|
||||
import Text.Pandoc.PDF (makePDF)
|
||||
import Text.Pandoc.Walk (walk)
|
||||
import Text.Pandoc.Readers.LaTeX (handleIncludes)
|
||||
import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, readDataFile,
|
||||
safeRead, headerShift, normalize, err, warn,
|
||||
|
@ -49,7 +50,8 @@ import System.Console.GetOpt
|
|||
import Data.Char ( toLower )
|
||||
import Data.List ( intercalate, isPrefixOf, isSuffixOf, sort )
|
||||
import System.Directory ( getAppUserDataDirectory, findExecutable,
|
||||
doesFileExist, Permissions(..), getPermissions )
|
||||
doesFileExist, Permissions(..), getPermissions,
|
||||
createDirectoryIfMissing )
|
||||
import System.IO ( stdout, stderr )
|
||||
import System.IO.Error ( isDoesNotExistError )
|
||||
import qualified Control.Exception as E
|
||||
|
@ -182,6 +184,7 @@ data Opt = Opt
|
|||
, optAscii :: Bool -- ^ Use ascii characters only in html
|
||||
, optTeXLigatures :: Bool -- ^ Use TeX ligatures for quotes/dashes
|
||||
, optDefaultImageExtension :: String -- ^ Default image extension
|
||||
, optExtractMedia :: Maybe FilePath -- ^ Path to extract embedded media
|
||||
, optTrace :: Bool -- ^ Print debug information
|
||||
, optTrackChanges :: TrackChanges -- ^ Accept or reject MS Word track-changes.
|
||||
}
|
||||
|
@ -239,6 +242,7 @@ defaultOpts = Opt
|
|||
, optAscii = False
|
||||
, optTeXLigatures = True
|
||||
, optDefaultImageExtension = ""
|
||||
, optExtractMedia = Nothing
|
||||
, optTrace = False
|
||||
, optTrackChanges = AcceptChanges
|
||||
}
|
||||
|
@ -343,6 +347,26 @@ options =
|
|||
"NUMBER")
|
||||
"" -- "Tab stop (default 4)"
|
||||
|
||||
, Option "" ["track-changes"]
|
||||
(ReqArg
|
||||
(\arg opt -> do
|
||||
action <- case arg of
|
||||
"accept" -> return AcceptChanges
|
||||
"reject" -> return RejectChanges
|
||||
"all" -> return AllChanges
|
||||
_ -> err 6
|
||||
("Unknown option for track-changes: " ++ arg)
|
||||
return opt { optTrackChanges = action })
|
||||
"accept|reject|all")
|
||||
"" -- "Accepting or reject MS Word track-changes.""
|
||||
|
||||
, Option "" ["extract-media"]
|
||||
(ReqArg
|
||||
(\arg opt -> do
|
||||
return opt { optExtractMedia = Just arg })
|
||||
"PATH")
|
||||
"" -- "Directory to which to extract embedded media"
|
||||
|
||||
, Option "s" ["standalone"]
|
||||
(NoArg
|
||||
(\opt -> return opt { optStandalone = True }))
|
||||
|
@ -787,19 +811,6 @@ options =
|
|||
(\opt -> return opt { optTrace = True }))
|
||||
"" -- "Turn on diagnostic tracing in readers."
|
||||
|
||||
, Option "" ["track-changes"]
|
||||
(ReqArg
|
||||
(\arg opt -> do
|
||||
action <- case arg of
|
||||
"accept" -> return AcceptChanges
|
||||
"reject" -> return RejectChanges
|
||||
"all" -> return AllChanges
|
||||
_ -> err 6
|
||||
("Unknown option for track-changes: " ++ arg)
|
||||
return opt { optTrackChanges = action })
|
||||
"accept|reject|all")
|
||||
"" -- "Accepting or reject MS Word track-changes.""
|
||||
|
||||
, Option "" ["dump-args"]
|
||||
(NoArg
|
||||
(\opt -> return opt { optDumpArgs = True }))
|
||||
|
@ -998,6 +1009,7 @@ main = do
|
|||
, optAscii = ascii
|
||||
, optTeXLigatures = texLigatures
|
||||
, optDefaultImageExtension = defaultImageExtension
|
||||
, optExtractMedia = mbExtractMedia
|
||||
, optTrace = trace
|
||||
, optTrackChanges = trackChanges
|
||||
} = opts
|
||||
|
@ -1196,13 +1208,32 @@ main = do
|
|||
then handleIncludes
|
||||
else return
|
||||
|
||||
let writeMedia :: FilePath -> (FilePath, B.ByteString) -> IO ()
|
||||
writeMedia dir (subpath, bs) = do
|
||||
-- we join and split to convert a/b/c to a\b\c on Windows;
|
||||
-- in zip containers all paths use /
|
||||
let fullpath = dir </> joinPath (splitPath subpath)
|
||||
createDirectoryIfMissing True $ takeDirectory fullpath
|
||||
warn $ "extracting " ++ fullpath
|
||||
B.writeFile fullpath bs
|
||||
|
||||
let adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
|
||||
adjustImagePath dir paths (Image lab (src, tit))
|
||||
| src `elem` paths = Image lab (dir ++ "/" ++ src, tit)
|
||||
adjustImagePath _ _ x = x
|
||||
|
||||
doc <- case reader of
|
||||
StringReader r->
|
||||
readSources sources >>=
|
||||
handleIncludes' . convertTabs . intercalate "\n" >>=
|
||||
r readerOpts
|
||||
ByteStringReader r -> readFiles sources >>= r readerOpts >>=
|
||||
(return . fst)
|
||||
ByteStringReader r -> do
|
||||
(d, media) <- readFiles sources >>= r readerOpts
|
||||
case mbExtractMedia of
|
||||
Just dir | not (M.null media) -> do
|
||||
mapM_ (writeMedia dir) $ M.toList media
|
||||
return $ walk (adjustImagePath dir (M.keys media)) d
|
||||
_ -> return d
|
||||
|
||||
|
||||
let doc0 = M.foldWithKey setMeta doc metadata
|
||||
|
|
Loading…
Add table
Reference in a new issue