pandoc/src/Text/Pandoc/Shared.hs

694 lines
26 KiB
Haskell
Raw Normal View History

2013-08-28 08:43:51 -07:00
{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
FlexibleContexts #-}
{-
Copyright (C) 2006-2013 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Shared
Copyright : Copyright (C) 2006-2013 John MacFarlane
2012-07-26 22:32:53 -07:00
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Utility functions and definitions used by the various Pandoc modules.
-}
module Text.Pandoc.Shared (
-- * List processing
splitBy,
splitByIndices,
splitStringByIndices,
substitute,
-- * Text processing
backslashEscapes,
escapeStringUsing,
stripTrailingNewlines,
trim,
triml,
trimr,
stripFirstAndLast,
camelCaseToHyphenated,
toRomanNumeral,
escapeURI,
2010-07-06 23:17:06 -07:00
tabFilter,
-- * Date/time
normalizeDate,
-- * Pandoc block and inline list processing
orderedListMarkers,
normalizeSpaces,
normalize,
stringify,
compactify,
compactify',
Element (..),
hierarchicalize,
uniqueIdent,
isHeaderBlock,
headerShift,
2013-01-07 20:12:05 -08:00
isTightList,
addMetaField,
makeMeta,
-- * TagSoup HTML handling
renderTags',
-- * File handling
inDirectory,
readDataFile,
readDataFileUTF8,
fetchItem,
openURL,
-- * Error handling
err,
2012-08-09 07:52:39 -07:00
warn,
-- * Safe read
safeRead
) where
import Text.Pandoc.Definition
2013-08-10 18:13:38 -07:00
import Text.Pandoc.Walk
import Text.Pandoc.Generic
import Text.Pandoc.Builder (Blocks, ToMetaValue(..))
import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.UTF8 as UTF8
import System.Environment (getProgName)
import System.Exit (exitWith, ExitCode(..))
import Data.Char ( toLower, isLower, isUpper, isAlpha,
isLetter, isDigit, isSpace )
import Data.List ( find, isPrefixOf, intercalate )
import qualified Data.Map as M
import Network.URI ( escapeURIString, isURI, unEscapeString )
import System.Directory
import Text.Pandoc.MIME (getMimeType)
import System.FilePath ( (</>), takeExtension, dropExtension )
import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
import qualified Control.Exception as E
import Control.Monad (msum, unless)
import Text.Pandoc.Pretty (charWidth)
import System.Locale (defaultTimeLocale)
import Data.Time
import System.IO (stderr)
import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
renderOptions)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as B8
2013-08-10 18:13:38 -07:00
import Text.Pandoc.Compat.Monoid
import Data.ByteString.Base64 (decodeLenient)
#ifdef EMBED_DATA_FILES
import Text.Pandoc.Data (dataFiles)
import System.FilePath ( joinPath, splitDirectories )
#else
import Paths_pandoc (getDataFileName)
#endif
#ifdef HTTP_CONDUIT
import Data.ByteString.Lazy (toChunks)
import Network.HTTP.Conduit (httpLbs, parseUrl, withManager,
responseBody, responseHeaders)
import Network.HTTP.Types.Header ( hContentType)
import Network (withSocketsDo)
#else
import Network.URI (parseURI)
import Network.HTTP (findHeader, rspBody,
RequestMethod(..), HeaderName(..), mkRequest)
import Network.Browser (browse, setAllowRedirects, setOutHandler, request)
#endif
--
-- List processing
--
-- | Split list by groups of one or more sep.
splitBy :: (a -> Bool) -> [a] -> [[a]]
splitBy _ [] = []
splitBy isSep lst =
let (first, rest) = break isSep lst
rest' = dropWhile isSep rest
in first:(splitBy isSep rest')
splitByIndices :: [Int] -> [a] -> [[a]]
splitByIndices [] lst = [lst]
splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y - x) xs) rest)
where (first, rest) = splitAt x lst
-- | Split string into chunks divided at specified indices.
splitStringByIndices :: [Int] -> [Char] -> [[Char]]
splitStringByIndices [] lst = [lst]
splitStringByIndices (x:xs) lst =
let (first, rest) = splitAt' x lst in
first : (splitStringByIndices (map (\y -> y - x) xs) rest)
splitAt' :: Int -> [Char] -> ([Char],[Char])
splitAt' _ [] = ([],[])
splitAt' n xs | n <= 0 = ([],xs)
splitAt' n (x:xs) = (x:ys,zs)
where (ys,zs) = splitAt' (n - charWidth x) xs
-- | Replace each occurrence of one sublist in a list with another.
substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
substitute _ _ [] = []
substitute [] _ xs = xs
substitute target replacement lst@(x:xs) =
if target `isPrefixOf` lst
then replacement ++ substitute target replacement (drop (length target) lst)
else x : substitute target replacement xs
--
-- Text processing
--
-- | Returns an association list of backslash escapes for the
-- designated characters.
backslashEscapes :: [Char] -- ^ list of special characters to escape
-> [(Char, String)]
backslashEscapes = map (\ch -> (ch, ['\\',ch]))
-- | Escape a string of characters, using an association list of
-- characters and strings.
escapeStringUsing :: [(Char, String)] -> String -> String
escapeStringUsing _ [] = ""
2012-07-26 22:32:53 -07:00
escapeStringUsing escapeTable (x:xs) =
case (lookup x escapeTable) of
Just str -> str ++ rest
Nothing -> x:rest
where rest = escapeStringUsing escapeTable xs
-- | Strip trailing newlines from string.
stripTrailingNewlines :: String -> String
stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
-- | Remove leading and trailing space (including newlines) from string.
trim :: String -> String
trim = triml . trimr
-- | Remove leading space (including newlines) from string.
triml :: String -> String
triml = dropWhile (`elem` " \r\n\t")
-- | Remove trailing space (including newlines) from string.
trimr :: String -> String
trimr = reverse . triml . reverse
-- | Strip leading and trailing characters from string
stripFirstAndLast :: String -> String
stripFirstAndLast str =
drop 1 $ take ((length str) - 1) str
2012-07-26 22:32:53 -07:00
-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
camelCaseToHyphenated :: String -> String
camelCaseToHyphenated [] = ""
camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
a:'-':(toLower b):(camelCaseToHyphenated rest)
camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
-- | Convert number < 4000 to uppercase roman numeral.
toRomanNumeral :: Int -> String
toRomanNumeral x =
if x >= 4000 || x < 0
then "?"
else case x of
_ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000)
_ | x >= 900 -> "CM" ++ toRomanNumeral (x - 900)
_ | x >= 500 -> "D" ++ toRomanNumeral (x - 500)
_ | x >= 400 -> "CD" ++ toRomanNumeral (x - 400)
_ | x >= 100 -> "C" ++ toRomanNumeral (x - 100)
_ | x >= 90 -> "XC" ++ toRomanNumeral (x - 90)
_ | x >= 50 -> "L" ++ toRomanNumeral (x - 50)
_ | x >= 40 -> "XL" ++ toRomanNumeral (x - 40)
_ | x >= 10 -> "X" ++ toRomanNumeral (x - 10)
_ | x >= 9 -> "IX" ++ toRomanNumeral (x - 5)
_ | x >= 5 -> "V" ++ toRomanNumeral (x - 5)
_ | x >= 4 -> "IV" ++ toRomanNumeral (x - 4)
_ | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
_ -> ""
-- | Escape whitespace in URI.
escapeURI :: String -> String
escapeURI = escapeURIString (not . isSpace)
2010-03-23 15:34:53 -07:00
2010-07-06 23:17:06 -07:00
-- | Convert tabs to spaces and filter out DOS line endings.
-- Tabs will be preserved if tab stop is set to 0.
tabFilter :: Int -- ^ Tab stop
-> String -- ^ Input
-> String
tabFilter tabStop =
let go _ [] = ""
go _ ('\n':xs) = '\n' : go tabStop xs
go _ ('\r':'\n':xs) = '\n' : go tabStop xs
go _ ('\r':xs) = '\n' : go tabStop xs
go spsToNextStop ('\t':xs) =
if tabStop == 0
then '\t' : go tabStop xs
else replicate spsToNextStop ' ' ++ go tabStop xs
go 1 (x:xs) =
x : go tabStop xs
go spsToNextStop (x:xs) =
x : go (spsToNextStop - 1) xs
in go tabStop
--
-- Date/time
--
-- | Parse a date and convert (if possible) to "YYYY-MM-DD" format.
normalizeDate :: String -> Maybe String
normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
(msum $ map (\fs -> parsetimeWith fs s) formats :: Maybe Day)
where parsetimeWith = parseTime defaultTimeLocale
formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
"%d %B %Y", "%b. %d, %Y", "%B %d, %Y", "%Y"]
--
-- Pandoc block and inline list processing
--
-- | Generate infinite lazy list of markers for an ordered list,
-- depending on list attributes.
orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
2012-07-26 22:32:53 -07:00
orderedListMarkers (start, numstyle, numdelim) =
let singleton c = [c]
nums = case numstyle of
DefaultStyle -> map show [start..]
Example -> map show [start..]
Decimal -> map show [start..]
2012-07-26 22:32:53 -07:00
UpperAlpha -> drop (start - 1) $ cycle $
map singleton ['A'..'Z']
LowerAlpha -> drop (start - 1) $ cycle $
map singleton ['a'..'z']
UpperRoman -> map toRomanNumeral [start..]
LowerRoman -> map (map toLower . toRomanNumeral) [start..]
inDelim str = case numdelim of
DefaultDelim -> str ++ "."
Period -> str ++ "."
OneParen -> str ++ ")"
TwoParens -> "(" ++ str ++ ")"
in map inDelim nums
-- | Normalize a list of inline elements: remove leading and trailing
-- @Space@ elements, collapse double @Space@s into singles, and
-- remove empty Str elements.
normalizeSpaces :: [Inline] -> [Inline]
normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty
where cleanup [] = []
cleanup (Space:rest) = case dropWhile isSpaceOrEmpty rest of
[] -> []
(x:xs) -> Space : x : cleanup xs
cleanup ((Str ""):rest) = cleanup rest
cleanup (x:rest) = x : cleanup rest
isSpaceOrEmpty :: Inline -> Bool
isSpaceOrEmpty Space = True
isSpaceOrEmpty (Str "") = True
isSpaceOrEmpty _ = False
-- | Normalize @Pandoc@ document, consolidating doubled 'Space's,
-- combining adjacent 'Str's and 'Emph's, remove 'Null's and
-- empty elements, etc.
normalize :: (Eq a, Data a) => a -> a
normalize = topDown removeEmptyBlocks .
topDown consolidateInlines .
bottomUp (removeEmptyInlines . removeTrailingInlineSpaces)
removeEmptyBlocks :: [Block] -> [Block]
removeEmptyBlocks (Null : xs) = removeEmptyBlocks xs
removeEmptyBlocks (BulletList [] : xs) = removeEmptyBlocks xs
removeEmptyBlocks (OrderedList _ [] : xs) = removeEmptyBlocks xs
removeEmptyBlocks (DefinitionList [] : xs) = removeEmptyBlocks xs
removeEmptyBlocks (RawBlock _ [] : xs) = removeEmptyBlocks xs
removeEmptyBlocks (x:xs) = x : removeEmptyBlocks xs
removeEmptyBlocks [] = []
removeEmptyInlines :: [Inline] -> [Inline]
removeEmptyInlines (Emph [] : zs) = removeEmptyInlines zs
removeEmptyInlines (Strong [] : zs) = removeEmptyInlines zs
removeEmptyInlines (Subscript [] : zs) = removeEmptyInlines zs
removeEmptyInlines (Superscript [] : zs) = removeEmptyInlines zs
removeEmptyInlines (SmallCaps [] : zs) = removeEmptyInlines zs
removeEmptyInlines (Strikeout [] : zs) = removeEmptyInlines zs
removeEmptyInlines (RawInline _ [] : zs) = removeEmptyInlines zs
removeEmptyInlines (Code _ [] : zs) = removeEmptyInlines zs
removeEmptyInlines (Str "" : zs) = removeEmptyInlines zs
removeEmptyInlines (x : xs) = x : removeEmptyInlines xs
removeEmptyInlines [] = []
removeTrailingInlineSpaces :: [Inline] -> [Inline]
2011-02-04 18:32:54 -08:00
removeTrailingInlineSpaces = reverse . removeLeadingInlineSpaces . reverse
removeLeadingInlineSpaces :: [Inline] -> [Inline]
removeLeadingInlineSpaces = dropWhile isSpaceOrEmpty
consolidateInlines :: [Inline] -> [Inline]
consolidateInlines (Str x : ys) =
case concat (x : map fromStr strs) of
"" -> consolidateInlines rest
n -> Str n : consolidateInlines rest
where
(strs, rest) = span isStr ys
isStr (Str _) = True
isStr _ = False
fromStr (Str z) = z
fromStr _ = error "consolidateInlines - fromStr - not a Str"
2010-12-26 12:01:33 -08:00
consolidateInlines (Space : ys) = Space : rest
where isSp Space = True
isSp _ = False
rest = consolidateInlines $ dropWhile isSp ys
consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $
Emph (xs ++ ys) : zs
consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $
Strong (xs ++ ys) : zs
consolidateInlines (Subscript xs : Subscript ys : zs) = consolidateInlines $
Subscript (xs ++ ys) : zs
consolidateInlines (Superscript xs : Superscript ys : zs) = consolidateInlines $
Superscript (xs ++ ys) : zs
consolidateInlines (SmallCaps xs : SmallCaps ys : zs) = consolidateInlines $
SmallCaps (xs ++ ys) : zs
consolidateInlines (Strikeout xs : Strikeout ys : zs) = consolidateInlines $
Strikeout (xs ++ ys) : zs
consolidateInlines (RawInline f x : RawInline f' y : zs) | f == f' =
consolidateInlines $ RawInline f (x ++ y) : zs
consolidateInlines (Code a1 x : Code a2 y : zs) | a1 == a2 =
consolidateInlines $ Code a1 (x ++ y) : zs
consolidateInlines (x : xs) = x : consolidateInlines xs
consolidateInlines [] = []
2013-08-28 08:43:51 -07:00
-- | Convert pandoc structure to a string with formatting removed.
-- Footnotes are skipped (since we don't want their contents in link
-- labels).
2013-08-28 08:43:51 -07:00
stringify :: Walkable Inline a => a -> String
stringify = query go . walk deNote
where go :: Inline -> [Char]
go Space = " "
go (Str x) = x
go (Code _ x) = x
go (Math _ x) = x
go LineBreak = " "
go _ = ""
deNote (Note _) = Str ""
deNote x = x
-- | Change final list item from @Para@ to @Plain@ if the list contains
-- no other @Para@ blocks.
compactify :: [[Block]] -- ^ List of list items (each a list of blocks)
-> [[Block]]
compactify [] = []
compactify items =
case (init items, last items) of
(_,[]) -> items
(others, final) ->
case last final of
Para a -> case (filter isPara $ concat items) of
-- if this is only Para, change to Plain
[_] -> others ++ [init final ++ [Plain a]]
_ -> items
_ -> items
-- | Change final list item from @Para@ to @Plain@ if the list contains
-- no other @Para@ blocks. Like compactify, but operates on @Blocks@ rather
-- than @[Block]@.
compactify' :: [Blocks] -- ^ List of list items (each a list of blocks)
-> [Blocks]
compactify' [] = []
compactify' items =
let (others, final) = (init items, last items)
in case reverse (B.toList final) of
(Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of
-- if this is only Para, change to Plain
[_] -> others ++ [B.fromList (reverse $ Plain a : xs)]
_ -> items
_ -> items
isPara :: Block -> Bool
isPara (Para _) = True
isPara _ = False
-- | Data structure for defining hierarchical Pandoc documents
2012-07-26 22:32:53 -07:00
data Element = Blk Block
| Sec Int [Int] Attr [Inline] [Element]
-- lvl num attributes label contents
deriving (Eq, Read, Show, Typeable, Data)
2013-08-10 18:13:38 -07:00
instance Walkable Inline Element where
walk f (Blk x) = Blk (walk f x)
walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
walkM f (Blk x) = Blk `fmap` walkM f x
walkM f (Sec lev nums attr ils elts) = do
ils' <- walkM f ils
elts' <- walkM f elts
return $ Sec lev nums attr ils' elts'
query f (Blk x) = query f x
query f (Sec _ _ _ ils elts) = query f ils <> query f elts
instance Walkable Block Element where
walk f (Blk x) = Blk (walk f x)
walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
walkM f (Blk x) = Blk `fmap` walkM f x
walkM f (Sec lev nums attr ils elts) = do
ils' <- walkM f ils
elts' <- walkM f elts
return $ Sec lev nums attr ils' elts'
query f (Blk x) = query f x
query f (Sec _ _ _ ils elts) = query f ils <> query f elts
-- | Convert Pandoc inline list to plain text identifier. HTML
-- identifiers must start with a letter, and may contain only
-- letters, digits, and the characters _-.
inlineListToIdentifier :: [Inline] -> String
inlineListToIdentifier =
dropWhile (not . isAlpha) . intercalate "-" . words .
map (nbspToSp . toLower) .
filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
stringify
where nbspToSp '\160' = ' '
nbspToSp x = x
-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
hierarchicalize :: [Block] -> [Element]
hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) []
hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element]
hierarchicalizeWithIds [] = return []
hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do
lastnum <- S.get
let lastnum' = take level lastnum
let newnum = case length lastnum' of
x | "unnumbered" `elem` classes -> []
| x >= level -> init lastnum' ++ [last lastnum' + 1]
| otherwise -> lastnum ++
replicate (level - length lastnum - 1) 0 ++ [1]
unless (null newnum) $ S.put newnum
let (sectionContents, rest) = break (headerLtEq level) xs
sectionContents' <- hierarchicalizeWithIds sectionContents
rest' <- hierarchicalizeWithIds rest
return $ Sec level newnum attr title' sectionContents' : rest'
hierarchicalizeWithIds (x:rest) = do
rest' <- hierarchicalizeWithIds rest
return $ (Blk x) : rest'
headerLtEq :: Int -> Block -> Bool
headerLtEq level (Header l _ _) = l <= level
headerLtEq _ _ = False
-- | Generate a unique identifier from a list of inlines.
-- Second argument is a list of already used identifiers.
uniqueIdent :: [Inline] -> [String] -> String
uniqueIdent title' usedIdents =
let baseIdent = case inlineListToIdentifier title' of
"" -> "section"
x -> x
numIdent n = baseIdent ++ "-" ++ show n
in if baseIdent `elem` usedIdents
then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of
Just x -> numIdent x
Nothing -> baseIdent -- if we have more than 60,000, allow repeats
else baseIdent
-- | True if block is a Header block.
isHeaderBlock :: Block -> Bool
isHeaderBlock (Header _ _ _) = True
isHeaderBlock _ = False
-- | Shift header levels up or down.
headerShift :: Int -> Pandoc -> Pandoc
headerShift n = walk shift
where shift :: Block -> Block
shift (Header level attr inner) = Header (level + n) attr inner
shift x = x
2013-01-07 20:12:05 -08:00
-- | Detect if a list is tight.
isTightList :: [[Block]] -> Bool
isTightList = and . map firstIsPlain
where firstIsPlain (Plain _ : _) = True
firstIsPlain _ = False
-- | Set a field of a 'Meta' object. If the field already has a value,
-- convert it into a list with the new value appended to the old value(s).
addMetaField :: ToMetaValue a
=> String
-> a
-> Meta
-> Meta
addMetaField key val (Meta meta) =
Meta $ M.insertWith combine key (toMetaValue val) meta
where combine newval (MetaList xs) = MetaList (xs ++ [newval])
combine newval x = MetaList [x, newval]
-- | Create 'Meta' from old-style title, authors, date. This is
-- provided to ease the transition from the old API.
makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta
makeMeta title authors date =
addMetaField "title" (B.fromList title)
$ addMetaField "author" (map B.fromList authors)
$ addMetaField "date" (B.fromList date)
$ nullMeta
--
-- TagSoup HTML handling
--
-- | Render HTML tags.
renderTags' :: [Tag String] -> String
renderTags' = renderTagsOptions
renderOptions{ optMinimize = \x ->
let y = map toLower x
in y == "hr" || y == "br" ||
y == "img" || y == "meta" ||
y == "link"
, optRawTag = \x ->
let y = map toLower x
in y == "script" || y == "style" }
--
-- File handling
--
-- | Perform an IO action in a directory, returning to starting directory.
inDirectory :: FilePath -> IO a -> IO a
inDirectory path action = do
oldDir <- getCurrentDirectory
setCurrentDirectory path
result <- action
setCurrentDirectory oldDir
return result
readDefaultDataFile :: FilePath -> IO BS.ByteString
readDefaultDataFile fname =
#ifdef EMBED_DATA_FILES
case lookup (makeCanonical fname) dataFiles of
Nothing -> err 97 $ "Could not find data file " ++ fname
2012-12-29 18:45:20 -08:00
Just contents -> return contents
where makeCanonical = joinPath . transformPathParts . splitDirectories
transformPathParts = reverse . foldl go []
go as "." = as
go (_:as) ".." = as
go as x = x : as
#else
getDataFileName ("data" </> fname) >>= checkExistence >>= BS.readFile
where checkExistence fn = do
exists <- doesFileExist fn
if exists
then return fn
else err 97 ("Could not find data file " ++ fname)
#endif
-- | Read file from specified user data directory or, if not found there, from
-- Cabal data directory.
readDataFile :: Maybe FilePath -> FilePath -> IO BS.ByteString
readDataFile Nothing fname = readDefaultDataFile fname
readDataFile (Just userDir) fname = do
exists <- doesFileExist (userDir </> fname)
if exists
then BS.readFile (userDir </> fname)
else readDefaultDataFile fname
-- | Same as 'readDataFile' but returns a String instead of a ByteString.
readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String
readDataFileUTF8 userDir fname =
UTF8.toString `fmap` readDataFile userDir fname
-- | Fetch an image or other item from the local filesystem or the net.
-- Returns raw content and maybe mime type.
fetchItem :: Maybe String -> String
-> IO (Either E.SomeException (BS.ByteString, Maybe String))
fetchItem sourceURL s
| isURI s = openURL s
| otherwise = case sourceURL of
Just u -> openURL (u ++ "/" ++ s)
Nothing -> E.try readLocalFile
where readLocalFile = do
let mime = case takeExtension s of
".gz" -> getMimeType $ dropExtension s
x -> getMimeType x
cont <- BS.readFile s
return (cont, mime)
-- | Read from a URL and return raw data and maybe mime type.
openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String))
openURL u
| "data:" `isPrefixOf` u =
let mime = takeWhile (/=',') $ drop 5 u
contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u
in return $ Right (decodeLenient contents, Just mime)
#ifdef HTTP_CONDUIT
| otherwise = withSocketsDo $ E.try $ do
req <- parseUrl u
resp <- withManager $ httpLbs req
return (BS.concat $ toChunks $ responseBody resp,
UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
#else
| otherwise = E.try $ getBodyAndMimeType `fmap` browse
(do S.liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..."
setOutHandler $ const (return ())
setAllowRedirects True
request (getRequest' u'))
where getBodyAndMimeType (_, r) = (rspBody r, findHeader HdrContentType r)
getRequest' uriString = case parseURI uriString of
Nothing -> error ("Not a valid URL: " ++
uriString)
Just v -> mkRequest GET v
u' = escapeURIString (/= '|') u -- pipes are rejected by Network.URI
#endif
--
-- Error reporting
--
err :: Int -> String -> IO a
err exitCode msg = do
name <- getProgName
UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
exitWith $ ExitFailure exitCode
return undefined
warn :: String -> IO ()
warn msg = do
name <- getProgName
UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
2012-08-09 07:52:39 -07:00
--
-- Safe read
--
safeRead :: (Monad m, Read a) => String -> m a
safeRead s = case reads s of
(d,x):_
| all isSpace x -> return d
_ -> fail $ "Could not read `" ++ s ++ "'"