Hufflepdf/src/PDF.hs

138 lines
5.0 KiB
Haskell

{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module PDF (
Document(..)
, parseDocument
, render
) where
import Control.Monad.Fail (MonadFail(..))
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS (
drop, findIndex, head, isPrefixOf, last, length, span, unpack
)
import Data.ByteString.Char8.Util (previous, subBS)
import qualified Data.ByteString.Lazy as Lazy (ByteString)
import qualified Data.Map as Map (lookup)
import PDF.Body (populate)
import PDF.Box (Box(..), edit)
import qualified PDF.EOL as EOL (Style(..), charset, parser)
import PDF.Layer (Layer, unify)
import PDF.Object (
DirectObject(..), InputStructure(..), Name(..), Number(..)
, Structure(..)
, eofMarker, magicNumber, structure
)
import qualified PDF.Output as Output (render, line)
import PDF.Output (Output(..))
import PDF.Parser (Parser, evalParser, string, takeAll)
import Prelude hiding (fail)
import Text.Printf (printf)
data Document = Document {
pdfVersion :: String
, eolStyle :: EOL.Style
, layers :: [Layer]
} deriving Show
instance Output Document where
output (Document {pdfVersion, layers}) =
Output.line (printf "%%PDF-%s" pdfVersion)
`mappend` output layers
data EOLStyleK = EOLStyleK
data AllLayers = AllLayers
data UnifiedLayers = UnifiedLayers
instance Monad m => Box m EOLStyleK Document EOL.Style where
r EOLStyleK = return . eolStyle
w EOLStyleK document eolStyle = return $ document {eolStyle}
instance Monad m => Box m UnifiedLayers Document Layer where
r UnifiedLayers = return . unify . layers
w UnifiedLayers document layer = w AllLayers document [layer]
instance Monad m => Box m AllLayers Document [Layer] where
r AllLayers = return . layers
w AllLayers document layers = return $ document {layers = layers}
instance MonadFail m => Box m Int Document Layer where
r i = at i . layers
where
at _ [] = fail $ "Layer out of bounds " ++ show i
at 0 (x:_) = return x
at k (_:xs) = at (k+1) xs
w i document layer = edit AllLayers (setAt i) document
where
setAt _ [] = fail $ "Layer out of bounds " ++ show i
setAt 0 (_:xs) = return (layer:xs)
setAt k (x:xs) = (x:)<$>(setAt (k-1) xs)
render :: Document -> Lazy.ByteString
render document@(Document {eolStyle}) = Output.render eolStyle document
version :: Parser () String
version = BS.unpack <$>
(string magicNumber *> takeAll (not . (`elem` EOL.charset)))
check :: Bool -> String -> Either String ()
check test errorMessage = if test then return () else Left errorMessage
readStartXref :: EOL.Style -> ByteString -> Either String Int
readStartXref eolStyle input =
check (eofMarker `BS.isPrefixOf` (BS.drop eofMarkerPosition input))
"Badly formed document : missing EOF marker at the end"
>> return (read . BS.unpack $ subBS startXrefPosition startXrefLength input)
where
(eolOffset, eolLastByte) = case eolStyle of
EOL.CRLF -> (2, '\n')
EOL.CR -> (1, '\r')
_ -> (1, '\n')
eofMarkerPosition =
BS.length input - BS.length eofMarker
- if BS.last input == BS.last eofMarker then 0 else eolOffset
startXrefPosition =
previous eolLastByte (eofMarkerPosition - eolOffset - 1) input + 1
startXrefLength = eofMarkerPosition - eolOffset - startXrefPosition
findNextLine :: ByteString -> Int
findNextLine input =
let (line, eolPrefixed) = BS.span notInEol input in
let nextNotInEol = BS.findIndex notInEol eolPrefixed in
BS.length line + (maybe (BS.length eolPrefixed) id nextNotInEol)
where
notInEol = not . (`elem` EOL.charset)
findNextSection :: Int -> ByteString -> Int
findNextSection offset input =
case BS.findIndex (== BS.head eofMarker) input of
Nothing -> 0
Just delta ->
let newInput = BS.drop delta input in
let newOffset = offset + delta in
if BS.isPrefixOf eofMarker newInput
then newOffset + findNextLine newInput
else findNextSection (newOffset + 1) (BS.drop 1 newInput)
readStructures :: Int -> ByteString -> Either String [InputStructure]
readStructures startXref input =
evalParser structure () (BS.drop startXref input) >>= stopOrFollow
where
stopOrFollow s@(Structure {trailer}) =
case Map.lookup (Name "Prev") trailer of
Nothing -> Right [InputStructure (findNextLine input) s]
Just (NumberObject (Number newStartXref)) ->
let offset = truncate newStartXref in
let startOffset = findNextSection offset (BS.drop offset input) in
(InputStructure startOffset s:) <$> (readStructures offset input)
Just v -> Left $ "Bad value for Prev entry in trailer: " ++ show v
parseDocument :: ByteString -> Either String Document
parseDocument input = do
(pdfVersion, eolStyle) <- evalParser ((,) <$> version <*> EOL.parser) () input
startXref <- readStartXref eolStyle input
structuresRead <- readStructures startXref input
let layers = populate input <$> structuresRead
return $ Document {pdfVersion, eolStyle, layers}