Hufflepdf/src/PDF.hs

138 lines
5.0 KiB
Haskell
Raw Normal View History

{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module PDF (
2019-05-16 22:41:14 +02:00
Document(..)
, parseDocument
, render
) where
import Control.Monad.Fail (MonadFail(..))
2019-05-24 10:48:09 +02:00
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS (
drop, findIndex, head, isPrefixOf, last, length, span, unpack
)
2019-05-24 10:48:09 +02:00
import Data.ByteString.Char8.Util (previous, subBS)
import qualified Data.ByteString.Lazy as Lazy (ByteString)
import qualified Data.Map as Map (lookup)
import PDF.Body (populate)
import PDF.Box (Box(..), edit)
2019-05-16 22:41:14 +02:00
import qualified PDF.EOL as EOL (Style(..), charset, parser)
import PDF.Layer (Layer, unify)
import PDF.Object (
DirectObject(..), InputStructure(..), Name(..), Number(..)
2019-05-18 09:01:13 +02:00
, Structure(..)
, eofMarker, magicNumber, structure
)
import qualified PDF.Output as Output (render, line)
import PDF.Output (Output(..))
import PDF.Parser (Parser, evalParser, string, takeAll)
import Prelude hiding (fail)
2019-05-16 22:41:14 +02:00
import Text.Printf (printf)
data Document = Document {
pdfVersion :: String
2019-05-16 22:41:14 +02:00
, eolStyle :: EOL.Style
, layers :: [Layer]
} deriving Show
2019-05-16 22:41:14 +02:00
instance Output Document where
output (Document {pdfVersion, layers}) =
Output.line (printf "%%PDF-%s" pdfVersion)
`mappend` output layers
2019-05-16 22:41:14 +02:00
data EOLStyleK = EOLStyleK
data AllLayers = AllLayers
data UnifiedLayers = UnifiedLayers
instance Monad m => Box m EOLStyleK Document EOL.Style where
r EOLStyleK = return . eolStyle
w EOLStyleK document eolStyle = return $ document {eolStyle}
instance Monad m => Box m UnifiedLayers Document Layer where
r UnifiedLayers = return . unify . layers
w UnifiedLayers document layer = w AllLayers document [layer]
instance Monad m => Box m AllLayers Document [Layer] where
r AllLayers = return . layers
w AllLayers document layers = return $ document {layers = layers}
instance MonadFail m => Box m Int Document Layer where
r i = at i . layers
where
at _ [] = fail $ "Layer out of bounds " ++ show i
at 0 (x:_) = return x
at k (_:xs) = at (k+1) xs
w i document layer = edit AllLayers (setAt i) document
where
setAt _ [] = fail $ "Layer out of bounds " ++ show i
setAt 0 (_:xs) = return (layer:xs)
setAt k (x:xs) = (x:)<$>(setAt (k-1) xs)
2019-05-24 10:48:09 +02:00
render :: Document -> Lazy.ByteString
2019-05-16 22:41:14 +02:00
render document@(Document {eolStyle}) = Output.render eolStyle document
2019-05-24 10:48:09 +02:00
version :: Parser () String
version = BS.unpack <$>
(string magicNumber *> takeAll (not . (`elem` EOL.charset)))
2019-05-24 10:48:09 +02:00
check :: Bool -> String -> Either String ()
check test errorMessage = if test then return () else Left errorMessage
2019-05-24 10:48:09 +02:00
readStartXref :: EOL.Style -> ByteString -> Either String Int
readStartXref eolStyle input =
check (eofMarker `BS.isPrefixOf` (BS.drop eofMarkerPosition input))
"Badly formed document : missing EOF marker at the end"
>> return (read . BS.unpack $ subBS startXrefPosition startXrefLength input)
where
(eolOffset, eolLastByte) = case eolStyle of
2019-05-16 22:41:14 +02:00
EOL.CRLF -> (2, '\n')
EOL.CR -> (1, '\r')
_ -> (1, '\n')
eofMarkerPosition =
BS.length input - BS.length eofMarker
- if BS.last input == BS.last eofMarker then 0 else eolOffset
startXrefPosition =
previous eolLastByte (eofMarkerPosition - eolOffset - 1) input + 1
startXrefLength = eofMarkerPosition - eolOffset - startXrefPosition
2019-05-24 10:48:09 +02:00
findNextLine :: ByteString -> Int
2019-05-16 22:41:14 +02:00
findNextLine input =
let (line, eolPrefixed) = BS.span notInEol input in
let nextNotInEol = BS.findIndex notInEol eolPrefixed in
BS.length line + (maybe (BS.length eolPrefixed) id nextNotInEol)
where
2019-05-16 22:41:14 +02:00
notInEol = not . (`elem` EOL.charset)
2019-05-24 10:48:09 +02:00
findNextSection :: Int -> ByteString -> Int
2019-05-16 22:41:14 +02:00
findNextSection offset input =
case BS.findIndex (== BS.head eofMarker) input of
Nothing -> 0
Just delta ->
let newInput = BS.drop delta input in
let newOffset = offset + delta in
if BS.isPrefixOf eofMarker newInput
2019-05-16 22:41:14 +02:00
then newOffset + findNextLine newInput
else findNextSection (newOffset + 1) (BS.drop 1 newInput)
2019-05-24 10:48:09 +02:00
readStructures :: Int -> ByteString -> Either String [InputStructure]
readStructures startXref input =
evalParser structure () (BS.drop startXref input) >>= stopOrFollow
where
2019-05-18 09:01:13 +02:00
stopOrFollow s@(Structure {trailer}) =
case Map.lookup (Name "Prev") trailer of
Nothing -> Right [InputStructure (findNextLine input) s]
Just (NumberObject (Number newStartXref)) ->
let offset = truncate newStartXref in
2019-05-16 22:41:14 +02:00
let startOffset = findNextSection offset (BS.drop offset input) in
2019-05-18 09:01:13 +02:00
(InputStructure startOffset s:) <$> (readStructures offset input)
2019-05-24 10:48:09 +02:00
Just v -> Left $ "Bad value for Prev entry in trailer: " ++ show v
2019-05-24 10:48:09 +02:00
parseDocument :: ByteString -> Either String Document
parseDocument input = do
(pdfVersion, eolStyle) <- evalParser ((,) <$> version <*> EOL.parser) () input
startXref <- readStartXref eolStyle input
structuresRead <- readStructures startXref input
let layers = populate input <$> structuresRead
return $ Document {pdfVersion, eolStyle, layers}