2019-05-13 08:05:28 +02:00
|
|
|
{-# LANGUAGE NamedFieldPuns #-}
|
2020-02-24 17:28:17 +01:00
|
|
|
{-# LANGUAGE FlexibleInstances #-}
|
|
|
|
{-# LANGUAGE MultiParamTypeClasses #-}
|
2019-05-13 08:05:28 +02:00
|
|
|
module PDF (
|
2020-02-24 21:37:09 +01:00
|
|
|
AllLayers(..)
|
|
|
|
, Document(..)
|
|
|
|
, EOLStyleK(..)
|
|
|
|
, UnifiedLayers(..)
|
2019-05-16 22:41:14 +02:00
|
|
|
, parseDocument
|
|
|
|
, render
|
2019-05-13 08:05:28 +02:00
|
|
|
) where
|
|
|
|
|
2020-02-24 17:28:17 +01:00
|
|
|
import Control.Monad.Fail (MonadFail(..))
|
2019-05-24 10:48:09 +02:00
|
|
|
import Data.ByteString.Char8 (ByteString)
|
|
|
|
import qualified Data.ByteString.Char8 as BS (
|
2019-05-16 11:01:50 +02:00
|
|
|
drop, findIndex, head, isPrefixOf, last, length, span, unpack
|
|
|
|
)
|
2019-05-24 10:48:09 +02:00
|
|
|
import Data.ByteString.Char8.Util (previous, subBS)
|
|
|
|
import qualified Data.ByteString.Lazy as Lazy (ByteString)
|
2019-05-13 18:22:05 +02:00
|
|
|
import qualified Data.Map as Map (lookup)
|
2019-05-14 18:42:11 +02:00
|
|
|
import PDF.Body (populate)
|
2020-02-24 17:28:17 +01:00
|
|
|
import PDF.Box (Box(..), edit)
|
2019-05-16 22:41:14 +02:00
|
|
|
import qualified PDF.EOL as EOL (Style(..), charset, parser)
|
2020-02-24 17:28:17 +01:00
|
|
|
import PDF.Layer (Layer, unify)
|
2019-05-17 16:14:06 +02:00
|
|
|
import PDF.Object (
|
2020-02-17 15:29:59 +01:00
|
|
|
DirectObject(..), InputStructure(..), Name(..), Number(..)
|
2019-05-18 09:01:13 +02:00
|
|
|
, Structure(..)
|
2019-05-17 16:14:06 +02:00
|
|
|
, eofMarker, magicNumber, structure
|
|
|
|
)
|
2019-09-20 22:39:14 +02:00
|
|
|
import qualified PDF.Output as Output (render, line)
|
|
|
|
import PDF.Output (Output(..))
|
2019-09-24 18:32:23 +02:00
|
|
|
import PDF.Parser (Parser, evalParser, string, takeAll)
|
2020-02-24 17:28:17 +01:00
|
|
|
import Prelude hiding (fail)
|
2019-05-16 22:41:14 +02:00
|
|
|
import Text.Printf (printf)
|
2019-05-13 08:05:28 +02:00
|
|
|
|
|
|
|
data Document = Document {
|
|
|
|
pdfVersion :: String
|
2019-05-16 22:41:14 +02:00
|
|
|
, eolStyle :: EOL.Style
|
2020-02-17 15:29:59 +01:00
|
|
|
, layers :: [Layer]
|
2019-05-13 08:05:28 +02:00
|
|
|
} deriving Show
|
|
|
|
|
2019-05-16 22:41:14 +02:00
|
|
|
instance Output Document where
|
2020-02-17 15:29:59 +01:00
|
|
|
output (Document {pdfVersion, layers}) =
|
2019-09-20 22:39:14 +02:00
|
|
|
Output.line (printf "%%PDF-%s" pdfVersion)
|
2020-02-17 15:29:59 +01:00
|
|
|
`mappend` output layers
|
2019-05-16 22:41:14 +02:00
|
|
|
|
2020-02-24 17:28:17 +01:00
|
|
|
data EOLStyleK = EOLStyleK
|
|
|
|
data AllLayers = AllLayers
|
|
|
|
data UnifiedLayers = UnifiedLayers
|
|
|
|
|
|
|
|
instance Monad m => Box m EOLStyleK Document EOL.Style where
|
|
|
|
r EOLStyleK = return . eolStyle
|
|
|
|
w EOLStyleK document eolStyle = return $ document {eolStyle}
|
|
|
|
|
|
|
|
instance Monad m => Box m UnifiedLayers Document Layer where
|
|
|
|
r UnifiedLayers = return . unify . layers
|
|
|
|
w UnifiedLayers document layer = w AllLayers document [layer]
|
|
|
|
|
|
|
|
instance Monad m => Box m AllLayers Document [Layer] where
|
|
|
|
r AllLayers = return . layers
|
|
|
|
w AllLayers document layers = return $ document {layers = layers}
|
|
|
|
|
|
|
|
instance MonadFail m => Box m Int Document Layer where
|
|
|
|
r i = at i . layers
|
|
|
|
where
|
|
|
|
at _ [] = fail $ "Layer out of bounds " ++ show i
|
|
|
|
at 0 (x:_) = return x
|
|
|
|
at k (_:xs) = at (k+1) xs
|
|
|
|
w i document layer = edit AllLayers (setAt i) document
|
|
|
|
where
|
|
|
|
setAt _ [] = fail $ "Layer out of bounds " ++ show i
|
|
|
|
setAt 0 (_:xs) = return (layer:xs)
|
|
|
|
setAt k (x:xs) = (x:)<$>(setAt (k-1) xs)
|
|
|
|
|
2019-05-24 10:48:09 +02:00
|
|
|
render :: Document -> Lazy.ByteString
|
2019-05-16 22:41:14 +02:00
|
|
|
render document@(Document {eolStyle}) = Output.render eolStyle document
|
|
|
|
|
2019-05-24 10:48:09 +02:00
|
|
|
version :: Parser () String
|
|
|
|
version = BS.unpack <$>
|
|
|
|
(string magicNumber *> takeAll (not . (`elem` EOL.charset)))
|
2019-05-13 08:05:28 +02:00
|
|
|
|
2019-05-24 10:48:09 +02:00
|
|
|
check :: Bool -> String -> Either String ()
|
|
|
|
check test errorMessage = if test then return () else Left errorMessage
|
2019-05-13 18:22:05 +02:00
|
|
|
|
2019-05-24 10:48:09 +02:00
|
|
|
readStartXref :: EOL.Style -> ByteString -> Either String Int
|
2019-05-13 08:05:28 +02:00
|
|
|
readStartXref eolStyle input =
|
|
|
|
check (eofMarker `BS.isPrefixOf` (BS.drop eofMarkerPosition input))
|
|
|
|
"Badly formed document : missing EOF marker at the end"
|
|
|
|
>> return (read . BS.unpack $ subBS startXrefPosition startXrefLength input)
|
|
|
|
where
|
|
|
|
(eolOffset, eolLastByte) = case eolStyle of
|
2019-05-16 22:41:14 +02:00
|
|
|
EOL.CRLF -> (2, '\n')
|
|
|
|
EOL.CR -> (1, '\r')
|
2019-05-13 08:05:28 +02:00
|
|
|
_ -> (1, '\n')
|
|
|
|
eofMarkerPosition =
|
|
|
|
BS.length input - BS.length eofMarker
|
|
|
|
- if BS.last input == BS.last eofMarker then 0 else eolOffset
|
|
|
|
startXrefPosition =
|
2019-05-13 11:34:15 +02:00
|
|
|
previous eolLastByte (eofMarkerPosition - eolOffset - 1) input + 1
|
2019-05-13 08:05:28 +02:00
|
|
|
startXrefLength = eofMarkerPosition - eolOffset - startXrefPosition
|
|
|
|
|
2019-05-24 10:48:09 +02:00
|
|
|
findNextLine :: ByteString -> Int
|
2019-05-16 22:41:14 +02:00
|
|
|
findNextLine input =
|
2019-05-16 11:01:50 +02:00
|
|
|
let (line, eolPrefixed) = BS.span notInEol input in
|
|
|
|
let nextNotInEol = BS.findIndex notInEol eolPrefixed in
|
|
|
|
BS.length line + (maybe (BS.length eolPrefixed) id nextNotInEol)
|
|
|
|
where
|
2019-05-16 22:41:14 +02:00
|
|
|
notInEol = not . (`elem` EOL.charset)
|
2019-05-13 18:22:05 +02:00
|
|
|
|
2019-05-24 10:48:09 +02:00
|
|
|
findNextSection :: Int -> ByteString -> Int
|
2019-05-16 22:41:14 +02:00
|
|
|
findNextSection offset input =
|
2019-05-16 11:01:50 +02:00
|
|
|
case BS.findIndex (== BS.head eofMarker) input of
|
|
|
|
Nothing -> 0
|
|
|
|
Just delta ->
|
|
|
|
let newInput = BS.drop delta input in
|
|
|
|
let newOffset = offset + delta in
|
|
|
|
if BS.isPrefixOf eofMarker newInput
|
2019-05-16 22:41:14 +02:00
|
|
|
then newOffset + findNextLine newInput
|
|
|
|
else findNextSection (newOffset + 1) (BS.drop 1 newInput)
|
2019-05-15 19:12:38 +02:00
|
|
|
|
2019-05-24 10:48:09 +02:00
|
|
|
readStructures :: Int -> ByteString -> Either String [InputStructure]
|
2019-05-17 16:14:06 +02:00
|
|
|
readStructures startXref input =
|
2019-09-24 18:32:23 +02:00
|
|
|
evalParser structure () (BS.drop startXref input) >>= stopOrFollow
|
2019-05-13 18:22:05 +02:00
|
|
|
where
|
2019-05-18 09:01:13 +02:00
|
|
|
stopOrFollow s@(Structure {trailer}) =
|
|
|
|
case Map.lookup (Name "Prev") trailer of
|
|
|
|
Nothing -> Right [InputStructure (findNextLine input) s]
|
2019-05-17 16:14:06 +02:00
|
|
|
Just (NumberObject (Number newStartXref)) ->
|
2019-05-16 11:01:50 +02:00
|
|
|
let offset = truncate newStartXref in
|
2019-05-16 22:41:14 +02:00
|
|
|
let startOffset = findNextSection offset (BS.drop offset input) in
|
2019-05-18 09:01:13 +02:00
|
|
|
(InputStructure startOffset s:) <$> (readStructures offset input)
|
2019-05-24 10:48:09 +02:00
|
|
|
Just v -> Left $ "Bad value for Prev entry in trailer: " ++ show v
|
2019-05-13 08:05:28 +02:00
|
|
|
|
2019-05-24 10:48:09 +02:00
|
|
|
parseDocument :: ByteString -> Either String Document
|
2019-05-16 11:01:50 +02:00
|
|
|
parseDocument input = do
|
2019-09-24 18:32:23 +02:00
|
|
|
(pdfVersion, eolStyle) <- evalParser ((,) <$> version <*> EOL.parser) () input
|
2019-05-16 11:01:50 +02:00
|
|
|
startXref <- readStartXref eolStyle input
|
2019-05-17 16:14:06 +02:00
|
|
|
structuresRead <- readStructures startXref input
|
2020-02-17 15:29:59 +01:00
|
|
|
let layers = populate input <$> structuresRead
|
|
|
|
return $ Document {pdfVersion, eolStyle, layers}
|