Hufflepdf/src/PDF.hs

104 lines
3.8 KiB
Haskell

{-# LANGUAGE NamedFieldPuns #-}
module PDF (
Document(..)
, parseDocument
, render
) where
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS (
drop, findIndex, head, isPrefixOf, last, length, span, unpack
)
import Data.ByteString.Char8.Util (previous, subBS)
import qualified Data.ByteString.Lazy as Lazy (ByteString)
import qualified Data.Map as Map (lookup)
import PDF.Body (populate)
import qualified PDF.EOL as EOL (Style(..), charset, parser)
import PDF.Object (
Content(..), DirectObject(..), InputStructure(..), Name(..), Number(..)
, Structure(..)
, eofMarker, magicNumber, structure
)
import qualified PDF.Output as Output (render, line)
import PDF.Output (Output(..))
import PDF.Parser (Parser, evalParser, string, takeAll)
import Text.Printf (printf)
data Document = Document {
pdfVersion :: String
, eolStyle :: EOL.Style
, updates :: [Content]
} deriving Show
instance Output Document where
output (Document {pdfVersion, updates}) =
Output.line (printf "%%PDF-%s" pdfVersion)
`mappend` output updates
render :: Document -> Lazy.ByteString
render document@(Document {eolStyle}) = Output.render eolStyle document
version :: Parser () String
version = BS.unpack <$>
(string magicNumber *> takeAll (not . (`elem` EOL.charset)))
check :: Bool -> String -> Either String ()
check test errorMessage = if test then return () else Left errorMessage
readStartXref :: EOL.Style -> ByteString -> Either String Int
readStartXref eolStyle input =
check (eofMarker `BS.isPrefixOf` (BS.drop eofMarkerPosition input))
"Badly formed document : missing EOF marker at the end"
>> return (read . BS.unpack $ subBS startXrefPosition startXrefLength input)
where
(eolOffset, eolLastByte) = case eolStyle of
EOL.CRLF -> (2, '\n')
EOL.CR -> (1, '\r')
_ -> (1, '\n')
eofMarkerPosition =
BS.length input - BS.length eofMarker
- if BS.last input == BS.last eofMarker then 0 else eolOffset
startXrefPosition =
previous eolLastByte (eofMarkerPosition - eolOffset - 1) input + 1
startXrefLength = eofMarkerPosition - eolOffset - startXrefPosition
findNextLine :: ByteString -> Int
findNextLine input =
let (line, eolPrefixed) = BS.span notInEol input in
let nextNotInEol = BS.findIndex notInEol eolPrefixed in
BS.length line + (maybe (BS.length eolPrefixed) id nextNotInEol)
where
notInEol = not . (`elem` EOL.charset)
findNextSection :: Int -> ByteString -> Int
findNextSection offset input =
case BS.findIndex (== BS.head eofMarker) input of
Nothing -> 0
Just delta ->
let newInput = BS.drop delta input in
let newOffset = offset + delta in
if BS.isPrefixOf eofMarker newInput
then newOffset + findNextLine newInput
else findNextSection (newOffset + 1) (BS.drop 1 newInput)
readStructures :: Int -> ByteString -> Either String [InputStructure]
readStructures startXref input =
evalParser structure () (BS.drop startXref input) >>= stopOrFollow
where
stopOrFollow s@(Structure {trailer}) =
case Map.lookup (Name "Prev") trailer of
Nothing -> Right [InputStructure (findNextLine input) s]
Just (NumberObject (Number newStartXref)) ->
let offset = truncate newStartXref in
let startOffset = findNextSection offset (BS.drop offset input) in
(InputStructure startOffset s:) <$> (readStructures offset input)
Just v -> Left $ "Bad value for Prev entry in trailer: " ++ show v
parseDocument :: ByteString -> Either String Document
parseDocument input = do
(pdfVersion, eolStyle) <- evalParser ((,) <$> version <*> EOL.parser) () input
startXref <- readStartXref eolStyle input
structuresRead <- readStructures startXref input
let updates = populate input <$> structuresRead
return $ Document {pdfVersion, eolStyle, updates}