Go strict ByteStrings with attoparsec

2019-05-24 10:48:09 +02:00 · 2019-05-24 10:48:09 +02:00 · 11cb6504d7
parent 0daa03d958
commit 11cb6504d7
10 changed files with 237 additions and 134 deletions
--- a/ChangeLog.md
+++ b/ChangeLog.md
@ -1,5 +1,10 @@
 # Revision history for Hufflepdf
 ## 0.1.1.0  -- 2019-05-23
 * Rewrite the parser using attoparsec instead of parsec (about 1.5x faster)
 * Use strict ByteStrings instead of lazy ones for an additional gain in performance, simpler type interface (lazy ByteStrings are now used only for output because that's required by the ByteString Builder) and fewer import lines
 ## 0.1.0.0  -- 2019-05-18
 * First version. Released on an unsuspecting world.
--- a/Hufflepdf.cabal
+++ b/Hufflepdf.cabal
@ -17,17 +17,18 @@ cabal-version:       >=1.10
 library
  exposed-modules:     PDF
                     , PDF.Object
  other-modules:       PDF.Body
                     , PDF.EOL
                     , PDF.Object
                     , PDF.Output
-                     , Data.ByteString.Lazy.Char8.Util
+  other-modules:       Data.ByteString.Char8.Util
                     , PDF.Body
                     , PDF.Parser
  -- other-extensions:    
-  build-depends:       base >=4.9 && <4.13
+  build-depends:       attoparsec
                     , base >=4.9 && <4.13
                     , bytestring
                     , containers
                     , mtl
                     , parsec
  hs-source-dirs:      src
  ghc-options:         -Wall
  default-language:    Haskell2010
--- a/src/Data/ByteString/Char8/Util.hs
+++ b/src/Data/ByteString/Char8/Util.hs
@ -0,0 +1,16 @@
 module Data.ByteString.Char8.Util (
      previous
    , subBS
  ) where
 import Data.ByteString.Char8 (ByteString)
 import qualified Data.ByteString.Char8 as BS (drop, index, take)
 import Prelude hiding (length)
 previous :: Char -> Int -> ByteString -> Int
 previous char position byteString
  | BS.index byteString position == char = position
  | otherwise = previous char (position - 1) byteString
 subBS :: Int -> Int -> ByteString -> ByteString
 subBS offset length = BS.take length . BS.drop offset
--- a/src/Data/ByteString/Lazy/Char8/Util.hs
+++ b/src/Data/ByteString/Lazy/Char8/Util.hs
@ -1,17 +0,0 @@
 module Data.ByteString.Lazy.Char8.Util (
      previous
    , subBS
  ) where
 import Data.ByteString.Lazy.Char8 (ByteString)
 import qualified Data.ByteString.Lazy.Char8 as BS (drop, index, take)
 import Data.Int (Int64)
 import Prelude hiding (length)
 previous :: Char -> Int64 -> ByteString -> Int64
 previous char position byteString
  | BS.index byteString position == char = position
  | otherwise = previous char (position - 1) byteString
 subBS :: Int64 -> Int64 -> ByteString -> ByteString
 subBS offset length = BS.take length . BS.drop offset
--- a/src/PDF.hs
+++ b/src/PDF.hs
@ -5,12 +5,12 @@ module PDF (
    , render
  ) where
-import Data.ByteString.Lazy.Char8 (ByteString)
+import Data.ByteString.Char8 (ByteString)
-import qualified Data.ByteString.Lazy.Char8 as BS (
+import qualified Data.ByteString.Char8 as BS (
    drop, findIndex, head, isPrefixOf, last, length, span, unpack
  )
-import Data.ByteString.Lazy.Char8.Util (previous, subBS)
+import Data.ByteString.Char8.Util (previous, subBS)
-import Data.Int (Int64)
+import qualified Data.ByteString.Lazy as Lazy (ByteString)
 import qualified Data.Map as Map (lookup)
 import PDF.Body (populate)
 import qualified PDF.EOL as EOL (Style(..), charset, parser)
@ -21,10 +21,7 @@ import PDF.Object (
  )
 import qualified PDF.Output as Output (render, string)
 import PDF.Output (Output(..), nextLine)
-import Text.Parsec
+import PDF.Parser (Parser, runParser, string, takeAll)
 import Text.Parsec.ByteString.Lazy (Parser)
 import Text.Parsec.Pos (newPos)
 import Text.ParserCombinators.Parsec.Error (Message(..), newErrorMessage)
 import Text.Printf (printf)
 data Document = Document {
@ -38,20 +35,17 @@ instance Output Document where
      Output.string (printf "%%PDF-%s" pdfVersion)
    `nextLine` output contents
-render :: Document -> ByteString
+render :: Document -> Lazy.ByteString
 render document@(Document {eolStyle}) = Output.render eolStyle document
-version :: Parser String
+version :: Parser () String
-version = string magicNumber *> many (noneOf EOL.charset)
+version = BS.unpack <$>
  (string magicNumber *> takeAll (not . (`elem` EOL.charset)))
-parseError :: String -> Either ParseError a
+check :: Bool -> String -> Either String ()
-parseError errorMessage =
+check test errorMessage = if test then return () else Left errorMessage
  Left $ newErrorMessage (Message errorMessage) (newPos "" 0 0)
-check :: Bool -> String -> Either ParseError ()
+readStartXref :: EOL.Style -> ByteString -> Either String Int
 check test errorMessage = if test then return () else parseError errorMessage
 readStartXref :: EOL.Style -> ByteString -> Either ParseError Int64
 readStartXref eolStyle input =
  check (eofMarker `BS.isPrefixOf` (BS.drop eofMarkerPosition input))
    "Badly formed document : missing EOF marker at the end"
@ -68,7 +62,7 @@ readStartXref eolStyle input =
      previous eolLastByte (eofMarkerPosition - eolOffset - 1) input + 1
    startXrefLength = eofMarkerPosition - eolOffset - startXrefPosition
-findNextLine :: ByteString -> Int64
+findNextLine :: ByteString -> Int
 findNextLine input =
  let (line, eolPrefixed) = BS.span notInEol input in
  let nextNotInEol = BS.findIndex notInEol eolPrefixed in
@ -76,7 +70,7 @@ findNextLine input =
  where
    notInEol = not . (`elem` EOL.charset)
-findNextSection :: Int64 -> ByteString -> Int64
+findNextSection :: Int -> ByteString -> Int
 findNextSection offset input =
  case BS.findIndex (== BS.head eofMarker) input of
    Nothing -> 0
@ -87,9 +81,9 @@ findNextSection offset input =
      then newOffset + findNextLine newInput
      else findNextSection (newOffset + 1) (BS.drop 1 newInput)
-readStructures :: Int64 -> ByteString -> Either ParseError [InputStructure]
+readStructures :: Int -> ByteString -> Either String [InputStructure]
 readStructures startXref input =
-  parse structure ('@':show startXref) (BS.drop startXref input) >>= stopOrFollow
+  runParser structure () (BS.drop startXref input) >>= stopOrFollow
  where
    stopOrFollow s@(Structure {trailer}) =
      case Map.lookup (Name "Prev") trailer of
@ -98,11 +92,11 @@ readStructures startXref input =
          let offset = truncate newStartXref in
          let startOffset = findNextSection offset (BS.drop offset input) in
          (InputStructure startOffset s:) <$> (readStructures offset input)
-        Just v -> parseError $ "Bad value for Prev entry in trailer: " ++ show v
+        Just v -> Left $ "Bad value for Prev entry in trailer: " ++ show v
-parseDocument :: ByteString -> Either ParseError Document
+parseDocument :: ByteString -> Either String Document
 parseDocument input = do
-  (pdfVersion, eolStyle) <- parse ((,) <$> version <*> EOL.parser) "" input
+  (pdfVersion, eolStyle) <- runParser ((,) <$> version <*> EOL.parser) () input
  startXref <- readStartXref eolStyle input
  structuresRead <- readStructures startXref input
  let contents = populate input <$> structuresRead
--- a/src/PDF/Body.hs
+++ b/src/PDF/Body.hs
@ -1,21 +1,23 @@
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE OverloadedStrings #-}
 module PDF.Body (
    populate
  ) where
-import Data.ByteString.Lazy.Char8 (ByteString)
+import Control.Applicative ((<|>))
-import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack)
+import Control.Monad.State (get, gets, modify)
-import Data.Int (Int64)
+import Data.ByteString.Char8 (ByteString)
 import qualified Data.ByteString.Char8 as BS (drop, unpack)
 import Data.Map ((!))
 import qualified Data.Map as Map (empty, insert, lookup)
 import qualified PDF.EOL as EOL (charset, parser)
 import PDF.Object (
      Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..)
    , InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..)
-    , Parser, Structure(..), XRefEntry(..), XRefSection, XRefSubSection(..)
+    , Structure(..), XRefEntry(..), XRefSection, XRefSubSection(..)
    , blank, dictionary, directObject, integer, line
  )
-import Text.Parsec
+import PDF.Parser (Parser, block, char, on, runParser, takeAll)
 data UserState = UserState {
      input :: ByteString
@ -26,7 +28,7 @@ data UserState = UserState {
 type SParser = Parser UserState
 modifyFlow :: (Flow -> Flow) -> SParser ()
-modifyFlow f = modifyState $ \state -> state {flow = f $ flow state}
+modifyFlow f = modify $ \state -> state {flow = f $ flow state}
 addObject :: Int -> Object -> SParser ()
 addObject objectId newObject = modifyFlow $ \flow -> flow {
@ -39,9 +41,10 @@ pushOccurrence newOccurrence = modifyFlow $ \flow -> flow {
  }
 comment :: Parser u String
-comment = char '%' *> many (noneOf EOL.charset) <* EOL.parser
+comment = BS.unpack <$>
  (char '%' *> takeAll (not . (`elem` EOL.charset)) <* EOL.parser)
-lookupOffset :: Int -> XRefSection -> Maybe Int64
+lookupOffset :: Int -> XRefSection -> Maybe Int
 lookupOffset _ [] = Nothing
 lookupOffset objectId (xrefSubSection:others) =
  let XRefSubSection {firstObjectId, entriesNumber, entries} = xrefSubSection in
@ -53,25 +56,20 @@ lookupOffset objectId (xrefSubSection:others) =
      _ -> Nothing
  else lookupOffset objectId others
-getOffset :: Int -> SParser Int64
+getOffset :: Int -> SParser Int
 getOffset objectId = do
-  table <- xreferences <$> getState
+  table <- gets xreferences
  case lookupOffset objectId table of
    Nothing -> fail $
      "obj " ++ show objectId ++ " is referenced but missing in XRef table"
    Just offset -> return offset
 on :: Monad m => ParsecT s u m a -> s -> ParsecT s u m a
 on parser input = do
  originalInput <- getInput
  setInput input >> parser <* setInput originalInput
 loadNumber :: Int -> SParser Float
 loadNumber objectId = do
  offset <- getOffset objectId
-  objectStart <- BS.drop offset . input <$> getState
+  objectStart <- BS.drop offset <$> gets input
-  indirectObjCoordinates `on` objectStart >> return ()
+  indirectObjCoordinates `on` (objectStart :: ByteString) >> return ()
-  objectValue <- (!objectId) . tmpObjects . flow <$> getState
+  objectValue <- (!objectId) . tmpObjects <$> gets flow
  case objectValue of
    Direct (NumberObject (Number n)) -> return n
    obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number"
@ -83,7 +81,7 @@ getSize :: Maybe DirectObject -> SParser Float
 getSize Nothing = fail "Missing '/Length' key on stream"
 getSize (Just (NumberObject (Number size))) = return size
 getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
-  Flow {tmpObjects} <- flow <$> getState
+  Flow {tmpObjects} <- gets flow
  case Map.lookup objectId tmpObjects of
    Nothing -> loadNumber objectId
    Just (Direct (NumberObject (Number size))) -> return size
@ -92,13 +90,13 @@ getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
 getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length"
 streamObject :: SParser Object
-streamObject = try $ do
+streamObject = do
  header <- dictionary <* blank
  size <- getSize (Map.lookup (Name "Length") header)
-  streamContent <- BS.pack <$> stream (truncate size)
+  streamContent <- stream (truncate size)
  return $ Stream {header, streamContent}
  where
-    stream size = line "stream" *> count size anyChar <* blank <* line "endstream"
+    stream size = line "stream" *> block size <* blank <* line "endstream"
 object :: SParser Object
 object = streamObject <|> Direct <$> directObject
@ -117,7 +115,7 @@ occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates
 populate :: ByteString -> InputStructure -> Content
 populate input structure =
  let bodyInput = BS.drop (startOffset structure) input in
-  case runParser recurseOnOccurrences initialState "" bodyInput of
+  case runParser recurseOnOccurrences initialState bodyInput of
    Left _ -> Content {occurrences = [], objects = Map.empty, docStructure}
    Right finalState ->
      let Flow {occurrencesStack, tmpObjects} = flow finalState in
@ -135,4 +133,4 @@ populate input structure =
 recurseOnOccurrences :: SParser UserState
 recurseOnOccurrences =
-  (occurrence >>= pushOccurrence >> recurseOnOccurrences) <|> getState
+  (occurrence >>= pushOccurrence >> recurseOnOccurrences) <|> get
--- a/src/PDF/EOL.hs
+++ b/src/PDF/EOL.hs
@ -1,19 +1,20 @@
 {-# LANGUAGE OverloadedStrings #-}
 module PDF.EOL (
      Style(..)
    , charset
    , parser
  ) where
-import Data.ByteString.Lazy.Char8 (ByteString)
+import Control.Applicative ((<|>))
-import Text.Parsec ((<|>), Parsec, string, try)
+import PDF.Parser (Parser, string)
 data Style = CR | LF | CRLF deriving Show
 charset :: String
 charset = "\r\n"
-parser :: Parsec ByteString u Style
+parser :: Parser s Style
 parser =
-    try (string "\r\n" >> return CRLF)
+    (string "\r\n" >> return CRLF)
  <|> (string "\r" >> return CR)
  <|> (string "\n" >> return LF)
--- a/src/PDF/Object.hs
+++ b/src/PDF/Object.hs
@ -11,7 +11,6 @@ module PDF.Object (
    , Number(..)
    , Object(..)
    , Occurrence(..)
    , Parser
    , Structure(..)
    , XRefEntry(..)
    , XRefSection
@ -26,8 +25,11 @@ module PDF.Object (
    , structure
  ) where
-import Data.ByteString.Lazy.Char8 (ByteString)
+import Control.Applicative ((<|>))
-import Data.Int (Int64)
+import Data.ByteString.Char8 (ByteString)
 import qualified Data.ByteString.Char8 as BS (
    concat, cons, pack, singleton, unpack
  )
 import Data.Map (Map, (!), mapWithKey)
 import qualified Data.Map as Map (elems, fromList, toList)
 import qualified PDF.EOL as EOL (charset, parser)
@ -36,15 +38,17 @@ import PDF.Output (
      OBuilder, Offset(..), Output(..)
    , byteString, getOffsets, join, newLine, nextLine, saveOffset
  )
-import Text.Parsec
+import PDF.Parser (
      Parser, (<?>)
    , char, choice, count, decNumber, hexNumber, many, octDigit, oneOf, option
    , sepBy, string, takeAll, takeAll1
  )
 import Text.Printf (printf)
 type Parser u = Parsec ByteString u
 line :: String -> Parser u ()
-line l = string l *> EOL.parser *> return ()
+line l = string (BS.pack l) *> EOL.parser *> return () <?> printf "line «%s»" l
-magicNumber :: String
+magicNumber :: ByteString
 magicNumber = "%PDF-"
 eofMarker :: ByteString
@ -53,20 +57,17 @@ eofMarker = "%%EOF"
 whiteSpaceCharset :: String
 whiteSpaceCharset = "\0\t\12 "
 whiteSpace :: Parser u ()
 whiteSpace = oneOf whiteSpaceCharset *> return () <|> EOL.parser *> return ()
 blank :: Parser u ()
-blank = skipMany whiteSpace
+blank = takeAll (`elem` (EOL.charset ++ whiteSpaceCharset)) *> return ()
 delimiterCharset :: String
 delimiterCharset = "()<>[]{}/%"
-regular :: Parser u Char
+regular :: Char -> Bool
-regular = noneOf $ EOL.charset ++ whiteSpaceCharset ++ delimiterCharset
+regular = not . (`elem` (EOL.charset ++ whiteSpaceCharset ++ delimiterCharset))
 integer :: (Read a, Num a) => Parser u a
-integer = read <$> many1 digit <* whiteSpace
+integer = read . BS.unpack <$> decNumber <* blank <?> "decimal integer"
 -------------------------------------
 --          OBJECTS
@ -76,7 +77,8 @@ integer = read <$> many1 digit <* whiteSpace
 -- Boolean
 --
 boolean :: Parser u Bool
-boolean = (string "true" *> return True) <|> (string "false" *> return False)
+boolean =
  (string "true" *> return True) <|> (string "false" *> return False) <?> "boolean"
 --
 -- Number
@ -90,12 +92,13 @@ instance Output Number where
      _ -> printf "%f" f
 number :: Parser u Number
-number = Number . read <$>
+number = Number . read . BS.unpack <$>
-  (mappend <$> sign <*> (integerPart <|> ('0':) <$> floatPart))
+  (mappend <$> sign <*> (integerPart <|> BS.cons '0' <$> floatPart))
  <?> "number"
  where
    sign = string "-" <|> option "" (char '+' >> return "")
-    integerPart = mappend <$> many1 digit <*> option "" floatPart
+    integerPart = mappend <$> decNumber <*> option "" floatPart
-    floatPart = (:) <$> char '.' <*> (option "0" $ many1 digit)
+    floatPart = BS.cons <$> char '.' <*> (option "0" $ decNumber)
 --
 -- StringObject
@ -108,14 +111,18 @@ instance Output StringObject where
 stringObject :: Parser u StringObject
 stringObject =
-    Literal <$> (char '(' *> (concat <$> many literalStringBlock) <* char ')')
+    Literal . BS.unpack <$> (char '(' *> (BS.concat <$> literalString) <* char ')')
-  <|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>')
+  <|> Hexadecimal . BS.unpack <$> (char '<' *> hexNumber <* char '>')
  <?> "string object (literal or hexadecimal)"
  where
-    literalStringBlock = many1 (noneOf "\\()") <|> matchingParenthesis <|> escapedChar
+    literalString = many literalStringBlock
    literalStringBlock = takeAll1 normalChar <|> matchingParenthesis <|> escapedChar
    normalChar = not . (`elem` ("\\()" :: String))
    matchingParenthesis =
-      (++) <$> ((:) <$> char '(' <*> literalStringBlock) <*> string ")"
+      mappend <$> (BS.cons <$> char '(' <*> literalStringBlock) <*> string ")"
-    escapedChar = (:) <$> char '\\' <*> ((:[]) <$> oneOf "nrtbf()\\" <|> octalCode)
+    escapedChar =
-    octalCode = choice $ (\n -> count n octDigit) <$> [1..3]
+      BS.cons <$> char '\\' <*> (BS.singleton <$> oneOf "nrtbf()\\" <|> octalCode)
    octalCode = choice $ (\n -> BS.pack <$> count n octDigit) <$> [1..3]
 --
 -- Name
@ -126,13 +133,14 @@ instance Output Name where
  output (Name n) = "/" `mappend` Output.string n
 name :: Parser u Name
-name = Name <$> (char '/' *> many regular)
+name = Name . BS.unpack <$> (char '/' *> takeAll regular) <?> "name"
 --
 -- Array
 --
 array :: Parser u [DirectObject]
-array = char '[' *> blank *> directObject `endBy` blank <* char ']'
+array =
  char '[' *> blank *> directObject `sepBy` blank <* blank <* char ']' <?> "array"
 --
 -- Dictionary
@ -149,16 +157,16 @@ instance Output Dictionary where
 dictionary :: Parser u Dictionary
 dictionary =
-  try (string "<<" *> blank *> keyValPairs <* string ">>")
+  string "<<" *> blank *> keyValPairs <* string ">>" <?> "dictionary"
  where
    keyVal = (,) <$> name <* blank <*> directObject
-    keyValPairs = Map.fromList <$> keyVal `endBy` blank
+    keyValPairs = Map.fromList <$> keyVal `sepBy` blank <* blank
 --
 -- Null
 --
 nullObject :: Parser u ()
-nullObject = string "null" *> return ()
+nullObject = string "null" *> return () <?> "null object"
 --
 -- Reference
@ -169,7 +177,7 @@ data IndirectObjCoordinates = IndirectObjCoordinates {
  } deriving Show
 reference :: Parser u IndirectObjCoordinates
-reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R'
+reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R' <?> "reference to an object"
 --
 -- DirectObject
@ -198,14 +206,15 @@ instance Output DirectObject where
 directObject :: Parser u DirectObject
 directObject =
-    Boolean <$> try boolean
+    Boolean <$> boolean
-  <|> Reference <$> try reference {- defined before Number because Number is a prefix of it -}
+  <|> Reference <$> reference {- defined before Number because Number is a prefix of it -}
-  <|> NumberObject <$> try number
+  <|> NumberObject <$> number
-  <|> StringObject <$> try stringObject
+  <|> StringObject <$> stringObject
-  <|> NameObject <$> try name
+  <|> NameObject <$> name
-  <|> Array <$> try array
+  <|> Array <$> array
-  <|> Dictionary <$> try dictionary
+  <|> Dictionary <$> dictionary
-  <|> const Null <$> try nullObject
+  <|> const Null <$> nullObject
  <?> "direct object"
 --
 -- Object
@ -226,7 +235,6 @@ instance Output Object where
    `nextLine` byteString streamContent
    `mappend` "endstream"
 --
 -- Occurrence
 --
@ -249,10 +257,10 @@ outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNum
 -- XRefEntry
 --
 data XRefEntry = InUse {
-      offset :: Int64
+      offset :: Int
    , generation :: Int
  } | Free {
-      nextFree :: Int64
+      nextFree :: Int
    , generation :: Int
  } deriving Show
@ -265,14 +273,13 @@ instance Output XRefEntry where
 entry :: Parser u XRefEntry
 entry = do
  (big, small) <- (,) <$> integer <*> integer
-  (inUse big small <|> free big small) <* blank
+  (inUse big small <|> free big small <?> "XRef entry") <* blank
  where
-    inUse :: Int64 -> Int -> Parser u XRefEntry
+    inUse :: Int -> Int -> Parser u XRefEntry
    inUse offset generation = char 'n' *> return (InUse {offset, generation})
-    free :: Int64 -> Int -> Parser u XRefEntry
+    free :: Int -> Int -> Parser u XRefEntry
    free nextFree generation = char 'f' *> return (Free {nextFree, generation})
 --
 -- XRefSubSection
 --
@ -289,7 +296,7 @@ instance Output XRefSubSection where
 xrefSubSection :: Parser u XRefSubSection
 xrefSubSection = do
-  (firstObjectId, entriesNumber) <- (,) <$> integer <*> integer
+  (firstObjectId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
  entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry
  return $ XRefSubSection {firstObjectId, entriesNumber, entries}
@ -299,7 +306,7 @@ type XRefSection = [XRefSubSection]
 -- Structure
 --
 data InputStructure = InputStructure {
-      startOffset :: Int64
+      startOffset :: Int
    , inputStructure :: Structure
  }
@ -314,7 +321,7 @@ structure =
  <$> (line "xref" *> xrefSubSection `sepBy` EOL.parser)
  <*> (line "trailer" *> dictionary <* EOL.parser)
-updateXrefs :: XRefSection -> Map Offset Int64 -> (XRefSection, Int64)
+updateXrefs :: XRefSection -> Map Offset Int -> (XRefSection, Int)
 updateXrefs xrefSection offsets = (
      updateSubSection <$> xrefSection
    , offsets ! StartXRef
--- a/src/PDF/Output.hs
+++ b/src/PDF/Output.hs
@ -11,7 +11,6 @@ module PDF.Output (
    , char
    , getOffsets
    , join
    , lift
    , newLine
    , nextLine
    , saveOffset
@ -19,10 +18,11 @@ module PDF.Output (
    , render
  ) where
-import Data.ByteString.Builder (Builder, char8, lazyByteString, string8, toLazyByteString)
+import Data.ByteString.Builder (Builder, char8, string8, toLazyByteString)
-import Data.ByteString.Lazy.Char8 (ByteString)
+import qualified Data.ByteString.Builder as B (byteString)
-import qualified Data.ByteString.Lazy.Char8 as BS (length)
+import Data.ByteString (ByteString)
-import Data.Int (Int64)
+import qualified Data.ByteString as BS (length)
 import qualified Data.ByteString.Lazy as Lazy (ByteString)
 import Data.Map (Map)
 import qualified Data.Map as Map (singleton)
 import Data.String (IsString(..))
@ -31,7 +31,7 @@ import qualified PDF.EOL as EOL (Style(..))
 data Offset = StartXRef | ObjectId Int deriving (Eq, Ord)
-newtype OContext a = OContext (RWS EOL.Style (Map Offset Int64) Int64 a)
+newtype OContext a = OContext (RWS EOL.Style (Map Offset Int) Int a)
 type OBuilder = OContext Builder
 instance Functor OContext where
@ -51,7 +51,7 @@ saveOffset offset = OContext $
 lift :: (a -> Builder) -> a -> OBuilder
 lift f a = return (f a)
-getOffsets :: OBuilder -> OContext (OBuilder, Map Offset Int64)
+getOffsets :: OBuilder -> OContext (OBuilder, Map Offset Int)
 getOffsets (OContext builder) =
  OContext (listen builder >>= \(a, w) -> return (return a, w))
@ -108,9 +108,9 @@ string :: String -> OBuilder
 string s = lift string8 s <* OContext (modify (+ toEnum (length s)))
 byteString :: ByteString -> OBuilder
-byteString bs = lift lazyByteString bs <* OContext (modify (+ BS.length bs))
+byteString bs = lift B.byteString bs <* OContext (modify (+ BS.length bs))
-render :: Output a => EOL.Style -> a -> ByteString
+render :: Output a => EOL.Style -> a -> Lazy.ByteString
 render eolStyle a =
  let OContext builder = output a in
  let (outputByteString, _, _) = runRWS builder eolStyle 0 in
--- a/src/PDF/Parser.hs
+++ b/src/PDF/Parser.hs
@ -0,0 +1,98 @@
 module PDF.Parser (
      Parser
    , (<?>)
    , block
    , char
    , choice
    , count
    , decNumber
    , hexNumber
    , many
    , octDigit
    , on
    , oneOf
    , option
    , runParser
    , sepBy
    , string
    , takeAll
    , takeAll1
  ) where
 import Control.Applicative ((<|>), empty)
 import Control.Monad.State (StateT(..), evalStateT)
 import Control.Monad.Trans (lift)
 import qualified Data.Attoparsec.ByteString.Char8 as Atto (
    Parser, char, parseOnly, satisfy, string, take, takeWhile, takeWhile1
  )
 import Data.ByteString (ByteString)
 import Data.Char (toLower)
 import Data.Set (Set)
 import qualified Data.Set as Set (fromList, member, unions)
 type Parser s = StateT s Atto.Parser
 (<?>) :: Parser s a -> String -> Parser s a
 (<?>) parser debugMessage = parser <|> fail debugMessage
 block :: Int -> Parser s ByteString
 block = lift . Atto.take
 char :: Char -> Parser s Char
 char = lift . Atto.char
 choice :: [Parser s a] -> Parser s a
 choice = foldr (<|>) empty
 count :: Int -> Parser s a -> Parser s [a]
 count 0 _ = return []
 count n p = (:) <$> p <*> count (n-1) p
 decNumber :: Parser s ByteString
 decNumber = lift $ Atto.takeWhile1 (`Set.member` digits)
 digits :: Set Char
 digits = Set.fromList ['0'..'9']
 hexDigits :: Set Char
 hexDigits = Set.unions [digits, Set.fromList af, Set.fromList $ toLower <$> af]
  where
    af = ['A'..'F']
 hexNumber :: Parser s ByteString
 hexNumber = lift $ Atto.takeWhile1 (`Set.member` hexDigits)
 many :: Parser s a -> Parser s [a]
 many parser = (:) <$> parser <*> many parser <|> return []
 octDigit :: Parser s Char
 octDigit = oneOf ['0'..'7']
 on :: Parser s a -> ByteString -> Parser s (Either String a)
 on (StateT parserF) input = StateT $ \state ->
  case Atto.parseOnly (parserF state) input of
    Left errorMsg -> return (Left errorMsg, state)
    Right (result, newState) -> return (Right result, newState)
 oneOf :: String -> Parser s Char
 oneOf charSet = lift $ Atto.satisfy (`elem` charSet)
 option :: a -> Parser s a -> Parser s a
 option defaultValue p = p <|> pure defaultValue
 runParser :: Parser s a -> s -> ByteString -> Either String a
 runParser parser initState =
  Atto.parseOnly (evalStateT parser initState)
 sepBy :: Parser s a -> Parser s b -> Parser s [a]
 sepBy parser separator =
  option [] $ (:) <$> parser <*> many (separator *> parser)
 string :: ByteString -> Parser s ByteString
 string = lift . Atto.string
 takeAll :: (Char -> Bool) -> Parser s ByteString
 takeAll = lift . Atto.takeWhile
 takeAll1 :: (Char -> Bool) -> Parser s ByteString
 takeAll1 = lift . Atto.takeWhile1