Fix parsing errors forgetting to make sure there's a space after special operator arguments like names and stringObjects

This commit is contained in:
Tissevert 2019-09-28 09:25:59 +02:00
parent 32efdcdd6b
commit b8ca7281aa
1 changed files with 22 additions and 15 deletions

View File

@ -5,7 +5,7 @@ module PDF.Text {-(
)-} where
import Control.Applicative ((<|>))
import Control.Monad (foldM, join)
import Control.Monad (foldM)
import Control.Monad.Reader (ReaderT, runReaderT, asks)
import Control.Monad.State (get, put)
import Data.Attoparsec.ByteString.Char8 (choice, count, sepBy)
@ -18,7 +18,7 @@ import PDF.Object (
DirectObject(..), StringObject(..)
, array, blank, name, parseBytes, regular, stringObject
)
import PDF.Parser (MonadParser, Parser, evalParser, string, takeAll)
import PDF.Parser (MonadParser, (<?>), Parser, evalParser, string, takeAll)
data StateOperator =
Cm | W | J | J_ | M | D | Ri | I | Gs -- general graphic state
@ -40,24 +40,22 @@ stateOperator M = (,) M <$> count 1 argument <* string "M"
stateOperator D = (,) D <$> count 2 argument <* string "d"
stateOperator Ri = (,) Ri <$> count 1 argument <* string "ri"
stateOperator I = (,) I <$> count 1 argument <* string "i"
stateOperator Gs = (,) Gs <$> count 1 argument <* string "gs"
stateOperator Gs = (,) Gs <$> count 1 nameArg <* string "gs"
textOperator :: MonadParser m => TextOperator -> m (Call TextOperator)
textOperator Td = (,) Td <$> count 2 argument <* string "Td"
textOperator TD = (,) TD <$> count 2 argument <* string "TD"
textOperator Tm = (,) Tm <$> count 6 argument <* string "Tm"
textOperator Tstar = (,) Td <$> return [] <* string "T*"
textOperator TJ =
(,) TJ <$> sequence [Typed . Array <$> array] <* blank <* string "TJ"
textOperator Tj =
(,) Tj <$> sequence [Typed . StringObject <$> stringObject] <* blank <* string "Tj"
textOperator Quote = (,) Quote <$> count 1 argument <* string "'"
textOperator DQuote = (,) DQuote <$> count 1 argument <* string "\""
textOperator TJ = (,) TJ <$> count 1 arrayArg <* string "TJ"
textOperator Tj = (,) Tj <$> count 1 stringArg <* string "Tj"
textOperator Quote = (,) Quote <$> count 1 stringArg <* string "'"
textOperator DQuote = (,) DQuote <$> count 1 stringArg <* string "\""
textOperator Tc = (,) Tc <$> count 1 argument <* string "Tc"
textOperator Tw = (,) Tw <$> count 1 argument <* string "Tw"
textOperator Tz = (,) Tz <$> count 1 argument <* string "Tz"
textOperator TL = (,) TL <$> count 1 argument <* string "TL"
textOperator Tf = (,) Tf <$> sequence [Typed . NameObject <$> name, argument] <* string "Tf"
textOperator Tf = (,) Tf <$> sequence [nameArg, argument] <* string "Tf"
textOperator Tr = (,) Tr <$> count 1 argument <* string "Tr"
textOperator Ts = (,) Ts <$> count 1 argument <* string "Ts"
@ -67,6 +65,15 @@ a parserGenerator = choice $ parserGenerator <$> [minBound .. maxBound]
argument :: MonadParser m => m Argument
argument = Raw <$> takeAll regular <* blank
arrayArg :: MonadParser m => m Argument
arrayArg = Typed . Array <$> array <* blank
nameArg :: MonadParser m => m Argument
nameArg = Typed . NameObject <$> name <* blank
stringArg :: MonadParser m => m Argument
stringArg = Typed . StringObject <$> stringObject <* blank
data PageContents = PageContents {
chunks :: [ByteString]
}
@ -78,20 +85,20 @@ pageContents font input =
evalParser (runReaderT (PageContents <$> page) font) emptyCMap input
page :: ParserWithFont [ByteString]
page = graphicState <|> text
page = graphicState <|> text <?> "Text page contents"
graphicState :: ParserWithFont [ByteString]
graphicState =
string "q" *> blank *> insideQ <* string "Q"
(string "q" *> blank *> insideQ <* blank <* string "Q") <?> "Graphic state"
where
insideQ = join <$> ((command <|> page) `sepBy` blank )
insideQ = concat <$> ((command <|> page) `sepBy` blank )
command = a stateOperator *> return []
text :: ParserWithFont [ByteString]
text =
string "BT" *> blank *> commands <* blank <* string "ET"
string "BT" *> blank *> commands <* blank <* string "ET" <?> "Text operators"
where
commands = join <$> (a textOperator >>= runOperator) `sepBy` blank
commands = concat <$> (a textOperator >>= runOperator) `sepBy` blank
runOperator :: Call TextOperator -> ParserWithFont [ByteString]
runOperator (Tf, [Typed (NameObject fontName), _]) =