Fix parsing errors forgetting to make sure there's a space after special operator arguments like names and stringObjects
This commit is contained in:
parent
32efdcdd6b
commit
b8ca7281aa
1 changed files with 22 additions and 15 deletions
|
@ -5,7 +5,7 @@ module PDF.Text {-(
|
|||
)-} where
|
||||
|
||||
import Control.Applicative ((<|>))
|
||||
import Control.Monad (foldM, join)
|
||||
import Control.Monad (foldM)
|
||||
import Control.Monad.Reader (ReaderT, runReaderT, asks)
|
||||
import Control.Monad.State (get, put)
|
||||
import Data.Attoparsec.ByteString.Char8 (choice, count, sepBy)
|
||||
|
@ -18,7 +18,7 @@ import PDF.Object (
|
|||
DirectObject(..), StringObject(..)
|
||||
, array, blank, name, parseBytes, regular, stringObject
|
||||
)
|
||||
import PDF.Parser (MonadParser, Parser, evalParser, string, takeAll)
|
||||
import PDF.Parser (MonadParser, (<?>), Parser, evalParser, string, takeAll)
|
||||
|
||||
data StateOperator =
|
||||
Cm | W | J | J_ | M | D | Ri | I | Gs -- general graphic state
|
||||
|
@ -40,24 +40,22 @@ stateOperator M = (,) M <$> count 1 argument <* string "M"
|
|||
stateOperator D = (,) D <$> count 2 argument <* string "d"
|
||||
stateOperator Ri = (,) Ri <$> count 1 argument <* string "ri"
|
||||
stateOperator I = (,) I <$> count 1 argument <* string "i"
|
||||
stateOperator Gs = (,) Gs <$> count 1 argument <* string "gs"
|
||||
stateOperator Gs = (,) Gs <$> count 1 nameArg <* string "gs"
|
||||
|
||||
textOperator :: MonadParser m => TextOperator -> m (Call TextOperator)
|
||||
textOperator Td = (,) Td <$> count 2 argument <* string "Td"
|
||||
textOperator TD = (,) TD <$> count 2 argument <* string "TD"
|
||||
textOperator Tm = (,) Tm <$> count 6 argument <* string "Tm"
|
||||
textOperator Tstar = (,) Td <$> return [] <* string "T*"
|
||||
textOperator TJ =
|
||||
(,) TJ <$> sequence [Typed . Array <$> array] <* blank <* string "TJ"
|
||||
textOperator Tj =
|
||||
(,) Tj <$> sequence [Typed . StringObject <$> stringObject] <* blank <* string "Tj"
|
||||
textOperator Quote = (,) Quote <$> count 1 argument <* string "'"
|
||||
textOperator DQuote = (,) DQuote <$> count 1 argument <* string "\""
|
||||
textOperator TJ = (,) TJ <$> count 1 arrayArg <* string "TJ"
|
||||
textOperator Tj = (,) Tj <$> count 1 stringArg <* string "Tj"
|
||||
textOperator Quote = (,) Quote <$> count 1 stringArg <* string "'"
|
||||
textOperator DQuote = (,) DQuote <$> count 1 stringArg <* string "\""
|
||||
textOperator Tc = (,) Tc <$> count 1 argument <* string "Tc"
|
||||
textOperator Tw = (,) Tw <$> count 1 argument <* string "Tw"
|
||||
textOperator Tz = (,) Tz <$> count 1 argument <* string "Tz"
|
||||
textOperator TL = (,) TL <$> count 1 argument <* string "TL"
|
||||
textOperator Tf = (,) Tf <$> sequence [Typed . NameObject <$> name, argument] <* string "Tf"
|
||||
textOperator Tf = (,) Tf <$> sequence [nameArg, argument] <* string "Tf"
|
||||
textOperator Tr = (,) Tr <$> count 1 argument <* string "Tr"
|
||||
textOperator Ts = (,) Ts <$> count 1 argument <* string "Ts"
|
||||
|
||||
|
@ -67,6 +65,15 @@ a parserGenerator = choice $ parserGenerator <$> [minBound .. maxBound]
|
|||
argument :: MonadParser m => m Argument
|
||||
argument = Raw <$> takeAll regular <* blank
|
||||
|
||||
arrayArg :: MonadParser m => m Argument
|
||||
arrayArg = Typed . Array <$> array <* blank
|
||||
|
||||
nameArg :: MonadParser m => m Argument
|
||||
nameArg = Typed . NameObject <$> name <* blank
|
||||
|
||||
stringArg :: MonadParser m => m Argument
|
||||
stringArg = Typed . StringObject <$> stringObject <* blank
|
||||
|
||||
data PageContents = PageContents {
|
||||
chunks :: [ByteString]
|
||||
}
|
||||
|
@ -78,20 +85,20 @@ pageContents font input =
|
|||
evalParser (runReaderT (PageContents <$> page) font) emptyCMap input
|
||||
|
||||
page :: ParserWithFont [ByteString]
|
||||
page = graphicState <|> text
|
||||
page = graphicState <|> text <?> "Text page contents"
|
||||
|
||||
graphicState :: ParserWithFont [ByteString]
|
||||
graphicState =
|
||||
string "q" *> blank *> insideQ <* string "Q"
|
||||
(string "q" *> blank *> insideQ <* blank <* string "Q") <?> "Graphic state"
|
||||
where
|
||||
insideQ = join <$> ((command <|> page) `sepBy` blank )
|
||||
insideQ = concat <$> ((command <|> page) `sepBy` blank )
|
||||
command = a stateOperator *> return []
|
||||
|
||||
text :: ParserWithFont [ByteString]
|
||||
text =
|
||||
string "BT" *> blank *> commands <* blank <* string "ET"
|
||||
string "BT" *> blank *> commands <* blank <* string "ET" <?> "Text operators"
|
||||
where
|
||||
commands = join <$> (a textOperator >>= runOperator) `sepBy` blank
|
||||
commands = concat <$> (a textOperator >>= runOperator) `sepBy` blank
|
||||
|
||||
runOperator :: Call TextOperator -> ParserWithFont [ByteString]
|
||||
runOperator (Tf, [Typed (NameObject fontName), _]) =
|
||||
|
|
Loading…
Reference in a new issue