Fix parsing errors forgetting to make sure there's a space after special operator arguments like names and stringObjects
This commit is contained in:
parent
32efdcdd6b
commit
b8ca7281aa
1 changed files with 22 additions and 15 deletions
|
@ -5,7 +5,7 @@ module PDF.Text {-(
|
||||||
)-} where
|
)-} where
|
||||||
|
|
||||||
import Control.Applicative ((<|>))
|
import Control.Applicative ((<|>))
|
||||||
import Control.Monad (foldM, join)
|
import Control.Monad (foldM)
|
||||||
import Control.Monad.Reader (ReaderT, runReaderT, asks)
|
import Control.Monad.Reader (ReaderT, runReaderT, asks)
|
||||||
import Control.Monad.State (get, put)
|
import Control.Monad.State (get, put)
|
||||||
import Data.Attoparsec.ByteString.Char8 (choice, count, sepBy)
|
import Data.Attoparsec.ByteString.Char8 (choice, count, sepBy)
|
||||||
|
@ -18,7 +18,7 @@ import PDF.Object (
|
||||||
DirectObject(..), StringObject(..)
|
DirectObject(..), StringObject(..)
|
||||||
, array, blank, name, parseBytes, regular, stringObject
|
, array, blank, name, parseBytes, regular, stringObject
|
||||||
)
|
)
|
||||||
import PDF.Parser (MonadParser, Parser, evalParser, string, takeAll)
|
import PDF.Parser (MonadParser, (<?>), Parser, evalParser, string, takeAll)
|
||||||
|
|
||||||
data StateOperator =
|
data StateOperator =
|
||||||
Cm | W | J | J_ | M | D | Ri | I | Gs -- general graphic state
|
Cm | W | J | J_ | M | D | Ri | I | Gs -- general graphic state
|
||||||
|
@ -40,24 +40,22 @@ stateOperator M = (,) M <$> count 1 argument <* string "M"
|
||||||
stateOperator D = (,) D <$> count 2 argument <* string "d"
|
stateOperator D = (,) D <$> count 2 argument <* string "d"
|
||||||
stateOperator Ri = (,) Ri <$> count 1 argument <* string "ri"
|
stateOperator Ri = (,) Ri <$> count 1 argument <* string "ri"
|
||||||
stateOperator I = (,) I <$> count 1 argument <* string "i"
|
stateOperator I = (,) I <$> count 1 argument <* string "i"
|
||||||
stateOperator Gs = (,) Gs <$> count 1 argument <* string "gs"
|
stateOperator Gs = (,) Gs <$> count 1 nameArg <* string "gs"
|
||||||
|
|
||||||
textOperator :: MonadParser m => TextOperator -> m (Call TextOperator)
|
textOperator :: MonadParser m => TextOperator -> m (Call TextOperator)
|
||||||
textOperator Td = (,) Td <$> count 2 argument <* string "Td"
|
textOperator Td = (,) Td <$> count 2 argument <* string "Td"
|
||||||
textOperator TD = (,) TD <$> count 2 argument <* string "TD"
|
textOperator TD = (,) TD <$> count 2 argument <* string "TD"
|
||||||
textOperator Tm = (,) Tm <$> count 6 argument <* string "Tm"
|
textOperator Tm = (,) Tm <$> count 6 argument <* string "Tm"
|
||||||
textOperator Tstar = (,) Td <$> return [] <* string "T*"
|
textOperator Tstar = (,) Td <$> return [] <* string "T*"
|
||||||
textOperator TJ =
|
textOperator TJ = (,) TJ <$> count 1 arrayArg <* string "TJ"
|
||||||
(,) TJ <$> sequence [Typed . Array <$> array] <* blank <* string "TJ"
|
textOperator Tj = (,) Tj <$> count 1 stringArg <* string "Tj"
|
||||||
textOperator Tj =
|
textOperator Quote = (,) Quote <$> count 1 stringArg <* string "'"
|
||||||
(,) Tj <$> sequence [Typed . StringObject <$> stringObject] <* blank <* string "Tj"
|
textOperator DQuote = (,) DQuote <$> count 1 stringArg <* string "\""
|
||||||
textOperator Quote = (,) Quote <$> count 1 argument <* string "'"
|
|
||||||
textOperator DQuote = (,) DQuote <$> count 1 argument <* string "\""
|
|
||||||
textOperator Tc = (,) Tc <$> count 1 argument <* string "Tc"
|
textOperator Tc = (,) Tc <$> count 1 argument <* string "Tc"
|
||||||
textOperator Tw = (,) Tw <$> count 1 argument <* string "Tw"
|
textOperator Tw = (,) Tw <$> count 1 argument <* string "Tw"
|
||||||
textOperator Tz = (,) Tz <$> count 1 argument <* string "Tz"
|
textOperator Tz = (,) Tz <$> count 1 argument <* string "Tz"
|
||||||
textOperator TL = (,) TL <$> count 1 argument <* string "TL"
|
textOperator TL = (,) TL <$> count 1 argument <* string "TL"
|
||||||
textOperator Tf = (,) Tf <$> sequence [Typed . NameObject <$> name, argument] <* string "Tf"
|
textOperator Tf = (,) Tf <$> sequence [nameArg, argument] <* string "Tf"
|
||||||
textOperator Tr = (,) Tr <$> count 1 argument <* string "Tr"
|
textOperator Tr = (,) Tr <$> count 1 argument <* string "Tr"
|
||||||
textOperator Ts = (,) Ts <$> count 1 argument <* string "Ts"
|
textOperator Ts = (,) Ts <$> count 1 argument <* string "Ts"
|
||||||
|
|
||||||
|
@ -67,6 +65,15 @@ a parserGenerator = choice $ parserGenerator <$> [minBound .. maxBound]
|
||||||
argument :: MonadParser m => m Argument
|
argument :: MonadParser m => m Argument
|
||||||
argument = Raw <$> takeAll regular <* blank
|
argument = Raw <$> takeAll regular <* blank
|
||||||
|
|
||||||
|
arrayArg :: MonadParser m => m Argument
|
||||||
|
arrayArg = Typed . Array <$> array <* blank
|
||||||
|
|
||||||
|
nameArg :: MonadParser m => m Argument
|
||||||
|
nameArg = Typed . NameObject <$> name <* blank
|
||||||
|
|
||||||
|
stringArg :: MonadParser m => m Argument
|
||||||
|
stringArg = Typed . StringObject <$> stringObject <* blank
|
||||||
|
|
||||||
data PageContents = PageContents {
|
data PageContents = PageContents {
|
||||||
chunks :: [ByteString]
|
chunks :: [ByteString]
|
||||||
}
|
}
|
||||||
|
@ -78,20 +85,20 @@ pageContents font input =
|
||||||
evalParser (runReaderT (PageContents <$> page) font) emptyCMap input
|
evalParser (runReaderT (PageContents <$> page) font) emptyCMap input
|
||||||
|
|
||||||
page :: ParserWithFont [ByteString]
|
page :: ParserWithFont [ByteString]
|
||||||
page = graphicState <|> text
|
page = graphicState <|> text <?> "Text page contents"
|
||||||
|
|
||||||
graphicState :: ParserWithFont [ByteString]
|
graphicState :: ParserWithFont [ByteString]
|
||||||
graphicState =
|
graphicState =
|
||||||
string "q" *> blank *> insideQ <* string "Q"
|
(string "q" *> blank *> insideQ <* blank <* string "Q") <?> "Graphic state"
|
||||||
where
|
where
|
||||||
insideQ = join <$> ((command <|> page) `sepBy` blank )
|
insideQ = concat <$> ((command <|> page) `sepBy` blank )
|
||||||
command = a stateOperator *> return []
|
command = a stateOperator *> return []
|
||||||
|
|
||||||
text :: ParserWithFont [ByteString]
|
text :: ParserWithFont [ByteString]
|
||||||
text =
|
text =
|
||||||
string "BT" *> blank *> commands <* blank <* string "ET"
|
string "BT" *> blank *> commands <* blank <* string "ET" <?> "Text operators"
|
||||||
where
|
where
|
||||||
commands = join <$> (a textOperator >>= runOperator) `sepBy` blank
|
commands = concat <$> (a textOperator >>= runOperator) `sepBy` blank
|
||||||
|
|
||||||
runOperator :: Call TextOperator -> ParserWithFont [ByteString]
|
runOperator :: Call TextOperator -> ParserWithFont [ByteString]
|
||||||
runOperator (Tf, [Typed (NameObject fontName), _]) =
|
runOperator (Tf, [Typed (NameObject fontName), _]) =
|
||||||
|
|
Loading…
Reference in a new issue