From 957314143faec08b4687822557dac6ac32216cb9 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 20 Mar 2019 12:22:17 -0700
Subject: [PATCH] Improve pdfSize in ImageSize.

Improves fix to #4322.
---
 pandoc.cabal                 |  3 ++-
 src/Text/Pandoc/ImageSize.hs | 45 ++++++++++++++++++++----------------
 2 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/pandoc.cabal b/pandoc.cabal
index 8adcf8d47..e3b890f64 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -404,7 +404,8 @@ library
                  case-insensitive >= 1.2 && < 1.3,
                  unicode-transforms >= 0.3 && < 0.4,
                  HsYAML >= 0.1.1.1 && < 0.2,
-                 ipynb >= 0.1 && < 0.2
+                 ipynb >= 0.1 && < 0.2,
+                 attoparsec >= 0.12 && < 0.14
   if impl(ghc < 8.0)
     build-depends: semigroups == 0.18.*,
                    -- basement 0.0.8 and foundation 0.0.21, transitive
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index 802959484..c5289bbc2 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -50,7 +50,9 @@ import qualified Text.Pandoc.UTF8 as UTF8
 import qualified Text.XML.Light as Xml
 import qualified Data.Map as M
 import Control.Monad.Except
+import Control.Applicative
 import Data.Maybe (fromMaybe)
+import qualified Data.Attoparsec.ByteString.Char8 as A
 
 -- quick and dirty functions to get image sizes
 -- algorithms borrowed from wwwis.pl
@@ -267,26 +269,29 @@ epsSize img = do
 
 pdfSize :: ByteString -> Maybe ImageSize
 pdfSize img =
-  case dropWhile (\l -> not (l == "stream" ||
-                             "/MediaBox" `B.isPrefixOf` l)) (B.lines img) of
-       (x:_)
-         | "/MediaBox" `B.isPrefixOf` x
-         -> case B.words . B.takeWhile (/=']')
-                         . B.drop 1
-                         . B.dropWhile (/='[')
-                         $ x of
-                     [x1, y1, x2, y2] -> do
-                        x1' <- safeRead $ B.unpack x1
-                        x2' <- safeRead $ B.unpack x2
-                        y1' <- safeRead $ B.unpack y1
-                        y2' <- safeRead $ B.unpack y2
-                        return ImageSize{
-                            pxX  = x2' - x1'
-                          , pxY  = y2' - y1'
-                          , dpiX = 72
-                          , dpiY = 72 }
-                     _ -> mzero
-       _    -> mzero
+  case A.parseOnly pPdfSize img of  -- run the pPdfSize parser over the image bytes
+    Left _   -> Nothing             -- any parse failure means no size is available
+    Right sz -> Just sz             -- the ImageSize produced by pPdfSize
+
+pPdfSize :: A.Parser ImageSize  -- yields the size from the first well-formed /MediaBox array
+pPdfSize = do
+  A.skipWhile (/='/')  -- jump to the next PDF name; fails at end of input, ending the search
+  A.char8 '/'
+  (do A.string "MediaBox"
+      A.skipSpace *> A.char8 '['  -- whitespace is allowed here, as in "/MediaBox [0 0 612 792]"
+      [x1,y1,x2,y2] <- A.count 4 $ do
+        A.skipSpace  -- numbers may be separated by spaces, tabs or newlines
+        raw <- A.many1 $ A.satisfy (\c -> isDigit c || c == '.' || c == '-')  -- '-' for negative origins
+        case safeRead raw of
+          Just (r :: Double) -> return $ floor r
+          Nothing            -> mzero
+      A.skipSpace *> A.char8 ']'  -- tolerate padding before the closing bracket
+      return $ ImageSize{
+              pxX  = x2 - x1
+            , pxY  = y2 - y1
+            , dpiX = 72
+            , dpiY = 72 }
+   ) <|> pPdfSize  -- not a usable MediaBox: backtrack and retry from the next '/'
 
 pngSize :: ByteString -> Maybe ImageSize
 pngSize img = do