From 2dd1cde7158a032e58618c96543ce7db2fa1aa47 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Thu, 30 Dec 2021 16:53:34 +0100
Subject: [PATCH] Lua: allow binary (byte string) readers to be used with
 `pandoc.read`

---
 pandoc.cabal                         |   1 +
 src/Text/Pandoc/Lua/Module/Pandoc.hs |  14 +++++++-------
 test/lua/module/pandoc.lua           |   8 ++++++++
 test/lua/module/tiny.epub            | Bin 0 -> 3097 bytes
 4 files changed, 16 insertions(+), 7 deletions(-)
 create mode 100644 test/lua/module/tiny.epub

diff --git a/pandoc.cabal b/pandoc.cabal
index cd948d253..1c6f3fbd8 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -400,6 +400,7 @@ extra-source-files:
                  test/odt/native/*.native
                  test/lua/*.lua
                  test/lua/module/*.lua
+                 test/lua/module/tiny.epub
 source-repository head
   type:          git
   location:      git://github.com/jgm/pandoc.git
diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs
index 20c2f5af5..d2d74aaa8 100644
--- a/src/Text/Pandoc/Lua/Module/Pandoc.hs
+++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs
@@ -21,7 +21,6 @@ module Text.Pandoc.Lua.Module.Pandoc
 import Prelude hiding (read)
 import Control.Monad (forM_, when)
 import Control.Monad.Catch (catch, throwM)
-import Control.Monad.Except (throwError)
 import Data.Data (Data, dataTypeConstrs, dataTypeOf, showConstr)
 import Data.Default (Default (..))
 import Data.Maybe (fromMaybe)
@@ -47,6 +46,7 @@ import qualified Data.ByteString.Lazy as BL
 import qualified Data.ByteString.Lazy.Char8 as BSL
 import qualified Data.Text as T
 import qualified Text.Pandoc.Lua.Util as LuaUtil
+import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.Error
 
 -- | Push the "pandoc" package to the Lua stack. Requires the `List`
@@ -170,12 +170,12 @@ functions =
   , defun "read"
     ### (\content mformatspec mreaderOptions -> do
             let formatSpec = fromMaybe "markdown" mformatspec
-                readerOptions = fromMaybe def mreaderOptions
+                readerOpts = fromMaybe def mreaderOptions
             res <- Lua.liftIO . runIO $ getReader formatSpec >>= \case
-              (TextReader r, es) -> r readerOptions{ readerExtensions = es }
-                                      content
-              _ -> throwError $ PandocSomeError
-                   "Only textual formats are supported"
+              (TextReader r, es)      ->
+                r readerOpts{ readerExtensions = es } (UTF8.toText content)
+              (ByteStringReader r, es) ->
+                r readerOpts{ readerExtensions = es } (BSL.fromStrict content)
             case res of
               Right pd -> return pd -- success, got a Pandoc document
               Left  (PandocUnknownReaderError f) ->
@@ -185,7 +185,7 @@ functions =
                 "Extension " <> e <> " not supported for " <> f
               Left e ->
                 throwM e)
-    <#> parameter peekText "string" "content" "text to parse"
+    <#> parameter peekByteString "string" "content" "text to parse"
     <#> optionalParameter peekText "string" "formatspec" "format and extensions"
     <#> optionalParameter peekReaderOptions "ReaderOptions" "reader_options"
           "reader options"
diff --git a/test/lua/module/pandoc.lua b/test/lua/module/pandoc.lua
index 892ffee03..397182438 100644
--- a/test/lua/module/pandoc.lua
+++ b/test/lua/module/pandoc.lua
@@ -279,6 +279,14 @@ return {
         pandoc.read(indented_code, 'markdown', {indented_code_classes={'foo'}})
       )
     end),
+    test('can read epub', function ()
+      -- Binary mode keeps the zip bytes intact; assert reports a missing fixture.
+      local epub = assert(io.open('lua/module/tiny.epub', 'rb'))
+      local content = epub:read('a')
+      epub:close() -- release the handle before parsing
+      local blocks = pandoc.read(content, 'epub').blocks
+      assert.are_equal(blocks[#blocks], pandoc.Para { pandoc.Emph 'EPUB' })
+    end),
     test('failing read', function ()
       assert.error_matches(
         function () pandoc.read('foo', 'nosuchreader') end,
diff --git a/test/lua/module/tiny.epub b/test/lua/module/tiny.epub
new file mode 100644
index 0000000000000000000000000000000000000000..9e92202b7cbbc1e28d0409956a9c88de38a75c7c
GIT binary patch
literal 3097
zcmdT`O>f*p7~VD%XoP5yDkP9V#vFrp_JeG9jn`EvsRX5ErECr$A(*wtS*!lS9w(bh
zNc11@2k41YkvR1R5?nxBxghZmxN+f9ArQ|y_S#Nxq;O;-$79d?;rE&6d1q#~HlEn*
z(C_2V9_+^atJkhzOxM%6LO&GpC=onK0^i}%k0U0Me6e-UPiB?p-G>i$VO;$#V6-#a
zdy~~B@4)2K=<(;TZge_yU0$0waU?|~yK&-K*H&Sm9f&kTnyKFJ_H<1|PVD;8{#4(a
z-!#VhHG9)axO129i`sHXmhg?TslJplF&R5NJnZ^z;&tP6pACAws|*QrEsy+9a$)!`
zQu>}R(y316;DyoG5gOHUmrGtmGhL?&nB-}og1HV8fd~;GWBqQQ>9*xMW*t%U-D5%X
zG&1wtcg?HbWGDucp)nW@$Hwd4#5ERUXJU+cW9|&L$KJ>rS?pLW6<Yc-5Oq}bNMAb_
znN;@bjWP{*w4VbSTSUg*u9~b~soJz8jT4c|qp9vXQlw#KhQJp{xVk;)4f;lZU<`Zn
z{>bbN&0g;VixDJtaU*1GE98;yA-8S$QiSTmB4?#*%T#z%UB*sVJ%+;dxk2Qa>P`Br
z3|B5-v4sWGHP(#657tp4e$ZVlC0U#rho*}lWl_Gs`omLi2IU`>amvpVE6MoTV<Qx+
z3(gWh5*kWG9+X^sJIV$Ks`zy>@=~_8wU4D2wuNbtg3Rz%h#-cpsfC;Bw2=QUoMM#D
ztYo~sSqp0Kgyu-YN^_DbJZ_i#^zBPm&Oz0<o;zk&B5_vbz4F$3H|Fopb~M5sa-my`
z@D7JcDgDW0!b<d&I!)4ArKmQEW=h8<krpB%5(DI=;4Wwf9%{~#r<ssbJ(nK30Wd1a
zUudBe0xKLAv3sOZMzesNo<lCddc;g@ON2H7W-$ybh8K)1a4U+dA*A;9!x!Id{08{w
zdZvP~;QCq717+%-g`Ttx6}O=0az8s#0Nxu;#v|%p+YG+cX&O*OJcP2KB@+zOwX%-_
zng7yn2tfsG9nBSqTsGqMTWn-3c`!qxxe@0PxLYOfwHSwdMW%Q1Z~^E~*p_SM#?GAD
z9M%)`EnAx!qKQ>=(DrSPg~ELn`;jbqPx(MCk!IfI3lX%%sprbi)v^>qR(e@?1QOjk
zdNEW93I3+|hG4`S-+%ezF4#cVvuj4cx9BZr<;4G5QC^D8yOI=YZ^}(|INA$oU|TWP
zB=BvEulI>&BeuE4Xp<oWMwYY)_<xEZUp&$KE&~4BJM-(t+jnj@S|6Ex_xG=V{*Gz7
zo^P5F2ROQU+C^(R>uOa_wT{&(r969(^kSa*nMrcWOzD`|Z6gpkr}KhYG{GteP)Xxh
zdVV0XdZ>AM5Eu!U%c-s!ddXHgA)US(#$u6V@KPruwJQ>p8{t`%mWX`r{L34spZb84
z`aQZ&xAhsY>D6}PD|{|@{<>~rSf2mtL~SR)!ll!BbpOM(OQYIm!nW<MPWUxWl+Der
z&PskKx4$~;FUS1KCeTdV*7|Gr7jV!m(iR&*)3dEb)Sg>Um7Z0@X!^GCyDs)24(1KE
j*R{6sy-xXa)2$){v43{!!joio5r3D?cRGKNo;v>k&q9ud

literal 0
HcmV?d00001