Ipynb reader/writer: better handling of cell metadata.

We now handle even complex cell metadata in the Div's attributes.
Simple metadata fields are rendered as a plain string, and complex ones
as JSON.
This commit is contained in:
John MacFarlane 2019-03-09 14:17:21 -08:00
parent bf4a164a6f
commit 873f342f11
5 changed files with 24 additions and 17 deletions

View file

@ -18,6 +18,7 @@ Ipynb (Jupyter notebook JSON format) reader for pandoc.
module Text.Pandoc.Readers.Ipynb ( readIpynb )
where
import Prelude
import Data.Char (isDigit)
import Data.List (isPrefixOf)
import Data.Maybe (fromMaybe)
import Data.Digest.Pure.SHA (sha1, showDigest)
@ -39,6 +40,7 @@ import qualified Data.ByteString.Lazy as BL
import Data.Aeson as Aeson
import Control.Monad.Except (throwError)
import Text.Pandoc.Readers.Markdown (readMarkdown)
import qualified Text.Pandoc.UTF8 as UTF8
readIpynb :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readIpynb opts t = do
@ -217,10 +219,11 @@ jsonMetaToMeta = M.mapKeys T.unpack . M.map valueToMetaValue
valueToMetaValue Aeson.Null = MetaString ""
jsonMetaToPairs :: JSONMeta -> [(String, String)]
jsonMetaToPairs = M.toList . M.mapMaybe
(\case
MetaString s -> Just s
MetaBool True -> Just "true"
MetaBool False -> Just "false"
-- for now we skip complex cell metadata:
_ -> Nothing) . jsonMetaToMeta
jsonMetaToPairs = M.toList . M.mapKeys T.unpack . M.map
(\case
String t
| not (T.all isDigit t)
, t /= "true"
, t /= "false"
-> T.unpack t
x -> UTF8.toStringLazy $ Aeson.encode x)

View file

@ -213,10 +213,9 @@ extractData bs = do
pairsToJSONMeta :: [(String, String)] -> JSONMeta
pairsToJSONMeta kvs =
M.fromList [(T.pack k, case v of
"true" -> Bool True
"false" -> Bool False
_ -> case safeRead v of
Just n -> Number n
_ -> String (T.pack v))
| (k,v) <- kvs , k /= "execution_count" ]
M.fromList [(T.pack k, case Aeson.decode (UTF8.fromStringLazy v) of
Just val -> val
Nothing -> String (T.pack v))
| (k,v) <- kvs
, k /= "execution_count"
]

View file

@ -8,6 +8,6 @@ Pandoc (Meta {unMeta = fromList [("jupyter",MetaMap (fromList [("nbformat",MetaI
[Header 2 ("pyout",[],[]) [Str "Pyout"]]
,Div ("",["cell","code"],[])
[CodeBlock ("",["python"],[]) "from IPython.display import HTML\nHTML(\"\"\"\n<script>\nconsole.log(\"hello\");\n</script>\n<b>HTML</b>\n\"\"\")"]
,Div ("",["cell","markdown"],[])
,Div ("",["cell","markdown"],[("tags","[\"foo\",\"bar\"]")])
[Header 2 ("image",[],[]) [Str "Image"]
,Para [Str "This",Space,Str "image",Space,Image ("",[],[]) [Str "the",Space,Str "moon"] ("lalune.jpg",""),Space,Str "will",Space,Str "be",Space,Str "included",Space,Str "as",Space,Str "a",Space,Str "cell",SoftBreak,Str "attachment."]]]

View file

@ -45,7 +45,12 @@
},
{
"cell_type": "markdown",
"metadata": {},
"metadata": {
"tags": [
"foo",
"bar"
]
},
"source": [
"Image\n",
"-----\n",

View file

@ -8,6 +8,6 @@ Pandoc (Meta {unMeta = fromList [("jupyter",MetaMap (fromList [("nbformat",MetaS
[Header 2 ("pyout",[],[]) [Str "Pyout"]]
,Div ("",["cell","code"],[])
[CodeBlock ("",["python"],[]) "from IPython.display import HTML\nHTML(\"\"\"\n<script>\nconsole.log(\"hello\");\n</script>\n<b>HTML</b>\n\"\"\")"]
,Div ("",["cell","markdown"],[])
,Div ("",["cell","markdown"],[("tags","[\"foo\",\"bar\"]")])
[Header 2 ("image",[],[]) [Str "Image"]
,Para [Str "This",Space,Str "image",Space,Image ("",[],[]) [Str "the",Space,Str "moon"] ("lalune.jpg",""),Space,Str "will",Space,Str "be",Space,Str "included",Space,Str "as",Space,Str "a",Space,Str "cell",SoftBreak,Str "attachment."]]]