Ipynb reader & writer: properly handle cell "id".

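The reader passes an existing cell "id" through (in Nb4) as the
identifier of the cell's Div. The writer reuses that identifier
when it is a valid cell id; otherwise it generates a random one,
so that all cells have an "id" when nbformat is 4.5 or later.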

Closes #7728.
This commit is contained in:
John MacFarlane 2021-12-06 23:39:08 -08:00
parent 72075423d0
commit 51142c6803
8 changed files with 85 additions and 42 deletions

View file

@ -7,3 +7,8 @@ constraints: aeson >= 2.0.1.0
-- type: git
-- location: https://github.com/jgm/texmath.git
-- tag: 674bcbaec03e5550f155623de6662953bd157625
source-repository-package
type: git
location: https://github.com/jgm/ipynb.git
tag: 62f1a5180a61bc89c06982d8b869b1f254208699

View file

@ -475,7 +475,7 @@ library
http-client >= 0.4.30 && < 0.8,
http-client-tls >= 0.2.4 && < 0.4,
http-types >= 0.8 && < 0.13,
ipynb >= 0.1.0.2 && < 0.2,
ipynb >= 0.2 && < 0.3,
jira-wiki-markup >= 1.4 && < 1.5,
lpeg >= 1.0.1 && < 1.1,
mtl >= 2.2 && < 2.3,

View file

@ -77,7 +77,10 @@ cellToBlocks opts lang c = do
let Source ts = cellSource c
let source = mconcat ts
let kvs = jsonMetaToPairs (cellMetadata c)
let attachments = maybe mempty M.toList $ cellAttachments c
let attachments = case cellAttachments c of
Nothing -> mempty
Just (MimeAttachments m) -> M.toList m
let ident = fromMaybe mempty $ cellId c
mapM_ addAttachment attachments
case cellType c of
Ipynb.Markdown -> do
@ -86,12 +89,12 @@ cellToBlocks opts lang c = do
else do
Pandoc _ bs <- walk fixImage <$> readMarkdown opts source
return bs
return $ B.divWith ("",["cell","markdown"],kvs)
return $ B.divWith (ident,["cell","markdown"],kvs)
$ B.fromList bs
Ipynb.Heading lev -> do
Pandoc _ bs <- readMarkdown opts
(T.replicate lev "#" <> " " <> source)
return $ B.divWith ("",["cell","markdown"],kvs)
return $ B.divWith (ident,["cell","markdown"],kvs)
$ B.fromList bs
Ipynb.Raw -> do
-- we use ipynb to indicate no format given (a wildcard in nbformat)
@ -108,11 +111,12 @@ cellToBlocks opts lang c = do
"text/restructuredtext" -> "rst"
"text/asciidoc" -> "asciidoc"
_ -> format
return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format' source
return $ B.divWith (ident,["cell","raw"],kvs)
$ B.rawBlock format' source
Ipynb.Code{ codeOutputs = outputs, codeExecutionCount = ec } -> do
outputBlocks <- mconcat <$> mapM outputToBlock outputs
let kvs' = maybe kvs (\x -> ("execution_count", tshow x):kvs) ec
return $ B.divWith ("",["cell","code"],kvs') $
return $ B.divWith (ident,["cell","code"],kvs') $
B.codeBlockWith ("",[lang],[]) source
<> outputBlocks
@ -161,7 +165,7 @@ outputToBlock Err{ errName = ename,
-- the output format.
handleData :: PandocMonad m
=> JSONMeta -> MimeBundle -> m B.Blocks
handleData metadata (MimeBundle mb) =
handleData (JSONMeta metadata) (MimeBundle mb) =
mconcat <$> mapM dataBlock (M.toList mb)
where
@ -209,7 +213,7 @@ handleData metadata (MimeBundle mb) =
dataBlock _ = return mempty
jsonMetaToMeta :: JSONMeta -> M.Map Text MetaValue
jsonMetaToMeta = M.map valueToMetaValue
jsonMetaToMeta (JSONMeta m) = M.map valueToMetaValue m
where
valueToMetaValue :: Value -> MetaValue
valueToMetaValue x@Object{} =
@ -228,11 +232,11 @@ jsonMetaToMeta = M.map valueToMetaValue
valueToMetaValue Aeson.Null = MetaString ""
jsonMetaToPairs :: JSONMeta -> [(Text, Text)]
jsonMetaToPairs = M.toList . M.map
jsonMetaToPairs (JSONMeta m) = M.toList . M.map
(\case
String t
| not (T.all isDigit t)
, t /= "true"
, t /= "false"
-> t
x -> T.pack $ UTF8.toStringLazy $ Aeson.encode x)
x -> T.pack $ UTF8.toStringLazy $ Aeson.encode x) $ m

View file

@ -37,6 +37,8 @@ import qualified Data.ByteString.Lazy as BL
import Data.Aeson.Encode.Pretty (Config(..), defConfig,
encodePretty', keyOrder, Indent(Spaces))
import Text.DocLayout (literal)
import Text.Pandoc.UUID (getRandomUUID)
import Data.Char (isAscii, isAlphaNum)
writeIpynb :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeIpynb opts d = do
@ -79,7 +81,7 @@ pandocToNotebook opts (Pandoc meta blocks) = do
let metadata = case fromJSON metadata' of
Error _ -> mempty -- TODO warning here? shouldn't happen
Success x -> x
cells <- extractCells opts blocks
cells <- extractCells nbformat opts blocks
return $ Notebook{
notebookMetadata = metadata
, notebookFormat = nbformat
@ -97,23 +99,26 @@ addAttachment (Image attr lab (src,tit))
return $ Image attr lab ("attachment:" <> src, tit)
addAttachment x = return x
extractCells :: PandocMonad m => WriterOptions -> [Block] -> m [Ipynb.Cell a]
extractCells _ [] = return []
extractCells opts (Div (_id,classes,kvs) xs : bs)
extractCells :: PandocMonad m
=> (Int, Int) -> WriterOptions -> [Block] -> m [Ipynb.Cell a]
extractCells _ _ [] = return []
extractCells nbformat opts (Div (ident,classes,kvs) xs : bs)
| "cell" `elem` classes
, "markdown" `elem` classes = do
let meta = pairsToJSONMeta kvs
(newdoc, attachments) <-
runStateT (walkM addAttachment (Pandoc nullMeta xs)) mempty
source <- writeMarkdown opts{ writerTemplate = Nothing } newdoc
uuid <- uuidFrom nbformat ident
(Ipynb.Cell{
cellType = Markdown
, cellId = uuid
, cellSource = Source $ breakLines $ T.stripEnd source
, cellMetadata = meta
, cellAttachments = if M.null attachments
then Nothing
else Just attachments } :)
<$> extractCells opts bs
else Just $ MimeAttachments attachments } :)
<$> extractCells nbformat opts bs
| "cell" `elem` classes
, "code" `elem` classes = do
let (codeContent, rest) =
@ -123,14 +128,16 @@ extractCells opts (Div (_id,classes,kvs) xs : bs)
let meta = pairsToJSONMeta kvs
outputs <- catMaybes <$> mapM blockToOutput rest
let exeCount = lookup "execution_count" kvs >>= safeRead
uuid <- uuidFrom nbformat ident
(Ipynb.Cell{
cellType = Ipynb.Code {
codeExecutionCount = exeCount
, codeOutputs = outputs
}
, cellId = uuid
, cellSource = Source $ breakLines codeContent
, cellMetadata = meta
, cellAttachments = Nothing } :) <$> extractCells opts bs
, cellAttachments = Nothing } :) <$> extractCells nbformat opts bs
| "cell" `elem` classes
, "raw" `elem` classes =
case consolidateAdjacentRawBlocks xs of
@ -150,33 +157,54 @@ extractCells opts (Div (_id,classes,kvs) xs : bs)
"rst" -> "text/restructuredtext"
"asciidoc" -> "text/asciidoc"
_ -> f
uuid <- uuidFrom nbformat ident
(Ipynb.Cell{
cellType = Raw
, cellId = uuid
, cellSource = Source $ breakLines raw
, cellMetadata = if format' == "ipynb" -- means no format given
then mempty
else M.insert "raw_mimetype"
else JSONMeta $ M.insert "raw_mimetype"
(Aeson.String format') mempty
, cellAttachments = Nothing } :) <$> extractCells opts bs
_ -> extractCells opts bs
extractCells opts (CodeBlock (_id,classes,kvs) raw : bs)
, cellAttachments = Nothing } :) <$> extractCells nbformat opts bs
_ -> extractCells nbformat opts bs
extractCells nbformat opts (CodeBlock (ident,classes,kvs) raw : bs)
| "code" `elem` classes = do
let meta = pairsToJSONMeta kvs
let exeCount = lookup "execution_count" kvs >>= safeRead
uuid <- uuidFrom nbformat ident
(Ipynb.Cell{
cellType = Ipynb.Code {
codeExecutionCount = exeCount
, codeOutputs = []
}
, cellId = uuid
, cellSource = Source $ breakLines raw
, cellMetadata = meta
, cellAttachments = Nothing } :) <$> extractCells opts bs
extractCells opts (b:bs) = do
, cellAttachments = Nothing } :) <$> extractCells nbformat opts bs
extractCells nbformat opts (b:bs) = do
let isCodeOrDiv (CodeBlock (_,cl,_) _) = "code" `elem` cl
isCodeOrDiv (Div (_,cl,_) _) = "cell" `elem` cl
isCodeOrDiv _ = False
let (mds, rest) = break isCodeOrDiv bs
extractCells opts (Div ("",["cell","markdown"],[]) (b:mds) : rest)
extractCells nbformat opts
(Div ("",["cell","markdown"],[]) (b:mds) : rest)
-- | Decide which @id@ (if any) a written notebook cell should carry.
--
-- For notebook formats older than 4.5 no id is produced ('Nothing').
-- For 4.5 and later, the supplied identifier is reused when it is
-- non-empty, at most 64 characters long, and consists only of ASCII
-- letters, digits, @-@ and @_@; otherwise a freshly generated random
-- UUID is used instead.
uuidFrom :: PandocMonad m => (Int, Int) -> Text -> m (Maybe Text)
uuidFrom nbformat ident
  | nbformat < (4,5) = return Nothing
  | identIsUsable    = return (Just ident)
  | otherwise        = do
      fresh <- getRandomUUID
      -- The first 9 characters of the shown UUID are discarded
      -- (presumably a "urn:uuid:" prefix -- confirm against Text.Pandoc.UUID).
      return $ Just $ T.pack $ drop 9 $ show fresh
 where
  -- Same acceptance rule for an existing identifier: 1..64 permitted chars.
  identIsUsable =
    not (T.null ident)
      && T.length ident <= 64
      && T.all permitted ident
  permitted ch = isAscii ch && (isAlphaNum ch || ch == '-' || ch == '_')
blockToOutput :: PandocMonad m => Block -> m (Maybe (Output a))
blockToOutput (Div (_,["output","stream",sname],_) (CodeBlock _ t:_)) =
@ -229,7 +257,7 @@ extractData bs = do
go (mmap, meta) b = (mmap, meta) <$ report (BlockNotRendered b)
pairsToJSONMeta :: [(Text, Text)] -> JSONMeta
pairsToJSONMeta kvs =
pairsToJSONMeta kvs = JSONMeta $
M.fromList [(k, case Aeson.decode (UTF8.fromTextLazy $ TL.fromStrict v) of
Just val -> val
Nothing -> String v)

View file

@ -32,10 +32,11 @@ extra-deps:
- commonmark-extensions-0.2.2
- citeproc-0.6
- aeson-pretty-0.8.9
- ipynb-0.1.0.2
- texmath-0.12.3.3
- unicode-transforms-0.4.0
- unicode-data-0.2.0
- git: https://github.com/jgm/ipynb.git
commit: 62f1a5180a61bc89c06982d8b869b1f254208699
ghc-options:
"$locals": -fhide-source-paths -Wno-missing-home-modules
resolver: lts-18.10

View file

@ -1,15 +1,15 @@
Pandoc (Meta {unMeta = fromList [("jupyter",MetaMap (fromList [("nbformat",MetaInlines [Str "4"]),("nbformat_minor",MetaInlines [Str "5"])]))]})
[Div ("",["cell","markdown"],[])
[Div ("uid1",["cell","markdown"],[])
[Header 1 ("lorem-ipsum",[],[]) [Str "Lorem",Space,Str "ipsum"]
,Para [Strong [Str "Lorem",Space,Str "ipsum"],Space,Str "dolor",Space,Str "sit",Space,Str "amet,",Space,Str "consectetur",Space,Str "adipiscing",Space,Str "elit.",Space,Str "Nunc",Space,Str "luctus",SoftBreak,Str "bibendum",Space,Str "felis",Space,Str "dictum",Space,Str "sodales."]]
,Div ("",["cell","code"],[])
,Div ("uid2",["cell","code"],[])
[CodeBlock ("",["python"],[]) "print(\"hello\")"]
,Div ("",["cell","markdown"],[])
,Div ("uid3",["cell","markdown"],[])
[Header 2 ("pyout",[],[]) [Str "Pyout"]]
,Div ("",["cell","code"],[("execution_count","2")])
,Div ("uid4",["cell","code"],[("execution_count","2")])
[CodeBlock ("",["python"],[]) "from IPython.display import HTML\nHTML(\"\"\"\n<script>\nconsole.log(\"hello\");\n</script>\n<b>HTML</b>\n\"\"\")"
,Div ("",["output","execute_result"],[("execution_count","2")])
,Div ("uid5",["output","execute_result"],[("execution_count","2")])
[RawBlock (Format "html") "<script>\nconsole.log(\"hello\");\n</script>\n<b>HTML</b>\nhello"]]
,Div ("",["cell","markdown"],[("tags","[\"foo\",\"bar\"]")])
,Div ("uid6",["cell","markdown"],[("tags","[\"foo\",\"bar\"]")])
[Header 2 ("image",[],[]) [Str "Image"]
,Para [Str "This",Space,Str "image",Space,Image ("",[],[]) [Str "the",Space,Str "moon"] ("lalune.jpg",""),Space,Str "will",Space,Str "be",Space,Str "included",Space,Str "as",Space,Str "a",Space,Str "cell",SoftBreak,Str "attachment."]]]

File diff suppressed because one or more lines are too long

View file

@ -12,7 +12,7 @@ Pandoc
]
}
[ Div
( "" , [ "cell" , "markdown" ] , [] )
( "uid1" , [ "cell" , "markdown" ] , [] )
[ Header
1
( "lorem-ipsum" , [] , [] )
@ -46,13 +46,13 @@ Pandoc
]
]
, Div
( "" , [ "cell" , "code" ] , [] )
( "uid2" , [ "cell" , "code" ] , [] )
[ CodeBlock ( "" , [ "python" ] , [] ) "print(\"hello\")" ]
, Div
( "" , [ "cell" , "markdown" ] , [] )
( "uid3" , [ "cell" , "markdown" ] , [] )
[ Header 2 ( "pyout" , [] , [] ) [ Str "Pyout" ] ]
, Div
( ""
( "uid4"
, [ "cell" , "code" ]
, [ ( "execution_count" , "2" ) ]
)
@ -70,7 +70,7 @@ Pandoc
]
]
, Div
( ""
( "uid6"
, [ "cell" , "markdown" ]
, [ ( "tags" , "[\"foo\",\"bar\"]" ) ]
)