Ipynb reader & writer: properly handle cell "id".
The cell "id" is passed through if it exists (in Nb4); otherwise, when writing nbformat 4.5 or later, the writer will add a random one so that cells all have an "id". Closes #7728.
This commit is contained in:
parent
72075423d0
commit
51142c6803
8 changed files with 85 additions and 42 deletions
|
@ -7,3 +7,8 @@ constraints: aeson >= 2.0.1.0
|
|||
-- type: git
|
||||
-- location: https://github.com/jgm/texmath.git
|
||||
-- tag: 674bcbaec03e5550f155623de6662953bd157625
|
||||
|
||||
source-repository-package
|
||||
type: git
|
||||
location: https://github.com/jgm/ipynb.git
|
||||
tag: 62f1a5180a61bc89c06982d8b869b1f254208699
|
||||
|
|
|
@ -475,7 +475,7 @@ library
|
|||
http-client >= 0.4.30 && < 0.8,
|
||||
http-client-tls >= 0.2.4 && < 0.4,
|
||||
http-types >= 0.8 && < 0.13,
|
||||
ipynb >= 0.1.0.2 && < 0.2,
|
||||
ipynb >= 0.2 && < 0.3,
|
||||
jira-wiki-markup >= 1.4 && < 1.5,
|
||||
lpeg >= 1.0.1 && < 1.1,
|
||||
mtl >= 2.2 && < 2.3,
|
||||
|
|
|
@ -77,7 +77,10 @@ cellToBlocks opts lang c = do
|
|||
let Source ts = cellSource c
|
||||
let source = mconcat ts
|
||||
let kvs = jsonMetaToPairs (cellMetadata c)
|
||||
let attachments = maybe mempty M.toList $ cellAttachments c
|
||||
let attachments = case cellAttachments c of
|
||||
Nothing -> mempty
|
||||
Just (MimeAttachments m) -> M.toList m
|
||||
let ident = fromMaybe mempty $ cellId c
|
||||
mapM_ addAttachment attachments
|
||||
case cellType c of
|
||||
Ipynb.Markdown -> do
|
||||
|
@ -86,12 +89,12 @@ cellToBlocks opts lang c = do
|
|||
else do
|
||||
Pandoc _ bs <- walk fixImage <$> readMarkdown opts source
|
||||
return bs
|
||||
return $ B.divWith ("",["cell","markdown"],kvs)
|
||||
return $ B.divWith (ident,["cell","markdown"],kvs)
|
||||
$ B.fromList bs
|
||||
Ipynb.Heading lev -> do
|
||||
Pandoc _ bs <- readMarkdown opts
|
||||
(T.replicate lev "#" <> " " <> source)
|
||||
return $ B.divWith ("",["cell","markdown"],kvs)
|
||||
return $ B.divWith (ident,["cell","markdown"],kvs)
|
||||
$ B.fromList bs
|
||||
Ipynb.Raw -> do
|
||||
-- we use ipynb to indicate no format given (a wildcard in nbformat)
|
||||
|
@ -108,11 +111,12 @@ cellToBlocks opts lang c = do
|
|||
"text/restructuredtext" -> "rst"
|
||||
"text/asciidoc" -> "asciidoc"
|
||||
_ -> format
|
||||
return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format' source
|
||||
return $ B.divWith (ident,["cell","raw"],kvs)
|
||||
$ B.rawBlock format' source
|
||||
Ipynb.Code{ codeOutputs = outputs, codeExecutionCount = ec } -> do
|
||||
outputBlocks <- mconcat <$> mapM outputToBlock outputs
|
||||
let kvs' = maybe kvs (\x -> ("execution_count", tshow x):kvs) ec
|
||||
return $ B.divWith ("",["cell","code"],kvs') $
|
||||
return $ B.divWith (ident,["cell","code"],kvs') $
|
||||
B.codeBlockWith ("",[lang],[]) source
|
||||
<> outputBlocks
|
||||
|
||||
|
@ -161,7 +165,7 @@ outputToBlock Err{ errName = ename,
|
|||
-- the output format.
|
||||
handleData :: PandocMonad m
|
||||
=> JSONMeta -> MimeBundle -> m B.Blocks
|
||||
handleData metadata (MimeBundle mb) =
|
||||
handleData (JSONMeta metadata) (MimeBundle mb) =
|
||||
mconcat <$> mapM dataBlock (M.toList mb)
|
||||
|
||||
where
|
||||
|
@ -209,7 +213,7 @@ handleData metadata (MimeBundle mb) =
|
|||
dataBlock _ = return mempty
|
||||
|
||||
jsonMetaToMeta :: JSONMeta -> M.Map Text MetaValue
|
||||
jsonMetaToMeta = M.map valueToMetaValue
|
||||
jsonMetaToMeta (JSONMeta m) = M.map valueToMetaValue m
|
||||
where
|
||||
valueToMetaValue :: Value -> MetaValue
|
||||
valueToMetaValue x@Object{} =
|
||||
|
@ -228,11 +232,11 @@ jsonMetaToMeta = M.map valueToMetaValue
|
|||
valueToMetaValue Aeson.Null = MetaString ""
|
||||
|
||||
jsonMetaToPairs :: JSONMeta -> [(Text, Text)]
|
||||
jsonMetaToPairs = M.toList . M.map
|
||||
jsonMetaToPairs (JSONMeta m) = M.toList . M.map
|
||||
(\case
|
||||
String t
|
||||
| not (T.all isDigit t)
|
||||
, t /= "true"
|
||||
, t /= "false"
|
||||
-> t
|
||||
x -> T.pack $ UTF8.toStringLazy $ Aeson.encode x)
|
||||
x -> T.pack $ UTF8.toStringLazy $ Aeson.encode x) $ m
|
||||
|
|
|
@ -37,6 +37,8 @@ import qualified Data.ByteString.Lazy as BL
|
|||
import Data.Aeson.Encode.Pretty (Config(..), defConfig,
|
||||
encodePretty', keyOrder, Indent(Spaces))
|
||||
import Text.DocLayout (literal)
|
||||
import Text.Pandoc.UUID (getRandomUUID)
|
||||
import Data.Char (isAscii, isAlphaNum)
|
||||
|
||||
writeIpynb :: PandocMonad m => WriterOptions -> Pandoc -> m Text
|
||||
writeIpynb opts d = do
|
||||
|
@ -79,7 +81,7 @@ pandocToNotebook opts (Pandoc meta blocks) = do
|
|||
let metadata = case fromJSON metadata' of
|
||||
Error _ -> mempty -- TODO warning here? shouldn't happen
|
||||
Success x -> x
|
||||
cells <- extractCells opts blocks
|
||||
cells <- extractCells nbformat opts blocks
|
||||
return $ Notebook{
|
||||
notebookMetadata = metadata
|
||||
, notebookFormat = nbformat
|
||||
|
@ -97,23 +99,26 @@ addAttachment (Image attr lab (src,tit))
|
|||
return $ Image attr lab ("attachment:" <> src, tit)
|
||||
addAttachment x = return x
|
||||
|
||||
extractCells :: PandocMonad m => WriterOptions -> [Block] -> m [Ipynb.Cell a]
|
||||
extractCells _ [] = return []
|
||||
extractCells opts (Div (_id,classes,kvs) xs : bs)
|
||||
extractCells :: PandocMonad m
|
||||
=> (Int, Int) -> WriterOptions -> [Block] -> m [Ipynb.Cell a]
|
||||
extractCells _ _ [] = return []
|
||||
extractCells nbformat opts (Div (ident,classes,kvs) xs : bs)
|
||||
| "cell" `elem` classes
|
||||
, "markdown" `elem` classes = do
|
||||
let meta = pairsToJSONMeta kvs
|
||||
(newdoc, attachments) <-
|
||||
runStateT (walkM addAttachment (Pandoc nullMeta xs)) mempty
|
||||
source <- writeMarkdown opts{ writerTemplate = Nothing } newdoc
|
||||
uuid <- uuidFrom nbformat ident
|
||||
(Ipynb.Cell{
|
||||
cellType = Markdown
|
||||
, cellId = uuid
|
||||
, cellSource = Source $ breakLines $ T.stripEnd source
|
||||
, cellMetadata = meta
|
||||
, cellAttachments = if M.null attachments
|
||||
then Nothing
|
||||
else Just attachments } :)
|
||||
<$> extractCells opts bs
|
||||
else Just $ MimeAttachments attachments } :)
|
||||
<$> extractCells nbformat opts bs
|
||||
| "cell" `elem` classes
|
||||
, "code" `elem` classes = do
|
||||
let (codeContent, rest) =
|
||||
|
@ -123,14 +128,16 @@ extractCells opts (Div (_id,classes,kvs) xs : bs)
|
|||
let meta = pairsToJSONMeta kvs
|
||||
outputs <- catMaybes <$> mapM blockToOutput rest
|
||||
let exeCount = lookup "execution_count" kvs >>= safeRead
|
||||
uuid <- uuidFrom nbformat ident
|
||||
(Ipynb.Cell{
|
||||
cellType = Ipynb.Code {
|
||||
codeExecutionCount = exeCount
|
||||
, codeOutputs = outputs
|
||||
}
|
||||
, cellId = uuid
|
||||
, cellSource = Source $ breakLines codeContent
|
||||
, cellMetadata = meta
|
||||
, cellAttachments = Nothing } :) <$> extractCells opts bs
|
||||
, cellAttachments = Nothing } :) <$> extractCells nbformat opts bs
|
||||
| "cell" `elem` classes
|
||||
, "raw" `elem` classes =
|
||||
case consolidateAdjacentRawBlocks xs of
|
||||
|
@ -150,33 +157,54 @@ extractCells opts (Div (_id,classes,kvs) xs : bs)
|
|||
"rst" -> "text/restructuredtext"
|
||||
"asciidoc" -> "text/asciidoc"
|
||||
_ -> f
|
||||
uuid <- uuidFrom nbformat ident
|
||||
(Ipynb.Cell{
|
||||
cellType = Raw
|
||||
, cellId = uuid
|
||||
, cellSource = Source $ breakLines raw
|
||||
, cellMetadata = if format' == "ipynb" -- means no format given
|
||||
then mempty
|
||||
else M.insert "raw_mimetype"
|
||||
else JSONMeta $ M.insert "raw_mimetype"
|
||||
(Aeson.String format') mempty
|
||||
, cellAttachments = Nothing } :) <$> extractCells opts bs
|
||||
_ -> extractCells opts bs
|
||||
extractCells opts (CodeBlock (_id,classes,kvs) raw : bs)
|
||||
, cellAttachments = Nothing } :) <$> extractCells nbformat opts bs
|
||||
_ -> extractCells nbformat opts bs
|
||||
extractCells nbformat opts (CodeBlock (ident,classes,kvs) raw : bs)
|
||||
| "code" `elem` classes = do
|
||||
let meta = pairsToJSONMeta kvs
|
||||
let exeCount = lookup "execution_count" kvs >>= safeRead
|
||||
uuid <- uuidFrom nbformat ident
|
||||
(Ipynb.Cell{
|
||||
cellType = Ipynb.Code {
|
||||
codeExecutionCount = exeCount
|
||||
, codeOutputs = []
|
||||
}
|
||||
, cellId = uuid
|
||||
, cellSource = Source $ breakLines raw
|
||||
, cellMetadata = meta
|
||||
, cellAttachments = Nothing } :) <$> extractCells opts bs
|
||||
extractCells opts (b:bs) = do
|
||||
, cellAttachments = Nothing } :) <$> extractCells nbformat opts bs
|
||||
extractCells nbformat opts (b:bs) = do
|
||||
let isCodeOrDiv (CodeBlock (_,cl,_) _) = "code" `elem` cl
|
||||
isCodeOrDiv (Div (_,cl,_) _) = "cell" `elem` cl
|
||||
isCodeOrDiv _ = False
|
||||
let (mds, rest) = break isCodeOrDiv bs
|
||||
extractCells opts (Div ("",["cell","markdown"],[]) (b:mds) : rest)
|
||||
extractCells nbformat opts
|
||||
(Div ("",["cell","markdown"],[]) (b:mds) : rest)
|
||||
|
||||
-- Compute the "id" field for a notebook cell from the notebook format
-- version (major, minor) and the identifier of the source block.
-- Return Nothing if nbformat < 4.5.
|
||||
-- Otherwise construct a UUID, using the existing identifier
|
||||
-- if it is a valid UUID (as decided by isValidUUID below),
-- otherwise constructing a new one.
|
||||
uuidFrom :: PandocMonad m => (Int, Int) -> Text -> m (Maybe Text)
|
||||
uuidFrom nbformat ident =
|
||||
-- Tuple comparison is lexicographic: major version first, then minor.
if nbformat >= (4,5)
|
||||
then
|
||||
if isValidUUID ident
|
||||
then return $ Just ident
|
||||
-- NOTE(review): drop 9 presumably strips a "urn:uuid:" prefix from the
-- shown UUID; confirm against the Show instance in Text.Pandoc.UUID.
else Just . T.pack . drop 9 . show <$> getRandomUUID
|
||||
else return Nothing
|
||||
where
|
||||
-- An identifier is accepted as-is when it is non-empty, at most 64
-- characters long, and made up only of characters allowed by
-- isValidUUIDChar. An empty identifier therefore always gets a fresh id.
isValidUUID t = not (T.null t) && T.length t <= 64 &&
|
||||
T.all isValidUUIDChar t
|
||||
-- Allowed characters: ASCII letters and digits, hyphen, and underscore.
isValidUUIDChar c = isAscii c && (isAlphaNum c || c == '-' || c == '_')
|
||||
|
||||
blockToOutput :: PandocMonad m => Block -> m (Maybe (Output a))
|
||||
blockToOutput (Div (_,["output","stream",sname],_) (CodeBlock _ t:_)) =
|
||||
|
@ -229,7 +257,7 @@ extractData bs = do
|
|||
go (mmap, meta) b = (mmap, meta) <$ report (BlockNotRendered b)
|
||||
|
||||
pairsToJSONMeta :: [(Text, Text)] -> JSONMeta
|
||||
pairsToJSONMeta kvs =
|
||||
pairsToJSONMeta kvs = JSONMeta $
|
||||
M.fromList [(k, case Aeson.decode (UTF8.fromTextLazy $ TL.fromStrict v) of
|
||||
Just val -> val
|
||||
Nothing -> String v)
|
||||
|
|
|
@ -32,10 +32,11 @@ extra-deps:
|
|||
- commonmark-extensions-0.2.2
|
||||
- citeproc-0.6
|
||||
- aeson-pretty-0.8.9
|
||||
- ipynb-0.1.0.2
|
||||
- texmath-0.12.3.3
|
||||
- unicode-transforms-0.4.0
|
||||
- unicode-data-0.2.0
|
||||
- git: https://github.com/jgm/ipynb.git
|
||||
commit: 62f1a5180a61bc89c06982d8b869b1f254208699
|
||||
ghc-options:
|
||||
"$locals": -fhide-source-paths -Wno-missing-home-modules
|
||||
resolver: lts-18.10
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
Pandoc (Meta {unMeta = fromList [("jupyter",MetaMap (fromList [("nbformat",MetaInlines [Str "4"]),("nbformat_minor",MetaInlines [Str "5"])]))]})
|
||||
[Div ("",["cell","markdown"],[])
|
||||
[Div ("uid1",["cell","markdown"],[])
|
||||
[Header 1 ("lorem-ipsum",[],[]) [Str "Lorem",Space,Str "ipsum"]
|
||||
,Para [Strong [Str "Lorem",Space,Str "ipsum"],Space,Str "dolor",Space,Str "sit",Space,Str "amet,",Space,Str "consectetur",Space,Str "adipiscing",Space,Str "elit.",Space,Str "Nunc",Space,Str "luctus",SoftBreak,Str "bibendum",Space,Str "felis",Space,Str "dictum",Space,Str "sodales."]]
|
||||
,Div ("",["cell","code"],[])
|
||||
,Div ("uid2",["cell","code"],[])
|
||||
[CodeBlock ("",["python"],[]) "print(\"hello\")"]
|
||||
,Div ("",["cell","markdown"],[])
|
||||
,Div ("uid3",["cell","markdown"],[])
|
||||
[Header 2 ("pyout",[],[]) [Str "Pyout"]]
|
||||
,Div ("",["cell","code"],[("execution_count","2")])
|
||||
,Div ("uid4",["cell","code"],[("execution_count","2")])
|
||||
[CodeBlock ("",["python"],[]) "from IPython.display import HTML\nHTML(\"\"\"\n<script>\nconsole.log(\"hello\");\n</script>\n<b>HTML</b>\n\"\"\")"
|
||||
,Div ("",["output","execute_result"],[("execution_count","2")])
|
||||
,Div ("uid5",["output","execute_result"],[("execution_count","2")])
|
||||
[RawBlock (Format "html") "<script>\nconsole.log(\"hello\");\n</script>\n<b>HTML</b>\nhello"]]
|
||||
,Div ("",["cell","markdown"],[("tags","[\"foo\",\"bar\"]")])
|
||||
,Div ("uid6",["cell","markdown"],[("tags","[\"foo\",\"bar\"]")])
|
||||
[Header 2 ("image",[],[]) [Str "Image"]
|
||||
,Para [Str "This",Space,Str "image",Space,Image ("",[],[]) [Str "the",Space,Str "moon"] ("lalune.jpg",""),Space,Str "will",Space,Str "be",Space,Str "included",Space,Str "as",Space,Str "a",Space,Str "cell",SoftBreak,Str "attachment."]]]
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -12,7 +12,7 @@ Pandoc
|
|||
]
|
||||
}
|
||||
[ Div
|
||||
( "" , [ "cell" , "markdown" ] , [] )
|
||||
( "uid1" , [ "cell" , "markdown" ] , [] )
|
||||
[ Header
|
||||
1
|
||||
( "lorem-ipsum" , [] , [] )
|
||||
|
@ -46,13 +46,13 @@ Pandoc
|
|||
]
|
||||
]
|
||||
, Div
|
||||
( "" , [ "cell" , "code" ] , [] )
|
||||
( "uid2" , [ "cell" , "code" ] , [] )
|
||||
[ CodeBlock ( "" , [ "python" ] , [] ) "print(\"hello\")" ]
|
||||
, Div
|
||||
( "" , [ "cell" , "markdown" ] , [] )
|
||||
( "uid3" , [ "cell" , "markdown" ] , [] )
|
||||
[ Header 2 ( "pyout" , [] , [] ) [ Str "Pyout" ] ]
|
||||
, Div
|
||||
( ""
|
||||
( "uid4"
|
||||
, [ "cell" , "code" ]
|
||||
, [ ( "execution_count" , "2" ) ]
|
||||
)
|
||||
|
@ -70,7 +70,7 @@ Pandoc
|
|||
]
|
||||
]
|
||||
, Div
|
||||
( ""
|
||||
( "uid6"
|
||||
, [ "cell" , "markdown" ]
|
||||
, [ ( "tags" , "[\"foo\",\"bar\"]" ) ]
|
||||
)
|
||||
|
|
Loading…
Add table
Reference in a new issue