feat: export cell output to ipynb file (#40)

benlubas · Nov 18, 2023 · ef9cb41 · ef9cb41
1 parent 820463d
commit ef9cb41
Show file tree

Hide file tree

Showing 9 changed files with 220 additions and 14 deletions.
diff --git a/README.md b/README.md
@@ -112,6 +112,7 @@ kernel
 | `MoltenRestart`           | `[!] [kernel]`        | Shuts down and restarts the kernel. Deletes all outputs if used with a bang |
 | `MoltenSave`              | `[path] [kernel]`     | Save the current cells and evaluated outputs into a JSON file. When path is specified, save the file to `path`, otherwise save to `g:molten_save_path`. _currently only saves one kernel per file_ |
 | `MoltenLoad`              | `["shared"] [path]`   | Loads cell locations and output from a JSON file generated by `MoltenSave`. path functions the same as `MoltenSave`. If `shared` is specified, the buffer shares an already running kernel. |
+| `MoltenExportOutput`      | `[!] [path] [kernel]` | Export outputs from the current buffer and kernel to a jupyter notebook `.ipynb` at the given path. [read more](./docs/Advanced-Functionality.md) |
 
 ## Keybindings
 

diff --git a/docs/Advanced-Functionality.md b/docs/Advanced-Functionality.md
@@ -26,9 +26,11 @@ connect to that kernel. You can then run code on this kernel like normal. When y
 kernel will remain running.
 
 You can also start the server with
+
 ```bash
 jupyter console --kernel=python3 -f /tmp/your_path_here.json
 ```
+
 in order to avoid having to copy paste the file path. But this requires jupyter-console to be
 installed.
 
@@ -40,15 +42,65 @@ installed.
 It's also possible to use this method to connect to remote jupyter kernels.
 
 On the remote machine run:
+
 ```bash
 jupyter console --kernel julia-1.7 --ip 1.2.3.4 -f /tmp/remote-julia.json
 ```
+
 Again, you can also use `jupyter kernel --kernel=<kernel_name>` but the file path will be a lot
 longer
 
 Locally run:
+
 ```bash
 scp 1.2.3.4:/tmp/remote-julia.json /tmp/remote-julia.json
 ```
 
 And finally run `:MoltenInit /tmp/remote-julia.json` in neovim.
+
+## Exporting Outputs
+
+> [!NOTE]
+> This command is considered experimental. While it works well enough to be used, there are
+> likely still bugs. If you find any, don't hesitate to create an issue.
+
+With the `:MoltenExportOutput` command, you can export cell outputs to a Jupyter Notebook (`.ipynb` file).
+**This does not create the notebook.**
+
+This command is intended for use with tools like Quarto, or Jupytext, which convert notebooks to
+plaintext, but it's implemented in such a way that the plaintext file format shouldn't matter, as
+long as the code contents of the cells match up.
+
+### Usage
+
+`:MoltenExportOutput` will create a copy of the notebook, prepended with "copy-of-", while
+`:MoltenExportOutput!` will overwrite the existing notebook (with an identical one that just has new
+outputs). Existing outputs are deleted.
+
+You can specify a file path as the first argument. By default, Molten looks for an existing notebook
+with the same name in the same spot. For example: `/path/to/file.md` exports to
+`/path/to/file.ipynb` by default. If you call `:MoltenExportOutput! /some/other/path/other_file.ipynb`
+then Molten will add outputs to `/some/other/path/other_file.ipynb`.
+
+If there are multiple kernels attached to the buffer when the command is called, you will be
+prompted for which kernel's outputs to export. There is nothing stopping you from exporting outputs
+from multiple kernels to the same notebook if you would like. That might be confusing, so it's not
+the default behavior.
+
+### Bailing
+
+The export will bail if there is a Molten cell with output that doesn't have a corresponding cell in
+the notebook. **Cells are searched for in order.**
+
+If your export is failing, it's probably because your notebook and plaintext representation got out
+of sync with each other.
+
+### Shortcomings
+
+#### cell matching
+Cells are matched by code content (comments are ignored). As a result, **if you have two or more
+code cells that have the same code content, and only the second one has output, molten will export
+that output to the first cell in the notebook**.
+
+To avoid this, don't create cells that are identical. If you must, execute both before
+exporting so that their outputs line up correctly.
diff --git a/flake.nix b/flake.nix
@@ -25,6 +25,7 @@
               cairosvg
               plotly
               matplotlib
+              nbformat
               svgwrite
               sympy
               tqdm

diff --git a/lua/remove_comments.lua b/lua/remove_comments.lua
@@ -0,0 +1,31 @@
+local M = {}
+
+
+---Remove comments from the given string of code using treesitter.
+---
+---Every `comment` node matched by the query truncates the line it starts on at the comment's
+---start column. Whitespace left dangling in front of an inline comment is trimmed so that
+---`x = 1  # note` and `x = 1` produce the same result; lines that end up empty are dropped.
+---Only the line a comment starts on is edited.
+---@param str string code to remove comments from
+---@param lang string language of the code
+---@return string
+M.remove_comments = function(str, lang)
+  local parser = vim.treesitter.get_string_parser(str, lang)
+  local tree = parser:parse()
+  local root = tree[1]:root()
+  -- create comment query
+  local query = vim.treesitter.query.parse(lang, [[((comment) @c (#offset! @c 0 0 0 -1))]])
+  -- split content lines
+  local lines = vim.split(str, '\n')
+  -- iterate over query match metadata
+  for _, _, metadata in query:iter_matches(root, str, root:start(), root:end_()) do
+    local region = metadata[1].range
+    local line = region[1] + 1
+    local col_start = region[2]
+    -- keep only the text before the comment and drop the whitespace that separated the two;
+    -- the outer parentheses discard gsub's second return value (the substitution count)
+    lines[line] = (string.sub(lines[line], 1, col_start):gsub('%s+$', ''))
+  end
+  -- remove blank lines
+  lines = vim.tbl_filter(function(line) return line ~= '' end, lines)
+  -- join lines
+  return table.concat(lines, '\n')
+end
+
+return M
diff --git a/rplugin/python3/molten/__init__.py b/rplugin/python3/molten/__init__.py
@@ -8,7 +8,8 @@
 from molten.code_cell import CodeCell
 from molten.images import Canvas, get_canvas_given_provider
 from molten.info_window import create_info_window
-from molten.io import MoltenIOError, get_default_save_file, load, save
+from molten.ipynb import export_outputs, get_default_export_file
+from molten.save_load import MoltenIOError, get_default_save_file, load, save
 from molten.moltenbuffer import MoltenKernel
 from molten.options import MoltenOptions
 from molten.outputbuffer import OutputBuffer
@@ -611,7 +612,7 @@ def command_interrupt(self, args) -> None:
 
     @pynvim.command("MoltenRestart", nargs="*", sync=True, bang=True)  # type: ignore
     @nvimui  # type: ignore
-    def command_restart(self, bang: bool, args) -> None:
+    def command_restart(self, args, bang) -> None:
         if len(args) > 0:
             kernel = args[0]
         else:
@@ -676,6 +677,30 @@ def command_hide_output(self) -> None:
 
         self._update_interface()
 
+    @pynvim.command("MoltenExportOutput", nargs="*", sync=True, bang=True)  # type: ignore
+    @nvimui  # type: ignore
+    def command_export(self, args, bang: bool) -> None:
+        self._initialize_if_necessary()
+
+        buf = self.nvim.current.buffer
+        if len(args) > 0:
+            path = args[0]
+        else:
+            path = get_default_export_file(self.nvim, buf)
+
+        if len(args) > 1:
+            kernel = args[1]
+        else:
+            self.kernel_check(f"MoltenExportOutput{'!' if bang else ''}", path, buf, kernel_last=True)
+            return
+
+        kernels = self._get_current_buf_kernels(True)
+        assert kernels is not None
+        for molten in kernels:
+            if molten.kernel_id == kernel:
+                export_outputs(self.nvim, molten, path, bang)
+                break
+
     @pynvim.command("MoltenSave", nargs="*", sync=True)  # type: ignore
     @nvimui  # type: ignore
     def command_save(self, args) -> None:

diff --git a/rplugin/python3/molten/ipynb.py b/rplugin/python3/molten/ipynb.py
@@ -0,0 +1,93 @@
+from pynvim.api import Buffer, Nvim
+from molten.code_cell import CodeCell
+from molten.moltenbuffer import MoltenKernel
+import os
+import nbformat
+
+from molten.utils import MoltenException, notify_error, notify_info, notify_warn
+
+NOTEBOOK_VERSION = 4
+
+
+def get_default_export_file(nvim: Nvim, buffer: Buffer) -> str:
+    # WARN: this is string containment checking, not array containment checking.
+    if "nofile" in buffer.options["buftype"]:
+        raise MoltenException("Buffer does not correspond to a file")
+
+    file_name = nvim.funcs.expand("%")
+    cwd = nvim.funcs.getcwd()
+    full_path = os.path.join(cwd, file_name)
+    return f"{os.path.splitext(full_path)[0]}.ipynb"
+
+
+def export_outputs(nvim: Nvim, kernel: MoltenKernel, filepath: str, overwrite: bool):
+    """Export outputs of the current file/kernel to a .ipynb file with the given name."""
+
+    if not filepath.endswith(".ipynb"):
+        filepath += ".ipynb"
+
+    if not os.path.exists(filepath):
+        notify_warn(nvim, f"Cannot export to file: {filepath} because it does not exist.")
+        return
+
+    nb = nbformat.read(filepath, as_version=NOTEBOOK_VERSION)
+
+    molten_cells = sorted(kernel.outputs.items(), key=lambda x: x[0])
+
+    if len(molten_cells) == 0:
+        notify_warn(nvim, "No cell outputs to export")
+        return
+
+    nb_cells = list(filter(lambda x: x["cell_type"] == "code", nb["cells"]))
+    nb_index = 0
+    lang = kernel.runtime.kernel_manager.kernel_spec.language  # type: ignore
+    for mcell in molten_cells:
+        matched = False
+        while nb_index < len(nb_cells):
+            code_cell, output = mcell
+            nb_cell = nb_cells[nb_index]
+            nb_index += 1
+
+            if compare_contents(nvim, nb_cell, code_cell, lang):
+                matched = True
+                outputs = [
+                    nbformat.v4.new_output(
+                        chunk.output_type,
+                        chunk.jupyter_data,
+                        **chunk.extras,
+                    )
+                    if chunk.jupyter_metadata is None
+                    else nbformat.v4.new_output(
+                        chunk.output_type,
+                        chunk.jupyter_data,
+                        metadata=chunk.jupyter_metadata,
+                        **chunk.extras,
+                    )
+                    for chunk in output.output.chunks
+                ]
+                nb_cell["outputs"] = outputs
+                break  # break out of the while loop
+
+        if not matched:
+            notify_error(
+                nvim,
+                f"No cell matching cell at line: {mcell[0].begin.lineno + 1} in notebook: {filepath}. Bailing.",
+            )
+            return
+
+    if overwrite:
+        write_to = filepath
+    else:
+        head, tail = os.path.split(filepath)
+        write_to = f"{head}/copy-of-{tail}"
+
+    notify_info(nvim, f"Exporting {len(molten_cells)} cell output(s) to {write_to}")
+    nbformat.write(nb, write_to)
+
+
+def compare_contents(nvim: Nvim, nb_cell, code_cell: CodeCell, lang: str) -> bool:
+    molten_contents = code_cell.get_text(nvim)
+    nvim.exec_lua("_remove_comments = require('remove_comments').remove_comments")
+    clean_nb = nvim.lua._remove_comments(nb_cell["source"], lang)
+    clean_molten = nvim.lua._remove_comments(molten_contents + "\n", lang)
+    return clean_nb == clean_molten
diff --git a/rplugin/python3/molten/outputchunks.py b/rplugin/python3/molten/outputchunks.py
@@ -23,6 +23,9 @@
 class OutputChunk(ABC):
     jupyter_data: Optional[Dict[str, Any]] = None
     jupyter_metadata: Optional[Dict[str, Any]] = None
+    # extra keys that are used to write data to jupyter notebook files (ie. for error outputs)
+    extras: Dict[str, Any] = {}
+    output_type: str
 
     @abstractmethod
     def place(
@@ -52,9 +55,7 @@ class TextOutputChunk(OutputChunk):
 
     def __init__(self, text: str):
         self.text = text
-
-    def _cleanup_text(self, text: str) -> str:
-        return clean_up_text(text)
+        self.output_type = "display_data"
 
     def place(
         self,
@@ -65,7 +66,7 @@ def place(
         _canvas: Canvas,
         hard_wrap: bool,
     ) -> Tuple[str, int]:
-        text = self._cleanup_text(self.text)
+        text = clean_up_text(self.text)
         extra_lines = 0
         if options.wrap_output:  # count the number of extra lines this will need when wrapped
             win_width = shape[2]
@@ -113,6 +114,7 @@ def __init__(self, name: str, message: str, traceback: List[str]):
                 + traceback
             )
         )
+        self.output_type = "error"
 
 
 class AbortedOutputChunk(TextLnOutputChunk):
@@ -123,6 +125,7 @@ def __init__(self) -> None:
 class ImageOutputChunk(OutputChunk):
     def __init__(self, img_path: str):
         self.img_path = img_path
+        self.output_type = "display_data"
 
     def place(
         self,

diff --git a/rplugin/python3/molten/runtime.py b/rplugin/python3/molten/runtime.py
@@ -159,9 +159,10 @@ def copy_on_demand(content_ctor):
             return True
         elif message_type == "error":
             output.success = False
-            output.chunks.append(
-                ErrorOutputChunk(content["ename"], content["evalue"], content["traceback"])
-            )
+            chunk = ErrorOutputChunk(content["ename"], content["evalue"], content["traceback"])
+            chunk.extras = content
+            output.chunks.append(chunk)
+
             copy_on_demand(lambda: "\n\n".join(map(clean_up_text, content["traceback"])))
             return True
         elif message_type == "stream":

diff --git a/rplugin/python3/molten/io.py → rplugin/python3/molten/save_load.py b/rplugin/python3/molten/io.py → rplugin/python3/molten/save_load.py
@@ -42,7 +42,6 @@ def get_default_save_file(options: MoltenOptions, buffer: Buffer) -> str:
 def load(nvim: Nvim, moltenbuffer: MoltenKernel, nvim_buffer: Buffer, data: Dict[str, Any]) -> None:
     MoltenIOError.assert_has_key(data, "content_checksum", str)
 
-    # checksums are being calculated differently?
     if moltenbuffer._get_content_checksum() != data["content_checksum"]:
         raise MoltenIOError("Buffer contents' checksum does not match!")
 
@@ -107,12 +106,12 @@ def load(nvim: Nvim, moltenbuffer: MoltenKernel, nvim_buffer: Buffer, data: Dict
         moltenbuffer.outputs[span].output = output
 
 
-def save(moltenbuffer: MoltenKernel, nvim_buffer: int) -> Dict[str, Any]:
+def save(molten_kernel: MoltenKernel, nvim_buffer: int) -> Dict[str, Any]:
     """Save the current kernel state for the given buffer."""
     return {
         "version": 1,
-        "kernel": moltenbuffer.runtime.kernel_name,
-        "content_checksum": moltenbuffer._get_content_checksum(),
+        "kernel": molten_kernel.runtime.kernel_name,
+        "content_checksum": molten_kernel._get_content_checksum(),
         "cells": [
             {
                 "span": {
@@ -137,7 +136,7 @@ def save(moltenbuffer: MoltenKernel, nvim_buffer: int) -> Dict[str, Any]:
                     if chunk.jupyter_data is not None and chunk.jupyter_metadata is not None
                 ],
             }
-            for span, output in moltenbuffer.outputs.items()
+            for span, output in molten_kernel.outputs.items()
             if span.begin.bufno == nvim_buffer
         ],
     }