Skip to content

Commit

Permalink
Remove .ipython dir and introduce kedro Jupyter kernel (#1355)
Browse files Browse the repository at this point in the history
  • Loading branch information
antonymilne authored Mar 30, 2022
1 parent cbef276 commit 8c899a9
Show file tree
Hide file tree
Showing 33 changed files with 361 additions and 923 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ include LICENSE.md
include requirements.txt
include test_requirements.txt
include kedro/config/logging.yml
include kedro/extras/extensions/*.png
recursive-include templates *

This file was deleted.

1 change: 0 additions & 1 deletion docs/source/api_docs/kedro.framework.cli.utils.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ kedro.framework.cli.utils
find_stylesheets
forward_command
get_pkg_version
ipython_message
python_call
split_string

Expand Down
Binary file not shown.
Binary file added docs/source/meta/images/jupyter_new_notebook.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added docs/source/meta/images/jupyter_qtconsole.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/source/meta/images/jupyter_select_kernel.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
438 changes: 156 additions & 282 deletions docs/source/tools_integration/ipython.md

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion docs/source/tutorial/tutorial_template.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ flake8>=3.7.9, <4.0 # Used for linting code with `kedro lint`
ipython==7.0 # Used for an IPython session with `kedro ipython`
isort~=5.0 # Used for linting code with `kedro lint`
jupyter~=1.0 # Used to open a Kedro-session in Jupyter Notebook & Lab
jupyter_client>=5.1.0, <8.0 # Used to open a Kedro-session in Jupyter Notebook & Lab
jupyterlab~=3.0 # Used to open a Kedro-session in Jupyter Lab
kedro==0.17.7
nbstripout~=0.4 # Strips the output of a Jupyter Notebook and writes the outputless version to the original file
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ flake8>=3.7.9, <4.0
ipython>=7.31.1, <8.0
isort~=5.0
jupyter~=1.0
jupyter_client>=5.1, <8.0
jupyterlab~=3.0
kedro[pandas.CSVDataSet]=={{ cookiecutter.kedro_version }}
kedro-telemetry~=0.1.0; python_version < '3.9'
Expand Down
Binary file added kedro/extras/extensions/logo-32x32.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added kedro/extras/extensions/logo-64x64.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
241 changes: 98 additions & 143 deletions kedro/framework/cli/jupyter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,79 +3,35 @@
"""
import json
import os
import re
import shutil
import sys
from collections import Counter
from glob import iglob
from pathlib import Path
from typing import Any, Dict, Iterable, List
from typing import Any, Dict
from warnings import warn

import click
from click import secho
from jupyter_client.kernelspec import NATIVE_KERNEL_NAME, KernelSpecManager
from traitlets import Unicode

from kedro.framework.cli.utils import (
KedroCliError,
_check_module_importable,
command_with_verbosity,
env_option,
forward_command,
ipython_message,
load_entry_points,
python_call,
)
from kedro.framework.project import validate_settings
from kedro.framework.startup import ProjectMetadata

# Help texts shown for the click options of the `kedro jupyter` commands.
JUPYTER_IP_HELP = "IP address of the Jupyter server."
JUPYTER_ALL_KERNELS_HELP = "Display all available Python kernels."
JUPYTER_IDLE_TIMEOUT_HELP = """When a notebook is closed, Jupyter server will
terminate its kernel after so many seconds of inactivity. This does not affect
any open notebooks."""

# NOTE(review): presumably the help texts for the notebook-conversion command's
# options; their use site is not visible in this view -- confirm.
CONVERT_ALL_HELP = """Extract the nodes from all notebooks in the Kedro project directory,
including sub-folders."""

OVERWRITE_HELP = """If Python file already exists for the equivalent notebook,
overwrite its contents."""


def collect_line_magic():
    """Return the line magic functions that plugins expose through the
    ``line_magic`` entry points.
    """
    line_magics = load_entry_points("line_magic")
    return line_magics


class SingleKernelSpecManager(KernelSpecManager):
    """KernelSpec manager used by Kedro projects.

    Only the native (default) kernel is whitelisted, which keeps the kernel
    list short for users, and that kernel is shown under a friendlier name.
    """

    default_kernel_name = Unicode(
        "Kedro", config=True, help="Alternative name for the default kernel"
    )
    whitelist = [NATIVE_KERNEL_NAME]

    def get_kernel_spec(self, kernel_name):
        """Look up the KernelSpec for ``kernel_name``.

        Jupyter is expected to call this for the default kernel only; for
        that kernel the display name is swapped for ``default_kernel_name``.
        """
        spec = super().get_kernel_spec(kernel_name)

        # Any kernel other than the native one is returned untouched.
        if kernel_name != NATIVE_KERNEL_NAME:
            return spec

        spec.display_name = self.default_kernel_name
        return spec


def _update_ipython_dir(project_path: Path) -> None:
os.environ["IPYTHONDIR"] = str(project_path / ".ipython")


# pylint: disable=missing-function-docstring
@click.group(name="Kedro")
def jupyter_cli(): # pragma: no cover
Expand All @@ -90,87 +46,128 @@ def jupyter():


@forward_command(jupyter, "notebook", forward_help=True)
@click.option(
"--ip",
"ip_address",
type=str,
default="127.0.0.1",
help="IP address of the Jupyter server.",
)
@click.option(
"--all-kernels", is_flag=True, default=False, help=JUPYTER_ALL_KERNELS_HELP
)
@click.option("--idle-timeout", type=int, default=30, help=JUPYTER_IDLE_TIMEOUT_HELP)
@env_option
@click.pass_obj # this will pass the metadata as first argument
def jupyter_notebook(
metadata: ProjectMetadata,
ip_address,
all_kernels,
env,
idle_timeout,
args,
**kwargs,
): # pylint: disable=unused-argument,too-many-arguments
): # pylint: disable=unused-argument
"""Open Jupyter Notebook with project specific variables loaded."""
_check_module_importable("jupyter_core")

_check_module_importable("notebook")
validate_settings()

if "-h" not in args and "--help" not in args:
ipython_message(all_kernels)

_update_ipython_dir(metadata.project_path)
arguments = _build_jupyter_command(
"notebook",
ip_address=ip_address,
all_kernels=all_kernels,
args=args,
idle_timeout=idle_timeout,
project_name=metadata.project_name,
)
kernel_name = f"kedro_{metadata.package_name}"
_create_kernel(kernel_name, f"Kedro ({metadata.package_name})")

python_call_kwargs = _build_jupyter_env(env)
python_call("jupyter", arguments, **python_call_kwargs)
if env:
os.environ["KEDRO_ENV"] = env

python_call(
"jupyter",
["notebook", f"--MultiKernelManager.default_kernel_name={kernel_name}"]
+ list(args),
)


@forward_command(jupyter, "lab", forward_help=True)
@click.option("--ip", "ip_address", type=str, default="127.0.0.1", help=JUPYTER_IP_HELP)
@click.option(
"--all-kernels", is_flag=True, default=False, help=JUPYTER_ALL_KERNELS_HELP
)
@click.option("--idle-timeout", type=int, default=30, help=JUPYTER_IDLE_TIMEOUT_HELP)
@env_option
@click.pass_obj # this will pass the metadata as first argument
def jupyter_lab(
metadata: ProjectMetadata,
ip_address,
all_kernels,
env,
idle_timeout,
args,
**kwargs,
): # pylint: disable=unused-argument,too-many-arguments
): # pylint: disable=unused-argument
"""Open Jupyter Lab with project specific variables loaded."""
_check_module_importable("jupyter_core")

_check_module_importable("jupyterlab")
validate_settings()

if "-h" not in args and "--help" not in args:
ipython_message(all_kernels)

_update_ipython_dir(metadata.project_path)
arguments = _build_jupyter_command(
"lab",
ip_address=ip_address,
all_kernels=all_kernels,
args=args,
idle_timeout=idle_timeout,
project_name=metadata.project_name,
kernel_name = f"kedro_{metadata.package_name}"
_create_kernel(kernel_name, f"Kedro ({metadata.package_name})")

if env:
os.environ["KEDRO_ENV"] = env

python_call(
"jupyter",
["lab", f"--MultiKernelManager.default_kernel_name={kernel_name}"] + list(args),
)

python_call_kwargs = _build_jupyter_env(env)
python_call("jupyter", arguments, **python_call_kwargs)

def _create_kernel(kernel_name: str, display_name: str) -> None:
    """Creates an IPython kernel for the kedro project, if one does not already exist.

    Installs the default IPython kernel (which points towards `sys.executable`)
    and customises it to make the launch command load the kedro extension.
    This is equivalent to the method recommended for creating a custom IPython kernel
    on the CLI: https://ipython.readthedocs.io/en/stable/install/kernel_install.html.

    On linux this creates a directory ~/.local/share/jupyter/kernels/{kernel_name}
    containing kernel.json, logo-32x32.png and logo-64x64.png. An example kernel.json
    looks as follows:

    {
     "argv": [
      "/Users/antony_milne/miniconda3/envs/spaceflights/bin/python",
      "-m",
      "ipykernel_launcher",
      "-f",
      "{connection_file}",
      "--ext",
      "kedro.extras.extensions.ipython"
     ],
     "display_name": "Kedro (spaceflights)",
     "language": "python",
     "metadata": {
      "debugger": false
     }
    }

    If the kernel is installed but customising it fails (rewriting kernel.json or
    copying the logos), the freshly installed kernel directory is removed again.
    Otherwise the next call would find the half-configured kernel, report that it
    "already exists and will be used", and start a kernel that does not load the
    kedro extension.

    Args:
        kernel_name: Name of the kernel to create.
        display_name: Kernel name as it is displayed in the UI.

    Raises:
        KedroCliError: When kernel cannot be set up.
    """
    # These packages are required by jupyter lab and notebook, which we have already
    # checked are importable, so we don't run _check_module_importable on them.
    # pylint: disable=import-outside-toplevel
    from ipykernel.kernelspec import install
    from jupyter_client.kernelspec import find_kernel_specs

    # Set only once `install` has returned, i.e. once there is something on disk
    # that may need cleaning up if a later step fails.
    kernel_path = None

    try:
        if kernel_name in find_kernel_specs():
            secho(
                f"Jupyter kernel {kernel_name} already exists and will be used.",
                fg="green",
            )
            return

        # Install with user=True rather than system-wide to minimise footprint and
        # ensure that we have permissions to write there.
        kernel_path = install(
            user=True,
            kernel_name=kernel_name,
            display_name=display_name,
        )

        kernel_json = Path(kernel_path) / "kernel.json"
        kernel_spec = json.loads(kernel_json.read_text(encoding="utf-8"))
        kernel_spec["argv"].extend(["--ext", "kedro.extras.extensions.ipython"])
        # indent=1 is to match the default ipykernel style (see
        # ipykernel.write_kernel_spec).
        kernel_json.write_text(json.dumps(kernel_spec, indent=1), encoding="utf-8")

        kedro_extensions_dir = Path(__file__).parents[2] / "extras" / "extensions"
        shutil.copy(kedro_extensions_dir / "logo-32x32.png", kernel_path)
        shutil.copy(kedro_extensions_dir / "logo-64x64.png", kernel_path)
    except Exception as exc:
        if kernel_path is not None:
            # Do not leave a partially configured kernel behind: it would be
            # picked up as "already exists" on the next run.
            shutil.rmtree(kernel_path, ignore_errors=True)
        raise KedroCliError(
            f"Cannot setup kedro kernel for Jupyter.\nError: {exc}"
        ) from exc


@command_with_verbosity(jupyter, "convert")
Expand Down Expand Up @@ -200,8 +197,6 @@ def convert_notebook(
source_path = metadata.source_dir
package_name = metadata.package_name

_update_ipython_dir(project_path)

if not filepath and not all_flag:
secho(
"Please specify a notebook filepath "
Expand Down Expand Up @@ -247,46 +242,6 @@ def convert_notebook(
secho("Done!", color="green") # type: ignore


def _build_jupyter_command( # pylint: disable=too-many-arguments
base: str,
ip_address: str,
all_kernels: bool,
args: Iterable[str],
idle_timeout: int,
project_name: str = "Kedro",
) -> List[str]:
cmd = [
base,
"--ip",
ip_address,
f"--MappingKernelManager.cull_idle_timeout={idle_timeout}",
f"--MappingKernelManager.cull_interval={idle_timeout}",
]

if not all_kernels:
kernel_name = re.sub(r"[^\w]+", "", project_name).strip() or "Kedro"

cmd += [
"--NotebookApp.kernel_spec_manager_class="
"kedro.framework.cli.jupyter.SingleKernelSpecManager",
f"--KernelSpecManager.default_kernel_name='{kernel_name}'",
]

return cmd + list(args)


def _build_jupyter_env(kedro_env: str) -> Dict[str, Any]:
"""Build the environment dictionary that gets injected into the subprocess running
Jupyter. Since the subprocess has access only to the environment variables passed
in, we need to copy the current environment and add ``KEDRO_ENV``.
"""
if not kedro_env:
return {}
jupyter_env = os.environ.copy()
jupyter_env["KEDRO_ENV"] = kedro_env
return {"env": jupyter_env}


def _export_nodes(filepath: Path, output_path: Path) -> None:
"""Copy code from Jupyter cells into nodes in src/<package_name>/nodes/,
under filename with same name as notebook.
Expand Down
2 changes: 0 additions & 2 deletions kedro/framework/cli/pipeline.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# pylint: disable=too-many-lines

"""A collection of CLI commands for working with Kedro pipelines."""
import re
import shutil
Expand Down
6 changes: 1 addition & 5 deletions kedro/framework/cli/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
command_with_verbosity,
env_option,
forward_command,
ipython_message,
python_call,
split_string,
)
Expand Down Expand Up @@ -124,12 +123,9 @@ def ipython(
"""Open IPython with project specific variables loaded."""
_check_module_importable("IPython")

os.environ["IPYTHONDIR"] = str(metadata.project_path / ".ipython")
if env:
os.environ["KEDRO_ENV"] = env
if "-h" not in args and "--help" not in args:
ipython_message()
call(["ipython"] + list(args))
call(["ipython", "--ext", "kedro.extras.extensions.ipython"] + list(args))


@project_group.command()
Expand Down
Loading

0 comments on commit 8c899a9

Please sign in to comment.