From 9aac0a56f7db98d0f241234f859f4219ff21b634 Mon Sep 17 00:00:00 2001
From: Haifeng Jin <5476582+haifeng-jin@users.noreply.github.com>
Date: Wed, 3 Apr 2024 17:42:17 +0000
Subject: [PATCH 1/4] update the benchmarks (#1816)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* update the benchmarks

* Update benchmarks.md

---------

Co-authored-by: François Chollet
---
 templates/getting_started/benchmarks.md | 53 ++++++++++++++-----------
 1 file changed, 29 insertions(+), 24 deletions(-)

diff --git a/templates/getting_started/benchmarks.md b/templates/getting_started/benchmarks.md
index 1bf07a152b..8da72a5d72 100644
--- a/templates/getting_started/benchmarks.md
+++ b/templates/getting_started/benchmarks.md
@@ -6,7 +6,7 @@ We benchmark the three backends of Keras 3 alongside native PyTorch
 implementations ([HuggingFace](https://huggingface.co/) and
 [Meta Research](https://github.com/facebookresearch/)) and alongside Keras 2
 with TensorFlow. Find code and setup details for reproducing our results
-[here](https://github.com/haifeng-jin/keras-benchmarks/tree/v0.0.4).
+[here](https://github.com/haifeng-jin/keras-benchmarks/tree/v0.0.5).
 
 ## Models
 
@@ -21,6 +21,12 @@ selections.
 | CV | SegmentAnything<sup>1</sup> | StableDiffusion<sup>2</sup> |
 | NLP | BERT<sup>3</sup> | Gemma<sup>4</sup>, Mistral<sup>5</sup> |
 
+We are not measuring the best possible performance achievable by each
+framework, but the out-of-the-box performance of common user workflows. With
+this goal in mind, we chose the following model implementations. They may be
+subject to further optimization, but they should represent the most common
+way developers use these models today.
+
 We leveraged pre-existing implementations from KerasCV and KerasNLP for the
 Keras versions of the models. For native PyTorch, we opted for the most popular
 options online:
@@ -75,18 +81,18 @@ better.
 
 | | Batch<br>size | Native<br>PyTorch<br>(eager) | Native<br>PyTorch<br>(compiled) | Keras 2<br>(TensorFlow) | Keras 3<br>(TensorFlow) | Keras 3<br>(JAX) | Keras 3<br>(PyTorch)<br>(eager) | Keras 3<br>(best) |
 |:---:|---:|---:|---:|---:|---:|---:|---:|---:|
-| **SegmentAnything<br>(fit)** | 1 | 1,305.52 | 1,233.25 | 386.93 | **355.25** | 361.69 | 1,388.87 | **355.25** |
-| **SegmentAnything<br>(predict)** | 4 | 1,573.48 | 1,476.87 | 1,859.27 | 438.50 | **376.34** | 1,720.96 | **376.34** |
-| **Stable Diffusion<br>(fit)** | 8 | 481.48 | 396.64 | 1,023.21 | 392.24 | **391.21** | 823.44 | **391.21** |
-| **Stable Diffusion<br>(predict)** | 13 | 783.43 | 759.05 | 649.71 | **616.04** | 627.27 | 1,337.17 | **616.04** |
-| **BERT<br>(fit)** | 32 | 693.37 | 214.73 | 486.00 | **214.49** | 222.37 | 808.68 | **214.49** |
-| **BERT<br>(predict)** | 256 | 1,849.80 | 739.46 | 470.12 | 466.01 | **418.72** | 1,865.98 | **418.72** |
-| **Gemma<br>(fit)** | 8 | 253.95 | 1,036.83 | NA | **232.52** | 273.67 | 525.15 | **232.52** |
-| **Gemma<br>(generate)** | 32 | 2,759.18 | 2,735.18 | NA | 1,134.91 | **1,128.21** | 7,952.67* | **1,128.21** |
-| **Gemma<br>(generate)** | 1 | 1,721.03 | 1,618.85 | NA | 758.57 | **703.46** | 7,649.40* | **703.46** |
-| **Mistral<br>(fit)** | 8 | 217.56 | 1,225.66 | NA | **185.92** | 213.22 | 452.12 | **185.92** |
-| **Mistral<br>(generate)** | 32 | 1,618.43 | 1,633.50 | NA | 966.06 | **957.25** | 10,932.59* | **957.25** |
-| **Mistral<br>(generate)** | 1 | 1,610.84 | 1,554.79 | NA | 743.28 | **679.30** | 11,054.67* | **679.30** |
+| **SegmentAnything<br>(fit)** | 1 | 1,305.52 | 1,222.85 | 386.93 | **355.25** | 361.69 | 1,388.87 | **355.25** |
+| **SegmentAnything<br>(predict)** | 4 | 1,573.48 | 1,463.85 | 1,859.27 | 438.50 | **376.34** | 1,720.96 | **376.34** |
+| **Stable Diffusion<br>(fit)** | 8 | 481.48 | **385.58** | 1,023.21 | 392.24 | 391.21 | 823.44 | 391.21 |
+| **Stable Diffusion<br>(predict)** | 13 | 783.43 | 770.05 | 649.71 | **616.04** | 627.27 | 1,337.17 | **616.04** |
+| **BERT<br>(fit)** | 32 | 693.37 | **202.43** | 486.00 | 214.49 | 222.37 | 808.68 | 214.49 |
+| **BERT<br>(predict)** | 256 | 1,849.80 | 733.16 | 470.12 | 466.01 | **418.72** | 1,865.98 | **418.72** |
+| **Gemma<br>(fit)** | 8 | 253.95 | **211.73** | NA | 232.52 | 273.67 | 525.15 | 232.52 |
+| **Gemma<br>(generate)** | 32 | 2,745.18 | 2,735.18 | NA | 1,134.91 | **1,128.21** | 7,952.67* | **1,128.21** |
+| **Gemma<br>(generate)** | 1 | 1,721.03 | 1,669.01 | NA | 758.57 | **703.46** | 7,649.40* | **703.46** |
+| **Mistral<br>(fit)** | 8 | 217.56 | **176.04** | NA | 185.92 | 213.22 | 452.12 | 185.92 |
+| **Mistral<br>(generate)** | 32 | 1,618.43 | 1,640.41 | NA | 966.06 | **957.25** | 10,932.59* | **957.25** |
+| **Mistral<br>(generate)** | 1 | 1,610.84 | 1,577.84 | NA | 743.28 | **679.30** | 11,054.67* | **679.30** |
 
 \* _LLM inference with the PyTorch backend is abnormally slow at this time
 because KerasNLP uses static sequence padding, unlike HuggingFace. This will be
@@ -104,7 +110,7 @@ This underscores the value of framework optionality when chasing optimal
 performance. Keras 3 empowers you to seamlessly switch backends, ensuring you
 find the ideal match for your model.
 
-### Key Finding 2: Keras 3 is consistently faster than the reference PyTorch implementations
+### Key Finding 2: Keras 3 delivers best-in-class "out-of-the-box" performance
 
 The following figure compares the best-performing Keras 3 backend for each model
 with the corresponding reference native PyTorch implementation. We calculated
@@ -112,15 +118,14 @@ the throughput (steps/ms) increase of Keras 3 over native PyTorch from Table 2.
 A 100% increase indicates Keras 3 is twice as fast, while 0% means both
 frameworks perform equally.
 
-![Figure 1](https://i.imgur.com/03owEcn.png)
+![Figure 1](https://i.imgur.com/vO7pxPf.png)
 
 **Figure 1**: Keras 3 speedup over PyTorch measured in throughput (steps/ms)
 
-Keras 3 with the best-performing backend outperformed the reference native
-PyTorch implementations for all the models. Notably, 5 out of 10 tasks
-demonstrated speedups exceeding 50%, with a maximum speedup of 290%.
-
-### Key Finding 3: Keras 3 delivers best-in-class "out-of-the-box" performance
+Keras 3 with the best-performing backend is slightly (1-9%) slower than the
+reference PyTorch implementations for 4 of the 10 tasks and faster for the
+rest. Notably, for 5 out of 10 tasks, Keras demonstrated speedups exceeding
+50%, with a maximum speedup of 290%.
 
 All Keras model implementations benchmarked here are plain implementations
 without any custom performance optimizations: they represent "out-of-the-box
@@ -143,7 +148,7 @@ performance gap compared to Keras is wider than most other models.
 
 The takeaway here is that Keras offers exceptional out-of-the-box performance.
 You don't have to know all the tricks to make your model run faster.
 
-### Key Finding 4: Keras 3 is faster than Keras 2
+### Key Finding 3: Keras 3 is faster than Keras 2
 
 We also calculated the throughput (steps/ms) increase of Keras 3 (using its
 best-performing backend) over Keras 2 with TensorFlow from Table 1. Results are
@@ -168,9 +173,9 @@ XLA compilation in certain use cases.
 
 Framework performance depends heavily on the specific model. Keras 3 empowers
 you to select the fastest framework for your task – an option almost always to
-outperform both Keras 2 and reference PyTorch implementations. Importantly,
-Keras 3 models deliver excellent out-of-the-box performance without requiring
-complex, low-level optimizations.
+match or outperform both Keras 2 and the reference PyTorch implementations.
+Importantly, Keras 3 models deliver excellent out-of-the-box performance
+without requiring complex, low-level optimizations.
 
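A note on the backend switching these findings rely on: Keras 3 reads the `KERAS_BACKEND` environment variable once, before `keras` is first imported, which is the same pattern the guides in this repository use. A minimal sketch (the tiny model is illustrative only, not one of the benchmarked implementations):

```python
import os

# Keras 3 selects its backend from this environment variable, which must be
# set before `keras` is imported. Valid values: "tensorflow", "jax", "torch".
os.environ["KERAS_BACKEND"] = "jax"

import keras

# The same model code now runs unchanged on the chosen backend; re-running
# a benchmark under another framework means changing only the line above.
model = keras.Sequential([keras.layers.Dense(1)])
model.compile(optimizer="adam", loss="mse")
```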
 ## References

From 3826d01ef39869a3f20655c09fcba2eeb8c60cf1 Mon Sep 17 00:00:00 2001
From: Haifeng Jin <5476582+haifeng-jin@users.noreply.github.com>
Date: Wed, 3 Apr 2024 22:42:14 +0000
Subject: [PATCH 2/4] update benchmarks (#1819)

---
 templates/getting_started/benchmarks.md | 78 ++++++++++++++---------
 1 file changed, 49 insertions(+), 29 deletions(-)

diff --git a/templates/getting_started/benchmarks.md b/templates/getting_started/benchmarks.md
index 8da72a5d72..53523789e9 100644
--- a/templates/getting_started/benchmarks.md
+++ b/templates/getting_started/benchmarks.md
@@ -38,18 +38,6 @@ options online:
 We'll refer to this group as "Native PyTorch" in contrast to Keras 3 with
 PyTorch backend.
 
-We employed synthetic data for all benchmarks. We used `bfloat16` precision for
-all LLM training and inferencing, and LoRA<sup>6</sup> for all LLM training
-(fine-tuning). Based on the recommendations of the PyTorch team, we used
-`torch.compile(model, mode="reduce-overhead")` with native PyTorch
-implementations to compile the models.
-
-To measure out-of-the-box performance, we use high-level APIs (e.g. `Trainer()`
-from HuggingFace, plain PyTorch training loops and Keras `model.fit()`) with as
-little configuration as possible. Note that this is quite different from
-measuring an optimized implementation for a particular hardware/framework/model
-combination.
-
 ## Hardware
 
 All benchmarks are done with a single NVIDIA A100 GPU with 40GB of GPU memory on
@@ -73,8 +61,19 @@ its Python overhead.
 For large language models (Gemma and Mistral), we also used the same batch size
 since they are the same model type with a similar number of parameters (7B). We
 also benchmarked text generation with batch size equal to 1 since it is widely
-requested by users.
+requested by users. We used `bfloat16` precision for their training and
+inferencing, and LoRA<sup>6</sup> for their training (fine-tuning).
+
+To measure out-of-the-box performance, we try to use all default settings. For
+example, we use high-level APIs (e.g. `Trainer()` from HuggingFace, plain
+PyTorch training loops and Keras `model.fit()`) with as little configuration as
+possible. As a reference, we also included the compiled results for native
+PyTorch in addition to the default settings (eager mode).
+Note that this is quite different from measuring an optimized implementation for
+a particular hardware/framework/model combination. Please refer to
+[MLPerf](https://mlcommons.org/benchmarks/) for the best optimized results for
+different frameworks.
 
 **Table 2**: Benchmarking results. The speed is measured in ms/step. Lower is
 better.
 
@@ -110,22 +109,43 @@ This underscores the value of framework optionality when chasing optimal
 performance. Keras 3 empowers you to seamlessly switch backends, ensuring you
 find the ideal match for your model.
 
-### Key Finding 2: Keras 3 delivers best-in-class "out-of-the-box" performance
+### Key Finding 2: Keras 3 is faster than the reference PyTorch implementations
+
+Figure 1 compares the best-performing Keras 3 backend for each model
+with the corresponding reference native PyTorch implementation in all default
+settings, to simulate common developer workflows. We calculated the throughput
+(steps/ms) increase of Keras 3 over native PyTorch from Table 2. A 100% increase
+indicates Keras 3 is twice as fast, while 0% means both frameworks perform
+equally. Note that the native PyTorch results are in eager mode.
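To make the throughput arithmetic concrete, here is the calculation for one Table 2 entry (BERT predict: 1,849.80 ms/step for native PyTorch eager vs. 418.72 ms/step for the best Keras 3 backend); the ~342% result matches the ~340% maximum speedup cited below. A minimal sketch:

```python
# Throughput (steps/ms) is the reciprocal of step time (ms/step), so the
# percentage increase reduces to a ratio of the two step times.
# Numbers from Table 2, BERT (predict): native PyTorch (eager) vs Keras 3 (JAX).
native_ms_per_step = 1849.80
keras3_ms_per_step = 418.72

increase = (native_ms_per_step / keras3_ms_per_step - 1) * 100
print(f"Keras 3 throughput increase: {increase:.0f}%")  # ~342%, i.e. ~4.4x
```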
+
+![Figure 1](https://i.imgur.com/S3SLYaN.png)
+
+**Figure 1**: Keras 3 speedup over PyTorch (default settings) measured in throughput (steps/ms)
+
+Keras 3 with the best-performing backend outperformed the reference native
+PyTorch implementations for all the models. Notably, 5 out of 10 tasks
+demonstrated speedups exceeding 100%, with a maximum speedup of 340%.
+
+If you are more experienced with `torch.compile()`, you can refer to Figure 2.
+We did a similar comparison with `torch.compile()` enabled for the native
+PyTorch implementations.
+
+To enable `torch.compile()`, we refactored the training or inferencing process
+into a single compiled function, or passed certain arguments to the `Trainer`
+class. Based on the recommendations of the PyTorch team, we used
+`torch.compile(model, mode="reduce-overhead")`. You are welcome to tweak our
+code to explore more optimization techniques.
 
-The following figure compares the best-performing Keras 3 backend for each model
-with the corresponding reference native PyTorch implementation. We calculated
-the throughput (steps/ms) increase of Keras 3 over native PyTorch from Table 2.
-A 100% increase indicates Keras 3 is twice as fast, while 0% means both
-frameworks perform equally.
+![Figure 2](https://i.imgur.com/VBNsQA9.png)
 
-![Figure 1](https://i.imgur.com/vO7pxPf.png)
+**Figure 2**: Keras 3 speedup over PyTorch (compiled) measured in throughput (steps/ms)
 
-**Figure 1**: Keras 3 speedup over PyTorch measured in throughput (steps/ms)
+In Figure 2, Keras 3 with the best-performing backend is slightly (1-9%) slower
+than the reference PyTorch implementations for 4 of the 10 tasks and faster
+for the rest. Still, for 5 out of 10 tasks, Keras demonstrated speedups
+exceeding 50%, with a maximum speedup of 290%.
 
-Keras 3 with the best-performing backend is slightly (1-9%) slower than the
-reference PyTorch implementations for 4 of the 10 tasks and faster for the
-rest. Notably, for 5 out of 10 tasks, Keras demonstrated speedups exceeding
-50%, with a maximum speedup of 290%.
+### Key Finding 3: Keras 3 delivers best-in-class "out-of-the-box" performance
 
 All Keras model implementations benchmarked here are plain implementations
 without any custom performance optimizations: they represent "out-of-the-box
@@ -148,7 +168,7 @@ performance gap compared to Keras is wider than most other models.
 
 The takeaway here is that Keras offers exceptional out-of-the-box performance.
 You don't have to know all the tricks to make your model run faster.
 
-### Key Finding 3: Keras 3 is faster than Keras 2
+### Key Finding 4: Keras 3 is faster than Keras 2
 
 We also calculated the throughput (steps/ms) increase of Keras 3 (using its
 best-performing backend) over Keras 2 with TensorFlow from Table 1. Results are
@@ -173,9 +193,9 @@ XLA compilation in certain use cases.
 
 Framework performance depends heavily on the specific model. Keras 3 empowers
 you to select the fastest framework for your task – an option almost always to
-match or outperform both Keras 2 and the reference PyTorch implementations.
-Importantly, Keras 3 models deliver excellent out-of-the-box performance
-without requiring complex, low-level optimizations.
+outperform both Keras 2 and reference PyTorch implementations. Importantly,
+Keras 3 models deliver excellent out-of-the-box performance without requiring
+complex, low-level optimizations.
 
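For readers curious about the compiled native-PyTorch rows described above, here is a minimal sketch of the cited `torch.compile(model, mode="reduce-overhead")` call. The tiny `torch.nn.Linear` model and random batch are placeholders, not the benchmarked models; the actual harness lives in the linked keras-benchmarks repository.

```python
import torch

# Compile once, with the mode the PyTorch team recommended for these
# benchmarks; the first call triggers compilation, later calls are fast.
model = torch.nn.Linear(32, 1)  # placeholder for a benchmarked model
compiled_model = torch.compile(model, mode="reduce-overhead")

batch = torch.randn(8, 32)
with torch.no_grad():
    out = compiled_model(batch)
```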
## References From 793682677df2e75280f4cd77e61882087cbe104a Mon Sep 17 00:00:00 2001 From: Sachin Prasad Date: Wed, 3 Apr 2024 15:42:41 -0700 Subject: [PATCH 3/4] remove keras_core .ipynb dir (#1818) --- .../keras_core/custom_train_step_in_jax.ipynb | 462 ---- .../custom_train_step_in_tensorflow.ipynb | 634 ------ .../custom_train_step_in_torch.ipynb | 666 ------ .../distributed_training_with_jax.ipynb | 341 --- ...distributed_training_with_tensorflow.ipynb | 367 --- .../distributed_training_with_torch.ipynb | 375 ---- guides/ipynb/keras_core/functional_api.ipynb | 1402 ------------ .../getting_started_with_keras_core.ipynb | 667 ------ ...ew_layers_and_models_via_subclassing.ipynb | 1006 --------- .../ipynb/keras_core/sequential_model.ipynb | 674 ------ .../keras_core/serialization_and_saving.ipynb | 1116 ---------- .../training_with_built_in_methods.ipynb | 1965 ----------------- .../ipynb/keras_core/transfer_learning.ipynb | 843 ------- .../understanding_masking_and_padding.ipynb | 601 ----- ...riting_a_custom_training_loop_in_jax.ipynb | 779 ------- ...a_custom_training_loop_in_tensorflow.ipynb | 806 ------- ...ting_a_custom_training_loop_in_torch.ipynb | 575 ----- .../writing_your_own_callbacks.ipynb | 590 ----- 18 files changed, 13869 deletions(-) delete mode 100644 guides/ipynb/keras_core/custom_train_step_in_jax.ipynb delete mode 100644 guides/ipynb/keras_core/custom_train_step_in_tensorflow.ipynb delete mode 100644 guides/ipynb/keras_core/custom_train_step_in_torch.ipynb delete mode 100644 guides/ipynb/keras_core/distributed_training_with_jax.ipynb delete mode 100644 guides/ipynb/keras_core/distributed_training_with_tensorflow.ipynb delete mode 100644 guides/ipynb/keras_core/distributed_training_with_torch.ipynb delete mode 100644 guides/ipynb/keras_core/functional_api.ipynb delete mode 100644 guides/ipynb/keras_core/getting_started_with_keras_core.ipynb delete mode 100644 guides/ipynb/keras_core/making_new_layers_and_models_via_subclassing.ipynb delete mode 100644 guides/ipynb/keras_core/sequential_model.ipynb delete mode 100644 guides/ipynb/keras_core/serialization_and_saving.ipynb delete mode 100644 guides/ipynb/keras_core/training_with_built_in_methods.ipynb delete mode 100644 guides/ipynb/keras_core/transfer_learning.ipynb delete mode 100644 guides/ipynb/keras_core/understanding_masking_and_padding.ipynb delete mode 100644 guides/ipynb/keras_core/writing_a_custom_training_loop_in_jax.ipynb delete mode 100644 guides/ipynb/keras_core/writing_a_custom_training_loop_in_tensorflow.ipynb delete mode 100644 guides/ipynb/keras_core/writing_a_custom_training_loop_in_torch.ipynb delete mode 100644 guides/ipynb/keras_core/writing_your_own_callbacks.ipynb diff --git a/guides/ipynb/keras_core/custom_train_step_in_jax.ipynb b/guides/ipynb/keras_core/custom_train_step_in_jax.ipynb deleted file mode 100644 index f42a7e6197..0000000000 --- a/guides/ipynb/keras_core/custom_train_step_in_jax.ipynb +++ /dev/null @@ -1,462 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "# Customizing what happens in `fit()` with JAX\n", - "\n", - "**Author:** [fchollet](https://twitter.com/fchollet)
\n", - "**Date created:** 2023/06/27
\n", - "**Last modified:** 2023/06/27
\n", - "**Description:** Overriding the training step of the Model class with JAX." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Introduction\n", - "\n", - "When you're doing supervised learning, you can use `fit()` and everything works\n", - "smoothly.\n", - "\n", - "When you need to take control of every little detail, you can write your own training\n", - "loop entirely from scratch.\n", - "\n", - "But what if you need a custom training algorithm, but you still want to benefit from\n", - "the convenient features of `fit()`, such as callbacks, built-in distribution support,\n", - "or step fusing?\n", - "\n", - "A core principle of Keras is **progressive disclosure of complexity**. You should\n", - "always be able to get into lower-level workflows in a gradual way. You shouldn't fall\n", - "off a cliff if the high-level functionality doesn't exactly match your use case. You\n", - "should be able to gain more control over the small details while retaining a\n", - "commensurate amount of high-level convenience.\n", - "\n", - "When you need to customize what `fit()` does, you should **override the training step\n", - "function of the `Model` class**. This is the function that is called by `fit()` for\n", - "every batch of data. You will then be able to call `fit()` as usual -- and it will be\n", - "running your own learning algorithm.\n", - "\n", - "Note that this pattern does not prevent you from building models with the Functional\n", - "API. You can do this whether you're building `Sequential` models, Functional API\n", - "models, or subclassed models.\n", - "\n", - "Let's see how that works." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "# This guide can only be run with the JAX backend.\n", - "os.environ[\"KERAS_BACKEND\"] = \"jax\"\n", - "\n", - "import jax\n", - "import keras\n", - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## A first simple example\n", - "\n", - "Let's start from a simple example:\n", - "\n", - "- We create a new class that subclasses `keras.Model`.\n", - "- We implement a fully-stateless `compute_loss_and_updates()` method\n", - "to compute the loss as well as the updated values for the non-trainable\n", - "variables of the model. 
Internally, it calls `stateless_call()` and\n", - "the built-in `compute_loss()`.\n", - "- We implement a fully-stateless `train_step()` method to compute current\n", - "metric values (including the loss) as well as updated values for the\n", - "trainable variables, the optimizer variables, and the metric variables.\n", - "\n", - "Note that you can also take into account the `sample_weight` argument by:\n", - "\n", - "- Unpacking the data as `x, y, sample_weight = data`\n", - "- Passing `sample_weight` to `compute_loss()`\n", - "- Passing `sample_weight` alongside `y` and `y_pred`\n", - "to metrics in `stateless_update_state()`" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class CustomModel(keras.Model):\n", - " def compute_loss_and_updates(\n", - " self,\n", - " trainable_variables,\n", - " non_trainable_variables,\n", - " x,\n", - " y,\n", - " training=False,\n", - " ):\n", - " y_pred, non_trainable_variables = self.stateless_call(\n", - " trainable_variables,\n", - " non_trainable_variables,\n", - " x,\n", - " training=training,\n", - " )\n", - " loss = self.compute_loss(x, y, y_pred)\n", - " return loss, (y_pred, non_trainable_variables)\n", - "\n", - " def train_step(self, state, data):\n", - " (\n", - " trainable_variables,\n", - " non_trainable_variables,\n", - " optimizer_variables,\n", - " metrics_variables,\n", - " ) = state\n", - " x, y = data\n", - "\n", - " # Get the gradient function.\n", - " grad_fn = jax.value_and_grad(self.compute_loss_and_updates, has_aux=True)\n", - "\n", - " # Compute the gradients.\n", - " (loss, (y_pred, non_trainable_variables)), grads = grad_fn(\n", - " trainable_variables,\n", - " non_trainable_variables,\n", - " x,\n", - " y,\n", - " training=True,\n", - " )\n", - "\n", - " # Update trainable variables and optimizer variables.\n", - " (\n", - " trainable_variables,\n", - " optimizer_variables,\n", - " ) = self.optimizer.stateless_apply(\n", - " optimizer_variables, grads, trainable_variables\n", - " )\n", - "\n", - " # Update metrics.\n", - " new_metrics_vars = []\n", - " for metric in self.metrics:\n", - " this_metric_vars = metrics_variables[\n", - " len(new_metrics_vars) : len(new_metrics_vars) + len(metric.variables)\n", - " ]\n", - " if metric.name == \"loss\":\n", - " this_metric_vars = metric.stateless_update_state(this_metric_vars, loss)\n", - " else:\n", - " this_metric_vars = metric.stateless_update_state(\n", - " this_metric_vars, y, y_pred\n", - " )\n", - " logs = metric.stateless_result(this_metric_vars)\n", - " new_metrics_vars += this_metric_vars\n", - "\n", - " # Return metric logs and updated state variables.\n", - " state = (\n", - " trainable_variables,\n", - " non_trainable_variables,\n", - " optimizer_variables,\n", - " new_metrics_vars,\n", - " )\n", - " return logs, state\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Let's try this out:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Construct and compile an instance of CustomModel\n", - "inputs = keras.Input(shape=(32,))\n", - "outputs = keras.layers.Dense(1)(inputs)\n", - "model = CustomModel(inputs, outputs)\n", - "model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"mae\"])\n", - "\n", - "# Just use `fit` as usual\n", - "x = np.random.random((1000, 32))\n", - "y = np.random.random((1000, 1))\n", - "model.fit(x, 
y, epochs=3)\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Going lower-level\n", - "\n", - "Naturally, you could just skip passing a loss function in `compile()`, and instead do\n", - "everything *manually* in `train_step`. Likewise for metrics.\n", - "\n", - "Here's a lower-level example, that only uses `compile()` to configure the optimizer:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class CustomModel(keras.Model):\n", - " def __init__(self, *args, **kwargs):\n", - " super().__init__(*args, **kwargs)\n", - " self.loss_tracker = keras.metrics.Mean(name=\"loss\")\n", - " self.mae_metric = keras.metrics.MeanAbsoluteError(name=\"mae\")\n", - " self.loss_fn = keras.losses.MeanSquaredError()\n", - "\n", - " def compute_loss_and_updates(\n", - " self,\n", - " trainable_variables,\n", - " non_trainable_variables,\n", - " x,\n", - " y,\n", - " training=False,\n", - " ):\n", - " y_pred, non_trainable_variables = self.stateless_call(\n", - " trainable_variables,\n", - " non_trainable_variables,\n", - " x,\n", - " training=training,\n", - " )\n", - " loss = self.loss_fn(y, y_pred)\n", - " return loss, (y_pred, non_trainable_variables)\n", - "\n", - " def train_step(self, state, data):\n", - " (\n", - " trainable_variables,\n", - " non_trainable_variables,\n", - " optimizer_variables,\n", - " metrics_variables,\n", - " ) = state\n", - " x, y = data\n", - "\n", - " # Get the gradient function.\n", - " grad_fn = jax.value_and_grad(self.compute_loss_and_updates, has_aux=True)\n", - "\n", - " # Compute the gradients.\n", - " (loss, (y_pred, non_trainable_variables)), grads = grad_fn(\n", - " trainable_variables,\n", - " non_trainable_variables,\n", - " x,\n", - " y,\n", - " training=True,\n", - " )\n", - "\n", - " # Update trainable variables and optimizer variables.\n", - " (\n", - " trainable_variables,\n", - " optimizer_variables,\n", - " ) = self.optimizer.stateless_apply(\n", - " optimizer_variables, grads, trainable_variables\n", - " )\n", - "\n", - " # Update metrics.\n", - " loss_tracker_vars = metrics_variables[: len(self.loss_tracker.variables)]\n", - " mae_metric_vars = metrics_variables[len(self.loss_tracker.variables) :]\n", - "\n", - " loss_tracker_vars = self.loss_tracker.stateless_update_state(\n", - " loss_tracker_vars, loss\n", - " )\n", - " mae_metric_vars = self.mae_metric.stateless_update_state(\n", - " mae_metric_vars, y, y_pred\n", - " )\n", - "\n", - " logs = {}\n", - " logs[self.loss_tracker.name] = self.loss_tracker.stateless_result(\n", - " loss_tracker_vars\n", - " )\n", - " logs[self.mae_metric.name] = self.mae_metric.stateless_result(mae_metric_vars)\n", - "\n", - " new_metrics_vars = loss_tracker_vars + mae_metric_vars\n", - "\n", - " # Return metric logs and updated state variables.\n", - " state = (\n", - " trainable_variables,\n", - " non_trainable_variables,\n", - " optimizer_variables,\n", - " new_metrics_vars,\n", - " )\n", - " return logs, state\n", - "\n", - " @property\n", - " def metrics(self):\n", - " # We list our `Metric` objects here so that `reset_states()` can be\n", - " # called automatically at the start of each epoch\n", - " # or at the start of `evaluate()`.\n", - " return [self.loss_tracker, self.mae_metric]\n", - "\n", - "\n", - "# Construct an instance of CustomModel\n", - "inputs = keras.Input(shape=(32,))\n", - "outputs = keras.layers.Dense(1)(inputs)\n", - "model = 
CustomModel(inputs, outputs)\n", - "\n", - "# We don't passs a loss or metrics here.\n", - "model.compile(optimizer=\"adam\")\n", - "\n", - "# Just use `fit` as usual -- you can use callbacks, etc.\n", - "x = np.random.random((1000, 32))\n", - "y = np.random.random((1000, 1))\n", - "model.fit(x, y, epochs=5)\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Providing your own evaluation step\n", - "\n", - "What if you want to do the same for calls to `model.evaluate()`? Then you would\n", - "override `test_step` in exactly the same way. Here's what it looks like:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class CustomModel(keras.Model):\n", - " def test_step(self, state, data):\n", - " # Unpack the data.\n", - " x, y = data\n", - " (\n", - " trainable_variables,\n", - " non_trainable_variables,\n", - " metrics_variables,\n", - " ) = state\n", - "\n", - " # Compute predictions and loss.\n", - " y_pred, non_trainable_variables = self.stateless_call(\n", - " trainable_variables,\n", - " non_trainable_variables,\n", - " x,\n", - " training=False,\n", - " )\n", - " loss = self.compute_loss(x, y, y_pred)\n", - "\n", - " # Update metrics.\n", - " new_metrics_vars = []\n", - " for metric in self.metrics:\n", - " this_metric_vars = metrics_variables[\n", - " len(new_metrics_vars) : len(new_metrics_vars) + len(metric.variables)\n", - " ]\n", - " if metric.name == \"loss\":\n", - " this_metric_vars = metric.stateless_update_state(this_metric_vars, loss)\n", - " else:\n", - " this_metric_vars = metric.stateless_update_state(\n", - " this_metric_vars, y, y_pred\n", - " )\n", - " logs = metric.stateless_result(this_metric_vars)\n", - " new_metrics_vars += this_metric_vars\n", - "\n", - " # Return metric logs and updated state variables.\n", - " state = (\n", - " trainable_variables,\n", - " non_trainable_variables,\n", - " new_metrics_vars,\n", - " )\n", - " return logs, state\n", - "\n", - "\n", - "# Construct an instance of CustomModel\n", - "inputs = keras.Input(shape=(32,))\n", - "outputs = keras.layers.Dense(1)(inputs)\n", - "model = CustomModel(inputs, outputs)\n", - "model.compile(loss=\"mse\", metrics=[\"mae\"])\n", - "\n", - "# Evaluate with our custom test_step\n", - "x = np.random.random((1000, 32))\n", - "y = np.random.random((1000, 1))\n", - "model.evaluate(x, y)\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "That's it!" 
- ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "custom_train_step_in_jax", - "private_outputs": false, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/guides/ipynb/keras_core/custom_train_step_in_tensorflow.ipynb b/guides/ipynb/keras_core/custom_train_step_in_tensorflow.ipynb deleted file mode 100644 index 5022a22819..0000000000 --- a/guides/ipynb/keras_core/custom_train_step_in_tensorflow.ipynb +++ /dev/null @@ -1,634 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "# Customizing what happens in `fit()` with TensorFlow\n", - "\n", - "**Author:** [fchollet](https://twitter.com/fchollet)
\n", - "**Date created:** 2020/04/15
\n", - "**Last modified:** 2023/06/27
\n", - "**Description:** Overriding the training step of the Model class with TensorFlow." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Introduction\n", - "\n", - "When you're doing supervised learning, you can use `fit()` and everything works\n", - "smoothly.\n", - "\n", - "When you need to take control of every little detail, you can write your own training\n", - "loop entirely from scratch.\n", - "\n", - "But what if you need a custom training algorithm, but you still want to benefit from\n", - "the convenient features of `fit()`, such as callbacks, built-in distribution support,\n", - "or step fusing?\n", - "\n", - "A core principle of Keras is **progressive disclosure of complexity**. You should\n", - "always be able to get into lower-level workflows in a gradual way. You shouldn't fall\n", - "off a cliff if the high-level functionality doesn't exactly match your use case. You\n", - "should be able to gain more control over the small details while retaining a\n", - "commensurate amount of high-level convenience.\n", - "\n", - "When you need to customize what `fit()` does, you should **override the training step\n", - "function of the `Model` class**. This is the function that is called by `fit()` for\n", - "every batch of data. You will then be able to call `fit()` as usual -- and it will be\n", - "running your own learning algorithm.\n", - "\n", - "Note that this pattern does not prevent you from building models with the Functional\n", - "API. You can do this whether you're building `Sequential` models, Functional API\n", - "models, or subclassed models.\n", - "\n", - "Let's see how that works." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "# This guide can only be run with the TF backend.\n", - "os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n", - "\n", - "import tensorflow as tf\n", - "import keras\n", - "from keras import layers\n", - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## A first simple example\n", - "\n", - "Let's start from a simple example:\n", - "\n", - "- We create a new class that subclasses `keras.Model`.\n", - "- We just override the method `train_step(self, data)`.\n", - "- We return a dictionary mapping metric names (including the loss) to their current\n", - "value.\n", - "\n", - "The input argument `data` is what gets passed to fit as training data:\n", - "\n", - "- If you pass NumPy arrays, by calling `fit(x, y, ...)`, then `data` will be the tuple\n", - "`(x, y)`\n", - "- If you pass a `tf.data.Dataset`, by calling `fit(dataset, ...)`, then `data` will be\n", - "what gets yielded by `dataset` at each batch.\n", - "\n", - "In the body of the `train_step()` method, we implement a regular training update,\n", - "similar to what you are already familiar with. Importantly, **we compute the loss via\n", - "`self.compute_loss()`**, which wraps the loss(es) function(s) that were passed to\n", - "`compile()`.\n", - "\n", - "Similarly, we call `metric.update_state(y, y_pred)` on metrics from `self.metrics`,\n", - "to update the state of the metrics that were passed in `compile()`,\n", - "and we query results from `self.metrics` at the end to retrieve their current value." 
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class CustomModel(keras.Model):\n", - " def train_step(self, data):\n", - " # Unpack the data. Its structure depends on your model and\n", - " # on what you pass to `fit()`.\n", - " x, y = data\n", - "\n", - " with tf.GradientTape() as tape:\n", - " y_pred = self(x, training=True) # Forward pass\n", - " # Compute the loss value\n", - " # (the loss function is configured in `compile()`)\n", - " loss = self.compute_loss(y=y, y_pred=y_pred)\n", - "\n", - " # Compute gradients\n", - " trainable_vars = self.trainable_variables\n", - " gradients = tape.gradient(loss, trainable_vars)\n", - "\n", - " # Update weights\n", - " self.optimizer.apply(gradients, trainable_vars)\n", - "\n", - " # Update metrics (includes the metric that tracks the loss)\n", - " for metric in self.metrics:\n", - " if metric.name == \"loss\":\n", - " metric.update_state(loss)\n", - " else:\n", - " metric.update_state(y, y_pred)\n", - "\n", - " # Return a dict mapping metric names to current value\n", - " return {m.name: m.result() for m in self.metrics}\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Let's try this out:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Construct and compile an instance of CustomModel\n", - "inputs = keras.Input(shape=(32,))\n", - "outputs = keras.layers.Dense(1)(inputs)\n", - "model = CustomModel(inputs, outputs)\n", - "model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"mae\"])\n", - "\n", - "# Just use `fit` as usual\n", - "x = np.random.random((1000, 32))\n", - "y = np.random.random((1000, 1))\n", - "model.fit(x, y, epochs=3)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Going lower-level\n", - "\n", - "Naturally, you could just skip passing a loss function in `compile()`, and instead do\n", - "everything *manually* in `train_step`. Likewise for metrics.\n", - "\n", - "Here's a lower-level example, that only uses `compile()` to configure the optimizer:\n", - "\n", - "- We start by creating `Metric` instances to track our loss and a MAE score (in `__init__()`).\n", - "- We implement a custom `train_step()` that updates the state of these metrics\n", - "(by calling `update_state()` on them), then query them (via `result()`) to return their current average value,\n", - "to be displayed by the progress bar and to be pass to any callback.\n", - "- Note that we would need to call `reset_states()` on our metrics between each epoch! Otherwise\n", - "calling `result()` would return an average since the start of training, whereas we usually work\n", - "with per-epoch averages. Thankfully, the framework can do that for us: just list any metric\n", - "you want to reset in the `metrics` property of the model. The model will call `reset_states()`\n", - "on any object listed here at the beginning of each `fit()` epoch or at the beginning of a call to\n", - "`evaluate()`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class CustomModel(keras.Model):\n", - " def __init__(self, *args, **kwargs):\n", - " super().__init__(*args, **kwargs)\n", - " self.loss_tracker = keras.metrics.Mean(name=\"loss\")\n", - " self.mae_metric = keras.metrics.MeanAbsoluteError(name=\"mae\")\n", - " self.loss_fn = keras.losses.MeanSquaredError()\n", - "\n", - " def train_step(self, data):\n", - " x, y = data\n", - "\n", - " with tf.GradientTape() as tape:\n", - " y_pred = self(x, training=True) # Forward pass\n", - " # Compute our own loss\n", - " loss = self.loss_fn(y, y_pred)\n", - "\n", - " # Compute gradients\n", - " trainable_vars = self.trainable_variables\n", - " gradients = tape.gradient(loss, trainable_vars)\n", - "\n", - " # Update weights\n", - " self.optimizer.apply(gradients, trainable_vars)\n", - "\n", - " # Compute our own metrics\n", - " self.loss_tracker.update_state(loss)\n", - " self.mae_metric.update_state(y, y_pred)\n", - " return {\n", - " \"loss\": self.loss_tracker.result(),\n", - " \"mae\": self.mae_metric.result(),\n", - " }\n", - "\n", - " @property\n", - " def metrics(self):\n", - " # We list our `Metric` objects here so that `reset_states()` can be\n", - " # called automatically at the start of each epoch\n", - " # or at the start of `evaluate()`.\n", - " return [self.loss_tracker, self.mae_metric]\n", - "\n", - "\n", - "# Construct an instance of CustomModel\n", - "inputs = keras.Input(shape=(32,))\n", - "outputs = keras.layers.Dense(1)(inputs)\n", - "model = CustomModel(inputs, outputs)\n", - "\n", - "# We don't passs a loss or metrics here.\n", - "model.compile(optimizer=\"adam\")\n", - "\n", - "# Just use `fit` as usual -- you can use callbacks, etc.\n", - "x = np.random.random((1000, 32))\n", - "y = np.random.random((1000, 1))\n", - "model.fit(x, y, epochs=5)\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Supporting `sample_weight` & `class_weight`\n", - "\n", - "You may have noticed that our first basic example didn't make any mention of sample\n", - "weighting. If you want to support the `fit()` arguments `sample_weight` and\n", - "`class_weight`, you'd simply do the following:\n", - "\n", - "- Unpack `sample_weight` from the `data` argument\n", - "- Pass it to `compute_loss` & `update_state` (of course, you could also just apply\n", - "it manually if you don't rely on `compile()` for losses & metrics)\n", - "- That's it." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class CustomModel(keras.Model):\n", - " def train_step(self, data):\n", - " # Unpack the data. 
Its structure depends on your model and\n", - " # on what you pass to `fit()`.\n", - " if len(data) == 3:\n", - " x, y, sample_weight = data\n", - " else:\n", - " sample_weight = None\n", - " x, y = data\n", - "\n", - " with tf.GradientTape() as tape:\n", - " y_pred = self(x, training=True) # Forward pass\n", - " # Compute the loss value.\n", - " # The loss function is configured in `compile()`.\n", - " loss = self.compute_loss(\n", - " y=y,\n", - " y_pred=y_pred,\n", - " sample_weight=sample_weight,\n", - " )\n", - "\n", - " # Compute gradients\n", - " trainable_vars = self.trainable_variables\n", - " gradients = tape.gradient(loss, trainable_vars)\n", - "\n", - " # Update weights\n", - " self.optimizer.apply(gradients, trainable_vars)\n", - "\n", - " # Update the metrics.\n", - " # Metrics are configured in `compile()`.\n", - " for metric in self.metrics:\n", - " if metric.name == \"loss\":\n", - " metric.update_state(loss)\n", - " else:\n", - " metric.update_state(y, y_pred, sample_weight=sample_weight)\n", - "\n", - " # Return a dict mapping metric names to current value.\n", - " # Note that it will include the loss (tracked in self.metrics).\n", - " return {m.name: m.result() for m in self.metrics}\n", - "\n", - "\n", - "# Construct and compile an instance of CustomModel\n", - "inputs = keras.Input(shape=(32,))\n", - "outputs = keras.layers.Dense(1)(inputs)\n", - "model = CustomModel(inputs, outputs)\n", - "model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"mae\"])\n", - "\n", - "# You can now use sample_weight argument\n", - "x = np.random.random((1000, 32))\n", - "y = np.random.random((1000, 1))\n", - "sw = np.random.random((1000, 1))\n", - "model.fit(x, y, sample_weight=sw, epochs=3)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Providing your own evaluation step\n", - "\n", - "What if you want to do the same for calls to `model.evaluate()`? Then you would\n", - "override `test_step` in exactly the same way. 
Here's what it looks like:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class CustomModel(keras.Model):\n", - " def test_step(self, data):\n", - " # Unpack the data\n", - " x, y = data\n", - " # Compute predictions\n", - " y_pred = self(x, training=False)\n", - " # Updates the metrics tracking the loss\n", - " loss = self.compute_loss(y=y, y_pred=y_pred)\n", - " # Update the metrics.\n", - " for metric in self.metrics:\n", - " if metric.name == \"loss\":\n", - " metric.update_state(loss)\n", - " else:\n", - " metric.update_state(y, y_pred)\n", - " # Return a dict mapping metric names to current value.\n", - " # Note that it will include the loss (tracked in self.metrics).\n", - " return {m.name: m.result() for m in self.metrics}\n", - "\n", - "\n", - "# Construct an instance of CustomModel\n", - "inputs = keras.Input(shape=(32,))\n", - "outputs = keras.layers.Dense(1)(inputs)\n", - "model = CustomModel(inputs, outputs)\n", - "model.compile(loss=\"mse\", metrics=[\"mae\"])\n", - "\n", - "# Evaluate with our custom test_step\n", - "x = np.random.random((1000, 32))\n", - "y = np.random.random((1000, 1))\n", - "model.evaluate(x, y)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Wrapping up: an end-to-end GAN example\n", - "\n", - "Let's walk through an end-to-end example that leverages everything you just learned.\n", - "\n", - "Let's consider:\n", - "\n", - "- A generator network meant to generate 28x28x1 images.\n", - "- A discriminator network meant to classify 28x28x1 images into two classes (\"fake\" and\n", - "\"real\").\n", - "- One optimizer for each.\n", - "- A loss function to train the discriminator." 
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Create the discriminator\n", - "discriminator = keras.Sequential(\n", - " [\n", - " keras.Input(shape=(28, 28, 1)),\n", - " layers.Conv2D(64, (3, 3), strides=(2, 2), padding=\"same\"),\n", - " layers.LeakyReLU(negative_slope=0.2),\n", - " layers.Conv2D(128, (3, 3), strides=(2, 2), padding=\"same\"),\n", - " layers.LeakyReLU(negative_slope=0.2),\n", - " layers.GlobalMaxPooling2D(),\n", - " layers.Dense(1),\n", - " ],\n", - " name=\"discriminator\",\n", - ")\n", - "\n", - "# Create the generator\n", - "latent_dim = 128\n", - "generator = keras.Sequential(\n", - " [\n", - " keras.Input(shape=(latent_dim,)),\n", - " # We want to generate 128 coefficients to reshape into a 7x7x128 map\n", - " layers.Dense(7 * 7 * 128),\n", - " layers.LeakyReLU(negative_slope=0.2),\n", - " layers.Reshape((7, 7, 128)),\n", - " layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding=\"same\"),\n", - " layers.LeakyReLU(negative_slope=0.2),\n", - " layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding=\"same\"),\n", - " layers.LeakyReLU(negative_slope=0.2),\n", - " layers.Conv2D(1, (7, 7), padding=\"same\", activation=\"sigmoid\"),\n", - " ],\n", - " name=\"generator\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Here's a feature-complete GAN class, overriding `compile()` to use its own signature,\n", - "and implementing the entire GAN algorithm in 17 lines in `train_step`:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class GAN(keras.Model):\n", - " def __init__(self, discriminator, generator, latent_dim):\n", - " super().__init__()\n", - " self.discriminator = discriminator\n", - " self.generator = generator\n", - " self.latent_dim = latent_dim\n", - " self.d_loss_tracker = keras.metrics.Mean(name=\"d_loss\")\n", - " self.g_loss_tracker = keras.metrics.Mean(name=\"g_loss\")\n", - " self.seed_generator = keras.random.SeedGenerator(1337)\n", - "\n", - " @property\n", - " def metrics(self):\n", - " return [self.d_loss_tracker, self.g_loss_tracker]\n", - "\n", - " def compile(self, d_optimizer, g_optimizer, loss_fn):\n", - " super().compile()\n", - " self.d_optimizer = d_optimizer\n", - " self.g_optimizer = g_optimizer\n", - " self.loss_fn = loss_fn\n", - "\n", - " def train_step(self, real_images):\n", - " if isinstance(real_images, tuple):\n", - " real_images = real_images[0]\n", - " # Sample random points in the latent space\n", - " batch_size = tf.shape(real_images)[0]\n", - " random_latent_vectors = keras.random.normal(\n", - " shape=(batch_size, self.latent_dim), seed=self.seed_generator\n", - " )\n", - "\n", - " # Decode them to fake images\n", - " generated_images = self.generator(random_latent_vectors)\n", - "\n", - " # Combine them with real images\n", - " combined_images = tf.concat([generated_images, real_images], axis=0)\n", - "\n", - " # Assemble labels discriminating real from fake images\n", - " labels = tf.concat(\n", - " [tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0\n", - " )\n", - " # Add random noise to the labels - important trick!\n", - " labels += 0.05 * keras.random.uniform(\n", - " tf.shape(labels), seed=self.seed_generator\n", - " )\n", - "\n", - " # Train the discriminator\n", - " with tf.GradientTape() as tape:\n", - " predictions = 
self.discriminator(combined_images)\n", - " d_loss = self.loss_fn(labels, predictions)\n", - " grads = tape.gradient(d_loss, self.discriminator.trainable_weights)\n", - " self.d_optimizer.apply(grads, self.discriminator.trainable_weights)\n", - "\n", - " # Sample random points in the latent space\n", - " random_latent_vectors = keras.random.normal(\n", - " shape=(batch_size, self.latent_dim), seed=self.seed_generator\n", - " )\n", - "\n", - " # Assemble labels that say \"all real images\"\n", - " misleading_labels = tf.zeros((batch_size, 1))\n", - "\n", - " # Train the generator (note that we should *not* update the weights\n", - " # of the discriminator)!\n", - " with tf.GradientTape() as tape:\n", - " predictions = self.discriminator(self.generator(random_latent_vectors))\n", - " g_loss = self.loss_fn(misleading_labels, predictions)\n", - " grads = tape.gradient(g_loss, self.generator.trainable_weights)\n", - " self.g_optimizer.apply(grads, self.generator.trainable_weights)\n", - "\n", - " # Update metrics and return their value.\n", - " self.d_loss_tracker.update_state(d_loss)\n", - " self.g_loss_tracker.update_state(g_loss)\n", - " return {\n", - " \"d_loss\": self.d_loss_tracker.result(),\n", - " \"g_loss\": self.g_loss_tracker.result(),\n", - " }\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Let's test-drive it:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Prepare the dataset. We use both the training & test MNIST digits.\n", - "batch_size = 64\n", - "(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()\n", - "all_digits = np.concatenate([x_train, x_test])\n", - "all_digits = all_digits.astype(\"float32\") / 255.0\n", - "all_digits = np.reshape(all_digits, (-1, 28, 28, 1))\n", - "dataset = tf.data.Dataset.from_tensor_slices(all_digits)\n", - "dataset = dataset.shuffle(buffer_size=1024).batch(batch_size)\n", - "\n", - "gan = GAN(discriminator=discriminator, generator=generator, latent_dim=latent_dim)\n", - "gan.compile(\n", - " d_optimizer=keras.optimizers.Adam(learning_rate=0.0003),\n", - " g_optimizer=keras.optimizers.Adam(learning_rate=0.0003),\n", - " loss_fn=keras.losses.BinaryCrossentropy(from_logits=True),\n", - ")\n", - "\n", - "# To limit the execution time, we only train on 100 batches. You can train on\n", - "# the entire dataset. You will need about 20 epochs to get nice results.\n", - "gan.fit(dataset.take(100), epochs=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "The ideas behind deep learning are simple, so why should their implementation be painful?" 
- ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "custom_train_step_in_tensorflow", - "private_outputs": false, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/guides/ipynb/keras_core/custom_train_step_in_torch.ipynb b/guides/ipynb/keras_core/custom_train_step_in_torch.ipynb deleted file mode 100644 index 5a853f6de1..0000000000 --- a/guides/ipynb/keras_core/custom_train_step_in_torch.ipynb +++ /dev/null @@ -1,666 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "# Customizing what happens in `fit()` with PyTorch\n", - "\n", - "**Author:** [fchollet](https://twitter.com/fchollet)
\n", - "**Date created:** 2023/06/27
\n", - "**Last modified:** 2023/06/27
\n", - "**Description:** Overriding the training step of the Model class with PyTorch." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Introduction\n", - "\n", - "When you're doing supervised learning, you can use `fit()` and everything works\n", - "smoothly.\n", - "\n", - "When you need to take control of every little detail, you can write your own training\n", - "loop entirely from scratch.\n", - "\n", - "But what if you need a custom training algorithm, but you still want to benefit from\n", - "the convenient features of `fit()`, such as callbacks, built-in distribution support,\n", - "or step fusing?\n", - "\n", - "A core principle of Keras is **progressive disclosure of complexity**. You should\n", - "always be able to get into lower-level workflows in a gradual way. You shouldn't fall\n", - "off a cliff if the high-level functionality doesn't exactly match your use case. You\n", - "should be able to gain more control over the small details while retaining a\n", - "commensurate amount of high-level convenience.\n", - "\n", - "When you need to customize what `fit()` does, you should **override the training step\n", - "function of the `Model` class**. This is the function that is called by `fit()` for\n", - "every batch of data. You will then be able to call `fit()` as usual -- and it will be\n", - "running your own learning algorithm.\n", - "\n", - "Note that this pattern does not prevent you from building models with the Functional\n", - "API. You can do this whether you're building `Sequential` models, Functional API\n", - "models, or subclassed models.\n", - "\n", - "Let's see how that works." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "# This guide can only be run with the torch backend.\n", - "os.environ[\"KERAS_BACKEND\"] = \"torch\"\n", - "\n", - "import torch\n", - "import keras\n", - "from keras import layers\n", - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## A first simple example\n", - "\n", - "Let's start from a simple example:\n", - "\n", - "- We create a new class that subclasses `keras.Model`.\n", - "- We just override the method `train_step(self, data)`.\n", - "- We return a dictionary mapping metric names (including the loss) to their current\n", - "value.\n", - "\n", - "The input argument `data` is what gets passed to fit as training data:\n", - "\n", - "- If you pass NumPy arrays, by calling `fit(x, y, ...)`, then `data` will be the tuple\n", - "`(x, y)`\n", - "- If you pass a `torch.utils.data.DataLoader` or a `tf.data.Dataset`,\n", - "by calling `fit(dataset, ...)`, then `data` will be what gets yielded\n", - "by `dataset` at each batch.\n", - "\n", - "In the body of the `train_step()` method, we implement a regular training update,\n", - "similar to what you are already familiar with. Importantly, **we compute the loss via\n", - "`self.compute_loss()`**, which wraps the loss(es) function(s) that were passed to\n", - "`compile()`.\n", - "\n", - "Similarly, we call `metric.update_state(y, y_pred)` on metrics from `self.metrics`,\n", - "to update the state of the metrics that were passed in `compile()`,\n", - "and we query results from `self.metrics` at the end to retrieve their current value." 
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class CustomModel(keras.Model):\n", - " def train_step(self, data):\n", - " # Unpack the data. Its structure depends on your model and\n", - " # on what you pass to `fit()`.\n", - " x, y = data\n", - "\n", - " # Call torch.nn.Module.zero_grad() to clear the leftover gradients\n", - " # for the weights from the previous train step.\n", - " self.zero_grad()\n", - "\n", - " # Compute loss\n", - " y_pred = self(x, training=True) # Forward pass\n", - " loss = self.compute_loss(y=y, y_pred=y_pred)\n", - "\n", - " # Call torch.Tensor.backward() on the loss to compute gradients\n", - " # for the weights.\n", - " loss.backward()\n", - "\n", - " trainable_weights = [v for v in self.trainable_weights]\n", - " gradients = [v.value.grad for v in trainable_weights]\n", - "\n", - " # Update weights\n", - " with torch.no_grad():\n", - " self.optimizer.apply(gradients, trainable_weights)\n", - "\n", - " # Update metrics (includes the metric that tracks the loss)\n", - " for metric in self.metrics:\n", - " if metric.name == \"loss\":\n", - " metric.update_state(loss)\n", - " else:\n", - " metric.update_state(y, y_pred)\n", - "\n", - " # Return a dict mapping metric names to current value\n", - " # Note that it will include the loss (tracked in self.metrics).\n", - " return {m.name: m.result() for m in self.metrics}\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Let's try this out:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Construct and compile an instance of CustomModel\n", - "inputs = keras.Input(shape=(32,))\n", - "outputs = keras.layers.Dense(1)(inputs)\n", - "model = CustomModel(inputs, outputs)\n", - "model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"mae\"])\n", - "\n", - "# Just use `fit` as usual\n", - "x = np.random.random((1000, 32))\n", - "y = np.random.random((1000, 1))\n", - "model.fit(x, y, epochs=3)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Going lower-level\n", - "\n", - "Naturally, you could just skip passing a loss function in `compile()`, and instead do\n", - "everything *manually* in `train_step`. Likewise for metrics.\n", - "\n", - "Here's a lower-level example, that only uses `compile()` to configure the optimizer:\n", - "\n", - "- We start by creating `Metric` instances to track our loss and a MAE score (in `__init__()`).\n", - "- We implement a custom `train_step()` that updates the state of these metrics\n", - "(by calling `update_state()` on them), then query them (via `result()`) to return their current average value,\n", - "to be displayed by the progress bar and to be pass to any callback.\n", - "- Note that we would need to call `reset_states()` on our metrics between each epoch! Otherwise\n", - "calling `result()` would return an average since the start of training, whereas we usually work\n", - "with per-epoch averages. Thankfully, the framework can do that for us: just list any metric\n", - "you want to reset in the `metrics` property of the model. The model will call `reset_states()`\n", - "on any object listed here at the beginning of each `fit()` epoch or at the beginning of a call to\n", - "`evaluate()`." 
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 0,
- "metadata": {
- "colab_type": "code"
- },
- "outputs": [],
- "source": [
- "\n",
- "class CustomModel(keras.Model):\n",
- "    def __init__(self, *args, **kwargs):\n",
- "        super().__init__(*args, **kwargs)\n",
- "        self.loss_tracker = keras.metrics.Mean(name=\"loss\")\n",
- "        self.mae_metric = keras.metrics.MeanAbsoluteError(name=\"mae\")\n",
- "        self.loss_fn = keras.losses.MeanSquaredError()\n",
- "\n",
- "    def train_step(self, data):\n",
- "        x, y = data\n",
- "\n",
- "        # Call torch.nn.Module.zero_grad() to clear the leftover gradients\n",
- "        # for the weights from the previous train step.\n",
- "        self.zero_grad()\n",
- "\n",
- "        # Compute loss\n",
- "        y_pred = self(x, training=True)  # Forward pass\n",
- "        loss = self.loss_fn(y, y_pred)\n",
- "\n",
- "        # Call torch.Tensor.backward() on the loss to compute gradients\n",
- "        # for the weights.\n",
- "        loss.backward()\n",
- "\n",
- "        trainable_weights = [v for v in self.trainable_weights]\n",
- "        gradients = [v.value.grad for v in trainable_weights]\n",
- "\n",
- "        # Update weights\n",
- "        with torch.no_grad():\n",
- "            self.optimizer.apply(gradients, trainable_weights)\n",
- "\n",
- "        # Compute our own metrics\n",
- "        self.loss_tracker.update_state(loss)\n",
- "        self.mae_metric.update_state(y, y_pred)\n",
- "        return {\n",
- "            \"loss\": self.loss_tracker.result(),\n",
- "            \"mae\": self.mae_metric.result(),\n",
- "        }\n",
- "\n",
- "    @property\n",
- "    def metrics(self):\n",
- "        # We list our `Metric` objects here so that `reset_state()` can be\n",
- "        # called automatically at the start of each epoch\n",
- "        # or at the start of `evaluate()`.\n",
- "        return [self.loss_tracker, self.mae_metric]\n",
- "\n",
- "\n",
- "# Construct an instance of CustomModel\n",
- "inputs = keras.Input(shape=(32,))\n",
- "outputs = keras.layers.Dense(1)(inputs)\n",
- "model = CustomModel(inputs, outputs)\n",
- "\n",
- "# We don't pass a loss or metrics here.\n",
- "model.compile(optimizer=\"adam\")\n",
- "\n",
- "# Just use `fit` as usual -- you can use callbacks, etc.\n",
- "x = np.random.random((1000, 32))\n",
- "y = np.random.random((1000, 1))\n",
- "model.fit(x, y, epochs=5)\n",
- ""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "## Supporting `sample_weight` & `class_weight`\n",
- "\n",
- "You may have noticed that our first basic example didn't make any mention of sample\n",
- "weighting. If you want to support the `fit()` arguments `sample_weight` and\n",
- "`class_weight`, you'd simply do the following:\n",
- "\n",
- "- Unpack `sample_weight` from the `data` argument\n",
- "- Pass it to `compute_loss` & `update_state` (of course, you could also just apply\n",
- "it manually if you don't rely on `compile()` for losses & metrics)\n",
- "- That's it."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 0,
- "metadata": {
- "colab_type": "code"
- },
- "outputs": [],
- "source": [
- "\n",
- "class CustomModel(keras.Model):\n",
- "    def train_step(self, data):\n",
- "        # Unpack the data. 
Its structure depends on your model and\n",
- "        # on what you pass to `fit()`.\n",
- "        if len(data) == 3:\n",
- "            x, y, sample_weight = data\n",
- "        else:\n",
- "            sample_weight = None\n",
- "            x, y = data\n",
- "\n",
- "        # Call torch.nn.Module.zero_grad() to clear the leftover gradients\n",
- "        # for the weights from the previous train step.\n",
- "        self.zero_grad()\n",
- "\n",
- "        # Compute loss\n",
- "        y_pred = self(x, training=True)  # Forward pass\n",
- "        loss = self.compute_loss(\n",
- "            y=y,\n",
- "            y_pred=y_pred,\n",
- "            sample_weight=sample_weight,\n",
- "        )\n",
- "\n",
- "        # Call torch.Tensor.backward() on the loss to compute gradients\n",
- "        # for the weights.\n",
- "        loss.backward()\n",
- "\n",
- "        trainable_weights = [v for v in self.trainable_weights]\n",
- "        gradients = [v.value.grad for v in trainable_weights]\n",
- "\n",
- "        # Update weights\n",
- "        with torch.no_grad():\n",
- "            self.optimizer.apply(gradients, trainable_weights)\n",
- "\n",
- "        # Update metrics (includes the metric that tracks the loss)\n",
- "        for metric in self.metrics:\n",
- "            if metric.name == \"loss\":\n",
- "                metric.update_state(loss)\n",
- "            else:\n",
- "                metric.update_state(y, y_pred, sample_weight=sample_weight)\n",
- "\n",
- "        # Return a dict mapping metric names to current value\n",
- "        # Note that it will include the loss (tracked in self.metrics).\n",
- "        return {m.name: m.result() for m in self.metrics}\n",
- "\n",
- "\n",
- "# Construct and compile an instance of CustomModel\n",
- "inputs = keras.Input(shape=(32,))\n",
- "outputs = keras.layers.Dense(1)(inputs)\n",
- "model = CustomModel(inputs, outputs)\n",
- "model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"mae\"])\n",
- "\n",
- "# You can now use the `sample_weight` argument\n",
- "x = np.random.random((1000, 32))\n",
- "y = np.random.random((1000, 1))\n",
- "sw = np.random.random((1000, 1))\n",
- "model.fit(x, y, sample_weight=sw, epochs=3)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "## Providing your own evaluation step\n",
- "\n",
- "What if you want to do the same for calls to `model.evaluate()`? Then you would\n",
- "override `test_step` in exactly the same way. 
Here's what it looks like:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 0,
- "metadata": {
- "colab_type": "code"
- },
- "outputs": [],
- "source": [
- "\n",
- "class CustomModel(keras.Model):\n",
- "    def test_step(self, data):\n",
- "        # Unpack the data\n",
- "        x, y = data\n",
- "        # Compute predictions\n",
- "        y_pred = self(x, training=False)\n",
- "        # Compute the loss\n",
- "        loss = self.compute_loss(y=y, y_pred=y_pred)\n",
- "        # Update the metrics.\n",
- "        for metric in self.metrics:\n",
- "            if metric.name == \"loss\":\n",
- "                metric.update_state(loss)\n",
- "            else:\n",
- "                metric.update_state(y, y_pred)\n",
- "        # Return a dict mapping metric names to current value.\n",
- "        # Note that it will include the loss (tracked in self.metrics).\n",
- "        return {m.name: m.result() for m in self.metrics}\n",
- "\n",
- "\n",
- "# Construct an instance of CustomModel\n",
- "inputs = keras.Input(shape=(32,))\n",
- "outputs = keras.layers.Dense(1)(inputs)\n",
- "model = CustomModel(inputs, outputs)\n",
- "model.compile(loss=\"mse\", metrics=[\"mae\"])\n",
- "\n",
- "# Evaluate with our custom test_step\n",
- "x = np.random.random((1000, 32))\n",
- "y = np.random.random((1000, 1))\n",
- "model.evaluate(x, y)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "## Wrapping up: an end-to-end GAN example\n",
- "\n",
- "Let's walk through an end-to-end example that leverages everything you just learned.\n",
- "\n",
- "Let's consider:\n",
- "\n",
- "- A generator network meant to generate 28x28x1 images.\n",
- "- A discriminator network meant to classify 28x28x1 images into two classes (\"fake\" and\n",
- "\"real\").\n",
- "- One optimizer for each.\n",
- "- A loss function to train the discriminator."
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Create the discriminator\n", - "discriminator = keras.Sequential(\n", - " [\n", - " keras.Input(shape=(28, 28, 1)),\n", - " layers.Conv2D(64, (3, 3), strides=(2, 2), padding=\"same\"),\n", - " layers.LeakyReLU(negative_slope=0.2),\n", - " layers.Conv2D(128, (3, 3), strides=(2, 2), padding=\"same\"),\n", - " layers.LeakyReLU(negative_slope=0.2),\n", - " layers.GlobalMaxPooling2D(),\n", - " layers.Dense(1),\n", - " ],\n", - " name=\"discriminator\",\n", - ")\n", - "\n", - "# Create the generator\n", - "latent_dim = 128\n", - "generator = keras.Sequential(\n", - " [\n", - " keras.Input(shape=(latent_dim,)),\n", - " # We want to generate 128 coefficients to reshape into a 7x7x128 map\n", - " layers.Dense(7 * 7 * 128),\n", - " layers.LeakyReLU(negative_slope=0.2),\n", - " layers.Reshape((7, 7, 128)),\n", - " layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding=\"same\"),\n", - " layers.LeakyReLU(negative_slope=0.2),\n", - " layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding=\"same\"),\n", - " layers.LeakyReLU(negative_slope=0.2),\n", - " layers.Conv2D(1, (7, 7), padding=\"same\", activation=\"sigmoid\"),\n", - " ],\n", - " name=\"generator\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Here's a feature-complete GAN class, overriding `compile()` to use its own signature,\n", - "and implementing the entire GAN algorithm in 17 lines in `train_step`:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class GAN(keras.Model):\n", - " def __init__(self, discriminator, generator, latent_dim):\n", - " super().__init__()\n", - " self.discriminator = discriminator\n", - " self.generator = generator\n", - " self.latent_dim = latent_dim\n", - " self.d_loss_tracker = keras.metrics.Mean(name=\"d_loss\")\n", - " self.g_loss_tracker = keras.metrics.Mean(name=\"g_loss\")\n", - " self.seed_generator = keras.random.SeedGenerator(1337)\n", - " self.built = True\n", - "\n", - " @property\n", - " def metrics(self):\n", - " return [self.d_loss_tracker, self.g_loss_tracker]\n", - "\n", - " def compile(self, d_optimizer, g_optimizer, loss_fn):\n", - " super().compile()\n", - " self.d_optimizer = d_optimizer\n", - " self.g_optimizer = g_optimizer\n", - " self.loss_fn = loss_fn\n", - "\n", - " def train_step(self, real_images):\n", - " device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", - " if isinstance(real_images, tuple):\n", - " real_images = real_images[0]\n", - " # Sample random points in the latent space\n", - " batch_size = real_images.shape[0]\n", - " random_latent_vectors = keras.random.normal(\n", - " shape=(batch_size, self.latent_dim), seed=self.seed_generator\n", - " )\n", - "\n", - " # Decode them to fake images\n", - " generated_images = self.generator(random_latent_vectors)\n", - "\n", - " # Combine them with real images\n", - " real_images = torch.tensor(real_images, device=device)\n", - " combined_images = torch.concat([generated_images, real_images], axis=0)\n", - "\n", - " # Assemble labels discriminating real from fake images\n", - " labels = torch.concat(\n", - " [\n", - " torch.ones((batch_size, 1), device=device),\n", - " torch.zeros((batch_size, 1), device=device),\n", - " ],\n", - " axis=0,\n", - " )\n", - " # Add random noise to the labels - important trick!\n", - " 
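# The noise softens the hard 0/1 targets, which keeps the discriminator\n",
- "        # from becoming over-confident and tends to stabilize GAN training.\n",
- "        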
labels += 0.05 * keras.random.uniform(labels.shape, seed=self.seed_generator)\n",
- "\n",
- "        # Train the discriminator\n",
- "        self.zero_grad()\n",
- "        predictions = self.discriminator(combined_images)\n",
- "        d_loss = self.loss_fn(labels, predictions)\n",
- "        d_loss.backward()\n",
- "        grads = [v.value.grad for v in self.discriminator.trainable_weights]\n",
- "        with torch.no_grad():\n",
- "            self.d_optimizer.apply(grads, self.discriminator.trainable_weights)\n",
- "\n",
- "        # Sample random points in the latent space\n",
- "        random_latent_vectors = keras.random.normal(\n",
- "            shape=(batch_size, self.latent_dim), seed=self.seed_generator\n",
- "        )\n",
- "\n",
- "        # Assemble labels that say \"all real images\"\n",
- "        misleading_labels = torch.zeros((batch_size, 1), device=device)\n",
- "\n",
- "        # Train the generator (note that we should *not* update the weights\n",
- "        # of the discriminator)!\n",
- "        self.zero_grad()\n",
- "        predictions = self.discriminator(self.generator(random_latent_vectors))\n",
- "        g_loss = self.loss_fn(misleading_labels, predictions)\n",
- "        g_loss.backward()\n",
- "        grads = [v.value.grad for v in self.generator.trainable_weights]\n",
- "        with torch.no_grad():\n",
- "            self.g_optimizer.apply(grads, self.generator.trainable_weights)\n",
- "\n",
- "        # Update metrics and return their value.\n",
- "        self.d_loss_tracker.update_state(d_loss)\n",
- "        self.g_loss_tracker.update_state(g_loss)\n",
- "        return {\n",
- "            \"d_loss\": self.d_loss_tracker.result(),\n",
- "            \"g_loss\": self.g_loss_tracker.result(),\n",
- "        }\n",
- ""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "Let's test-drive it:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 0,
- "metadata": {
- "colab_type": "code"
- },
- "outputs": [],
- "source": [
- "# Prepare the dataset. We use both the training & test MNIST digits.\n",
- "batch_size = 64\n",
- "(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()\n",
- "all_digits = np.concatenate([x_train, x_test])\n",
- "all_digits = all_digits.astype(\"float32\") / 255.0\n",
- "all_digits = np.reshape(all_digits, (-1, 28, 28, 1))\n",
- "\n",
- "# Create a TensorDataset\n",
- "dataset = torch.utils.data.TensorDataset(\n",
- "    torch.from_numpy(all_digits), torch.from_numpy(all_digits)\n",
- ")\n",
- "# Create a DataLoader\n",
- "dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)\n",
- "\n",
- "gan = GAN(discriminator=discriminator, generator=generator, latent_dim=latent_dim)\n",
- "gan.compile(\n",
- "    d_optimizer=keras.optimizers.Adam(learning_rate=0.0003),\n",
- "    g_optimizer=keras.optimizers.Adam(learning_rate=0.0003),\n",
- "    loss_fn=keras.losses.BinaryCrossentropy(from_logits=True),\n",
- ")\n",
- "\n",
- "gan.fit(dataloader, epochs=1)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "The ideas behind deep learning are simple, so why should their implementation be painful?"
- ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "custom_train_step_in_torch", - "private_outputs": false, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/guides/ipynb/keras_core/distributed_training_with_jax.ipynb b/guides/ipynb/keras_core/distributed_training_with_jax.ipynb deleted file mode 100644 index e1f2d2bdf8..0000000000 --- a/guides/ipynb/keras_core/distributed_training_with_jax.ipynb +++ /dev/null @@ -1,341 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "# Multi-GPU distributed training with JAX\n", - "\n", - "**Author:** [fchollet](https://twitter.com/fchollet)
\n", - "**Date created:** 2023/07/11
\n", - "**Last modified:** 2023/07/11
\n", - "**Description:** Guide to multi-GPU/TPU training for Keras models with JAX." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Introduction\n", - "\n", - "There are generally two ways to distribute computation across multiple devices:\n", - "\n", - "**Data parallelism**, where a single model gets replicated on multiple devices or\n", - "multiple machines. Each of them processes different batches of data, then they merge\n", - "their results. There exist many variants of this setup, that differ in how the different\n", - "model replicas merge results, in whether they stay in sync at every batch or whether they\n", - "are more loosely coupled, etc.\n", - "\n", - "**Model parallelism**, where different parts of a single model run on different devices,\n", - "processing a single batch of data together. This works best with models that have a\n", - "naturally-parallel architecture, such as models that feature multiple branches.\n", - "\n", - "This guide focuses on data parallelism, in particular **synchronous data parallelism**,\n", - "where the different replicas of the model stay in sync after each batch they process.\n", - "Synchronicity keeps the model convergence behavior identical to what you would see for\n", - "single-device training.\n", - "\n", - "Specifically, this guide teaches you how to use `jax.sharding` APIs to train Keras\n", - "models, with minimal changes to your code, on multiple GPUs or TPUS (typically 2 to 16)\n", - "installed on a single machine (single host, multi-device training). This is the\n", - "most common setup for researchers and small-scale industry workflows." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Setup\n", - "\n", - "Let's start by defining the function that creates the model that we will train,\n", - "and the function that creates the dataset we will train on (MNIST in this case)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "os.environ[\"KERAS_BACKEND\"] = \"jax\"\n", - "\n", - "import jax\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "import keras\n", - "\n", - "from jax.experimental import mesh_utils\n", - "from jax.sharding import Mesh\n", - "from jax.sharding import NamedSharding\n", - "from jax.sharding import PartitionSpec as P\n", - "\n", - "\n", - "def get_model():\n", - " # Make a simple convnet with batch normalization and dropout.\n", - " inputs = keras.Input(shape=(28, 28, 1))\n", - " x = keras.layers.Rescaling(1.0 / 255.0)(inputs)\n", - " x = keras.layers.Conv2D(filters=12, kernel_size=3, padding=\"same\", use_bias=False)(\n", - " x\n", - " )\n", - " x = keras.layers.BatchNormalization(scale=False, center=True)(x)\n", - " x = keras.layers.ReLU()(x)\n", - " x = keras.layers.Conv2D(\n", - " filters=24,\n", - " kernel_size=6,\n", - " use_bias=False,\n", - " strides=2,\n", - " )(x)\n", - " x = keras.layers.BatchNormalization(scale=False, center=True)(x)\n", - " x = keras.layers.ReLU()(x)\n", - " x = keras.layers.Conv2D(\n", - " filters=32,\n", - " kernel_size=6,\n", - " padding=\"same\",\n", - " strides=2,\n", - " name=\"large_k\",\n", - " )(x)\n", - " x = keras.layers.BatchNormalization(scale=False, center=True)(x)\n", - " x = keras.layers.ReLU()(x)\n", - " x = keras.layers.GlobalAveragePooling2D()(x)\n", - " x = keras.layers.Dense(256, activation=\"relu\")(x)\n", - " x = keras.layers.Dropout(0.5)(x)\n", - " outputs = keras.layers.Dense(10)(x)\n", - " model = keras.Model(inputs, outputs)\n", - " return model\n", - "\n", - "\n", - "def get_datasets():\n", - " # Load the data and split it between train and test sets\n", - " (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()\n", - "\n", - " # Scale images to the [0, 1] range\n", - " x_train = x_train.astype(\"float32\")\n", - " x_test = x_test.astype(\"float32\")\n", - " # Make sure images have shape (28, 28, 1)\n", - " x_train = np.expand_dims(x_train, -1)\n", - " x_test = np.expand_dims(x_test, -1)\n", - " print(\"x_train shape:\", x_train.shape)\n", - " print(x_train.shape[0], \"train samples\")\n", - " print(x_test.shape[0], \"test samples\")\n", - "\n", - " # Create TF Datasets\n", - " train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))\n", - " eval_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))\n", - " return train_data, eval_data\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Single-host, multi-device synchronous training\n", - "\n", - "In this setup, you have one machine with several GPUs or TPUs on it (typically 2 to 16).\n", - "Each device will run a copy of your model (called a **replica**). For simplicity, in\n", - "what follows, we'll assume we're dealing with 8 GPUs, at no loss of generality.\n", - "\n", - "**How it works**\n", - "\n", - "At each step of training:\n", - "\n", - "- The current batch of data (called **global batch**) is split into 8 different\n", - " sub-batches (called **local batches**). 
For instance, if the global batch has 512\n",
- "  samples, each of the 8 local batches will have 64 samples.\n",
- "- Each of the 8 replicas independently processes a local batch: they run a forward pass,\n",
- "  then a backward pass, outputting the gradient of the weights with respect to the loss of\n",
- "  the model on the local batch.\n",
- "- The weight updates originating from local gradients are efficiently merged across the 8\n",
- "  replicas. Because this is done at the end of every step, the replicas always stay in\n",
- "  sync.\n",
- "\n",
- "In practice, the process of synchronously updating the weights of the model replicas is\n",
- "handled at the level of each individual weight variable. This is done using\n",
- "a `jax.sharding.NamedSharding` that is configured to replicate the variables.\n",
- "\n",
- "**How to use it**\n",
- "\n",
- "To do single-host, multi-device synchronous training with a Keras model, you\n",
- "would use the `jax.sharding` features. Here's how it works:\n",
- "\n",
- "- We first create a device mesh using `mesh_utils.create_device_mesh`.\n",
- "- We use `jax.sharding.Mesh`, `jax.sharding.NamedSharding` and\n",
- "  `jax.sharding.PartitionSpec` to define how to partition JAX arrays.\n",
- "  - We specify that we want to replicate the model and optimizer variables\n",
- "    across all devices by using a spec with no axis.\n",
- "  - We specify that we want to shard the data across devices by using a spec\n",
- "    that splits along the batch dimension.\n",
- "- We use `jax.device_put` to replicate the model and optimizer variables across\n",
- "  devices. This happens once at the beginning.\n",
- "- In the training loop, for each batch that we process, we use `jax.device_put`\n",
- "  to split the batch across devices before invoking the train step.\n",
- "\n",
- "Here's the flow, where each step is split into its own utility function:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 0,
- "metadata": {
- "colab_type": "code"
- },
- "outputs": [],
- "source": [
- "# Config\n",
- "num_epochs = 2\n",
- "batch_size = 64\n",
- "\n",
- "train_data, eval_data = get_datasets()\n",
- "train_data = train_data.batch(batch_size, drop_remainder=True)\n",
- "\n",
- "model = get_model()\n",
- "optimizer = keras.optimizers.Adam(1e-3)\n",
- "loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n",
- "\n",
- "# Initialize all state with .build()\n",
- "(one_batch, one_batch_labels) = next(iter(train_data))\n",
- "model.build(one_batch)\n",
- "optimizer.build(model.trainable_variables)\n",
- "\n",
- "\n",
- "# This is the loss function that will be differentiated.\n",
- "# Keras provides a pure functional forward pass: model.stateless_call\n",
- "def compute_loss(trainable_variables, non_trainable_variables, x, y):\n",
- "    y_pred, updated_non_trainable_variables = model.stateless_call(\n",
- "        trainable_variables, non_trainable_variables, x\n",
- "    )\n",
- "    loss_value = loss(y, y_pred)\n",
- "    return loss_value, updated_non_trainable_variables\n",
- "\n",
- "\n",
- "# Function to compute gradients\n",
- "compute_gradients = jax.value_and_grad(compute_loss, has_aux=True)\n",
- "\n",
- "\n",
- "# Training step: Keras provides a pure functional optimizer.stateless_apply\n",
- "@jax.jit\n",
- "def train_step(train_state, x, y):\n",
- "    trainable_variables, non_trainable_variables, optimizer_variables = train_state\n",
- "    (loss_value, non_trainable_variables), grads = compute_gradients(\n",
- "        trainable_variables, non_trainable_variables, x, y\n",
- 
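"        # With `has_aux=True`, `compute_gradients` returns\n",
- "        # ((loss_value, aux), grads), hence the nested tuple unpacking above.\n",
- 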
" )\n", - "\n", - " trainable_variables, optimizer_variables = optimizer.stateless_apply(\n", - " optimizer_variables, grads, trainable_variables\n", - " )\n", - "\n", - " return loss_value, (\n", - " trainable_variables,\n", - " non_trainable_variables,\n", - " optimizer_variables,\n", - " )\n", - "\n", - "\n", - "# Replicate the model and optimizer variable on all devices\n", - "def get_replicated_train_state(devices):\n", - " # All variables will be replicated on all devices\n", - " var_mesh = Mesh(devices, axis_names=(\"_\"))\n", - " # In NamedSharding, axes not mentioned are replicated (all axes here)\n", - " var_replication = NamedSharding(var_mesh, P())\n", - "\n", - " # Apply the distribution settings to the model variables\n", - " trainable_variables = jax.device_put(model.trainable_variables, var_replication)\n", - " non_trainable_variables = jax.device_put(\n", - " model.non_trainable_variables, var_replication\n", - " )\n", - " optimizer_variables = jax.device_put(optimizer.variables, var_replication)\n", - "\n", - " # Combine all state in a tuple\n", - " return (trainable_variables, non_trainable_variables, optimizer_variables)\n", - "\n", - "\n", - "num_devices = len(jax.local_devices())\n", - "print(f\"Running on {num_devices} devices: {jax.local_devices()}\")\n", - "devices = mesh_utils.create_device_mesh((num_devices,))\n", - "\n", - "# Data will be split along the batch axis\n", - "data_mesh = Mesh(devices, axis_names=(\"batch\",)) # naming axes of the mesh\n", - "data_sharding = NamedSharding(\n", - " data_mesh,\n", - " P(\n", - " \"batch\",\n", - " ),\n", - ") # naming axes of the sharded partition\n", - "\n", - "# Display data sharding\n", - "x, y = next(iter(train_data))\n", - "sharded_x = jax.device_put(x.numpy(), data_sharding)\n", - "print(\"Data sharding\")\n", - "jax.debug.visualize_array_sharding(jax.numpy.reshape(sharded_x, [-1, 28 * 28]))\n", - "\n", - "train_state = get_replicated_train_state(devices)\n", - "\n", - "# Custom training loop\n", - "for epoch in range(num_epochs):\n", - " data_iter = iter(train_data)\n", - " for data in data_iter:\n", - " x, y = data\n", - " sharded_x = jax.device_put(x.numpy(), data_sharding)\n", - " loss_value, train_state = train_step(train_state, sharded_x, y.numpy())\n", - " print(\"Epoch\", epoch, \"loss:\", loss_value)\n", - "\n", - "# Post-processing model state update to write them back into the model\n", - "trainable_variables, non_trainable_variables, optimizer_variables = train_state\n", - "for variable, value in zip(model.trainable_variables, trainable_variables):\n", - " variable.assign(value)\n", - "for variable, value in zip(model.non_trainable_variables, non_trainable_variables):\n", - " variable.assign(value)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "That's it!" 
- ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "distributed_training_with_jax", - "private_outputs": false, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/guides/ipynb/keras_core/distributed_training_with_tensorflow.ipynb b/guides/ipynb/keras_core/distributed_training_with_tensorflow.ipynb deleted file mode 100644 index 9ac9a89bfc..0000000000 --- a/guides/ipynb/keras_core/distributed_training_with_tensorflow.ipynb +++ /dev/null @@ -1,367 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "# Multi-GPU distributed training with TensorFlow\n", - "\n", - "**Author:** [fchollet](https://twitter.com/fchollet)
\n", - "**Date created:** 2020/04/28
\n", - "**Last modified:** 2023/06/29
\n", - "**Description:** Guide to multi-GPU training for Keras models with TensorFlow." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Introduction\n", - "\n", - "There are generally two ways to distribute computation across multiple devices:\n", - "\n", - "**Data parallelism**, where a single model gets replicated on multiple devices or\n", - "multiple machines. Each of them processes different batches of data, then they merge\n", - "their results. There exist many variants of this setup, that differ in how the different\n", - "model replicas merge results, in whether they stay in sync at every batch or whether they\n", - "are more loosely coupled, etc.\n", - "\n", - "**Model parallelism**, where different parts of a single model run on different devices,\n", - "processing a single batch of data together. This works best with models that have a\n", - "naturally-parallel architecture, such as models that feature multiple branches.\n", - "\n", - "This guide focuses on data parallelism, in particular **synchronous data parallelism**,\n", - "where the different replicas of the model stay in sync after each batch they process.\n", - "Synchronicity keeps the model convergence behavior identical to what you would see for\n", - "single-device training.\n", - "\n", - "Specifically, this guide teaches you how to use the `tf.distribute` API to train Keras\n", - "models on multiple GPUs, with minimal changes to your code,\n", - "on multiple GPUs (typically 2 to 16) installed on a single machine (single host,\n", - "multi-device training). This is the most common setup for researchers and small-scale\n", - "industry workflows." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n", - "\n", - "import tensorflow as tf\n", - "import keras" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Single-host, multi-device synchronous training\n", - "\n", - "In this setup, you have one machine with several GPUs on it (typically 2 to 16). Each\n", - "device will run a copy of your model (called a **replica**). For simplicity, in what\n", - "follows, we'll assume we're dealing with 8 GPUs, at no loss of generality.\n", - "\n", - "**How it works**\n", - "\n", - "At each step of training:\n", - "\n", - "- The current batch of data (called **global batch**) is split into 8 different\n", - "sub-batches (called **local batches**). For instance, if the global batch has 512\n", - "samples, each of the 8 local batches will have 64 samples.\n", - "- Each of the 8 replicas independently processes a local batch: they run a forward pass,\n", - "then a backward pass, outputting the gradient of the weights with respect to the loss of\n", - "the model on the local batch.\n", - "- The weight updates originating from local gradients are efficiently merged across the 8\n", - "replicas. Because this is done at the end of every step, the replicas always stay in\n", - "sync.\n", - "\n", - "In practice, the process of synchronously updating the weights of the model replicas is\n", - "handled at the level of each individual weight variable. 
This is done through a **mirrored\n", - "variable** object.\n", - "\n", - "**How to use it**\n", - "\n", - "To do single-host, multi-device synchronous training with a Keras model, you would use\n", - "the [`tf.distribute.MirroredStrategy` API](\n", - " https://www.tensorflow.org/api_docs/python/tf/distribute/MirroredStrategy).\n", - "Here's how it works:\n", - "\n", - "- Instantiate a `MirroredStrategy`, optionally configuring which specific devices you\n", - "want to use (by default the strategy will use all GPUs available).\n", - "- Use the strategy object to open a scope, and within this scope, create all the Keras\n", - "objects you need that contain variables. Typically, that means **creating & compiling the\n", - "model** inside the distribution scope. In some cases, the first call to `fit()` may also\n", - "create variables, so it's a good idea to put your `fit()` call in the scope as well.\n", - "- Train the model via `fit()` as usual.\n", - "\n", - "Importantly, we recommend that you use `tf.data.Dataset` objects to load data\n", - "in a multi-device or distributed workflow.\n", - "\n", - "Schematically, it looks like this:\n", - "\n", - "```python\n", - "# Create a MirroredStrategy.\n", - "strategy = tf.distribute.MirroredStrategy()\n", - "print('Number of devices: {}'.format(strategy.num_replicas_in_sync))\n", - "\n", - "# Open a strategy scope.\n", - "with strategy.scope():\n", - " # Everything that creates variables should be under the strategy scope.\n", - " # In general this is only model construction & `compile()`.\n", - " model = Model(...)\n", - " model.compile(...)\n", - "\n", - " # Train the model on all available devices.\n", - " model.fit(train_dataset, validation_data=val_dataset, ...)\n", - "\n", - " # Test the model on all available devices.\n", - " model.evaluate(test_dataset)\n", - "```\n", - "\n", - "Here's a simple end-to-end runnable example:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "def get_compiled_model():\n", - " # Make a simple 2-layer densely-connected neural network.\n", - " inputs = keras.Input(shape=(784,))\n", - " x = keras.layers.Dense(256, activation=\"relu\")(inputs)\n", - " x = keras.layers.Dense(256, activation=\"relu\")(x)\n", - " outputs = keras.layers.Dense(10)(x)\n", - " model = keras.Model(inputs, outputs)\n", - " model.compile(\n", - " optimizer=keras.optimizers.Adam(),\n", - " loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", - " metrics=[keras.metrics.SparseCategoricalAccuracy()],\n", - " )\n", - " return model\n", - "\n", - "\n", - "def get_dataset():\n", - " batch_size = 32\n", - " num_val_samples = 10000\n", - "\n", - " # Return the MNIST dataset in the form of a `tf.data.Dataset`.\n", - " (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()\n", - "\n", - " # Preprocess the data (these are Numpy arrays)\n", - " x_train = x_train.reshape(-1, 784).astype(\"float32\") / 255\n", - " x_test = x_test.reshape(-1, 784).astype(\"float32\") / 255\n", - " y_train = y_train.astype(\"float32\")\n", - " y_test = y_test.astype(\"float32\")\n", - "\n", - " # Reserve num_val_samples samples for validation\n", - " x_val = x_train[-num_val_samples:]\n", - " y_val = y_train[-num_val_samples:]\n", - " x_train = x_train[:-num_val_samples]\n", - " y_train = y_train[:-num_val_samples]\n", - " return (\n", - " tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size),\n", - " 
tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(batch_size),\n",
- "        tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size),\n",
- "    )\n",
- "\n",
- "\n",
- "# Create a MirroredStrategy.\n",
- "strategy = tf.distribute.MirroredStrategy()\n",
- "print(\"Number of devices: {}\".format(strategy.num_replicas_in_sync))\n",
- "\n",
- "# Open a strategy scope.\n",
- "with strategy.scope():\n",
- "    # Everything that creates variables should be under the strategy scope.\n",
- "    # In general this is only model construction & `compile()`.\n",
- "    model = get_compiled_model()\n",
- "\n",
- "    # Train the model on all available devices.\n",
- "    train_dataset, val_dataset, test_dataset = get_dataset()\n",
- "    model.fit(train_dataset, epochs=2, validation_data=val_dataset)\n",
- "\n",
- "    # Test the model on all available devices.\n",
- "    model.evaluate(test_dataset)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "## Using callbacks to ensure fault tolerance\n",
- "\n",
- "When using distributed training, you should always make sure you have a strategy to\n",
- "recover from failure (fault tolerance). The simplest way to handle this is to pass a\n",
- "`ModelCheckpoint` callback to `fit()`, to save your model\n",
- "at regular intervals (e.g. every 100 batches or every epoch). You can then restart\n",
- "training from your saved model.\n",
- "\n",
- "Here's a simple example:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 0,
- "metadata": {
- "colab_type": "code"
- },
- "outputs": [],
- "source": [
- "# Prepare a directory to store all the checkpoints.\n",
- "checkpoint_dir = \"./ckpt\"\n",
- "if not os.path.exists(checkpoint_dir):\n",
- "    os.makedirs(checkpoint_dir)\n",
- "\n",
- "\n",
- "def make_or_restore_model():\n",
- "    # Either restore the latest model, or create a fresh one\n",
- "    # if there is no checkpoint available.\n",
- "    checkpoints = [checkpoint_dir + \"/\" + name for name in os.listdir(checkpoint_dir)]\n",
- "    if checkpoints:\n",
- "        latest_checkpoint = max(checkpoints, key=os.path.getctime)\n",
- "        print(\"Restoring from\", latest_checkpoint)\n",
- "        return keras.models.load_model(latest_checkpoint)\n",
- "    print(\"Creating a new model\")\n",
- "    return get_compiled_model()\n",
- "\n",
- "\n",
- "def run_training(epochs=1):\n",
- "    # Create a MirroredStrategy.\n",
- "    strategy = tf.distribute.MirroredStrategy()\n",
- "\n",
- "    # Open a strategy scope and create/restore the model\n",
- "    with strategy.scope():\n",
- "        model = make_or_restore_model()\n",
- "\n",
- "        callbacks = [\n",
- "            # This callback saves the model as a `.keras` file every epoch.\n",
- "            # We include the current epoch in the file name.\n",
- "            keras.callbacks.ModelCheckpoint(\n",
- "                filepath=checkpoint_dir + \"/ckpt-{epoch}.keras\",\n",
- "                save_freq=\"epoch\",\n",
- "            )\n",
- "        ]\n",
- "        model.fit(\n",
- "            train_dataset,\n",
- "            epochs=epochs,\n",
- "            callbacks=callbacks,\n",
- "            validation_data=val_dataset,\n",
- "            verbose=2,\n",
- "        )\n",
- "\n",
- "\n",
- "# Running the first time creates the model\n",
- "run_training(epochs=1)\n",
- "\n",
- "# Calling the same function again will resume from where we left off\n",
- "run_training(epochs=1)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "## `tf.data` performance tips\n",
- "\n",
- "When doing distributed training, the efficiency with which you load data can often become\n",
- "critical. 
Here are a few tips to make sure your `tf.data` pipelines\n",
- "run as fast as possible.\n",
- "\n",
- "**Note about dataset batching**\n",
- "\n",
- "When creating your dataset, make sure it is batched with the global batch size.\n",
- "For instance, if each of your 8 GPUs is capable of running a batch of 64 samples, you\n",
- "should use a global batch size of 512.\n",
- "\n",
- "**Calling `dataset.cache()`**\n",
- "\n",
- "If you call `.cache()` on a dataset, its data will be cached after running through the\n",
- "first iteration over the data. Every subsequent iteration will use the cached data. The\n",
- "cache can be in memory (the default) or in a local file you specify.\n",
- "\n",
- "This can improve performance when:\n",
- "\n",
- "- Your data is not expected to change from iteration to iteration\n",
- "- You are reading data from a remote distributed filesystem\n",
- "- You are reading data from local disk, but your data would fit in memory and your\n",
- "workflow is significantly IO-bound (e.g. reading & decoding image files).\n",
- "\n",
- "**Calling `dataset.prefetch(buffer_size)`**\n",
- "\n",
- "You should almost always call `.prefetch(buffer_size)` after creating a dataset. It means\n",
- "your data pipeline will run asynchronously from your model,\n",
- "with new samples being preprocessed and stored in a buffer while the current batch\n",
- "samples are used to train the model. The next batch will be prefetched in GPU memory by\n",
- "the time the current batch is over."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "That's it!"
- ]
- }
- ],
- "metadata": {
- "accelerator": "GPU",
- "colab": {
- "collapsed_sections": [],
- "name": "distributed_training_with_tensorflow",
- "private_outputs": false,
- "provenance": [],
- "toc_visible": true
- },
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.7.0"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
\ No newline at end of file
diff --git a/guides/ipynb/keras_core/distributed_training_with_torch.ipynb b/guides/ipynb/keras_core/distributed_training_with_torch.ipynb
deleted file mode 100644
index eed6554714..0000000000
--- a/guides/ipynb/keras_core/distributed_training_with_torch.ipynb
+++ /dev/null
@@ -1,375 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "# Multi-GPU distributed training with PyTorch\n",
- "\n",
- "**Author:** [fchollet](https://twitter.com/fchollet)<br>
\n", - "**Date created:** 2023/06/29
\n", - "**Last modified:** 2023/06/29
\n", - "**Description:** Guide to multi-GPU training for Keras models with PyTorch." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Introduction\n", - "\n", - "There are generally two ways to distribute computation across multiple devices:\n", - "\n", - "**Data parallelism**, where a single model gets replicated on multiple devices or\n", - "multiple machines. Each of them processes different batches of data, then they merge\n", - "their results. There exist many variants of this setup, that differ in how the different\n", - "model replicas merge results, in whether they stay in sync at every batch or whether they\n", - "are more loosely coupled, etc.\n", - "\n", - "**Model parallelism**, where different parts of a single model run on different devices,\n", - "processing a single batch of data together. This works best with models that have a\n", - "naturally-parallel architecture, such as models that feature multiple branches.\n", - "\n", - "This guide focuses on data parallelism, in particular **synchronous data parallelism**,\n", - "where the different replicas of the model stay in sync after each batch they process.\n", - "Synchronicity keeps the model convergence behavior identical to what you would see for\n", - "single-device training.\n", - "\n", - "Specifically, this guide teaches you how to use PyTorch's `DistributedDataParallel`\n", - "module wrapper to train Keras, with minimal changes to your code,\n", - "on multiple GPUs (typically 2 to 16) installed on a single machine (single host,\n", - "multi-device training). This is the most common setup for researchers and small-scale\n", - "industry workflows." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Setup\n", - "\n", - "Let's start by defining the function that creates the model that we will train,\n", - "and the function that creates the dataset we will train on (MNIST in this case)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "os.environ[\"KERAS_BACKEND\"] = \"torch\"\n", - "\n", - "import torch\n", - "import numpy as np\n", - "import keras\n", - "\n", - "\n", - "def get_model():\n", - " # Make a simple convnet with batch normalization and dropout.\n", - " inputs = keras.Input(shape=(28, 28, 1))\n", - " x = keras.layers.Rescaling(1.0 / 255.0)(inputs)\n", - " x = keras.layers.Conv2D(filters=12, kernel_size=3, padding=\"same\", use_bias=False)(\n", - " x\n", - " )\n", - " x = keras.layers.BatchNormalization(scale=False, center=True)(x)\n", - " x = keras.layers.ReLU()(x)\n", - " x = keras.layers.Conv2D(\n", - " filters=24,\n", - " kernel_size=6,\n", - " use_bias=False,\n", - " strides=2,\n", - " )(x)\n", - " x = keras.layers.BatchNormalization(scale=False, center=True)(x)\n", - " x = keras.layers.ReLU()(x)\n", - " x = keras.layers.Conv2D(\n", - " filters=32,\n", - " kernel_size=6,\n", - " padding=\"same\",\n", - " strides=2,\n", - " name=\"large_k\",\n", - " )(x)\n", - " x = keras.layers.BatchNormalization(scale=False, center=True)(x)\n", - " x = keras.layers.ReLU()(x)\n", - " x = keras.layers.GlobalAveragePooling2D()(x)\n", - " x = keras.layers.Dense(256, activation=\"relu\")(x)\n", - " x = keras.layers.Dropout(0.5)(x)\n", - " outputs = keras.layers.Dense(10)(x)\n", - " model = keras.Model(inputs, outputs)\n", - " return model\n", - "\n", - "\n", - "def get_dataset():\n", - " # Load the data and split it between train and test sets\n", - " (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()\n", - "\n", - " # Scale images to the [0, 1] range\n", - " x_train = x_train.astype(\"float32\")\n", - " x_test = x_test.astype(\"float32\")\n", - " # Make sure images have shape (28, 28, 1)\n", - " x_train = np.expand_dims(x_train, -1)\n", - " x_test = np.expand_dims(x_test, -1)\n", - " print(\"x_train shape:\", x_train.shape)\n", - "\n", - " # Create a TensorDataset\n", - " dataset = torch.utils.data.TensorDataset(\n", - " torch.from_numpy(x_train), torch.from_numpy(y_train)\n", - " )\n", - " return dataset\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Next, let's define a simple PyTorch training loop that targets\n", - "a GPU (note the calls to `.cuda()`)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "def train_model(model, dataloader, num_epochs, optimizer, loss_fn):\n", - " for epoch in range(num_epochs):\n", - " running_loss = 0.0\n", - " running_loss_count = 0\n", - " for batch_idx, (inputs, targets) in enumerate(dataloader):\n", - " inputs = inputs.cuda(non_blocking=True)\n", - " targets = targets.cuda(non_blocking=True)\n", - "\n", - " # Forward pass\n", - " outputs = model(inputs)\n", - " loss = loss_fn(outputs, targets)\n", - "\n", - " # Backward and optimize\n", - " optimizer.zero_grad()\n", - " loss.backward()\n", - " optimizer.step()\n", - "\n", - " running_loss += loss.item()\n", - " running_loss_count += 1\n", - "\n", - " # Print loss statistics\n", - " print(\n", - " f\"Epoch {epoch + 1}/{num_epochs}, \"\n", - " f\"Loss: {running_loss / running_loss_count}\"\n", - " )\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Single-host, multi-device synchronous training\n", - "\n", - "In this setup, you have one machine with several GPUs on it (typically 2 to 16). Each\n", - "device will run a copy of your model (called a **replica**). For simplicity, in what\n", - "follows, we'll assume we're dealing with 8 GPUs, at no loss of generality.\n", - "\n", - "**How it works**\n", - "\n", - "At each step of training:\n", - "\n", - "- The current batch of data (called **global batch**) is split into 8 different\n", - "sub-batches (called **local batches**). For instance, if the global batch has 512\n", - "samples, each of the 8 local batches will have 64 samples.\n", - "- Each of the 8 replicas independently processes a local batch: they run a forward pass,\n", - "then a backward pass, outputting the gradient of the weights with respect to the loss of\n", - "the model on the local batch.\n", - "- The weight updates originating from local gradients are efficiently merged across the 8\n", - "replicas. Because this is done at the end of every step, the replicas always stay in\n", - "sync.\n", - "\n", - "In practice, the process of synchronously updating the weights of the model replicas is\n", - "handled at the level of each individual weight variable. This is done through a **mirrored\n", - "variable** object.\n", - "\n", - "**How to use it**\n", - "\n", - "To do single-host, multi-device synchronous training with a Keras model, you would use\n", - "the `torch.nn.parallel.DistributedDataParallel` module wrapper.\n", - "Here's how it works:\n", - "\n", - "- We use `torch.multiprocessing.start_processes` to start multiple Python processes, one\n", - "per device. 
Each process will run the `per_device_launch_fn` function.\n", - "- The `per_device_launch_fn` function does the following:\n", - " - It uses `torch.distributed.init_process_group` and `torch.cuda.set_device`\n", - " to configure the device to be used for that process.\n", - " - It uses `torch.utils.data.distributed.DistributedSampler`\n", - " and `torch.utils.data.DataLoader` to turn our data into a distributed data loader.\n", - " - It also uses `torch.nn.parallel.DistributedDataParallel` to turn our model into\n", - " a distributed PyTorch module.\n", - " - It then calls the `train_model` function.\n", - "- The `train_model` function will then run in each process, with the model using\n", - "a separate device in each process.\n", - "\n", - "Here's the flow, where each step is split into its own utility function:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Config\n", - "num_gpu = torch.cuda.device_count()\n", - "num_epochs = 2\n", - "batch_size = 64\n", - "print(f\"Running on {num_gpu} GPUs\")\n", - "\n", - "\n", - "def setup_device(current_gpu_index, num_gpus):\n", - " # Device setup\n", - " os.environ[\"MASTER_ADDR\"] = \"localhost\"\n", - " os.environ[\"MASTER_PORT\"] = \"56492\"\n", - " device = torch.device(\"cuda:{}\".format(current_gpu_index))\n", - " torch.distributed.init_process_group(\n", - " backend=\"nccl\",\n", - " init_method=\"env://\",\n", - " world_size=num_gpus,\n", - " rank=current_gpu_index,\n", - " )\n", - " torch.cuda.set_device(device)\n", - "\n", - "\n", - "def cleanup():\n", - " torch.distributed.destroy_process_group()\n", - "\n", - "\n", - "def prepare_dataloader(dataset, current_gpu_index, num_gpus, batch_size):\n", - " sampler = torch.utils.data.distributed.DistributedSampler(\n", - " dataset,\n", - " num_replicas=num_gpus,\n", - " rank=current_gpu_index,\n", - " shuffle=False,\n", - " )\n", - " dataloader = torch.utils.data.DataLoader(\n", - " dataset,\n", - " sampler=sampler,\n", - " batch_size=batch_size,\n", - " shuffle=False,\n", - " )\n", - " return dataloader\n", - "\n", - "\n", - "def per_device_launch_fn(current_gpu_index, num_gpu):\n", - " # Setup the process groups\n", - " setup_device(current_gpu_index, num_gpu)\n", - "\n", - " dataset = get_dataset()\n", - " model = get_model()\n", - "\n", - " # prepare the dataloader\n", - " dataloader = prepare_dataloader(dataset, current_gpu_index, num_gpu, batch_size)\n", - "\n", - " # Instantiate the torch optimizer\n", - " optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)\n", - "\n", - " # Instantiate the torch loss function\n", - " loss_fn = torch.nn.CrossEntropyLoss()\n", - "\n", - " # Put model on device\n", - " model = model.to(current_gpu_index)\n", - " ddp_model = torch.nn.parallel.DistributedDataParallel(\n", - " model, device_ids=[current_gpu_index], output_device=current_gpu_index\n", - " )\n", - "\n", - " train_model(ddp_model, dataloader, num_epochs, optimizer, loss_fn)\n", - "\n", - " cleanup()\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Time to start multiple processes:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "if __name__ == \"__main__\":\n", - " # We use the \"fork\" method rather than \"spawn\" to support notebooks\n", - " torch.multiprocessing.start_processes(\n", - " per_device_launch_fn,\n", - " args=(num_gpu,),\n", - " 
nprocs=num_gpu,\n", - " join=True,\n", - " start_method=\"fork\",\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "That's it!" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "distributed_training_with_torch", - "private_outputs": false, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/guides/ipynb/keras_core/functional_api.ipynb b/guides/ipynb/keras_core/functional_api.ipynb deleted file mode 100644 index 85975b3053..0000000000 --- a/guides/ipynb/keras_core/functional_api.ipynb +++ /dev/null @@ -1,1402 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "# The Functional API\n", - "\n", - "**Author:** [fchollet](https://twitter.com/fchollet)
\n", - "**Date created:** 2019/03/01
\n", - "**Last modified:** 2023/06/25
\n", - "**Description:** Complete guide to the functional API." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import keras\n", - "from keras import layers\n", - "from keras import ops" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Introduction\n", - "\n", - "The Keras *functional API* is a way to create models that are more flexible\n", - "than the `keras.Sequential` API. The functional API can handle models\n", - "with non-linear topology, shared layers, and even multiple inputs or outputs.\n", - "\n", - "The main idea is that a deep learning model is usually\n", - "a directed acyclic graph (DAG) of layers.\n", - "So the functional API is a way to build *graphs of layers*.\n", - "\n", - "Consider the following model:\n", - "\n", - "
\n", - "```\n", - "(input: 784-dimensional vectors)\n", - " \u21a7\n", - "[Dense (64 units, relu activation)]\n", - " \u21a7\n", - "[Dense (64 units, relu activation)]\n", - " \u21a7\n", - "[Dense (10 units, softmax activation)]\n", - " \u21a7\n", - "(output: logits of a probability distribution over 10 classes)\n", - "```\n", - "
\n", - "\n", - "This is a basic graph with three layers.\n", - "To build this model using the functional API, start by creating an input node:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "inputs = keras.Input(shape=(784,))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "The shape of the data is set as a 784-dimensional vector.\n", - "The batch size is always omitted since only the shape of each sample is specified.\n", - "\n", - "If, for example, you have an image input with a shape of `(32, 32, 3)`,\n", - "you would use:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Just for demonstration purposes.\n", - "img_inputs = keras.Input(shape=(32, 32, 3))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "The `inputs` that is returned contains information about the shape and `dtype`\n", - "of the input data that you feed to your model.\n", - "Here's the shape:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "inputs.shape" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Here's the dtype:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "inputs.dtype" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "You create a new node in the graph of layers by calling a layer on this `inputs`\n", - "object:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "dense = layers.Dense(64, activation=\"relu\")\n", - "x = dense(inputs)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "The \"layer call\" action is like drawing an arrow from \"inputs\" to this layer\n", - "you created.\n", - "You're \"passing\" the inputs to the `dense` layer, and you get `x` as the output.\n", - "\n", - "Let's add a few more layers to the graph of layers:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "x = layers.Dense(64, activation=\"relu\")(x)\n", - "outputs = layers.Dense(10)(x)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "At this point, you can create a `Model` by specifying its inputs and outputs\n", - "in the graph of layers:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = keras.Model(inputs=inputs, outputs=outputs, name=\"mnist_model\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Let's check out what the model summary looks like:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.summary()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "You can also plot the model as a graph:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - 
"outputs": [], - "source": [ - "keras.utils.plot_model(model, \"my_first_model.png\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "And, optionally, display the input and output shapes of each layer\n", - "in the plotted graph:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "keras.utils.plot_model(model, \"my_first_model_with_shape_info.png\", show_shapes=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "This figure and the code are almost identical. In the code version,\n", - "the connection arrows are replaced by the call operation.\n", - "\n", - "A \"graph of layers\" is an intuitive mental image for a deep learning model,\n", - "and the functional API is a way to create models that closely mirrors this." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Training, evaluation, and inference\n", - "\n", - "Training, evaluation, and inference work exactly in the same way for models\n", - "built using the functional API as for `Sequential` models.\n", - "\n", - "The `Model` class offers a built-in training loop (the `fit()` method)\n", - "and a built-in evaluation loop (the `evaluate()` method). Note\n", - "that you can easily customize these loops to implement your own training routines.\n", - "See also the guides on customizing what happens in `fit()`:\n", - "\n", - "- [Writing a custom train step with TensorFlow](/guides/custom_train_step_in_tensorflow/)\n", - "- [Writing a custom train step with JAX](/guides/custom_train_step_in_jax/)\n", - "- [Writing a custom train step with PyTorch](/guides/custom_train_step_in_torch/)\n", - "\n", - "Here, load the MNIST image data, reshape it into vectors,\n", - "fit the model on the data (while monitoring performance on a validation split),\n", - "then evaluate the model on the test data:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()\n", - "\n", - "x_train = x_train.reshape(60000, 784).astype(\"float32\") / 255\n", - "x_test = x_test.reshape(10000, 784).astype(\"float32\") / 255\n", - "\n", - "model.compile(\n", - " loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", - " optimizer=keras.optimizers.RMSprop(),\n", - " metrics=[\"accuracy\"],\n", - ")\n", - "\n", - "history = model.fit(x_train, y_train, batch_size=64, epochs=2, validation_split=0.2)\n", - "\n", - "test_scores = model.evaluate(x_test, y_test, verbose=2)\n", - "print(\"Test loss:\", test_scores[0])\n", - "print(\"Test accuracy:\", test_scores[1])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "For further reading, see the\n", - "[training and evaluation](/guides/training_with_built_in_methods/) guide." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Save and serialize\n", - "\n", - "Saving the model and serialization work the same way for models built using\n", - "the functional API as they do for `Sequential` models. The standard way\n", - "to save a functional model is to call `model.save()`\n", - "to save the entire model as a single file. 
You can later recreate the same model\n", - "from this file, even if the code that built the model is no longer available.\n", - "\n", - "This saved file includes the:\n", - "- model architecture\n", - "- model weight values (that were learned during training)\n", - "- model training config, if any (as passed to `compile()`)\n", - "- optimizer and its state, if any (to restart training where you left off)" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.save(\"my_model.keras\")\n", - "del model\n", - "# Recreate the exact same model purely from the file:\n", - "model = keras.models.load_model(\"my_model.keras\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "For details, read the model [serialization & saving](\n", - " /guides/serialization_and_saving/) guide." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Use the same graph of layers to define multiple models\n", - "\n", - "In the functional API, models are created by specifying their inputs\n", - "and outputs in a graph of layers. That means that a single\n", - "graph of layers can be used to generate multiple models.\n", - "\n", - "In the example below, you use the same stack of layers to instantiate two models:\n", - "an `encoder` model that turns image inputs into 16-dimensional vectors,\n", - "and an end-to-end `autoencoder` model for training." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "encoder_input = keras.Input(shape=(28, 28, 1), name=\"img\")\n", - "x = layers.Conv2D(16, 3, activation=\"relu\")(encoder_input)\n", - "x = layers.Conv2D(32, 3, activation=\"relu\")(x)\n", - "x = layers.MaxPooling2D(3)(x)\n", - "x = layers.Conv2D(32, 3, activation=\"relu\")(x)\n", - "x = layers.Conv2D(16, 3, activation=\"relu\")(x)\n", - "encoder_output = layers.GlobalMaxPooling2D()(x)\n", - "\n", - "encoder = keras.Model(encoder_input, encoder_output, name=\"encoder\")\n", - "encoder.summary()\n", - "\n", - "x = layers.Reshape((4, 4, 1))(encoder_output)\n", - "x = layers.Conv2DTranspose(16, 3, activation=\"relu\")(x)\n", - "x = layers.Conv2DTranspose(32, 3, activation=\"relu\")(x)\n", - "x = layers.UpSampling2D(3)(x)\n", - "x = layers.Conv2DTranspose(16, 3, activation=\"relu\")(x)\n", - "decoder_output = layers.Conv2DTranspose(1, 3, activation=\"relu\")(x)\n", - "\n", - "autoencoder = keras.Model(encoder_input, decoder_output, name=\"autoencoder\")\n", - "autoencoder.summary()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Here, the decoding architecture is strictly symmetrical\n", - "to the encoding architecture, so the output shape is the same as\n", - "the input shape `(28, 28, 1)`.\n", - "\n", - "The reverse of a `Conv2D` layer is a `Conv2DTranspose` layer,\n", - "and the reverse of a `MaxPooling2D` layer is an `UpSampling2D` layer." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## All models are callable, just like layers\n", - "\n", - "You can treat any model as if it were a layer by invoking it on an `Input` or\n", - "on the output of another layer. 
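For instance, here's a minimal sketch (reusing the MNIST classifier stored in the `model` variable from earlier) of a model being called like a layer:

```python
# A sketch: calling a whole model on a symbolic input adds a node
# to the graph, exactly as calling a single layer would.
new_inputs = keras.Input(shape=(784,))
new_outputs = model(new_inputs)
```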
By calling a model you aren't just reusing\n", - "the architecture of the model, you're also reusing its weights.\n", - "\n", - "To see this in action, here's a different take on the autoencoder example that\n", - "creates an encoder model, a decoder model, and chains them in two calls\n", - "to obtain the autoencoder model:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "encoder_input = keras.Input(shape=(28, 28, 1), name=\"original_img\")\n", - "x = layers.Conv2D(16, 3, activation=\"relu\")(encoder_input)\n", - "x = layers.Conv2D(32, 3, activation=\"relu\")(x)\n", - "x = layers.MaxPooling2D(3)(x)\n", - "x = layers.Conv2D(32, 3, activation=\"relu\")(x)\n", - "x = layers.Conv2D(16, 3, activation=\"relu\")(x)\n", - "encoder_output = layers.GlobalMaxPooling2D()(x)\n", - "\n", - "encoder = keras.Model(encoder_input, encoder_output, name=\"encoder\")\n", - "encoder.summary()\n", - "\n", - "decoder_input = keras.Input(shape=(16,), name=\"encoded_img\")\n", - "x = layers.Reshape((4, 4, 1))(decoder_input)\n", - "x = layers.Conv2DTranspose(16, 3, activation=\"relu\")(x)\n", - "x = layers.Conv2DTranspose(32, 3, activation=\"relu\")(x)\n", - "x = layers.UpSampling2D(3)(x)\n", - "x = layers.Conv2DTranspose(16, 3, activation=\"relu\")(x)\n", - "decoder_output = layers.Conv2DTranspose(1, 3, activation=\"relu\")(x)\n", - "\n", - "decoder = keras.Model(decoder_input, decoder_output, name=\"decoder\")\n", - "decoder.summary()\n", - "\n", - "autoencoder_input = keras.Input(shape=(28, 28, 1), name=\"img\")\n", - "encoded_img = encoder(autoencoder_input)\n", - "decoded_img = decoder(encoded_img)\n", - "autoencoder = keras.Model(autoencoder_input, decoded_img, name=\"autoencoder\")\n", - "autoencoder.summary()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "As you can see, the model can be nested: a model can contain sub-models\n", - "(since a model is just like a layer).\n", - "A common use case for model nesting is *ensembling*.\n", - "For example, here's how to ensemble a set of models into a single model\n", - "that averages their predictions:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "def get_model():\n", - " inputs = keras.Input(shape=(128,))\n", - " outputs = layers.Dense(1)(inputs)\n", - " return keras.Model(inputs, outputs)\n", - "\n", - "\n", - "model1 = get_model()\n", - "model2 = get_model()\n", - "model3 = get_model()\n", - "\n", - "inputs = keras.Input(shape=(128,))\n", - "y1 = model1(inputs)\n", - "y2 = model2(inputs)\n", - "y3 = model3(inputs)\n", - "outputs = layers.average([y1, y2, y3])\n", - "ensemble_model = keras.Model(inputs=inputs, outputs=outputs)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Manipulate complex graph topologies\n", - "\n", - "### Models with multiple inputs and outputs\n", - "\n", - "The functional API makes it easy to manipulate multiple inputs and outputs.\n", - "This cannot be handled with the `Sequential` API.\n", - "\n", - "For example, if you're building a system for ranking customer issue tickets by\n", - "priority and routing them to the correct department,\n", - "then the model will have three inputs:\n", - "\n", - "- the title of the ticket (text input),\n", - "- the text body of the ticket (text input), and\n", - "- any tags added by the user (categorical 
input)\n", - "\n", - "This model will have two outputs:\n", - "\n", - "- the priority score between 0 and 1 (scalar sigmoid output), and\n", - "- the department that should handle the ticket (softmax output\n", - "over the set of departments).\n", - "\n", - "You can build this model in a few lines with the functional API:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "num_tags = 12 # Number of unique issue tags\n", - "num_words = 10000 # Size of vocabulary obtained when preprocessing text data\n", - "num_departments = 4 # Number of departments for predictions\n", - "\n", - "title_input = keras.Input(\n", - " shape=(None,), name=\"title\"\n", - ") # Variable-length sequence of ints\n", - "body_input = keras.Input(shape=(None,), name=\"body\") # Variable-length sequence of ints\n", - "tags_input = keras.Input(\n", - " shape=(num_tags,), name=\"tags\"\n", - ") # Binary vectors of size `num_tags`\n", - "\n", - "# Embed each word in the title into a 64-dimensional vector\n", - "title_features = layers.Embedding(num_words, 64)(title_input)\n", - "# Embed each word in the text into a 64-dimensional vector\n", - "body_features = layers.Embedding(num_words, 64)(body_input)\n", - "\n", - "# Reduce sequence of embedded words in the title into a single 128-dimensional vector\n", - "title_features = layers.LSTM(128)(title_features)\n", - "# Reduce sequence of embedded words in the body into a single 32-dimensional vector\n", - "body_features = layers.LSTM(32)(body_features)\n", - "\n", - "# Merge all available features into a single large vector via concatenation\n", - "x = layers.concatenate([title_features, body_features, tags_input])\n", - "\n", - "# Stick a logistic regression for priority prediction on top of the features\n", - "priority_pred = layers.Dense(1, name=\"priority\")(x)\n", - "# Stick a department classifier on top of the features\n", - "department_pred = layers.Dense(num_departments, name=\"department\")(x)\n", - "\n", - "# Instantiate an end-to-end model predicting both priority and department\n", - "model = keras.Model(\n", - " inputs=[title_input, body_input, tags_input],\n", - " outputs={\"priority\": priority_pred, \"department\": department_pred},\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Now plot the model:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "keras.utils.plot_model(model, \"multi_input_and_output_model.png\", show_shapes=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "When compiling this model, you can assign different losses to each output.\n", - "You can even assign different weights to each loss -- to modulate\n", - "their contribution to the total training loss." 
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.compile(\n", - " optimizer=keras.optimizers.RMSprop(1e-3),\n", - " loss=[\n", - " keras.losses.BinaryCrossentropy(from_logits=True),\n", - " keras.losses.CategoricalCrossentropy(from_logits=True),\n", - " ],\n", - " loss_weights=[1.0, 0.2],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Since the output layers have different names, you could also specify\n", - "the losses and loss weights with the corresponding layer names:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.compile(\n", - " optimizer=keras.optimizers.RMSprop(1e-3),\n", - " loss={\n", - " \"priority\": keras.losses.BinaryCrossentropy(from_logits=True),\n", - " \"department\": keras.losses.CategoricalCrossentropy(from_logits=True),\n", - " },\n", - " loss_weights={\"priority\": 1.0, \"department\": 0.2},\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Train the model by passing lists of NumPy arrays of inputs and targets:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Dummy input data\n", - "title_data = np.random.randint(num_words, size=(1280, 10))\n", - "body_data = np.random.randint(num_words, size=(1280, 100))\n", - "tags_data = np.random.randint(2, size=(1280, num_tags)).astype(\"float32\")\n", - "\n", - "# Dummy target data\n", - "priority_targets = np.random.random(size=(1280, 1))\n", - "dept_targets = np.random.randint(2, size=(1280, num_departments))\n", - "\n", - "model.fit(\n", - " {\"title\": title_data, \"body\": body_data, \"tags\": tags_data},\n", - " {\"priority\": priority_targets, \"department\": dept_targets},\n", - " epochs=2,\n", - " batch_size=32,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "When calling fit with a `Dataset` object, it should yield either a\n", - "tuple of lists like `([title_data, body_data, tags_data], [priority_targets, dept_targets])`\n", - "or a tuple of dictionaries like\n", - "`({'title': title_data, 'body': body_data, 'tags': tags_data}, {'priority': priority_targets, 'department': dept_targets})`.\n", - "\n", - "For more detailed explanation, refer to the\n", - "[training and evaluation](/guides/training_with_built_in_methods/) guide." 
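To make the `Dataset` formats above concrete, here is a minimal sketch (assuming the dummy NumPy arrays defined earlier) that wraps the same training data in a `tf.data.Dataset` yielding dictionary tuples:

```python
import tensorflow as tf

# Each element is a ({input_name: data}, {output_name: targets}) tuple.
train_dataset = tf.data.Dataset.from_tensor_slices(
    (
        {"title": title_data, "body": body_data, "tags": tags_data},
        {"priority": priority_targets, "department": dept_targets},
    )
).batch(32)

model.fit(train_dataset, epochs=1)
```

Note that `batch_size` is not passed to `fit()` here, since the dataset is already batched.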
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "### A toy ResNet model\n", - "\n", - "In addition to models with multiple inputs and outputs,\n", - "the functional API makes it easy to manipulate non-linear connectivity\n", - "topologies -- these are models with layers that are not connected sequentially,\n", - "which the `Sequential` API cannot handle.\n", - "\n", - "A common use case for this is residual connections.\n", - "Let's build a toy ResNet model for CIFAR10 to demonstrate this:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "inputs = keras.Input(shape=(32, 32, 3), name=\"img\")\n", - "x = layers.Conv2D(32, 3, activation=\"relu\")(inputs)\n", - "x = layers.Conv2D(64, 3, activation=\"relu\")(x)\n", - "block_1_output = layers.MaxPooling2D(3)(x)\n", - "\n", - "x = layers.Conv2D(64, 3, activation=\"relu\", padding=\"same\")(block_1_output)\n", - "x = layers.Conv2D(64, 3, activation=\"relu\", padding=\"same\")(x)\n", - "block_2_output = layers.add([x, block_1_output])\n", - "\n", - "x = layers.Conv2D(64, 3, activation=\"relu\", padding=\"same\")(block_2_output)\n", - "x = layers.Conv2D(64, 3, activation=\"relu\", padding=\"same\")(x)\n", - "block_3_output = layers.add([x, block_2_output])\n", - "\n", - "x = layers.Conv2D(64, 3, activation=\"relu\")(block_3_output)\n", - "x = layers.GlobalAveragePooling2D()(x)\n", - "x = layers.Dense(256, activation=\"relu\")(x)\n", - "x = layers.Dropout(0.5)(x)\n", - "outputs = layers.Dense(10)(x)\n", - "\n", - "model = keras.Model(inputs, outputs, name=\"toy_resnet\")\n", - "model.summary()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Plot the model:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "keras.utils.plot_model(model, \"mini_resnet.png\", show_shapes=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Now train the model:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()\n", - "\n", - "x_train = x_train.astype(\"float32\") / 255.0\n", - "x_test = x_test.astype(\"float32\") / 255.0\n", - "y_train = keras.utils.to_categorical(y_train, 10)\n", - "y_test = keras.utils.to_categorical(y_test, 10)\n", - "\n", - "model.compile(\n", - " optimizer=keras.optimizers.RMSprop(1e-3),\n", - " loss=keras.losses.CategoricalCrossentropy(from_logits=True),\n", - " metrics=[\"acc\"],\n", - ")\n", - "# We restrict the data to the first 1000 samples so as to limit execution time\n", - "# on Colab. 
Try to train on the entire dataset until convergence!\n", - "model.fit(\n", - " x_train[:1000],\n", - " y_train[:1000],\n", - " batch_size=64,\n", - " epochs=1,\n", - " validation_split=0.2,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Shared layers\n", - "\n", - "Another good use for the functional API are models that use *shared layers*.\n", - "Shared layers are layer instances that are reused multiple times in the same model --\n", - "they learn features that correspond to multiple paths in the graph-of-layers.\n", - "\n", - "Shared layers are often used to encode inputs from similar spaces\n", - "(say, two different pieces of text that feature similar vocabulary).\n", - "They enable sharing of information across these different inputs,\n", - "and they make it possible to train such a model on less data.\n", - "If a given word is seen in one of the inputs,\n", - "that will benefit the processing of all inputs that pass through the shared layer.\n", - "\n", - "To share a layer in the functional API, call the same layer instance multiple times.\n", - "For instance, here's an `Embedding` layer shared across two different text inputs:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Embedding for 1000 unique words mapped to 128-dimensional vectors\n", - "shared_embedding = layers.Embedding(1000, 128)\n", - "\n", - "# Variable-length sequence of integers\n", - "text_input_a = keras.Input(shape=(None,), dtype=\"int32\")\n", - "\n", - "# Variable-length sequence of integers\n", - "text_input_b = keras.Input(shape=(None,), dtype=\"int32\")\n", - "\n", - "# Reuse the same layer to encode both inputs\n", - "encoded_input_a = shared_embedding(text_input_a)\n", - "encoded_input_b = shared_embedding(text_input_b)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Extract and reuse nodes in the graph of layers\n", - "\n", - "Because the graph of layers you are manipulating is a static data structure,\n", - "it can be accessed and inspected. And this is how you are able to plot\n", - "functional models as images.\n", - "\n", - "This also means that you can access the activations of intermediate layers\n", - "(\"nodes\" in the graph) and reuse them elsewhere --\n", - "which is very useful for something like feature extraction.\n", - "\n", - "Let's look at an example. 
This is a VGG19 model with weights pretrained on ImageNet:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "vgg19 = keras.applications.VGG19()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "And these are the intermediate activations of the model,\n", - "obtained by querying the graph data structure:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "features_list = [layer.output for layer in vgg19.layers]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Use these features to create a new feature-extraction model that returns\n", - "the values of the intermediate layer activations:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "feat_extraction_model = keras.Model(inputs=vgg19.input, outputs=features_list)\n", - "\n", - "img = np.random.random((1, 224, 224, 3)).astype(\"float32\")\n", - "extracted_features = feat_extraction_model(img)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "This comes in handy for tasks like\n", - "[neural style transfer](https://keras.io/examples/generative/neural_style_transfer/),\n", - "among other things." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Extend the API using custom layers\n", - "\n", - "`keras` includes a wide range of built-in layers, for example:\n", - "\n", - "- Convolutional layers: `Conv1D`, `Conv2D`, `Conv3D`, `Conv2DTranspose`\n", - "- Pooling layers: `MaxPooling1D`, `MaxPooling2D`, `MaxPooling3D`, `AveragePooling1D`\n", - "- RNN layers: `GRU`, `LSTM`, `ConvLSTM2D`\n", - "- `BatchNormalization`, `Dropout`, `Embedding`, etc.\n", - "\n", - "But if you don't find what you need, it's easy to extend the API by creating\n", - "your own layers. 
All layers subclass the `Layer` class and implement:\n", - "\n", - "- `call` method, that specifies the computation done by the layer.\n", - "- `build` method, that creates the weights of the layer (this is just a style\n", - "convention since you can create weights in `__init__`, as well).\n", - "\n", - "To learn more about creating layers from scratch, read\n", - "[custom layers and models](/guides/making_new_layers_and_models_via_subclassing) guide.\n", - "\n", - "The following is a basic implementation of `keras.layers.Dense`:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class CustomDense(layers.Layer):\n", - " def __init__(self, units=32):\n", - " super().__init__()\n", - " self.units = units\n", - "\n", - " def build(self, input_shape):\n", - " self.w = self.add_weight(\n", - " shape=(input_shape[-1], self.units),\n", - " initializer=\"random_normal\",\n", - " trainable=True,\n", - " )\n", - " self.b = self.add_weight(\n", - " shape=(self.units,), initializer=\"random_normal\", trainable=True\n", - " )\n", - "\n", - " def call(self, inputs):\n", - " return ops.matmul(inputs, self.w) + self.b\n", - "\n", - "\n", - "inputs = keras.Input((4,))\n", - "outputs = CustomDense(10)(inputs)\n", - "\n", - "model = keras.Model(inputs, outputs)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "For serialization support in your custom layer, define a `get_config()`\n", - "method that returns the constructor arguments of the layer instance:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class CustomDense(layers.Layer):\n", - " def __init__(self, units=32):\n", - " super().__init__()\n", - " self.units = units\n", - "\n", - " def build(self, input_shape):\n", - " self.w = self.add_weight(\n", - " shape=(input_shape[-1], self.units),\n", - " initializer=\"random_normal\",\n", - " trainable=True,\n", - " )\n", - " self.b = self.add_weight(\n", - " shape=(self.units,), initializer=\"random_normal\", trainable=True\n", - " )\n", - "\n", - " def call(self, inputs):\n", - " return ops.matmul(inputs, self.w) + self.b\n", - "\n", - " def get_config(self):\n", - " return {\"units\": self.units}\n", - "\n", - "\n", - "inputs = keras.Input((4,))\n", - "outputs = CustomDense(10)(inputs)\n", - "\n", - "model = keras.Model(inputs, outputs)\n", - "config = model.get_config()\n", - "\n", - "new_model = keras.Model.from_config(config, custom_objects={\"CustomDense\": CustomDense})" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Optionally, implement the class method `from_config(cls, config)` which is used\n", - "when recreating a layer instance given its config dictionary.\n", - "The default implementation of `from_config` is:\n", - "\n", - "```python\n", - "def from_config(cls, config):\n", - " return cls(**config)\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## When to use the functional API\n", - "\n", - "Should you use the Keras functional API to create a new model,\n", - "or just subclass the `Model` class directly? 
In general, the functional API\n", - "is higher-level, easier and safer, and has a number of\n", - "features that subclassed models do not support.\n", - "\n", - "However, model subclassing provides greater flexibility when building models\n", - "that are not easily expressible as directed acyclic graphs of layers.\n", - "For example, you could not implement a Tree-RNN with the functional API\n", - "and would have to subclass `Model` directly.\n", - "\n", - "For an in-depth look at the differences between the functional API and\n", - "model subclassing, read\n", - "[What are Symbolic and Imperative APIs in TensorFlow 2.0?](https://blog.tensorflow.org/2019/01/what-are-symbolic-and-imperative-apis.html).\n", - "\n", - "### Functional API strengths:\n", - "\n", - "The following properties are also true for Sequential models\n", - "(which are also data structures), but are not true for subclassed models\n", - "(which are Python bytecode, not data structures).\n", - "\n", - "#### Less verbose\n", - "\n", - "There is no `super().__init__(...)`, no `def call(self, ...):`, etc.\n", - "\n", - "Compare:\n", - "\n", - "```python\n", - "inputs = keras.Input(shape=(32,))\n", - "x = layers.Dense(64, activation='relu')(inputs)\n", - "outputs = layers.Dense(10)(x)\n", - "mlp = keras.Model(inputs, outputs)\n", - "```\n", - "\n", - "With the subclassed version:\n", - "\n", - "```python\n", - "class MLP(keras.Model):\n", - "\n", - " def __init__(self, **kwargs):\n", - " super().__init__(**kwargs)\n", - " self.dense_1 = layers.Dense(64, activation='relu')\n", - " self.dense_2 = layers.Dense(10)\n", - "\n", - " def call(self, inputs):\n", - " x = self.dense_1(inputs)\n", - " return self.dense_2(x)\n", - "\n", - "# Instantiate the model.\n", - "mlp = MLP()\n", - "# Necessary to create the model's state.\n", - "# The model doesn't have a state until it's called at least once.\n", - "_ = mlp(ops.zeros((1, 32)))\n", - "```\n", - "\n", - "#### Model validation while defining its connectivity graph\n", - "\n", - "In the functional API, the input specification (shape and dtype) is created\n", - "in advance (using `Input`). Every time you call a layer,\n", - "the layer checks that the specification passed to it matches its assumptions,\n", - "and it will raise a helpful error message if not.\n", - "\n", - "This guarantees that any model you can build with the functional API will run.\n", - "All debugging -- other than convergence-related debugging --\n", - "happens statically during the model construction and not at execution time.\n", - "This is similar to type checking in a compiler.\n", - "\n", - "#### A functional model is plottable and inspectable\n", - "\n", - "You can plot the model as a graph, and you can easily access intermediate nodes\n", - "in this graph. 
For example, to extract and reuse the activations of intermediate\n", - "layers (as seen in a previous example):\n", - "\n", - "```python\n", - "features_list = [layer.output for layer in vgg19.layers]\n", - "feat_extraction_model = keras.Model(inputs=vgg19.input, outputs=features_list)\n", - "```\n", - "\n", - "#### A functional model can be serialized or cloned\n", - "\n", - "Because a functional model is a data structure rather than a piece of code,\n", - "it is safely serializable and can be saved as a single file\n", - "that allows you to recreate the exact same model\n", - "without having access to any of the original code.\n", - "See the [serialization & saving guide](/guides/serialization_and_saving/).\n", - "\n", - "To serialize a subclassed model, it is necessary for the implementer\n", - "to specify a `get_config()`\n", - "and `from_config()` method at the model level.\n", - "\n", - "\n", - "### Functional API weakness:\n", - "\n", - "#### It does not support dynamic architectures\n", - "\n", - "The functional API treats models as DAGs of layers.\n", - "This is true for most deep learning architectures, but not all -- for example,\n", - "recursive networks or Tree RNNs do not follow this assumption and cannot\n", - "be implemented in the functional API." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Mix-and-match API styles\n", - "\n", - "Choosing between the functional API or Model subclassing isn't a\n", - "binary decision that restricts you into one category of models.\n", - "All models in the `keras` API can interact with each other, whether they're\n", - "`Sequential` models, functional models, or subclassed models that are written\n", - "from scratch.\n", - "\n", - "You can always use a functional model or `Sequential` model\n", - "as part of a subclassed model or layer:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "units = 32\n", - "timesteps = 10\n", - "input_dim = 5\n", - "\n", - "# Define a Functional model\n", - "inputs = keras.Input((None, units))\n", - "x = layers.GlobalAveragePooling1D()(inputs)\n", - "outputs = layers.Dense(1)(x)\n", - "model = keras.Model(inputs, outputs)\n", - "\n", - "\n", - "class CustomRNN(layers.Layer):\n", - " def __init__(self):\n", - " super().__init__()\n", - " self.units = units\n", - " self.projection_1 = layers.Dense(units=units, activation=\"tanh\")\n", - " self.projection_2 = layers.Dense(units=units, activation=\"tanh\")\n", - " # Our previously-defined Functional model\n", - " self.classifier = model\n", - "\n", - " def call(self, inputs):\n", - " outputs = []\n", - " state = ops.zeros(shape=(inputs.shape[0], self.units))\n", - " for t in range(inputs.shape[1]):\n", - " x = inputs[:, t, :]\n", - " h = self.projection_1(x)\n", - " y = h + self.projection_2(state)\n", - " state = y\n", - " outputs.append(y)\n", - " features = ops.stack(outputs, axis=1)\n", - " print(features.shape)\n", - " return self.classifier(features)\n", - "\n", - "\n", - "rnn_model = CustomRNN()\n", - "_ = rnn_model(ops.zeros((1, timesteps, input_dim)))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "You can use any subclassed layer or model in the functional API\n", - "as long as it implements a `call` method that follows one of the following patterns:\n", - "\n", - "- `call(self, inputs, **kwargs)` --\n", - "Where `inputs` is a tensor or a nested structure 
of tensors (e.g. a list of tensors),\n", - "and where `**kwargs` are non-tensor arguments (non-inputs).\n", - "- `call(self, inputs, training=None, **kwargs)` --\n", - "Where `training` is a boolean indicating whether the layer should behave\n", - "in training mode and inference mode.\n", - "- `call(self, inputs, mask=None, **kwargs)` --\n", - "Where `mask` is a boolean mask tensor (useful for RNNs, for instance).\n", - "- `call(self, inputs, training=None, mask=None, **kwargs)` --\n", - "Of course, you can have both masking and training-specific behavior at the same time.\n", - "\n", - "Additionally, if you implement the `get_config` method on your custom Layer or model,\n", - "the functional models you create will still be serializable and cloneable.\n", - "\n", - "Here's a quick example of a custom RNN, written from scratch,\n", - "being used in a functional model:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "units = 32\n", - "timesteps = 10\n", - "input_dim = 5\n", - "batch_size = 16\n", - "\n", - "\n", - "class CustomRNN(layers.Layer):\n", - " def __init__(self):\n", - " super().__init__()\n", - " self.units = units\n", - " self.projection_1 = layers.Dense(units=units, activation=\"tanh\")\n", - " self.projection_2 = layers.Dense(units=units, activation=\"tanh\")\n", - " self.classifier = layers.Dense(1)\n", - "\n", - " def call(self, inputs):\n", - " outputs = []\n", - " state = ops.zeros(shape=(inputs.shape[0], self.units))\n", - " for t in range(inputs.shape[1]):\n", - " x = inputs[:, t, :]\n", - " h = self.projection_1(x)\n", - " y = h + self.projection_2(state)\n", - " state = y\n", - " outputs.append(y)\n", - " features = ops.stack(outputs, axis=1)\n", - " return self.classifier(features)\n", - "\n", - "\n", - "# Note that you specify a static batch size for the inputs with the `batch_shape`\n", - "# arg, because the inner computation of `CustomRNN` requires a static batch size\n", - "# (when you create the `state` zeros tensor).\n", - "inputs = keras.Input(batch_shape=(batch_size, timesteps, input_dim))\n", - "x = layers.Conv1D(32, 3)(inputs)\n", - "outputs = CustomRNN()(x)\n", - "\n", - "model = keras.Model(inputs, outputs)\n", - "\n", - "rnn_model = CustomRNN()\n", - "_ = rnn_model(ops.zeros((1, 10, 5)))" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "functional_api", - "private_outputs": false, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/guides/ipynb/keras_core/getting_started_with_keras_core.ipynb b/guides/ipynb/keras_core/getting_started_with_keras_core.ipynb deleted file mode 100644 index 0e9835c5d3..0000000000 --- a/guides/ipynb/keras_core/getting_started_with_keras_core.ipynb +++ /dev/null @@ -1,667 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "# Getting started with Keras 3\n", - "\n", - "**Author:** [fchollet](https://twitter.com/fchollet)
\n", - "**Date created:** 2023/07/10
\n", - "**Last modified:** 2023/07/10
\n", - "**Description:** First contact with the new multi-backend Keras." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Introduction\n", - "\n", - "Keras 3 is a full implementation of the Keras API that\n", - "works with TensorFlow, JAX, and PyTorch interchangeably.\n", - "This notebook will walk you through key Keras 3 workflows.\n", - "\n", - "First, let's install Keras 3:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "!pip install -q keras-core" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Setup\n", - "\n", - "We're going to be using the JAX backend here -- but you can\n", - "edit the string below to `\"tensorflow\"` or `\"torch\"` and hit\n", - "\"Restart runtime\", and the whole notebook will run just the same!\n", - "This entire guide is backend-agnostic." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import os\n", - "\n", - "os.environ[\"KERAS_BACKEND\"] = \"jax\"\n", - "\n", - "# Note that keras should only be imported after the backend\n", - "# has been configured. The backend cannot be changed once the\n", - "# package is imported.\n", - "import keras" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## A first example: A MNIST convnet\n", - "\n", - "Let's start with the Hello World of ML: training a convnet\n", - "to classify MNIST digits.\n", - "\n", - "Here's the data:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Load the data and split it between train and test sets\n", - "(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()\n", - "\n", - "# Scale images to the [0, 1] range\n", - "x_train = x_train.astype(\"float32\") / 255\n", - "x_test = x_test.astype(\"float32\") / 255\n", - "# Make sure images have shape (28, 28, 1)\n", - "x_train = np.expand_dims(x_train, -1)\n", - "x_test = np.expand_dims(x_test, -1)\n", - "print(\"x_train shape:\", x_train.shape)\n", - "print(\"y_train shape:\", y_train.shape)\n", - "print(x_train.shape[0], \"train samples\")\n", - "print(x_test.shape[0], \"test samples\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Here's our model.\n", - "\n", - "Different model-building options that Keras offers include:\n", - "\n", - "- [The Sequential API](https://keras.io/guides/sequential_model/) (what we use below)\n", - "- [The Functional API](https://keras.io/guides/functional_api/) (most typical)\n", - "- [Writing your own models yourself via subclassing](https://keras.io/guides/making_new_layers_and_models_via_subclassing/) (for advanced use cases)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Model parameters\n", - "num_classes = 10\n", - "input_shape = (28, 28, 1)\n", - "\n", - "model = keras.Sequential(\n", - " [\n", - " keras.layers.Input(shape=input_shape),\n", - " keras.layers.Conv2D(64, kernel_size=(3, 3), activation=\"relu\"),\n", - " keras.layers.Conv2D(64, kernel_size=(3, 3), activation=\"relu\"),\n", - " 
keras.layers.MaxPooling2D(pool_size=(2, 2)),\n", - " keras.layers.Conv2D(128, kernel_size=(3, 3), activation=\"relu\"),\n", - " keras.layers.Conv2D(128, kernel_size=(3, 3), activation=\"relu\"),\n", - " keras.layers.GlobalAveragePooling2D(),\n", - " keras.layers.Dropout(0.5),\n", - " keras.layers.Dense(num_classes, activation=\"softmax\"),\n", - " ]\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Here's our model summary:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.summary()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "We use the `compile()` method to specify the optimizer, loss function,\n", - "and the metrics to monitor. Note that with the JAX and TensorFlow backends,\n", - "XLA compilation is turned on by default." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.compile(\n", - " loss=keras.losses.SparseCategoricalCrossentropy(),\n", - " optimizer=keras.optimizers.Adam(learning_rate=1e-3),\n", - " metrics=[\n", - " keras.metrics.SparseCategoricalAccuracy(name=\"acc\"),\n", - " ],\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Let's train and evaluate the model. We'll set aside a validation split of 15%\n", - "of the data during training to monitor generalization on unseen data." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "batch_size = 128\n", - "epochs = 20\n", - "\n", - "callbacks = [\n", - " keras.callbacks.ModelCheckpoint(filepath=\"model_at_epoch_{epoch}.keras\"),\n", - " keras.callbacks.EarlyStopping(monitor=\"val_loss\", patience=2),\n", - "]\n", - "\n", - "model.fit(\n", - " x_train,\n", - " y_train,\n", - " batch_size=batch_size,\n", - " epochs=epochs,\n", - " validation_split=0.15,\n", - " callbacks=callbacks,\n", - ")\n", - "score = model.evaluate(x_test, y_test, verbose=0)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "During training, we were saving a model at the end of each epoch. 
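(The `ModelCheckpoint` callback above fills in the `{epoch}` placeholder of its `filepath` argument, producing one `.keras` file per epoch.)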
You\n", - "can also save the model in its latest state like this:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.save(\"final_model.keras\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "And reload it like this:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = keras.saving.load_model(\"final_model.keras\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Next, you can query predictions of class probabilities with `predict()`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "predictions = model.predict(x_test)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "That's it for the basics!" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Writing cross-framework custom components\n", - "\n", - "Keras 3 enables you to write custom Layers, Models, Metrics, Losses, and Optimizers\n", - "that work across TensorFlow, JAX, and PyTorch with the same codebase. Let's take a look\n", - "at custom layers first.\n", - "\n", - "If you're already familiar with writing custom layers in `tf.keras` -- well, nothing\n", - "has changed. Except one thing: instead of using functions from the `tf` namespace, you should use functions\n", - "from `keras.ops.*`.\n", - "\n", - "The `keras.ops` namespace contains:\n", - "\n", - "- An implementation of the NumPy API, e.g. 
`keras.ops.stack` or `keras.ops.matmul`.\n", - "- A set of neural network specific ops that are absent from NumPy, such as `keras.ops.conv`\n", - "or `keras.ops.binary_crossentropy`.\n", - "\n", - "Let's make a custom `Dense` layer that works with all backends:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class MyDense(keras.layers.Layer):\n", - " def __init__(self, units, activation=None, name=None):\n", - " super().__init__(name=name)\n", - " self.units = units\n", - " self.activation = keras.activations.get(activation)\n", - "\n", - " def build(self, input_shape):\n", - " input_dim = input_shape[-1]\n", - " self.w = self.add_weight(\n", - " shape=(input_dim, self.units),\n", - " initializer=keras.initializers.GlorotNormal(),\n", - " name=\"kernel\",\n", - " trainable=True,\n", - " )\n", - "\n", - " self.b = self.add_weight(\n", - " shape=(self.units,),\n", - " initializer=keras.initializers.Zeros(),\n", - " name=\"bias\",\n", - " trainable=True,\n", - " )\n", - "\n", - " def call(self, inputs):\n", - " # Use Keras ops to create backend-agnostic layers/metrics/etc.\n", - " x = keras.ops.matmul(inputs, self.w) + self.b\n", - " return self.activation(x)\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Next, let's make a custom `Dropout` layer that relies on the `keras.random`\n", - "namespace:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class MyDropout(keras.layers.Layer):\n", - " def __init__(self, rate, name=None):\n", - " super().__init__(name=name)\n", - " self.rate = rate\n", - " # Use seed_generator for managing RNG state.\n", - " # It is a state element and its seed variable is\n", - " # tracked as part of `layer.variables`.\n", - " self.seed_generator = keras.random.SeedGenerator(1337)\n", - "\n", - " def call(self, inputs):\n", - " # Use `keras.random` for random ops.\n", - " return keras.random.dropout(inputs, self.rate, seed=self.seed_generator)\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Next, let's write a custom subclassed model that uses our two custom layers:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class MyModel(keras.Model):\n", - " def __init__(self, num_classes):\n", - " super().__init__()\n", - " self.conv_base = keras.Sequential(\n", - " [\n", - " keras.layers.Conv2D(64, kernel_size=(3, 3), activation=\"relu\"),\n", - " keras.layers.Conv2D(64, kernel_size=(3, 3), activation=\"relu\"),\n", - " keras.layers.MaxPooling2D(pool_size=(2, 2)),\n", - " keras.layers.Conv2D(128, kernel_size=(3, 3), activation=\"relu\"),\n", - " keras.layers.Conv2D(128, kernel_size=(3, 3), activation=\"relu\"),\n", - " keras.layers.GlobalAveragePooling2D(),\n", - " ]\n", - " )\n", - " self.dp = MyDropout(0.5)\n", - " self.dense = MyDense(num_classes, activation=\"softmax\")\n", - "\n", - " def call(self, x):\n", - " x = self.conv_base(x)\n", - " x = self.dp(x)\n", - " return self.dense(x)\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Let's compile it and fit it:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - 
"colab_type": "code" - }, - "outputs": [], - "source": [ - "model = MyModel(num_classes=10)\n", - "model.compile(\n", - " loss=keras.losses.SparseCategoricalCrossentropy(),\n", - " optimizer=keras.optimizers.Adam(learning_rate=1e-3),\n", - " metrics=[\n", - " keras.metrics.SparseCategoricalAccuracy(name=\"acc\"),\n", - " ],\n", - ")\n", - "\n", - "model.fit(\n", - " x_train,\n", - " y_train,\n", - " batch_size=batch_size,\n", - " epochs=1, # For speed\n", - " validation_split=0.15,\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Training models on arbitrary data sources\n", - "\n", - "All Keras models can be trained and evaluated on a wide variety of data sources,\n", - "independently of the backend you're using. This includes:\n", - "\n", - "- NumPy arrays\n", - "- Pandas dataframes\n", - "- TensorFlow`tf.data.Dataset` objects\n", - "- PyTorch `DataLoader` objects\n", - "- Keras `PyDataset` objects\n", - "\n", - "They all work whether you're using TensorFlow, JAX, or PyTorch as your Keras backend.\n", - "\n", - "Let's try it out with PyTorch `DataLoaders`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "import torch\n", - "\n", - "# Create a TensorDataset\n", - "train_torch_dataset = torch.utils.data.TensorDataset(\n", - " torch.from_numpy(x_train), torch.from_numpy(y_train)\n", - ")\n", - "val_torch_dataset = torch.utils.data.TensorDataset(\n", - " torch.from_numpy(x_test), torch.from_numpy(y_test)\n", - ")\n", - "\n", - "# Create a DataLoader\n", - "train_dataloader = torch.utils.data.DataLoader(\n", - " train_torch_dataset, batch_size=batch_size, shuffle=True\n", - ")\n", - "val_dataloader = torch.utils.data.DataLoader(\n", - " val_torch_dataset, batch_size=batch_size, shuffle=False\n", - ")\n", - "\n", - "model = MyModel(num_classes=10)\n", - "model.compile(\n", - " loss=keras.losses.SparseCategoricalCrossentropy(),\n", - " optimizer=keras.optimizers.Adam(learning_rate=1e-3),\n", - " metrics=[\n", - " keras.metrics.SparseCategoricalAccuracy(name=\"acc\"),\n", - " ],\n", - ")\n", - "model.fit(train_dataloader, epochs=1, validation_data=val_dataloader)\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Now let's try this out with `tf.data`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "\n", - "train_dataset = (\n", - " tf.data.Dataset.from_tensor_slices((x_train, y_train))\n", - " .batch(batch_size)\n", - " .prefetch(tf.data.AUTOTUNE)\n", - ")\n", - "test_dataset = (\n", - " tf.data.Dataset.from_tensor_slices((x_test, y_test))\n", - " .batch(batch_size)\n", - " .prefetch(tf.data.AUTOTUNE)\n", - ")\n", - "\n", - "model = MyModel(num_classes=10)\n", - "model.compile(\n", - " loss=keras.losses.SparseCategoricalCrossentropy(),\n", - " optimizer=keras.optimizers.Adam(learning_rate=1e-3),\n", - " metrics=[\n", - " keras.metrics.SparseCategoricalAccuracy(name=\"acc\"),\n", - " ],\n", - ")\n", - "model.fit(train_dataset, epochs=1, validation_data=test_dataset)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Further reading\n", - "\n", - "This concludes our short overview of the new multi-backend capabilities\n", - "of Keras 3. 
Next, you can learn about:\n", - "\n", - "### How to customize what happens in `fit()`\n", - "\n", - "Want to implement a non-standard training algorithm yourself\n", - "(e.g. a GAN training routine) but still want to benefit from\n", - "the power and usability of `fit()`? It's really easy to customize\n", - "`fit()` to support arbitrary use cases.\n", - "\n", - "- [Customizing what happens in `fit()` with TensorFlow](http://keras.io/guides/custom_train_step_in_tensorflow/)\n", - "- [Customizing what happens in `fit()` with JAX](http://keras.io/guides/custom_train_step_in_jax/)\n", - "- [Customizing what happens in `fit()` with PyTorch](http://keras.io/guides/custom_train_step_in_pytorch/)\n", - "\n", - "## How to write custom training loops\n", - "\n", - "- [Writing a training loop from scratch in TensorFlow](http://keras.io/guides/writing_a_custom_training_loop_in_tensorflow/)\n", - "- [Writing a training loop from scratch in JAX](http://keras.io/guides/writing_a_custom_training_loop_in_jax/)\n", - "- [Writing a training loop from scratch in PyTorch](http://keras.io/guides/writing_a_custom_training_loop_in_torch/)\n", - "\n", - "\n", - "## How to distribute training\n", - "\n", - "- [Guide to distributed training with TensorFlow](http://keras.io/guides/distributed_training_with_tensorflow/)\n", - "- [JAX distributed training example](https://github.com/keras-team/keras-core/blob/main/examples/demo_jax_distributed.py)\n", - "- [PyTorch distributed training example](https://github.com/keras-team/keras-core/blob/main/examples/demo_torch_multi_gpu.py)\n", - "\n", - "Enjoy the library! 🚀" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "getting_started_with_keras", - "private_outputs": false, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/guides/ipynb/keras_core/making_new_layers_and_models_via_subclassing.ipynb b/guides/ipynb/keras_core/making_new_layers_and_models_via_subclassing.ipynb deleted file mode 100644 index d61b4719f2..0000000000 --- a/guides/ipynb/keras_core/making_new_layers_and_models_via_subclassing.ipynb +++ /dev/null @@ -1,1006 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "# Making new layers and models via subclassing\n", - "\n", - "**Author:** [fchollet](https://twitter.com/fchollet)
\n", - "**Date created:** 2019/03/01
\n", - "**Last modified:** 2023/06/25
\n", - "**Description:** Complete guide to writing `Layer` and `Model` objects from scratch." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Introduction\n", - "\n", - "This guide will cover everything you need to know to build your own\n", - "subclassed layers and models. In particular, you'll learn about the following features:\n", - "\n", - "- The `Layer` class\n", - "- The `add_weight()` method\n", - "- Trainable and non-trainable weights\n", - "- The `build()` method\n", - "- Making sure your layers can be used with any backend\n", - "- The `add_loss()` method\n", - "- The `training` argument in `call()`\n", - "- The `mask` argument in `call()`\n", - "- Making sure your layers can be serialized\n", - "\n", - "Let's dive in." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import keras\n", - "from keras import ops\n", - "from keras import layers" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## The `Layer` class: the combination of state (weights) and some computation\n", - "\n", - "One of the central abstractions in Keras is the `Layer` class. A layer\n", - "encapsulates both a state (the layer's \"weights\") and a transformation from\n", - "inputs to outputs (a \"call\", the layer's forward pass).\n", - "\n", - "Here's a densely-connected layer. It has two state variables:\n", - "the variables `w` and `b`." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class Linear(keras.layers.Layer):\n", - " def __init__(self, units=32, input_dim=32):\n", - " super().__init__()\n", - " self.w = self.add_weight(\n", - " shape=(input_dim, units),\n", - " initializer=\"random_normal\",\n", - " trainable=True,\n", - " )\n", - " self.b = self.add_weight(shape=(units,), initializer=\"zeros\", trainable=True)\n", - "\n", - " def call(self, inputs):\n", - " return ops.matmul(inputs, self.w) + self.b\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "You would use a layer by calling it on some tensor input(s), much like a Python\n", - "function." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "x = ops.ones((2, 2))\n", - "linear_layer = Linear(4, 2)\n", - "y = linear_layer(x)\n", - "print(y)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Note that the weights `w` and `b` are automatically tracked by the layer upon\n", - "being set as layer attributes:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "assert linear_layer.weights == [linear_layer.w, linear_layer.b]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Layers can have non-trainable weights\n", - "\n", - "Besides trainable weights, you can add non-trainable weights to a layer as\n", - "well. 
Such weights are meant not to be taken into account during\n", - "backpropagation, when you are training the layer.\n", - "\n", - "Here's how to add and use a non-trainable weight:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class ComputeSum(keras.layers.Layer):\n", - " def __init__(self, input_dim):\n", - " super().__init__()\n", - " self.total = self.add_weight(\n", - " initializer=\"zeros\", shape=(input_dim,), trainable=False\n", - " )\n", - "\n", - " def call(self, inputs):\n", - " self.total.assign_add(ops.sum(inputs, axis=0))\n", - " return self.total\n", - "\n", - "\n", - "x = ops.ones((2, 2))\n", - "my_sum = ComputeSum(2)\n", - "y = my_sum(x)\n", - "print(y.numpy())\n", - "y = my_sum(x)\n", - "print(y.numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "It's part of `layer.weights`, but it gets categorized as a non-trainable weight:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "print(\"weights:\", len(my_sum.weights))\n", - "print(\"non-trainable weights:\", len(my_sum.non_trainable_weights))\n", - "\n", - "# It's not included in the trainable weights:\n", - "print(\"trainable_weights:\", my_sum.trainable_weights)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Best practice: deferring weight creation until the shape of the inputs is known\n", - "\n", - "Our `Linear` layer above took an `input_dim` argument that was used to compute\n", - "the shape of the weights `w` and `b` in `__init__()`:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class Linear(keras.layers.Layer):\n", - " def __init__(self, units=32, input_dim=32):\n", - " super().__init__()\n", - " self.w = self.add_weight(\n", - " shape=(input_dim, units),\n", - " initializer=\"random_normal\",\n", - " trainable=True,\n", - " )\n", - " self.b = self.add_weight(shape=(units,), initializer=\"zeros\", trainable=True)\n", - "\n", - " def call(self, inputs):\n", - " return ops.matmul(inputs, self.w) + self.b\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "In many cases, you may not know in advance the size of your inputs, and you\n", - "would like to lazily create weights when that value becomes known, some time\n", - "after instantiating the layer.\n", - "\n", - "In the Keras API, we recommend creating layer weights in the\n", - "`build(self, inputs_shape)` method of your layer. 
Like this:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class Linear(keras.layers.Layer):\n", - " def __init__(self, units=32):\n", - " super().__init__()\n", - " self.units = units\n", - "\n", - " def build(self, input_shape):\n", - " self.w = self.add_weight(\n", - " shape=(input_shape[-1], self.units),\n", - " initializer=\"random_normal\",\n", - " trainable=True,\n", - " )\n", - " self.b = self.add_weight(\n", - " shape=(self.units,), initializer=\"random_normal\", trainable=True\n", - " )\n", - "\n", - " def call(self, inputs):\n", - " return ops.matmul(inputs, self.w) + self.b\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "The `__call__()` method of your layer will automatically run build the first time\n", - "it is called. You now have a layer that's lazy and thus easier to use:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# At instantiation, we don't know on what inputs this is going to get called\n", - "linear_layer = Linear(32)\n", - "\n", - "# The layer's weights are created dynamically the first time the layer is called\n", - "y = linear_layer(x)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Implementing `build()` separately as shown above nicely separates creating weights\n", - "only once from using weights in every call." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Layers are recursively composable\n", - "\n", - "If you assign a Layer instance as an attribute of another Layer, the outer layer\n", - "will start tracking the weights created by the inner layer.\n", - "\n", - "We recommend creating such sublayers in the `__init__()` method and leave it to\n", - "the first `__call__()` to trigger building their weights." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class MLPBlock(keras.layers.Layer):\n", - " def __init__(self):\n", - " super().__init__()\n", - " self.linear_1 = Linear(32)\n", - " self.linear_2 = Linear(32)\n", - " self.linear_3 = Linear(1)\n", - "\n", - " def call(self, inputs):\n", - " x = self.linear_1(inputs)\n", - " x = keras.activations.relu(x)\n", - " x = self.linear_2(x)\n", - " x = keras.activations.relu(x)\n", - " return self.linear_3(x)\n", - "\n", - "\n", - "mlp = MLPBlock()\n", - "y = mlp(ops.ones(shape=(3, 64))) # The first call to the `mlp` will create the weights\n", - "print(\"weights:\", len(mlp.weights))\n", - "print(\"trainable weights:\", len(mlp.trainable_weights))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Backend-agnostic layers and backend-specific layers\n", - "\n", - "As long as a layer only uses APIs from the `keras.ops` namespace\n", - "(or other Keras namespaces such as `keras.activations`, `keras.random`, or `keras.layers`),\n", - "then it can be used with any backend -- TensorFlow, JAX, or PyTorch.\n", - "\n", - "All layers you've seen so far in this guide work with all Keras backends.\n", - "\n", - "The `keras.ops` namespace gives you access to:\n", - "\n", - "- The NumPy API, e.g. 
`ops.matmul`, `ops.sum`, `ops.reshape`, `ops.stack`, etc.\n",
-    "- Neural network-specific APIs such as `ops.softmax`, `ops.conv`, `ops.binary_crossentropy`, `ops.relu`, etc.\n",
-    "\n",
-    "You can also use backend-native APIs in your layers (such as `tf.nn` functions),\n",
-    "but if you do this, then your layer will only be usable with the backend in question.\n",
-    "For instance, you could write the following JAX-specific layer using `jax.numpy`:\n",
-    "\n",
-    "```python\n",
-    "import jax\n",
-    "\n",
-    "class Linear(keras.layers.Layer):\n",
-    "    ...\n",
-    "\n",
-    "    def call(self, inputs):\n",
-    "        return jax.numpy.matmul(inputs, self.w) + self.b\n",
-    "```\n",
-    "\n",
-    "This would be the equivalent TensorFlow-specific layer:\n",
-    "\n",
-    "```python\n",
-    "import tensorflow as tf\n",
-    "\n",
-    "class Linear(keras.layers.Layer):\n",
-    "    ...\n",
-    "\n",
-    "    def call(self, inputs):\n",
-    "        return tf.matmul(inputs, self.w) + self.b\n",
-    "```\n",
-    "\n",
-    "And this would be the equivalent PyTorch-specific layer:\n",
-    "\n",
-    "```python\n",
-    "import torch\n",
-    "\n",
-    "class Linear(keras.layers.Layer):\n",
-    "    ...\n",
-    "\n",
-    "    def call(self, inputs):\n",
-    "        return torch.matmul(inputs, self.w) + self.b\n",
-    "```\n",
-    "\n",
-    "Because cross-backend compatibility is a tremendously useful property, we strongly\n",
-    "recommend that you seek to always make your layers backend-agnostic by leveraging\n",
-    "only Keras APIs."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "## The `add_loss()` method\n",
-    "\n",
-    "When writing the `call()` method of a layer, you can create loss tensors that\n",
-    "you will want to use later, when writing your training loop. This is doable by\n",
-    "calling `self.add_loss(value)`:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "\n",
-    "# A layer that creates an activity regularization loss\n",
-    "class ActivityRegularizationLayer(keras.layers.Layer):\n",
-    "    def __init__(self, rate=1e-2):\n",
-    "        super().__init__()\n",
-    "        self.rate = rate\n",
-    "\n",
-    "    def call(self, inputs):\n",
-    "        self.add_loss(self.rate * ops.mean(inputs))\n",
-    "        return inputs\n",
-    ""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "These losses (including those created by any inner layer) can be retrieved via\n",
-    "`layer.losses`. This property is reset at the start of every `__call__()` to\n",
-    "the top-level layer, so that `layer.losses` always contains the loss values\n",
-    "created during the last forward pass."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "\n",
-    "class OuterLayer(keras.layers.Layer):\n",
-    "    def __init__(self):\n",
-    "        super().__init__()\n",
-    "        self.activity_reg = ActivityRegularizationLayer(1e-2)\n",
-    "\n",
-    "    def call(self, inputs):\n",
-    "        return self.activity_reg(inputs)\n",
-    "\n",
-    "\n",
-    "layer = OuterLayer()\n",
-    "assert len(layer.losses) == 0  # No losses yet since the layer has never been called\n",
-    "\n",
-    "_ = layer(ops.zeros((1, 1)))\n",
-    "assert len(layer.losses) == 1  # We created one loss value\n",
-    "\n",
-    "# `layer.losses` gets reset at the start of each __call__\n",
-    "_ = layer(ops.zeros((1, 1)))\n",
-    "assert len(layer.losses) == 1  # This is the loss created during the call above"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "In addition, the `losses` property also contains regularization losses created\n",
-    "for the weights of any inner layer:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "\n",
-    "class OuterLayerWithKernelRegularizer(keras.layers.Layer):\n",
-    "    def __init__(self):\n",
-    "        super().__init__()\n",
-    "        self.dense = keras.layers.Dense(\n",
-    "            32, kernel_regularizer=keras.regularizers.l2(1e-3)\n",
-    "        )\n",
-    "\n",
-    "    def call(self, inputs):\n",
-    "        return self.dense(inputs)\n",
-    "\n",
-    "\n",
-    "layer = OuterLayerWithKernelRegularizer()\n",
-    "_ = layer(ops.zeros((1, 1)))\n",
-    "\n",
-    "# This is `1e-3 * sum(layer.dense.kernel ** 2)`,\n",
-    "# created by the `kernel_regularizer` above.\n",
-    "print(layer.losses)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "These losses are meant to be taken into account when writing custom training loops.\n",
-    "\n",
-    "They also work seamlessly with `fit()` (they get automatically summed and added to the main loss, if any):"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "inputs = keras.Input(shape=(3,))\n",
-    "outputs = ActivityRegularizationLayer()(inputs)\n",
-    "model = keras.Model(inputs, outputs)\n",
-    "\n",
-    "# If there is a loss passed in `compile`, the regularization\n",
-    "# losses get added to it\n",
-    "model.compile(optimizer=\"adam\", loss=\"mse\")\n",
-    "model.fit(np.random.random((2, 3)), np.random.random((2, 3)))\n",
-    "\n",
-    "# It's also possible not to pass any loss in `compile`,\n",
-    "# since the model already has a loss to minimize, via the `add_loss`\n",
-    "# call during the forward pass!\n",
-    "model.compile(optimizer=\"adam\")\n",
-    "model.fit(np.random.random((2, 3)), np.random.random((2, 3)))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "## You can optionally enable serialization on your layers\n",
-    "\n",
-    "If you need your custom layers to be serializable as part of a\n",
-    "[Functional model](/guides/functional_api/),\n",
-    "you can optionally implement a `get_config()` method:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "\n",
-    "class Linear(keras.layers.Layer):\n",
-    "    def __init__(self, units=32):\n",
-    "        super().__init__()\n",
-    "        self.units = units\n",
-    "\n",
-    "    def build(self, input_shape):\n",
-    "        self.w = self.add_weight(\n",
-    "            
shape=(input_shape[-1], self.units),\n", - " initializer=\"random_normal\",\n", - " trainable=True,\n", - " )\n", - " self.b = self.add_weight(\n", - " shape=(self.units,), initializer=\"random_normal\", trainable=True\n", - " )\n", - "\n", - " def call(self, inputs):\n", - " return ops.matmul(inputs, self.w) + self.b\n", - "\n", - " def get_config(self):\n", - " return {\"units\": self.units}\n", - "\n", - "\n", - "# Now you can recreate the layer from its config:\n", - "layer = Linear(64)\n", - "config = layer.get_config()\n", - "print(config)\n", - "new_layer = Linear.from_config(config)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Note that the `__init__()` method of the base `Layer` class takes some keyword\n", - "arguments, in particular a `name` and a `dtype`. It's good practice to pass\n", - "these arguments to the parent class in `__init__()` and to include them in the\n", - "layer config:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class Linear(keras.layers.Layer):\n", - " def __init__(self, units=32, **kwargs):\n", - " super().__init__(**kwargs)\n", - " self.units = units\n", - "\n", - " def build(self, input_shape):\n", - " self.w = self.add_weight(\n", - " shape=(input_shape[-1], self.units),\n", - " initializer=\"random_normal\",\n", - " trainable=True,\n", - " )\n", - " self.b = self.add_weight(\n", - " shape=(self.units,), initializer=\"random_normal\", trainable=True\n", - " )\n", - "\n", - " def call(self, inputs):\n", - " return ops.matmul(inputs, self.w) + self.b\n", - "\n", - " def get_config(self):\n", - " config = super().get_config()\n", - " config.update({\"units\": self.units})\n", - " return config\n", - "\n", - "\n", - "layer = Linear(64)\n", - "config = layer.get_config()\n", - "print(config)\n", - "new_layer = Linear.from_config(config)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "If you need more flexibility when deserializing the layer from its config, you\n", - "can also override the `from_config()` class method. This is the base\n", - "implementation of `from_config()`:\n", - "\n", - "```python\n", - "def from_config(cls, config):\n", - " return cls(**config)\n", - "```\n", - "\n", - "To learn more about serialization and saving, see the complete\n", - "[guide to saving and serializing models](/guides/serialization_and_saving/)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Privileged `training` argument in the `call()` method\n", - "\n", - "Some layers, in particular the `BatchNormalization` layer and the `Dropout`\n", - "layer, have different behaviors during training and inference. For such\n", - "layers, it is standard practice to expose a `training` (boolean) argument in\n", - "the `call()` method.\n", - "\n", - "By exposing this argument in `call()`, you enable the built-in training and\n", - "evaluation loops (e.g. `fit()`) to correctly use the layer in training and\n", - "inference." 
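-    "\n",
-    "As a minimal sketch (a hypothetical `Scale` layer, not from the original guide), a layer can branch on this\n",
-    "argument, and you can also pass it explicitly when calling a layer directly:\n",
-    "\n",
-    "```python\n",
-    "class Scale(keras.layers.Layer):\n",
-    "    def call(self, inputs, training=None):\n",
-    "        # Illustrative behavior: halve the activations at training time only.\n",
-    "        if training:\n",
-    "            return inputs * 0.5\n",
-    "        return inputs\n",
-    "\n",
-    "\n",
-    "layer = Scale()\n",
-    "x = ops.ones((2, 2))\n",
-    "y_train = layer(x, training=True)   # scaled\n",
-    "y_infer = layer(x, training=False)  # unchanged\n",
-    "```\n",
-    "\n",
-    "Inside `fit()`, Keras calls your layer with `training=True`; inside `evaluate()` and `predict()`, it calls it\n",
-    "with `training=False`. The `CustomDropout` layer defined below follows the same pattern."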
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class CustomDropout(keras.layers.Layer):\n", - " def __init__(self, rate, **kwargs):\n", - " super().__init__(**kwargs)\n", - " self.rate = rate\n", - "\n", - " def call(self, inputs, training=None):\n", - " if training:\n", - " return keras.random.dropout(inputs, rate=self.rate)\n", - " return inputs\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Privileged `mask` argument in the `call()` method\n", - "\n", - "The other privileged argument supported by `call()` is the `mask` argument.\n", - "\n", - "You will find it in all Keras RNN layers. A mask is a boolean tensor (one\n", - "boolean value per timestep in the input) used to skip certain input timesteps\n", - "when processing timeseries data.\n", - "\n", - "Keras will automatically pass the correct `mask` argument to `__call__()` for\n", - "layers that support it, when a mask is generated by a prior layer.\n", - "Mask-generating layers are the `Embedding`\n", - "layer configured with `mask_zero=True`, and the `Masking` layer." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## The `Model` class\n", - "\n", - "In general, you will use the `Layer` class to define inner computation blocks,\n", - "and will use the `Model` class to define the outer model -- the object you\n", - "will train.\n", - "\n", - "For instance, in a ResNet50 model, you would have several ResNet blocks\n", - "subclassing `Layer`, and a single `Model` encompassing the entire ResNet50\n", - "network.\n", - "\n", - "The `Model` class has the same API as `Layer`, with the following differences:\n", - "\n", - "- It exposes built-in training, evaluation, and prediction loops\n", - "(`model.fit()`, `model.evaluate()`, `model.predict()`).\n", - "- It exposes the list of its inner layers, via the `model.layers` property.\n", - "- It exposes saving and serialization APIs (`save()`, `save_weights()`...)\n", - "\n", - "Effectively, the `Layer` class corresponds to what we refer to in the\n", - "literature as a \"layer\" (as in \"convolution layer\" or \"recurrent layer\") or as\n", - "a \"block\" (as in \"ResNet block\" or \"Inception block\").\n", - "\n", - "Meanwhile, the `Model` class corresponds to what is referred to in the\n", - "literature as a \"model\" (as in \"deep learning model\") or as a \"network\" (as in\n", - "\"deep neural network\").\n", - "\n", - "So if you're wondering, \"should I use the `Layer` class or the `Model` class?\",\n", - "ask yourself: will I need to call `fit()` on it? Will I need to call `save()`\n", - "on it? If so, go with `Model`. 
If not (either because your class is just a block\n",
-    "in a bigger system, or because you are writing training & saving code yourself),\n",
-    "use `Layer`.\n",
-    "\n",
-    "For instance, we could take our mini-resnet example above, and use it to build\n",
-    "a `Model` that we could train with `fit()`, and that we could save with\n",
-    "`save_weights()`:"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "```python\n",
-    "class ResNet(keras.Model):\n",
-    "\n",
-    "    def __init__(self, num_classes=1000):\n",
-    "        super().__init__()\n",
-    "        self.block_1 = ResNetBlock()\n",
-    "        self.block_2 = ResNetBlock()\n",
-    "        self.global_pool = layers.GlobalAveragePooling2D()\n",
-    "        self.classifier = Dense(num_classes)\n",
-    "\n",
-    "    def call(self, inputs):\n",
-    "        x = self.block_1(inputs)\n",
-    "        x = self.block_2(x)\n",
-    "        x = self.global_pool(x)\n",
-    "        return self.classifier(x)\n",
-    "\n",
-    "\n",
-    "resnet = ResNet()\n",
-    "dataset = ...\n",
-    "resnet.fit(dataset, epochs=10)\n",
-    "resnet.save(\"filepath.keras\")\n",
-    "```"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "## Putting it all together: an end-to-end example\n",
-    "\n",
-    "Here's what you've learned so far:\n",
-    "\n",
-    "- A `Layer` encapsulates a state (created in `__init__()` or `build()`) and some\n",
-    "computation (defined in `call()`).\n",
-    "- Layers can be recursively nested to create new, bigger computation blocks.\n",
-    "- Layers are backend-agnostic as long as they only use Keras APIs. You can use\n",
-    "backend-native APIs (such as `jax.numpy`, `torch.nn` or `tf.nn`), but then\n",
-    "your layer will only be usable with that specific backend.\n",
-    "- Layers can create and track losses (typically regularization losses)\n",
-    "via `add_loss()`.\n",
-    "- The outer container, the thing you want to train, is a `Model`. A `Model` is\n",
-    "just like a `Layer`, but with added training and serialization utilities.\n",
-    "\n",
-    "Let's put all of these things together into an end-to-end example: we're going\n",
-    "to implement a Variational AutoEncoder (VAE) in a backend-agnostic fashion\n",
-    "-- so that it runs the same with TensorFlow, JAX, and PyTorch.\n",
-    "We'll train it on MNIST digits.\n",
-    "\n",
-    "Our VAE will be a subclass of `Model`, built as a nested composition of layers\n",
-    "that subclass `Layer`. It will feature a regularization loss (KL divergence).\n",
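-    "\n",
-    "As an added note (standard VAE math, not text from the original guide): the `kl_loss` computed in `call()` below\n",
-    "is the closed-form KL divergence between the diagonal Gaussian posterior $\\mathcal{N}(\\mu, \\sigma^2)$ and a\n",
-    "standard normal prior,\n",
-    "\n",
-    "$$D_{KL} = -\\frac{1}{2} \\sum_j \\left(1 + \\log \\sigma_j^2 - \\mu_j^2 - \\sigma_j^2\\right),$$\n",
-    "\n",
-    "with `z_mean` as $\\mu$ and `z_log_var` as $\\log \\sigma^2$; the code averages over dimensions instead of\n",
-    "summing, which only rescales the regularization strength."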
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class Sampling(layers.Layer):\n", - " \"\"\"Uses (z_mean, z_log_var) to sample z, the vector encoding a digit.\"\"\"\n", - "\n", - " def call(self, inputs):\n", - " z_mean, z_log_var = inputs\n", - " batch = ops.shape(z_mean)[0]\n", - " dim = ops.shape(z_mean)[1]\n", - " epsilon = keras.random.normal(shape=(batch, dim))\n", - " return z_mean + ops.exp(0.5 * z_log_var) * epsilon\n", - "\n", - "\n", - "class Encoder(layers.Layer):\n", - " \"\"\"Maps MNIST digits to a triplet (z_mean, z_log_var, z).\"\"\"\n", - "\n", - " def __init__(self, latent_dim=32, intermediate_dim=64, name=\"encoder\", **kwargs):\n", - " super().__init__(name=name, **kwargs)\n", - " self.dense_proj = layers.Dense(intermediate_dim, activation=\"relu\")\n", - " self.dense_mean = layers.Dense(latent_dim)\n", - " self.dense_log_var = layers.Dense(latent_dim)\n", - " self.sampling = Sampling()\n", - "\n", - " def call(self, inputs):\n", - " x = self.dense_proj(inputs)\n", - " z_mean = self.dense_mean(x)\n", - " z_log_var = self.dense_log_var(x)\n", - " z = self.sampling((z_mean, z_log_var))\n", - " return z_mean, z_log_var, z\n", - "\n", - "\n", - "class Decoder(layers.Layer):\n", - " \"\"\"Converts z, the encoded digit vector, back into a readable digit.\"\"\"\n", - "\n", - " def __init__(self, original_dim, intermediate_dim=64, name=\"decoder\", **kwargs):\n", - " super().__init__(name=name, **kwargs)\n", - " self.dense_proj = layers.Dense(intermediate_dim, activation=\"relu\")\n", - " self.dense_output = layers.Dense(original_dim, activation=\"sigmoid\")\n", - "\n", - " def call(self, inputs):\n", - " x = self.dense_proj(inputs)\n", - " return self.dense_output(x)\n", - "\n", - "\n", - "class VariationalAutoEncoder(keras.Model):\n", - " \"\"\"Combines the encoder and decoder into an end-to-end model for training.\"\"\"\n", - "\n", - " def __init__(\n", - " self,\n", - " original_dim,\n", - " intermediate_dim=64,\n", - " latent_dim=32,\n", - " name=\"autoencoder\",\n", - " **kwargs\n", - " ):\n", - " super().__init__(name=name, **kwargs)\n", - " self.original_dim = original_dim\n", - " self.encoder = Encoder(latent_dim=latent_dim, intermediate_dim=intermediate_dim)\n", - " self.decoder = Decoder(original_dim, intermediate_dim=intermediate_dim)\n", - "\n", - " def call(self, inputs):\n", - " z_mean, z_log_var, z = self.encoder(inputs)\n", - " reconstructed = self.decoder(z)\n", - " # Add KL divergence regularization loss.\n", - " kl_loss = -0.5 * ops.mean(\n", - " z_log_var - ops.square(z_mean) - ops.exp(z_log_var) + 1\n", - " )\n", - " self.add_loss(kl_loss)\n", - " return reconstructed\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Let's train it on MNIST using the `fit()` API:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "(x_train, _), _ = keras.datasets.mnist.load_data()\n", - "x_train = x_train.reshape(60000, 784).astype(\"float32\") / 255\n", - "\n", - "original_dim = 784\n", - "vae = VariationalAutoEncoder(784, 64, 32)\n", - "\n", - "optimizer = keras.optimizers.Adam(learning_rate=1e-3)\n", - "vae.compile(optimizer, loss=keras.losses.MeanSquaredError())\n", - "\n", - "vae.fit(x_train, x_train, epochs=2, batch_size=64)" - ] - } - ], - "metadata": { - "accelerator": "None", - "colab": { - "collapsed_sections": [], - 
"name": "making_new_layers_and_models_via_subclassing", - "private_outputs": false, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/guides/ipynb/keras_core/sequential_model.ipynb b/guides/ipynb/keras_core/sequential_model.ipynb deleted file mode 100644 index 6138c4ae79..0000000000 --- a/guides/ipynb/keras_core/sequential_model.ipynb +++ /dev/null @@ -1,674 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "# The Sequential model\n", - "\n", - "**Author:** [fchollet](https://twitter.com/fchollet)
\n", - "**Date created:** 2020/04/12
\n", - "**Last modified:** 2023/06/25
\n", - "**Description:** Complete guide to the Sequential model." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "import keras\n", - "from keras import layers\n", - "from keras import ops" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## When to use a Sequential model\n", - "\n", - "A `Sequential` model is appropriate for **a plain stack of layers**\n", - "where each layer has **exactly one input tensor and one output tensor**.\n", - "\n", - "Schematically, the following `Sequential` model:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Define Sequential model with 3 layers\n", - "model = keras.Sequential(\n", - " [\n", - " layers.Dense(2, activation=\"relu\", name=\"layer1\"),\n", - " layers.Dense(3, activation=\"relu\", name=\"layer2\"),\n", - " layers.Dense(4, name=\"layer3\"),\n", - " ]\n", - ")\n", - "# Call model on a test input\n", - "x = ops.ones((3, 3))\n", - "y = model(x)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "is equivalent to this function:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Create 3 layers\n", - "layer1 = layers.Dense(2, activation=\"relu\", name=\"layer1\")\n", - "layer2 = layers.Dense(3, activation=\"relu\", name=\"layer2\")\n", - "layer3 = layers.Dense(4, name=\"layer3\")\n", - "\n", - "# Call layers on a test input\n", - "x = ops.ones((3, 3))\n", - "y = layer3(layer2(layer1(x)))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "A Sequential model is **not appropriate** when:\n", - "\n", - "- Your model has multiple inputs or multiple outputs\n", - "- Any of your layers has multiple inputs or multiple outputs\n", - "- You need to do layer sharing\n", - "- You want non-linear topology (e.g. 
a residual connection, a multi-branch\n", - "model)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Creating a Sequential model\n", - "\n", - "You can create a Sequential model by passing a list of layers to the Sequential\n", - "constructor:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = keras.Sequential(\n", - " [\n", - " layers.Dense(2, activation=\"relu\"),\n", - " layers.Dense(3, activation=\"relu\"),\n", - " layers.Dense(4),\n", - " ]\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Its layers are accessible via the `layers` attribute:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.layers" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "You can also create a Sequential model incrementally via the `add()` method:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = keras.Sequential()\n", - "model.add(layers.Dense(2, activation=\"relu\"))\n", - "model.add(layers.Dense(3, activation=\"relu\"))\n", - "model.add(layers.Dense(4))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Note that there's also a corresponding `pop()` method to remove layers:\n", - "a Sequential model behaves very much like a list of layers." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.pop()\n", - "print(len(model.layers)) # 2" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Also note that the Sequential constructor accepts a `name` argument, just like\n", - "any layer or model in Keras. This is useful to annotate TensorBoard graphs\n", - "with semantically meaningful names." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = keras.Sequential(name=\"my_sequential\")\n", - "model.add(layers.Dense(2, activation=\"relu\", name=\"layer1\"))\n", - "model.add(layers.Dense(3, activation=\"relu\", name=\"layer2\"))\n", - "model.add(layers.Dense(4, name=\"layer3\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Specifying the input shape in advance\n", - "\n", - "Generally, all layers in Keras need to know the shape of their inputs\n", - "in order to be able to create their weights. 
So when you create a layer like\n", - "this, initially, it has no weights:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "layer = layers.Dense(3)\n", - "layer.weights # Empty" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "It creates its weights the first time it is called on an input, since the shape\n", - "of the weights depends on the shape of the inputs:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Call layer on a test input\n", - "x = ops.ones((1, 4))\n", - "y = layer(x)\n", - "layer.weights # Now it has weights, of shape (4, 3) and (3,)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Naturally, this also applies to Sequential models. When you instantiate a\n", - "Sequential model without an input shape, it isn't \"built\": it has no weights\n", - "(and calling\n", - "`model.weights` results in an error stating just this). The weights are created\n", - "when the model first sees some input data:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = keras.Sequential(\n", - " [\n", - " layers.Dense(2, activation=\"relu\"),\n", - " layers.Dense(3, activation=\"relu\"),\n", - " layers.Dense(4),\n", - " ]\n", - ") # No weights at this stage!\n", - "\n", - "# At this point, you can't do this:\n", - "# model.weights\n", - "\n", - "# You also can't do this:\n", - "# model.summary()\n", - "\n", - "# Call the model on a test input\n", - "x = ops.ones((1, 4))\n", - "y = model(x)\n", - "print(\"Number of weights after calling the model:\", len(model.weights)) # 6" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Once a model is \"built\", you can call its `summary()` method to display its\n", - "contents:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.summary()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "However, it can be very useful when building a Sequential model incrementally\n", - "to be able to display the summary of the model so far, including the current\n", - "output shape. 
In this case, you should start your model by passing an `Input`\n", - "object to your model, so that it knows its input shape from the start:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = keras.Sequential()\n", - "model.add(keras.Input(shape=(4,)))\n", - "model.add(layers.Dense(2, activation=\"relu\"))\n", - "\n", - "model.summary()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Note that the `Input` object is not displayed as part of `model.layers`, since\n", - "it isn't a layer:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.layers" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Models built with a predefined input shape like this always have weights (even\n", - "before seeing any data) and always have a defined output shape.\n", - "\n", - "In general, it's a recommended best practice to always specify the input shape\n", - "of a Sequential model in advance if you know what it is." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## A common debugging workflow: `add()` + `summary()`\n", - "\n", - "When building a new Sequential architecture, it's useful to incrementally stack\n", - "layers with `add()` and frequently print model summaries. For instance, this\n", - "enables you to monitor how a stack of `Conv2D` and `MaxPooling2D` layers is\n", - "downsampling image feature maps:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = keras.Sequential()\n", - "model.add(keras.Input(shape=(250, 250, 3))) # 250x250 RGB images\n", - "model.add(layers.Conv2D(32, 5, strides=2, activation=\"relu\"))\n", - "model.add(layers.Conv2D(32, 3, activation=\"relu\"))\n", - "model.add(layers.MaxPooling2D(3))\n", - "\n", - "# Can you guess what the current output shape is at this point? Probably not.\n", - "# Let's just print it:\n", - "model.summary()\n", - "\n", - "# The answer was: (40, 40, 32), so we can keep downsampling...\n", - "\n", - "model.add(layers.Conv2D(32, 3, activation=\"relu\"))\n", - "model.add(layers.Conv2D(32, 3, activation=\"relu\"))\n", - "model.add(layers.MaxPooling2D(3))\n", - "model.add(layers.Conv2D(32, 3, activation=\"relu\"))\n", - "model.add(layers.Conv2D(32, 3, activation=\"relu\"))\n", - "model.add(layers.MaxPooling2D(2))\n", - "\n", - "# And now?\n", - "model.summary()\n", - "\n", - "# Now that we have 4x4 feature maps, time to apply global max pooling.\n", - "model.add(layers.GlobalMaxPooling2D())\n", - "\n", - "# Finally, we add a classification layer.\n", - "model.add(layers.Dense(10))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Very practical, right?\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## What to do once you have a model\n", - "\n", - "Once your model architecture is ready, you will want to:\n", - "\n", - "- Train your model, evaluate it, and run inference. See our\n", - "[guide to training & evaluation with the built-in loops](\n", - " /guides/training_with_built_in_methods/)\n", - "- Save your model to disk and restore it. 
See our\n",
-    "[guide to serialization & saving](/guides/serialization_and_saving/)."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "## Feature extraction with a Sequential model\n",
-    "\n",
-    "Once a Sequential model has been built, it behaves like a\n",
-    "[Functional API model](/guides/functional_api/).\n",
-    "This means that every layer has an `input`\n",
-    "and `output` attribute. These attributes can be used to do neat things, like\n",
-    "quickly creating a model that extracts the outputs of all intermediate layers in a\n",
-    "Sequential model:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "initial_model = keras.Sequential(\n",
-    "    [\n",
-    "        keras.Input(shape=(250, 250, 3)),\n",
-    "        layers.Conv2D(32, 5, strides=2, activation=\"relu\"),\n",
-    "        layers.Conv2D(32, 3, activation=\"relu\"),\n",
-    "        layers.Conv2D(32, 3, activation=\"relu\"),\n",
-    "    ]\n",
-    ")\n",
-    "feature_extractor = keras.Model(\n",
-    "    inputs=initial_model.inputs,\n",
-    "    outputs=[layer.output for layer in initial_model.layers],\n",
-    ")\n",
-    "\n",
-    "# Call feature extractor on test input.\n",
-    "x = ops.ones((1, 250, 250, 3))\n",
-    "features = feature_extractor(x)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "Here's a similar example that only extracts features from one layer:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "initial_model = keras.Sequential(\n",
-    "    [\n",
-    "        keras.Input(shape=(250, 250, 3)),\n",
-    "        layers.Conv2D(32, 5, strides=2, activation=\"relu\"),\n",
-    "        layers.Conv2D(32, 3, activation=\"relu\", name=\"my_intermediate_layer\"),\n",
-    "        layers.Conv2D(32, 3, activation=\"relu\"),\n",
-    "    ]\n",
-    ")\n",
-    "feature_extractor = keras.Model(\n",
-    "    inputs=initial_model.inputs,\n",
-    "    outputs=initial_model.get_layer(name=\"my_intermediate_layer\").output,\n",
-    ")\n",
-    "# Call feature extractor on test input.\n",
-    "x = ops.ones((1, 250, 250, 3))\n",
-    "features = feature_extractor(x)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "## Transfer learning with a Sequential model\n",
-    "\n",
-    "Transfer learning consists of freezing the bottom layers in a model and only training\n",
-    "the top layers. If you aren't familiar with it, make sure to read our [guide\n",
-    "to transfer learning](/guides/transfer_learning/).\n",
-    "\n",
-    "Here are two common transfer learning blueprints involving Sequential models.\n",
-    "\n",
-    "First, let's say that you have a Sequential model, and you want to freeze all\n",
-    "layers except the last one. In this case, you would simply iterate over\n",
-    "`model.layers` and set `layer.trainable = False` on each layer, except the\n",
-    "last one. Like this:\n",
-    "\n",
-    "```python\n",
-    "model = keras.Sequential([\n",
-    "    keras.Input(shape=(784,)),\n",
-    "    layers.Dense(32, activation='relu'),\n",
-    "    layers.Dense(32, activation='relu'),\n",
-    "    layers.Dense(32, activation='relu'),\n",
-    "    layers.Dense(10),\n",
-    "])\n",
-    "\n",
-    "# Presumably you would want to first load pre-trained weights.\n",
-    "model.load_weights(...)\n",
-    "\n",
-    "# Freeze all layers except the last one.\n",
-    "for layer in model.layers[:-1]:\n",
-    "    layer.trainable = False\n",
-    "\n",
-    "# Recompile and train (this will only update the weights of the last layer).\n",
-    "model.compile(...)\n",
-    "model.fit(...)\n",
-    "```\n",
-    "\n",
-    "Another common blueprint is to use a Sequential model to stack a pre-trained\n",
-    "model and some freshly initialized classification layers. Like this:\n",
-    "\n",
-    "```python\n",
-    "# Load a convolutional base with pre-trained weights\n",
-    "base_model = keras.applications.Xception(\n",
-    "    weights='imagenet',\n",
-    "    include_top=False,\n",
-    "    pooling='avg')\n",
-    "\n",
-    "# Freeze the base model\n",
-    "base_model.trainable = False\n",
-    "\n",
-    "# Use a Sequential model to add a trainable classifier on top\n",
-    "model = keras.Sequential([\n",
-    "    base_model,\n",
-    "    layers.Dense(1000),\n",
-    "])\n",
-    "\n",
-    "# Compile & train\n",
-    "model.compile(...)\n",
-    "model.fit(...)\n",
-    "```\n",
-    "\n",
-    "If you do transfer learning, you will probably find yourself frequently using\n",
-    "these two patterns."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "That's about all you need to know about Sequential models!\n",
-    "\n",
-    "To find out more about building models in Keras, see:\n",
-    "\n",
-    "- [Guide to the Functional API](/guides/functional_api/)\n",
-    "- [Guide to making new Layers & Models via subclassing](\n",
-    "    /guides/making_new_layers_and_models_via_subclassing/)"
-   ]
-  }
- ],
- "metadata": {
-  "accelerator": "GPU",
-  "colab": {
-   "collapsed_sections": [],
-   "name": "sequential_model",
-   "private_outputs": false,
-   "provenance": [],
-   "toc_visible": true
-  },
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.0"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
\ No newline at end of file
diff --git a/guides/ipynb/keras_core/serialization_and_saving.ipynb b/guides/ipynb/keras_core/serialization_and_saving.ipynb
deleted file mode 100644
index 15e7a3052d..0000000000
--- a/guides/ipynb/keras_core/serialization_and_saving.ipynb
+++ /dev/null
@@ -1,1116 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "# Save, serialize, and export models\n",
-    "\n",
-    "**Authors:** Neel Kovelamudi, Francois Chollet<br>
\n", - "**Date created:** 2023/06/14
\n", - "**Last modified:** 2023/06/30
\n",
-    "**Description:** Complete guide to saving, serializing, and exporting models."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Note: this guide assumes Keras >= 2.13**\n",
-    "\n",
-    "## Introduction\n",
-    "\n",
-    "A Keras model consists of multiple components:\n",
-    "\n",
-    "- The architecture, or configuration, which specifies what layers the model\n",
-    "contains, and how they're connected.\n",
-    "- A set of weights values (the \"state of the model\").\n",
-    "- An optimizer (defined by compiling the model).\n",
-    "- A set of losses and metrics (defined by compiling the model).\n",
-    "\n",
-    "The Keras API saves all of these pieces together in a unified format,\n",
-    "marked by the `.keras` extension. This is a zip archive consisting of the\n",
-    "following:\n",
-    "\n",
-    "- A JSON-based configuration file (config.json): Records of model, layer, and\n",
-    "other trackables' configuration.\n",
-    "- An H5-based state file, such as `model.weights.h5` (for the whole model),\n",
-    "with directory keys for layers and their weights.\n",
-    "- A metadata file in JSON, storing things such as the current Keras version.\n",
-    "\n",
-    "Let's take a look at how this works."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "## How to save and load a model\n",
-    "\n",
-    "If you only have 10 seconds to read this guide, here's what you need to know.\n",
-    "\n",
-    "**Saving a Keras model:**\n",
-    "\n",
-    "```python\n",
-    "model = ...  # Get model (Sequential, Functional Model, or Model subclass)\n",
-    "model.save('path/to/location.keras')  # The file needs to end with the .keras extension\n",
-    "```\n",
-    "\n",
-    "**Loading the model back:**\n",
-    "\n",
-    "```python\n",
-    "model = keras.models.load_model('path/to/location.keras')\n",
-    "```\n",
-    "\n",
-    "Now, let's look at the details."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "## Setup"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import keras\n",
-    "from keras import ops"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "## Saving\n",
-    "\n",
-    "This section is about saving an entire model to a single file. The file will include:\n",
-    "\n",
-    "- The model's architecture/config\n",
-    "- The model's weight values (which were learned during training)\n",
-    "- The model's compilation information (if `compile()` was called)\n",
-    "- The optimizer and its state, if any (this enables you to restart training\n",
-    "where you left off)\n",
-    "\n",
-    "#### APIs\n",
-    "\n",
-    "You can save a model with `model.save()` or `keras.models.save_model()` (which is equivalent).\n",
-    "You can load it back with `keras.models.load_model()`.\n",
-    "\n",
-    "The only supported format in Keras 3 is the \"Keras v3\" format,\n",
-    "which uses the `.keras` extension.\n",
-    "\n",
-    "**Example:**"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "\n",
-    "def get_model():\n",
-    "    # Create a simple model.\n",
-    "    inputs = keras.Input(shape=(32,))\n",
-    "    outputs = keras.layers.Dense(1)(inputs)\n",
-    "    model = keras.Model(inputs, outputs)\n",
-    "    model.compile(optimizer=keras.optimizers.Adam(), loss=\"mean_squared_error\")\n",
-    "    return model\n",
-    "\n",
-    "\n",
-    "model = get_model()\n",
-    "\n",
-    "# Train the model.\n",
-    "test_input = np.random.random((128, 32))\n",
-    "test_target = np.random.random((128, 1))\n",
-    "model.fit(test_input, test_target)\n",
-    "\n",
-    "# Calling `save('my_model.keras')` creates a zip archive `my_model.keras`.\n",
-    "model.save(\"my_model.keras\")\n",
-    "\n",
-    "# It can be used to reconstruct the model identically.\n",
-    "reconstructed_model = keras.models.load_model(\"my_model.keras\")\n",
-    "\n",
-    "# Let's check:\n",
-    "np.testing.assert_allclose(\n",
-    "    model.predict(test_input), reconstructed_model.predict(test_input)\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Custom objects\n",
-    "\n",
-    "This section covers the basic workflows for handling custom layers, functions, and\n",
-    "models in Keras saving and reloading.\n",
-    "\n",
-    "When saving a model that includes custom objects, such as a subclassed Layer,\n",
-    "you **must** define a `get_config()` method on the object class.\n",
-    "If the arguments passed to the constructor (`__init__()` method) of the custom object\n",
-    "aren't Python objects (anything other than base types like ints, strings,\n",
-    "etc.), then you **must** also explicitly deserialize these arguments in the `from_config()`\n",
-    "class method.\n",
-    "\n",
-    "Like this:\n",
-    "\n",
-    "```python\n",
-    "class CustomLayer(keras.layers.Layer):\n",
-    "    def __init__(self, sublayer, **kwargs):\n",
-    "        super().__init__(**kwargs)\n",
-    "        self.sublayer = sublayer\n",
-    "\n",
-    "    def call(self, x):\n",
-    "        return self.sublayer(x)\n",
-    "\n",
-    "    def get_config(self):\n",
-    "        base_config = super().get_config()\n",
-    "        config = {\n",
-    "            \"sublayer\": keras.saving.serialize_keras_object(self.sublayer),\n",
-    "        }\n",
-    "        return {**base_config, **config}\n",
-    "\n",
-    "    @classmethod\n",
-    "    def from_config(cls, config):\n",
-    "        sublayer_config = config.pop(\"sublayer\")\n",
-    "        sublayer = keras.saving.deserialize_keras_object(sublayer_config)\n",
-    "        return cls(sublayer, **config)\n",
-    "```\n",
-    "\n",
-    "Please see the [Defining the config methods section](#config_methods) for more\n",
-    "details and examples.\n",
-    "\n",
-    "The saved `.keras` file is lightweight and does not store the Python code for custom\n",
-    "objects. 
Therefore, to reload the model, `load_model` requires access to the definition\n", - "of any custom objects used through one of the following methods:\n", - "\n", - "1. Registering custom objects **(preferred)**,\n", - "2. Passing custom objects directly when loading, or\n", - "3. Using a custom object scope\n", - "\n", - "Below are examples of each workflow:\n", - "\n", - "#### Registering custom objects (**preferred**)\n", - "\n", - "This is the preferred method, as custom object registration greatly simplifies saving and\n", - "loading code. Adding the `@keras.saving.register_keras_serializable` decorator to the\n", - "class definition of a custom object registers the object globally in a master list,\n", - "allowing Keras to recognize the object when loading the model.\n", - "\n", - "Let's create a custom model involving both a custom layer and a custom activation\n", - "function to demonstrate this.\n", - "\n", - "**Example:**" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Clear all previously registered custom objects\n", - "keras.saving.get_custom_objects().clear()\n", - "\n", - "\n", - "# Upon registration, you can optionally specify a package or a name.\n", - "# If left blank, the package defaults to `Custom` and the name defaults to\n", - "# the class name.\n", - "@keras.saving.register_keras_serializable(package=\"MyLayers\")\n", - "class CustomLayer(keras.layers.Layer):\n", - " def __init__(self, factor):\n", - " super().__init__()\n", - " self.factor = factor\n", - "\n", - " def call(self, x):\n", - " return x * self.factor\n", - "\n", - " def get_config(self):\n", - " return {\"factor\": self.factor}\n", - "\n", - "\n", - "@keras.saving.register_keras_serializable(package=\"my_package\", name=\"custom_fn\")\n", - "def custom_fn(x):\n", - " return x**2\n", - "\n", - "\n", - "# Create the model.\n", - "def get_model():\n", - " inputs = keras.Input(shape=(4,))\n", - " mid = CustomLayer(0.5)(inputs)\n", - " outputs = keras.layers.Dense(1, activation=custom_fn)(mid)\n", - " model = keras.Model(inputs, outputs)\n", - " model.compile(optimizer=\"rmsprop\", loss=\"mean_squared_error\")\n", - " return model\n", - "\n", - "\n", - "# Train the model.\n", - "def train_model(model):\n", - " input = np.random.random((4, 4))\n", - " target = np.random.random((4, 1))\n", - " model.fit(input, target)\n", - " return model\n", - "\n", - "\n", - "test_input = np.random.random((4, 4))\n", - "test_target = np.random.random((4, 1))\n", - "\n", - "model = get_model()\n", - "model = train_model(model)\n", - "model.save(\"custom_model.keras\")\n", - "\n", - "# Now, we can simply load without worrying about our custom objects.\n", - "reconstructed_model = keras.models.load_model(\"custom_model.keras\")\n", - "\n", - "# Let's check:\n", - "np.testing.assert_allclose(\n", - " model.predict(test_input), reconstructed_model.predict(test_input)\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "#### Passing custom objects to `load_model()`" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = get_model()\n", - "model = train_model(model)\n", - "\n", - "# Calling `save('my_model.keras')` creates a zip archive `my_model.keras`.\n", - "model.save(\"custom_model.keras\")\n", - "\n", - "# Upon loading, pass a dict containing the custom objects used in the\n", - "# 
`custom_objects` argument of `keras.models.load_model()`.\n", - "reconstructed_model = keras.models.load_model(\n", - " \"custom_model.keras\",\n", - " custom_objects={\"CustomLayer\": CustomLayer, \"custom_fn\": custom_fn},\n", - ")\n", - "\n", - "# Let's check:\n", - "np.testing.assert_allclose(\n", - " model.predict(test_input), reconstructed_model.predict(test_input)\n", - ")\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "#### Using a custom object scope\n", - "\n", - "Any code within the custom object scope will be able to recognize the custom objects\n", - "passed to the scope argument. Therefore, loading the model within the scope will allow\n", - "the loading of our custom objects.\n", - "\n", - "**Example:**" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = get_model()\n", - "model = train_model(model)\n", - "model.save(\"custom_model.keras\")\n", - "\n", - "# Pass the custom objects dictionary to a custom object scope and place\n", - "# the `keras.models.load_model()` call within the scope.\n", - "custom_objects = {\"CustomLayer\": CustomLayer, \"custom_fn\": custom_fn}\n", - "\n", - "with keras.saving.custom_object_scope(custom_objects):\n", - " reconstructed_model = keras.models.load_model(\"custom_model.keras\")\n", - "\n", - "# Let's check:\n", - "np.testing.assert_allclose(\n", - " model.predict(test_input), reconstructed_model.predict(test_input)\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "### Model serialization\n", - "\n", - "This section is about saving only the model's configuration, without its state.\n", - "The model's configuration (or architecture) specifies what layers the model\n", - "contains, and how these layers are connected. 
If you have the configuration of a model,\n",
-    "then the model can be created with a freshly initialized state (no weights or compilation\n",
-    "information).\n",
-    "\n",
-    "#### APIs\n",
-    "\n",
-    "The following serialization APIs are available:\n",
-    "\n",
-    "- `keras.models.clone_model(model)`: make a (randomly initialized) copy of a model.\n",
-    "- `get_config()` and `cls.from_config()`: retrieve the configuration of a layer or model, and recreate\n",
-    "a model instance from its config, respectively.\n",
-    "- `model.to_json()` and `keras.models.model_from_json()`: similar, but as JSON strings.\n",
-    "- `keras.saving.serialize_keras_object()`: retrieve the configuration of any arbitrary Keras object.\n",
-    "- `keras.saving.deserialize_keras_object()`: recreate an object instance from its configuration.\n",
-    "\n",
-    "#### In-memory model cloning\n",
-    "\n",
-    "You can do in-memory cloning of a model via `keras.models.clone_model()`.\n",
-    "This is equivalent to getting the config then recreating the model from its config\n",
-    "(so it does not preserve compilation information or layer weights values).\n",
-    "\n",
-    "**Example:**"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "new_model = keras.models.clone_model(model)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "#### `get_config()` and `from_config()`\n",
-    "\n",
-    "Calling `model.get_config()` or `layer.get_config()` will return a Python dict containing\n",
-    "the configuration of the model or layer, respectively. You should define `get_config()`\n",
-    "to contain arguments needed for the `__init__()` method of the model or layer. At loading time,\n",
-    "the `from_config(config)` method will then call `__init__()` with these arguments to\n",
-    "reconstruct the model or layer.\n",
-    "\n",
-    "\n",
-    "**Layer example:**"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "layer = keras.layers.Dense(3, activation=\"relu\")\n",
-    "layer_config = layer.get_config()\n",
-    "print(layer_config)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "Now let's reconstruct the layer using the `from_config()` method:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "new_layer = keras.layers.Dense.from_config(layer_config)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Sequential model example:**"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "model = keras.Sequential([keras.Input((32,)), keras.layers.Dense(1)])\n",
-    "config = model.get_config()\n",
-    "new_model = keras.Sequential.from_config(config)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "**Functional model example:**"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "inputs = keras.Input((32,))\n",
-    "outputs = keras.layers.Dense(1)(inputs)\n",
-    "model = keras.Model(inputs, outputs)\n",
-    "config = model.get_config()\n",
-    "new_model = keras.Model.from_config(config)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
"colab_type": "text" - }, - "source": [ - "#### `to_json()` and `keras.models.model_from_json()`\n", - "\n", - "This is similar to `get_config` / `from_config`, except it turns the model\n", - "into a JSON string, which can then be loaded without the original model class.\n", - "It is also specific to models, it isn't meant for layers.\n", - "\n", - "**Example:**" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = keras.Sequential([keras.Input((32,)), keras.layers.Dense(1)])\n", - "json_config = model.to_json()\n", - "new_model = keras.models.model_from_json(json_config)\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "#### Arbitrary object serialization and deserialization\n", - "\n", - "The `keras.saving.serialize_keras_object()` and `keras.saving.deserialize_keras_object()`\n", - "APIs are general-purpose APIs that can be used to serialize or deserialize any Keras\n", - "object and any custom object. It is at the foundation of saving model architecture and is\n", - "behind all `serialize()`/`deserialize()` calls in keras.\n", - "\n", - "**Example**:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "my_reg = keras.regularizers.L1(0.005)\n", - "config = keras.saving.serialize_keras_object(my_reg)\n", - "print(config)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Note the serialization format containing all the necessary information for proper\n", - "reconstruction:\n", - "\n", - "- `module` containing the name of the Keras module or other identifying module the object\n", - "comes from\n", - "- `class_name` containing the name of the object's class.\n", - "- `config` with all the information needed to reconstruct the object\n", - "- `registered_name` for custom objects. See [here](#custom_object_serialization).\n", - "\n", - "Now we can reconstruct the regularizer." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "new_reg = keras.saving.deserialize_keras_object(config)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "### Model weights saving\n", - "\n", - "You can choose to only save & load a model's weights. 
This can be useful if:\n", - "\n", - "- You only need the model for inference: in this case you won't need to\n", - "restart training, so you don't need the compilation information or optimizer state.\n", - "- You are doing transfer learning: in this case you will be training a new model\n", - "reusing the state of a prior model, so you don't need the compilation\n", - "information of the prior model.\n", - "\n", - "#### APIs for in-memory weight transfer\n", - "\n", - "Weights can be copied between different objects by using `get_weights()`\n", - "and `set_weights()`:\n", - "\n", - "* `keras.layers.Layer.get_weights()`: Returns a list of NumPy arrays of weight values.\n", - "* `keras.layers.Layer.set_weights(weights)`: Sets the layer weights to the values\n", - "provided (as NumPy arrays).\n", - "\n", - "Examples:\n", - "\n", - "***Transferring weights from one layer to another, in memory***" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "def create_layer():\n", - "    layer = keras.layers.Dense(64, activation=\"relu\", name=\"dense_2\")\n", - "    layer.build((None, 784))\n", - "    return layer\n", - "\n", - "\n", - "layer_1 = create_layer()\n", - "layer_2 = create_layer()\n", - "\n", - "# Copy weights from layer 1 to layer 2\n", - "layer_2.set_weights(layer_1.get_weights())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "***Transferring weights from one model to another model with a compatible architecture, in memory***" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Create a simple functional model\n", - "inputs = keras.Input(shape=(784,), name=\"digits\")\n", - "x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_1\")(inputs)\n", - "x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_2\")(x)\n", - "outputs = keras.layers.Dense(10, name=\"predictions\")(x)\n", - "functional_model = keras.Model(inputs=inputs, outputs=outputs, name=\"3_layer_mlp\")\n", - "\n", - "\n", - "# Define a subclassed model with the same architecture\n", - "class SubclassedModel(keras.Model):\n", - "    def __init__(self, output_dim, name=None):\n", - "        super().__init__(name=name)\n", - "        self.output_dim = output_dim\n", - "        self.dense_1 = keras.layers.Dense(64, activation=\"relu\", name=\"dense_1\")\n", - "        self.dense_2 = keras.layers.Dense(64, activation=\"relu\", name=\"dense_2\")\n", - "        self.dense_3 = keras.layers.Dense(output_dim, name=\"predictions\")\n", - "\n", - "    def call(self, inputs):\n", - "        x = self.dense_1(inputs)\n", - "        x = self.dense_2(x)\n", - "        x = self.dense_3(x)\n", - "        return x\n", - "\n", - "    def get_config(self):\n", - "        return {\"output_dim\": self.output_dim, \"name\": self.name}\n", - "\n", - "\n", - "subclassed_model = SubclassedModel(10)\n", - "# Call the subclassed model once to create the weights.\n", - "subclassed_model(np.ones((1, 784)))\n", - "\n", - "# Copy weights from functional_model to subclassed_model.\n", - "subclassed_model.set_weights(functional_model.get_weights())\n", - "\n", - "assert len(functional_model.weights) == len(subclassed_model.weights)\n", - "for a, b in zip(functional_model.weights, subclassed_model.weights):\n", - "    np.testing.assert_allclose(a.numpy(), b.numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "***The case of stateless layers***\n",
"\n", - "Because stateless layers do not change the order or number of weights,\n", - "models can have compatible architectures even if there are extra/missing\n", - "stateless layers." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "inputs = keras.Input(shape=(784,), name=\"digits\")\n", - "x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_1\")(inputs)\n", - "x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_2\")(x)\n", - "outputs = keras.layers.Dense(10, name=\"predictions\")(x)\n", - "functional_model = keras.Model(inputs=inputs, outputs=outputs, name=\"3_layer_mlp\")\n", - "\n", - "inputs = keras.Input(shape=(784,), name=\"digits\")\n", - "x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_1\")(inputs)\n", - "x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_2\")(x)\n", - "\n", - "# Add a dropout layer, which does not contain any weights.\n", - "x = keras.layers.Dropout(0.5)(x)\n", - "outputs = keras.layers.Dense(10, name=\"predictions\")(x)\n", - "functional_model_with_dropout = keras.Model(\n", - " inputs=inputs, outputs=outputs, name=\"3_layer_mlp\"\n", - ")\n", - "\n", - "functional_model_with_dropout.set_weights(functional_model.get_weights())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "#### APIs for saving weights to disk & loading them back\n", - "\n", - "Weights can be saved to disk by calling `model.save_weights(filepath)`.\n", - "The filename should end in `.weights.h5`.\n", - "\n", - "**Example:**" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Runnable example\n", - "sequential_model = keras.Sequential(\n", - " [\n", - " keras.Input(shape=(784,), name=\"digits\"),\n", - " keras.layers.Dense(64, activation=\"relu\", name=\"dense_1\"),\n", - " keras.layers.Dense(64, activation=\"relu\", name=\"dense_2\"),\n", - " keras.layers.Dense(10, name=\"predictions\"),\n", - " ]\n", - ")\n", - "sequential_model.save_weights(\"my_model.weights.h5\")\n", - "sequential_model.load_weights(\"my_model.weights.h5\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Note that changing `layer.trainable` may result in a different\n", - "`layer.weights` ordering when the model contains nested layers." 
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class NestedDenseLayer(keras.layers.Layer):\n", - " def __init__(self, units, name=None):\n", - " super().__init__(name=name)\n", - " self.dense_1 = keras.layers.Dense(units, name=\"dense_1\")\n", - " self.dense_2 = keras.layers.Dense(units, name=\"dense_2\")\n", - "\n", - " def call(self, inputs):\n", - " return self.dense_2(self.dense_1(inputs))\n", - "\n", - "\n", - "nested_model = keras.Sequential([keras.Input((784,)), NestedDenseLayer(10, \"nested\")])\n", - "variable_names = [v.name for v in nested_model.weights]\n", - "print(\"variables: {}\".format(variable_names))\n", - "\n", - "print(\"\\nChanging trainable status of one of the nested layers...\")\n", - "nested_model.get_layer(\"nested\").dense_1.trainable = False\n", - "\n", - "variable_names_2 = [v.name for v in nested_model.weights]\n", - "print(\"\\nvariables: {}\".format(variable_names_2))\n", - "print(\"variable ordering changed:\", variable_names != variable_names_2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "##### **Transfer learning example**\n", - "\n", - "When loading pretrained weights from a weights file, it is recommended to load\n", - "the weights into the original checkpointed model, and then extract\n", - "the desired weights/layers into a new model.\n", - "\n", - "**Example:**" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "def create_functional_model():\n", - " inputs = keras.Input(shape=(784,), name=\"digits\")\n", - " x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_1\")(inputs)\n", - " x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_2\")(x)\n", - " outputs = keras.layers.Dense(10, name=\"predictions\")(x)\n", - " return keras.Model(inputs=inputs, outputs=outputs, name=\"3_layer_mlp\")\n", - "\n", - "\n", - "functional_model = create_functional_model()\n", - "functional_model.save_weights(\"pretrained.weights.h5\")\n", - "\n", - "# In a separate program:\n", - "pretrained_model = create_functional_model()\n", - "pretrained_model.load_weights(\"pretrained.weights.h5\")\n", - "\n", - "# Create a new model by extracting layers from the original model:\n", - "extracted_layers = pretrained_model.layers[:-1]\n", - "extracted_layers.append(keras.layers.Dense(5, name=\"dense_3\"))\n", - "model = keras.Sequential(extracted_layers)\n", - "model.summary()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "### Appendix: Handling custom objects\n", - "\n", - "\n", - "#### Defining the config methods\n", - "\n", - "Specifications:\n", - "\n", - "* `get_config()` should return a JSON-serializable dictionary in order to be\n", - "compatible with the Keras architecture- and model-saving APIs.\n", - "* `from_config(config)` (a `classmethod`) should return a new layer or model\n", - "object that is created from the config.\n", - "The default implementation returns `cls(**config)`.\n", - "\n", - "**NOTE**: If all your constructor arguments are already serializable, e.g. strings and\n", - "ints, or non-custom Keras objects, overriding `from_config` is not necessary. 
However,\n", - "for more complex objects such as layers or models passed to `__init__`, deserialization\n", - "must be handled explicitly either in `__init__` itself or by overriding the `from_config()`\n", - "method.\n", - "\n", - "**Example:**" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "@keras.saving.register_keras_serializable(package=\"MyLayers\", name=\"KernelMult\")\n", - "class MyDense(keras.layers.Layer):\n", - "    def __init__(\n", - "        self,\n", - "        units,\n", - "        *,\n", - "        kernel_regularizer=None,\n", - "        kernel_initializer=None,\n", - "        nested_model=None,\n", - "        **kwargs\n", - "    ):\n", - "        super().__init__(**kwargs)\n", - "        self.hidden_units = units\n", - "        self.kernel_regularizer = kernel_regularizer\n", - "        self.kernel_initializer = kernel_initializer\n", - "        self.nested_model = nested_model\n", - "\n", - "    def get_config(self):\n", - "        config = super().get_config()\n", - "        # Update the config with the custom layer's parameters\n", - "        config.update(\n", - "            {\n", - "                \"units\": self.hidden_units,\n", - "                \"kernel_regularizer\": self.kernel_regularizer,\n", - "                \"kernel_initializer\": self.kernel_initializer,\n", - "                \"nested_model\": self.nested_model,\n", - "            }\n", - "        )\n", - "        return config\n", - "\n", - "    def build(self, input_shape):\n", - "        input_units = input_shape[-1]\n", - "        self.kernel = self.add_weight(\n", - "            name=\"kernel\",\n", - "            shape=(input_units, self.hidden_units),\n", - "            regularizer=self.kernel_regularizer,\n", - "            initializer=self.kernel_initializer,\n", - "        )\n", - "\n", - "    def call(self, inputs):\n", - "        return ops.matmul(inputs, self.kernel)\n", - "\n", - "\n", - "layer = MyDense(units=16, kernel_regularizer=\"l1\", kernel_initializer=\"ones\")\n", - "layer3 = MyDense(units=64, nested_model=layer)\n", - "\n", - "config = keras.layers.serialize(layer3)\n", - "\n", - "print(config)\n", - "\n", - "new_layer = keras.layers.deserialize(config)\n", - "\n", - "print(new_layer)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Note that overriding `from_config` is unnecessary above for `MyDense` because\n", - "`hidden_units`, `kernel_initializer`, and `kernel_regularizer` are ints, strings, and a\n", - "built-in Keras object, respectively. This means that the default `from_config`\n", - "implementation of `cls(**config)` will work as intended.\n", - "\n", - "For more complex objects, such as layers and models passed to `__init__`,\n", - "you must explicitly deserialize these objects. 
Let's take a look at an example\n", - "of a custom layer where a `from_config` override is necessary.\n", - "\n", - "**Example:**\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "@keras.saving.register_keras_serializable(package=\"ComplexModels\")\n", - "class CustomModel(keras.layers.Layer):\n", - "    def __init__(self, first_layer, second_layer=None, **kwargs):\n", - "        super().__init__(**kwargs)\n", - "        self.first_layer = first_layer\n", - "        if second_layer is not None:\n", - "            self.second_layer = second_layer\n", - "        else:\n", - "            self.second_layer = keras.layers.Dense(8)\n", - "\n", - "    def get_config(self):\n", - "        config = super().get_config()\n", - "        config.update(\n", - "            {\n", - "                \"first_layer\": self.first_layer,\n", - "                \"second_layer\": self.second_layer,\n", - "            }\n", - "        )\n", - "        return config\n", - "\n", - "    @classmethod\n", - "    def from_config(cls, config):\n", - "        # Note that you can also use `keras.saving.deserialize_keras_object` here\n", - "        config[\"first_layer\"] = keras.layers.deserialize(config[\"first_layer\"])\n", - "        config[\"second_layer\"] = keras.layers.deserialize(config[\"second_layer\"])\n", - "        return cls(**config)\n", - "\n", - "    def call(self, inputs):\n", - "        return self.first_layer(self.second_layer(inputs))\n", - "\n", - "\n", - "# Let's make our first layer the custom layer from the previous example (MyDense)\n", - "inputs = keras.Input((32,))\n", - "outputs = CustomModel(first_layer=layer)(inputs)\n", - "model = keras.Model(inputs, outputs)\n", - "\n", - "config = model.get_config()\n", - "new_model = keras.Model.from_config(config)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "\n", - "#### How custom objects are serialized\n", - "\n", - "The serialization format has a special key for custom objects registered via\n", - "`@keras.saving.register_keras_serializable`. This `registered_name` key allows for easy\n", - "retrieval at loading/deserialization time while also allowing users to add custom naming.\n", - "\n", - "Let's take a look at the config from serializing the custom layer `MyDense` we defined\n", - "above.\n", - "\n", - "**Example**:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "layer = MyDense(\n", - "    units=16,\n", - "    kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-4),\n", - "    kernel_initializer=\"ones\",\n", - ")\n", - "config = keras.layers.serialize(layer)\n", - "print(config)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "As shown, the `registered_name` key contains the lookup information for the Keras master\n", - "list, including the package `MyLayers` and the custom name `KernelMult` that we gave in\n", - "the `@keras.saving.register_keras_serializable` decorator. Take a look again at the custom\n", - "class definition/registration [here](#registration_example).\n", - "\n", - "Note that the `class_name` key contains the original name of the class, allowing for\n", - "proper re-initialization in `from_config`.\n", - "\n", - "Additionally, note that the `module` key is `None` since this is a custom object.",
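- "\n", - "As a final check (a minimal sketch reusing the registered `MyDense` layer from above; the\n", - "file name is just illustrative), registration also means a saved model can be reloaded\n", - "without passing `custom_objects`:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "inputs = keras.Input((32,))\n", - "outputs = MyDense(units=16, kernel_regularizer=\"l1\", kernel_initializer=\"ones\")(inputs)\n", - "model = keras.Model(inputs, outputs)\n", - "model.save(\"registered_custom_model.keras\")\n", - "\n", - "# No `custom_objects` argument is needed here: the `registered_name` recorded in the\n", - "# file is resolved against the registration performed by the decorator above.\n", - "restored_model = keras.models.load_model(\"registered_custom_model.keras\")"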
- ] - } - ], - "metadata": { - "accelerator": "None", - "colab": { - "collapsed_sections": [], - "name": "serialization_and_saving", - "private_outputs": false, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/guides/ipynb/keras_core/training_with_built_in_methods.ipynb b/guides/ipynb/keras_core/training_with_built_in_methods.ipynb deleted file mode 100644 index 8d072a3674..0000000000 --- a/guides/ipynb/keras_core/training_with_built_in_methods.ipynb +++ /dev/null @@ -1,1965 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "# Training & evaluation with the built-in methods\n", - "\n", - "**Author:** [fchollet](https://twitter.com/fchollet)
\n", - "**Date created:** 2019/03/01
\n", - "**Last modified:** 2023/06/25
\n", - "**Description:** Complete guide to training & evaluation with `fit()` and `evaluate()`." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# We import torch & TF so as to use torch Dataloaders & tf.data.Datasets.\n", - "import torch\n", - "import tensorflow as tf\n", - "\n", - "import os\n", - "import numpy as np\n", - "import keras\n", - "from keras import layers\n", - "from keras import ops" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Introduction\n", - "\n", - "This guide covers training, evaluation, and prediction (inference) models\n", - "when using built-in APIs for training & validation (such as `Model.fit()`,\n", - "`Model.evaluate()` and `Model.predict()`).\n", - "\n", - "If you are interested in leveraging `fit()` while specifying your\n", - "own training step function, see the guides on customizing what happens in `fit()`:\n", - "\n", - "- [Writing a custom train step with TensorFlow](/guides/custom_train_step_in_tensorflow/)\n", - "- [Writing a custom train step with JAX](/guides/custom_train_step_in_jax/)\n", - "- [Writing a custom train step with PyTorch](/guides/custom_train_step_in_torch/)\n", - "\n", - "If you are interested in writing your own training & evaluation loops from\n", - "scratch, see the guides on writing training loops:\n", - "\n", - "- [Writing a training loop with TensorFlow](/guides/writing_a_custom_training_loop_in_tensorflow/)\n", - "- [Writing a training loop with JAX](/guides/writing_a_custom_training_loop_in_jax/)\n", - "- [Writing a training loop with PyTorch](/guides/writing_a_custom_training_loop_in_torch/)\n", - "\n", - "In general, whether you are using built-in loops or writing your own, model training &\n", - "evaluation works strictly in the same way across every kind of Keras model --\n", - "Sequential models, models built with the Functional API, and models written from\n", - "scratch via model subclassing." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## API overview: a first end-to-end example\n", - "\n", - "When passing data to the built-in training loops of a model, you should either use:\n", - "\n", - "- NumPy arrays (if your data is small and fits in memory)\n", - "- Subclasses of `keras.utils.PyDataset`\n", - "- `tf.data.Dataset` objects\n", - "- PyTorch `DataLoader` instances\n", - "\n", - "In the next few paragraphs, we'll use the MNIST dataset as NumPy arrays, in\n", - "order to demonstrate how to use optimizers, losses, and metrics. 
Afterwards, we'll\n", - "take a close look at each of the other options.\n", - "\n", - "Let's consider the following model (here, we build it with the Functional API, but it\n", - "could be a Sequential model or a subclassed model as well):" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "inputs = keras.Input(shape=(784,), name=\"digits\")\n", - "x = layers.Dense(64, activation=\"relu\", name=\"dense_1\")(inputs)\n", - "x = layers.Dense(64, activation=\"relu\", name=\"dense_2\")(x)\n", - "outputs = layers.Dense(10, activation=\"softmax\", name=\"predictions\")(x)\n", - "\n", - "model = keras.Model(inputs=inputs, outputs=outputs)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Here's what the typical end-to-end workflow looks like, consisting of:\n", - "\n", - "- Training\n", - "- Validation on a holdout set generated from the original training data\n", - "- Evaluation on the test data\n", - "\n", - "We'll use MNIST data for this example." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()\n", - "\n", - "# Preprocess the data (these are NumPy arrays)\n", - "x_train = x_train.reshape(60000, 784).astype(\"float32\") / 255\n", - "x_test = x_test.reshape(10000, 784).astype(\"float32\") / 255\n", - "\n", - "y_train = y_train.astype(\"float32\")\n", - "y_test = y_test.astype(\"float32\")\n", - "\n", - "# Reserve 10,000 samples for validation\n", - "x_val = x_train[-10000:]\n", - "y_val = y_train[-10000:]\n", - "x_train = x_train[:-10000]\n", - "y_train = y_train[:-10000]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "We specify the training configuration (optimizer, loss, metrics):" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.compile(\n", - "    optimizer=keras.optimizers.RMSprop(),  # Optimizer\n", - "    # Loss function to minimize\n", - "    loss=keras.losses.SparseCategoricalCrossentropy(),\n", - "    # List of metrics to monitor\n", - "    metrics=[keras.metrics.SparseCategoricalAccuracy()],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "We call `fit()`, which will train the model by slicing the data into \"batches\" of size\n", - "`batch_size`, and repeatedly iterating over the entire dataset for a given number of\n", - "`epochs`."
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "print(\"Fit model on training data\")\n", - "history = model.fit(\n", - "    x_train,\n", - "    y_train,\n", - "    batch_size=64,\n", - "    epochs=2,\n", - "    # We pass some validation data for\n", - "    # monitoring validation loss and metrics\n", - "    # at the end of each epoch\n", - "    validation_data=(x_val, y_val),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "The returned `history` object holds a record of the loss values and metric values\n", - "during training:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "history.history" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "We evaluate the model on the test data via `evaluate()`:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Evaluate the model on the test data using `evaluate`\n", - "print(\"Evaluate on test data\")\n", - "results = model.evaluate(x_test, y_test, batch_size=128)\n", - "print(\"test loss, test acc:\", results)\n", - "\n", - "# Generate predictions (probabilities -- the output of the last layer)\n", - "# on new data using `predict`\n", - "print(\"Generate predictions for 3 samples\")\n", - "predictions = model.predict(x_test[:3])\n", - "print(\"predictions shape:\", predictions.shape)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Now, let's review each piece of this workflow in detail." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## The `compile()` method: specifying a loss, metrics, and an optimizer\n", - "\n", - "To train a model with `fit()`, you need to specify a loss function, an optimizer, and\n", - "optionally, some metrics to monitor.\n", - "\n", - "You pass these to the model as arguments to the `compile()` method:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.compile(\n", - "    optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),\n", - "    loss=keras.losses.SparseCategoricalCrossentropy(),\n", - "    metrics=[keras.metrics.SparseCategoricalAccuracy()],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "The `metrics` argument should be a list -- your model can have any number of metrics.\n", - "\n", - "If your model has multiple outputs, you can specify different losses and metrics for\n", - "each output, and you can modulate the contribution of each output to the total loss of\n", - "the model. 
You will find more details about this in the **Passing data to multi-input,\n", - "multi-output models** section.\n", - "\n", - "Note that if you're satisfied with the default settings, in many cases the optimizer,\n", - "loss, and metrics can be specified via string identifiers as a shortcut:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.compile(\n", - " optimizer=\"rmsprop\",\n", - " loss=\"sparse_categorical_crossentropy\",\n", - " metrics=[\"sparse_categorical_accuracy\"],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "For later reuse, let's put our model definition and compile step in functions; we will\n", - "call them several times across different examples in this guide." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "def get_uncompiled_model():\n", - " inputs = keras.Input(shape=(784,), name=\"digits\")\n", - " x = layers.Dense(64, activation=\"relu\", name=\"dense_1\")(inputs)\n", - " x = layers.Dense(64, activation=\"relu\", name=\"dense_2\")(x)\n", - " outputs = layers.Dense(10, activation=\"softmax\", name=\"predictions\")(x)\n", - " model = keras.Model(inputs=inputs, outputs=outputs)\n", - " return model\n", - "\n", - "\n", - "def get_compiled_model():\n", - " model = get_uncompiled_model()\n", - " model.compile(\n", - " optimizer=\"rmsprop\",\n", - " loss=\"sparse_categorical_crossentropy\",\n", - " metrics=[\"sparse_categorical_accuracy\"],\n", - " )\n", - " return model\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "### Many built-in optimizers, losses, and metrics are available\n", - "\n", - "In general, you won't have to create your own losses, metrics, or optimizers\n", - "from scratch, because what you need is likely to be already part of the Keras API:\n", - "\n", - "Optimizers:\n", - "\n", - "- `SGD()` (with or without momentum)\n", - "- `RMSprop()`\n", - "- `Adam()`\n", - "- etc.\n", - "\n", - "Losses:\n", - "\n", - "- `MeanSquaredError()`\n", - "- `KLDivergence()`\n", - "- `CosineSimilarity()`\n", - "- etc.\n", - "\n", - "Metrics:\n", - "\n", - "- `AUC()`\n", - "- `Precision()`\n", - "- `Recall()`\n", - "- etc." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "### Custom losses\n", - "\n", - "If you need to create a custom loss, Keras provides three ways to do so.\n", - "\n", - "The first method involves creating a function that accepts inputs `y_true` and\n", - "`y_pred`. 
The following example shows a loss function that computes the mean squared\n", - "error between the real data and the predictions:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "def custom_mean_squared_error(y_true, y_pred):\n", - "    return ops.mean(ops.square(y_true - y_pred), axis=-1)\n", - "\n", - "\n", - "model = get_uncompiled_model()\n", - "model.compile(optimizer=keras.optimizers.Adam(), loss=custom_mean_squared_error)\n", - "\n", - "# We need to one-hot encode the labels to use MSE\n", - "y_train_one_hot = ops.one_hot(y_train, num_classes=10)\n", - "model.fit(x_train, y_train_one_hot, batch_size=64, epochs=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "If you need a loss function that takes in parameters besides `y_true` and `y_pred`, you\n", - "can subclass the `keras.losses.Loss` class and implement the following two methods:\n", - "\n", - "- `__init__(self)`: accept parameters to pass during the call of your loss function\n", - "- `call(self, y_true, y_pred)`: use the targets (y_true) and the model predictions\n", - "(y_pred) to compute the model's loss\n", - "\n", - "Let's say you want to use mean squared error, but with an added term that\n", - "will de-incentivize prediction values far from 0.5 (we assume that the categorical\n", - "targets are one-hot encoded and take values between 0 and 1). This\n", - "creates an incentive for the model not to be too confident, which may help\n", - "reduce overfitting (we won't know if it works until we try!).\n", - "\n", - "Here's how you would do it:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class CustomMSE(keras.losses.Loss):\n", - "    def __init__(self, regularization_factor=0.1, name=\"custom_mse\"):\n", - "        super().__init__(name=name)\n", - "        self.regularization_factor = regularization_factor\n", - "\n", - "    def call(self, y_true, y_pred):\n", - "        mse = ops.mean(ops.square(y_true - y_pred), axis=-1)\n", - "        reg = ops.mean(ops.square(0.5 - y_pred), axis=-1)\n", - "        return mse + reg * self.regularization_factor\n", - "\n", - "\n", - "model = get_uncompiled_model()\n", - "model.compile(optimizer=keras.optimizers.Adam(), loss=CustomMSE())\n", - "\n", - "y_train_one_hot = ops.one_hot(y_train, num_classes=10)\n", - "model.fit(x_train, y_train_one_hot, batch_size=64, epochs=1)\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "### Custom metrics\n", - "\n", - "If you need a metric that isn't part of the API, you can easily create custom metrics\n", - "by subclassing the `keras.metrics.Metric` class. 
You will need to implement 4\n", - "methods:\n", - "\n", - "- `__init__(self)`, in which you will create state variables for your metric.\n", - "- `update_state(self, y_true, y_pred, sample_weight=None)`, which uses the targets\n", - "y_true and the model predictions y_pred to update the state variables.\n", - "- `result(self)`, which uses the state variables to compute the final results.\n", - "- `reset_state(self)`, which reinitializes the state of the metric.\n", - "\n", - "State update and results computation are kept separate (in `update_state()` and\n", - "`result()`, respectively) because in some cases, the results computation might be very\n", - "expensive and would only be done periodically.\n", - "\n", - "Here's a simple example showing how to implement a `CategoricalTruePositives` metric\n", - "that counts how many samples were correctly classified as belonging to a given class:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class CategoricalTruePositives(keras.metrics.Metric):\n", - " def __init__(self, name=\"categorical_true_positives\", **kwargs):\n", - " super().__init__(name=name, **kwargs)\n", - " self.true_positives = self.add_variable(\n", - " shape=(), name=\"ctp\", initializer=\"zeros\"\n", - " )\n", - "\n", - " def update_state(self, y_true, y_pred, sample_weight=None):\n", - " y_pred = ops.reshape(ops.argmax(y_pred, axis=1), (-1, 1))\n", - " values = ops.cast(y_true, \"int32\") == ops.cast(y_pred, \"int32\")\n", - " values = ops.cast(values, \"float32\")\n", - " if sample_weight is not None:\n", - " sample_weight = ops.cast(sample_weight, \"float32\")\n", - " values = ops.multiply(values, sample_weight)\n", - " self.true_positives.assign_add(ops.sum(values))\n", - "\n", - " def result(self):\n", - " return self.true_positives\n", - "\n", - " def reset_state(self):\n", - " # The state of the metric will be reset at the start of each epoch.\n", - " self.true_positives.assign(0.0)\n", - "\n", - "\n", - "model = get_uncompiled_model()\n", - "model.compile(\n", - " optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),\n", - " loss=keras.losses.SparseCategoricalCrossentropy(),\n", - " metrics=[CategoricalTruePositives()],\n", - ")\n", - "model.fit(x_train, y_train, batch_size=64, epochs=3)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "### Handling losses and metrics that don't fit the standard signature\n", - "\n", - "The overwhelming majority of losses and metrics can be computed from `y_true` and\n", - "`y_pred`, where `y_pred` is an output of your model -- but not all of them. For\n", - "instance, a regularization loss may only require the activation of a layer (there are\n", - "no targets in this case), and this activation may not be a model output.\n", - "\n", - "In such cases, you can call `self.add_loss(loss_value)` from inside the call method of\n", - "a custom layer. Losses added in this way get added to the \"main\" loss during training\n", - "(the one passed to `compile()`). 
Here's a simple example that adds activity\n", - "regularization (note that activity regularization is built into all Keras layers --\n", - "this layer is just for the sake of providing a concrete example):" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class ActivityRegularizationLayer(layers.Layer):\n", - "    def call(self, inputs):\n", - "        self.add_loss(ops.sum(inputs) * 0.1)\n", - "        return inputs  # Pass-through layer.\n", - "\n", - "\n", - "inputs = keras.Input(shape=(784,), name=\"digits\")\n", - "x = layers.Dense(64, activation=\"relu\", name=\"dense_1\")(inputs)\n", - "\n", - "# Insert activity regularization as a layer\n", - "x = ActivityRegularizationLayer()(x)\n", - "\n", - "x = layers.Dense(64, activation=\"relu\", name=\"dense_2\")(x)\n", - "outputs = layers.Dense(10, name=\"predictions\")(x)\n", - "\n", - "model = keras.Model(inputs=inputs, outputs=outputs)\n", - "model.compile(\n", - "    optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),\n", - "    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", - ")\n", - "\n", - "# The displayed loss will be much higher than before\n", - "# due to the regularization component.\n", - "model.fit(x_train, y_train, batch_size=64, epochs=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Note that when you pass losses via `add_loss()`, it becomes possible to call\n", - "`compile()` without a loss function, since the model already has a loss to minimize.\n", - "\n", - "Consider the following `LogisticEndpoint` layer: it takes as inputs\n", - "targets & logits, and it tracks a crossentropy loss via `add_loss()`." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class LogisticEndpoint(keras.layers.Layer):\n", - "    def __init__(self, name=None):\n", - "        super().__init__(name=name)\n", - "        self.loss_fn = keras.losses.BinaryCrossentropy(from_logits=True)\n", - "\n", - "    def call(self, targets, logits, sample_weights=None):\n", - "        # Compute the training-time loss value and add it\n", - "        # to the layer using `self.add_loss()`.\n", - "        loss = self.loss_fn(targets, logits, sample_weights)\n", - "        self.add_loss(loss)\n", - "\n", - "        # Return the inference-time prediction tensor (for `.predict()`).\n", - "        return ops.softmax(logits)\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "You can use it in a model with two inputs (input data & targets), compiled without a\n", - "`loss` argument, like this:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "inputs = keras.Input(shape=(3,), name=\"inputs\")\n", - "targets = keras.Input(shape=(10,), name=\"targets\")\n", - "logits = keras.layers.Dense(10)(inputs)\n", - "predictions = LogisticEndpoint(name=\"predictions\")(targets, logits)\n", - "\n", - "model = keras.Model(inputs=[inputs, targets], outputs=predictions)\n", - "model.compile(optimizer=\"adam\")  # No loss argument!\n", - "\n", - "data = {\n", - "    \"inputs\": np.random.random((3, 3)),\n", - "    \"targets\": np.random.random((3, 10)),\n", - "}\n", - "model.fit(data)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "For more information about training multi-input models, see the section 
**Passing data\n", - "to multi-input, multi-output models**." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "### Automatically setting apart a validation holdout set\n", - "\n", - "In the first end-to-end example you saw, we used the `validation_data` argument to pass\n", - "a tuple of NumPy arrays `(x_val, y_val)` to the model for evaluating a validation loss\n", - "and validation metrics at the end of each epoch.\n", - "\n", - "Here's another option: the argument `validation_split` allows you to automatically\n", - "reserve part of your training data for validation. The argument value represents the\n", - "fraction of the data to be reserved for validation, so it should be set to a number\n", - "higher than 0 and lower than 1. For instance, `validation_split=0.2` means \"use 20% of\n", - "the data for validation\", and `validation_split=0.6` means \"use 60% of the data for\n", - "validation\".\n", - "\n", - "The way the validation is computed is by taking the last x% samples of the arrays\n", - "received by the `fit()` call, before any shuffling.\n", - "\n", - "Note that you can only use `validation_split` when training with NumPy data." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = get_compiled_model()\n", - "model.fit(x_train, y_train, batch_size=64, validation_split=0.2, epochs=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Training & evaluation using `tf.data` Datasets\n", - "\n", - "In the past few paragraphs, you've seen how to handle losses, metrics, and optimizers,\n", - "and you've seen how to use the `validation_data` and `validation_split` arguments in\n", - "`fit()`, when your data is passed as NumPy arrays.\n", - "\n", - "Another option is to use an iterator-like object, such as a `tf.data.Dataset`, a\n", - "PyTorch `DataLoader`, or a Keras `PyDataset`. Let's take a look at the former.\n", - "\n", - "The `tf.data` API is a set of utilities in TensorFlow 2.0 for loading and preprocessing\n", - "data in a way that's fast and scalable. 
For a complete guide about creating `Datasets`,\n", - "see the [tf.data documentation](https://www.tensorflow.org/guide/data).\n", - "\n", - "**You can use `tf.data` to train your Keras\n", - "models regardless of the backend you're using --\n", - "whether it's JAX, PyTorch, or TensorFlow.**\n", - "You can pass a `Dataset` instance directly to the methods `fit()`, `evaluate()`, and\n", - "`predict()`:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = get_compiled_model()\n", - "\n", - "# First, let's create a training Dataset instance.\n", - "# For the sake of our example, we'll use the same MNIST data as before.\n", - "train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))\n", - "# Shuffle and slice the dataset.\n", - "train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)\n", - "\n", - "# Now we get a test dataset.\n", - "test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))\n", - "test_dataset = test_dataset.batch(64)\n", - "\n", - "# Since the dataset already takes care of batching,\n", - "# we don't pass a `batch_size` argument.\n", - "model.fit(train_dataset, epochs=3)\n", - "\n", - "# You can also evaluate or predict on a dataset.\n", - "print(\"Evaluate\")\n", - "result = model.evaluate(test_dataset)\n", - "dict(zip(model.metrics_names, result))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Note that the Dataset is reset at the end of each epoch, so it can be reused for the\n", - "next epoch.\n", - "\n", - "If you want to run training only on a specific number of batches from this Dataset, you\n", - "can pass the `steps_per_epoch` argument, which specifies how many training steps the\n", - "model should run using this Dataset before moving on to the next epoch."
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = get_compiled_model()\n", - "\n", - "# Prepare the training dataset\n", - "train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))\n", - "train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)\n", - "\n", - "# Only use 100 batches per epoch (that's 64 * 100 samples)\n", - "model.fit(train_dataset, epochs=3, steps_per_epoch=100)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "You can also pass a `Dataset` instance as the `validation_data` argument in `fit()`:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = get_compiled_model()\n", - "\n", - "# Prepare the training dataset\n", - "train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))\n", - "train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)\n", - "\n", - "# Prepare the validation dataset\n", - "val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))\n", - "val_dataset = val_dataset.batch(64)\n", - "\n", - "model.fit(train_dataset, epochs=1, validation_data=val_dataset)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "At the end of each epoch, the model will iterate over the validation dataset and\n", - "compute the validation loss and validation metrics.\n", - "\n", - "If you want to run validation only on a specific number of batches from this dataset,\n", - "you can pass the `validation_steps` argument, which specifies how many validation\n", - "steps the model should run with the validation dataset before interrupting validation\n", - "and moving on to the next epoch:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = get_compiled_model()\n", - "\n", - "# Prepare the training dataset\n", - "train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))\n", - "train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)\n", - "\n", - "# Prepare the validation dataset\n", - "val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))\n", - "val_dataset = val_dataset.batch(64)\n", - "\n", - "model.fit(\n", - "    train_dataset,\n", - "    epochs=1,\n", - "    # Only run validation using the first 10 batches of the dataset\n", - "    # using the `validation_steps` argument\n", - "    validation_data=val_dataset,\n", - "    validation_steps=10,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Note that the validation dataset will be reset after each use (so that you will always\n", - "be evaluating on the same samples from epoch to epoch).\n", - "\n", - "The argument `validation_split` (generating a holdout set from the training data) is\n", - "not supported when training from `Dataset` objects, since this feature requires the\n", - "ability to index the samples of the dataset, which is not possible in general with\n", - "the `Dataset` API.",
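- "\n", - "If you still want a validation holdout when training from a `Dataset`, one workaround\n", - "(a minimal sketch rather than a dedicated Keras API) is to split the dataset yourself\n", - "with `Dataset.take()` and `Dataset.skip()` before batching:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "full_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))\n", - "\n", - "# Reserve the first 10,000 samples for validation, then batch both splits.\n", - "val_dataset = full_dataset.take(10000).batch(64)\n", - "train_dataset = full_dataset.skip(10000).shuffle(buffer_size=1024).batch(64)\n", - "\n", - "model = get_compiled_model()\n", - "model.fit(train_dataset, epochs=1, validation_data=val_dataset)"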
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Training & evaluation using `PyDataset` instances\n", - "\n", - "`keras.utils.PyDataset` is a utility that you can subclass to obtain\n", - "a Python generator with two important properties:\n", - "\n", - "- It works well with multiprocessing.\n", - "- It can be shuffled (e.g. when passing `shuffle=True` in `fit()`).\n", - "\n", - "A `PyDataset` must implement two methods:\n", - "\n", - "- `__getitem__`\n", - "- `__len__`\n", - "\n", - "The method `__getitem__` should return a complete batch.\n", - "If you want to modify your dataset between epochs, you may implement `on_epoch_end`.\n", - "\n", - "Here's a quick example:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class ExamplePyDataset(keras.utils.PyDataset):\n", - "    def __init__(self, x, y, batch_size, **kwargs):\n", - "        super().__init__(**kwargs)\n", - "        self.x = x\n", - "        self.y = y\n", - "        self.batch_size = batch_size\n", - "\n", - "    def __len__(self):\n", - "        return int(np.ceil(len(self.x) / float(self.batch_size)))\n", - "\n", - "    def __getitem__(self, idx):\n", - "        batch_x = self.x[idx * self.batch_size : (idx + 1) * self.batch_size]\n", - "        batch_y = self.y[idx * self.batch_size : (idx + 1) * self.batch_size]\n", - "        return batch_x, batch_y\n", - "\n", - "\n", - "train_py_dataset = ExamplePyDataset(x_train, y_train, batch_size=32)\n", - "val_py_dataset = ExamplePyDataset(x_val, y_val, batch_size=32)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "To fit the model, pass the dataset as the `x` argument (no need for a `y`\n", - "argument since the dataset includes the targets), and pass the validation dataset\n", - "as the `validation_data` argument. And no need for the `batch_size` argument, since\n", - "the dataset is already batched!" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = get_compiled_model()\n", - "model.fit(train_py_dataset, validation_data=val_py_dataset, epochs=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Evaluating the model is just as easy:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.evaluate(val_py_dataset)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Importantly, `PyDataset` objects support three common constructor arguments\n", - "that handle the parallel processing configuration:\n", - "\n", - "- `workers`: Number of workers to use in multithreading or\n", - "    multiprocessing. Typically, you'd set it to the number of\n", - "    cores on your CPU.\n", - "- `use_multiprocessing`: Whether to use Python multiprocessing for\n", - "    parallelism. Setting this to `True` means that your\n", - "    dataset will be replicated in multiple forked processes.\n", - "    This is necessary to gain compute-level (rather than I/O level)\n", - "    benefits from parallelism. 
However, it can only be set to\n", - "    `True` if your dataset can be safely pickled.\n", - "- `max_queue_size`: Maximum number of batches to keep in the queue\n", - "    when iterating over the dataset in a multithreaded or\n", - "    multiprocessed setting.\n", - "    You can reduce this value to reduce the CPU memory consumption of\n", - "    your dataset. It defaults to 10.\n", - "\n", - "By default, multiprocessing is disabled (`use_multiprocessing=False`) and only\n", - "one thread is used. You should make sure to only turn on `use_multiprocessing` if\n", - "your code is running inside a Python `if __name__ == \"__main__\":` block in order\n", - "to avoid issues.\n", - "\n", - "Here's a 4-thread, non-multiprocessed example:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "train_py_dataset = ExamplePyDataset(x_train, y_train, batch_size=32, workers=4)\n", - "val_py_dataset = ExamplePyDataset(x_val, y_val, batch_size=32, workers=4)\n", - "\n", - "model = get_compiled_model()\n", - "model.fit(train_py_dataset, validation_data=val_py_dataset, epochs=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Training & evaluation using PyTorch `DataLoader` objects\n", - "\n", - "All built-in training and evaluation APIs are also compatible with `torch.utils.data.Dataset` and\n", - "`torch.utils.data.DataLoader` objects -- regardless of whether you're using the PyTorch backend,\n", - "or the JAX or TensorFlow backends. Let's take a look at a simple example.\n", - "\n", - "Unlike `PyDataset`, which is batch-centric, PyTorch `Dataset` objects are sample-centric:\n", - "the `__len__` method returns the number of samples,\n", - "and the `__getitem__` method returns a specific sample."
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class ExampleTorchDataset(torch.utils.data.Dataset):\n", - "    def __init__(self, x, y):\n", - "        self.x = x\n", - "        self.y = y\n", - "\n", - "    def __len__(self):\n", - "        return len(self.x)\n", - "\n", - "    def __getitem__(self, idx):\n", - "        return self.x[idx], self.y[idx]\n", - "\n", - "\n", - "train_torch_dataset = ExampleTorchDataset(x_train, y_train)\n", - "val_torch_dataset = ExampleTorchDataset(x_val, y_val)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "To use a PyTorch Dataset, you need to wrap it into a `DataLoader`, which takes care\n", - "of batching and shuffling:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "train_dataloader = torch.utils.data.DataLoader(\n", - "    train_torch_dataset, batch_size=32, shuffle=True\n", - ")\n", - "val_dataloader = torch.utils.data.DataLoader(\n", - "    val_torch_dataset, batch_size=32, shuffle=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Now you can use them in the Keras API just like any other iterator:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = get_compiled_model()\n", - "model.fit(train_dataloader, validation_data=val_dataloader, epochs=1)\n", - "model.evaluate(val_dataloader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Using sample weighting and class weighting\n", - "\n", - "With the default settings, the weight of a sample is decided by its frequency\n", - "in the dataset. There are two methods to weight the data, independent of\n", - "sample frequency:\n", - "\n", - "* Class weights\n", - "* Sample weights" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "### Class weights\n", - "\n", - "This is set by passing a dictionary to the `class_weight` argument to\n", - "`Model.fit()`. This dictionary maps class indices to the weight that should\n", - "be used for samples belonging to this class.\n", - "\n", - "This can be used to balance classes without resampling, or to train a\n", - "model that gives more importance to a particular class.\n", - "\n", - "For instance, if class \"0\" is half as represented as class \"1\" in your data,\n", - "you could use `Model.fit(..., class_weight={0: 1., 1: 0.5})`." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Here's a NumPy example where we use class weights or sample weights to\n", - "give more importance to the correct classification of class #5 (which\n", - "is the digit \"5\" in the MNIST dataset)."
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "class_weight = {\n", - "    0: 1.0,\n", - "    1: 1.0,\n", - "    2: 1.0,\n", - "    3: 1.0,\n", - "    4: 1.0,\n", - "    # Set weight \"2\" for class \"5\",\n", - "    # making this class 2x more important\n", - "    5: 2.0,\n", - "    6: 1.0,\n", - "    7: 1.0,\n", - "    8: 1.0,\n", - "    9: 1.0,\n", - "}\n", - "\n", - "print(\"Fit with class weight\")\n", - "model = get_compiled_model()\n", - "model.fit(x_train, y_train, class_weight=class_weight, batch_size=64, epochs=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "### Sample weights\n", - "\n", - "For fine-grained control, or if you are not building a classifier,\n", - "you can use \"sample weights\".\n", - "\n", - "- When training from NumPy data: Pass the `sample_weight`\n", - "  argument to `Model.fit()`.\n", - "- When training from `tf.data` or any other sort of iterator:\n", - "  Yield `(input_batch, label_batch, sample_weight_batch)` tuples.\n", - "\n", - "A \"sample weights\" array is an array of numbers that specify how much weight\n", - "each sample in a batch should have in computing the total loss. It is commonly\n", - "used in imbalanced classification problems (the idea being to give more weight\n", - "to rarely-seen classes).\n", - "\n", - "When the weights used are ones and zeros, the array can be used as a *mask* for\n", - "the loss function (entirely discarding the contribution of certain samples to\n", - "the total loss)." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "sample_weight = np.ones(shape=(len(y_train),))\n", - "sample_weight[y_train == 5] = 2.0\n", - "\n", - "print(\"Fit with sample weight\")\n", - "model = get_compiled_model()\n", - "model.fit(x_train, y_train, sample_weight=sample_weight, batch_size=64, epochs=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Here's a matching `Dataset` example:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "sample_weight = np.ones(shape=(len(y_train),))\n", - "sample_weight[y_train == 5] = 2.0\n", - "\n", - "# Create a Dataset that includes sample weights\n", - "# (3rd element in the return tuple).\n", - "train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train, sample_weight))\n", - "\n", - "# Shuffle and slice the dataset.\n", - "train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)\n", - "\n", - "model = get_compiled_model()\n", - "model.fit(train_dataset, epochs=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Passing data to multi-input, multi-output models\n", - "\n", - "In the previous examples, we were considering a model with a single input (a tensor of\n", - "shape `(784,)`) and a single output (a prediction tensor of shape `(10,)`). But what\n", - "about models that have multiple inputs or outputs?\n", - "\n", - "Consider the following model, which has an image input of shape `(32, 32, 3)` (that's\n", - "`(height, width, channels)`) and a time series input of shape `(None, 10)` (that's\n", - "`(timesteps, features)`). 
Our model will have two outputs computed from the\n", - "combination of these inputs: a \"score\" (of shape `(1,)`) and a probability\n", - "distribution over five classes (of shape `(5,)`)." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "image_input = keras.Input(shape=(32, 32, 3), name=\"img_input\")\n", - "timeseries_input = keras.Input(shape=(None, 10), name=\"ts_input\")\n", - "\n", - "x1 = layers.Conv2D(3, 3)(image_input)\n", - "x1 = layers.GlobalMaxPooling2D()(x1)\n", - "\n", - "x2 = layers.Conv1D(3, 3)(timeseries_input)\n", - "x2 = layers.GlobalMaxPooling1D()(x2)\n", - "\n", - "x = layers.concatenate([x1, x2])\n", - "\n", - "score_output = layers.Dense(1, name=\"score_output\")(x)\n", - "class_output = layers.Dense(5, name=\"class_output\")(x)\n", - "\n", - "model = keras.Model(\n", - " inputs=[image_input, timeseries_input], outputs=[score_output, class_output]\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Let's plot this model, so you can clearly see what we're doing here (note that the\n", - "shapes shown in the plot are batch shapes, rather than per-sample shapes)." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "keras.utils.plot_model(model, \"multi_input_and_output_model.png\", show_shapes=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "At compilation time, we can specify different losses to different outputs, by passing\n", - "the loss functions as a list:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.compile(\n", - " optimizer=keras.optimizers.RMSprop(1e-3),\n", - " loss=[\n", - " keras.losses.MeanSquaredError(),\n", - " keras.losses.CategoricalCrossentropy(),\n", - " ],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "If we only passed a single loss function to the model, the same loss function would be\n", - "applied to every output (which is not appropriate here).\n", - "\n", - "Likewise for metrics:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.compile(\n", - " optimizer=keras.optimizers.RMSprop(1e-3),\n", - " loss=[\n", - " keras.losses.MeanSquaredError(),\n", - " keras.losses.CategoricalCrossentropy(),\n", - " ],\n", - " metrics=[\n", - " [\n", - " keras.metrics.MeanAbsolutePercentageError(),\n", - " keras.metrics.MeanAbsoluteError(),\n", - " ],\n", - " [keras.metrics.CategoricalAccuracy()],\n", - " ],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Since we gave names to our output layers, we could also specify per-output losses and\n", - "metrics via a dict:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.compile(\n", - " optimizer=keras.optimizers.RMSprop(1e-3),\n", - " loss={\n", - " \"score_output\": keras.losses.MeanSquaredError(),\n", - " \"class_output\": keras.losses.CategoricalCrossentropy(),\n", - " },\n", - " metrics={\n", - " \"score_output\": [\n", - " keras.metrics.MeanAbsolutePercentageError(),\n", - " keras.metrics.MeanAbsoluteError(),\n", 
- " ],\n", - " \"class_output\": [keras.metrics.CategoricalAccuracy()],\n", - " },\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "We recommend the use of explicit names and dicts if you have more than 2 outputs.\n", - "\n", - "It's possible to give different weights to different output-specific losses (for\n", - "instance, one might wish to privilege the \"score\" loss in our example, by giving to 2x\n", - "the importance of the class loss), using the `loss_weights` argument:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.compile(\n", - " optimizer=keras.optimizers.RMSprop(1e-3),\n", - " loss={\n", - " \"score_output\": keras.losses.MeanSquaredError(),\n", - " \"class_output\": keras.losses.CategoricalCrossentropy(),\n", - " },\n", - " metrics={\n", - " \"score_output\": [\n", - " keras.metrics.MeanAbsolutePercentageError(),\n", - " keras.metrics.MeanAbsoluteError(),\n", - " ],\n", - " \"class_output\": [keras.metrics.CategoricalAccuracy()],\n", - " },\n", - " loss_weights={\"score_output\": 2.0, \"class_output\": 1.0},\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "You could also choose not to compute a loss for certain outputs, if these outputs are\n", - "meant for prediction but not for training:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# List loss version\n", - "model.compile(\n", - " optimizer=keras.optimizers.RMSprop(1e-3),\n", - " loss=[None, keras.losses.CategoricalCrossentropy()],\n", - ")\n", - "\n", - "# Or dict loss version\n", - "model.compile(\n", - " optimizer=keras.optimizers.RMSprop(1e-3),\n", - " loss={\"class_output\": keras.losses.CategoricalCrossentropy()},\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Passing data to a multi-input or multi-output model in `fit()` works in a similar way as\n", - "specifying a loss function in compile: you can pass **lists of NumPy arrays** (with\n", - "1:1 mapping to the outputs that received a loss function) or **dicts mapping output\n", - "names to NumPy arrays**." 
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.compile(\n", - " optimizer=keras.optimizers.RMSprop(1e-3),\n", - " loss=[\n", - " keras.losses.MeanSquaredError(),\n", - " keras.losses.CategoricalCrossentropy(),\n", - " ],\n", - ")\n", - "\n", - "# Generate dummy NumPy data\n", - "img_data = np.random.random_sample(size=(100, 32, 32, 3))\n", - "ts_data = np.random.random_sample(size=(100, 20, 10))\n", - "score_targets = np.random.random_sample(size=(100, 1))\n", - "class_targets = np.random.random_sample(size=(100, 5))\n", - "\n", - "# Fit on lists\n", - "model.fit([img_data, ts_data], [score_targets, class_targets], batch_size=32, epochs=1)\n", - "\n", - "# Alternatively, fit on dicts\n", - "model.fit(\n", - " {\"img_input\": img_data, \"ts_input\": ts_data},\n", - " {\"score_output\": score_targets, \"class_output\": class_targets},\n", - " batch_size=32,\n", - " epochs=1,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Here's the `Dataset` use case: similarly as what we did for NumPy arrays, the `Dataset`\n", - "should return a tuple of dicts." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "train_dataset = tf.data.Dataset.from_tensor_slices(\n", - " (\n", - " {\"img_input\": img_data, \"ts_input\": ts_data},\n", - " {\"score_output\": score_targets, \"class_output\": class_targets},\n", - " )\n", - ")\n", - "train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)\n", - "\n", - "model.fit(train_dataset, epochs=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Using callbacks\n", - "\n", - "Callbacks in Keras are objects that are called at different points during training (at\n", - "the start of an epoch, at the end of a batch, at the end of an epoch, etc.). 
They\n", - "can be used to implement certain behaviors, such as:\n", - "\n", - "- Doing validation at different points during training (beyond the built-in per-epoch\n", - "validation)\n", - "- Checkpointing the model at regular intervals or when it exceeds a certain accuracy\n", - "threshold\n", - "- Changing the learning rate of the model when training seems to be plateauing\n", - "- Doing fine-tuning of the top layers when training seems to be plateauing\n", - "- Sending email or instant message notifications when training ends or where a certain\n", - "performance threshold is exceeded\n", - "- Etc.\n", - "\n", - "Callbacks can be passed as a list to your call to `fit()`:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = get_compiled_model()\n", - "\n", - "callbacks = [\n", - " keras.callbacks.EarlyStopping(\n", - " # Stop training when `val_loss` is no longer improving\n", - " monitor=\"val_loss\",\n", - " # \"no longer improving\" being defined as \"no better than 1e-2 less\"\n", - " min_delta=1e-2,\n", - " # \"no longer improving\" being further defined as \"for at least 2 epochs\"\n", - " patience=2,\n", - " verbose=1,\n", - " )\n", - "]\n", - "model.fit(\n", - " x_train,\n", - " y_train,\n", - " epochs=20,\n", - " batch_size=64,\n", - " callbacks=callbacks,\n", - " validation_split=0.2,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "### Many built-in callbacks are available\n", - "\n", - "There are many built-in callbacks already available in Keras, such as:\n", - "\n", - "- `ModelCheckpoint`: Periodically save the model.\n", - "- `EarlyStopping`: Stop training when training is no longer improving the validation\n", - "metrics.\n", - "- `TensorBoard`: periodically write model logs that can be visualized in\n", - "[TensorBoard](https://www.tensorflow.org/tensorboard) (more details in the section\n", - "\"Visualization\").\n", - "- `CSVLogger`: streams loss and metrics data to a CSV file.\n", - "- etc.\n", - "\n", - "See the [callbacks documentation](/api/callbacks/) for the complete list.\n", - "\n", - "### Writing your own callback\n", - "\n", - "You can create a custom callback by extending the base class\n", - "`keras.callbacks.Callback`. 
A callback has access to its associated model through the\n", - "class property `self.model`.\n", - "\n", - "Make sure to read the\n", - "[complete guide to writing custom callbacks](/guides/writing_your_own_callbacks/).\n", - "\n", - "Here's a simple example saving a list of per-batch loss values during training:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class LossHistory(keras.callbacks.Callback):\n", - " def on_train_begin(self, logs):\n", - " self.per_batch_losses = []\n", - "\n", - " def on_batch_end(self, batch, logs):\n", - " self.per_batch_losses.append(logs.get(\"loss\"))\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Checkpointing models\n", - "\n", - "When you're training model on relatively large datasets, it's crucial to save\n", - "checkpoints of your model at frequent intervals.\n", - "\n", - "The easiest way to achieve this is with the `ModelCheckpoint` callback:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = get_compiled_model()\n", - "\n", - "callbacks = [\n", - " keras.callbacks.ModelCheckpoint(\n", - " # Path where to save the model\n", - " # The two parameters below mean that we will overwrite\n", - " # the current checkpoint if and only if\n", - " # the `val_loss` score has improved.\n", - " # The saved model name will include the current epoch.\n", - " filepath=\"mymodel_{epoch}.keras\",\n", - " save_best_only=True, # Only save a model if `val_loss` has improved.\n", - " monitor=\"val_loss\",\n", - " verbose=1,\n", - " )\n", - "]\n", - "model.fit(\n", - " x_train,\n", - " y_train,\n", - " epochs=2,\n", - " batch_size=64,\n", - " callbacks=callbacks,\n", - " validation_split=0.2,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "The `ModelCheckpoint` callback can be used to implement fault-tolerance:\n", - "the ability to restart training from the last saved state of the model in case training\n", - "gets randomly interrupted. 
Here's a basic example:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Prepare a directory to store all the checkpoints.\n", - "checkpoint_dir = \"./ckpt\"\n", - "if not os.path.exists(checkpoint_dir):\n", - " os.makedirs(checkpoint_dir)\n", - "\n", - "\n", - "def make_or_restore_model():\n", - " # Either restore the latest model, or create a fresh one\n", - " # if there is no checkpoint available.\n", - " checkpoints = [checkpoint_dir + \"/\" + name for name in os.listdir(checkpoint_dir)]\n", - " if checkpoints:\n", - " latest_checkpoint = max(checkpoints, key=os.path.getctime)\n", - " print(\"Restoring from\", latest_checkpoint)\n", - " return keras.models.load_model(latest_checkpoint)\n", - " print(\"Creating a new model\")\n", - " return get_compiled_model()\n", - "\n", - "\n", - "model = make_or_restore_model()\n", - "callbacks = [\n", - " # This callback saves the model every 100 batches.\n", - " # We include the training loss in the saved model name.\n", - " keras.callbacks.ModelCheckpoint(\n", - " filepath=checkpoint_dir + \"/model-loss={loss:.2f}.keras\", save_freq=100\n", - " )\n", - "]\n", - "model.fit(x_train, y_train, epochs=1, callbacks=callbacks)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "You call also write your own callback for saving and restoring models.\n", - "\n", - "For a complete guide on serialization and saving, see the\n", - "[guide to saving and serializing Models](/guides/serialization_and_saving/)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Using learning rate schedules\n", - "\n", - "A common pattern when training deep learning models is to gradually reduce the learning\n", - "as training progresses. This is generally known as \"learning rate decay\".\n", - "\n", - "The learning decay schedule could be static (fixed in advance, as a function of the\n", - "current epoch or the current batch index), or dynamic (responding to the current\n", - "behavior of the model, in particular the validation loss).\n", - "\n", - "### Passing a schedule to an optimizer\n", - "\n", - "You can easily use a static learning rate decay schedule by passing a schedule object\n", - "as the `learning_rate` argument in your optimizer:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "initial_learning_rate = 0.1\n", - "lr_schedule = keras.optimizers.schedules.ExponentialDecay(\n", - " initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True\n", - ")\n", - "\n", - "optimizer = keras.optimizers.RMSprop(learning_rate=lr_schedule)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Several built-in schedules are available: `ExponentialDecay`, `PiecewiseConstantDecay`,\n", - "`PolynomialDecay`, and `InverseTimeDecay`.\n", - "\n", - "### Using callbacks to implement a dynamic learning rate schedule\n", - "\n", - "A dynamic learning rate schedule (for instance, decreasing the learning rate when the\n", - "validation loss is no longer improving) cannot be achieved with these schedule objects,\n", - "since the optimizer does not have access to validation metrics.\n", - "\n", - "However, callbacks do have access to all metrics, including validation metrics! 
You can\n", - "thus achieve this pattern by using a callback that modifies the current learning rate\n", - "on the optimizer. In fact, this is even built-in as the `ReduceLROnPlateau` callback." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Visualizing loss and metrics during training with TensorBoard\n", - "\n", - "The best way to keep an eye on your model during training is to use\n", - "[TensorBoard](https://www.tensorflow.org/tensorboard) -- a browser-based application\n", - "that you can run locally that provides you with:\n", - "\n", - "- Live plots of the loss and metrics for training and evaluation\n", - "- (optionally) Visualizations of the histograms of your layer activations\n", - "- (optionally) 3D visualizations of the embedding spaces learned by your `Embedding`\n", - "layers\n", - "\n", - "If you have installed TensorFlow with pip, you should be able to launch TensorBoard\n", - "from the command line:\n", - "\n", - "```\n", - "tensorboard --logdir=/full_path_to_your_logs\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "### Using the TensorBoard callback\n", - "\n", - "The easiest way to use TensorBoard with a Keras model and the `fit()` method is the\n", - "`TensorBoard` callback.\n", - "\n", - "In the simplest case, just specify where you want the callback to write logs, and\n", - "you're good to go:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "keras.callbacks.TensorBoard(\n", - " log_dir=\"/full_path_to_your_logs\",\n", - " histogram_freq=0, # How often to log histogram visualizations\n", - " embeddings_freq=0, # How often to log embedding visualizations\n", - " update_freq=\"epoch\",\n", - ") # How often to write logs (default: once per epoch)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "For more information, see the\n", - "[documentation for the `TensorBoard` callback](https://keras.io/api/callbacks/tensorboard/)." - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "training_with_built_in_methods", - "private_outputs": false, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/guides/ipynb/keras_core/transfer_learning.ipynb b/guides/ipynb/keras_core/transfer_learning.ipynb deleted file mode 100644 index 33f499c11a..0000000000 --- a/guides/ipynb/keras_core/transfer_learning.ipynb +++ /dev/null @@ -1,843 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "# Transfer learning & fine-tuning\n", - "\n", - "**Author:** [fchollet](https://twitter.com/fchollet)
\n", - "**Date created:** 2020/04/15
\n", - "**Last modified:** 2023/06/25
\n", - "**Description:** Complete guide to transfer learning & fine-tuning in Keras." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import keras\n", - "from keras import layers\n", - "import tensorflow_datasets as tfds\n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Introduction\n", - "\n", - "**Transfer learning** consists of taking features learned on one problem, and\n", - "leveraging them on a new, similar problem. For instance, features from a model that has\n", - "learned to identify racoons may be useful to kick-start a model meant to identify\n", - " tanukis.\n", - "\n", - "Transfer learning is usually done for tasks where your dataset has too little data to\n", - " train a full-scale model from scratch.\n", - "\n", - "The most common incarnation of transfer learning in the context of deep learning is the\n", - " following workflow:\n", - "\n", - "1. Take layers from a previously trained model.\n", - "2. Freeze them, so as to avoid destroying any of the information they contain during\n", - " future training rounds.\n", - "3. Add some new, trainable layers on top of the frozen layers. They will learn to turn\n", - " the old features into predictions on a new dataset.\n", - "4. Train the new layers on your dataset.\n", - "\n", - "A last, optional step, is **fine-tuning**, which consists of unfreezing the entire\n", - "model you obtained above (or part of it), and re-training it on the new data with a\n", - "very low learning rate. This can potentially achieve meaningful improvements, by\n", - " incrementally adapting the pretrained features to the new data.\n", - "\n", - "First, we will go over the Keras `trainable` API in detail, which underlies most\n", - " transfer learning & fine-tuning workflows.\n", - "\n", - "Then, we'll demonstrate the typical workflow by taking a model pretrained on the\n", - "ImageNet dataset, and retraining it on the Kaggle \"cats vs dogs\" classification\n", - " dataset.\n", - "\n", - "This is adapted from\n", - "[Deep Learning with Python](https://www.manning.com/books/deep-learning-with-python)\n", - "and the 2016 blog post\n", - "[\"building powerful image classification models using very little data\"](https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html)." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Freezing layers: understanding the `trainable` attribute\n", - "\n", - "Layers & models have three weight attributes:\n", - "\n", - "- `weights` is the list of all weights variables of the layer.\n", - "- `trainable_weights` is the list of those that are meant to be updated (via gradient\n", - " descent) to minimize the loss during training.\n", - "- `non_trainable_weights` is the list of those that aren't meant to be trained.\n", - " Typically they are updated by the model during the forward pass.\n", - "\n", - "**Example: the `Dense` layer has 2 trainable weights (kernel & bias)**" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "layer = keras.layers.Dense(3)\n", - "layer.build((None, 4)) # Create the weights\n", - "\n", - "print(\"weights:\", len(layer.weights))\n", - "print(\"trainable_weights:\", len(layer.trainable_weights))\n", - "print(\"non_trainable_weights:\", len(layer.non_trainable_weights))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "In general, all weights are trainable weights. The only built-in layer that has\n", - "non-trainable weights is the `BatchNormalization` layer. It uses non-trainable weights\n", - " to keep track of the mean and variance of its inputs during training.\n", - "To learn how to use non-trainable weights in your own custom layers, see the\n", - "[guide to writing new layers from scratch](/guides/making_new_layers_and_models_via_subclassing/).\n", - "\n", - "**Example: the `BatchNormalization` layer has 2 trainable weights and 2 non-trainable\n", - " weights**" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "layer = keras.layers.BatchNormalization()\n", - "layer.build((None, 4)) # Create the weights\n", - "\n", - "print(\"weights:\", len(layer.weights))\n", - "print(\"trainable_weights:\", len(layer.trainable_weights))\n", - "print(\"non_trainable_weights:\", len(layer.non_trainable_weights))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Layers & models also feature a boolean attribute `trainable`. Its value can be changed.\n", - "Setting `layer.trainable` to `False` moves all the layer's weights from trainable to\n", - "non-trainable. This is called \"freezing\" the layer: the state of a frozen layer won't\n", - "be updated during training (either when training with `fit()` or when training with\n", - " any custom loop that relies on `trainable_weights` to apply gradient updates).\n", - "\n", - "**Example: setting `trainable` to `False`**" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "layer = keras.layers.Dense(3)\n", - "layer.build((None, 4)) # Create the weights\n", - "layer.trainable = False # Freeze the layer\n", - "\n", - "print(\"weights:\", len(layer.weights))\n", - "print(\"trainable_weights:\", len(layer.trainable_weights))\n", - "print(\"non_trainable_weights:\", len(layer.non_trainable_weights))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "When a trainable weight becomes non-trainable, its value is no longer updated during\n", - " training." 
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Make a model with 2 layers\n", - "layer1 = keras.layers.Dense(3, activation=\"relu\")\n", - "layer2 = keras.layers.Dense(3, activation=\"sigmoid\")\n", - "model = keras.Sequential([keras.Input(shape=(3,)), layer1, layer2])\n", - "\n", - "# Freeze the first layer\n", - "layer1.trainable = False\n", - "\n", - "# Keep a copy of the weights of layer1 for later reference\n", - "initial_layer1_weights_values = layer1.get_weights()\n", - "\n", - "# Train the model\n", - "model.compile(optimizer=\"adam\", loss=\"mse\")\n", - "model.fit(np.random.random((2, 3)), np.random.random((2, 3)))\n", - "\n", - "# Check that the weights of layer1 have not changed during training\n", - "final_layer1_weights_values = layer1.get_weights()\n", - "np.testing.assert_allclose(\n", - " initial_layer1_weights_values[0], final_layer1_weights_values[0]\n", - ")\n", - "np.testing.assert_allclose(\n", - " initial_layer1_weights_values[1], final_layer1_weights_values[1]\n", - ")" - ] - },
- { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Do not confuse the `layer.trainable` attribute with the argument `training` in\n", - "`layer.__call__()` (which controls whether the layer should run its forward pass in\n", - " inference mode or training mode). For more information, see the\n", - "[Keras FAQ](\n", - " https://keras.io/getting_started/faq/#whats-the-difference-between-the-training-argument-in-call-and-the-trainable-attribute)." - ] - },
- { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Recursive setting of the `trainable` attribute\n", - "\n", - "If you set `trainable = False` on a model or on any layer that has sublayers,\n", - "all child layers become non-trainable as well.\n", - "\n", - "**Example:**" - ] - },
- { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "inner_model = keras.Sequential(\n", - " [\n", - " keras.Input(shape=(3,)),\n", - " keras.layers.Dense(3, activation=\"relu\"),\n", - " keras.layers.Dense(3, activation=\"relu\"),\n", - " ]\n", - ")\n", - "\n", - "model = keras.Sequential(\n", - " [\n", - " keras.Input(shape=(3,)),\n", - " inner_model,\n", - " keras.layers.Dense(3, activation=\"sigmoid\"),\n", - " ]\n", - ")\n", - "\n", - "model.trainable = False # Freeze the outer model\n", - "\n", - "assert inner_model.trainable == False # All layers in `model` are now frozen\n", - "assert inner_model.layers[0].trainable == False # `trainable` is propagated recursively" - ] - },
- { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## The typical transfer-learning workflow\n", - "\n", - "This leads us to how a typical transfer learning workflow can be implemented in Keras:\n", - "\n", - "1. Instantiate a base model and load pre-trained weights into it.\n", - "2. Freeze all layers in the base model by setting `trainable = False`.\n", - "3. Create a new model on top of the output of one (or several) layers from the base\n", - " model.\n", - "4. Train your new model on your new dataset.\n", - "\n", - "Note that an alternative, more lightweight workflow could also be:\n", - "\n", - "1. Instantiate a base model and load pre-trained weights into it.\n", - "2. Run your new dataset through it and record the output of one (or several) layers\n", - " from the base model. This is called **feature extraction**.\n", - "3. Use that output as input data for a new, smaller model.\n", - "\n", - "A key advantage of that second workflow is that you only run the base model once on\n", - " your data, rather than once per epoch of training. So it's a lot faster & cheaper.\n", - "\n", - "An issue with that second workflow, though, is that it doesn't allow you to dynamically\n", - "modify the input data of your new model during training, which is required when doing\n", - "data augmentation, for instance. Transfer learning is typically used for tasks where\n", - "your new dataset has too little data to train a full-scale model from scratch, and in\n", - "such scenarios data augmentation is very important. So in what follows, we will focus\n", - " on the first workflow.\n", - "\n", - "Here's what the first workflow looks like in Keras:\n", - "\n", - "First, instantiate a base model with pre-trained weights.\n", - "\n", - "```python\n", - "base_model = keras.applications.Xception(\n", - " weights='imagenet', # Load weights pre-trained on ImageNet.\n", - " input_shape=(150, 150, 3),\n", - " include_top=False) # Do not include the ImageNet classifier at the top.\n", - "```\n", - "\n", - "Then, freeze the base model.\n", - "\n", - "```python\n", - "base_model.trainable = False\n", - "```\n", - "\n", - "Create a new model on top.\n", - "\n", - "```python\n", - "inputs = keras.Input(shape=(150, 150, 3))\n", - "# We make sure that the base_model is running in inference mode here,\n", - "# by passing `training=False`. This is important for fine-tuning, as you will\n", - "# learn in a few paragraphs.\n", - "x = base_model(inputs, training=False)\n", - "# Convert features of shape `base_model.output_shape[1:]` to vectors\n", - "x = keras.layers.GlobalAveragePooling2D()(x)\n", - "# A Dense classifier with a single unit (binary classification)\n", - "outputs = keras.layers.Dense(1)(x)\n", - "model = keras.Model(inputs, outputs)\n", - "```\n", - "\n", - "Train the model on new data.\n", - "\n", - "```python\n", - "model.compile(optimizer=keras.optimizers.Adam(),\n", - " loss=keras.losses.BinaryCrossentropy(from_logits=True),\n", - " metrics=[keras.metrics.BinaryAccuracy()])\n", - "model.fit(new_dataset, epochs=20, callbacks=..., validation_data=...)\n", - "```" - ] - },
- { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Fine-tuning\n", - "\n", - "Once your model has converged on the new data, you can try to unfreeze all or part of\n", - " the base model and retrain the whole model end-to-end with a very low learning rate.\n", - "\n", - "This is an optional last step that can potentially give you incremental improvements.\n", - " It could also potentially lead to quick overfitting -- keep that in mind.\n", - "\n", - "It is critical to only do this step *after* the model with frozen layers has been\n", - "trained to convergence. If you mix randomly-initialized trainable layers with\n", - "trainable layers that hold pre-trained features, the randomly-initialized layers will\n", - "cause very large gradient updates during training, which will destroy your pre-trained\n", - " features.\n", - "\n", - "It's also critical to use a very low learning rate at this stage, because\n", - "you are training a much larger model than in the first round of training, on a dataset\n", - " that is typically very small.\n", - "As a result, you are at risk of overfitting very quickly if you apply large weight\n", - " updates. Here, you only want to readapt the pretrained weights in an incremental way.\n", - "\n", - "This is how to implement fine-tuning of the whole base model:\n", - "\n", - "```python\n", - "# Unfreeze the base model\n", - "base_model.trainable = True\n", - "\n", - "# It's important to recompile your model after you make any changes\n", - "# to the `trainable` attribute of any inner layer, so that your changes\n", - "# are taken into account\n", - "model.compile(optimizer=keras.optimizers.Adam(1e-5), # Very low learning rate\n", - " loss=keras.losses.BinaryCrossentropy(from_logits=True),\n", - " metrics=[keras.metrics.BinaryAccuracy()])\n", - "\n", - "# Train end-to-end. Be careful to stop before you overfit!\n", - "model.fit(new_dataset, epochs=10, callbacks=..., validation_data=...)\n", - "```\n", - "\n", - "**Important note about `compile()` and `trainable`**\n", - "\n", - "Calling `compile()` on a model is meant to \"freeze\" the behavior of that model. This\n", - " implies that the `trainable`\n", - "attribute values at the time the model is compiled should be preserved throughout the\n", - " lifetime of that model,\n", - "until `compile` is called again. Hence, if you change any `trainable` value, make sure\n", - " to call `compile()` again on your\n", - "model for your changes to be taken into account.\n", - "\n", - "**Important notes about `BatchNormalization` layer**\n", - "\n", - "Many image models contain `BatchNormalization` layers. That layer is a special case on\n", - " every imaginable count. Here are a few things to keep in mind.\n", - "\n", - "- `BatchNormalization` contains 2 non-trainable weights that get updated during\n", - "training. These are the variables tracking the mean and variance of the inputs.\n", - "- When you set `bn_layer.trainable = False`, the `BatchNormalization` layer will\n", - "run in inference mode, and will not update its mean & variance statistics. This is not\n", - "the case for other layers in general, as\n", - "[weight trainability & inference/training modes are two orthogonal concepts](\n", - " https://keras.io/getting_started/faq/#whats-the-difference-between-the-training-argument-in-call-and-the-trainable-attribute).\n", - "But the two are tied in the case of the `BatchNormalization` layer.\n", - "- When you unfreeze a model that contains `BatchNormalization` layers in order to do\n", - "fine-tuning, you should keep the `BatchNormalization` layers in inference mode by\n", - " passing `training=False` when calling the base model.\n", - "Otherwise the updates applied to the non-trainable weights will suddenly destroy\n", - "what the model has learned.\n", - "\n", - "You'll see this pattern in action in the end-to-end example at the end of this guide." - ] - },
- { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## An end-to-end example: fine-tuning an image classification model on a cats vs. dogs dataset\n", - "\n", - "To solidify these concepts, let's walk you through a concrete end-to-end transfer\n", - "learning & fine-tuning example. We will load the Xception model, pre-trained on\n", - " ImageNet, and use it on the Kaggle \"cats vs. dogs\" classification dataset." - ] - },
- { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "### Getting the data\n", - "\n", - "First, let's fetch the cats vs. dogs dataset using TFDS. If you have your own dataset,\n", - "you'll probably want to use the utility\n", - "`keras.utils.image_dataset_from_directory` to generate similar labeled\n", - " dataset objects from a set of images on disk filed into class-specific folders.\n", - "\n", - "Transfer learning is most useful when working with very small datasets. To keep our\n", - "dataset small, we will use 40% of the original training data (25,000 images) for\n", - " training, 10% for validation, and 10% for testing." - ] - },
- { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "tfds.disable_progress_bar()\n", - "\n", - "train_ds, validation_ds, test_ds = tfds.load(\n", - " \"cats_vs_dogs\",\n", - " # Reserve 10% for validation and 10% for test\n", - " split=[\"train[:40%]\", \"train[40%:50%]\", \"train[50%:60%]\"],\n", - " as_supervised=True, # Include labels\n", - ")\n", - "\n", - "print(f\"Number of training samples: {train_ds.cardinality()}\")\n", - "print(f\"Number of validation samples: {validation_ds.cardinality()}\")\n", - "print(f\"Number of test samples: {test_ds.cardinality()}\")" - ] - },
- { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "These are the first 9 images in the training dataset -- as you can see, they're all\n", - "different sizes." - ] - },
- { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "plt.figure(figsize=(10, 10))\n", - "for i, (image, label) in enumerate(train_ds.take(9)):\n", - " ax = plt.subplot(3, 3, i + 1)\n", - " plt.imshow(image)\n", - " plt.title(int(label))\n", - " plt.axis(\"off\")" - ] - },
- { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "We can also see that label 1 is \"dog\" and label 0 is \"cat\"." - ] - },
- { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "### Standardizing the data\n", - "\n", - "Our raw images have a variety of sizes. In addition, each pixel consists of 3 integer\n", - "values between 0 and 255 (RGB level values). This isn't a great fit for feeding a\n", - "neural network. We need to do 2 things:\n", - "\n", - "- Standardize to a fixed image size. We pick 150x150.\n", - "- Normalize pixel values between -1 and 1. We'll do this using a `Normalization` layer as\n", - "part of the model itself.\n", - "\n", - "In general, it's a good practice to develop models that take raw data as input, as\n", - "opposed to models that take already-preprocessed data. The reason is that, if your\n", - "model expects preprocessed data, any time you export your model to use it elsewhere\n", - "(in a web browser, in a mobile app), you'll need to reimplement the exact same\n", - "preprocessing pipeline. This gets very tricky very quickly. 
So we should do the least\n", - " possible amount of preprocessing before hitting the model.\n", - "\n", - "Here, we'll do image resizing in the data pipeline (because a deep neural network can\n", - "only process contiguous batches of data), and we'll do the input value scaling as part\n", - "of the model, when we create it.\n", - "\n", - "Let's resize images to 150x150:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "resize_fn = keras.layers.Resizing(150, 150)\n", - "\n", - "train_ds = train_ds.map(lambda x, y: (resize_fn(x), y))\n", - "validation_ds = validation_ds.map(lambda x, y: (resize_fn(x), y))\n", - "test_ds = test_ds.map(lambda x, y: (resize_fn(x), y))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "### Using random data augmentation\n", - "\n", - "When you don't have a large image dataset, it's a good practice to artificially\n", - "introduce sample diversity by applying random yet realistic transformations to\n", - "the training images, such as random horizontal flipping or small random rotations. This\n", - "helps expose the model to different aspects of the training data while slowing down\n", - "overfitting." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "augmentation_layers = [\n", - " layers.RandomFlip(\"horizontal\"),\n", - " layers.RandomRotation(0.1),\n", - "]\n", - "\n", - "\n", - "def data_augmentation(x):\n", - " for layer in augmentation_layers:\n", - " x = layer(x)\n", - " return x\n", - "\n", - "\n", - "train_ds = train_ds.map(lambda x, y: (data_augmentation(x), y))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Let's batch the data and use prefetching to optimize loading speed." 
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "from tensorflow import data as tf_data\n", - "\n", - "batch_size = 64\n", - "\n", - "train_ds = train_ds.batch(batch_size).prefetch(tf_data.AUTOTUNE).cache()\n", - "validation_ds = validation_ds.batch(batch_size).prefetch(tf_data.AUTOTUNE).cache()\n", - "test_ds = test_ds.batch(batch_size).prefetch(tf_data.AUTOTUNE).cache()" - ] - },
- { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Let's visualize what the first image of the first batch looks like after various random\n", - " transformations:" - ] - },
- { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "for images, labels in train_ds.take(1):\n", - " plt.figure(figsize=(10, 10))\n", - " first_image = images[0]\n", - " for i in range(9):\n", - " ax = plt.subplot(3, 3, i + 1)\n", - " augmented_image = data_augmentation(np.expand_dims(first_image, 0))\n", - " plt.imshow(np.array(augmented_image[0]).astype(\"int32\"))\n", - " plt.title(int(labels[0]))\n", - " plt.axis(\"off\")" - ] - },
- { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Build a model\n", - "\n", - "Now let's build a model that follows the blueprint we've explained earlier.\n", - "\n", - "Note that:\n", - "\n", - "- We add a `Rescaling` layer to scale input values (initially in the `[0, 255]`\n", - " range) to the `[-1, 1]` range.\n", - "- We add a `Dropout` layer before the classification layer, for regularization.\n", - "- We make sure to pass `training=False` when calling the base model, so that\n", - "it runs in inference mode, so that batchnorm statistics don't get updated\n", - "even after we unfreeze the base model for fine-tuning." - ] - },
- { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "base_model = keras.applications.Xception(\n", - " weights=\"imagenet\", # Load weights pre-trained on ImageNet.\n", - " input_shape=(150, 150, 3),\n", - " include_top=False,\n", - ") # Do not include the ImageNet classifier at the top.\n", - "\n", - "# Freeze the base_model\n", - "base_model.trainable = False\n", - "\n", - "# Create new model on top\n", - "inputs = keras.Input(shape=(150, 150, 3))\n", - "\n", - "# Pre-trained Xception weights require that input be scaled\n", - "# from (0, 255) to a range of (-1., +1.), the rescaling layer\n", - "# outputs: `(inputs * scale) + offset`\n", - "scale_layer = keras.layers.Rescaling(scale=1 / 127.5, offset=-1)\n", - "x = scale_layer(inputs)\n", - "\n", - "# The base model contains batchnorm layers. We want to keep them in inference mode\n", - "# when we unfreeze the base model for fine-tuning, so we make sure that the\n", - "# base_model is running in inference mode here.\n", - "x = base_model(x, training=False)\n", - "x = keras.layers.GlobalAveragePooling2D()(x)\n", - "x = keras.layers.Dropout(0.2)(x) # Regularize with dropout\n", - "outputs = keras.layers.Dense(1)(x)\n", - "model = keras.Model(inputs, outputs)\n", - "\n", - "model.summary(show_trainable=True)" - ] - },
- { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Train the top layer" - ] - },
- { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model.compile(\n", - " optimizer=keras.optimizers.Adam(),\n", - " loss=keras.losses.BinaryCrossentropy(from_logits=True),\n", - " metrics=[keras.metrics.BinaryAccuracy()],\n", - ")\n", - "\n", - "epochs = 2\n", - "print(\"Fitting the top layer of the model\")\n", - "model.fit(train_ds, epochs=epochs, validation_data=validation_ds)" - ] - },
- { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Do a round of fine-tuning of the entire model\n", - "\n", - "Finally, let's unfreeze the base model and train the entire model end-to-end with a low\n", - " learning rate.\n", - "\n", - "Importantly, although the base model becomes trainable, it is still running in\n", - "inference mode since we passed `training=False` when calling it when we built the\n", - "model. This means that the batch normalization layers inside won't update their batch\n", - "statistics. If they did, they would wreak havoc on the representations learned by the\n", - " model so far." - ] - },
- { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Unfreeze the base_model. Note that it keeps running in inference mode\n", - "# since we passed `training=False` when calling it. 
This means that\n", - "# the batchnorm layers will not update their batch statistics.\n", - "# This prevents the batchnorm layers from undoing all the training\n", - "# we've done so far.\n", - "base_model.trainable = True\n", - "model.summary(show_trainable=True)\n", - "\n", - "model.compile(\n", - " optimizer=keras.optimizers.Adam(1e-5), # Low learning rate\n", - " loss=keras.losses.BinaryCrossentropy(from_logits=True),\n", - " metrics=[keras.metrics.BinaryAccuracy()],\n", - ")\n", - "\n", - "epochs = 1\n", - "print(\"Fitting the end-to-end model\")\n", - "model.fit(train_ds, epochs=epochs, validation_data=validation_ds)" - ] - },
- { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "After just one epoch, fine-tuning gains us a nice improvement here.\n", - "Let's evaluate the model on the test dataset:" - ] - },
- { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "print(\"Test dataset evaluation\")\n", - "model.evaluate(test_ds)" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "transfer_learning", - "private_outputs": false, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/guides/ipynb/keras_core/understanding_masking_and_padding.ipynb b/guides/ipynb/keras_core/understanding_masking_and_padding.ipynb deleted file mode 100644 index c6a2b0c664..0000000000 --- a/guides/ipynb/keras_core/understanding_masking_and_padding.ipynb +++ /dev/null @@ -1,601 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "# Understanding masking & padding\n", - "\n", - "**Authors:** Scott Zhu, Francois Chollet<br>
\n", - "**Date created:** 2019/07/16
\n", - "**Last modified:** 2023/06/25
\n", - "**Description:** Complete guide to using mask-aware sequence layers in Keras." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import keras\n", - "from keras import ops\n", - "from keras import layers" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Introduction\n", - "\n", - "**Masking** is a way to tell sequence-processing layers that certain timesteps\n", - "in an input are missing, and thus should be skipped when processing the data.\n", - "\n", - "**Padding** is a special form of masking where the masked steps are at the start or\n", - "the end of a sequence. Padding comes from the need to encode sequence data into\n", - "contiguous batches: in order to make all sequences in a batch fit a given standard\n", - "length, it is necessary to pad or truncate some sequences.\n", - "\n", - "Let's take a close look." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Padding sequence data\n", - "\n", - "When processing sequence data, it is very common for individual samples to have\n", - "different lengths. Consider the following example (text tokenized as words):\n", - "\n", - "```\n", - "[\n", - " [\"Hello\", \"world\", \"!\"],\n", - " [\"How\", \"are\", \"you\", \"doing\", \"today\"],\n", - " [\"The\", \"weather\", \"will\", \"be\", \"nice\", \"tomorrow\"],\n", - "]\n", - "```\n", - "\n", - "After vocabulary lookup, the data might be vectorized as integers, e.g.:\n", - "\n", - "```\n", - "[\n", - " [71, 1331, 4231]\n", - " [73, 8, 3215, 55, 927],\n", - " [83, 91, 1, 645, 1253, 927],\n", - "]\n", - "```\n", - "\n", - "The data is a nested list where individual samples have length 3, 5, and 6,\n", - "respectively. Since the input data for a deep learning model must be a single tensor\n", - "(of shape e.g. `(batch_size, 6, vocab_size)` in this case), samples that are shorter\n", - "than the longest item need to be padded with some placeholder value (alternatively,\n", - "one might also truncate long samples before padding short samples).\n", - "\n", - "Keras provides a utility function to truncate and pad Python lists to a common length:\n", - "`keras.utils.pad_sequences`." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "raw_inputs = [\n", - " [711, 632, 71],\n", - " [73, 8, 3215, 55, 927],\n", - " [83, 91, 1, 645, 1253, 927],\n", - "]\n", - "\n", - "# By default, this will pad using 0s; it is configurable via the\n", - "# \"value\" parameter.\n", - "# Note that you could use \"pre\" padding (at the beginning) or\n", - "# \"post\" padding (at the end).\n", - "# We recommend using \"post\" padding when working with RNN layers\n", - "# (in order to be able to use the\n", - "# CuDNN implementation of the layers).\n", - "padded_inputs = keras.utils.pad_sequences(raw_inputs, padding=\"post\")\n", - "print(padded_inputs)\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Masking\n", - "\n", - "Now that all samples have a uniform length, the model must be informed that some part\n", - "of the data is actually padding and should be ignored. 
That mechanism is **masking**.\n", - "\n", - "There are three ways to introduce input masks in Keras models:\n", - "\n", - "- Add a `keras.layers.Masking` layer.\n", - "- Configure a `keras.layers.Embedding` layer with `mask_zero=True`.\n", - "- Pass a `mask` argument manually when calling layers that support this argument (e.g.\n", - "RNN layers)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Mask-generating layers: `Embedding` and `Masking`\n", - "\n", - "Under the hood, these layers will create a mask tensor (2D tensor with shape `(batch,\n", - "sequence_length)`), and attach it to the tensor output returned by the `Masking` or\n", - "`Embedding` layer." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "embedding = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)\n", - "masked_output = embedding(padded_inputs)\n", - "\n", - "print(masked_output._keras_mask)\n", - "\n", - "masking_layer = layers.Masking()\n", - "# Simulate the embedding lookup by expanding the 2D input to 3D,\n", - "# with embedding dimension of 10.\n", - "unmasked_embedding = ops.cast(\n", - " ops.tile(ops.expand_dims(padded_inputs, axis=-1), [1, 1, 10]),\n", - " dtype=\"float32\",\n", - ")\n", - "\n", - "masked_embedding = masking_layer(unmasked_embedding)\n", - "print(masked_embedding._keras_mask)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "As you can see from the printed result, the mask is a 2D boolean tensor with shape\n", - "`(batch_size, sequence_length)`, where each individual `False` entry indicates that\n", - "the corresponding timestep should be ignored during processing." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Mask propagation in the Functional API and Sequential API\n", - "\n", - "When using the Functional API or the Sequential API, a mask generated by an `Embedding`\n", - "or `Masking` layer will be propagated through the network for any layer that is\n", - "capable of using them (for example, RNN layers). 
Keras will automatically fetch the\n", - "mask corresponding to an input and pass it to any layer that knows how to use it.\n", - "\n", - "For instance, in the following Sequential model, the `LSTM` layer will automatically\n", - "receive a mask, which means it will ignore padded values:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = keras.Sequential(\n", - " [\n", - " layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True),\n", - " layers.LSTM(32),\n", - " ]\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "This is also the case for the following Functional API model:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "inputs = keras.Input(shape=(None,), dtype=\"int32\")\n", - "x = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)(inputs)\n", - "outputs = layers.LSTM(32)(x)\n", - "\n", - "model = keras.Model(inputs, outputs)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Passing mask tensors directly to layers" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Layers that can handle masks (such as the `LSTM` layer) have a `mask` argument in their\n", - "`__call__` method.\n", - "\n", - "Meanwhile, layers that produce a mask (e.g. `Embedding`) expose a `compute_mask(input,\n", - "previous_mask)` method which you can call.\n", - "\n", - "Thus, you can pass the output of the `compute_mask()` method of a mask-producing layer\n", - "to the `__call__` method of a mask-consuming layer, like this:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class MyLayer(layers.Layer):\n", - " def __init__(self, **kwargs):\n", - " super().__init__(**kwargs)\n", - " self.embedding = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)\n", - " self.lstm = layers.LSTM(32)\n", - "\n", - " def call(self, inputs):\n", - " x = self.embedding(inputs)\n", - " # Note that you could also prepare a `mask` tensor manually.\n", - " # It only needs to be a boolean tensor\n", - " # with the right shape, i.e. 
(batch_size, timesteps).\n", - " mask = self.embedding.compute_mask(inputs)\n", - " output = self.lstm(x, mask=mask) # The layer will ignore the masked values\n", - " return output\n", - "\n", - "\n", - "layer = MyLayer()\n", - "x = np.random.random((32, 10)) * 100\n", - "x = x.astype(\"int32\")\n", - "layer(x)" - ] - },
- { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Supporting masking in your custom layers" - ] - },
- { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Sometimes, you may need to write layers that generate a mask (like `Embedding`), or\n", - "layers that need to modify the current mask.\n", - "\n", - "For instance, any layer that produces a tensor with a different time dimension than its\n", - "input, such as a `Concatenate` layer that concatenates on the time dimension, will\n", - "need to modify the current mask so that downstream layers will be able to properly\n", - "take masked timesteps into account.\n", - "\n", - "To do this, your layer should implement the `layer.compute_mask()` method, which\n", - "produces a new mask given the input and the current mask.\n", - "\n", - "Here is an example of a `TemporalSplit` layer that needs to modify the current mask." - ] - },
- { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class TemporalSplit(keras.layers.Layer):\n", - " \"\"\"Split the input tensor into 2 tensors along the time dimension.\"\"\"\n", - "\n", - " def call(self, inputs):\n", - " # Expect the input to be 3D and mask to be 2D, split the input tensor into 2\n", - " # subtensors along the time axis (axis 1).\n", - " return ops.split(inputs, 2, axis=1)\n", - "\n", - " def compute_mask(self, inputs, mask=None):\n", - " # Also split the mask into 2 if it is present.\n", - " if mask is None:\n", - " return None\n", - " return ops.split(mask, 2, axis=1)\n", - "\n", - "\n", - "first_half, second_half = TemporalSplit()(masked_embedding)\n", - "print(first_half._keras_mask)\n", - "print(second_half._keras_mask)" - ] - },
- { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Here is another example of a `CustomEmbedding` layer that is capable of generating a\n", - "mask from input values:" - ] - },
- { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class CustomEmbedding(keras.layers.Layer):\n", - " def __init__(self, input_dim, output_dim, mask_zero=False, **kwargs):\n", - " super().__init__(**kwargs)\n", - " self.input_dim = input_dim\n", - " self.output_dim = output_dim\n", - " self.mask_zero = mask_zero\n", - "\n", - " def build(self, input_shape):\n", - " self.embeddings = self.add_weight(\n", - " shape=(self.input_dim, self.output_dim),\n", - " initializer=\"random_normal\",\n", - " dtype=\"float32\",\n", - " )\n", - "\n", - " def call(self, inputs):\n", - " inputs = ops.cast(inputs, \"int32\")\n", - " return ops.take(self.embeddings, inputs)\n", - "\n", - " def compute_mask(self, inputs, mask=None):\n", - " if not self.mask_zero:\n", - " return None\n", - " return ops.not_equal(inputs, 0)\n", - "\n", - "\n", - "layer = CustomEmbedding(10, 32, mask_zero=True)\n", - "x = np.random.random((3, 10)) * 9\n", - "x = x.astype(\"int32\")\n", - "\n", - "y = layer(x)\n", - "mask = layer.compute_mask(x)\n", - "\n", - "print(mask)" - ] - },
- { - "cell_type": "markdown", - "metadata": {
"colab_type": "text" - }, - "source": [ - "Note: For more details about format limitations related to masking, see the\n", - "[serialization guide](/guides/serialization_and_saving)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Opting-in to mask propagation on compatible layers\n", - "\n", - "Most layers don't modify the time dimension, so don't need to modify the current mask.\n", - "However, they may still want to be able to **propagate** the current mask, unchanged,\n", - "to the next layer. **This is an opt-in behavior.** By default, a custom layer will\n", - "destroy the current mask (since the framework has no way to tell whether propagating\n", - "the mask is safe to do).\n", - "\n", - "If you have a custom layer that does not modify the time dimension, and if you want it\n", - "to be able to propagate the current input mask, you should set `self.supports_masking\n", - "= True` in the layer constructor. In this case, the default behavior of\n", - "`compute_mask()` is to just pass the current mask through.\n", - "\n", - "Here's an example of a layer that is whitelisted for mask propagation:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class MyActivation(keras.layers.Layer):\n", - " def __init__(self, **kwargs):\n", - " super().__init__(**kwargs)\n", - " # Signal that the layer is safe for mask propagation\n", - " self.supports_masking = True\n", - "\n", - " def call(self, inputs):\n", - " return ops.relu(inputs)\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "You can now use this custom layer in-between a mask-generating layer (like `Embedding`)\n", - "and a mask-consuming layer (like `LSTM`), and it will pass the mask along so that it\n", - "reaches the mask-consuming layer." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "inputs = keras.Input(shape=(None,), dtype=\"int32\")\n", - "x = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)(inputs)\n", - "x = MyActivation()(x) # Will pass the mask along\n", - "print(\"Mask found:\", x._keras_mask)\n", - "outputs = layers.LSTM(32)(x) # Will receive the mask\n", - "\n", - "model = keras.Model(inputs, outputs)\n", - "y = model(np.random.randint(0, 5000, size=(32, 100)))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Writing layers that need mask information\n", - "\n", - "Some layers are mask *consumers*: they accept a `mask` argument in `call` and use it to\n", - "determine whether to skip certain time steps.\n", - "\n", - "To write such a layer, you can simply add a `mask=None` argument in your `call`\n", - "signature. The mask associated with the inputs will be passed to your layer whenever\n", - "it is available.\n", - "\n", - "Here's a simple example below: a layer that computes a softmax over the time dimension\n", - "(axis 1) of an input sequence, while discarding masked timesteps." 
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "\n",
-    "class TemporalSoftmax(keras.layers.Layer):\n",
-    "    def __init__(self, **kwargs):\n",
-    "        super().__init__(**kwargs)\n",
-    "        self.supports_masking = True\n",
-    "\n",
-    "    def call(self, inputs, mask=None):\n",
-    "        assert mask is not None\n",
-    "        broadcast_float_mask = ops.expand_dims(ops.cast(mask, \"float32\"), -1)\n",
-    "        # Zero out the exponentials at masked timesteps.\n",
-    "        inputs_exp = ops.exp(inputs) * broadcast_float_mask\n",
-    "        # Normalize over the time dimension (axis 1).\n",
-    "        inputs_sum = ops.sum(inputs_exp, axis=1, keepdims=True)\n",
-    "        return inputs_exp / inputs_sum\n",
-    "\n",
-    "\n",
-    "inputs = keras.Input(shape=(None,), dtype=\"int32\")\n",
-    "x = layers.Embedding(input_dim=10, output_dim=32, mask_zero=True)(inputs)\n",
-    "x = layers.Dense(1)(x)\n",
-    "outputs = TemporalSoftmax()(x)\n",
-    "\n",
-    "model = keras.Model(inputs, outputs)\n",
-    "y = model(np.random.randint(0, 10, size=(32, 100)))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "## Summary\n",
-    "\n",
-    "That is all you need to know about padding & masking in Keras. To recap:\n",
-    "\n",
-    "- \"Masking\" is how layers are able to know when to skip / ignore certain timesteps in\n",
-    "sequence inputs.\n",
-    "- Some layers are mask-generators: `Embedding` can generate a mask from input values\n",
-    "(if `mask_zero=True`), and so can the `Masking` layer.\n",
-    "- Some layers are mask-consumers: they expose a `mask` argument in their `__call__`\n",
-    "method. This is the case for RNN layers.\n",
-    "- In the Functional API and Sequential API, mask information is propagated\n",
-    "automatically.\n",
-    "- When using layers in a standalone way, you can pass the `mask` argument to layers\n",
-    "manually.\n",
-    "- You can easily write layers that modify the current mask, that generate a new mask,\n",
-    "or that consume the mask associated with the inputs."
-   ]
-  }
- ],
- "metadata": {
-  "accelerator": "None",
-  "colab": {
-   "collapsed_sections": [],
-   "name": "understanding_masking_and_padding",
-   "private_outputs": false,
-   "provenance": [],
-   "toc_visible": true
-  },
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.0"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
\ No newline at end of file
diff --git a/guides/ipynb/keras_core/writing_a_custom_training_loop_in_jax.ipynb b/guides/ipynb/keras_core/writing_a_custom_training_loop_in_jax.ipynb
deleted file mode 100644
index af92435de3..0000000000
--- a/guides/ipynb/keras_core/writing_a_custom_training_loop_in_jax.ipynb
+++ /dev/null
@@ -1,779 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "# Writing a training loop from scratch in JAX\n",
-    "\n",
-    "**Author:** [fchollet](https://twitter.com/fchollet)<br>
\n", - "**Date created:** 2023/06/25
\n", - "**Last modified:** 2023/06/25
\n", - "**Description:** Writing low-level training & evaluation loops in JAX." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "# This guide can only be run with the jax backend.\n", - "os.environ[\"KERAS_BACKEND\"] = \"jax\"\n", - "\n", - "import jax\n", - "\n", - "# We import TF so we can use tf.data.\n", - "import tensorflow as tf\n", - "import keras\n", - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Introduction\n", - "\n", - "Keras provides default training and evaluation loops, `fit()` and `evaluate()`.\n", - "Their usage is covered in the guide\n", - "[Training & evaluation with the built-in methods](/guides/training_with_built_in_methods/).\n", - "\n", - "If you want to customize the learning algorithm of your model while still leveraging\n", - "the convenience of `fit()`\n", - "(for instance, to train a GAN using `fit()`), you can subclass the `Model` class and\n", - "implement your own `train_step()` method, which\n", - "is called repeatedly during `fit()`.\n", - "\n", - "Now, if you want very low-level control over training & evaluation, you should write\n", - "your own training & evaluation loops from scratch. This is what this guide is about." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## A first end-to-end example\n", - "\n", - "To write a custom training loop, we need the following ingredients:\n", - "\n", - "- A model to train, of course.\n", - "- An optimizer. You could either use an optimizer from `keras.optimizers`, or\n", - "one from the `optax` package.\n", - "- A loss function.\n", - "- A dataset. 
The standard in the JAX ecosystem is to load data via `tf.data`,\n",
-    "so that's what we'll use.\n",
-    "\n",
-    "Let's line them up.\n",
-    "\n",
-    "First, let's get the model and the MNIST dataset:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "\n",
-    "def get_model():\n",
-    "    inputs = keras.Input(shape=(784,), name=\"digits\")\n",
-    "    x1 = keras.layers.Dense(64, activation=\"relu\")(inputs)\n",
-    "    x2 = keras.layers.Dense(64, activation=\"relu\")(x1)\n",
-    "    outputs = keras.layers.Dense(10, name=\"predictions\")(x2)\n",
-    "    model = keras.Model(inputs=inputs, outputs=outputs)\n",
-    "    return model\n",
-    "\n",
-    "\n",
-    "model = get_model()\n",
-    "\n",
-    "# Prepare the training dataset.\n",
-    "batch_size = 32\n",
-    "(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()\n",
-    "x_train = np.reshape(x_train, (-1, 784)).astype(\"float32\")\n",
-    "x_test = np.reshape(x_test, (-1, 784)).astype(\"float32\")\n",
-    "y_train = keras.utils.to_categorical(y_train)\n",
-    "y_test = keras.utils.to_categorical(y_test)\n",
-    "\n",
-    "# Reserve 10,000 samples for validation.\n",
-    "x_val = x_train[-10000:]\n",
-    "y_val = y_train[-10000:]\n",
-    "x_train = x_train[:-10000]\n",
-    "y_train = y_train[:-10000]\n",
-    "\n",
-    "# Prepare the training dataset.\n",
-    "train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))\n",
-    "train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)\n",
-    "\n",
-    "# Prepare the validation dataset.\n",
-    "val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))\n",
-    "val_dataset = val_dataset.batch(batch_size)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "Next, here's the loss function and the optimizer.\n",
-    "We'll use a Keras optimizer in this case."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "# Instantiate a loss function.\n",
-    "loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)\n",
-    "\n",
-    "# Instantiate an optimizer.\n",
-    "optimizer = keras.optimizers.Adam(learning_rate=1e-3)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Getting gradients in JAX\n",
-    "\n",
-    "Let's train our model using mini-batch gradient with a custom training loop.\n",
-    "\n",
-    "In JAX, gradients are computed via *metaprogramming*: you call `jax.grad` (or\n",
-    "`jax.value_and_grad`) on a function in order to create a gradient-computing function\n",
-    "for that first function.\n",
-    "\n",
-    "So the first thing we need is a function that returns the loss value.\n",
-    "That's the function we'll use to generate the gradient function. Something like this:\n",
-    "\n",
-    "```python\n",
-    "def compute_loss(x, y):\n",
-    "    ...\n",
-    "    return loss\n",
-    "```\n",
-    "\n",
-    "Once you have such a function, you can compute gradients via metaprogramming like this:\n",
-    "\n",
-    "```python\n",
-    "grad_fn = jax.grad(compute_loss)\n",
-    "grads = grad_fn(x, y)\n",
-    "```\n",
-    "\n",
-    "Typically, you don't just want to get the gradient values, you also want to get\n",
-    "the loss value. You can do this by using `jax.value_and_grad` instead of `jax.grad`:\n",
-    "\n",
-    "```python\n",
-    "grad_fn = jax.value_and_grad(compute_loss)\n",
-    "loss, grads = grad_fn(x, y)\n",
-    "```\n",
-    "\n",
-    "### JAX computation is purely stateless\n",
-    "\n",
-    "In JAX, everything must be a stateless function -- so our loss computation function\n",
-    "must be stateless as well. That means that all Keras variables (e.g. weight tensors)\n",
-    "must be passed as function inputs, and any variable that has been updated during the\n",
-    "forward pass must be returned as function output. The function must have no side effects.\n",
-    "\n",
-    "During the forward pass, the non-trainable variables of a Keras model might get\n",
-    "updated. These variables could be, for instance, RNG seed state variables or\n",
-    "BatchNormalization statistics. We're going to need to return those. So we need\n",
-    "something like this:\n",
-    "\n",
-    "```python\n",
-    "def compute_loss_and_updates(trainable_variables, non_trainable_variables, x, y):\n",
-    "    ...\n",
-    "    return loss, non_trainable_variables\n",
-    "```\n",
-    "\n",
-    "Once you have such a function, you can get the gradient function by\n",
-    "specifying `has_aux` in `value_and_grad`: it tells JAX that the loss\n",
-    "computation function returns more outputs than just the loss. Note that the loss\n",
-    "should always be the first output.\n",
-    "\n",
-    "```python\n",
-    "grad_fn = jax.value_and_grad(compute_loss_and_updates, has_aux=True)\n",
-    "(loss, non_trainable_variables), grads = grad_fn(\n",
-    "    trainable_variables, non_trainable_variables, x, y\n",
-    ")\n",
-    "```\n",
-    "\n",
-    "Now that we have established the basics,\n",
-    "let's implement this `compute_loss_and_updates` function.\n",
-    "Keras models have a `stateless_call` method which will come in handy here.\n",
-    "It works just like `model.__call__`, but it requires you to explicitly\n",
-    "pass the value of all the variables in the model, and it returns not just\n",
-    "the `__call__` outputs but also the (potentially updated) non-trainable\n",
-    "variables."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "\n",
-    "def compute_loss_and_updates(trainable_variables, non_trainable_variables, x, y):\n",
-    "    y_pred, non_trainable_variables = model.stateless_call(\n",
-    "        trainable_variables, non_trainable_variables, x\n",
-    "    )\n",
-    "    loss = loss_fn(y, y_pred)\n",
-    "    return loss, non_trainable_variables\n",
-    ""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "Let's get the gradient function:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "grad_fn = jax.value_and_grad(compute_loss_and_updates, has_aux=True)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### The training step function\n",
-    "\n",
-    "Next, let's implement the end-to-end training step: the function\n",
-    "that will run the forward pass, compute the loss, compute the gradients,\n",
-    "and also use the optimizer to update the trainable variables. This function\n",
-    "also needs to be stateless, so it will get as input a `state` tuple that\n",
-    "includes every state element we're going to use:\n",
-    "\n",
-    "- `trainable_variables` and `non_trainable_variables`: the model's variables.\n",
-    "- `optimizer_variables`: the optimizer's state variables,\n",
-    "such as momentum accumulators.\n",
-    "\n",
-    "To update the trainable variables, we use the optimizer's stateless method\n",
-    "`stateless_apply`. It's equivalent to `optimizer.apply()`, but it requires\n",
-    "always passing `trainable_variables` and `optimizer_variables`. It returns\n",
-    "both the updated trainable variables and the updated optimizer variables."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "\n",
-    "def train_step(state, data):\n",
-    "    trainable_variables, non_trainable_variables, optimizer_variables = state\n",
-    "    x, y = data\n",
-    "    (loss, non_trainable_variables), grads = grad_fn(\n",
-    "        trainable_variables, non_trainable_variables, x, y\n",
-    "    )\n",
-    "    trainable_variables, optimizer_variables = optimizer.stateless_apply(\n",
-    "        optimizer_variables, grads, trainable_variables\n",
-    "    )\n",
-    "    # Return updated state\n",
-    "    return loss, (\n",
-    "        trainable_variables,\n",
-    "        non_trainable_variables,\n",
-    "        optimizer_variables,\n",
-    "    )\n",
-    ""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "### Make it fast with `jax.jit`\n",
-    "\n",
-    "By default, JAX operations run eagerly,\n",
-    "just like in TensorFlow eager mode and PyTorch eager mode.\n",
-    "And just like TensorFlow eager mode and PyTorch eager mode, it's pretty slow\n",
-    "-- eager mode is better used as a debugging environment, not as a way to do\n",
-    "any actual work. So let's make our `train_step` fast by compiling it.\n",
-    "\n",
-    "When you have a stateless JAX function, you can compile it to XLA via the\n",
-    "`@jax.jit` decorator. It will get traced during its first execution, and in\n",
-    "subsequent executions you will be executing the traced graph (this is just\n",
-    "like `@tf.function(jit_compile=True)`). Let's try it:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "\n",
-    "@jax.jit\n",
-    "def train_step(state, data):\n",
-    "    trainable_variables, non_trainable_variables, optimizer_variables = state\n",
-    "    x, y = data\n",
-    "    (loss, non_trainable_variables), grads = grad_fn(\n",
-    "        trainable_variables, non_trainable_variables, x, y\n",
-    "    )\n",
-    "    trainable_variables, optimizer_variables = optimizer.stateless_apply(\n",
-    "        optimizer_variables, grads, trainable_variables\n",
-    "    )\n",
-    "    # Return updated state\n",
-    "    return loss, (\n",
-    "        trainable_variables,\n",
-    "        non_trainable_variables,\n",
-    "        optimizer_variables,\n",
-    "    )\n",
-    ""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "We're now ready to train our model. The training loop itself\n",
-    "is trivial: we just repeatedly call `loss, state = train_step(state, data)`.\n",
-    "\n",
-    "Note:\n",
-    "\n",
-    "- We convert the TF tensors yielded by the `tf.data.Dataset` to NumPy\n",
-    "before passing them to our JAX function.\n",
-    "- All variables must be built beforehand:\n",
-    "the model must be built and the optimizer must be built. 
Since we're using a\n", - "Functional API model, it's already built, but if it were a subclassed model\n", - "you'd need to call it on a batch of data to build it." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Build optimizer variables.\n", - "optimizer.build(model.trainable_variables)\n", - "\n", - "trainable_variables = model.trainable_variables\n", - "non_trainable_variables = model.non_trainable_variables\n", - "optimizer_variables = optimizer.variables\n", - "state = trainable_variables, non_trainable_variables, optimizer_variables\n", - "\n", - "# Training loop\n", - "for step, data in enumerate(train_dataset):\n", - " data = (data[0].numpy(), data[1].numpy())\n", - " loss, state = train_step(state, data)\n", - " # Log every 100 batches.\n", - " if step % 100 == 0:\n", - " print(f\"Training loss (for 1 batch) at step {step}: {float(loss):.4f}\")\n", - " print(f\"Seen so far: {(step + 1) * batch_size} samples\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "A key thing to notice here is that the loop is entirely stateless -- the variables\n", - "attached to the model (`model.weights`) are never getting updated during the loop.\n", - "Their new values are only stored in the `state` tuple. That means that at some point,\n", - "before saving the model, you should be attaching the new variable values back to the model.\n", - "\n", - "Just call `variable.assign(new_value)` on each model variable you want to update:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "trainable_variables, non_trainable_variables, optimizer_variables = state\n", - "for variable, value in zip(model.trainable_variables, trainable_variables):\n", - " variable.assign(value)\n", - "for variable, value in zip(model.non_trainable_variables, non_trainable_variables):\n", - " variable.assign(value)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Low-level handling of metrics\n", - "\n", - "Let's add metrics monitoring to this basic training loop.\n", - "\n", - "You can readily reuse built-in Keras metrics (or custom ones you wrote) in such training\n", - "loops written from scratch. 
Here's the flow:\n",
-    "\n",
-    "- Instantiate the metric at the start of the loop\n",
-    "- Include `metric_variables` in the `train_step` arguments\n",
-    "and `compute_loss_and_updates` arguments.\n",
-    "- Call `metric.stateless_update_state()` in the `compute_loss_and_updates` function.\n",
-    "It's equivalent to `update_state()` -- only stateless.\n",
-    "- When you need to display the current value of the metric, outside the `train_step`\n",
-    "(in the eager scope), attach the new metric variable values to the metric object\n",
-    "and call `metric.result()`.\n",
-    "- Call `metric.reset_state()` when you need to clear the state of the metric\n",
-    "(typically at the end of an epoch)\n",
-    "\n",
-    "Let's use this knowledge to compute `CategoricalAccuracy` on training and\n",
-    "validation data at the end of training:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "# Get a fresh model\n",
-    "model = get_model()\n",
-    "\n",
-    "# Instantiate an optimizer to train the model.\n",
-    "optimizer = keras.optimizers.Adam(learning_rate=1e-3)\n",
-    "# Instantiate a loss function.\n",
-    "loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)\n",
-    "\n",
-    "# Prepare the metrics.\n",
-    "train_acc_metric = keras.metrics.CategoricalAccuracy()\n",
-    "val_acc_metric = keras.metrics.CategoricalAccuracy()\n",
-    "\n",
-    "\n",
-    "def compute_loss_and_updates(\n",
-    "    trainable_variables, non_trainable_variables, metric_variables, x, y\n",
-    "):\n",
-    "    y_pred, non_trainable_variables = model.stateless_call(\n",
-    "        trainable_variables, non_trainable_variables, x\n",
-    "    )\n",
-    "    loss = loss_fn(y, y_pred)\n",
-    "    metric_variables = train_acc_metric.stateless_update_state(\n",
-    "        metric_variables, y, y_pred\n",
-    "    )\n",
-    "    return loss, (non_trainable_variables, metric_variables)\n",
-    "\n",
-    "\n",
-    "grad_fn = jax.value_and_grad(compute_loss_and_updates, has_aux=True)\n",
-    "\n",
-    "\n",
-    "@jax.jit\n",
-    "def train_step(state, data):\n",
-    "    (\n",
-    "        trainable_variables,\n",
-    "        non_trainable_variables,\n",
-    "        optimizer_variables,\n",
-    "        metric_variables,\n",
-    "    ) = state\n",
-    "    x, y = data\n",
-    "    (loss, (non_trainable_variables, metric_variables)), grads = grad_fn(\n",
-    "        trainable_variables, non_trainable_variables, metric_variables, x, y\n",
-    "    )\n",
-    "    trainable_variables, optimizer_variables = optimizer.stateless_apply(\n",
-    "        optimizer_variables, grads, trainable_variables\n",
-    "    )\n",
-    "    # Return updated state\n",
-    "    return loss, (\n",
-    "        trainable_variables,\n",
-    "        non_trainable_variables,\n",
-    "        optimizer_variables,\n",
-    "        metric_variables,\n",
-    "    )\n",
-    ""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "We'll also prepare an evaluation step function:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "\n",
-    "@jax.jit\n",
-    "def eval_step(state, data):\n",
-    "    trainable_variables, non_trainable_variables, metric_variables = state\n",
-    "    x, y = data\n",
-    "    y_pred, non_trainable_variables = model.stateless_call(\n",
-    "        trainable_variables, non_trainable_variables, x\n",
-    "    )\n",
-    "    loss = loss_fn(y, y_pred)\n",
-    "    metric_variables = val_acc_metric.stateless_update_state(\n",
-    "        metric_variables, y, y_pred\n",
-    "    )\n",
-    "    return loss, (\n",
-    "        trainable_variables,\n",
-    "        non_trainable_variables,\n",
-    "        metric_variables,\n",
-    "    )\n",
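-    "\n",
-    "\n",
-    "# Like `train_step`, `eval_step` is stateless: the updated `metric_variables`\n",
-    "# it returns must be assigned back to `val_acc_metric` (as done in the loop\n",
-    "# below) before `val_acc_metric.result()` reflects the new updates.\n",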
- "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Here are our loops:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Build optimizer variables.\n", - "optimizer.build(model.trainable_variables)\n", - "\n", - "trainable_variables = model.trainable_variables\n", - "non_trainable_variables = model.non_trainable_variables\n", - "optimizer_variables = optimizer.variables\n", - "metric_variables = train_acc_metric.variables\n", - "state = (\n", - " trainable_variables,\n", - " non_trainable_variables,\n", - " optimizer_variables,\n", - " metric_variables,\n", - ")\n", - "\n", - "# Training loop\n", - "for step, data in enumerate(train_dataset):\n", - " data = (data[0].numpy(), data[1].numpy())\n", - " loss, state = train_step(state, data)\n", - " # Log every 100 batches.\n", - " if step % 100 == 0:\n", - " print(f\"Training loss (for 1 batch) at step {step}: {float(loss):.4f}\")\n", - " _, _, _, metric_variables = state\n", - " for variable, value in zip(train_acc_metric.variables, metric_variables):\n", - " variable.assign(value)\n", - " print(f\"Training accuracy: {train_acc_metric.result()}\")\n", - " print(f\"Seen so far: {(step + 1) * batch_size} samples\")\n", - "\n", - "metric_variables = val_acc_metric.variables\n", - "(\n", - " trainable_variables,\n", - " non_trainable_variables,\n", - " optimizer_variables,\n", - " metric_variables,\n", - ") = state\n", - "state = trainable_variables, non_trainable_variables, metric_variables\n", - "\n", - "# Eval loop\n", - "for step, data in enumerate(val_dataset):\n", - " data = (data[0].numpy(), data[1].numpy())\n", - " loss, state = eval_step(state, data)\n", - " # Log every 100 batches.\n", - " if step % 100 == 0:\n", - " print(f\"Validation loss (for 1 batch) at step {step}: {float(loss):.4f}\")\n", - " _, _, metric_variables = state\n", - " for variable, value in zip(val_acc_metric.variables, metric_variables):\n", - " variable.assign(value)\n", - " print(f\"Validation accuracy: {val_acc_metric.result()}\")\n", - " print(f\"Seen so far: {(step + 1) * batch_size} samples\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Low-level handling of losses tracked by the model\n", - "\n", - "Layers & models recursively track any losses created during the forward pass\n", - "by layers that call `self.add_loss(value)`. 
-    "values is available via the property `model.losses`\n",
-    "at the end of the forward pass.\n",
-    "\n",
-    "If you want to use these loss components, you should sum them\n",
-    "and add them to the main loss in your training step.\n",
-    "\n",
-    "Consider this layer, which creates an activity regularization loss:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "\n",
-    "class ActivityRegularizationLayer(keras.layers.Layer):\n",
-    "    def call(self, inputs):\n",
-    "        self.add_loss(1e-2 * jax.numpy.sum(inputs))\n",
-    "        return inputs\n",
-    ""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "Let's build a really simple model that uses it:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "inputs = keras.Input(shape=(784,), name=\"digits\")\n",
-    "x = keras.layers.Dense(64, activation=\"relu\")(inputs)\n",
-    "# Insert activity regularization as a layer\n",
-    "x = ActivityRegularizationLayer()(x)\n",
-    "x = keras.layers.Dense(64, activation=\"relu\")(x)\n",
-    "outputs = keras.layers.Dense(10, name=\"predictions\")(x)\n",
-    "\n",
-    "model = keras.Model(inputs=inputs, outputs=outputs)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "Here's what our `compute_loss_and_updates` function should look like now:\n",
-    "\n",
-    "- Pass `return_losses=True` to `model.stateless_call()`.\n",
-    "- Sum the resulting `losses` and add them to the main loss."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "\n",
-    "def compute_loss_and_updates(\n",
-    "    trainable_variables, non_trainable_variables, metric_variables, x, y\n",
-    "):\n",
-    "    y_pred, non_trainable_variables, losses = model.stateless_call(\n",
-    "        trainable_variables, non_trainable_variables, x, return_losses=True\n",
-    "    )\n",
-    "    loss = loss_fn(y, y_pred)\n",
-    "    if losses:\n",
-    "        loss += jax.numpy.sum(losses)\n",
-    "    metric_variables = train_acc_metric.stateless_update_state(\n",
-    "        metric_variables, y, y_pred\n",
-    "    )\n",
-    "    # Group the aux outputs into a single tuple, as expected by `has_aux=True`.\n",
-    "    return loss, (non_trainable_variables, metric_variables)\n",
-    ""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "That's it!\n",
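-    "\n",
-    "Note that `grad_fn` still wraps the previous version of\n",
-    "`compute_loss_and_updates`, so it needs to be re-derived for the new signature.\n",
-    "A minimal sketch, assuming `trainable_variables`, `non_trainable_variables`,\n",
-    "`metric_variables`, and a data batch `(x, y)` are in scope:\n",
-    "\n",
-    "```python\n",
-    "grad_fn = jax.value_and_grad(compute_loss_and_updates, has_aux=True)\n",
-    "(loss, (non_trainable_variables, metric_variables)), grads = grad_fn(\n",
-    "    trainable_variables, non_trainable_variables, metric_variables, x, y\n",
-    ")\n",
-    "```"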
- ] - } - ], - "metadata": { - "accelerator": "None", - "colab": { - "collapsed_sections": [], - "name": "writing_a_custom_training_loop_in_jax", - "private_outputs": false, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/guides/ipynb/keras_core/writing_a_custom_training_loop_in_tensorflow.ipynb b/guides/ipynb/keras_core/writing_a_custom_training_loop_in_tensorflow.ipynb deleted file mode 100644 index cfeba90063..0000000000 --- a/guides/ipynb/keras_core/writing_a_custom_training_loop_in_tensorflow.ipynb +++ /dev/null @@ -1,806 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "# Writing a training loop from scratch in TensorFlow\n", - "\n", - "**Author:** [fchollet](https://twitter.com/fchollet)
\n", - "**Date created:** 2019/03/01
\n", - "**Last modified:** 2023/06/25
\n", - "**Description:** Writing low-level training & evaluation loops in TensorFlow." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "import time\n", - "import os\n", - "\n", - "# This guide can only be run with the TensorFlow backend.\n", - "os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n", - "\n", - "import tensorflow as tf\n", - "import keras\n", - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Introduction\n", - "\n", - "Keras provides default training and evaluation loops, `fit()` and `evaluate()`.\n", - "Their usage is covered in the guide\n", - "[Training & evaluation with the built-in methods](/guides/training_with_built_in_methods/).\n", - "\n", - "If you want to customize the learning algorithm of your model while still leveraging\n", - "the convenience of `fit()`\n", - "(for instance, to train a GAN using `fit()`), you can subclass the `Model` class and\n", - "implement your own `train_step()` method, which\n", - "is called repeatedly during `fit()`.\n", - "\n", - "Now, if you want very low-level control over training & evaluation, you should write\n", - "your own training & evaluation loops from scratch. This is what this guide is about." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## A first end-to-end example\n", - "\n", - "Let's consider a simple MNIST model:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "def get_model():\n", - " inputs = keras.Input(shape=(784,), name=\"digits\")\n", - " x1 = keras.layers.Dense(64, activation=\"relu\")(inputs)\n", - " x2 = keras.layers.Dense(64, activation=\"relu\")(x1)\n", - " outputs = keras.layers.Dense(10, name=\"predictions\")(x2)\n", - " model = keras.Model(inputs=inputs, outputs=outputs)\n", - " return model\n", - "\n", - "\n", - "model = get_model()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Let's train it using mini-batch gradient with a custom training loop.\n", - "\n", - "First, we're going to need an optimizer, a loss function, and a dataset:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Instantiate an optimizer.\n", - "optimizer = keras.optimizers.Adam(learning_rate=1e-3)\n", - "# Instantiate a loss function.\n", - "loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", - "\n", - "# Prepare the training dataset.\n", - "batch_size = 32\n", - "(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()\n", - "x_train = np.reshape(x_train, (-1, 784))\n", - "x_test = np.reshape(x_test, (-1, 784))\n", - "\n", - "# Reserve 10,000 samples for validation.\n", - "x_val = x_train[-10000:]\n", - "y_val = y_train[-10000:]\n", - "x_train = x_train[:-10000]\n", - "y_train = y_train[:-10000]\n", - "\n", - "# Prepare the training dataset.\n", - "train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))\n", - "train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)\n", - "\n", - "# Prepare the validation dataset.\n", - "val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))\n", - "val_dataset 
= val_dataset.batch(batch_size)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Calling a model inside a `GradientTape` scope enables you to retrieve the gradients of\n", - "the trainable weights of the layer with respect to a loss value. Using an optimizer\n", - "instance, you can use these gradients to update these variables (which you can\n", - "retrieve using `model.trainable_weights`).\n", - "\n", - "Here's our training loop, step by step:\n", - "\n", - "- We open a `for` loop that iterates over epochs\n", - "- For each epoch, we open a `for` loop that iterates over the dataset, in batches\n", - "- For each batch, we open a `GradientTape()` scope\n", - "- Inside this scope, we call the model (forward pass) and compute the loss\n", - "- Outside the scope, we retrieve the gradients of the weights\n", - "of the model with regard to the loss\n", - "- Finally, we use the optimizer to update the weights of the model based on the\n", - "gradients" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "epochs = 3\n", - "for epoch in range(epochs):\n", - " print(f\"\\nStart of epoch {epoch}\")\n", - "\n", - " # Iterate over the batches of the dataset.\n", - " for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):\n", - " # Open a GradientTape to record the operations run\n", - " # during the forward pass, which enables auto-differentiation.\n", - " with tf.GradientTape() as tape:\n", - " # Run the forward pass of the layer.\n", - " # The operations that the layer applies\n", - " # to its inputs are going to be recorded\n", - " # on the GradientTape.\n", - " logits = model(x_batch_train, training=True) # Logits for this minibatch\n", - "\n", - " # Compute the loss value for this minibatch.\n", - " loss_value = loss_fn(y_batch_train, logits)\n", - "\n", - " # Use the gradient tape to automatically retrieve\n", - " # the gradients of the trainable variables with respect to the loss.\n", - " grads = tape.gradient(loss_value, model.trainable_weights)\n", - "\n", - " # Run one step of gradient descent by updating\n", - " # the value of the variables to minimize the loss.\n", - " optimizer.apply(grads, model.trainable_weights)\n", - "\n", - " # Log every 100 batches.\n", - " if step % 100 == 0:\n", - " print(\n", - " f\"Training loss (for 1 batch) at step {step}: {float(loss_value):.4f}\"\n", - " )\n", - " print(f\"Seen so far: {(step + 1) * batch_size} samples\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Low-level handling of metrics\n", - "\n", - "Let's add metrics monitoring to this basic loop.\n", - "\n", - "You can readily reuse the built-in metrics (or custom ones you wrote) in such training\n", - "loops written from scratch. 
Here's the flow:\n", - "\n", - "- Instantiate the metric at the start of the loop\n", - "- Call `metric.update_state()` after each batch\n", - "- Call `metric.result()` when you need to display the current value of the metric\n", - "- Call `metric.reset_state()` when you need to clear the state of the metric\n", - "(typically at the end of an epoch)\n", - "\n", - "Let's use this knowledge to compute `SparseCategoricalAccuracy` on training and\n", - "validation data at the end of each epoch:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Get a fresh model\n", - "model = get_model()\n", - "\n", - "# Instantiate an optimizer to train the model.\n", - "optimizer = keras.optimizers.Adam(learning_rate=1e-3)\n", - "# Instantiate a loss function.\n", - "loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", - "\n", - "# Prepare the metrics.\n", - "train_acc_metric = keras.metrics.SparseCategoricalAccuracy()\n", - "val_acc_metric = keras.metrics.SparseCategoricalAccuracy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Here's our training & evaluation loop:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "epochs = 2\n", - "for epoch in range(epochs):\n", - " print(f\"\\nStart of epoch {epoch}\")\n", - " start_time = time.time()\n", - "\n", - " # Iterate over the batches of the dataset.\n", - " for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):\n", - " with tf.GradientTape() as tape:\n", - " logits = model(x_batch_train, training=True)\n", - " loss_value = loss_fn(y_batch_train, logits)\n", - " grads = tape.gradient(loss_value, model.trainable_weights)\n", - " optimizer.apply(grads, model.trainable_weights)\n", - "\n", - " # Update training metric.\n", - " train_acc_metric.update_state(y_batch_train, logits)\n", - "\n", - " # Log every 100 batches.\n", - " if step % 100 == 0:\n", - " print(\n", - " f\"Training loss (for 1 batch) at step {step}: {float(loss_value):.4f}\"\n", - " )\n", - " print(f\"Seen so far: {(step + 1) * batch_size} samples\")\n", - "\n", - " # Display metrics at the end of each epoch.\n", - " train_acc = train_acc_metric.result()\n", - " print(f\"Training acc over epoch: {float(train_acc):.4f}\")\n", - "\n", - " # Reset training metrics at the end of each epoch\n", - " train_acc_metric.reset_state()\n", - "\n", - " # Run a validation loop at the end of each epoch.\n", - " for x_batch_val, y_batch_val in val_dataset:\n", - " val_logits = model(x_batch_val, training=False)\n", - " # Update val metrics\n", - " val_acc_metric.update_state(y_batch_val, val_logits)\n", - " val_acc = val_acc_metric.result()\n", - " val_acc_metric.reset_state()\n", - " print(f\"Validation acc: {float(val_acc):.4f}\")\n", - " print(f\"Time taken: {time.time() - start_time:.2f}s\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Speeding-up your training step with `tf.function`\n", - "\n", - "The default runtime in TensorFlow is eager execution.\n", - "As such, our training loop above executes eagerly.\n", - "\n", - "This is great for debugging, but graph compilation has a definite performance\n", - "advantage. Describing your computation as a static graph enables the framework\n", - "to apply global performance optimizations. 
This is impossible when\n", - "the framework is constrained to greedily execute one operation after another,\n", - "with no knowledge of what comes next.\n", - "\n", - "You can compile into a static graph any function that takes tensors as input.\n", - "Just add a `@tf.function` decorator on it, like this:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "@tf.function\n", - "def train_step(x, y):\n", - " with tf.GradientTape() as tape:\n", - " logits = model(x, training=True)\n", - " loss_value = loss_fn(y, logits)\n", - " grads = tape.gradient(loss_value, model.trainable_weights)\n", - " optimizer.apply(grads, model.trainable_weights)\n", - " train_acc_metric.update_state(y, logits)\n", - " return loss_value\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Let's do the same with the evaluation step:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "@tf.function\n", - "def test_step(x, y):\n", - " val_logits = model(x, training=False)\n", - " val_acc_metric.update_state(y, val_logits)\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Now, let's re-run our training loop with this compiled training step:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "epochs = 2\n", - "for epoch in range(epochs):\n", - " print(f\"\\nStart of epoch {epoch}\")\n", - " start_time = time.time()\n", - "\n", - " # Iterate over the batches of the dataset.\n", - " for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):\n", - " loss_value = train_step(x_batch_train, y_batch_train)\n", - "\n", - " # Log every 100 batches.\n", - " if step % 100 == 0:\n", - " print(\n", - " f\"Training loss (for 1 batch) at step {step}: {float(loss_value):.4f}\"\n", - " )\n", - " print(f\"Seen so far: {(step + 1) * batch_size} samples\")\n", - "\n", - " # Display metrics at the end of each epoch.\n", - " train_acc = train_acc_metric.result()\n", - " print(f\"Training acc over epoch: {float(train_acc):.4f}\")\n", - "\n", - " # Reset training metrics at the end of each epoch\n", - " train_acc_metric.reset_state()\n", - "\n", - " # Run a validation loop at the end of each epoch.\n", - " for x_batch_val, y_batch_val in val_dataset:\n", - " test_step(x_batch_val, y_batch_val)\n", - "\n", - " val_acc = val_acc_metric.result()\n", - " val_acc_metric.reset_state()\n", - " print(f\"Validation acc: {float(val_acc):.4f}\")\n", - " print(f\"Time taken: {time.time() - start_time:.2f}s\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Much faster, isn't it?" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Low-level handling of losses tracked by the model\n", - "\n", - "Layers & models recursively track any losses created during the forward pass\n", - "by layers that call `self.add_loss(value)`. 
The resulting list of scalar loss\n",
-    "values is available via the property `model.losses`\n",
-    "at the end of the forward pass.\n",
-    "\n",
-    "If you want to use these loss components, you should sum them\n",
-    "and add them to the main loss in your training step.\n",
-    "\n",
-    "Consider this layer, which creates an activity regularization loss:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "\n",
-    "class ActivityRegularizationLayer(keras.layers.Layer):\n",
-    "    def call(self, inputs):\n",
-    "        self.add_loss(1e-2 * tf.reduce_sum(inputs))\n",
-    "        return inputs\n",
-    ""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "Let's build a really simple model that uses it:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "inputs = keras.Input(shape=(784,), name=\"digits\")\n",
-    "x = keras.layers.Dense(64, activation=\"relu\")(inputs)\n",
-    "# Insert activity regularization as a layer\n",
-    "x = ActivityRegularizationLayer()(x)\n",
-    "x = keras.layers.Dense(64, activation=\"relu\")(x)\n",
-    "outputs = keras.layers.Dense(10, name=\"predictions\")(x)\n",
-    "\n",
-    "model = keras.Model(inputs=inputs, outputs=outputs)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "Here's what our training step should look like now:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "\n",
-    "@tf.function\n",
-    "def train_step(x, y):\n",
-    "    with tf.GradientTape() as tape:\n",
-    "        logits = model(x, training=True)\n",
-    "        loss_value = loss_fn(y, logits)\n",
-    "        # Add any extra losses created during the forward pass.\n",
-    "        loss_value += sum(model.losses)\n",
-    "    grads = tape.gradient(loss_value, model.trainable_weights)\n",
-    "    optimizer.apply(grads, model.trainable_weights)\n",
-    "    train_acc_metric.update_state(y, logits)\n",
-    "    return loss_value\n",
-    ""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "## Summary\n",
-    "\n",
-    "Now you know everything there is to know about using built-in training loops and\n",
-    "writing your own from scratch.\n",
-    "\n",
-    "To conclude, here's a simple end-to-end example that ties together everything\n",
-    "you've learned in this guide: a DCGAN trained on MNIST digits."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "## End-to-end example: a GAN training loop from scratch\n",
-    "\n",
-    "You may be familiar with Generative Adversarial Networks (GANs). GANs can generate new\n",
-    "images that look almost real, by learning the latent distribution of a training\n",
-    "dataset of images (the \"latent space\" of the images).\n",
-    "\n",
-    "A GAN is made of two parts: a \"generator\" model that maps points in the latent\n",
-    "space to points in image space, and a \"discriminator\" model, a classifier\n",
-    "that can tell the difference between real images (from the training dataset)\n",
-    "and fake images (the output of the generator network).\n",
-    "\n",
-    "A GAN training loop looks like this:\n",
-    "\n",
-    "1) Train the discriminator.\n",
-    "- Sample a batch of random points in the latent space.\n",
-    "- Turn the points into fake images via the \"generator\" model.\n",
-    "- Get a batch of real images and combine them with the generated images.\n",
-    "- Train the \"discriminator\" model to classify generated vs. real images.\n",
-    "\n",
-    "2) Train the generator.\n",
-    "- Sample random points in the latent space.\n",
-    "- Turn the points into fake images via the \"generator\" network.\n",
-    "- Get a batch of real images and combine them with the generated images.\n",
-    "- Train the \"generator\" model to \"fool\" the discriminator and classify the fake images\n",
-    "as real.\n",
-    "\n",
-    "For a much more detailed overview of how GANs work, see\n",
-    "[Deep Learning with Python](https://www.manning.com/books/deep-learning-with-python).\n",
-    "\n",
-    "Let's implement this training loop. First, create the discriminator meant to classify\n",
-    "fake vs. real digits:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "discriminator = keras.Sequential(\n",
-    "    [\n",
-    "        keras.Input(shape=(28, 28, 1)),\n",
-    "        keras.layers.Conv2D(64, (3, 3), strides=(2, 2), padding=\"same\"),\n",
-    "        keras.layers.LeakyReLU(negative_slope=0.2),\n",
-    "        keras.layers.Conv2D(128, (3, 3), strides=(2, 2), padding=\"same\"),\n",
-    "        keras.layers.LeakyReLU(negative_slope=0.2),\n",
-    "        keras.layers.GlobalMaxPooling2D(),\n",
-    "        keras.layers.Dense(1),\n",
-    "    ],\n",
-    "    name=\"discriminator\",\n",
-    ")\n",
-    "discriminator.summary()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "Then let's create a generator network,\n",
-    "that turns latent vectors into outputs of shape `(28, 28, 1)` (representing\n",
-    "MNIST digits):"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "latent_dim = 128\n",
-    "\n",
-    "generator = keras.Sequential(\n",
-    "    [\n",
-    "        keras.Input(shape=(latent_dim,)),\n",
-    "        # We want to generate 128 coefficients to reshape into a 7x7x128 map\n",
-    "        keras.layers.Dense(7 * 7 * 128),\n",
-    "        keras.layers.LeakyReLU(negative_slope=0.2),\n",
-    "        keras.layers.Reshape((7, 7, 128)),\n",
-    "        keras.layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding=\"same\"),\n",
-    "        keras.layers.LeakyReLU(negative_slope=0.2),\n",
-    "        keras.layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding=\"same\"),\n",
-    "        keras.layers.LeakyReLU(negative_slope=0.2),\n",
-    "        keras.layers.Conv2D(1, (7, 7), padding=\"same\", activation=\"sigmoid\"),\n",
-    "    ],\n",
-    "    name=\"generator\",\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "Here's the key bit: the training loop. As you can see, it is quite straightforward. The\n",
-    "training step function only takes 17 lines."
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Instantiate one optimizer for the discriminator and another for the generator.\n", - "d_optimizer = keras.optimizers.Adam(learning_rate=0.0003)\n", - "g_optimizer = keras.optimizers.Adam(learning_rate=0.0004)\n", - "\n", - "# Instantiate a loss function.\n", - "loss_fn = keras.losses.BinaryCrossentropy(from_logits=True)\n", - "\n", - "\n", - "@tf.function\n", - "def train_step(real_images):\n", - " # Sample random points in the latent space\n", - " random_latent_vectors = tf.random.normal(shape=(batch_size, latent_dim))\n", - " # Decode them to fake images\n", - " generated_images = generator(random_latent_vectors)\n", - " # Combine them with real images\n", - " combined_images = tf.concat([generated_images, real_images], axis=0)\n", - "\n", - " # Assemble labels discriminating real from fake images\n", - " labels = tf.concat(\n", - " [tf.ones((batch_size, 1)), tf.zeros((real_images.shape[0], 1))], axis=0\n", - " )\n", - " # Add random noise to the labels - important trick!\n", - " labels += 0.05 * tf.random.uniform(labels.shape)\n", - "\n", - " # Train the discriminator\n", - " with tf.GradientTape() as tape:\n", - " predictions = discriminator(combined_images)\n", - " d_loss = loss_fn(labels, predictions)\n", - " grads = tape.gradient(d_loss, discriminator.trainable_weights)\n", - " d_optimizer.apply(grads, discriminator.trainable_weights)\n", - "\n", - " # Sample random points in the latent space\n", - " random_latent_vectors = tf.random.normal(shape=(batch_size, latent_dim))\n", - " # Assemble labels that say \"all real images\"\n", - " misleading_labels = tf.zeros((batch_size, 1))\n", - "\n", - " # Train the generator (note that we should *not* update the weights\n", - " # of the discriminator)!\n", - " with tf.GradientTape() as tape:\n", - " predictions = discriminator(generator(random_latent_vectors))\n", - " g_loss = loss_fn(misleading_labels, predictions)\n", - " grads = tape.gradient(g_loss, generator.trainable_weights)\n", - " g_optimizer.apply(grads, generator.trainable_weights)\n", - " return d_loss, g_loss, generated_images\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Let's train our GAN, by repeatedly calling `train_step` on batches of images.\n", - "\n", - "Since our discriminator and generator are convnets, you're going to want to\n", - "run this code on a GPU." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Prepare the dataset. 
We use both the training & test MNIST digits.\n", - "batch_size = 64\n", - "(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()\n", - "all_digits = np.concatenate([x_train, x_test])\n", - "all_digits = all_digits.astype(\"float32\") / 255.0\n", - "all_digits = np.reshape(all_digits, (-1, 28, 28, 1))\n", - "dataset = tf.data.Dataset.from_tensor_slices(all_digits)\n", - "dataset = dataset.shuffle(buffer_size=1024).batch(batch_size)\n", - "\n", - "epochs = 1 # In practice you need at least 20 epochs to generate nice digits.\n", - "save_dir = \"./\"\n", - "\n", - "for epoch in range(epochs):\n", - " print(f\"\\nStart epoch {epoch}\")\n", - "\n", - " for step, real_images in enumerate(dataset):\n", - " # Train the discriminator & generator on one batch of real images.\n", - " d_loss, g_loss, generated_images = train_step(real_images)\n", - "\n", - " # Logging.\n", - " if step % 100 == 0:\n", - " # Print metrics\n", - " print(f\"discriminator loss at step {step}: {d_loss:.2f}\")\n", - " print(f\"adversarial loss at step {step}: {g_loss:.2f}\")\n", - "\n", - " # Save one generated image\n", - " img = keras.utils.array_to_img(generated_images[0] * 255.0, scale=False)\n", - " img.save(os.path.join(save_dir, f\"generated_img_{step}.png\"))\n", - "\n", - " # To limit execution time we stop after 10 steps.\n", - " # Remove the lines below to actually train the model!\n", - " if step > 10:\n", - " break" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "That's it! You'll get nice-looking fake MNIST digits after just ~30s of training on the\n", - "Colab GPU." - ] - } - ], - "metadata": { - "accelerator": "None", - "colab": { - "collapsed_sections": [], - "name": "writing_a_custom_training_loop_in_tensorflow", - "private_outputs": false, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/guides/ipynb/keras_core/writing_a_custom_training_loop_in_torch.ipynb b/guides/ipynb/keras_core/writing_a_custom_training_loop_in_torch.ipynb deleted file mode 100644 index 2acc1ac632..0000000000 --- a/guides/ipynb/keras_core/writing_a_custom_training_loop_in_torch.ipynb +++ /dev/null @@ -1,575 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "# Writing a training loop from scratch in PyTorch\n", - "\n", - "**Author:** [fchollet](https://twitter.com/fchollet)
\n", - "**Date created:** 2023/06/25
\n", - "**Last modified:** 2023/06/25
\n", - "**Description:** Writing low-level training & evaluation loops in PyTorch." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "# This guide can only be run with the torch backend.\n", - "os.environ[\"KERAS_BACKEND\"] = \"torch\"\n", - "\n", - "import torch\n", - "import keras\n", - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Introduction\n", - "\n", - "Keras provides default training and evaluation loops, `fit()` and `evaluate()`.\n", - "Their usage is covered in the guide\n", - "[Training & evaluation with the built-in methods](/guides/training_with_built_in_methods/).\n", - "\n", - "If you want to customize the learning algorithm of your model while still leveraging\n", - "the convenience of `fit()`\n", - "(for instance, to train a GAN using `fit()`), you can subclass the `Model` class and\n", - "implement your own `train_step()` method, which\n", - "is called repeatedly during `fit()`.\n", - "\n", - "Now, if you want very low-level control over training & evaluation, you should write\n", - "your own training & evaluation loops from scratch. This is what this guide is about." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## A first end-to-end example\n", - "\n", - "To write a custom training loop, we need the following ingredients:\n", - "\n", - "- A model to train, of course.\n", - "- An optimizer. You could either use a `keras.optimizers` optimizer,\n", - "or a native PyTorch optimizer from `torch.optim`.\n", - "- A loss function. You could either use a `keras.losses` loss,\n", - "or a native PyTorch loss from `torch.nn`.\n", - "- A dataset. You could use any format: a `tf.data.Dataset`,\n", - "a PyTorch `DataLoader`, a Python generator, etc.\n", - "\n", - "Let's line them up. 
We'll use torch-native objects in each case --\n", - "except, of course, for the Keras model.\n", - "\n", - "First, let's get the model and the MNIST dataset:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "# Let's consider a simple MNIST model\n", - "def get_model():\n", - " inputs = keras.Input(shape=(784,), name=\"digits\")\n", - " x1 = keras.layers.Dense(64, activation=\"relu\")(inputs)\n", - " x2 = keras.layers.Dense(64, activation=\"relu\")(x1)\n", - " outputs = keras.layers.Dense(10, name=\"predictions\")(x2)\n", - " model = keras.Model(inputs=inputs, outputs=outputs)\n", - " return model\n", - "\n", - "\n", - "# Load the MNIST dataset and put it in a torch DataLoader\n", - "# Prepare the training dataset.\n", - "batch_size = 32\n", - "(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()\n", - "x_train = np.reshape(x_train, (-1, 784)).astype(\"float32\")\n", - "x_test = np.reshape(x_test, (-1, 784)).astype(\"float32\")\n", - "y_train = keras.utils.to_categorical(y_train)\n", - "y_test = keras.utils.to_categorical(y_test)\n", - "\n", - "# Reserve 10,000 samples for validation.\n", - "x_val = x_train[-10000:]\n", - "y_val = y_train[-10000:]\n", - "x_train = x_train[:-10000]\n", - "y_train = y_train[:-10000]\n", - "\n", - "# Create torch Datasets\n", - "train_dataset = torch.utils.data.TensorDataset(\n", - " torch.from_numpy(x_train), torch.from_numpy(y_train)\n", - ")\n", - "val_dataset = torch.utils.data.TensorDataset(\n", - " torch.from_numpy(x_val), torch.from_numpy(y_val)\n", - ")\n", - "\n", - "# Create DataLoaders for the Datasets\n", - "train_dataloader = torch.utils.data.DataLoader(\n", - " train_dataset, batch_size=batch_size, shuffle=True\n", - ")\n", - "val_dataloader = torch.utils.data.DataLoader(\n", - " val_dataset, batch_size=batch_size, shuffle=False\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Next, here's our PyTorch optimizer and our PyTorch loss function:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Instantiate a torch optimizer\n", - "model = get_model()\n", - "optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)\n", - "\n", - "# Instantiate a torch loss function\n", - "loss_fn = torch.nn.CrossEntropyLoss()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Let's train our model using mini-batch gradient descent with a custom training loop.\n", - "\n", - "Calling `loss.backward()` on a loss tensor triggers backpropagation.\n", - "Once that's done, your optimizer is magically aware of the gradients for each variable\n", - "and can update its variables, which is done via `optimizer.step()`.\n", - "Tensors, variables, and optimizers are all interconnected to one another via hidden global state.\n", - "Also, don't forget to call `model.zero_grad()` before `loss.backward()`, or you won't\n", - "get the right gradients for your variables.\n", - "\n", - "Here's our training loop, step by step:\n", - "\n", - "- We open a `for` loop that iterates over epochs\n", - "- For each epoch, we open a `for` loop that iterates over the dataset, in batches\n", - "- For each batch, we call the model on the input data to retrieve the predictions,\n", - "then we use them to compute a loss value\n", - "- We call `loss.backward()` to run backpropagation, after which\n", - "
we retrieve the gradients of the weights\n", - "of the model with regard to the loss\n", - "- Finally, we use the optimizer to update the weights of the model based on the\n", - "gradients" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "epochs = 3\n", - "for epoch in range(epochs):\n", - " for step, (inputs, targets) in enumerate(train_dataloader):\n", - " # Forward pass\n", - " logits = model(inputs)\n", - " loss = loss_fn(logits, targets)\n", - "\n", - " # Backward pass\n", - " model.zero_grad()\n", - " loss.backward()\n", - "\n", - " # Optimizer variable updates\n", - " optimizer.step()\n", - "\n", - " # Log every 100 batches.\n", - " if step % 100 == 0:\n", - " print(\n", - " f\"Training loss (for 1 batch) at step {step}: {loss.detach().numpy():.4f}\"\n", - " )\n", - " print(f\"Seen so far: {(step + 1) * batch_size} samples\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "As an alternative, let's look at what the loop looks like when using a Keras optimizer\n", - "and a Keras loss function.\n", - "\n", - "Important differences:\n", - "\n", - "- You retrieve the gradients for the variables via `v.value.grad`,\n", - "called on each trainable variable.\n", - "- You update your variables via `optimizer.apply()`, which must be\n", - "called in a `torch.no_grad()` scope.\n", - "\n", - "**Also, a big gotcha:** while all NumPy/TensorFlow/JAX/Keras APIs\n", - "as well as Python `unittest` APIs use the argument order convention\n", - "`fn(y_true, y_pred)` (reference values first, predicted values second),\n", - "PyTorch actually uses `fn(y_pred, y_true)` for its losses.\n", - "So make sure to invert the order of `logits` and `targets`." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = get_model()\n", - "optimizer = keras.optimizers.Adam(learning_rate=1e-3)\n", - "loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)\n", - "\n", - "for epoch in range(epochs):\n", - " print(f\"\\nStart of epoch {epoch}\")\n", - " for step, (inputs, targets) in enumerate(train_dataloader):\n", - " # Forward pass\n", - " logits = model(inputs)\n", - " loss = loss_fn(targets, logits)\n", - "\n", - " # Backward pass\n", - " model.zero_grad()\n", - " trainable_weights = [v for v in model.trainable_weights]\n", - "\n", - " # Call torch.Tensor.backward() on the loss to compute gradients\n", - " # for the weights.\n", - " loss.backward()\n", - " gradients = [v.value.grad for v in trainable_weights]\n", - "\n", - " # Update weights\n", - " with torch.no_grad():\n", - " optimizer.apply(gradients, trainable_weights)\n", - "\n", - " # Log every 100 batches.\n", - " if step % 100 == 0:\n", - " print(\n", - " f\"Training loss (for 1 batch) at step {step}: {loss.detach().numpy():.4f}\"\n", - " )\n", - " print(f\"Seen so far: {(step + 1) * batch_size} samples\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Low-level handling of metrics\n", - "\n", - "Let's add metrics monitoring to this basic training loop.\n", - "\n", - "You can readily reuse built-in Keras metrics (or custom ones you wrote) in such training\n", - "loops written from scratch. 
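Before moving on to metrics, here's a tiny self-contained sketch of that argument-order gotcha (illustrative values only, using a sparse-label variant of the loss):

```python
import torch
import keras

y_true = torch.tensor([0, 1])                    # reference labels
y_pred = torch.tensor([[2.0, 0.5], [0.3, 1.8]])  # raw logits

# PyTorch losses take predictions first: fn(y_pred, y_true).
torch_loss = torch.nn.CrossEntropyLoss()(y_pred, y_true)

# Keras losses take references first: fn(y_true, y_pred).
keras_loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)(
    y_true, y_pred
)
# Both compute the same mean cross-entropy; only the argument order differs.
```

With that settled, back to metrics.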
Here's the flow:\n", - "\n", - "- Instantiate the metric at the start of the loop\n", - "- Call `metric.update_state()` after each batch\n", - "- Call `metric.result()` when you need to display the current value of the metric\n", - "- Call `metric.reset_state()` when you need to clear the state of the metric\n", - "(typically at the end of an epoch)\n", - "\n", - "Let's use this knowledge to compute `CategoricalAccuracy` on training and\n", - "validation data at the end of each epoch:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Get a fresh model\n", - "model = get_model()\n", - "\n", - "# Instantiate an optimizer to train the model.\n", - "optimizer = keras.optimizers.Adam(learning_rate=1e-3)\n", - "# Instantiate a loss function.\n", - "loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)\n", - "\n", - "# Prepare the metrics.\n", - "train_acc_metric = keras.metrics.CategoricalAccuracy()\n", - "val_acc_metric = keras.metrics.CategoricalAccuracy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Here's our training & evaluation loop:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "for epoch in range(epochs):\n", - " print(f\"\\nStart of epoch {epoch}\")\n", - " for step, (inputs, targets) in enumerate(train_dataloader):\n", - " # Forward pass\n", - " logits = model(inputs)\n", - " loss = loss_fn(targets, logits)\n", - "\n", - " # Backward pass\n", - " model.zero_grad()\n", - " trainable_weights = [v for v in model.trainable_weights]\n", - "\n", - " # Call torch.Tensor.backward() on the loss to compute gradients\n", - " # for the weights.\n", - " loss.backward()\n", - " gradients = [v.value.grad for v in trainable_weights]\n", - "\n", - " # Update weights\n", - " with torch.no_grad():\n", - " optimizer.apply(gradients, trainable_weights)\n", - "\n", - " # Update training metric.\n", - " train_acc_metric.update_state(targets, logits)\n", - "\n", - " # Log every 100 batches.\n", - " if step % 100 == 0:\n", - " print(\n", - " f\"Training loss (for 1 batch) at step {step}: {loss.detach().numpy():.4f}\"\n", - " )\n", - " print(f\"Seen so far: {(step + 1) * batch_size} samples\")\n", - "\n", - " # Display metrics at the end of each epoch.\n", - " train_acc = train_acc_metric.result()\n", - " print(f\"Training acc over epoch: {float(train_acc):.4f}\")\n", - "\n", - " # Reset training metrics at the end of each epoch\n", - " train_acc_metric.reset_state()\n", - "\n", - " # Run a validation loop at the end of each epoch.\n", - " for x_batch_val, y_batch_val in val_dataloader:\n", - " val_logits = model(x_batch_val, training=False)\n", - " # Update val metrics\n", - " val_acc_metric.update_state(y_batch_val, val_logits)\n", - " val_acc = val_acc_metric.result()\n", - " val_acc_metric.reset_state()\n", - " print(f\"Validation acc: {float(val_acc):.4f}\")\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Low-level handling of losses tracked by the model\n", - "\n", - "Layers & models recursively track any losses created during the forward pass\n", - "by layers that call `self.add_loss(value)`. 
The resulting list of scalar loss\n", - "values is available via the property `model.losses`\n", - "at the end of the forward pass.\n", - "\n", - "If you want to use these loss components, you should sum them\n", - "and add them to the main loss in your training step.\n", - "\n", - "Consider this layer, which creates an activity regularization loss:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class ActivityRegularizationLayer(keras.layers.Layer):\n", - " def call(self, inputs):\n", - " self.add_loss(1e-2 * torch.sum(inputs))\n", - " return inputs\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Let's build a really simple model that uses it:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "inputs = keras.Input(shape=(784,), name=\"digits\")\n", - "x = keras.layers.Dense(64, activation=\"relu\")(inputs)\n", - "# Insert activity regularization as a layer\n", - "x = ActivityRegularizationLayer()(x)\n", - "x = keras.layers.Dense(64, activation=\"relu\")(x)\n", - "outputs = keras.layers.Dense(10, name=\"predictions\")(x)\n", - "\n", - "model = keras.Model(inputs=inputs, outputs=outputs)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Here's what our training loop should look like now:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Use the model with activity regularization that we just built.\n", - "\n", - "# Instantiate an optimizer to train the model.\n", - "optimizer = keras.optimizers.Adam(learning_rate=1e-3)\n", - "# Instantiate a loss function.\n", - "loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)\n", - "\n", - "# Prepare the metrics.\n", - "train_acc_metric = keras.metrics.CategoricalAccuracy()\n", - "val_acc_metric = keras.metrics.CategoricalAccuracy()\n", - "\n", - "for epoch in range(epochs):\n", - " print(f\"\\nStart of epoch {epoch}\")\n", - " for step, (inputs, targets) in enumerate(train_dataloader):\n", - " # Forward pass\n", - " logits = model(inputs)\n", - " loss = loss_fn(targets, logits)\n", - " if model.losses:\n", - " # Sum the scalar losses tracked during the forward pass.\n", - " loss = loss + sum(model.losses)\n", - "\n", - " # Backward pass\n", - " model.zero_grad()\n", - " trainable_weights = [v for v in model.trainable_weights]\n", - "\n", - " # Call torch.Tensor.backward() on the loss to compute gradients\n", - " # for the weights.\n", - " loss.backward()\n", - " gradients = [v.value.grad for v in trainable_weights]\n", - "\n", - " # Update weights\n", - " with torch.no_grad():\n", - " optimizer.apply(gradients, trainable_weights)\n", - "\n", - " # Update training metric.\n", - " train_acc_metric.update_state(targets, logits)\n", - "\n", - " # Log every 100 batches.\n", - " if step % 100 == 0:\n", - " print(\n", - " f\"Training loss (for 1 batch) at step {step}: {loss.detach().numpy():.4f}\"\n", - " )\n", - " print(f\"Seen so far: {(step + 1) * batch_size} samples\")\n", - "\n", - " # Display metrics at the end of each epoch.\n", - " train_acc = train_acc_metric.result()\n", - " print(f\"Training acc over epoch: {float(train_acc):.4f}\")\n", - "\n", - " # Reset training metrics at the end of each epoch\n", - " train_acc_metric.reset_state()\n", - "\n", - " # Run a validation loop at the end of each epoch.\n", - " for 
x_batch_val, y_batch_val in val_dataloader:\n", - " val_logits = model(x_batch_val, training=False)\n", - " # Update val metrics\n", - " val_acc_metric.update_state(y_batch_val, val_logits)\n", - " val_acc = val_acc_metric.result()\n", - " val_acc_metric.reset_state()\n", - " print(f\"Validation acc: {float(val_acc):.4f}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "That's it!" - ] - } - ], - "metadata": { - "accelerator": "None", - "colab": { - "collapsed_sections": [], - "name": "writing_a_custom_training_loop_in_torch", - "private_outputs": false, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/guides/ipynb/keras_core/writing_your_own_callbacks.ipynb b/guides/ipynb/keras_core/writing_your_own_callbacks.ipynb deleted file mode 100644 index 73da31a899..0000000000 --- a/guides/ipynb/keras_core/writing_your_own_callbacks.ipynb +++ /dev/null @@ -1,590 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "# Writing your own callbacks\n", - "\n", - "**Authors:** Rick Chao, Francois Chollet
\n", - "**Date created:** 2019/03/20
\n", - "**Last modified:** 2023/06/25
\n", - "**Description:** Complete guide to writing new Keras callbacks." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Introduction\n", - "\n", - "A callback is a powerful tool to customize the behavior of a Keras model during\n", - "training, evaluation, or inference. Examples include `keras.callbacks.TensorBoard`\n", - "to visualize training progress and results with TensorBoard, or\n", - "`keras.callbacks.ModelCheckpoint` to periodically save your model during training.\n", - "\n", - "In this guide, you will learn what a Keras callback is, what it can do, and how you can\n", - "build your own. We provide a few demos of simple callback applications to get you\n", - "started." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import keras" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Keras callbacks overview\n", - "\n", - "All callbacks subclass the `keras.callbacks.Callback` class, and\n", - "override a set of methods called at various stages of training, testing, and\n", - "predicting. Callbacks are useful to get a view on internal states and statistics of\n", - "the model during training.\n", - "\n", - "You can pass a list of callbacks (as the keyword argument `callbacks`) to the following\n", - "model methods:\n", - "\n", - "- `keras.Model.fit()`\n", - "- `keras.Model.evaluate()`\n", - "- `keras.Model.predict()`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## An overview of callback methods\n", - "\n", - "### Global methods\n", - "\n", - "#### `on_(train|test|predict)_begin(self, logs=None)`\n", - "\n", - "Called at the beginning of `fit`/`evaluate`/`predict`.\n", - "\n", - "#### `on_(train|test|predict)_end(self, logs=None)`\n", - "\n", - "Called at the end of `fit`/`evaluate`/`predict`.\n", - "\n", - "### Batch-level methods for training/testing/predicting\n", - "\n", - "#### `on_(train|test|predict)_batch_begin(self, batch, logs=None)`\n", - "\n", - "Called right before processing a batch during training/testing/predicting.\n", - "\n", - "#### `on_(train|test|predict)_batch_end(self, batch, logs=None)`\n", - "\n", - "Called at the end of training/testing/predicting a batch. Within this method, `logs` is\n", - "a dict containing the metrics results.\n", - "\n", - "### Epoch-level methods (training only)\n", - "\n", - "#### `on_epoch_begin(self, epoch, logs=None)`\n", - "\n", - "Called at the beginning of an epoch during training.\n", - "\n", - "#### `on_epoch_end(self, epoch, logs=None)`\n", - "\n", - "Called at the end of an epoch during training." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## A basic example\n", - "\n", - "Let's take a look at a concrete example. 
To get started, let's define\n", - "a simple Sequential Keras model:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "# Define the Keras model to add callbacks to\n", - "def get_model():\n", - " model = keras.Sequential()\n", - " model.add(keras.layers.Dense(1))\n", - " model.compile(\n", - " optimizer=keras.optimizers.RMSprop(learning_rate=0.1),\n", - " loss=\"mean_squared_error\",\n", - " metrics=[\"mean_absolute_error\"],\n", - " )\n", - " return model\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Then, load the MNIST data for training and testing from the Keras datasets API:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "# Load example MNIST data and pre-process it\n", - "(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()\n", - "x_train = x_train.reshape(-1, 784).astype(\"float32\") / 255.0\n", - "x_test = x_test.reshape(-1, 784).astype(\"float32\") / 255.0\n", - "\n", - "# Limit the data to 1000 samples\n", - "x_train = x_train[:1000]\n", - "y_train = y_train[:1000]\n", - "x_test = x_test[:1000]\n", - "y_test = y_test[:1000]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Now, define a simple custom callback that logs:\n", - "\n", - "- When `fit`/`evaluate`/`predict` starts & ends\n", - "- When each epoch starts & ends\n", - "- When each training batch starts & ends\n", - "- When each evaluation (test) batch starts & ends\n", - "- When each inference (prediction) batch starts & ends" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class CustomCallback(keras.callbacks.Callback):\n", - " def on_train_begin(self, logs=None):\n", - " keys = list(logs.keys())\n", - " print(\"Starting training; got log keys: {}\".format(keys))\n", - "\n", - " def on_train_end(self, logs=None):\n", - " keys = list(logs.keys())\n", - " print(\"Stop training; got log keys: {}\".format(keys))\n", - "\n", - " def on_epoch_begin(self, epoch, logs=None):\n", - " keys = list(logs.keys())\n", - " print(\"Start epoch {} of training; got log keys: {}\".format(epoch, keys))\n", - "\n", - " def on_epoch_end(self, epoch, logs=None):\n", - " keys = list(logs.keys())\n", - " print(\"End epoch {} of training; got log keys: {}\".format(epoch, keys))\n", - "\n", - " def on_test_begin(self, logs=None):\n", - " keys = list(logs.keys())\n", - " print(\"Start testing; got log keys: {}\".format(keys))\n", - "\n", - " def on_test_end(self, logs=None):\n", - " keys = list(logs.keys())\n", - " print(\"Stop testing; got log keys: {}\".format(keys))\n", - "\n", - " def on_predict_begin(self, logs=None):\n", - " keys = list(logs.keys())\n", - " print(\"Start predicting; got log keys: {}\".format(keys))\n", - "\n", - " def on_predict_end(self, logs=None):\n", - " keys = list(logs.keys())\n", - " print(\"Stop predicting; got log keys: {}\".format(keys))\n", - "\n", - " def on_train_batch_begin(self, batch, logs=None):\n", - " keys = list(logs.keys())\n", - " print(\"...Training: start of batch {}; got log keys: {}\".format(batch, keys))\n", - "\n", - " def on_train_batch_end(self, batch, logs=None):\n", - " keys = list(logs.keys())\n", - " print(\"...Training: end of batch {}; got log keys: 
{}\".format(batch, keys))\n", - "\n", - " def on_test_batch_begin(self, batch, logs=None):\n", - " keys = list(logs.keys())\n", - " print(\"...Evaluating: start of batch {}; got log keys: {}\".format(batch, keys))\n", - "\n", - " def on_test_batch_end(self, batch, logs=None):\n", - " keys = list(logs.keys())\n", - " print(\"...Evaluating: end of batch {}; got log keys: {}\".format(batch, keys))\n", - "\n", - " def on_predict_batch_begin(self, batch, logs=None):\n", - " keys = list(logs.keys())\n", - " print(\"...Predicting: start of batch {}; got log keys: {}\".format(batch, keys))\n", - "\n", - " def on_predict_batch_end(self, batch, logs=None):\n", - " keys = list(logs.keys())\n", - " print(\"...Predicting: end of batch {}; got log keys: {}\".format(batch, keys))\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "Let's try it out:" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "model = get_model()\n", - "model.fit(\n", - " x_train,\n", - " y_train,\n", - " batch_size=128,\n", - " epochs=1,\n", - " verbose=0,\n", - " validation_split=0.5,\n", - " callbacks=[CustomCallback()],\n", - ")\n", - "\n", - "res = model.evaluate(\n", - " x_test, y_test, batch_size=128, verbose=0, callbacks=[CustomCallback()]\n", - ")\n", - "\n", - "res = model.predict(x_test, batch_size=128, callbacks=[CustomCallback()])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "### Usage of `logs` dict\n", - "\n", - "The `logs` dict contains the loss value, and all the metrics at the end of a batch or\n", - "epoch. Example includes the loss and mean absolute error." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class LossAndErrorPrintingCallback(keras.callbacks.Callback):\n", - " def on_train_batch_end(self, batch, logs=None):\n", - " print(\n", - " \"Up to batch {}, the average loss is {:7.2f}.\".format(batch, logs[\"loss\"])\n", - " )\n", - "\n", - " def on_test_batch_end(self, batch, logs=None):\n", - " print(\n", - " \"Up to batch {}, the average loss is {:7.2f}.\".format(batch, logs[\"loss\"])\n", - " )\n", - "\n", - " def on_epoch_end(self, epoch, logs=None):\n", - " print(\n", - " \"The average loss for epoch {} is {:7.2f} \"\n", - " \"and mean absolute error is {:7.2f}.\".format(\n", - " epoch, logs[\"loss\"], logs[\"mean_absolute_error\"]\n", - " )\n", - " )\n", - "\n", - "\n", - "model = get_model()\n", - "model.fit(\n", - " x_train,\n", - " y_train,\n", - " batch_size=128,\n", - " epochs=2,\n", - " verbose=0,\n", - " callbacks=[LossAndErrorPrintingCallback()],\n", - ")\n", - "\n", - "res = model.evaluate(\n", - " x_test,\n", - " y_test,\n", - " batch_size=128,\n", - " verbose=0,\n", - " callbacks=[LossAndErrorPrintingCallback()],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Usage of `self.model` attribute\n", - "\n", - "In addition to receiving log information when one of their methods is called,\n", - "callbacks have access to the model associated with the current round of\n", - "training/evaluation/inference: `self.model`.\n", - "\n", - "Here are a few of the things you can do with `self.model` in a callback:\n", - "\n", - "- Set `self.model.stop_training = True` to immediately interrupt training.\n", - "- Mutate hyperparameters of the optimizer 
(available as `self.model.optimizer`),\n", - "such as `self.model.optimizer.learning_rate`.\n", - "- Save the model at periodic intervals.\n", - "- Record the output of `model.predict()` on a few test samples at the end of each\n", - "epoch, to use as a sanity check during training.\n", - "- Extract visualizations of intermediate features at the end of each epoch, to monitor\n", - "what the model is learning over time.\n", - "- etc.\n", - "\n", - "Let's see this in action in a couple of examples." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "## Examples of Keras callback applications\n", - "\n", - "### Early stopping at minimum loss\n", - "\n", - "This first example shows the creation of a `Callback` that stops training when the\n", - "loss has reached its minimum, by setting the attribute `self.model.stop_training`\n", - "(boolean). Optionally, you can provide an argument `patience` to specify how many\n", - "epochs we should wait before stopping after having reached a local minimum.\n", - "\n", - "`keras.callbacks.EarlyStopping` provides a more complete and general implementation." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class EarlyStoppingAtMinLoss(keras.callbacks.Callback):\n", - " \"\"\"Stop training when the loss is at its min, i.e. the loss stops decreasing.\n", - "\n", - " Arguments:\n", - " patience: Number of epochs to wait after the minimum has been hit. After\n", - " this many epochs without improvement, training stops.\n", - " \"\"\"\n", - "\n", - " def __init__(self, patience=0):\n", - " super().__init__()\n", - " self.patience = patience\n", - " # best_weights to store the weights at which the minimum loss occurs.\n", - " self.best_weights = None\n", - "\n", - " def on_train_begin(self, logs=None):\n", - " # The number of epochs waited while the loss is no longer at the minimum.\n", - " self.wait = 0\n", - " # The epoch the training stops at.\n", - " self.stopped_epoch = 0\n", - " # Initialize the best as infinity.\n", - " self.best = np.inf\n", - "\n", - " def on_epoch_end(self, epoch, logs=None):\n", - " current = logs.get(\"loss\")\n", - " if np.less(current, self.best):\n", - " self.best = current\n", - " self.wait = 0\n", - " # Record the best weights if the current result is better (lower).\n", - " self.best_weights = self.model.get_weights()\n", - " else:\n", - " self.wait += 1\n", - " if self.wait >= self.patience:\n", - " self.stopped_epoch = epoch\n", - " self.model.stop_training = True\n", - " print(\"Restoring model weights from the end of the best epoch.\")\n", - " self.model.set_weights(self.best_weights)\n", - "\n", - " def on_train_end(self, logs=None):\n", - " if self.stopped_epoch > 0:\n", - " print(f\"Epoch {self.stopped_epoch + 1}: early stopping\")\n", - "\n", - "\n", - "model = get_model()\n", - "model.fit(\n", - " x_train,\n", - " y_train,\n", - " batch_size=64,\n", - " epochs=30,\n", - " verbose=0,\n", - " callbacks=[LossAndErrorPrintingCallback(), EarlyStoppingAtMinLoss()],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "### Learning rate scheduling\n", - "\n", - "In this example, we show how a custom Callback can be used to dynamically change the\n", - "learning rate of the optimizer during the course of training.\n", - "\n", - "See `callbacks.LearningRateScheduler` for a more general implementation."
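For comparison, the built-in `keras.callbacks.LearningRateScheduler` is driven by the same kind of `(epoch, lr) -> new_lr` schedule function. A minimal usage sketch (the halving policy here is an arbitrary example):

```python
import keras


def halve_every_10_epochs(epoch, lr):
    # Halve the learning rate every 10 epochs (an example policy, not a recommendation).
    return lr * 0.5 if epoch > 0 and epoch % 10 == 0 else lr


lr_callback = keras.callbacks.LearningRateScheduler(halve_every_10_epochs, verbose=1)
# model.fit(x_train, y_train, epochs=30, callbacks=[lr_callback])
```

The custom implementation below shows what such a callback does by hand: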
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab_type": "code" - }, - "outputs": [], - "source": [ - "\n", - "class CustomLearningRateScheduler(keras.callbacks.Callback):\n", - " \"\"\"Learning rate scheduler which sets the learning rate according to schedule.\n", - "\n", - " Arguments:\n", - " schedule: a function that takes an epoch index\n", - " (integer, indexed from 0) and current learning rate\n", - " as inputs and returns a new learning rate as output (float).\n", - " \"\"\"\n", - "\n", - " def __init__(self, schedule):\n", - " super().__init__()\n", - " self.schedule = schedule\n", - "\n", - " def on_epoch_begin(self, epoch, logs=None):\n", - " if not hasattr(self.model.optimizer, \"learning_rate\"):\n", - " raise ValueError('Optimizer must have a \"learning_rate\" attribute.')\n", - " # Get the current learning rate from model's optimizer.\n", - " lr = self.model.optimizer.learning_rate\n", - " # Call schedule function to get the scheduled learning rate.\n", - " scheduled_lr = self.schedule(epoch, lr)\n", - " # Set the value back to the optimizer before this epoch starts\n", - " self.model.optimizer.learning_rate = scheduled_lr\n", - " print(f\"\\nEpoch {epoch}: Learning rate is {float(np.array(scheduled_lr))}.\")\n", - "\n", - "\n", - "LR_SCHEDULE = [\n", - " # (epoch to start, learning rate) tuples\n", - " (3, 0.05),\n", - " (6, 0.01),\n", - " (9, 0.005),\n", - " (12, 0.001),\n", - "]\n", - "\n", - "\n", - "def lr_schedule(epoch, lr):\n", - " \"\"\"Helper function to retrieve the scheduled learning rate based on epoch.\"\"\"\n", - " if epoch < LR_SCHEDULE[0][0] or epoch > LR_SCHEDULE[-1][0]:\n", - " return lr\n", - " for i in range(len(LR_SCHEDULE)):\n", - " if epoch == LR_SCHEDULE[i][0]:\n", - " return LR_SCHEDULE[i][1]\n", - " return lr\n", - "\n", - "\n", - "model = get_model()\n", - "model.fit(\n", - " x_train,\n", - " y_train,\n", - " batch_size=64,\n", - " epochs=15,\n", - " verbose=0,\n", - " callbacks=[\n", - " LossAndErrorPrintingCallback(),\n", - " CustomLearningRateScheduler(lr_schedule),\n", - " ],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text" - }, - "source": [ - "### Built-in Keras callbacks\n", - "\n", - "Be sure to check out the existing Keras callbacks by\n", - "reading the [API docs](https://keras.io/api/callbacks/).\n", - "Applications include logging to CSV, saving\n", - "the model, visualizing metrics in TensorBoard, and a lot more!" 
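To wrap up, here's a hedged sketch of wiring several of those built-ins into a single `fit()` call (file paths and hyperparameters are placeholders):

```python
import keras

callbacks = [
    # Save the best model seen so far, as judged by validation loss.
    keras.callbacks.ModelCheckpoint("best_model.keras", save_best_only=True),
    # Append per-epoch metrics to a CSV file.
    keras.callbacks.CSVLogger("training_log.csv"),
    # Write logs that TensorBoard can visualize.
    keras.callbacks.TensorBoard(log_dir="./logs"),
    # Stop once the validation loss stops improving.
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=3),
]
# model.fit(x_train, y_train, validation_split=0.2, epochs=30, callbacks=callbacks)
```
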
- ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "writing_your_own_callbacks", - "private_outputs": false, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file From 2e67fa6e9fac8a586344c1b90e57945cdf47542a Mon Sep 17 00:00:00 2001 From: Shingo Kumagai Date: Thu, 4 Apr 2024 13:29:41 +0900 Subject: [PATCH 4/4] Set input dtype to int32 for categorical features in tabtransformer example (#1815) --- examples/structured_data/ipynb/tabtransformer.ipynb | 4 ++-- examples/structured_data/md/tabtransformer.md | 4 ++-- examples/structured_data/tabtransformer.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/structured_data/ipynb/tabtransformer.ipynb b/examples/structured_data/ipynb/tabtransformer.ipynb index d42af71405..421f0e90ec 100644 --- a/examples/structured_data/ipynb/tabtransformer.ipynb +++ b/examples/structured_data/ipynb/tabtransformer.ipynb @@ -409,7 +409,7 @@ " )\n", " else:\n", " inputs[feature_name] = layers.Input(\n", - " name=feature_name, shape=(), dtype=\"float32\"\n", + " name=feature_name, shape=(), dtype=\"int32\"\n", " )\n", " return inputs\n", "" @@ -489,7 +489,7 @@ "def create_mlp(hidden_units, dropout_rate, activation, normalization_layer, name=None):\n", " mlp_layers = []\n", " for units in hidden_units:\n", - " mlp_layers.append(normalization_layer()),\n", + " mlp_layers.append(normalization_layer())\n", " mlp_layers.append(layers.Dense(units, activation=activation))\n", " mlp_layers.append(layers.Dropout(dropout_rate))\n", "\n", diff --git a/examples/structured_data/md/tabtransformer.md b/examples/structured_data/md/tabtransformer.md index 9dc552759f..dbf9272546 100644 --- a/examples/structured_data/md/tabtransformer.md +++ b/examples/structured_data/md/tabtransformer.md @@ -303,7 +303,7 @@ def create_model_inputs(): ) else: inputs[feature_name] = layers.Input( - name=feature_name, shape=(), dtype="float32" + name=feature_name, shape=(), dtype="int32" ) return inputs @@ -359,7 +359,7 @@ def encode_inputs(inputs, embedding_dims): def create_mlp(hidden_units, dropout_rate, activation, normalization_layer, name=None): mlp_layers = [] for units in hidden_units: - mlp_layers.append(normalization_layer()), + mlp_layers.append(normalization_layer()) mlp_layers.append(layers.Dense(units, activation=activation)) mlp_layers.append(layers.Dropout(dropout_rate)) diff --git a/examples/structured_data/tabtransformer.py b/examples/structured_data/tabtransformer.py index f2194a0c24..681a542b83 100644 --- a/examples/structured_data/tabtransformer.py +++ b/examples/structured_data/tabtransformer.py @@ -276,7 +276,7 @@ def create_model_inputs(): ) else: inputs[feature_name] = layers.Input( - name=feature_name, shape=(), dtype="float32" + name=feature_name, shape=(), dtype="int32" ) return inputs @@ -328,7 +328,7 @@ def encode_inputs(inputs, embedding_dims): def create_mlp(hidden_units, dropout_rate, activation, normalization_layer, name=None): mlp_layers = [] for units in hidden_units: - mlp_layers.append(normalization_layer()), + mlp_layers.append(normalization_layer()) 
mlp_layers.append(layers.Dense(units, activation=activation)) mlp_layers.append(layers.Dropout(dropout_rate))