From d79d92e8373455d1b031ae925781dd7a125f0776 Mon Sep 17 00:00:00 2001 From: Maria Khalusova Date: Wed, 24 May 2023 08:13:23 -0400 Subject: [PATCH] Export to ONNX doc refocused on using optimum, added tflite (#23434) * doc refocused on using optimum, tflite * minor updates to fix checks * Apply suggestions from code review Co-authored-by: regisss <15324346+regisss@users.noreply.github.com> * TFLite to separate page, added links * Removed the onnx list builder * make style * Update docs/source/en/serialization.mdx Co-authored-by: regisss <15324346+regisss@users.noreply.github.com> --------- Co-authored-by: regisss <15324346+regisss@users.noreply.github.com> --- docs/source/en/_toctree.yml | 2 + docs/source/en/serialization.mdx | 556 ++++++------------------------- docs/source/en/tflite.mdx | 58 ++++ utils/check_table.py | 47 --- 4 files changed, 170 insertions(+), 493 deletions(-) create mode 100644 docs/source/en/tflite.mdx diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 0325b7f355272b..4c9e06dec5acb2 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -93,6 +93,8 @@ title: Run training on Amazon SageMaker - local: serialization title: Export to ONNX + - local: tflite + title: Export to TFLite - local: torchscript title: Export to TorchScript - local: benchmarks diff --git a/docs/source/en/serialization.mdx b/docs/source/en/serialization.mdx index cc429dea08a5ab..022cf460f808bf 100644 --- a/docs/source/en/serialization.mdx +++ b/docs/source/en/serialization.mdx @@ -12,13 +12,20 @@ specific language governing permissions and limitations under the License. # Export to ONNX -If you need to deploy 🤗 Transformers models in production environments, we recommend -exporting them to a serialized format that can be loaded and executed on specialized -runtimes and hardware. In this guide, we'll show you how to export 🤗 Transformers -models to [ONNX (Open Neural Network eXchange)](http://onnx.ai). +Deploying 🤗 Transformers models in production environments often requires, or can benefit from exporting the models into +a serialized format that can be loaded and executed on specialized runtimes and hardware. -ONNX is an open standard that defines a common set of operators and a common file format -to represent deep learning models in a wide variety of frameworks, including PyTorch and +🤗 Optimum is an extension of Transformers that enables exporting models from PyTorch or TensorFlow to serialized formats +such as ONNX and TFLite through its `exporters` module. 🤗 Optimum also provides a set of performance optimization tools to train +and run models on targeted hardware with maximum efficiency. + +This guide demonstrates how you can export 🤗 Transformers models to ONNX with 🤗 Optimum, for the guide on exporting models to TFLite, +please refer to the [Export to TFLite page](tflite). + +## Export to ONNX + +[ONNX (Open Neural Network eXchange)](http://onnx.ai) is an open standard that defines a common set of operators and a +common file format to represent deep learning models in a wide variety of frameworks, including PyTorch and TensorFlow. When a model is exported to the ONNX format, these operators are used to construct a computational graph (often called an _intermediate representation_) which represents the flow of data through the neural network. @@ -27,166 +34,67 @@ By exposing a graph with standardized operators and data types, ONNX makes it ea switch between frameworks. 
For example, a model trained in PyTorch can be exported to ONNX format and then imported in TensorFlow (and vice versa). -🤗 Transformers provides a [`transformers.onnx`](main_classes/onnx) package that enables -you to convert model checkpoints to an ONNX graph by leveraging configuration objects. -These configuration objects come ready made for a number of model architectures, and are -designed to be easily extendable to other architectures. - - - -You can also export 🤗 Transformers models with the [`optimum.exporters.onnx` package](https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/export_a_model) -from 🤗 Optimum. - -Once exported, a model can be: - -- Optimized for inference via techniques such as quantization and graph optimization. -- Run with ONNX Runtime via [`ORTModelForXXX` classes](https://huggingface.co/docs/optimum/onnxruntime/package_reference/modeling_ort), +Once exported to ONNX format, a model can be: +- optimized for inference via techniques such as [graph optimization](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/optimization) and [quantization](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/quantization). +- run with ONNX Runtime via [`ORTModelForXXX` classes](https://huggingface.co/docs/optimum/onnxruntime/package_reference/modeling_ort), which follow the same `AutoModel` API as the one you are used to in 🤗 Transformers. -- Run with [optimized inference pipelines](https://huggingface.co/docs/optimum/main/en/onnxruntime/usage_guides/pipelines), -which has the same API as the [`pipeline`] function in 🤗 Transformers. +- run with [optimized inference pipelines](https://huggingface.co/docs/optimum/main/en/onnxruntime/usage_guides/pipelines), +which has the same API as the [`pipeline`] function in 🤗 Transformers. -To explore all these features, check out the [🤗 Optimum library](https://github.com/huggingface/optimum). +🤗 Optimum provides support for the ONNX export by leveraging configuration objects. These configuration objects come +ready-made for a number of model architectures, and are designed to be easily extendable to other architectures. - +For the list of ready-made configurations, please refer to [🤗 Optimum documentation](https://huggingface.co/docs/optimum/exporters/onnx/overview). -Ready-made configurations include the following architectures: - - - -- ALBERT -- BART -- BEiT -- BERT -- BigBird -- BigBird-Pegasus -- Blenderbot -- BlenderbotSmall -- BLOOM -- CamemBERT -- Chinese-CLIP -- CLIP -- CodeGen -- Conditional DETR -- ConvBERT -- ConvNeXT -- Data2VecText -- Data2VecVision -- DeBERTa -- DeBERTa-v2 -- DeiT -- DETR -- DistilBERT -- EfficientNet -- ELECTRA -- ERNIE -- FlauBERT -- GPT Neo -- GPT-J -- GPT-Sw3 -- GroupViT -- I-BERT -- ImageGPT -- LayoutLM -- LayoutLMv3 -- LeViT -- Longformer -- LongT5 -- M2M100 -- Marian -- mBART -- MEGA -- MobileBERT -- MobileNetV1 -- MobileNetV2 -- MobileViT -- MT5 -- OpenAI GPT-2 -- OWL-ViT -- Perceiver -- PLBart -- PoolFormer -- RemBERT -- ResNet -- RoBERTa -- RoBERTa-PreLayerNorm -- RoFormer -- SegFormer -- SqueezeBERT -- SwiftFormer -- Swin Transformer -- T5 -- Table Transformer -- Vision Encoder decoder -- ViT -- Whisper -- X-MOD -- XLM -- XLM-RoBERTa -- XLM-RoBERTa-XL -- YOLOS - -In the next two sections, we'll show you how to: - -* Export a supported model using the `transformers.onnx` package. -* Export a custom model for an unsupported architecture. 
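As a quick illustration of the last bullet point above, an exported model can be consumed through 🤗 Optimum's ONNX Runtime pipelines. The snippet below is a minimal sketch based on the linked pipelines guide, not content from this patch; the `accelerator="ort"` argument and the on-the-fly export of a plain PyTorch checkpoint are assumptions that may vary across 🤗 Optimum versions:

```python
from optimum.pipelines import pipeline

# Build a question-answering pipeline backed by ONNX Runtime.
onnx_qa = pipeline(
    "question-answering",
    model="distilbert-base-uncased-distilled-squad",
    accelerator="ort",
)

result = onnx_qa(
    question="What runtime is used?",
    context="The exported model is executed with ONNX Runtime.",
)
print(result["answer"])
```

An already-exported `ORTModelForXXX` instance (as shown later in this guide) should also be accepted in place of the checkpoint name.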
- -## Exporting a model to ONNX - - - -The recommended way of exporting a model is now to use -[`optimum.exporters.onnx`](https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model#exporting-a-model-to-onnx-using-the-cli), -do not worry it is very similar to `transformers.onnx`! +There are two ways to export a 🤗 Transformers model to ONNX, here we show both: - +- export with 🤗 Optimum via CLI. +- export with 🤗 Optimum with `optimum.onnxruntime`. -To export a 🤗 Transformers model to ONNX, you'll first need to install some extra -dependencies: +### Exporting a 🤗 Transformers model to ONNX with CLI + +To export a 🤗 Transformers model to ONNX, first install an extra dependency: ```bash -pip install transformers[onnx] +pip install optimum[exporters] ``` -The `transformers.onnx` package can then be used as a Python module: +To check out all available arguments, refer to the [🤗 Optimum docs](https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/export_a_model#exporting-a-model-to-onnx-using-the-cli), +or view help in command line: ```bash -python -m transformers.onnx --help - -usage: Hugging Face Transformers ONNX exporter [-h] -m MODEL [--feature {causal-lm, ...}] [--opset OPSET] [--atol ATOL] output - -positional arguments: - output Path indicating where to store generated ONNX model. - -optional arguments: - -h, --help show this help message and exit - -m MODEL, --model MODEL - Model ID on huggingface.co or path on disk to load model from. - --feature {causal-lm, ...} - The type of features to export the model with. - --opset OPSET ONNX opset version to export the model with. - --atol ATOL Absolute difference tolerance when validating the model. +optimum-cli export onnx --help ``` -Exporting a checkpoint using a ready-made configuration can be done as follows: +To export a model's checkpoint from the 🤗 Hub, for example, `distilbert-base-uncased-distilled-squad`, run the following command: ```bash -python -m transformers.onnx --model=distilbert-base-uncased onnx/ +optimum-cli export onnx --model distilbert-base-uncased-distilled-squad distilbert_base_uncased_squad_onnx/ ``` -You should see the following logs: +You should see the logs indicating progress and showing where the resulting `model.onnx` is saved, like this: ```bash -Validating ONNX model... - -[✓] ONNX model output names match reference model ({'last_hidden_state'}) - - Validating ONNX Model output "last_hidden_state": - -[✓] (2, 8, 768) matches (2, 8, 768) - -[✓] all values close (atol: 1e-05) -All good, model saved at: onnx/model.onnx -``` +Validating ONNX model distilbert_base_uncased_squad_onnx/model.onnx... + -[✓] ONNX model output names match reference model (start_logits, end_logits) + - Validating ONNX Model output "start_logits": + -[✓] (2, 16) matches (2, 16) + -[✓] all values close (atol: 0.0001) + - Validating ONNX Model output "end_logits": + -[✓] (2, 16) matches (2, 16) + -[✓] all values close (atol: 0.0001) +The ONNX export succeeded and the exported model was saved at: distilbert_base_uncased_squad_onnx +``` + +The example above illustrates exporting a checkpoint from 🤗 Hub. When exporting a local model, first make sure that you +saved both the model's weights and tokenizer files in the same directory (`local_path`). When using CLI, pass the +`local_path` to the `model` argument instead of the checkpoint name on 🤗 Hub and provide the `--task` argument. 
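For instance, a directory that satisfies this requirement can be produced with `save_pretrained`. This is a minimal sketch: the checkpoint and the `local_path` directory name are placeholders, not values mandated by the CLI:

```python
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

# Download a checkpoint and store the weights and tokenizer files side by side.
model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-uncased-distilled-squad")
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-distilled-squad")

model.save_pretrained("local_path")
tokenizer.save_pretrained("local_path")
```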
+You can review the list of supported tasks in the [🤗 Optimum documentation](https://huggingface.co/docs/optimum/exporters/task_manager). +If `task` argument is not provided, it will default to the model architecture without any task specific head. -This exports an ONNX graph of the checkpoint defined by the `--model` argument. In this -example, it is `distilbert-base-uncased`, but it can be any checkpoint on the Hugging -Face Hub or one that's stored locally. +```bash +optimum-cli export onnx --model local_path --task question-answering distilbert_base_uncased_squad_onnx/ +``` The resulting `model.onnx` file can then be run on one of the [many accelerators](https://onnx.ai/supported-tools.html#deployModel) that support the ONNX @@ -195,348 +103,104 @@ Runtime](https://onnxruntime.ai/) as follows: ```python >>> from transformers import AutoTokenizer ->>> from onnxruntime import InferenceSession - ->>> tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased") ->>> session = InferenceSession("onnx/model.onnx") ->>> # ONNX Runtime expects NumPy arrays as input ->>> inputs = tokenizer("Using DistilBERT with ONNX Runtime!", return_tensors="np") ->>> outputs = session.run(output_names=["last_hidden_state"], input_feed=dict(inputs)) -``` - -The required output names (like `["last_hidden_state"]`) can be obtained by taking a -look at the ONNX configuration of each model. For example, for DistilBERT we have: - -```python ->>> from transformers.models.distilbert import DistilBertConfig, DistilBertOnnxConfig - ->>> config = DistilBertConfig() ->>> onnx_config = DistilBertOnnxConfig(config) ->>> print(list(onnx_config.outputs.keys())) -["last_hidden_state"] -``` +>>> from optimum.onnxruntime import ORTModelForQuestionAnswering -The process is identical for TensorFlow checkpoints on the Hub. For example, we can -export a pure TensorFlow checkpoint from the [Keras -organization](https://huggingface.co/keras-io) as follows: - -```bash -python -m transformers.onnx --model=keras-io/transformers-qa onnx/ -``` - -To export a model that's stored locally, you'll need to have the model's weights and -tokenizer files stored in a directory. 
For example, we can load and save a checkpoint as -follows: - - -```python ->>> from transformers import AutoTokenizer, AutoModelForSequenceClassification - ->>> # Load tokenizer and PyTorch weights form the Hub ->>> tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased") ->>> pt_model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased") ->>> # Save to disk ->>> tokenizer.save_pretrained("local-pt-checkpoint") ->>> pt_model.save_pretrained("local-pt-checkpoint") -``` - -Once the checkpoint is saved, we can export it to ONNX by pointing the `--model` -argument of the `transformers.onnx` package to the desired directory: - -```bash -python -m transformers.onnx --model=local-pt-checkpoint onnx/ -``` - -```python ->>> from transformers import AutoTokenizer, TFAutoModelForSequenceClassification - ->>> # Load tokenizer and TensorFlow weights from the Hub ->>> tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased") ->>> tf_model = TFAutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased") ->>> # Save to disk ->>> tokenizer.save_pretrained("local-tf-checkpoint") ->>> tf_model.save_pretrained("local-tf-checkpoint") +>>> tokenizer = AutoTokenizer.from_pretrained("distilbert_base_uncased_squad_onnx") +>>> model = ORTModelForQuestionAnswering.from_pretrained("distilbert_base_uncased_squad_onnx") +>>> inputs = tokenizer("What am I using?", "Using DistilBERT with ONNX Runtime!", return_tensors="pt") +>>> outputs = model(**inputs) ``` -Once the checkpoint is saved, we can export it to ONNX by pointing the `--model` -argument of the `transformers.onnx` package to the desired directory: +The process is identical for TensorFlow checkpoints on the Hub. For instance, here's how you would +export a pure TensorFlow checkpoint from the [Keras organization](https://huggingface.co/keras-io): ```bash -python -m transformers.onnx --model=local-tf-checkpoint onnx/ +optimum-cli export onnx --model keras-io/transformers-qa distilbert_base_cased_squad_onnx/ ``` - -## Selecting features for different model tasks +### Exporting a 🤗 Transformers model to ONNX with `optimum.onnxruntime` - - -The recommended way of exporting a model is now to use `optimum.exporters.onnx`. -You can check the [🤗 Optimum documentation](https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model#selecting-a-task) -to learn how to select a task. - - - -Each ready-made configuration comes with a set of _features_ that enable you to export -models for different types of tasks. As shown in the table below, each feature is -associated with a different `AutoClass`: - -| Feature | Auto Class | -| ------------------------------------ | ------------------------------------ | -| `causal-lm`, `causal-lm-with-past` | `AutoModelForCausalLM` | -| `default`, `default-with-past` | `AutoModel` | -| `masked-lm` | `AutoModelForMaskedLM` | -| `question-answering` | `AutoModelForQuestionAnswering` | -| `seq2seq-lm`, `seq2seq-lm-with-past` | `AutoModelForSeq2SeqLM` | -| `sequence-classification` | `AutoModelForSequenceClassification` | -| `token-classification` | `AutoModelForTokenClassification` | - -For each configuration, you can find the list of supported features via the -[`~transformers.onnx.FeaturesManager`]. 
For example, for DistilBERT we have: +Alternative to CLI, you can export a 🤗 Transformers model to ONNX programmatically like so: ```python ->>> from transformers.onnx.features import FeaturesManager - ->>> distilbert_features = list(FeaturesManager.get_supported_features_for_model_type("distilbert").keys()) ->>> print(distilbert_features) -["default", "masked-lm", "causal-lm", "sequence-classification", "token-classification", "question-answering"] -``` - -You can then pass one of these features to the `--feature` argument in the -`transformers.onnx` package. For example, to export a text-classification model we can -pick a fine-tuned model from the Hub and run: +>>> from optimum.onnxruntime import ORTModelForSequenceClassification +>>> from transformers import AutoTokenizer -```bash -python -m transformers.onnx --model=distilbert-base-uncased-finetuned-sst-2-english \ - --feature=sequence-classification onnx/ -``` +>>> model_checkpoint = "distilbert_base_uncased_squad" +>>> save_directory = "onnx/" -This displays the following logs: +>>> # Load a model from transformers and export it to ONNX +>>> ort_model = ORTModelForSequenceClassification.from_pretrained(model_checkpoint, export=True) +>>> tokenizer = AutoTokenizer.from_pretrained(model_checkpoint) -```bash -Validating ONNX model... - -[✓] ONNX model output names match reference model ({'logits'}) - - Validating ONNX Model output "logits": - -[✓] (2, 2) matches (2, 2) - -[✓] all values close (atol: 1e-05) -All good, model saved at: onnx/model.onnx +>>> # Save the onnx model and tokenizer +>>> ort_model.save_pretrained(save_directory) +>>> tokenizer.save_pretrained(save_directory) ``` -Notice that in this case, the output names from the fine-tuned model are `logits` -instead of the `last_hidden_state` we saw with the `distilbert-base-uncased` checkpoint -earlier. This is expected since the fine-tuned model has a sequence classification head. - - - -The features that have a `with-past` suffix (like `causal-lm-with-past`) correspond to -model classes with precomputed hidden states (key and values in the attention blocks) -that can be used for fast autoregressive decoding. - - - - - -For `VisionEncoderDecoder` type models, the encoder and decoder parts are -exported separately as two ONNX files named `encoder_model.onnx` and `decoder_model.onnx` respectively. - - - - -## Exporting a model for an unsupported architecture - - +### Exporting a model for an unsupported architecture If you wish to contribute by adding support for a model that cannot be currently exported, you should first check if it is -supported in [`optimum.exporters.onnx`](https://huggingface.co/docs/optimum/main/en/exporters/onnx/package_reference/configuration#supported-architectures), -and if it is not, [contribute to 🤗 Optimum](https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/contribute) +supported in [`optimum.exporters.onnx`](https://huggingface.co/docs/optimum/exporters/onnx/overview), +and if it is not, [contribute to 🤗 Optimum](https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/contribute) directly. - - -If you wish to export a model whose architecture is not natively supported by the -library, there are three main steps to follow: - -1. Implement a custom ONNX configuration. -2. Export the model to ONNX. -3. Validate the outputs of the PyTorch and exported models. - -In this section, we'll look at how DistilBERT was implemented to show what's involved -with each step. 
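To round off the `optimum.onnxruntime` example above, the directory written by `save_pretrained` can be loaded back with the same `ORTModelForXXX` class and used for inference with ONNX Runtime — a short sketch that assumes the export above completed successfully:

```python
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import AutoTokenizer

save_directory = "onnx/"  # directory produced by save_pretrained() above

# Reload the exported ONNX weights and run them with ONNX Runtime.
tokenizer = AutoTokenizer.from_pretrained(save_directory)
ort_model = ORTModelForSequenceClassification.from_pretrained(save_directory)

inputs = tokenizer("ONNX Runtime makes inference fast!", return_tensors="pt")
outputs = ort_model(**inputs)
print(outputs.logits)
```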
-
-### Implementing a custom ONNX configuration
-
-Let's start with the ONNX configuration object. We provide three abstract classes that
-you should inherit from, depending on the type of model architecture you wish to export:
-
-* Encoder-based models inherit from [`~onnx.config.OnnxConfig`]
-* Decoder-based models inherit from [`~onnx.config.OnnxConfigWithPast`]
-* Encoder-decoder models inherit from [`~onnx.config.OnnxSeq2SeqConfigWithPast`]
-
-<Tip>
-
-A good way to implement a custom ONNX configuration is to look at the existing
-implementation in the `configuration_<model_name>.py` file of a similar architecture.
-
-</Tip>
-
-Since DistilBERT is an encoder-based model, its configuration inherits from
-`OnnxConfig`:
+### Exporting a model with `transformers.onnx`

-```python
->>> from typing import Mapping, OrderedDict
->>> from transformers.onnx import OnnxConfig
-
-
->>> class DistilBertOnnxConfig(OnnxConfig):
-...     @property
-...     def inputs(self) -> Mapping[str, Mapping[int, str]]:
-...         return OrderedDict(
-...             [
-...                 ("input_ids", {0: "batch", 1: "sequence"}),
-...                 ("attention_mask", {0: "batch", 1: "sequence"}),
-...             ]
-...         )
-```
-
-Every configuration object must implement the `inputs` property and return a mapping,
-where each key corresponds to an expected input, and each value indicates the axis of
-that input. For DistilBERT, we can see that two inputs are required: `input_ids` and
-`attention_mask`. These inputs have the same shape of `(batch_size, sequence_length)`
-which is why we see the same axes used in the configuration.
+<Tip warning={true}>

-<Tip>
-
-Notice that `inputs` property for `DistilBertOnnxConfig` returns an `OrderedDict`. This
-ensures that the inputs are matched with their relative position within the
-`PreTrainedModel.forward()` method when tracing the graph. We recommend using an
-`OrderedDict` for the `inputs` and `outputs` properties when implementing custom ONNX
-configurations.
+`transformers.onnx` is no longer maintained; please export models with 🤗 Optimum as described above. This section will be removed in future versions.

 </Tip>

-Once you have implemented an ONNX configuration, you can instantiate it by providing the
-base model's configuration as follows:
-
-```python
->>> from transformers import AutoConfig
-
->>> config = AutoConfig.from_pretrained("distilbert-base-uncased")
->>> onnx_config = DistilBertOnnxConfig(config)
-```
-
-The resulting object has several useful properties. For example, you can view the ONNX
-operator set that will be used during the export:
-
-```python
->>> print(onnx_config.default_onnx_opset)
-11
-```
-
-You can also view the outputs associated with the model as follows:
+To export a 🤗 Transformers model to ONNX with `transformers.onnx`, install extra dependencies:

-```python
->>> print(onnx_config.outputs)
-OrderedDict([("last_hidden_state", {0: "batch", 1: "sequence"})])
+```bash
+pip install transformers[onnx]
 ```
-
-Notice that the outputs property follows the same structure as the inputs; it returns an
-`OrderedDict` of named outputs and their shapes. The output structure is linked to the
-choice of feature that the configuration is initialised with. By default, the ONNX
-configuration is initialized with the `default` feature that corresponds to exporting a
-model loaded with the `AutoModel` class. If you want to export a model for another task,
-just provide a different feature to the `task` argument when you initialize the ONNX
-configuration.
For example, if we wished to export DistilBERT with a sequence -classification head, we could use: +Use `transformers.onnx` package as a Python module to export a checkpoint using a ready-made configuration: -```python ->>> from transformers import AutoConfig - ->>> config = AutoConfig.from_pretrained("distilbert-base-uncased") ->>> onnx_config_for_seq_clf = DistilBertOnnxConfig(config, task="sequence-classification") ->>> print(onnx_config_for_seq_clf.outputs) -OrderedDict([('logits', {0: 'batch'})]) +```bash +python -m transformers.onnx --model=distilbert-base-uncased onnx/ ``` - - -All of the base properties and methods associated with [`~onnx.config.OnnxConfig`] and -the other configuration classes can be overridden if needed. Check out [`BartOnnxConfig`] -for an advanced example. - - - -### Exporting the model - -Once you have implemented the ONNX configuration, the next step is to export the model. -Here we can use the `export()` function provided by the `transformers.onnx` package. -This function expects the ONNX configuration, along with the base model and tokenizer, -and the path to save the exported file: +This exports an ONNX graph of the checkpoint defined by the `--model` argument. Pass any checkpoint on the 🤗 Hub or one that's stored locally. +The resulting `model.onnx` file can then be run on one of the many accelerators that support the ONNX standard. For example, +load and run the model with ONNX Runtime as follows: ```python ->>> from pathlib import Path ->>> from transformers.onnx import export ->>> from transformers import AutoTokenizer, AutoModel - ->>> onnx_path = Path("model.onnx") ->>> model_ckpt = "distilbert-base-uncased" ->>> base_model = AutoModel.from_pretrained(model_ckpt) ->>> tokenizer = AutoTokenizer.from_pretrained(model_ckpt) +>>> from transformers import AutoTokenizer +>>> from onnxruntime import InferenceSession ->>> onnx_inputs, onnx_outputs = export(tokenizer, base_model, onnx_config, onnx_config.default_onnx_opset, onnx_path) +>>> tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased") +>>> session = InferenceSession("onnx/model.onnx") +>>> # ONNX Runtime expects NumPy arrays as input +>>> inputs = tokenizer("Using DistilBERT with ONNX Runtime!", return_tensors="np") +>>> outputs = session.run(output_names=["last_hidden_state"], input_feed=dict(inputs)) ``` -The `onnx_inputs` and `onnx_outputs` returned by the `export()` function are lists of -the keys defined in the `inputs` and `outputs` properties of the configuration. Once the -model is exported, you can test that the model is well formed as follows: +The required output names (like `["last_hidden_state"]`) can be obtained by taking a look at the ONNX configuration of +each model. For example, for DistilBERT we have: ```python ->>> import onnx +>>> from transformers.models.distilbert import DistilBertConfig, DistilBertOnnxConfig ->>> onnx_model = onnx.load("model.onnx") ->>> onnx.checker.check_model(onnx_model) +>>> config = DistilBertConfig() +>>> onnx_config = DistilBertOnnxConfig(config) +>>> print(list(onnx_config.outputs.keys())) +["last_hidden_state"] ``` - +The process is identical for TensorFlow checkpoints on the Hub. For example, export a pure TensorFlow checkpoint like so: -If your model is larger than 2GB, you will see that many additional files are created -during the export. This is _expected_ because ONNX uses [Protocol -Buffers](https://developers.google.com/protocol-buffers/) to store the model and these -have a size limit of 2GB. 
See the [ONNX -documentation](https://github.com/onnx/onnx/blob/master/docs/ExternalData.md) for -instructions on how to load models with external data. - - - -### Validating the model outputs - -The final step is to validate that the outputs from the base and exported model agree -within some absolute tolerance. Here we can use the `validate_model_outputs()` function -provided by the `transformers.onnx` package as follows: - -```python ->>> from transformers.onnx import validate_model_outputs - ->>> validate_model_outputs( -... onnx_config, tokenizer, base_model, onnx_path, onnx_outputs, onnx_config.atol_for_validation -... ) +```bash +python -m transformers.onnx --model=keras-io/transformers-qa onnx/ ``` -This function uses the [`~transformers.onnx.OnnxConfig.generate_dummy_inputs`] method to -generate inputs for the base and exported model, and the absolute tolerance can be -defined in the configuration. We generally find numerical agreement in the 1e-6 to 1e-4 -range, although anything smaller than 1e-3 is likely to be OK. - -## Contributing a new configuration to 🤗 Transformers +To export a model that's stored locally, save the model's weights and tokenizer files in the same directory (e.g. `local-pt-checkpoint`), +then export it to ONNX by pointing the `--model` argument of the `transformers.onnx` package to the desired directory: -We are looking to expand the set of ready-made configurations and welcome contributions -from the community! If you would like to contribute your addition to the library, you -will need to: - -* Implement the ONNX configuration in the corresponding `configuration_.py` -file -* Include the model architecture and corresponding features in - [`~onnx.features.FeatureManager`] -* Add your model architecture to the tests in `test_onnx_v2.py` - -Check out how the configuration for [IBERT was -contributed](https://github.com/huggingface/transformers/pull/14868/files) to get an -idea of what's involved. +```bash +python -m transformers.onnx --model=local-pt-checkpoint onnx/ +``` \ No newline at end of file diff --git a/docs/source/en/tflite.mdx b/docs/source/en/tflite.mdx new file mode 100644 index 00000000000000..23e08478ba82af --- /dev/null +++ b/docs/source/en/tflite.mdx @@ -0,0 +1,58 @@ + + +# Export to TFLite + +[TensorFlow Lite](https://www.tensorflow.org/lite/guide) is a lightweight framework for deploying machine learning models +on resource-constrained devices, such as mobile phones, embedded systems, and Internet of Things (IoT) devices. +TFLite is designed to optimize and run models efficiently on these devices with limited computational power, memory, and +power consumption. +A TensorFlow Lite model is represented in a special efficient portable format identified by the `.tflite` file extension. + +🤗 Optimum offers functionality to export 🤗 Transformers models to TFLite through the `exporters.tflite` module. +For the list of supported model architectures, please refer to [🤗 Optimum documentation](https://huggingface.co/docs/optimum/exporters/tflite/overview). 
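Once a model has been exported with the CLI steps below, the resulting `model.tflite` file can be loaded with the standard TensorFlow Lite interpreter. The following is a hedged sketch rather than part of the guide itself: the checkpoint, the `bert_tflite/model.tflite` path, and the input tensor names are assumptions, so inspect `get_input_details()` on your own export for the actual names and dtypes:

```python
import tensorflow as tf
from transformers import AutoTokenizer

# Load the exported TFLite model and allocate its tensors.
interpreter = tf.lite.Interpreter(model_path="bert_tflite/model.tflite")
interpreter.allocate_tensors()

# Tokenize to the fixed sequence length used at export time (128 below).
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
encoding = tokenizer(
    "TFLite runs on-device.", padding="max_length", max_length=128, return_tensors="np"
)

# Match tokenizer outputs to the interpreter's input tensors by name.
for detail in interpreter.get_input_details():
    for name, value in encoding.items():
        if name in detail["name"]:
            interpreter.set_tensor(detail["index"], value.astype(detail["dtype"]))

interpreter.invoke()
logits = interpreter.get_tensor(interpreter.get_output_details()[0]["index"])
print(logits.shape)
```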
+ +To export a model to TFLite, install the required dependencies: + +```bash +pip install optimum[exporters-tf] +``` + +To check out all available arguments, refer to the [🤗 Optimum docs](https://huggingface.co/docs/optimum/main/en/exporters/tflite/usage_guides/export_a_model), +or view help in command line: + +```bash +optimum-cli export tflite --help +``` + +To export a model's checkpoint from the 🤗 Hub, for example, `bert-base-uncased`, run the following command: + +```bash +optimum-cli export tflite --model bert-base-uncased --sequence_length 128 bert_tflite/ +``` + +You should see the logs indicating progress and showing where the resulting `model.tflite` is saved, like this: + +```bash +Validating TFLite model... + -[✓] TFLite model output names match reference model (logits) + - Validating TFLite Model output "logits": + -[✓] (1, 128, 30522) matches (1, 128, 30522) + -[x] values not close enough, max diff: 5.817413330078125e-05 (atol: 1e-05) +The TensorFlow Lite export succeeded with the warning: The maximum absolute difference between the output of the reference model and the TFLite exported model is not within the set tolerance 1e-05: +- logits: max diff = 5.817413330078125e-05. + The exported model was saved at: bert_tflite + ``` + +The example above illustrates exporting a checkpoint from 🤗 Hub. When exporting a local model, first make sure that you +saved both the model's weights and tokenizer files in the same directory (`local_path`). When using CLI, pass the +`local_path` to the `model` argument instead of the checkpoint name on 🤗 Hub. \ No newline at end of file diff --git a/utils/check_table.py b/utils/check_table.py index e7e31cfee3bc79..80593881a39ccc 100644 --- a/utils/check_table.py +++ b/utils/check_table.py @@ -173,56 +173,9 @@ def check_model_table(overwrite=False): ) -def has_onnx(model_type): - """ - Returns whether `model_type` is supported by ONNX (by checking if there is an ONNX config) or not. - """ - config_mapping = transformers_module.models.auto.configuration_auto.CONFIG_MAPPING - if model_type not in config_mapping: - return False - config = config_mapping[model_type] - config_module = config.__module__ - module = transformers_module - for part in config_module.split(".")[1:]: - module = getattr(module, part) - config_name = config.__name__ - onnx_config_name = config_name.replace("Config", "OnnxConfig") - return hasattr(module, onnx_config_name) - - -def get_onnx_model_list(): - """ - Return the list of models supporting ONNX. 
- """ - config_mapping = transformers_module.models.auto.configuration_auto.CONFIG_MAPPING - model_names = config_mapping = transformers_module.models.auto.configuration_auto.MODEL_NAMES_MAPPING - onnx_model_types = [model_type for model_type in config_mapping.keys() if has_onnx(model_type)] - onnx_model_names = [model_names[model_type] for model_type in onnx_model_types] - onnx_model_names.sort(key=lambda x: x.upper()) - return "\n".join([f"- {name}" for name in onnx_model_names]) + "\n" - - -def check_onnx_model_list(overwrite=False): - """Check the model list in the serialization.mdx is consistent with the state of the lib and maybe `overwrite`.""" - current_list, start_index, end_index, lines = _find_text_in_file( - filename=os.path.join(PATH_TO_DOCS, "serialization.mdx"), - start_prompt="", - end_prompt="In the next two sections, we'll show you how to:", - ) - new_list = get_onnx_model_list() - - if current_list != new_list: - if overwrite: - with open(os.path.join(PATH_TO_DOCS, "serialization.mdx"), "w", encoding="utf-8", newline="\n") as f: - f.writelines(lines[:start_index] + [new_list] + lines[end_index:]) - else: - raise ValueError("The list of ONNX-supported models needs an update. Run `make fix-copies` to fix this.") - - if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--fix_and_overwrite", action="store_true", help="Whether to fix inconsistencies.") args = parser.parse_args() check_model_table(args.fix_and_overwrite) - check_onnx_model_list(args.fix_and_overwrite)