Al 160 add onnx support #1

Merged
7 commits merged on Aug 27, 2020
49 changes: 40 additions & 9 deletions src/transformers/pipelines.py
@@ -23,6 +23,7 @@
from abc import ABC, abstractmethod
from contextlib import contextmanager
from itertools import chain
from multiprocessing import cpu_count
from os.path import abspath, exists
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union
from uuid import UUID
@@ -512,6 +513,8 @@ def __init__(
args_parser: ArgumentHandler = None,
device: int = -1,
binary_output: bool = False,
use_onnx: bool = False,
onnx_path: Optional[str] = None,
):

if framework is None:
@@ -523,6 +526,8 @@ def __init__(
self.modelcard = modelcard
self.framework = framework
self.device = device if framework == "tf" else torch.device("cpu" if device < 0 else "cuda:{}".format(device))
self.use_onnx = use_onnx
self.onnx_path = onnx_path
self.binary_output = binary_output
self._args_parser = args_parser or DefaultArgumentHandler()

@@ -1585,6 +1590,29 @@ def __init__(
TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING if self.framework == "tf" else MODEL_FOR_QUESTION_ANSWERING_MAPPING
)

if self.use_onnx:
# Do onnx loading here
# Set env variables if not set - optional: make it configurable
os.environ.setdefault("OMP_NUM_THREADS", str(cpu_count()))
os.environ.setdefault("OMP_WAIT_POLICY", "ACTIVE")
# Import onnxruntime objects
from onnxruntime import ExecutionMode, GraphOptimizationLevel, InferenceSession, SessionOptions

# Setup ONNX config params
onnx_exec_mode = kwargs.pop("onnx_exec_mode", ExecutionMode.ORT_SEQUENTIAL)
onnx_inter_op_num_threads = kwargs.pop("onnx_inter_op_num_threads", 1)
onnx_graph_optimization_level = kwargs.pop(
"onnx_graph_optimization_level", GraphOptimizationLevel.ORT_ENABLE_ALL
)
onnx_exec_providers = kwargs.pop("onnx_exec_providers", ["CPUExecutionProvider"])
# Configure options
options = SessionOptions()
options.execution_mode = onnx_exec_mode
options.inter_op_num_threads = onnx_inter_op_num_threads
options.graph_optimization_level = onnx_graph_optimization_level
# Load model
self.model = InferenceSession(self.onnx_path, options, providers=onnx_exec_providers)

@staticmethod
def create_sample(
question: Union[str, List[str]], context: Union[str, List[str]]
@@ -1687,16 +1715,19 @@ def __call__(self, *args, **kwargs):

# Manage tensor allocation on correct device
with self.device_placement():
-            if self.framework == "tf":
-                fw_args = {k: tf.constant(v) for (k, v) in fw_args.items()}
-                start, end = self.model(fw_args)[:2]
-                start, end = start.numpy(), end.numpy()
+            if not self.use_onnx:
+                if self.framework == "tf":
+                    fw_args = {k: tf.constant(v) for (k, v) in fw_args.items()}
+                    start, end = self.model(fw_args)[:2]
+                    start, end = start.numpy(), end.numpy()
+                else:
+                    with torch.no_grad():
+                        # Retrieve the score for the context tokens only (removing question tokens)
+                        fw_args = {k: torch.tensor(v, device=self.device) for (k, v) in fw_args.items()}
+                        start, end = self.model(**fw_args)[:2]
+                        start, end = start.cpu().numpy(), end.cpu().numpy()
             else:
-                with torch.no_grad():
-                    # Retrieve the score for the context tokens only (removing question tokens)
-                    fw_args = {k: torch.tensor(v, device=self.device) for (k, v) in fw_args.items()}
-                    start, end = self.model(**fw_args)[:2]
-                    start, end = start.cpu().numpy(), end.cpu().numpy()
+                start, end = self.model.run(None, fw_args)[:2]

min_null_score = 1000000 # large and positive
answers = []
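For reference, a minimal usage sketch of the new arguments, assuming onnxruntime is installed and an exported .onnx file is available locally. The checkpoint name matches the one used in the commented-out test below; the .onnx path and the question/context are placeholders, and the onnx_* keyword arguments mirror the defaults popped in __init__:

```python
from onnxruntime import ExecutionMode, GraphOptimizationLevel

from transformers import pipeline

# Placeholder path: point this at an exported question-answering ONNX graph.
onnx_path = "/path/to/bert-base-cased-squad2.onnx"

nlp = pipeline(
    task="question-answering",
    model="deepset/bert-base-cased-squad2",
    tokenizer="deepset/bert-base-cased-squad2",
    use_onnx=True,        # new flag introduced by this PR
    onnx_path=onnx_path,  # location of the exported graph
    # Optional session tuning; these mirror the kwargs popped in __init__.
    onnx_exec_mode=ExecutionMode.ORT_SEQUENTIAL,
    onnx_inter_op_num_threads=1,
    onnx_graph_optimization_level=GraphOptimizationLevel.ORT_ENABLE_ALL,
    onnx_exec_providers=["CPUExecutionProvider"],
)

print(nlp(
    question="What does this PR add?",
    context="This pull request adds ONNX support to the question-answering pipeline.",
))
```

Passing the onnx_* kwargs through pipeline() assumes the factory forwards unrecognised keyword arguments on to the task class, the same mechanism the commented-out test below relies on for use_onnx and onnx_path.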
8 changes: 8 additions & 0 deletions tests/test_pipelines.py
@@ -673,6 +673,14 @@ def test_torch_question_answering(self):
nlp = pipeline(task="question-answering", model=model_name, tokenizer=model_name)
self._test_qa_pipeline(nlp)

# Uncomment when onnx model available
# model_name = "deepset/bert-base-cased-squad2"
# use_onnx = True
# onnx_path = "/Users/binoydalal/Downloads/bert-base-cased-squad2-optimized-quantized.onnx"
# nlp = pipeline(task="question-answering", model=model_name, tokenizer=model_name, use_onnx=use_onnx,
# onnx_path=onnx_path)
# self._test_qa_pipeline(nlp)

@require_tf
def test_tf_question_answering(self):
for model_name in QA_FINETUNED_MODELS:
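The PR itself does not include an export step, so the .onnx file referenced by onnx_path has to be produced separately. A hypothetical sketch of one way to do that with plain torch.onnx.export follows; the output file name is a placeholder, the result is a plain FP32 graph (not the optimized/quantized variant named in the commented-out test), and it assumes the exported input names match the keys the pipeline feeds to InferenceSession.run (input_ids, attention_mask, token_type_ids for BERT-style models):

```python
import torch
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

model_name = "deepset/bert-base-cased-squad2"  # same checkpoint as the commented-out test
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
model.eval()

# Dummy question/context pair just to trace the graph; batch and sequence dims
# are marked dynamic so the exported model accepts arbitrary input lengths.
encoded = tokenizer("Who?", "Someone did it.", return_tensors="pt")
inputs = (encoded["input_ids"], encoded["attention_mask"], encoded["token_type_ids"])

torch.onnx.export(
    model,
    inputs,
    "bert-base-cased-squad2.onnx",  # placeholder output file
    input_names=["input_ids", "attention_mask", "token_type_ids"],
    output_names=["start_logits", "end_logits"],
    dynamic_axes={
        "input_ids": {0: "batch", 1: "sequence"},
        "attention_mask": {0: "batch", 1: "sequence"},
        "token_type_ids": {0: "batch", 1: "sequence"},
        "start_logits": {0: "batch", 1: "sequence"},
        "end_logits": {0: "batch", 1: "sequence"},
    },
    opset_version=11,
)
```

Once a file like this exists locally, the commented-out block in test_torch_question_answering can be pointed at it and re-enabled.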