File tree Expand file tree Collapse file tree 6 files changed +9
-5
lines changed
examples/onnxrt/nlp/huggingface_model
question_answering/quantization/ptq_static
text_generation/llama/quantization Expand file tree Collapse file tree 6 files changed +9
-5
lines changed Original file line number Diff line number Diff line change @@ -483,6 +483,8 @@ def eval_func(model, *args):
483483 if model_args .model_name_or_path == 'mrm8488/spanbert-finetuned-squadv1' :
484484 fp32_op_names = ['/bert/embeddings/word_embeddings/Gather' ,
485485 '/bert/encoder/layer.[5-7|9]/output/dense/MatMul' ]
486+ elif model_args .model_name_or_path == 'salti/bert-base-multilingual-cased-finetuned-squad' :
487+ fp32_op_names = ['/bert/encoder/layer.[4-5]/output/dense/MatMul' ]
486488 elif model_args .model_name_or_path == 'distilbert-base-uncased-distilled-squad' :
487489 fp32_op_names = ['/distilbert/transformer/layer.[1-5]/ffn/lin[1-2]/MatMul' ]
488490 elif model_args .model_name_or_path == 'deepset/roberta-large-squad2' :
Original file line number Diff line number Diff line change 2525import onnxruntime as ort
2626from torch .nn .functional import pad
2727from torch .utils .data import DataLoader
28- from intel_extension_for_transformers .evaluation .lm_eval import evaluate
28+ from intel_extension_for_transformers .llm . evaluation .lm_eval import evaluate
2929from optimum .onnxruntime import ORTModelForCausalLM
3030from transformers import LlamaConfig , LlamaTokenizer
3131
Original file line number Diff line number Diff line change 1- git+https://github.com/intel/intel-extension-for-transformers.git@b8302f99a93e5f09a80431cee2fb384755062664
21git+https://github.com/EleutherAI/lm-evaluation-harness.git@83dbfbf6070324f3e5872f63e49d49ff7ef4c9b3
2+ intel-extension-for-transformers
33torch
44transformers
55accelerate
Original file line number Diff line number Diff line change @@ -12,6 +12,8 @@ pip install -r requirements.txt
1212```
1313> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment).
1414
15+ > Note: Weight-only quantization in Intel® Neural Compressor is still under development. We encourage you to use the ` master ` branch to access the latest features.
16+
1517## 2. Prepare Model
1618
1719``` bash
Original file line number Diff line number Diff line change 2626import onnxruntime as ort
2727from torch .nn .functional import pad
2828from torch .utils .data import DataLoader
29- from intel_extension_for_transformers .evaluation .lm_eval import evaluate
29+ from intel_extension_for_transformers .llm . evaluation .lm_eval import evaluate
3030from optimum .onnxruntime import ORTModelForCausalLM
3131from transformers import LlamaConfig , LlamaTokenizer
3232
Original file line number Diff line number Diff line change 1- git+https://github.com/intel/intel-extension-for-transformers.git@b8302f99a93e5f09a80431cee2fb384755062664
21git+https://github.com/EleutherAI/lm-evaluation-harness.git@83dbfbf6070324f3e5872f63e49d49ff7ef4c9b3
2+ intel-extension-for-transformers
33torch
44transformers
55accelerate
@@ -8,4 +8,4 @@ onnxruntime
88onnxruntime-extensions ; python_version < '3.11'
99datasets
1010optimum
11- evaluate
11+ evaluate
You can’t perform that action at this time.
0 commit comments