Skip to content

Commit

Permalink
[TRANSFORMATIONS] Introduce a new method of testing SDPAToPA transfor…
Browse files Browse the repository at this point in the history
…mation (#27067)

[TRANSFORMATIONS] Introduce a new method of testing SDPAToPA
transformation

Introduce a new method of testing the SDPAToPA transformation: not only
check that PagedAttentionExtension nodes appear in the graph, but also
verify that the changes in the number of nodes involved in the
transformation align with the reference numbers for each model.

Add a script for fast generation of reference values.

Signed-off-by: Andrii Staikov <andrii.staikov@intel.com>

- Tickets:
	* CVS-152290
  • Loading branch information
CuriousPanCake authored Oct 19, 2024
1 parent f33e255 commit c5025cc
Show file tree
Hide file tree
Showing 5 changed files with 731 additions and 12 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/job_pytorch_models_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ jobs:
if: ${{ inputs.model_scope == 'precommit' }}
run: |
export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH
python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/transformation_tests/test_pa_transformation.py -m precommit --html=${INSTALL_TEST_DIR}/TEST-torch_pagedattention_tests.html --self-contained-html -v --tb=short -n 2
python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/transformation_tests/test_pa_transformation.py -m precommit --html=${INSTALL_TEST_DIR}/TEST-torch_pagedattention_tests.html --self-contained-html -vvv -s --tb=short -n 2
env:
TEST_DEVICE: CPU
USE_SYSTEM_CACHE: False
Expand Down
94 changes: 94 additions & 0 deletions tests/model_hub_tests/transformation_tests/generate_ref_diffs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

'''
Use this script if you need to regenerate reference diffs for each model
to test SDPAToPA transformation.
The script will produce sdpa2pa_ref_diff.txt (or sdpa2pa_ref_diff_cache_eviction.txt
if using cache-eviction) containing a map in the
following format with nodes number changes for each model:
ref_diff_map = {
"hf-internal-testing/tiny-random-LlamaForCausalLM" : {
"PagedAttentionExtension" : 2,
"ScaledDotProductAttention" : -2,
"Parameter" : 7,
"ReadValue" : -4,
"Assign" : -4,
},
"hf-internal-testing/tiny-random-CohereForCausalLM" : {
"PagedAttentionExtension" : 2,
"ScaledDotProductAttention" : -2,
"Parameter" : 7,
"ReadValue" : -4,
"Assign" : -4,
},
.
.
.
}
The map has to be pasted into sdpa2pa_ref_diff.py (same directory) so it
can be imported by the SDPAToPA transformation tests.
Run the script by using 'python generate_ref_diffs.py' or 'python generate_ref_diffs.py True'
for generating the same map, but utilizing cache-eviction.
'''

import os
import sys
from pathlib import Path
import models_hub_common.utils as utils
from openvino._offline_transformations import paged_attention_transformation
from openvino._pyopenvino.op import _PagedAttentionExtension, Parameter, Result
from optimum.intel import OVModelForCausalLM

# Op types whose occurrence counts are compared before/after the SDPAToPA
# transformation; only deltas for these types are written to the output map.
nodes_to_compare = ("ScaledDotProductAttention", "PagedAttentionExtension", "Parameter", "ReadValue", "Assign")

def _count_tracked_ops(ov_model):
    """Return a {type_name: count} map for ops listed in ``nodes_to_compare``."""
    counts = {}
    for op in ov_model.get_ordered_ops():
        type_name = op.get_type_name()
        if type_name in nodes_to_compare:
            counts[type_name] = counts.get(type_name, 0) + 1
    return counts


def main():
    """Generate reference node-count diffs for the SDPAToPA transformation.

    For every model in the precommit list, counts the tracked op types before
    and after applying ``paged_attention_transformation`` and prints the
    per-model deltas as a Python dict literal to ``sdpa2pa_ref_diff.txt``
    (or ``sdpa2pa_ref_diff_cache_eviction.txt`` when the first CLI argument
    is the string "true", enabling cache-eviction mode).
    """
    use_cache_eviction = False
    if len(sys.argv) >= 2:
        # BUG FIX: the original used `sys.argv[1].lower() in 'true'`, which is
        # a substring test ('', 't', 'ru', 'rue', ... all matched, including
        # the empty string). An equality comparison is intended.
        use_cache_eviction = sys.argv[1].lower() == 'true'

    suffix = '_cache_eviction.txt' if use_cache_eviction else '.txt'
    output_file = Path(os.path.dirname(__file__)) / ('sdpa2pa_ref_diff' + suffix)

    # No explicit unlink needed: open(..., 'w') truncates an existing file.
    with open(output_file, 'w') as file:
        model_list = utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit"))
        print(output_file)
        print('ref_diff_map_cache_eviction = {' if use_cache_eviction else 'ref_diff_map = {', file=file)

        for model_id, _, _, _ in model_list:
            # Best-effort: keep generating entries for the remaining models
            # even if one fails to export; a failed model gets no entry.
            # (Narrowed from a bare `except:` so Ctrl-C/SystemExit still work.)
            try:
                model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True)
            except Exception:
                continue

            before_map = _count_tracked_ops(model.model)

            # Same best-effort policy for the transformation itself.
            try:
                paged_attention_transformation(model.model, use_cache_eviction, use_cache_eviction)
            except Exception:
                continue

            after_map = _count_tracked_ops(model.model)

            print(f'\t"{model_id}" : {{', file=file)
            for op_name in set(after_map) | set(before_map):
                delta = after_map.get(op_name, 0) - before_map.get(op_name, 0)
                print(f'\t\t"{op_name}" : {delta},', file=file)
            print('\t},', file=file)
        print('}', file=file)


if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,4 @@ Xenova/tiny-random-Phi3ForCausalLM,https://huggingface.co/Xenova/tiny-random-Phi
facebook/opt-125m,https://huggingface.co/facebook/opt-125m
facebook/opt-350m,https://huggingface.co/facebook/opt-350m
katuni4ka/tiny-random-chatglm2,https://huggingface.co/katuni4ka/tiny-random-chatglm2
katuni4ka/tiny-random-glm4,https://huggingface.co/katuni4ka/tiny-random-glm4
hf-internal-testing/tiny-random-BioGptForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BioGptForCausalLM,xfail,No ScaledDotProductAttention operation observed in the graph CVS-145820
hf-internal-testing/tiny-random-XGLMForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-XGLMForCausalLM,xfail,No ScaledDotProductAttention operation observed in the graph CVS-145820
katuni4ka/tiny-random-orion,https://huggingface.co/katuni4ka/tiny-random-orion,xfail,No ScaledDotProductAttention operation observed in the graph CVS-145820
katuni4ka/tiny-random-glm4,https://huggingface.co/katuni4ka/tiny-random-glm4
Loading

0 comments on commit c5025cc

Please sign in to comment.