 import torch
 from vllm import LLM, SamplingParams

-MODELS = ["deepseek-ai/DeepSeek-V2-Lite"]
+MODELS = ["Qwen/Qwen2.5-0.5B-Instruct"]


 @pytest.mark.parametrize("model", MODELS)
@@ -34,34 +34,49 @@ def test_models(
     max_tokens: int,
     monkeypatch: pytest.MonkeyPatch,
 ) -> None:
-    return
-
     prompts = "The president of the United States is"

     sampling_params = SamplingParams(
         max_tokens=max_tokens,
         temperature=0.0,
     )

-    vllm_model = LLM(model, long_prefill_token_threshold=4, enforce_eager=True)
-    output_chunked = vllm_model.generate(prompts, sampling_params)
-    logprobs_chunked = output_chunked.outputs[0].logprobs
+    vllm_model = LLM(model,
+                     long_prefill_token_threshold=20,
+                     enforce_eager=True,
+                     trust_remote_code=True)
+    output1 = vllm_model.generate(prompts, sampling_params)
     del vllm_model
     torch.npu.empty_cache()

     vllm_model = LLM(model,
                      enforce_eager=True,
+                     trust_remote_code=True,
                      additional_config={
                          'ascend_scheduler_config': {
                              'enabled': True
                          },
                      })
-    output = vllm_model.generate(prompts, sampling_params)
-    logprobs = output.outputs[0].logprobs
+    output2 = vllm_model.generate(prompts, sampling_params)
     del vllm_model
     torch.npu.empty_cache()

-    logprobs_similarity = torch.cosine_similarity(logprobs_chunked.flatten(),
-                                                  logprobs.flatten(),
-                                                  dim=0)
-    assert logprobs_similarity > 0.95
+    # Extract the generated token IDs for comparison
+    token_ids1 = output1[0].outputs[0].token_ids
+    token_ids2 = output2[0].outputs[0].token_ids
+
+    print(f"Token IDs 1: {token_ids1}")
+    print(f"Token IDs 2: {token_ids2}")
+
+    # Convert the token IDs to tensors and calculate cosine similarity.
+    # Truncate to the shorter sequence so both tensors have the same length.
+    min_len = min(len(token_ids1), len(token_ids2))
+
+    tensor1 = torch.tensor(token_ids1[:min_len], dtype=torch.float32)
+    tensor2 = torch.tensor(token_ids2[:min_len], dtype=torch.float32)
+
+    # Calculate similarity using torch.cosine_similarity
+    similarity = torch.cosine_similarity(tensor1, tensor2, dim=0)
+    print(f"Token IDs cosine similarity: {similarity.item()}")
+
+    assert similarity > 0.95
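
For reference, the comparison step at the end of the updated test can be exercised on its own. The sketch below uses hypothetical token-ID lists (not taken from an actual model run) to show how the truncate-then-cosine-similarity check behaves when two generations agree on their shared prefix:

import torch

# Hypothetical token IDs from two runs; the second run stopped one token earlier.
token_ids1 = [791, 4872, 315, 279, 3723, 4273, 374, 279]
token_ids2 = [791, 4872, 315, 279, 3723, 4273, 374]

# Truncate to the shorter sequence so both tensors have the same length.
min_len = min(len(token_ids1), len(token_ids2))
tensor1 = torch.tensor(token_ids1[:min_len], dtype=torch.float32)
tensor2 = torch.tensor(token_ids2[:min_len], dtype=torch.float32)

# A similarity close to 1.0 means the two runs produced (nearly) the same tokens.
similarity = torch.cosine_similarity(tensor1, tensor2, dim=0)
assert similarity > 0.95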