     torch.float32: (5e-3, 5e-3),
     torch.bfloat16: (3e-2, 2e-2),
 }
-# TODO: Modify this based on platform
-DEVICES = [
+
+pytestmark = pytest.mark.skipif(
+    not (current_platform.is_cuda_alike() or current_platform.is_cpu()),
+    reason="Backend not supported")
+
+DEVICES = ([
     f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)
-]
+] if current_platform.is_cuda_alike() else ["cpu"])
 
 # For GPU, we will launch different triton kernels between the prefill and decode
 # stages, so we need to verify this. prefill stage(True) or decode stage(False)
@@ -198,6 +202,10 @@ def check_punica_wrapper(punica_wrapper) -> bool:
         from vllm.lora.punica_wrapper.punica_gpu import PunicaWrapperGPU
 
         return type(punica_wrapper) is PunicaWrapperGPU
+    elif current_platform.is_cpu():
+        from vllm.lora.punica_wrapper.punica_cpu import PunicaWrapperCPU
+
+        return type(punica_wrapper) is PunicaWrapperCPU
     else:
         return False
 
@@ -211,7 +219,8 @@ def test_embeddings(dist_init, num_loras, device, vocab_size, stage) -> None:
     # For multi-GPU testing of Triton kernel, we must explicitly set the CUDA
     # device, see: https://github.com/triton-lang/triton/issues/2925
     # Same below.
-    torch.cuda.set_device(device)
+    if current_platform.is_cuda_alike():
+        torch.cuda.set_device(device)
 
     torch.set_default_device(device)
     max_loras = 8
@@ -313,7 +322,9 @@ def create_random_embedding_layer():
 def test_embeddings_with_new_embeddings(dist_init, num_loras, device,
                                         vocab_size, stage) -> None:
 
-    torch.cuda.set_device(device)
+    if current_platform.is_cuda_alike():
+        torch.cuda.set_device(device)
+
     torch.set_default_device(device)
     max_loras = 8
     punica_wrapper = get_punica_wrapper(8192, 256, device)
@@ -450,7 +461,9 @@ def create_random_embedding_layer():
 def test_lm_head_logits_processor(dist_init, num_loras, device, vocab_size,
                                   stage) -> None:
 
-    torch.cuda.set_device(device)
+    if current_platform.is_cuda_alike():
+        torch.cuda.set_device(device)
+
     torch.set_default_device(device)
     max_loras = 8
     punica_wrapper = get_punica_wrapper(8192, 256, device)
@@ -582,7 +595,9 @@ def _pretest():
 def test_linear_replicated(dist_init, num_loras, device, stage,
                            bias_enabled) -> None:
 
-    torch.cuda.set_device(device)
+    if current_platform.is_cuda_alike():
+        torch.cuda.set_device(device)
+
     torch.set_default_device(device)
     punica_wrapper = get_punica_wrapper(8192, 256, device)
     assert check_punica_wrapper(punica_wrapper)
@@ -695,7 +710,9 @@ def create_random_linear_replicated_layer():
 def test_linear_parallel(dist_init, num_loras, orientation, fully_shard,
                          device, stage, bias_enabled) -> None:
 
-    torch.cuda.set_device(device)
+    if current_platform.is_cuda_alike():
+        torch.cuda.set_device(device)
+
     torch.set_default_device(device)
     punica_wrapper = get_punica_wrapper(8192, 256, device)
     assert check_punica_wrapper(punica_wrapper)
@@ -818,7 +835,9 @@ def create_random_linear_parallel_layer():
 def test_column_parallel_packed(dist_init, num_loras, repeats, fully_shard,
                                 device, stage, bias_enabled) -> None:
 
-    torch.cuda.set_device(device)
+    if current_platform.is_cuda_alike():
+        torch.cuda.set_device(device)
+
     torch.set_default_device(device)
     punica_wrapper = get_punica_wrapper(8192, 256, device)
     assert check_punica_wrapper(punica_wrapper)
@@ -971,6 +990,8 @@ class FakeConfig:
 @pytest.mark.parametrize("rotary_dim", [None, 32])
 @pytest.mark.parametrize("head_size", [32, 108])
 @pytest.mark.parametrize("seq_len", [11, 1024])
+@pytest.mark.skipif(not current_platform.is_cuda_alike(),
+                    reason="Only CUDA backends are supported")
 def test_rotary_embedding_long_context(dist_init, num_loras, device,
                                        scaling_factors, max_position,
                                        is_neox_style, rotary_dim, head_size,
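
For readers skimming the diff, here is a minimal, self-contained sketch of the platform-gating pattern the change introduces. This is an illustrative reconstruction, not code from the PR: `current_platform` is vLLM's helper from vllm.platforms as used above, while `test_device_selection` is a hypothetical placeholder test.

import pytest
import torch

from vllm.platforms import current_platform

# Skip the whole module unless a CUDA-like GPU or the CPU backend is available.
pytestmark = pytest.mark.skipif(
    not (current_platform.is_cuda_alike() or current_platform.is_cpu()),
    reason="Backend not supported")

# Test up to two GPUs on CUDA-like platforms; otherwise fall back to the CPU.
DEVICES = ([
    f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)
] if current_platform.is_cuda_alike() else ["cpu"])


@pytest.mark.parametrize("device", DEVICES)
def test_device_selection(device) -> None:  # hypothetical placeholder test
    # Only CUDA needs the explicit set_device() call; it is required for
    # multi-GPU Triton kernel launches (triton-lang/triton#2925).
    if current_platform.is_cuda_alike():
        torch.cuda.set_device(device)
    torch.set_default_device(device)
    assert torch.empty(1).device.type == ("cuda" if "cuda" in device else "cpu")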