File tree Expand file tree Collapse file tree 1 file changed +8
-8
lines changed Expand file tree Collapse file tree 1 file changed +8
-8
lines changed Original file line number Diff line number Diff line change @@ -1433,15 +1433,15 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
14331433 recommend_to_remove = True )
14341434 return False
14351435
1436- # Need at least Ampere for now (FA support required).
1437- # Skip this check if we are running on a non-GPU platform,
1438- # or if the device capability is not available
1439- # (e.g. in a Ray actor without GPUs).
1436+ # Triton v3.3 has an f16 conversion regression on Turing and Volta GPUs,
1437+ # which breaks fp16 inference.
1438+ # See: https://github.com/triton-lang/triton/issues/6698
14401439 if (current_platform .is_cuda ()
1441- and current_platform .get_device_capability ()
1442- and current_platform .get_device_capability ().major < 8 ):
1443- _raise_or_fallback (feature_name = "Compute Capability < 8.0" ,
1444- recommend_to_remove = False )
1440+ and not current_platform .has_device_capability (80 )
1441+ and model_config .dtype == torch .float16 ):
1442+ _raise_or_fallback (
1443+ feature_name = "Compute Capability < 8.0 with FP16" ,
1444+ recommend_to_remove = False )
14451445 return False
14461446
14471447 if self .kv_cache_dtype != "auto" :
You can’t perform that action at this time.
0 commit comments