File tree Expand file tree Collapse file tree 1 file changed +6
-3
lines changed Expand file tree Collapse file tree 1 file changed +6
-3
lines changed Original file line number Diff line number Diff line change @@ -746,13 +746,14 @@ def _task_to_convert(task: TaskOption) -> ConvertType:
746
746
747
747
self .pooler_config = self ._init_pooler_config ()
748
748
749
- self .dtype = _get_and_verify_dtype (
749
+ self .dtype : torch . dtype = _get_and_verify_dtype (
750
750
self .model ,
751
751
self .hf_config ,
752
752
self .dtype ,
753
753
is_pooling_model = self .runner_type == "pooling" ,
754
754
revision = self .revision ,
755
755
)
756
+ self .head_dtype : torch .dtype = self ._get_head_dtype ()
756
757
757
758
# Interleaved attention is not supported by some backends in V0
758
759
if (not self .disable_sliding_window
@@ -1778,8 +1779,10 @@ def get_and_verify_max_len(self, max_model_len: int):
1778
1779
logger .info ("Using max model len %s" , max_model_len )
1779
1780
return max_model_len
1780
1781
1781
- @property
1782
- def head_dtype (self ) -> torch .dtype :
1782
+ def _get_head_dtype (self ) -> torch .dtype :
1783
+ if torch .float32 not in current_platform .supported_dtypes :
1784
+ return self .dtype
1785
+
1783
1786
if envs .VLLM_USING_FP32_HEAD :
1784
1787
return torch .float32
1785
1788
You can’t perform that action at this time.
0 commit comments