@@ -28,6 +28,7 @@
     LazyImport,
     check_to_quantized,
     clear_memory,
+    download_hf_model,
     flatten_list,
     get_block_names,
     get_gguf_architecture,
@@ -73,7 +74,10 @@ def create_model_class(
     low_cpu_mem_usage=False,
     model_type=convert_hf_to_gguf.ModelType.TEXT,
 ):
-    tmp_work_dir = Path(os.path.join(output_dir, TMP_DIR_NAME))
+    tmp_work_dir = model.name_or_path
+    os.makedirs(output_dir, exist_ok=True)
+    if not os.path.isdir(tmp_work_dir):
+        tmp_work_dir = download_hf_model(tmp_work_dir)
     with torch.inference_mode():
         model_architecture = get_gguf_architecture(tmp_work_dir, model_type=model_type)
         try:
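With this change, `create_model_class` no longer stages a temporary copy of the checkpoint under `output_dir`; it reads directly from `model.name_or_path`, fetching a local snapshot first when that value is a Hub repo id rather than a directory on disk. A minimal sketch of what the newly imported `download_hf_model` helper might do, assuming it wraps `huggingface_hub.snapshot_download` (the real helper lives in the project's utils and may differ):

```python
# Hypothetical stand-in for the download_hf_model helper imported above.
from huggingface_hub import snapshot_download

def download_hf_model(repo_id: str) -> str:
    # Download (or reuse from the local cache) the files of the repo and
    # return the path of the directory that holds them.
    return snapshot_download(repo_id=repo_id)
```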
@@ -95,7 +99,7 @@ def create_model_class(
     output_type = FTYPE_MAP.get(output_type.lower())

     model_instance = model_class(
-        dir_model=tmp_work_dir,
+        dir_model=Path(tmp_work_dir),
         ftype=output_type,
         fname_out=Path(output_dir),
         is_big_endian=False,
@@ -126,19 +130,10 @@ def pack_gguf_layer(
 ):
     """Export the model to gguf format."""
     global gguf_model_instance_global
-    tmp_work_dir = Path(os.path.join(output_dir, TMP_DIR_NAME))
-    if output_dir is not None and os.path.exists(output_dir) and not os.path.exists(tmp_work_dir):
+    if output_dir is not None and os.path.exists(output_dir):
         logger.warning_once(f"{output_dir} already exists, this may cause model conflict")
-    tmp_work_dir = Path(os.path.join(output_dir, TMP_DIR_NAME))
     if "gguf_model_instance_global" not in globals():
         config = model.config
-        config.save_pretrained(tmp_work_dir)
-        if tokenizer is not None and hasattr(tokenizer, "save_pretrained"):
-            tokenizer.save_pretrained(tmp_work_dir)
-        if processor is not None:
-            processor.save_pretrained(tmp_work_dir)
-        if image_processor is not None:
-            image_processor.save_pretrained(tmp_work_dir)

         gguf_model_instance_global = [
             create_model_class(
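Because the converter now reads metadata straight from the source checkpoint, the `save_pretrained` round-trips for the config, tokenizer, processor, and image processor are dropped here (and again in `save_quantized_as_gguf` below). The path resolution this relies on, restated as a small standalone sketch with a hypothetical helper name (the logic is copied from the `create_model_class` hunk above; `download_hf_model` is the helper imported in this patch):

```python
import os

def resolve_source_dir(model) -> str:
    # model.name_or_path is either a local directory or a Hub repo id.
    src = model.name_or_path
    if not os.path.isdir(src):
        # Not a directory on disk: treat it as a repo id and download it.
        src = download_hf_model(src)
    return src
```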
@@ -201,27 +196,11 @@ def pack_gguf_layer(
 @torch.inference_mode()
 def save_quantized_as_gguf(output_dir, backend="gguf:q4_0", layer_config=None, vlm=False, **kwargs):
     """Export the model to gguf format."""
-    tmp_work_dir = Path(os.path.join(output_dir, TMP_DIR_NAME))
-    if output_dir is not None and os.path.exists(output_dir) and not os.path.exists(tmp_work_dir):
-        logger.warning(f"{output_dir} already exists, this may cause model conflict")
-
     st = time.time()
     global gguf_model_instance_global

     model = kwargs["model"]
     if "gguf_model_instance_global" not in globals():
-        config = model.config
-        config.save_pretrained(tmp_work_dir)
-        tokenizer = kwargs.get("tokenizer", None)
-        if tokenizer is not None:
-            tokenizer.save_pretrained(tmp_work_dir)
-        processor = kwargs.get("processor", None)
-        if processor is not None:
-            processor.save_pretrained(tmp_work_dir)
-        image_processor = kwargs.get("image_processor", None)
-        if image_processor is not None:
-            image_processor.save_pretrained(tmp_work_dir)
-
         gguf_model_instance_global = [
             create_model_class(output_dir, model, layer_config, backend, model_type=convert_hf_to_gguf.ModelType.TEXT)
         ]
@@ -237,6 +216,5 @@ def save_quantized_as_gguf(output_dir, backend="gguf:q4_0", layer_config=None, vlm=False, **kwargs):
     rt = time.time() - st
     logger.info(f"Model successfully exported to {gguf_model.fname_out}, running time={rt}")
     del gguf_model_instance_global
-    shutil.rmtree(tmp_work_dir, ignore_errors=True)

     return model
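For reference, a hypothetical call site: the only kwarg this diff shows being read is `model`, and since nothing is re-serialized into a temporary directory anymore, `tokenizer` and `processor` are no longer needed for the export itself. The sketch assumes `model.name_or_path` still points at the original checkpoint directory or repo id:

```python
# Hypothetical usage of the updated export entry point.
model = save_quantized_as_gguf("./gguf_out", backend="gguf:q4_0", model=model)
```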