@@ -26,76 +26,78 @@ def is_hpex_available():
     return True


-class TestAutoRound(unittest.TestCase):
-    @classmethod
-    def setUpClass(self):
-        model_name = "facebook/opt-125m"
-        self.model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True)
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-        self.llm_dataloader = LLMDataLoader()
-
-    @classmethod
-    def tearDownClass(self):
-        shutil.rmtree("./saved", ignore_errors=True)
-        shutil.rmtree("runs", ignore_errors=True)
-
-    def test_autogptq_format_hpu_inference(self):
-        if not is_hpex_available():
-            return
-        try:
-            import auto_gptq
-        except:
-            return
-        bits, group_size, sym = 4, 128, False
-        autoround = AutoRound(
-            self.model,
-            self.tokenizer,
-            bits=bits,
-            group_size=group_size,
-            sym=sym,
-            iters=2,
-            seqlen=2,
-            dataset=self.llm_dataloader,
-        )
-        autoround.quantize()
-        quantized_model_path = "./saved"
-
-        autoround.save_quantized(output_dir=quantized_model_path, inplace=False, format="auto_gptq")
-        model = (
-            AutoModelForCausalLM.from_pretrained(quantized_model_path, device_map="auto", trust_remote_code=True)
-            .to("hpu")
-            .to(torch.float32)
-        )
-        tokenizer = AutoTokenizer.from_pretrained(quantized_model_path)
-        text = "There is a girl who likes adventure,"
-        inputs = tokenizer(text, return_tensors="pt").to(model.device)
-        print(tokenizer.decode(model.generate(**inputs, max_new_tokens=50)[0]))
-        shutil.rmtree("./saved", ignore_errors=True)
-
-    def test_autoround_format_hpu_inference(self):
-        if not is_hpex_available():
-            return
-        bits, group_size, sym = 4, 128, False
-        autoround = AutoRound(
-            self.model,
-            self.tokenizer,
-            bits=bits,
-            group_size=group_size,
-            sym=sym,
-            iters=2,
-            seqlen=2,
-            dataset=self.llm_dataloader,
-        )
-        autoround.quantize()
-        quantized_model_path = "./saved"
-
-        autoround.save_quantized(output_dir=quantized_model_path, inplace=False, format="auto_round")
-
-        model = (
-            AutoModelForCausalLM.from_pretrained(quantized_model_path, device_map="auto").to("hpu").to(torch.float32)
-        )
-        tokenizer = AutoTokenizer.from_pretrained(quantized_model_path)
-        text = "There is a girl who likes adventure,"
-        inputs = tokenizer(text, return_tensors="pt").to(model.device)
-        print(tokenizer.decode(model.generate(**inputs, max_new_tokens=50)[0]))
-        shutil.rmtree("./saved", ignore_errors=True)
+# TODO: This test case is temporarily commented out since it has not been tested for a long time. We need to add it back and convert it to pytest format.
+
+# class TestAutoRound(unittest.TestCase):
+#     @classmethod
+#     def setUpClass(self):
+#         model_name = "facebook/opt-125m"
+#         self.model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True)
+#         self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+#         self.llm_dataloader = LLMDataLoader()
+
+#     @classmethod
+#     def tearDownClass(self):
+#         shutil.rmtree("./saved", ignore_errors=True)
+#         shutil.rmtree("runs", ignore_errors=True)
+
+#     def test_autogptq_format_hpu_inference(self):
+#         if not is_hpex_available():
+#             return
+#         try:
+#             import auto_gptq
+#         except:
+#             return
+#         bits, group_size, sym = 4, 128, False
+#         autoround = AutoRound(
+#             self.model,
+#             self.tokenizer,
+#             bits=bits,
+#             group_size=group_size,
+#             sym=sym,
+#             iters=2,
+#             seqlen=2,
+#             dataset=self.llm_dataloader,
+#         )
+#         autoround.quantize()
+#         quantized_model_path = "./saved"
+
+#         autoround.save_quantized(output_dir=quantized_model_path, inplace=False, format="auto_gptq")
+#         model = (
+#             AutoModelForCausalLM.from_pretrained(quantized_model_path, device_map="auto", trust_remote_code=True)
+#             .to("hpu")
+#             .to(torch.float32)
+#         )
+#         tokenizer = AutoTokenizer.from_pretrained(quantized_model_path)
+#         text = "There is a girl who likes adventure,"
+#         inputs = tokenizer(text, return_tensors="pt").to(model.device)
+#         print(tokenizer.decode(model.generate(**inputs, max_new_tokens=50)[0]))
+#         shutil.rmtree("./saved", ignore_errors=True)
+
+#     def test_autoround_format_hpu_inference(self):
+#         if not is_hpex_available():
+#             return
+#         bits, group_size, sym = 4, 128, False
+#         autoround = AutoRound(
+#             self.model,
+#             self.tokenizer,
+#             bits=bits,
+#             group_size=group_size,
+#             sym=sym,
+#             iters=2,
+#             seqlen=2,
+#             dataset=self.llm_dataloader,
+#         )
+#         autoround.quantize()
+#         quantized_model_path = "./saved"
+
+#         autoround.save_quantized(output_dir=quantized_model_path, inplace=False, format="auto_round")
+
+#         model = (
+#             AutoModelForCausalLM.from_pretrained(quantized_model_path, device_map="auto").to("hpu").to(torch.float32)
+#         )
+#         tokenizer = AutoTokenizer.from_pretrained(quantized_model_path)
+#         text = "There is a girl who likes adventure,"
+#         inputs = tokenizer(text, return_tensors="pt").to(model.device)
+#         print(tokenizer.decode(model.generate(**inputs, max_new_tokens=50)[0]))
+#         shutil.rmtree("./saved", ignore_errors=True)
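A minimal sketch of the pytest-style rewrite the TODO above calls for, not part of this commit. It assumes the AutoRound, is_hpex_available, and LLMDataLoader names already defined or imported in this test file; the fixture name opt_model and the skip reason are placeholders:

# NOTE: AutoRound, is_hpex_available, and LLMDataLoader are assumed to come
# from this file's existing imports; this is a sketch, not the committed code.
import shutil

import pytest
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


@pytest.fixture(scope="module")
def opt_model():
    # Load the small reference model once per module; clean up artifacts afterwards.
    model_name = "facebook/opt-125m"
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True)
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    yield model, tokenizer
    shutil.rmtree("./saved", ignore_errors=True)
    shutil.rmtree("runs", ignore_errors=True)


@pytest.mark.skipif(not is_hpex_available(), reason="HPEX is required for HPU inference")
def test_autoround_format_hpu_inference(opt_model):
    # Quantize with a deliberately tiny config (iters=2, seqlen=2) so the test stays fast.
    model, tokenizer = opt_model
    autoround = AutoRound(
        model,
        tokenizer,
        bits=4,
        group_size=128,
        sym=False,
        iters=2,
        seqlen=2,
        dataset=LLMDataLoader(),
    )
    autoround.quantize()
    quantized_model_path = "./saved"
    autoround.save_quantized(output_dir=quantized_model_path, inplace=False, format="auto_round")

    # Reload the quantized checkpoint and run a short generation on HPU.
    qmodel = AutoModelForCausalLM.from_pretrained(quantized_model_path, device_map="auto").to("hpu").to(torch.float32)
    qtokenizer = AutoTokenizer.from_pretrained(quantized_model_path)
    inputs = qtokenizer("There is a girl who likes adventure,", return_tensors="pt").to(qmodel.device)
    print(qtokenizer.decode(qmodel.generate(**inputs, max_new_tokens=50)[0]))

Replacing the early returns with pytest.mark.skipif markers lets skipped runs show up as skips rather than silent passes, which is the main practical gain of the conversion.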