|
@@ -21,56 +21,57 @@
 ]
 
 
-# @pytest.mark.skipif(
-#     not current_platform.is_cpu() and not current_platform.is_xpu() and not current_platform.is_cuda(),
-#     reason="only supports CPU/XPU/CUDA backend.",
-# )
-# @pytest.mark.parametrize("model", MODELS)
-# def test_auto_round(model):
-#     # Sample prompts.
-#     prompts = [
-#         "The capital of France is",
-#         "The future of AI is",
-#     ]
-#     # Create a sampling params object.
-#     sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
-#     # Create an LLM.
-#     QUANTIZATION = "auto-round"
-#     llm = LLM(model=model, quantization=QUANTIZATION, trust_remote_code=True, tensor_parallel_size=1)
-#     # Generate texts from the prompts.
-#     # The output is a list of RequestOutput objects
-#     # that contain the prompt, generated text, and other information.
-#     outputs = llm.generate(prompts, sampling_params)
-#     # Print the outputs.
-#     for output in outputs:
-#         prompt = output.prompt
-#         generated_text = output.outputs[0].text
-#         if "France" in prompt:
-#             assert "Paris" in generated_text
-#
-#
-# @pytest.mark.parametrize("model", MODELS)
-# def test_vllm_lm_eval(model):
-#     if shutil.which("auto-round") is None:
-#         pytest.skip("auto-round CLI not available")
-#
-#     env = os.environ.copy()
-#     env["VLLM_SKIP_WARMUP"] = "true"
-#     env["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
-#
-#     cmd = [
-#         "auto-round",
-#         "--model",
-#         model,
-#         "--eval",
-#         "--tasks",
-#         "lambada_openai",
-#         "--eval_bs",
-#         "8",
-#         "--limit",
-#         "10",
-#         "--vllm",
-#     ]
-#
-#     proc = subprocess.run(cmd, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
-#     assert proc.returncode == 0, f"auto-round failed (rc={proc.returncode}):\n{proc.stdout}"
+@pytest.mark.skipif(
+    not current_platform.is_cpu() and not current_platform.is_xpu() and not current_platform.is_cuda(),
+    reason="only supports CPU/XPU/CUDA backend.",
+)
+@pytest.mark.parametrize("model", MODELS)
+def test_auto_round(model):
+    # Sample prompts.
+    prompts = [
+        "The capital of France is",
+        "The future of AI is",
+    ]
+    # Create a sampling params object.
+    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+    # Create an LLM.
+    QUANTIZATION = "auto-round"
+    llm = LLM(model=model, quantization=QUANTIZATION, trust_remote_code=True, tensor_parallel_size=1)
+    # Generate texts from the prompts.
+    # The output is a list of RequestOutput objects
+    # that contain the prompt, generated text, and other information.
+    outputs = llm.generate(prompts, sampling_params)
+    # Check the outputs.
+    for output in outputs:
+        prompt = output.prompt
+        generated_text = output.outputs[0].text
+        if "France" in prompt:
+            assert "Paris" in generated_text
+
+
+@pytest.mark.parametrize("model", MODELS)
+def test_vllm_lm_eval(model):
+    if shutil.which("auto-round") is None:
+        pytest.skip("auto-round CLI not available")
+
+    env = os.environ.copy()
+    env["VLLM_SKIP_WARMUP"] = "true"
+    env["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
+
+    cmd = [
+        "auto-round",
+        "--model",
+        model,
+        "--eval",
+        "--tasks",
+        "lambada_openai",
+        "--eval_bs",
+        "8",
+        "--eval_backend",
+        "vllm",
+        "--limit",
+        "10",
+    ]
+
+    proc = subprocess.run(cmd, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
+    assert proc.returncode == 0, f"auto-round failed (rc={proc.returncode}):\n{proc.stdout}"