Commit 5e22944

fix vllm backend evaluation (#872)
* fix vllm backend evaluation
  Signed-off-by: He, Xin3 <xin3.he@intel.com>
* change default to hf
  Signed-off-by: He, Xin3 <xin3.he@intel.com>
* update per comments
  Signed-off-by: He, Xin3 <xin3.he@intel.com>
* Update eval_cli.py
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci

---------

Signed-off-by: He, Xin3 <xin3.he@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent eb5cdcd commit 5e22944

File tree: 2 files changed, +71 -54 lines


auto_round/eval/eval_cli.py

Lines changed: 17 additions & 1 deletion
@@ -13,6 +13,7 @@
 # limitations under the License.
 import argparse
 import os
+import time

 from auto_round.utils import (
     clear_memory,
@@ -66,6 +67,13 @@ def __init__(self, *args, **kwargs):
             help="Limit the number of examples per task. "
             "If <1, limit is a percentage of the total number of examples.",
         )
+        self.add_argument(
+            "--eval_backend",
+            default="hf",
+            type=str,
+            choices=["hf", "vllm"],
+            help="Use hf backend for evaluation by default.",
+        )
         # vllm related arguments
         self.add_argument("--revision", default=None, type=str, help="model revision for vllm")
         self.add_argument("--tokenizer", default=None, type=str, help="tokenizer to use with vllm")
@@ -104,7 +112,15 @@ def _eval_init(tasks, model_path, device, disable_trust_remote_code=False, dtype


 def eval(args):
-    import time
+    if args.eval_backend == "vllm":
+        try:
+            assert isinstance(args.model, str), "vllm evaluation only supports model name or path."
+            eval_with_vllm(args)
+            return
+        except Exception as e:  # pragma: no cover
+            print(f"vllm evaluation failed: {e}, fallback to default hf backend evaluation.")
+            args.eval_backend = "hf"
+            clear_memory()

     tasks, model_args, device_str = _eval_init(
         args.tasks, args.model, args.device_map, args.disable_trust_remote_code, args.eval_model_dtype
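
With this change, the vLLM path is best-effort: if `eval_with_vllm(args)` raises, `eval()` prints the error, switches back to the hf backend, clears memory, and continues with the default evaluation. A minimal CLI invocation of the new flag would look roughly like the following sketch; the model path is a placeholder, and the remaining flags mirror the ones used in the test below:

# Sketch only: <model-name-or-path> is a placeholder for an AutoRound-quantized checkpoint.
auto-round --model <model-name-or-path> --eval \
    --tasks lambada_openai --eval_bs 8 --limit 10 \
    --eval_backend vllm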

test/test_cuda/test_vllm.py

Lines changed: 54 additions & 53 deletions
@@ -21,56 +21,57 @@
 ]


-# @pytest.mark.skipif(
-#     not current_platform.is_cpu() and not current_platform.is_xpu() and not current_platform.is_cuda(),
-#     reason="only supports CPU/XPU/CUDA backend.",
-# )
-# @pytest.mark.parametrize("model", MODELS)
-# def test_auto_round(model):
-#     # Sample prompts.
-#     prompts = [
-#         "The capital of France is",
-#         "The future of AI is",
-#     ]
-#     # Create a sampling params object.
-#     sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
-#     # Create an LLM.
-#     QUANTIZATION = "auto-round"
-#     llm = LLM(model=model, quantization=QUANTIZATION, trust_remote_code=True, tensor_parallel_size=1)
-#     # Generate texts from the prompts.
-#     # The output is a list of RequestOutput objects
-#     # that contain the prompt, generated text, and other information.
-#     outputs = llm.generate(prompts, sampling_params)
-#     # Print the outputs.
-#     for output in outputs:
-#         prompt = output.prompt
-#         generated_text = output.outputs[0].text
-#         if "France" in prompt:
-#             assert "Paris" in generated_text
-#
-#
-# @pytest.mark.parametrize("model", MODELS)
-# def test_vllm_lm_eval(model):
-#     if shutil.which("auto-round") is None:
-#         pytest.skip("auto-round CLI not available")
-#
-#     env = os.environ.copy()
-#     env["VLLM_SKIP_WARMUP"] = "true"
-#     env["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
-#
-#     cmd = [
-#         "auto-round",
-#         "--model",
-#         model,
-#         "--eval",
-#         "--tasks",
-#         "lambada_openai",
-#         "--eval_bs",
-#         "8",
-#         "--limit",
-#         "10",
-#         "--vllm",
-#     ]
-#
-#     proc = subprocess.run(cmd, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
-#     assert proc.returncode == 0, f"auto-round failed (rc={proc.returncode}):\n{proc.stdout}"
+@pytest.mark.skipif(
+    not current_platform.is_cpu() and not current_platform.is_xpu() and not current_platform.is_cuda(),
+    reason="only supports CPU/XPU/CUDA backend.",
+)
+@pytest.mark.parametrize("model", MODELS)
+def test_auto_round(model):
+    # Sample prompts.
+    prompts = [
+        "The capital of France is",
+        "The future of AI is",
+    ]
+    # Create a sampling params object.
+    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+    # Create an LLM.
+    QUANTIZATION = "auto-round"
+    llm = LLM(model=model, quantization=QUANTIZATION, trust_remote_code=True, tensor_parallel_size=1)
+    # Generate texts from the prompts.
+    # The output is a list of RequestOutput objects
+    # that contain the prompt, generated text, and other information.
+    outputs = llm.generate(prompts, sampling_params)
+    # Print the outputs.
+    for output in outputs:
+        prompt = output.prompt
+        generated_text = output.outputs[0].text
+        if "France" in prompt:
+            assert "Paris" in generated_text
+
+
+@pytest.mark.parametrize("model", MODELS)
+def test_vllm_lm_eval(model):
+    if shutil.which("auto-round") is None:
+        pytest.skip("auto-round CLI not available")
+
+    env = os.environ.copy()
+    env["VLLM_SKIP_WARMUP"] = "true"
+    env["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
+
+    cmd = [
+        "auto-round",
+        "--model",
+        model,
+        "--eval",
+        "--tasks",
+        "lambada_openai",
+        "--eval_bs",
+        "8",
+        "--eval_backend",
+        "vllm",
+        "--limit",
+        "10",
+    ]
+
+    proc = subprocess.run(cmd, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
+    assert proc.returncode == 0, f"auto-round failed (rc={proc.returncode}):\n{proc.stdout}"
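
The re-enabled tests swap the old `--vllm` switch for `--eval_backend vllm` and can be run directly with pytest against test/test_cuda/test_vllm.py. For readers who want to exercise the same generation path outside pytest, the body of test_auto_round reduces to a short standalone script; a minimal sketch, assuming vLLM with auto-round quantization support is installed and using a placeholder model name rather than the checkpoints pinned in the test's MODELS list:

# Minimal sketch based on test_auto_round above; the model name is a placeholder.
from vllm import LLM, SamplingParams

prompts = ["The capital of France is", "The future of AI is"]
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

# Load an AutoRound-quantized checkpoint through vLLM's "auto-round" quantization backend.
llm = LLM(
    model="<auto-round-quantized-model>",
    quantization="auto-round",
    trust_remote_code=True,
    tensor_parallel_size=1,
)

# Generate completions and print prompt/output pairs.
outputs = llm.generate(prompts, sampling_params)
for output in outputs:
    print(output.prompt, "->", output.outputs[0].text)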
