Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion fastdeploy/benchmarks/latency.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def main(args: argparse.Namespace):
# NOTE(woosuk): If the request cannot be processed in a single batch,
# the engine will automatically process the request in multiple batches.
llm = LLM(**dataclasses.asdict(engine_args))
assert llm.llm_engine.cfg.max_model_len >= (args.input_len + args.output_len), (
assert llm.llm_engine.cfg.model_config.max_model_len >= (args.input_len + args.output_len), (
"Please ensure that max_model_len is greater than" " the sum of input_len and output_len."
)

Expand Down
2 changes: 2 additions & 0 deletions fastdeploy/entrypoints/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

def main():
import fastdeploy.entrypoints.cli.benchmark.main
import fastdeploy.entrypoints.cli.collect_env
import fastdeploy.entrypoints.cli.openai
import fastdeploy.entrypoints.cli.run_batch
import fastdeploy.entrypoints.cli.serve
Expand All @@ -34,6 +35,7 @@ def main():
fastdeploy.entrypoints.cli.openai,
fastdeploy.entrypoints.cli.benchmark.main,
fastdeploy.entrypoints.cli.serve,
fastdeploy.entrypoints.cli.collect_env,
]

parser = FlexibleArgumentParser(description="FastDeploy CLI")
Expand Down
4 changes: 2 additions & 2 deletions tests/benchmarks/test_latency_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def test_main(self, mock_tqdm, mock_randint, mock_llm):
mock_llm_instance = MagicMock()
mock_llm.return_value = mock_llm_instance
mock_cfg = MagicMock()
mock_cfg.max_model_len = 2048
mock_cfg.model_config.max_model_len = 2048
mock_llm_instance.llm_engine.cfg = mock_cfg

mock_randint.return_value = np.zeros((8, 32))
Expand Down Expand Up @@ -74,7 +74,7 @@ def test_main_profile_error(self, mock_exit, mock_llm):
mock_llm_instance = MagicMock()
mock_llm.return_value = mock_llm_instance
mock_cfg = MagicMock()
mock_cfg.max_model_len = 2048
mock_cfg.model_config.max_model_len = 2048
mock_llm_instance.llm_engine.cfg = mock_cfg

# Build args using parser
Expand Down
Loading