Skip to content

Commit

Permalink
add shared blocks for prefill
Browse files: browse the repository at this point in the history
  • Loading branch information
blinkbear committed Nov 1, 2024
1 parent 71f378e commit ef84d8f
Show file tree
Hide file tree
Showing 8 changed files with 315 additions and 134 deletions.
10 changes: 5 additions & 5 deletions benchmarks/benchmark_serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,11 @@ def sample_sharegpt_requests(
prompt_len = len(prompt_token_ids)
prompt_len_list.append(prompt_len)
output_len = len(completion_token_ids
) if fixed_output_len is None else fixed_output_len
if prompt_len < 0 or output_len < 100:
# Prune too short sequences.
continue
if prompt_len > 1024 or prompt_len + output_len > 2048:
)
# if prompt_len < 0:
# # Prune too short sequences.
# continue
if prompt_len + output_len > 2048:
# Prune too long sequences.
continue
filtered_dataset.append((prompt, prompt_len, output_len))
Expand Down
14 changes: 7 additions & 7 deletions benchmarks/result/analysis/result_analysis_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ def __():
@app.cell
def __(base_dir, os):
# _date = "20240918"
date = "20241027"
counters = [1598]
selected_qps = 10
date = "20241030"
counters = [0,118,119,123,125,126]
selected_qps = 2
e2e_result_dir_names = [
os.path.join(base_dir, date, str(counter)) for counter in counters
]
Expand Down Expand Up @@ -517,7 +517,7 @@ def bar_plot_2(_long_df, required_metric_name_list, required_metric_type_list):
def __(bar_plot_2, long_df, plt):
required_metric_name = "P99"
# bar_plot_1(_long_df, required_metric_name=required_metric_name)
required_metric_name_list = ["Median", "P99"]
required_metric_name_list = ["Mean", "P99"]
required_metric_type_list = ["TTFT", "TPOT"]
bar_plot_2(long_df, required_metric_name_list, required_metric_type_list)
plt.subplots_adjust(wspace=0.2, hspace=0.6)
Expand Down Expand Up @@ -779,9 +779,9 @@ def __(execute_result_dir_names, os, pd, selected_qps):
_detailed_result_df["Running"]
/ _detailed_result_df["GPU KV cache usage"]
)
_detailed_result_df["Resource Rate"] = (
_detailed_result_df["Total Throuhgput"] = (
_detailed_result_df["Avg prompt throughput"]
/ _detailed_result_df["Avg generation throughput"]
+ _detailed_result_df["Avg generation throughput"]
)
if "sjf" in _file:
# continue
Expand All @@ -802,7 +802,7 @@ def __(execute_result_dfs, plt, sns):
plt.figure(figsize=(16, 6), dpi=150)
# Subplot 1: Avg generation throughput
metric_labels = [
"Avg prompt throughput",
"Total Throuhgput",
"Avg generation throughput",
"Running",
"Pending",
Expand Down
Loading

0 comments on commit ef84d8f

Please sign in to comment.