Skip to content

Commit c77d79f

Browse files
committed
stuff
1 parent 9c90421 commit c77d79f

File tree

4 files changed

+36
-40
lines changed

4 files changed

+36
-40
lines changed
Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
11

22
# Santacoder
3-
./scripts/run_textgen_benchmark_breakdown.sh santacoder bigcode/gpt_bigcode-santacoder 1 2040 5 0 v2_
4-
./scripts/run_textgen_benchmark_breakdown.sh santacoder bigcode/gpt_bigcode-santacoder 32 2040 5 0 v2_
5-
./scripts/run_textgen_benchmark_breakdown.sh santacoder bigcode/gpt_bigcode-santacoder 256 2040 5 0 v2_
3+
./scripts/run_textgen_benchmark_breakdown.sh santacoder bigcode/gpt_bigcode-santacoder 1 2040 5 0
4+
./scripts/run_textgen_benchmark_breakdown.sh santacoder bigcode/gpt_bigcode-santacoder 32 2040 5 0
5+
./scripts/run_textgen_benchmark_breakdown.sh santacoder bigcode/gpt_bigcode-santacoder 256 2040 5 0
66

7-
./scripts/run_textgen_benchmark_breakdown.sh santacoder bigcode/gpt_bigcode-santacoder 1 2040 11 1 v2_
8-
./scripts/run_textgen_benchmark_breakdown.sh santacoder bigcode/gpt_bigcode-santacoder 32 2040 11 1 v2_
9-
./scripts/run_textgen_benchmark_breakdown.sh santacoder bigcode/gpt_bigcode-santacoder 256 2040 11 1 v2_
7+
./scripts/run_textgen_benchmark_breakdown.sh santacoder bigcode/gpt_bigcode-santacoder 1 2040 11 1
8+
./scripts/run_textgen_benchmark_breakdown.sh santacoder bigcode/gpt_bigcode-santacoder 32 2040 11 1
9+
./scripts/run_textgen_benchmark_breakdown.sh santacoder bigcode/gpt_bigcode-santacoder 256 2040 11 1
1010

1111
# Large model
12-
./scripts/run_textgen_benchmark_breakdown.sh large_model ./data/bigcode_large-model 1 8190 11 0 v2_
13-
./scripts/run_textgen_benchmark_breakdown.sh large_model ./data/bigcode_large-model 8 8190 11 0 v2_
14-
./scripts/run_textgen_benchmark_breakdown.sh large_model ./data/bigcode_large-model 32 8190 11 0 v2_
15-
./scripts/run_textgen_benchmark_breakdown.sh large_model ./data/bigcode_large-model 256 8190 11 0 v2_ # OOM?
12+
./scripts/run_textgen_benchmark_breakdown.sh large_model ./data/bigcode_large-model 1 8190 11 0
13+
./scripts/run_textgen_benchmark_breakdown.sh large_model ./data/bigcode_large-model 8 8190 11 0
14+
./scripts/run_textgen_benchmark_breakdown.sh large_model ./data/bigcode_large-model 32 8190 11 0
15+
./scripts/run_textgen_benchmark_breakdown.sh large_model ./data/bigcode_large-model 256 8190 11 0 # OOM?
1616

17-
./scripts/run_textgen_benchmark_breakdown.sh large_model ./data/bigcode_large-model 1 8190 29 1 v2_ 1
18-
./scripts/run_textgen_benchmark_breakdown.sh large_model ./data/bigcode_large-model 8 8190 29 1 v2_ 1
19-
./scripts/run_textgen_benchmark_breakdown.sh large_model ./data/bigcode_large-model 32 8190 29 1 v2_ 1
20-
./scripts/run_textgen_benchmark_breakdown.sh large_model ./data/bigcode_large-model 256 8190 29 1 v2_ 1 # OOM?
17+
./scripts/run_textgen_benchmark_breakdown.sh large_model ./data/bigcode_large-model 1 8190 29 1 1
18+
./scripts/run_textgen_benchmark_breakdown.sh large_model ./data/bigcode_large-model 8 8190 29 1 1
19+
./scripts/run_textgen_benchmark_breakdown.sh large_model ./data/bigcode_large-model 32 8190 29 1 1
20+
./scripts/run_textgen_benchmark_breakdown.sh large_model ./data/bigcode_large-model 256 8190 29 1 1 # OOM?

scripts/run_textgen_benchmark_breakdown.sh

Lines changed: 18 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,47 +10,41 @@ MAX_NEW_TOKENS=${4:-2040}
1010
# Prime number to see key length padding effect.
1111
TOKEN_STEP=${5:-5}
1212
STEP_ID=${6:-""}
13-
FILE_PREFIX=${7:-""}
14-
CYCLES=${8:-10}
13+
CYCLES=${7:-10}
1514

16-
SAVE_DIR=data/benchmarks/v2
15+
SAVE_DIR=data/benchmarks/v3
1716
#BATCH_SIZES="1 2 4 8 16 24 32 48 64 96 128 160 224 256"
18-
RUN="python3 src/main.py --max_log_outputs=0 --dtype=float16 --device=cuda --custom_generate --breakdown_latency --ignore_oom --no_fast_init"
17+
RUN="python3 src/main.py --pipeline_class=TG_Pipeline --max_log_outputs=0 --dtype=float16 --device=cuda --custom_generate --breakdown_latency --ignore_oom --no_fast_init "
1918

2019

21-
RUNTIME=("")
22-
RUNTIME_NAMES=("base")
23-
24-
ATTN=( \
25-
"--pipeline_class=TG_Pipeline" \
26-
)
27-
ATTN_NAME=( \
28-
"textgen" \
29-
)
20+
IMPL=("flash" "santa" "causal" "vector" "bigcode")
3021

3122

3223
STEP=("--no_prefill" "--no_cache")
3324
STEP_NAME=("decode" "prefill")
3425

35-
COMMON="--pretrained_model=$MODEL_PATH --tokenizer=$MODEL_PATH --cycles=$CYCLES --max_input_length=1 --max_new_tokens=$MAX_NEW_TOKENS --key_length_step=$TOKEN_STEP --batch_size=$BATCH_SIZE predict_last_token=True"
26+
COMMON="--pretrained_model=$MODEL_PATH --tokenizer=$MODEL_PATH --cycles=$CYCLES --max_input_length=1 --max_new_tokens=$MAX_NEW_TOKENS --key_length_step=$TOKEN_STEP --batch_size=$BATCH_SIZE"
3627

3728
run () { # run(step, runtime, attn)
38-
FILE_NAME="$SAVE_DIR"/"$MODEL_NAME"_bs_"$BATCH_SIZE"_tok_"$MAX_NEW_TOKENS"_step_"$TOKEN_STEP"_"${STEP_NAME[$1]}"/"$FILE_PREFIX""${RUNTIME_NAMES[$2]}"_"${ATTN_NAME[$3]}".json
29+
FILE_NAME="$SAVE_DIR"/"$MODEL_NAME"_bs_"$BATCH_SIZE"_tok_"$MAX_NEW_TOKENS"_"${STEP_NAME[$1]}"_step_"$TOKEN_STEP"_"$CYCLES"/"${IMPL[$2]}".json
3930
if [ -f "$FILE_NAME" ];
4031
then
4132
echo "Skipping existing $FILE_NAME"
4233
else
43-
CMD="$RUN $COMMON ${RUNTIME[$2]} ${ATTN[$3]} ${STEP[$1]} --save=$FILE_NAME"
34+
CMD="MODEL_TYPE=${IMPL[$2]} $RUN $COMMON ${STEP[$1]} --save=$FILE_NAME"
4435
echo "$CMD"
4536
$CMD
4637
fi
4738
}
4839

49-
if [ "${STEP_ID}" -eq "0" ]
50-
then
51-
# Decode (default attn only)
52-
run 0 0 0
53-
else
54-
# Prefill
55-
run 1 0 0
56-
fi
40+
for impl in {0..4}
41+
do
42+
if [ "${STEP_ID}" -eq "0" ]
43+
then
44+
# Decode (default attn only)
45+
run 0 $impl
46+
else
47+
# Prefill
48+
run 1 $impl
49+
fi
50+
done

src/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,10 +179,10 @@ def main(argv: Optional[List[str]] = None) -> None:
179179
benchmark_metrics[Metrics.MEMORY_RESERVED_MAX] = torch.cuda.max_memory_reserved()
180180

181181
t3 = time.perf_counter()
182-
benchmark_metrics[Metrics.RUNTIME_BENCHMARK] = t3 - t2
183182
benchmark_metrics[Metrics.RUNTIME_TOTAL] = t3 - t0
184183

185184
if len(all_metrics) > 0:
185+
benchmark_metrics[Metrics.RUNTIME_BENCHMARK] = t3 - t2
186186
benchmark_metrics.update(pipeline.aggregate_metrics(all_metrics))
187187

188188
benchmark_metrics = Metrics.reorder_metrics(benchmark_metrics)

src/pipeline.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -516,7 +516,7 @@ def _load_pretrained(self, pretrained_model: str):
516516
pretrained_model, revision = parse_revision(pretrained_model)
517517

518518
with fast_init(self.device) if self.fast_init else contextlib.nullcontext():
519-
return get_model(pretrained_model, revision, False, False)
519+
return get_model(pretrained_model, revision, False, None)
520520

521521
def _generate_hf(self, inputs: Dict, max_new_tokens: int, use_cache: bool):
522522
raise NotImplementedError()
@@ -716,6 +716,8 @@ def __call__(
716716
Metrics.LATENCY_E2E: t1 - t0,
717717
}
718718

719+
output_text=[i+o for i, o in zip(text, output_text)]
720+
719721
return output_text, metrics
720722

721723

0 commit comments

Comments
 (0)