Commit 2ad418c

Use model fast forward

1 parent c77d79f
4 files changed: +21 −18 lines

4 files changed

+21
-18
lines changed

scripts/run_all_benchmark_breakdown.sh

Lines changed: 8 additions & 8 deletions
@@ -9,12 +9,12 @@
 ./scripts/run_benchmark_breakdown.sh santacoder bigcode/gpt_bigcode-santacoder 256 2040 11 1 v2_

 # Large model
-./scripts/run_benchmark_breakdown.sh large_model ./data/bigcode_large-model 1 8190 11 0 v2_
-./scripts/run_benchmark_breakdown.sh large_model ./data/bigcode_large-model 8 8190 11 0 v2_
-./scripts/run_benchmark_breakdown.sh large_model ./data/bigcode_large-model 32 8190 11 0 v2_
-./scripts/run_benchmark_breakdown.sh large_model ./data/bigcode_large-model 256 8190 11 0 v2_ # OOM?
+./scripts/run_benchmark_breakdown.sh starcoder ./data/bigcode_large-model 1 8190 11 0 v2_
+./scripts/run_benchmark_breakdown.sh starcoder ./data/bigcode_large-model 8 8190 11 0 v2_
+./scripts/run_benchmark_breakdown.sh starcoder ./data/bigcode_large-model 32 8190 11 0 v2_
+./scripts/run_benchmark_breakdown.sh starcoder ./data/bigcode_large-model 256 8190 11 0 v2_ # OOM?

-./scripts/run_benchmark_breakdown.sh large_model ./data/bigcode_large-model 1 8190 29 1 v2_ 1
-./scripts/run_benchmark_breakdown.sh large_model ./data/bigcode_large-model 8 8190 29 1 v2_ 1
-./scripts/run_benchmark_breakdown.sh large_model ./data/bigcode_large-model 32 8190 29 1 v2_ 1
-./scripts/run_benchmark_breakdown.sh large_model ./data/bigcode_large-model 256 8190 29 1 v2_ 1 # OOM?
+./scripts/run_benchmark_breakdown.sh starcoder ./data/bigcode_large-model 1 8190 29 1 v2_ 1
+./scripts/run_benchmark_breakdown.sh starcoder ./data/bigcode_large-model 8 8190 29 1 v2_ 1
+./scripts/run_benchmark_breakdown.sh starcoder ./data/bigcode_large-model 32 8190 29 1 v2_ 1
+./scripts/run_benchmark_breakdown.sh starcoder ./data/bigcode_large-model 256 8190 29 1 v2_ 1 # OOM?

scripts/run_textgen_benchmark_breakdown.sh

Lines changed: 7 additions & 6 deletions
@@ -14,13 +14,13 @@ CYCLES=${7:-10}

 SAVE_DIR=data/benchmarks/v3
 #BATCH_SIZES="1 2 4 8 16 24 32 48 64 96 128 160 224 256"
-RUN="python3 src/main.py --pipeline_class=TG_Pipeline --max_log_outputs=0 --dtype=float16 --device=cuda --custom_generate --breakdown_latency --ignore_oom --no_fast_init "
+RUN="python3 -m src.main --pipeline_class=TG_Pipeline --max_log_outputs=0 --dtype=float16 --device=cuda --custom_generate --breakdown_latency --ignore_oom --no_fast_init "


-IMPL=("flash" "santa" "causal" "vector" "bigcode")
+IMPL=("flash" "causal" "vector" "bigcode")


-STEP=("--no_prefill" "--no_cache")
+STEP=("" "--no_cache")
 STEP_NAME=("decode" "prefill")

 COMMON="--pretrained_model=$MODEL_PATH --tokenizer=$MODEL_PATH --cycles=$CYCLES --max_input_length=1 --max_new_tokens=$MAX_NEW_TOKENS --key_length_step=$TOKEN_STEP --batch_size=$BATCH_SIZE"
@@ -31,13 +31,14 @@ run () { # run(step, runtime, attn)
     then
         echo "Skipping existing $FILE_NAME"
     else
-        CMD="MODEL_TYPE=${IMPL[$2]} $RUN $COMMON ${STEP[$1]} --save=$FILE_NAME"
-        echo "$CMD"
+        export MODEL_TYPE="${IMPL[$2]}"
+        CMD="$RUN $COMMON ${STEP[$1]} --save=$FILE_NAME"
+        echo "MODEL_TYPE=${IMPL[$2]} $CMD"
         $CMD
     fi
 }

-for impl in {0..4}
+for impl in {0..3}
 do
     if [ "${STEP_ID}" -eq "0" ]
     then
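Note on the run() change above: MODEL_TYPE is now exported before the command is assembled, rather than prefixed inside $CMD. When a string like "MODEL_TYPE=flash python3 ..." is expanded from a variable and executed, the leading word is not applied as an environment assignment, so exporting it first is what actually reaches the launched process. A rough Python sketch of the same flow (the "flash" value and the flags shown are illustrative only, not taken from the script):

# Illustrative sketch only: mirror of the updated run() helper. The implementation
# selector is placed in the environment before the benchmark process is launched,
# rather than being embedded in the command string.
import os
import subprocess

os.environ["MODEL_TYPE"] = "flash"                    # export MODEL_TYPE="${IMPL[$2]}"
cmd = "python3 -m src.main --pipeline_class=TG_Pipeline --breakdown_latency".split()
print("MODEL_TYPE=flash", " ".join(cmd))              # echo "MODEL_TYPE=... $CMD"
subprocess.run(cmd, check=False)                      # $CMD (child inherits MODEL_TYPE)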

src/parse_breakdown_results.py

Lines changed: 2 additions & 2 deletions
@@ -62,8 +62,8 @@ def main(argv: Optional[List[str]] = None) -> None:
     dirname = args.input_dir.stem
     if title is None:
        try:
-            name, _, bs, _, _, _, _, step = dirname.rsplit("_", 7)
-            title = f"{name} {step}, bs = {bs}"
+            name, _, bs, _, _, _, _, step, cycles = dirname.rsplit("_", 8)
+            title = f"{name}, bs = {bs} (s={step}, c={cycles})"
         except ValueError:
             title = dirname
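The parser now unpacks one extra underscore-separated field, presumably because the result directory names gained a trailing cycles component. A quick illustration with a made-up directory name of the expected shape (the field values are invented, only the nine-field layout matters):

# Made-up example name; rsplit("_", 8) yields nine fields, matching the new unpacking.
dirname = "starcoder_flash_32_8190_11_0_v2_decode_10"
name, _, bs, _, _, _, _, step, cycles = dirname.rsplit("_", 8)
print(f"{name}, bs = {bs} (s={step}, c={cycles})")  # starcoder, bs = 32 (s=decode, c=10)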

src/pipeline.py

Lines changed: 4 additions & 2 deletions
@@ -619,8 +619,10 @@ def _generate_textgen(
         with torch.inference_mode():
             for key_length in range(input_length, output_length, key_length_step):
                 try:
-                    if key_length_step > 1 or not use_cache or not do_prefill:
-                        self._update_generate_batch(batch, use_cache, do_prefill, key_length)
+                    if (key_length_step > 1 and key_length>key_length) or not use_cache or not do_prefill:
+                        if not hasattr(self.model,"fast_forward"):
+                            raise NotImplementedError()
+                        self.model.fast_forward(batch, key_length, use_cache)
                     last_time = self._get_time(breakdown_latency)
                     generated, batch = self.model.generate_token(batch)
                     t2 = self._get_time(breakdown_latency)
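The pipeline now delegates the key-length jump to a model-level fast_forward method (guarded by hasattr) instead of the old _update_generate_batch helper. Below is a minimal sketch of what such a method could do for a latency benchmark, assuming a conventional per-layer (key, value) cache of shape [batch, heads, length, head_dim]; the actual batch object used by the TG_Pipeline backends differs, and this is not the real implementation.

# Hypothetical sketch only (the real fast_forward lives in the model backend and
# operates on its own batch structure). For a latency benchmark it is enough to pad
# the cache with zeros: only the shapes, and hence the attention cost, matter.
import torch


def fast_forward(past_key_values, key_length):
    """Grow each layer's cached keys/values to key_length - 1 positions so the next
    decode step attends over key_length tokens without running the skipped steps."""
    padded = []
    for key, value in past_key_values:
        missing = key_length - 1 - key.shape[2]  # shortfall along the sequence dimension
        if missing > 0:
            # Pad the sequence (second-to-last) dimension on the right with zeros.
            key = torch.nn.functional.pad(key, (0, 0, 0, missing))
            value = torch.nn.functional.pad(value, (0, 0, 0, missing))
        padded.append((key, value))
    return padded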
