-
Notifications
You must be signed in to change notification settings - Fork 45
saving measurements during compile and run time #108
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,12 +8,19 @@ | |
import argparse | ||
import logging | ||
import os | ||
import time | ||
from typing import List, Optional | ||
|
||
import QEfficient | ||
from QEfficient.cloud.export import get_onnx_model_path | ||
from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv | ||
from QEfficient.utils import check_and_assign_cache_dir, get_qpc_dir_path, load_hf_tokenizer, qpc_exists | ||
from QEfficient.utils import ( | ||
check_and_assign_cache_dir, | ||
get_qpc_dir_path, | ||
load_hf_tokenizer, | ||
qpc_exists, | ||
tabulate_measurements, | ||
) | ||
from QEfficient.utils.logging_utils import logger | ||
|
||
|
||
|
@@ -35,6 +42,7 @@ def main( | |
local_model_dir: Optional[str] = None, | ||
cache_dir: Optional[str] = None, | ||
hf_token: Optional[str] = None, | ||
benchmark: bool = False, | ||
) -> None: | ||
""" | ||
1. Check if compiled qpc for given config already exists, if it does jump to execute, else | ||
|
@@ -78,8 +86,11 @@ def main( | |
) | ||
|
||
# Handle qpc generation | ||
|
||
if qpc_exists(qpc_dir_path): | ||
logger.info(f"Pre-compiled qpc found at {qpc_dir_path}! Executing with given prompt") | ||
compile_time = "pre-compiled" | ||
|
||
else: | ||
# Handle onnx model generation | ||
onnx_model_path = get_onnx_model_path( | ||
|
@@ -89,6 +100,9 @@ def main( | |
######### | ||
# Compile | ||
######### | ||
|
||
compile_start_time = time.perf_counter() | ||
|
||
_ = QEfficient.compile( | ||
onnx_path=onnx_model_path, | ||
qpc_path=os.path.dirname( | ||
|
@@ -106,10 +120,13 @@ def main( | |
full_batch_size=full_batch_size, | ||
) | ||
|
||
compile_time = (time.perf_counter() - compile_start_time) // 1 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This could also be moved under the `if benchmark:` condition. Also, why keep it as an integer? |
||
|
||
######### | ||
# Execute | ||
######### | ||
cloud_ai_100_exec_kv( | ||
|
||
execinfo = cloud_ai_100_exec_kv( | ||
tokenizer=tokenizer, | ||
qpc_path=qpc_dir_path, | ||
device_id=device_group, | ||
|
@@ -119,6 +136,27 @@ def main( | |
full_batch_size=full_batch_size, | ||
) | ||
|
||
######### | ||
# Log | ||
######### | ||
|
||
if benchmark: | ||
_ = tabulate_measurements( | ||
model_name=model_name, | ||
tokenizer=tokenizer, | ||
prompt=prompt, | ||
batch_size=batch_size, | ||
full_batch_size=full_batch_size, | ||
prompt_len=prompt_len, | ||
ctx_len=ctx_len, | ||
num_cores=num_cores, | ||
device_group=device_group, | ||
mxfp6=mxfp6, | ||
mxint8=mxint8, | ||
compile_time=compile_time, | ||
execinfo=execinfo, | ||
) | ||
|
||
|
||
if __name__ == "__main__": | ||
parser = argparse.ArgumentParser( | ||
|
@@ -198,9 +236,15 @@ def main( | |
default=None, | ||
help="Set full batch size to enable continuous batching mode, default is None", | ||
) | ||
|
||
parser.add_argument( | ||
"--benchmark", | ||
"-b", | ||
action="store_true", | ||
help="store measurements into a csv table at model_card_dir", | ||
) | ||
args = parser.parse_args() | ||
if args.verbose: | ||
logger.setLevel(logging.INFO) | ||
del args.verbose # type: ignore | ||
|
||
main(**args.__dict__) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,4 +17,5 @@ | |
onnx_exists, | ||
padding_check_and_fix, | ||
qpc_exists, | ||
tabulate_measurements, | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ | |
# | ||
# ----------------------------------------------------------------------------- | ||
|
||
import csv | ||
import os | ||
from typing import List, Optional, Tuple, Union | ||
|
||
|
@@ -277,3 +278,59 @@ def get_num_layers_from_config(config): | |
raise ValueError("Invalid model configuration: n_layer/n_layers or num_hidden_layers not found.") | ||
|
||
return n_layer | ||
|
||
|
||
def tabulate_measurements(
    model_name,
    tokenizer,
    prompt,
    batch_size,
    full_batch_size,
    prompt_len,
    ctx_len,
    num_cores,
    device_group,
    mxfp6,
    mxint8,
    compile_time,
    execinfo,
):
    """
    Append one row of compile/run measurements to a per-model benchmarking CSV.

    The CSV is stored under ``QEFF_MODELS_DIR/<model_name>/`` and is named
    ``<model_name with "/" replaced by "-">_benchmarking.csv``. The header row
    is written only when the file is new or empty; every call appends one row.

    Args:
        model_name: Model identifier; used for the "MODEL NAME" column and to
            derive the output directory and file name.
        tokenizer: Callable invoked as ``tokenizer(prompt, return_tensors="np")``;
            the result must expose ``input_ids``.
        prompt: Prompt(s) passed to the tokenizer; the longest tokenized
            length is recorded in the "PL" column.
        batch_size: Value recorded in the "BATCH SIZE" column.
        full_batch_size: Value recorded in the "FULL BATCH_SIZE" column.
        prompt_len: Value recorded in the "CPL" column.
        ctx_len: Value recorded in the "CL" column.
        num_cores: Value recorded in the "CORES" column.
        device_group: Device ids; its length is recorded as "NUM SOCS"
            (1 when it is empty or None) and the value itself as "DEVICE ID".
        mxfp6: Value recorded in the "MXFP6 W" column.
        mxint8: Value recorded in the "MXINT8 $KV" column.
        compile_time: Value recorded in the "COMPILE TIME (S)" column
            (callers may pass a number or a marker string).
        execinfo: Object exposing ``total_decode_tokens``, ``prefill_time``,
            ``decode_perf``, ``total_perf`` and ``total_time``.

    Returns:
        The path of the CSV file. It is returned even if writing failed; in
        that case the ``OSError`` is reported on stdout instead of raised.
    """
    # Longest tokenized prompt in the batch.
    input_len = max(len(ids) for ids in tokenizer(prompt, return_tensors="np").input_ids)

    # NOTE(review): a reviewer suggested also recording `mos` and
    # `aic_enable_depth_first` here; they are not parameters of this function yet.
    fields = {
        "MODEL\nNAME": model_name,
        "BATCH\nSIZE": batch_size,
        "FULL\nBATCH_SIZE": full_batch_size,
        "CPL": prompt_len,
        "PL": input_len,
        "GL": int(execinfo.total_decode_tokens + 1),
        "CL": ctx_len,
        "CORES": num_cores,
        "NUM\nSOCS": len(device_group) if device_group else 1,
        "DEVICE\nID": device_group,
        "MXFP6\nW": mxfp6,
        "MXINT8\n$KV": mxint8,
        "COMPILE\nTIME (S)": compile_time,
        "PREFILL\nTIME (S)": round(execinfo.prefill_time, 2),
        "DECODE\nTOK/S": round(execinfo.decode_perf, 2),
        "TOTAL\nTOK/S": round(execinfo.total_perf, 2),
        "TOTAL\nTIME (S)": round(execinfo.total_time, 2),
    }

    model_card_dir = os.path.join(QEFF_MODELS_DIR, str(model_name))
    # "/" in hub-style names would otherwise be read as a path separator.
    safe_name = str(model_name).replace("/", "-")
    file_name = os.path.join(model_card_dir, f"{safe_name}_benchmarking.csv")

    try:
        os.makedirs(model_card_dir, exist_ok=True)
        # newline="" is required by the csv module, notably because the
        # header cells contain embedded newlines that must be quoted as-is.
        with open(file_name, "a", newline="") as csvfile:
            csvwriter = csv.writer(csvfile)
            # Position at the end explicitly so tell() reports the file size;
            # a size of zero means the header has not been written yet.
            csvfile.seek(0, os.SEEK_END)
            if csvfile.tell() == 0:
                csvwriter.writerow(fields.keys())
            csvwriter.writerow(fields.values())
    except OSError as e:
        print(f"An error occurred while handling file {file_name}: {e}")

    return file_name
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you update the docstring to describe this flag's usage?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I couldn't find it. Can you point me to the right docstring location?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can add it at line 71.