feat: add args for profiling engine caching #3329

Merged
merged 2 commits on Dec 18, 2024

Changes from all commits
36 changes: 34 additions & 2 deletions tools/perf/perf_run.py
@@ -255,6 +255,13 @@ def run_dynamo(model, input_tensors, params, precision, batch_size):
         min_block_size=params.get("min_block_size", 1),
         debug=False,
         truncate_long_and_double=params.get("truncate", False),
+        immutable_weights=params.get("immutable_weights", True),
+        strip_engine_weights=params.get("strip_engine_weights", False),
+        refit_identical_engine_weights=params.get(
+            "refit_identical_engine_weights", False
+        ),
+        cache_built_engines=params.get("cache_built_engines", False),
+        reuse_cached_engines=params.get("reuse_cached_engines", False),
     )
     end_compile = timeit.default_timer()
     compile_time_s = end_compile - start_compile
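
For context, a minimal sketch (not the PR's code) of how a params dict like the one above can drive the compile call. It assumes these kwargs land in `torch_tensorrt.dynamo.compile` (the hunk sits inside `run_dynamo`) and that engine caching presupposes refittable engines, i.e. `immutable_weights=False` — that prerequisite is an assumption based on the flag semantics. The model and inputs are placeholders.

```python
import torch
import torch_tensorrt

model = torch.nn.Linear(64, 64).cuda().eval()
inputs = [torch.randn(8, 64, device="cuda")]
params = {
    "immutable_weights": False,    # refittable engines (assumed prerequisite for caching)
    "cache_built_engines": True,   # persist compiled TRT engines
    "reuse_cached_engines": True,  # reload a matching engine instead of rebuilding
}

exp_program = torch.export.export(model, tuple(inputs))
trt_model = torch_tensorrt.dynamo.compile(
    exp_program,
    inputs=inputs,
    immutable_weights=params.get("immutable_weights", True),
    cache_built_engines=params.get("cache_built_engines", False),
    reuse_cached_engines=params.get("reuse_cached_engines", False),
)
```

On a second benchmark run with `reuse_cached_engines=True`, compile_time_s should drop noticeably, which is what these new profiling flags are meant to measure.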
@@ -585,6 +592,31 @@ def run(
         type=str,
         help="Path of the output file where performance summary is written.",
     )
+    arg_parser.add_argument(
+        "--immutable_weights",
+        action="store_true",
+        help="Build non-refittable engines. This is useful for some layers that are not refittable. If this argument is set to true, `strip_engine_weights` and `refit_identical_engine_weights` will be ignored.",
+    )
+    arg_parser.add_argument(
+        "--strip_engine_weights",
+        action="store_true",
+        help="Strip engine weights from the serialized engine. This is useful when the engine is to be deployed in an environment where the weights are not required.",
+    )
+    arg_parser.add_argument(
+        "--refit_identical_engine_weights",
+        action="store_true",
+        help="Refit engines with identical weights. This is useful when the same model is compiled multiple times with different inputs and the weights are the same. This will save time by reusing the same engine for different inputs.",
+    )
+    arg_parser.add_argument(
+        "--cache_built_engines",
+        action="store_true",
+        help="Whether to save the compiled TRT engines to storage.",
+    )
+    arg_parser.add_argument(
+        "--reuse_cached_engines",
+        action="store_true",
+        help="Whether to load the compiled TRT engines from storage.",
+    )
     args = arg_parser.parse_args()

     # Create random input tensor of certain size
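
A hedged, standalone sketch of how these `store_true` flags surface in a params dict like the one `run_dynamo` reads; the conversion from the argparse namespace to params (e.g. via `vars(args)`) is not part of this diff, so treat that step as an assumption.

```python
import argparse

# Reconstruct just the new flags from this PR.
arg_parser = argparse.ArgumentParser()
for flag in (
    "--immutable_weights",
    "--strip_engine_weights",
    "--refit_identical_engine_weights",
    "--cache_built_engines",
    "--reuse_cached_engines",
):
    arg_parser.add_argument(flag, action="store_true")

# e.g. profiling engine caching: cache engines on the first run, reuse thereafter
args = arg_parser.parse_args(["--cache_built_engines", "--reuse_cached_engines"])
params = vars(args)
assert params["cache_built_engines"] and params["reuse_cached_engines"]
assert not params["immutable_weights"]  # store_true flags default to False
```

Note that because `--immutable_weights` is a `store_true` flag, its argparse default is False, while `run_dynamo` falls back to `params.get("immutable_weights", True)` only when the key is absent entirely.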
@@ -605,9 +637,9 @@ def run(
     # Load PyTorch Model, if provided
     if len(model_name_torch) > 0 and os.path.exists(model_name_torch):
         print("Loading user provided torch model: ", model_name_torch)
-        model_torch = torch.load(model_name_torch).eval()
+        model_torch = torch.load(model_name_torch).cuda().eval()
     elif model_name_torch in BENCHMARK_MODELS:
-        model_torch = BENCHMARK_MODELS[model_name_torch]["model"].eval()
+        model_torch = BENCHMARK_MODELS[model_name_torch]["model"].cuda().eval()

     # If neither model type was provided
     if (model is None) and (model_torch is None):
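
The last hunk moves both user-provided and benchmark models onto the GPU at load time, so compilation and timing operate on CUDA-resident weights rather than relying on a later transfer. A short illustrative sketch of that pattern ("model.pt" and the input shape are placeholders):

```python
import torch

# Mirror the updated loading path: move the model to the GPU before eval/compilation.
model_torch = torch.load("model.pt")     # full serialized nn.Module, as the script expects
model_torch = model_torch.cuda().eval()  # CUDA-resident weights before TRT compilation
inputs = [torch.randn(1, 3, 224, 224, device="cuda")]  # inputs on the same device
with torch.no_grad():
    _ = model_torch(*inputs)             # sanity forward pass on GPU
```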