Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,18 +1,6 @@
#!/usr/bin/env python3

# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import logging
from copy import deepcopy
Expand Down Expand Up @@ -132,9 +120,11 @@ def _binary_search_over_top_results(self) -> Generator[RunConfig, None, None]:
for result in top_results:
run_config = deepcopy(result.run_config())
model_parameters = self._get_model_parameters(model_name)
perf_analyzer_flags = self._get_model_perf_analyzer_flags(model_name)
parameter_search = ParameterSearch(
config=self._config,
model_parameters=model_parameters,
perf_analyzer_flags=perf_analyzer_flags,
skip_parameter_sweep=True,
)
for parameter in parameter_search.search_parameters():
Expand All @@ -151,6 +141,12 @@ def _get_model_parameters(self, model_name: str) -> Dict:

return {}

def _get_model_perf_analyzer_flags(self, model_name: str) -> Dict:
for model in self._models:
if model_name == model.model_name():
return model.perf_analyzer_flags()
return {}

def _set_parameter(
self, run_config: RunConfig, model_parameters: Dict, parameter: int
) -> RunConfig:
Expand Down
28 changes: 14 additions & 14 deletions model_analyzer/config/generate/model_profile_spec.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,6 @@
#!/usr/bin/env python3

# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from copy import deepcopy
from typing import List
Expand All @@ -22,6 +10,7 @@
ConfigModelProfileSpec,
)
from model_analyzer.device.gpu_device import GPUDevice
from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
from model_analyzer.triton.client.client import TritonClient
from model_analyzer.triton.model.model_config import ModelConfig

Expand Down Expand Up @@ -72,3 +61,14 @@ def supports_dynamic_batching(self) -> bool:
def is_ensemble(self) -> bool:
"""Returns true if the model is an ensemble"""
return "ensemble_scheduling" in self._default_model_config

def is_load_specified(self) -> bool:
    """
    Returns true if the model's PA config has specified any of the
    inference load args (such as concurrency). Else returns false
    """
    pa_flags = self.perf_analyzer_flags()
    if pa_flags is None:
        return False
    # Any one of the inference load args being present means the user
    # has pinned the load themselves.
    for load_arg in PerfAnalyzerConfig.get_inference_load_args():
        if load_arg in pa_flags:
            return True
    return False
36 changes: 14 additions & 22 deletions model_analyzer/config/generate/perf_analyzer_config_generator.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,6 @@
#!/usr/bin/env python3

# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import logging
from typing import Generator, List, Optional
Expand Down Expand Up @@ -169,10 +157,12 @@ def set_last_results(
self._parameter_results.extend(measurement)

def _create_parameter_list(self) -> List[int]:
# The two possible parameters are request rate or concurrency
# Concurrency is the default and will be used unless the user specifies
# request rate, either as a model parameter or a config option
if self._cli_config.is_request_rate_specified(self._model_parameters):
# Determines the inference load (concurrency or request-rate or request-intervals)
# and creates the list of values to use. If nothing is specified by the user, then
# concurrency will be used.
if "request-intervals" in self._perf_analyzer_flags:
return [self._perf_analyzer_flags["request-intervals"]]
elif self._cli_config.is_request_rate_specified(self._model_parameters):
return self._create_request_rate_list()
else:
return self._create_concurrency_list()
Expand Down Expand Up @@ -207,7 +197,7 @@ def _generate_perf_configs(self) -> None:
for params in utils.generate_parameter_combinations(
perf_config_non_parameter_values
):
configs_with_concurrency = []
configs_with_inference_load = []
for parameter in self._parameters:
new_perf_config = PerfAnalyzerConfig()

Expand All @@ -217,16 +207,18 @@ def _generate_perf_configs(self) -> None:

new_perf_config.update_config(params)

if self._cli_config.is_request_rate_specified(self._model_parameters):
if "request-intervals" in self._perf_analyzer_flags:
pass
elif self._cli_config.is_request_rate_specified(self._model_parameters):
new_perf_config.update_config({"request-rate-range": parameter})
else:
new_perf_config.update_config({"concurrency-range": parameter})

# User provided flags can override the search parameters
new_perf_config.update_config(self._perf_analyzer_flags)

configs_with_concurrency.append(new_perf_config)
self._configs.append(configs_with_concurrency)
configs_with_inference_load.append(new_perf_config)
self._configs.append(configs_with_inference_load)

def _create_non_parameter_perf_config_values(self) -> dict:
perf_config_values = {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,8 @@
#!/usr/bin/env python3

# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import logging
from copy import deepcopy
from typing import Generator, List, Optional

from model_analyzer.config.generate.concurrency_sweeper import ConcurrencySweeper
Expand All @@ -30,7 +17,6 @@
from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
from model_analyzer.config.run.run_config import RunConfig
from model_analyzer.constants import LOGGER_NAME
from model_analyzer.result.parameter_search import ParameterSearch
from model_analyzer.result.result_manager import ResultManager
from model_analyzer.result.run_config_measurement import RunConfigMeasurement

Expand Down
44 changes: 16 additions & 28 deletions model_analyzer/config/generate/quick_run_config_generator.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,6 @@
#!/usr/bin/env python3

# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import logging
from sys import maxsize
Expand Down Expand Up @@ -507,13 +495,13 @@ def _get_next_perf_analyzer_config(

perf_analyzer_config.update_config_from_profile_config(model_name, self._config)

concurrency = self._calculate_concurrency(dimension_values)

perf_config_params = {
"batch-size": DEFAULT_BATCH_SIZES,
"concurrency-range": concurrency,
}
perf_analyzer_config.update_config(perf_config_params)
if not model.is_load_specified():
concurrency = self._calculate_concurrency(dimension_values)
perf_config_params = {
"batch-size": DEFAULT_BATCH_SIZES,
"concurrency-range": concurrency,
}
perf_analyzer_config.update_config(perf_config_params)

perf_analyzer_config.update_config(model.perf_analyzer_flags())
return perf_analyzer_config
Expand Down Expand Up @@ -703,13 +691,13 @@ def _create_default_perf_analyzer_config(
model_config.get_field("name"), self._config
)

default_concurrency = self._calculate_default_concurrency(model_config)

perf_config_params = {
"batch-size": DEFAULT_BATCH_SIZES,
"concurrency-range": default_concurrency,
}
default_perf_analyzer_config.update_config(perf_config_params)
if not model.is_load_specified():
default_concurrency = self._calculate_default_concurrency(model_config)
perf_config_params = {
"batch-size": DEFAULT_BATCH_SIZES,
"concurrency-range": default_concurrency,
}
default_perf_analyzer_config.update_config(perf_config_params)

default_perf_analyzer_config.update_config(model.perf_analyzer_flags())

Expand Down
38 changes: 23 additions & 15 deletions model_analyzer/perf_analyzer/perf_config.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,6 @@
#!/usr/bin/env python3

# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from typing import List

Expand Down Expand Up @@ -98,6 +86,13 @@ class PerfAnalyzerConfig:
"collect-metrics",
]

# Only one of these args can be sent to PA, as each one controls the inference load in a different way
inference_load_args = [
"concurrency-range",
"request-rate-range",
"request-intervals",
]

def __init__(self):
"""
Construct a PerfAnalyzerConfig
Expand All @@ -108,7 +103,9 @@ def __init__(self):
self._options = {
"-m": None,
"-x": None,
"-b": None,
# Default to batch size of 1. This would be handled by PA if unspecified,
# but we want to be explicit so we can properly print/track values
"-b": 1,
"-u": None,
"-i": None,
"-f": None,
Expand Down Expand Up @@ -160,6 +157,16 @@ def additive_keys(cls):

return cls.additive_args[:]

@classmethod
def get_inference_load_args(cls):
    """
    Returns
    -------
    list of str
        The Perf Analyzer args that control the inference load
        (only one of these may be passed to PA at a time)
    """
    # Return a copy so callers cannot mutate the shared class-level
    # list — consistent with additive_keys(), which also copies.
    return cls.inference_load_args[:]

def update_config(self, params=None):
"""
Allows setting values from a params dict
Expand Down Expand Up @@ -275,6 +282,7 @@ def extract_model_specific_parameters(self):
"batch-size": self._options["-b"],
"concurrency-range": self._args["concurrency-range"],
"request-rate-range": self._args["request-rate-range"],
"request-intervals": self._args["request-intervals"],
}

@classmethod
Expand Down
Loading
Loading