Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# These owners will be the default owners for everything in the repo.
# Unless a later match takes precedence, they will be requested for review when someone opens a pull request.
* @mlcommons/endpoints-developers
* @mlcommons/endpoints-developers

/.github/CODEOWNERS @mlcommons/systems

Expand Down
13 changes: 6 additions & 7 deletions .github/workflows/cla.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@

name: "cla-bot"
on:
issue_comment:
types: [created]
pull_request_target:
types: [opened,closed,synchronize]
types: [opened, closed, synchronize]

jobs:
cla-check:
Expand All @@ -17,16 +16,16 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# The token below must have repo scope and must be added manually to the repository's secrets.
PERSONAL_ACCESS_TOKEN : ${{ secrets.MLCOMMONS_BOT_CLA_TOKEN }}
PERSONAL_ACCESS_TOKEN: ${{ secrets.MLCOMMONS_BOT_CLA_TOKEN }}
with:
path-to-signatures: 'cla-bot/v1/cla.json'
path-to-signatures: "cla-bot/v1/cla.json"
# branch should not be protected
branch: 'main'
branch: "main"
allowlist: user1,bot*
remote-organization-name: mlcommons
remote-repository-name: systems
#below are the optional inputs - If the optional inputs are not given, then default values will be taken

#below are the optional inputs - If the optional inputs are not given, then default values will be taken
#remote-organization-name: enter the remote organization name where the signatures should be stored (Default is storing the signatures in the same repository)
#remote-repository-name: enter the remote repository name where the signatures should be stored (Default is storing the signatures in the same repository)
#create-file-commit-message: 'For example: Creating file for storing CLA Signatures'
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e .
pip install pre-commit

- name: Run pre-commit
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:

- name: Run tests
run: |
pytest -xv -m "not slow" --cov=src --cov-report=xml --cov-report=html
pytest -xv -m "not slow and not performance" --cov=src --cov-report=xml --cov-report=html

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
Expand Down
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
## Contributing

The best way to contribute to the MLCommons is to get involved with one of our many project communities. You can find more information about getting involved with MLCommons [here](https://mlcommons.org/community/).
The best way to contribute to the MLCommons is to get involved with one of our many project communities. You can find more information about getting involved with MLCommons [here](https://mlcommons.org/community/).

Generally we encourage people to become MLCommons members if they wish to contribute to MLCommons projects, but outside pull requests are very welcome too.

Expand Down
345 changes: 172 additions & 173 deletions LICENSE.md

Large diffs are not rendered by default.

56 changes: 32 additions & 24 deletions src/inference_endpoint/load_generator/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,31 +67,39 @@ def _run_test(
tokenizer_override: AutoTokenizer | None = None,
):
with self.event_recorder:
EventRecorder.record_event(SessionEvent.TEST_STARTED, time.monotonic_ns())
for issued_sample in load_generator:
# In the future, we'll want to push this to some thread or process that
# performs output verification / accuracy checks.
self.sample_uuid_map[issued_sample.sample.uuid] = issued_sample

self.event_recorder.should_check_idle = True
EventRecorder.record_event(SessionEvent.LOADGEN_STOP, time.monotonic_ns())
start_time = time.monotonic()
while self.event_recorder.n_inflight_samples != 0:
if (
max_shutdown_timeout_s is not None
and time.monotonic() - start_time > max_shutdown_timeout_s
):
raise TimeoutError(
f"Max shutdown timeout of {max_shutdown_timeout_s}s reached"
)
self.end_event.wait(timeout=10.0)
self.logger.info(
f"Waiting for the test to end... {self.event_recorder.n_inflight_samples} samples remaining"
try:
EventRecorder.record_event(
SessionEvent.TEST_STARTED, time.monotonic_ns()
)

if stop_sample_issuer_on_test_end:
load_generator.sample_issuer.shutdown()
EventRecorder.record_event(SessionEvent.TEST_ENDED, time.monotonic_ns())
for issued_sample in load_generator:
# In the future, we'll want to push this to some thread or process that
# performs output verification / accuracy checks.
self.sample_uuid_map[issued_sample.sample.uuid] = issued_sample

self.event_recorder.should_check_idle = True
EventRecorder.record_event(
SessionEvent.LOADGEN_STOP, time.monotonic_ns()
)
start_time = time.monotonic()
while self.event_recorder.n_inflight_samples != 0:
if (
max_shutdown_timeout_s is not None
and time.monotonic() - start_time > max_shutdown_timeout_s
):
raise TimeoutError(
f"Max shutdown timeout of {max_shutdown_timeout_s}s reached"
)
self.end_event.wait(timeout=10.0)
self.logger.info(
f"Waiting for the test to end... {self.event_recorder.n_inflight_samples} samples remaining"
)
except Exception as e:
logger.error(f"Error running benchmark session: {e}")
raise e
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Using raise e re-raises through a new raise statement, which in Python 3 appends an extra traceback entry pointing at this line and adds noise on top of the original location where the exception was raised. For better debuggability, it's preferable to use a bare raise statement inside an except block. This re-raises the caught exception while preserving its original traceback unchanged.

Suggested change
raise e
raise

finally:
if stop_sample_issuer_on_test_end:
load_generator.sample_issuer.shutdown()
EventRecorder.record_event(SessionEvent.TEST_ENDED, time.monotonic_ns())

self.event_recorder.wait_for_writes()

Expand Down
10 changes: 5 additions & 5 deletions tests/unit/config/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
OSLDistribution,
OSLDistributionType,
SubmissionReference,
TestType,
)
from inference_endpoint.config.schema import TestType as BenchmarkTestType


class TestOSLDistribution:
Expand Down Expand Up @@ -105,19 +105,19 @@ def test_minimal_config(self):
"""Test minimal valid configuration."""
config = BenchmarkConfig(
name="test",
type=TestType.OFFLINE,
type=BenchmarkTestType.OFFLINE,
datasets=[{"name": "test", "type": "performance", "path": "test.pkl"}],
)
assert config.name == "test"
assert config.type == TestType.OFFLINE
assert config.type == BenchmarkTestType.OFFLINE
assert len(config.datasets) == 1

def test_submission_config(self):
"""Test official submission configuration."""
config = BenchmarkConfig(
name="submission",
version="1.0",
type=TestType.SUBMISSION,
type=BenchmarkTestType.SUBMISSION,
submission_ref=SubmissionReference(
model="llama-2-70b", ruleset="mlperf-inference-v6.0"
),
Expand Down Expand Up @@ -146,7 +146,7 @@ def test_multiple_accuracy_datasets(self):
"""Test config with multiple accuracy datasets."""
config = BenchmarkConfig(
name="multi-acc",
type=TestType.SUBMISSION,
type=BenchmarkTestType.SUBMISSION,
datasets=[
{
"name": "gpqa",
Expand Down
16 changes: 8 additions & 8 deletions tests/unit/config/test_yaml_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
LoadPattern,
LoadPatternType,
Settings,
TestType,
)
from inference_endpoint.config.schema import TestType as BenchmarkTestType
from inference_endpoint.config.yaml_loader import ConfigError, ConfigLoader


Expand Down Expand Up @@ -69,7 +69,7 @@ def test_load_valid_yaml(self, tmp_path):

config = ConfigLoader.load_yaml(config_file)
assert config.name == "test-config"
assert config.type == TestType.OFFLINE
assert config.type == BenchmarkTestType.OFFLINE
assert len(config.datasets) == 1

def test_load_nonexistent_file(self):
Expand All @@ -87,15 +87,15 @@ def test_load_invalid_yaml(self, tmp_path):

def test_create_default_offline_config(self):
"""Test creating default offline config."""
config = BenchmarkConfig.create_default_config(TestType.OFFLINE)
config = BenchmarkConfig.create_default_config(BenchmarkTestType.OFFLINE)
assert isinstance(config, BenchmarkConfig)
assert config.settings.load_pattern.type == LoadPatternType.MAX_THROUGHPUT
assert config.settings.runtime.min_duration_ms == 600000
assert config.settings.client.workers == 4

def test_create_default_online_config(self):
"""Test creating default online config."""
config = BenchmarkConfig.create_default_config(TestType.ONLINE)
config = BenchmarkConfig.create_default_config(BenchmarkTestType.ONLINE)
assert isinstance(config, BenchmarkConfig)
assert config.settings.load_pattern.type == LoadPatternType.POISSON
assert config.settings.load_pattern.target_qps == 10.0
Expand All @@ -104,7 +104,7 @@ def test_create_default_online_config(self):
def test_serialize_deserialize_roundtrip(self, tmp_path):
"""Test BenchmarkConfig.to_yaml_file() and from_yaml_file() roundtrip."""
# Create a config
original = BenchmarkConfig.create_default_config(TestType.OFFLINE)
original = BenchmarkConfig.create_default_config(BenchmarkTestType.OFFLINE)

# Save to YAML
yaml_file = tmp_path / "test_config.yaml"
Expand All @@ -122,7 +122,7 @@ def test_serialize_deserialize_roundtrip(self, tmp_path):

def test_to_yaml_file_creates_directory(self, tmp_path):
"""Test that to_yaml_file creates parent directories."""
config = BenchmarkConfig.create_default_config(TestType.ONLINE)
config = BenchmarkConfig.create_default_config(BenchmarkTestType.ONLINE)

# Save to nested path that doesn't exist
nested_path = tmp_path / "subdir" / "nested" / "config.yaml"
Expand All @@ -138,7 +138,7 @@ def test_validate_concurrency_error_when_insufficient(self):
# Create a BenchmarkConfig with insufficient max_concurrency
config = BenchmarkConfig(
name="test",
type=TestType.ONLINE,
type=BenchmarkTestType.ONLINE,
datasets=[],
endpoint_config=EndpointConfig(endpoint="http://test:8000"),
settings=Settings(
Expand All @@ -163,7 +163,7 @@ def test_validate_concurrency_sufficient(self):
"""Test validation passes when max_concurrency >= target_concurrency."""
config = BenchmarkConfig(
name="test",
type=TestType.ONLINE,
type=BenchmarkTestType.ONLINE,
datasets=[],
endpoint_config=EndpointConfig(endpoint="http://test:8000"),
settings=Settings(
Expand Down
Loading