mlcommons · arekay-nv · Nov 6, 2025 · Nov 3, 2025 · Nov 6, 2025 · Nov 6, 2025
@@ -1,6 +1,6 @@
 # These owners will be the default owners for everything in the repo.
 # Unless a later match takes precedence, they will be requested for review when someone opens a pull request.
-* @mlcommons/endpoints-developers 
+* @mlcommons/endpoints-developers
 
 /.github/CODEOWNERS @mlcommons/systems
 

@@ -1,10 +1,9 @@
-
 name: "cla-bot"
 on:
   issue_comment:
     types: [created]
   pull_request_target:
-    types: [opened,closed,synchronize]
+    types: [opened, closed, synchronize]
 
 jobs:
   cla-check:
@@ -17,16 +16,16 @@ jobs:
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           # The token below must have repo scope and must be added manually to the repository's secrets.
-          PERSONAL_ACCESS_TOKEN : ${{ secrets.MLCOMMONS_BOT_CLA_TOKEN }}
+          PERSONAL_ACCESS_TOKEN: ${{ secrets.MLCOMMONS_BOT_CLA_TOKEN }}
         with:
-          path-to-signatures: 'cla-bot/v1/cla.json'
+          path-to-signatures: "cla-bot/v1/cla.json"
           # branch should not be protected
-          branch: 'main'
+          branch: "main"
           allowlist: user1,bot*
           remote-organization-name: mlcommons
           remote-repository-name: systems
-          
-         #below are the optional inputs - If the optional inputs are not given, then default values will be taken
+
+          # Below are the optional inputs. If an optional input is not given, its default value is used.
           #remote-organization-name: enter the remote organization name where the signatures should be stored (Default is storing the signatures in the same repository)
           #remote-repository-name: enter the remote repository name where the signatures should be stored (Default is storing the signatures in the same repository)
           #create-file-commit-message: 'For example: Creating file for storing CLA Signatures'

@@ -22,6 +22,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
+          pip install -e .
           pip install pre-commit
 
       - name: Run pre-commit

@@ -29,7 +29,7 @@ jobs:
 
       - name: Run tests
         run: |
-          pytest -xv -m "not slow" --cov=src --cov-report=xml --cov-report=html
+          pytest -xv -m "not slow and not performance" --cov=src --cov-report=xml --cov-report=html
 
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3

@@ -1,6 +1,6 @@
 ## Contributing
 
-The best way to contribute to the MLCommons is to get involved with one of our many project communities. You can find more information about getting involved with MLCommons [here](https://mlcommons.org/community/). 
+The best way to contribute to MLCommons is to get involved with one of our many project communities. You can find more information about getting involved with MLCommons [here](https://mlcommons.org/community/).
 
 Generally we encourage people to become MLCommons members if they wish to contribute to MLCommons projects, but outside pull requests are very welcome too.
 

@@ -67,31 +67,39 @@ def _run_test(
         tokenizer_override: AutoTokenizer | None = None,
     ):
         with self.event_recorder:
-            EventRecorder.record_event(SessionEvent.TEST_STARTED, time.monotonic_ns())
-            for issued_sample in load_generator:
-                # In the future, we'll want to push this to some thread or process that
-                # performs output verification / accuracy checks.
-                self.sample_uuid_map[issued_sample.sample.uuid] = issued_sample
-
-            self.event_recorder.should_check_idle = True
-            EventRecorder.record_event(SessionEvent.LOADGEN_STOP, time.monotonic_ns())
-            start_time = time.monotonic()
-            while self.event_recorder.n_inflight_samples != 0:
-                if (
-                    max_shutdown_timeout_s is not None
-                    and time.monotonic() - start_time > max_shutdown_timeout_s
-                ):
-                    raise TimeoutError(
-                        f"Max shutdown timeout of {max_shutdown_timeout_s}s reached"
-                    )
-                self.end_event.wait(timeout=10.0)
-                self.logger.info(
-                    f"Waiting for the test to end... {self.event_recorder.n_inflight_samples} samples remaining"
+            try:
+                EventRecorder.record_event(
+                    SessionEvent.TEST_STARTED, time.monotonic_ns()
                 )
-
-            if stop_sample_issuer_on_test_end:
-                load_generator.sample_issuer.shutdown()
-            EventRecorder.record_event(SessionEvent.TEST_ENDED, time.monotonic_ns())
+                for issued_sample in load_generator:
+                    # In the future, we'll want to push this to some thread or process that
+                    # performs output verification / accuracy checks.
+                    self.sample_uuid_map[issued_sample.sample.uuid] = issued_sample
+
+                self.event_recorder.should_check_idle = True
+                EventRecorder.record_event(
+                    SessionEvent.LOADGEN_STOP, time.monotonic_ns()
+                )
+                start_time = time.monotonic()
+                while self.event_recorder.n_inflight_samples != 0:
+                    if (
+                        max_shutdown_timeout_s is not None
+                        and time.monotonic() - start_time > max_shutdown_timeout_s
+                    ):
+                        raise TimeoutError(
+                            f"Max shutdown timeout of {max_shutdown_timeout_s}s reached"
+                        )
+                    self.end_event.wait(timeout=10.0)
+                    self.logger.info(
+                        f"Waiting for the test to end... {self.event_recorder.n_inflight_samples} samples remaining"
+                    )
+            except Exception as e:
+                logger.error(f"Error running benchmark session: {e}")
+                raise e
-                raise e
+                raise
-                raise e
+                raise
+            finally:
+                if stop_sample_issuer_on_test_end:
+                    load_generator.sample_issuer.shutdown()
+                EventRecorder.record_event(SessionEvent.TEST_ENDED, time.monotonic_ns())
 
             self.event_recorder.wait_for_writes()
 

@@ -24,8 +24,8 @@
     OSLDistribution,
     OSLDistributionType,
     SubmissionReference,
-    TestType,
 )
+from inference_endpoint.config.schema import TestType as BenchmarkTestType
 
 
 class TestOSLDistribution:
@@ -105,19 +105,19 @@ def test_minimal_config(self):
         """Test minimal valid configuration."""
         config = BenchmarkConfig(
             name="test",
-            type=TestType.OFFLINE,
+            type=BenchmarkTestType.OFFLINE,
             datasets=[{"name": "test", "type": "performance", "path": "test.pkl"}],
         )
         assert config.name == "test"
-        assert config.type == TestType.OFFLINE
+        assert config.type == BenchmarkTestType.OFFLINE
         assert len(config.datasets) == 1
 
     def test_submission_config(self):
         """Test official submission configuration."""
         config = BenchmarkConfig(
             name="submission",
             version="1.0",
-            type=TestType.SUBMISSION,
+            type=BenchmarkTestType.SUBMISSION,
             submission_ref=SubmissionReference(
                 model="llama-2-70b", ruleset="mlperf-inference-v6.0"
             ),
@@ -146,7 +146,7 @@ def test_multiple_accuracy_datasets(self):
         """Test config with multiple accuracy datasets."""
         config = BenchmarkConfig(
             name="multi-acc",
-            type=TestType.SUBMISSION,
+            type=BenchmarkTestType.SUBMISSION,
             datasets=[
                 {
                     "name": "gpqa",

@@ -25,8 +25,8 @@
     LoadPattern,
     LoadPatternType,
     Settings,
-    TestType,
 )
+from inference_endpoint.config.schema import TestType as BenchmarkTestType
 from inference_endpoint.config.yaml_loader import ConfigError, ConfigLoader
 
 
@@ -69,7 +69,7 @@ def test_load_valid_yaml(self, tmp_path):
 
         config = ConfigLoader.load_yaml(config_file)
         assert config.name == "test-config"
-        assert config.type == TestType.OFFLINE
+        assert config.type == BenchmarkTestType.OFFLINE
         assert len(config.datasets) == 1
 
     def test_load_nonexistent_file(self):
@@ -87,15 +87,15 @@ def test_load_invalid_yaml(self, tmp_path):
 
     def test_create_default_offline_config(self):
         """Test creating default offline config."""
-        config = BenchmarkConfig.create_default_config(TestType.OFFLINE)
+        config = BenchmarkConfig.create_default_config(BenchmarkTestType.OFFLINE)
         assert isinstance(config, BenchmarkConfig)
         assert config.settings.load_pattern.type == LoadPatternType.MAX_THROUGHPUT
         assert config.settings.runtime.min_duration_ms == 600000
         assert config.settings.client.workers == 4
 
     def test_create_default_online_config(self):
         """Test creating default online config."""
-        config = BenchmarkConfig.create_default_config(TestType.ONLINE)
+        config = BenchmarkConfig.create_default_config(BenchmarkTestType.ONLINE)
         assert isinstance(config, BenchmarkConfig)
         assert config.settings.load_pattern.type == LoadPatternType.POISSON
         assert config.settings.load_pattern.target_qps == 10.0
@@ -104,7 +104,7 @@ def test_create_default_online_config(self):
     def test_serialize_deserialize_roundtrip(self, tmp_path):
         """Test BenchmarkConfig.to_yaml_file() and from_yaml_file() roundtrip."""
         # Create a config
-        original = BenchmarkConfig.create_default_config(TestType.OFFLINE)
+        original = BenchmarkConfig.create_default_config(BenchmarkTestType.OFFLINE)
 
         # Save to YAML
         yaml_file = tmp_path / "test_config.yaml"
@@ -122,7 +122,7 @@ def test_serialize_deserialize_roundtrip(self, tmp_path):
 
     def test_to_yaml_file_creates_directory(self, tmp_path):
         """Test that to_yaml_file creates parent directories."""
-        config = BenchmarkConfig.create_default_config(TestType.ONLINE)
+        config = BenchmarkConfig.create_default_config(BenchmarkTestType.ONLINE)
 
         # Save to nested path that doesn't exist
         nested_path = tmp_path / "subdir" / "nested" / "config.yaml"
@@ -138,7 +138,7 @@ def test_validate_concurrency_error_when_insufficient(self):
         # Create a BenchmarkConfig with insufficient max_concurrency
         config = BenchmarkConfig(
             name="test",
-            type=TestType.ONLINE,
+            type=BenchmarkTestType.ONLINE,
             datasets=[],
             endpoint_config=EndpointConfig(endpoint="http://test:8000"),
             settings=Settings(
@@ -163,7 +163,7 @@ def test_validate_concurrency_sufficient(self):
         """Test validation passes when max_concurrency >= target_concurrency."""
         config = BenchmarkConfig(
             name="test",
-            type=TestType.ONLINE,
+            type=BenchmarkTestType.ONLINE,
             datasets=[],
             endpoint_config=EndpointConfig(endpoint="http://test:8000"),
             settings=Settings(