mlcommons · arjunsuresh · Nov 17, 2024 · Nov 16, 2024 · Nov 16, 2024 · Nov 16, 2024
@@ -6,7 +6,7 @@ on:
 
 jobs:
   run_amd:
-      if: github.repository_owner == 'gateoverflow'
+      if: github.repository_owner == 'gateoverflow_off'
       runs-on: [ self-hosted, linux, x64, GO-spr ]
       strategy:
         fail-fast: false

@@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations
 
 on:
   schedule:
-    - cron: "54 11 * * *" #to be adjusted
+    - cron: "54 22 * * *" #to be adjusted
 
 jobs:
   run_nvidia:
@@ -19,6 +19,12 @@ jobs:
           system: [ "GO-spr", "phoenix", "i9" ] 
           python-version: [ "3.12" ]
           model: [ "resnet50",  "retinanet",  "bert-99", "bert-99.9", "gptj-99.9", "3d-unet-99.9" ]
+          exclude:
+           - system: i9
+             model: gptj-99.9
+           - system: phoenix
+             model: gptj-99.9
+
       steps:
       - name: Test MLPerf Inference NVIDIA ${{ matrix.model }}
         run: |

@@ -91,9 +91,6 @@ def preprocess(i):
 
     RUN_CMD = ""
     state['RUN'] = {}
-    test_list = ["TEST01", "TEST04", "TEST05"]
-    if env['CM_MODEL'] in ["rnnt", "bert-99", "bert-99.9", "dlrm-99", "dlrm-99.9", "3d-unet-99", "3d-unet-99.9"]:
-        test_list.remove("TEST04")
 
     scenario = env['CM_MLPERF_LOADGEN_SCENARIO']
     state['RUN'][scenario] = {}

@@ -331,6 +331,18 @@ variations:
 
 
 
+  nvidia-original,gptj_:
+    env:
+      BUILD_TRTLLM: 1
+
+  nvidia-original,llama2-70b_:
+    env:
+      BUILD_TRTLLM: 1
+
+  nvidia-original,mixtral-8x7b:
+    env:
+      BUILD_TRTLLM: 1
+
   nvidia-original,r4.1-dev_default,gptj_:
     docker:
       image_name: mlperf-inference-nvidia-v4.1-dev-llm
@@ -352,8 +364,6 @@ variations:
             _tp-size.:
               - CM_NVIDIA_TP_SIZE
 
-    env:
-      BUILD_TRTLLM: 1
 
   nvidia-original,r4.1-dev_default,llama2-70b_:
     docker:

@@ -54,13 +54,6 @@ def preprocess(i):
 
     RUN_CMD = ""
     state['RUN'] = {}
-    test_list = ["TEST01", "TEST04", "TEST05"]
-    if env['CM_MODEL'] in ["rnnt", "bert-99", "bert-99.9", "dlrm-v2-99", "dlrm-v2-99.9", "3d-unet-99", "3d-unet-99.9"]:
-        test_list.remove("TEST04")
-    if "gpt-" in env['CM_MODEL']:
-        test_list.remove("TEST05")
-        test_list.remove("TEST04")
-        test_list.remove("TEST01")
 
     scenario = env['CM_MLPERF_LOADGEN_SCENARIO']
     state['RUN'][scenario] = {}
@@ -189,8 +182,8 @@ def preprocess(i):
             test = env.get("CM_MLPERF_LOADGEN_COMPLIANCE_TEST", "TEST01")
             if test == "TEST01":
                 metric_value = str(float(metric_value) * float(env.get("CM_MLPERF_TEST01_SERVER_ADJUST_FACTOR", 0.96)))
-            if test == "TEST05":
-                metric_value = str(float(metric_value) * float(env.get("CM_MLPERF_TEST05_SERVER_ADJUST_FACTOR", 0.97)))
+            #if test == "TEST05":
+            #    metric_value = str(float(metric_value) * float(env.get("CM_MLPERF_TEST05_SERVER_ADJUST_FACTOR", 0.97)))
             if test == "TEST04":
                 metric_value = str(float(metric_value) * float(env.get("CM_MLPERF_TEST04_SERVER_ADJUST_FACTOR", 0.97)))
 

@@ -25,6 +25,6 @@ resnet50:
     target_qps: 35000.0
 retinanet:
   Offline:
-    target_qps: 700.0
+    target_qps: 850.0
   Server:
-    target_qps: 650.0
+    target_qps: 630.0
@@ -25,6 +25,6 @@ resnet50:
     target_qps: 35000.0
 retinanet:
   Offline:
-    target_qps: 700.0
+    target_qps: 850.0
   Server:
-    target_qps: 650.0
+    target_qps: 630.0
@@ -189,7 +189,7 @@ def get_result_string(version, model, scenario, result_path, has_power, sub_res,
         result['power'] = power_result
         result['power_efficiency'] = power_efficiency_result
 
-    compliance_list = [ "TEST01", "TEST05", "TEST04", "TEST06" ]
+    compliance_list = [ "TEST01", "TEST04", "TEST06" ]
     if division == "closed":
         for test in compliance_list:
             test_path = os.path.join(result_path, test)
@@ -244,7 +244,7 @@ def get_result_string(version, model, scenario, result_path, has_power, sub_res,
 def get_result_table(results):
 
 
-    headers = ["Model", "Scenario", "Accuracy", "Throughput", "Latency (in ms)", "Power Efficiency (in samples/J)", "TEST01", "TEST05", "TEST04"]
+    headers = ["Model", "Scenario", "Accuracy", "Throughput", "Latency (in ms)", "Power Efficiency (in samples/J)", "TEST01", "TEST04"]
     table = []
     for model in results:
         for scenario in results[model]:
@@ -286,7 +286,7 @@ def get_result_table(results):
                     row.append("-")
 
             val1 = results[model][scenario].get('TEST01')
-            val2 = results[model][scenario].get('TEST05')
+            #val2 = results[model][scenario].get('TEST05')
             val3 = results[model][scenario].get('TEST04')
 
             #if results[model][scenario].get('power','') != '':
@@ -296,27 +296,16 @@ def get_result_table(results):
                 if not results[model][scenario].get('power_valid', True):
                     val = "X "+val
                 row.append(val)
-            elif val1 or val2 or val3: #Don't output unless there are any further column data
+            elif val1 or val3: #Don't output unless there are any further column data
                 row.append(None)
 
             if val1:
                 row.append(val1)
-                if val2:
-                    row.append(val2)
-                    if val3:
-                        row.append(val3)
-                elif val3:
-                    row.append("missing")
+                if val3:
                     row.append(val3)
 
             else:
-                if val2:
-                    row.append("missing")
-                    row.append(val2)
-                    if val3:
-                        row.append(val3)
-                elif val3:
-                    row.append("missing")
+                if val3:
                     row.append("missing")
                     row.append(val3)