@@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations

 on:
   schedule:
-    - cron: "03 01 * * *" #to be adjusted
+    - cron: "03 12 * * *" #to be adjusted

 jobs:
   run_nvidia:
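
The first hunk only shifts the nightly trigger. GitHub Actions evaluates `schedule` cron expressions in UTC, with the five fields meaning minute, hour, day-of-month, month, and day-of-week:

```bash
# cron fields: minute hour day-of-month month day-of-week (UTC on GitHub Actions)
#   "03 01 * * *"  ->  daily at 01:03 UTC (previous trigger)
#   "03 12 * * *"  ->  daily at 12:03 UTC (new trigger)
```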
@@ -36,6 +36,6 @@ jobs:
     pip install --upgrade cm4mlops
     pip install tabulate

-    cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --execution_mode=valid --gpu_name=rtx_4090 --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=cuda --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet
+    cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --adr.submission-checker-src.tags=_branch.dev --execution_mode=valid --gpu_name=rtx_4090 --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=cuda --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet

     cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on NVIDIA_$hw_name" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=$hw_name
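
The only difference between the old and new benchmark invocation is the added `--adr.submission-checker-src.tags=_branch.dev`, which, per the cm4mlops `--adr.<name>.tags` convention for dependency resolution, pins the submission-checker source to its `dev` branch. The same command reflowed for readability, with `$MODEL` as a stand-in for the workflow's `${{ matrix.model }}` value:

```bash
# Sketch of the updated invocation, reflowed for readability; $MODEL is a
# stand-in for the GitHub Actions matrix value ${{ matrix.model }}.
cm run script \
  --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev \
  --preprocess_submission=yes \
  --adr.submission-checker-src.tags=_branch.dev \
  --execution_mode=valid --gpu_name=rtx_4090 \
  --pull_changes=yes --pull_inference_changes=yes \
  --model="$MODEL" --submitter="MLCommons" --hw_name="$hw_name" \
  --implementation=nvidia --backend=tensorrt \
  --category=datacenter,edge --division=closed \
  --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops \
  --adr.compiler.tags=gcc --device=cuda --use_dataset_from_host=yes \
  --results_dir="$HOME/gh_action_results" \
  --submission_dir="$HOME/gh_action_submissions" \
  --clean --docker --quiet
```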
1 change: 1 addition & 0 deletions script/app-mlperf-inference/_cm.yaml
@@ -1692,6 +1692,7 @@ docker:
   pre_run_cmds:
     #- cm pull repo && cm run script --tags=get,git,repo,_repo.https://github.com/GATEOverflow/inference_results_v4.0.git --update
     - cm pull repo
+    - cm rm cache --tags=inference,src -f
   mounts:
     - "${{ CM_DATASET_IMAGENET_PATH }}:${{ CM_DATASET_IMAGENET_PATH }}"
     - "${{ CM_DATASET_OPENIMAGES_PATH }}:${{ CM_DATASET_OPENIMAGES_PATH }}"
@@ -1,54 +1,42 @@
 3d-unet-99:
-  MultiStream:
-    target_latency: 80
   Offline:
-    target_qps: 1.0
-  Server:
-    target_qps: 1.0
+    target_qps: 4.0
   SingleStream:
     target_latency: 10
 3d-unet-99.9:
-  MultiStream:
-    target_latency: 80
   Offline:
-    target_qps: 1.0
-  Server:
-    target_qps: 1.0
+    target_qps: 4.0
   SingleStream:
     target_latency: 10
 bert-99:
-  MultiStream:
-    target_latency: 80
   Offline:
-    target_qps: 1.0
+    target_qps: 4000.0
   Server:
-    target_qps: 1.0
+    target_qps: 3800.0
   SingleStream:
-    target_latency: 10
+    target_latency: 1
 bert-99.9:
-  MultiStream:
-    target_latency: 80
   Offline:
-    target_qps: 1.0
+    target_qps: 2000.0
   Server:
-    target_qps: 1.0
+    target_qps: 2000.0
   SingleStream:
     target_latency: 10
 resnet50:
   MultiStream:
     target_latency: '432111'
   Offline:
-    target_qps: '37959.4'
+    target_qps: '42959.4'
   Server:
-    target_qps: 1.0
+    target_qps: 35000.0
   SingleStream:
     target_latency: '226895'
 retinanet:
   MultiStream:
     target_latency: 80
   Offline:
-    target_qps: 1.0
+    target_qps: 700.0
   Server:
-    target_qps: 1.0
+    target_qps: 650.0
   SingleStream:
     target_latency: 10
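
These are per-model loadgen targets: `target_qps` is the throughput goal for the Offline and Server scenarios, and `target_latency` bounds the SingleStream and MultiStream scenarios. The change replaces the 1.0 placeholder QPS values with realistic numbers for the RTX 4090 runs driven by the workflow above and drops scenarios that are no longer exercised. For a one-off run the targets can also be overridden on the command line; a hedged sketch, assuming the `--offline_target_qps`/`--server_target_qps` input mappings of the cm4mlops run-mlperf wrapper:

```bash
# Sketch: override the stored bert-99 targets for a single run instead of
# editing this config file (flag names assume the run-mlperf wrapper's
# documented input mappings).
cm run script --tags=run-mlperf,inference,_r4.1-dev \
  --model=bert-99 \
  --offline_target_qps=4000 \
  --server_target_qps=3800 \
  --quiet
```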
@@ -0,0 +1,42 @@
+3d-unet-99:
+  Offline:
+    target_qps: 4.0
+  SingleStream:
+    target_latency: 10
+3d-unet-99.9:
+  Offline:
+    target_qps: 4.0
+  SingleStream:
+    target_latency: 10
+bert-99:
+  Offline:
+    target_qps: 4000.0
+  Server:
+    target_qps: 3800.0
+  SingleStream:
+    target_latency: 1
+bert-99.9:
+  Offline:
+    target_qps: 2000.0
+  Server:
+    target_qps: 2000.0
+  SingleStream:
+    target_latency: 10
+resnet50:
+  MultiStream:
+    target_latency: '432111'
+  Offline:
+    target_qps: '42959.4'
+  Server:
+    target_qps: 35000.0
+  SingleStream:
+    target_latency: '226895'
+retinanet:
+  MultiStream:
+    target_latency: 80
+  Offline:
+    target_qps: 700.0
+  Server:
+    target_qps: 650.0
+  SingleStream:
+    target_latency: 10

This file was deleted.

8 changes: 8 additions & 0 deletions script/run-mlperf-inference-submission-checker/_cm.json
@@ -42,6 +42,12 @@
         "skip_if_env": {
           "CM_MLPERF_INFERENCE_SUBMISSION_DIR": [ "on" ]
         }
+      },
+      {
+        "tags": "preprocess,mlperf,inference,submission",
+        "enable_if_env": {
+          "CM_TMP_MLPERF_INFERENCE_PREPROCESS_SUBMISSION": [ "on" ]
+        }
       }
     ],
     "post_deps": [
@@ -80,6 +86,8 @@
     "repo_name": "CM_MLPERF_RESULTS_GIT_REPO_NAME",
     "repo_owner": "CM_MLPERF_RESULTS_GIT_REPO_OWNER",
     "repo_branch": "CM_MLPERF_RESULTS_GIT_REPO_BRANCH",
+    "preprocess": "CM_TMP_MLPERF_INFERENCE_PREPROCESS_SUBMISSION",
+    "preprocess_submission": "CM_TMP_MLPERF_INFERENCE_PREPROCESS_SUBMISSION",
     "push_to_github": "CM_MLPERF_RESULT_PUSH_TO_GITHUB",
     "extra_model_benchmark_map": "CM_MLPERF_EXTRA_MODEL_MAPPING",
     "power": "CM_MLPERF_POWER",
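
Taken together, the two hunks wire the new option end to end: passing `--preprocess` or `--preprocess_submission` sets `CM_TMP_MLPERF_INFERENCE_PREPROCESS_SUBMISSION`, and the `enable_if_env` guard above then pulls in the `preprocess,mlperf,inference,submission` dependency before the checker runs. A usage sketch (the tag set is abbreviated to the subset that selects this script):

```bash
# Run the submission checker with preprocessing enabled. The flag maps to
# CM_TMP_MLPERF_INFERENCE_PREPROCESS_SUBMISSION, which enables the new
# "preprocess,mlperf,inference,submission" dependency declared above.
cm run script --tags=run,mlperf,inference,submission,checker \
  --preprocess_submission=yes \
  --submission_dir="$HOME/gh_action_submissions" \
  --quiet
```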