Merged
106 commits
de42748
added checksums
anandhu-eng Sep 21, 2024
7249b73
corrected pre download clean
anandhu-eng Sep 21, 2024
b2b7dfc
Disabled check for condition
anandhu-eng Sep 21, 2024
a6224e3
Merge branch 'mlperf-inference' into checksum-branch-1
anandhu-eng Sep 21, 2024
67dc948
Proper exit for unhandled md5sum errors
anandhu-eng Sep 23, 2024
44076fb
Merge branch 'mlperf-inference' into checksum-branch-1
anandhu-eng Sep 23, 2024
729a65e
sdxl scc commit - WIP
anandhu-eng Sep 23, 2024
0343327
Merge branch 'GATEOverflow:mlperf-inference' into nvidia-sdxl-v4.1
anandhu-eng Sep 23, 2024
4986d1f
Restrict the self-hosted runs to the runner repo
arjunsuresh Sep 23, 2024
39a2d8e
Merge pull request #128 from anandhu-eng/nvidia-sdxl-v4.1
arjunsuresh Sep 23, 2024
e963f00
Merge branch 'mlcommons:mlperf-inference' into mlperf-inference
arjunsuresh Sep 23, 2024
1284f49
Merge pull request #285 from GATEOverflow/mlperf-inference
arjunsuresh Sep 23, 2024
30d90d2
Fix rocm pytorch install
arjunsuresh Sep 23, 2024
6373f62
Fixes for SCC24
arjunsuresh Sep 23, 2024
8ceb313
Update torchvision for rocm
arjunsuresh Sep 23, 2024
0d133c9
Update sut config name for SCC24
arjunsuresh Sep 23, 2024
14a6a66
Fix starting weights for nvidia mlperf inference sdxl
arjunsuresh Sep 23, 2024
a470621
Fix torchaudio installation for rocm
arjunsuresh Sep 23, 2024
af58441
Merge branch 'mlperf-inference' into checksum-branch-1
anandhu-eng Sep 24, 2024
d12083e
preclean fixed
anandhu-eng Sep 24, 2024
74030b2
deleted checksum for url -> cloud.*
anandhu-eng Sep 24, 2024
3566ac1
proper handling of pre_clean
anandhu-eng Sep 24, 2024
27861a5
reverted pre clean change
anandhu-eng Sep 24, 2024
54ae911
changes for custom sample id generation - SDXL
anandhu-eng Sep 24, 2024
dc9bf99
changes for custom sample id generation - SDXL
anandhu-eng Sep 24, 2024
0c98cbd
code clean
anandhu-eng Sep 24, 2024
d8a33bc
fixed bug
anandhu-eng Sep 24, 2024
b40ea46
fix pre download clean
anandhu-eng Sep 24, 2024
7c8984f
added gh action workflow for sdxl reference and nvidia
anandhu-eng Sep 24, 2024
6490673
Merge pull request #129 from anandhu-eng/checksum-branch-2
arjunsuresh Sep 24, 2024
a9bfa16
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Sep 24, 2024
f77dee2
Merge pull request #287 from GATEOverflow/mlperf-inference
arjunsuresh Sep 24, 2024
735b581
Fixes for coco2014 sample ids
arjunsuresh Sep 24, 2024
0784740
removed beam size
anandhu-eng Sep 24, 2024
efee47c
Merge branch 'mlperf-inference' into nvidia-sdxl-v4.1
anandhu-eng Sep 24, 2024
95c4132
Merge pull request #130 from anandhu-eng/nvidia-sdxl-v4.1
arjunsuresh Sep 24, 2024
29bed25
handled false condition in download-file
anandhu-eng Sep 24, 2024
2b33996
Merge pull request #125 from anandhu-eng/checksum-branch-1
arjunsuresh Sep 24, 2024
b89de1d
Cleanup of download-file run.sh
arjunsuresh Sep 24, 2024
f1ca1ee
Create github action for scc24 sdxl
anandhu-eng Sep 24, 2024
088a8d4
added checksum and clean code
anandhu-eng Sep 24, 2024
6fa8c6a
clean code
anandhu-eng Sep 24, 2024
39a3684
Update test-mlperf-inference-sdxl.yaml | Changed conflicting schedule…
arjunsuresh Sep 25, 2024
59a7e3d
Merge pull request #131 from anandhu-eng/nvidia-sdxl-v4.1
arjunsuresh Sep 25, 2024
2bccd71
Merge pull request #132 from anandhu-eng/checksum-branch-1
arjunsuresh Sep 25, 2024
8530385
Cleanups
arjunsuresh Sep 25, 2024
d1957bf
Fix precision for gptj test
arjunsuresh Sep 25, 2024
21c8170
Fix precision for gptj fp16
arjunsuresh Sep 25, 2024
70c1f9f
Fix precision for gptj fp16
arjunsuresh Sep 25, 2024
8c7a2c6
Fix precision for gptj fp16
arjunsuresh Sep 25, 2024
68c883c
Merge branch 'mlcommons:mlperf-inference' into mlperf-inference
arjunsuresh Sep 25, 2024
bc10367
Added support for cuda 12.6.1
arjunsuresh Sep 25, 2024
a877d21
Support install prefix for cuda install
arjunsuresh Sep 25, 2024
1171a54
Create code-review.yml
arjunsuresh Sep 25, 2024
253b052
Update code-review.yml
arjunsuresh Sep 25, 2024
545ddeb
Update code-review.yml
arjunsuresh Sep 25, 2024
b0a02ae
Support --install_prefix for cuda installation
arjunsuresh Sep 25, 2024
d57bd30
Support --install_prefix for cuda installation
arjunsuresh Sep 25, 2024
c1f2139
Support --extra_install_args for cuda installation
arjunsuresh Sep 25, 2024
30c817c
Merge pull request #290 from GATEOverflow/mlperf-inference
arjunsuresh Sep 25, 2024
5ccf5f7
Improve download-file run.sh
arjunsuresh Sep 25, 2024
a0775dd
Update code-review.yml
arjunsuresh Sep 25, 2024
3bbe8b7
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Sep 25, 2024
8484a75
Update code-review.yml
arjunsuresh Sep 25, 2024
69e95bb
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Sep 25, 2024
a6bad02
Update code-review.yml
arjunsuresh Sep 25, 2024
06539c5
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Sep 25, 2024
1f9c4bb
Update code-review.yml
arjunsuresh Sep 25, 2024
e1274e2
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Sep 25, 2024
35c9a87
Update code-review.yml
arjunsuresh Sep 25, 2024
f7620ea
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Sep 25, 2024
1c0f623
Update code-review.yml
arjunsuresh Sep 25, 2024
ac0bc37
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Sep 25, 2024
4f44126
Update code-review.yml
arjunsuresh Sep 25, 2024
a648268
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Sep 25, 2024
053682d
Update code-review.yml
arjunsuresh Sep 25, 2024
e2ec95f
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Sep 25, 2024
43f35a0
Update code-review.yml
arjunsuresh Sep 25, 2024
f19a182
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Sep 25, 2024
1debe15
Update code-review.yml
arjunsuresh Sep 25, 2024
4a7d477
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Sep 25, 2024
42db1f7
Update code-review.yml
arjunsuresh Sep 25, 2024
e5cc9ce
Update code-review.yml
arjunsuresh Sep 25, 2024
516aff1
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Sep 25, 2024
42ed9d3
Update code-review.yml
arjunsuresh Sep 25, 2024
04fc9f8
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Sep 25, 2024
76c9de9
Update code-review.yml
arjunsuresh Sep 25, 2024
468a9bd
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Sep 25, 2024
33597bf
Merge pull request #133 from arjunsuresh/mlperf-inference
arjunsuresh Sep 25, 2024
219ff8c
Update code-review.yml
arjunsuresh Sep 25, 2024
4d6fd03
Merge pull request #292 from mlcommons/main
arjunsuresh Sep 25, 2024
2033889
Merge pull request #293 from mlcommons/main
arjunsuresh Sep 25, 2024
0a9238f
Merge pull request #294 from mlcommons/mlperf-inference
arjunsuresh Sep 25, 2024
c05f1f7
Merge branch 'mlcommons:mlperf-inference' into mlperf-inference
arjunsuresh Sep 25, 2024
0e1489a
Improve get-cuda-devices to handle multiple GPUs, fixes #288
arjunsuresh Sep 25, 2024
f4a1ad2
Use updated get-cuda-devices in mlperf-inference
arjunsuresh Sep 25, 2024
eb8910b
Improved meta for app-mlperf-inference
arjunsuresh Sep 25, 2024
8d7d254
Stop the remaining mlperf runs for docker detached mode
arjunsuresh Sep 25, 2024
042079b
Fix number of accelerators (GPUs) for mlperf-inference
arjunsuresh Sep 25, 2024
cd24064
Update test-mlperf-inference-sdxl.yaml
arjunsuresh Sep 25, 2024
3dbea4a
Update test-scc24-sdxl.yaml
arjunsuresh Sep 25, 2024
12c779c
Dont use venv for nvidia mlperf inference docker
arjunsuresh Sep 26, 2024
bfdfe7f
Merge pull request #295 from GATEOverflow/mlperf-inference
arjunsuresh Sep 26, 2024
03a740f
Merge branch 'dev' into mlperf-inference
arjunsuresh Sep 26, 2024
db60dad
Merge pull request #298 from mlcommons/mlperf-inference
arjunsuresh Sep 26, 2024
28b817e
Merge pull request #299 from mlcommons/dev
gfursin Sep 26, 2024
34 changes: 34 additions & 0 deletions .github/workflows/code-review.yml
@@ -0,0 +1,34 @@
name: OpenAI Code Review

on:
pull_request_target:
types: [opened, synchronize]
paths:
- 'automation/**'
- 'script/**'
- '!**.md'

permissions:
issues: write
pull-requests: write

jobs:
code_review:
runs-on: ubuntu-latest
if: github.repository_owner == 'gateoverflow' && github.event.pull_request.changed_files > 0
steps:
# Run code review via OpenAI
# Step to run the OpenAI Code Review using the GATEOverflow action
- name: Run OpenAI Code Review
uses: GATEOverflow/genai-code-review@v1
with:
github_token: ${{ secrets.GITHUB_TOKEN }} # GitHub token for authentication
openai_api_key: ${{ secrets.OPENAI_API_KEY }} # OpenAI API key for accessing the GPT model
github_pr_id: ${{ github.event.pull_request.number }} # ID of the pull request to review
openai_model: "gpt-4o" # Model to use for the code review
openai_temperature: 0.5 # Temperature setting for the model's output
openai_max_tokens: 2048 # Maximum number of tokens for the model's response
mode: "files" # Mode of review, can be "files" or "diff"
language: "en" # Language for the review output
custom_prompt: "" # Optional custom prompt for the model
continue-on-error: true # Allow the workflow to continue even if this step fails
11 changes: 4 additions & 7 deletions .github/workflows/test-mlperf-inference-gptj.yml
@@ -4,22 +4,19 @@
name: MLPerf inference GPT-J

on:
push:
branches: [ "main", "dev", "mlperf-inference" ]
paths:
- '.github/workflows/test-mlperf-inference-gptj.yml'
- '**'
- '!**.md'
schedule:
- cron: "1 1 * * */3"

jobs:
build:
if: github.repository_owner == 'gateoverflow'
runs-on: [ self-hosted, linux, x64 ]
strategy:
fail-fast: false
matrix:
python-version: [ "3.12" ]
backend: [ "pytorch" ]
precision: [ "bfloat16" ]
precision: [ "float16" ]

steps:
- name: Install dependencies
47 changes: 47 additions & 0 deletions .github/workflows/test-mlperf-inference-sdxl.yaml
@@ -0,0 +1,47 @@
name: MLPerf inference SDXL

on:
schedule:
- cron: "1 2 * * *"

jobs:
build_reference:
if: github.repository_owner == 'gateoverflow'
runs-on: [ self-hosted, linux, x64 ]
strategy:
fail-fast: false
matrix:
python-version: [ "3.12" ]
backend: [ "pytorch" ]
precision: [ "float16" ]
steps:
- name: Install dependencies
run: |
source gh_action/bin/deactivate || python3 -m venv gh_action
source gh_action/bin/activate
export CM_REPOS=$HOME/GH_CM
cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
- name: Test MLPerf Inference SDXL
run: |
cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean

build_nvidia:
if: github.repository_owner == 'gateoverflow'
runs-on: [ self-hosted, linux, x64 ]
strategy:
fail-fast: false
matrix:
python-version: [ "3.12" ]
backend: [ "tensorrt" ]
precision: [ "float16" ]
implementation: [ "nvidia" ]
steps:
- name: Install dependencies
run: |
source gh_action/bin/deactivate || python3 -m venv gh_action
source gh_action/bin/activate
export CM_REPOS=$HOME/GH_CM
cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
- name: Test MLPerf Inference SDXL
run: |
cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean
56 changes: 56 additions & 0 deletions .github/workflows/test-scc24-sdxl.yaml
@@ -0,0 +1,56 @@
name: MLPerf inference SDXL

on:
schedule:
- cron: "43 1 * * *"

jobs:
build_reference:
if: github.repository_owner == 'gateoverflow'
runs-on: [ self-hosted, linux, x64 ]
strategy:
fail-fast: false
matrix:
python-version: [ "3.12" ]
backend: [ "pytorch" ]
precision: [ "float16" ]
device: [ "cuda" ]
steps:
- name: Install dependencies
run: |
source gh_action/bin/deactivate || python3 -m venv gh_action
source gh_action/bin/activate
export CM_REPOS=$HOME/GH_CM
cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
- name: Test MLPerf Inference reference SDXL SCC
env:
GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
run: |
cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --quiet --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --precision=float16 --clean |
cm run script --tags=generate,inference,submission --clean --preprocess_submission=yes --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons |
cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet

build_nvidia:
if: github.repository_owner == 'gateoverflow'
runs-on: [ self-hosted, linux, x64 ]
strategy:
fail-fast: false
matrix:
python-version: [ "3.12" ]
backend: [ "tensorrt" ]
precision: [ "float16" ]
implementation: [ "nvidia" ]
steps:
- name: Install dependencies
run: |
source gh_action/bin/deactivate || python3 -m venv gh_action
source gh_action/bin/activate
export CM_REPOS=$HOME/GH_CM
cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
- name: Test MLPerf Inference NVIDIA SDXL SCC
env:
GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
run: |
cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --precision=float16 --clean |
cm run script --tags=generate,inference,submission --clean --preprocess_submission=yes --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons |
cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet
21 changes: 16 additions & 5 deletions script/app-mlperf-inference-mlcommons-python/_cm.yaml
@@ -185,8 +185,9 @@ deps:
## Pytorch (CPU)
- tags: get,generic-python-lib,_torch
names:
- ml-engine-pytorch
- pytorch
- torch
- ml-engine-pytorch
- pytorch
skip_if_env:
CM_MODEL:
- dlrm-v2-99
@@ -216,6 +217,7 @@
- tags: get,generic-python-lib,_torchvision
names:
- ml-engine-torchvision
- torchvision
skip_if_env:
CM_MODEL:
- dlrm-v2-99
@@ -231,6 +233,7 @@
- tags: get,generic-python-lib,_torchvision_cuda
names:
- ml-engine-torchvision
- torchvision
enable_if_env:
CM_MLPERF_BACKEND:
- pytorch
@@ -695,6 +698,8 @@ variations:
add_deps_recursive:
pytorch:
tags: _rocm
torchvision:
tags: _rocm

rocm,sdxl:
add_deps:
@@ -834,6 +839,9 @@ variations:
MLPERF_TVM_TORCH_QUANTIZED_ENGINE: qnnpack
deps:
- tags: get,generic-python-lib,_torch
names:
- torch
- pytorch
- tags: get,tvm
names:
- tvm
@@ -861,7 +869,6 @@ variations:

gptj_:
deps:
- tags: get,generic-python-lib,_torch
- tags: get,generic-python-lib,_package.datasets
- tags: get,generic-python-lib,_package.attrs
- tags: get,generic-python-lib,_package.accelerate
@@ -1095,6 +1102,10 @@ variations:
- dlrm-src
# to force the version
- tags: get,generic-python-lib,_torch
names:
- torch
- pytorch
- ml-engine-pytorch
version: "1.13.1"
- tags: get,generic-python-lib,_mlperf_logging
- tags: get,generic-python-lib,_opencv-python
@@ -1247,9 +1258,9 @@ variations:
bfloat16:
group: precision
add_deps_recursive:
ml-model-bfloat16:
ml-model-float16:
tags:
_fp32
_fp16
env:
CM_MLPERF_QUANTIZATION: off
CM_MLPERF_MODEL_PRECISION: bfloat16
2 changes: 2 additions & 0 deletions script/app-mlperf-inference-mlcommons-python/customize.py
@@ -296,6 +296,8 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio
scenario_extra_options + mode_extra_options + \
" --output " + env['CM_MLPERF_OUTPUT_DIR'] + \
" --model-path " + env['CM_ML_MODEL_PATH']
if env.get('CM_COCO2014_SAMPLE_ID_PATH','') != '':
cmd += " --ids-path " + env['CM_COCO2014_SAMPLE_ID_PATH']

elif "llama2-70b" in env['CM_MODEL']:
env['RUN_DIR'] = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "llama2-70b")
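For context, a minimal sketch of what the new branch above does to the reference SDXL run command. Only the conditional `--ids-path` append is taken from the diff; the env value and base command string here are hypothetical placeholders:

```python
# Hypothetical env and base command; the conditional append mirrors customize.py above.
env = {'CM_COCO2014_SAMPLE_ID_PATH': '/path/to/coco2014/sample_ids.txt'}  # set by the coco2014 dataset script
cmd = "python3 main.py --scenario Offline --output /tmp/results"          # placeholder reference command

if env.get('CM_COCO2014_SAMPLE_ID_PATH', '') != '':
    cmd += " --ids-path " + env['CM_COCO2014_SAMPLE_ID_PATH']

print(cmd)  # the SDXL harness only receives --ids-path when the env var is set
```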
6 changes: 5 additions & 1 deletion script/app-mlperf-inference-nvidia/_cm.yaml
@@ -423,7 +423,7 @@ variations:
group: model
env:
CM_MODEL: stable-diffusion-xl
CM_NOT_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://github.com/mlcommons/cm4mlops/blob/main/script/get-ml-model-stable-diffusion/_cm.json#L174"
CM_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://github.com/mlcommons/cm4mlops/blob/main/script/get-ml-model-stable-diffusion/_cm.json#L174"
CM_ML_MODEL_WEIGHT_TRANSFORMATIONS: "quantization, affine fusion"
CM_ML_MODEL_INPUTS_DATA_TYPE: int32
CM_ML_MODEL_WEIGHTS_DATA_TYPE: int8
@@ -878,6 +878,8 @@ variations:
tags: build,nvidia,inference,server

- tags: reproduce,mlperf,inference,nvidia,harness,_preprocess_data
names:
- nvidia-preprocess-data
inherit_variation_tags: true
force_cache: true
skip_inherit_variation_groups:
@@ -988,6 +990,8 @@ variations:

- tags: reproduce,mlperf,inference,nvidia,harness,_preprocess_data
inherit_variation_tags: true
names:
- nvidia-preprocess-data
skip_inherit_variation_groups:
- run-mode
- loadgen-scenario
7 changes: 4 additions & 3 deletions script/app-mlperf-inference-nvidia/customize.py
@@ -73,10 +73,11 @@ def preprocess(i):
elif "stable-diffusion" in env["CM_MODEL"]:
target_data_path = os.path.join(env['MLPERF_SCRATCH_PATH'], 'data', 'coco', 'SDXL')
if not os.path.exists(target_data_path):
cmds.append("make download_data BENCHMARKS='stable-diffusion-xl'")
os.makedirs(target_data_path)
#cmds.append("make download_data BENCHMARKS='stable-diffusion-xl'")
env['CM_REQUIRE_COCO2014_DOWNLOAD'] = 'yes'
cmds.append(f"cp -r \${CM_DATASET_PATH_ROOT}/captions/captions.tsv {target_data_path}/captions_5k_final.tsv" )
cmds.append(f"cp -r \${CM_DATASET_PATH_ROOT}/latents/latents.pt {target_data_path}/latents.pt" )
cmds.append(f"cp -r \$CM_DATASET_PATH_ROOT/captions/captions.tsv {target_data_path}/captions_5k_final.tsv" )
cmds.append(f"cp -r \$CM_DATASET_PATH_ROOT/latents/latents.pt {target_data_path}/latents.pt" )
fp16_model_path = os.path.join(env['MLPERF_SCRATCH_PATH'], 'models', 'SDXL', 'official_pytorch', 'fp16', 'stable_diffusion_fp16')

if not os.path.exists(os.path.dirname(fp16_model_path)):
3 changes: 2 additions & 1 deletion script/app-mlperf-inference/_cm.yaml
@@ -385,6 +385,7 @@ variations:
CM_IMAGENET_ACCURACY_DTYPE: int32
CM_CNNDM_ACCURACY_DTYPE: int32
CM_LIBRISPEECH_ACCURACY_DTYPE: int8
CM_DOCKER_USE_VIRTUAL_PYTHON: no
prehook_deps:
- names:
- nvidia-original-mlperf-inference
@@ -1162,7 +1163,7 @@ variations:
mlperf-inference-implementation:
tags: _cuda
deps:
- tags: get,cuda-devices
- tags: get,cuda-devices,_with-pycuda
skip_if_env:
CM_CUDA_DEVICE_PROP_GLOBAL_MEMORY:
- "yes"
7 changes: 5 additions & 2 deletions script/build-dockerfile/customize.py
@@ -180,8 +180,11 @@ def preprocess(i):

f.write(EOL+'# Install python packages' + EOL)
python = get_value(env, config, 'PYTHON', 'CM_DOCKERFILE_PYTHON')
f.write('RUN {} -m venv /home/cmuser/venv/cm'.format(python) + " " + EOL)
f.write('ENV PATH="/home/cmuser/venv/cm/bin:$PATH"' + EOL)

docker_use_virtual_python = env.get('CM_DOCKER_USE_VIRTUAL_PYTHON', "yes")
if str(docker_use_virtual_python).lower() not in [ "no", "0", "false"]:
f.write('RUN {} -m venv /home/cmuser/venv/cm'.format(python) + " " + EOL)
f.write('ENV PATH="/home/cmuser/venv/cm/bin:$PATH"' + EOL)
#f.write('RUN . /opt/venv/cm/bin/activate' + EOL)
f.write('RUN {} -m pip install '.format(python) + " ".join(get_value(env, config, 'python-packages')) + ' ' + pip_extra_flags + ' ' + EOL)

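A minimal sketch of how the new CM_DOCKER_USE_VIRTUAL_PYTHON flag is interpreted when the Dockerfile is generated. The helper function, defaults, and placeholder package list are illustrative; the truthiness check and emitted Dockerfile lines are taken from the diff above:

```python
def dockerfile_python_lines(env, python="python3"):
    """Return the Dockerfile lines emitted for the Python setup (illustrative helper)."""
    lines = []
    # Defaults to "yes"; only an explicit "no"/"0"/"false" skips the virtual environment.
    use_venv = str(env.get('CM_DOCKER_USE_VIRTUAL_PYTHON', "yes")).lower() not in ["no", "0", "false"]
    if use_venv:
        lines.append(f"RUN {python} -m venv /home/cmuser/venv/cm")
        lines.append('ENV PATH="/home/cmuser/venv/cm/bin:$PATH"')
    lines.append(f"RUN {python} -m pip install <python-packages>")  # placeholder package list
    return lines

print("\n".join(dockerfile_python_lines({})))                                       # venv enabled (default)
print("\n".join(dockerfile_python_lines({'CM_DOCKER_USE_VIRTUAL_PYTHON': 'no'})))   # venv skipped
```

This is why the Nvidia MLPerf inference docker variation can now set CM_DOCKER_USE_VIRTUAL_PYTHON: no (see the app-mlperf-inference/_cm.yaml change above) to run directly against the system Python.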
2 changes: 2 additions & 0 deletions script/clean-nvidia-mlperf-inference-scratch-space/_cm.yaml
@@ -10,6 +10,8 @@ tags:
- mlperf
- inference
uid: bb41f6e3608e4e8a
input_mapping:
extra_cache_rm_tags: CM_CLEAN_EXTRA_CACHE_RM_TAGS
deps:
# Get Nvidia scratch space where data and models get downloaded
- tags: get,mlperf,inference,nvidia,scratch,space
17 changes: 12 additions & 5 deletions script/clean-nvidia-mlperf-inference-scratch-space/customize.py
@@ -16,22 +16,29 @@ def preprocess(i):

clean_cmd = ''
cache_rm_tags = ''
extra_cache_rm_tags = env.get('CM_CLEAN_EXTRA_CACHE_RM_TAGS', '')

if env.get('CM_MODEL', '') == 'sdxl':
if env.get('CM_CLEAN_ARTIFACT_NAME', '') == 'downloaded_data':
clean_cmd = f"""rm -rf {os.path.join(env['CM_NVIDIA_MLPERF_SCRATCH_PATH'], "data", "coco", "SDXL")} """
cache_rm_tags = "nvidia-harness,_preprocessed_data,_sdxl"
cache_rm_tags = "nvidia-harness,_preprocess_data,_sdxl"
if env.get('CM_CLEAN_ARTIFACT_NAME', '') == 'preprocessed_data':
clean_cmd = f"""rm -rf {os.path.join(env['CM_NVIDIA_MLPERF_SCRATCH_PATH'], "preprocessed_data", "coco2014-tokenized-sdxl")} """
cache_rm_tags = "nvidia-harness,_preprocessed_data,_sdxl"
cache_rm_tags = "nvidia-harness,_preprocess_data,_sdxl"

if clean_cmd != '':
env['CM_RUN_CMD'] = clean_cmd
cache_rm_tags = cache_rm_tags + extra_cache_rm_tags

if cache_rm_tags:
r = cm.access({'action': 'rm', 'automation': 'cache', 'tags': cache_rm_tags})
r = cm.access({'action': 'rm', 'automation': 'cache', 'tags': cache_rm_tags, 'f': True})
print(r)
if r['return'] != 0 and r['return'] != 16: ## ignore missing ones
return r
if r['return'] == 0: # cache entry found
if clean_cmd != '':
env['CM_RUN_CMD'] = clean_cmd
else:
if clean_cmd != '':
env['CM_RUN_CMD'] = clean_cmd

return {'return':0}

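A minimal sketch of the forced cache-removal pattern used above, assuming `cm` is the CM automation API already imported by customize.py. The call arguments and the "return code 16 means no cache entry found" handling come from the diff; the wrapper function and its return shape are illustrative:

```python
def remove_cache_entries(cm, cache_rm_tags):
    """Force-remove matching CM cache entries; treat 'not found' (return code 16) as non-fatal."""
    r = cm.access({'action': 'rm', 'automation': 'cache', 'tags': cache_rm_tags, 'f': True})
    if r['return'] not in (0, 16):    # any other code is an unexpected failure
        return r                      # propagate the error dict, as customize.py does
    return {'return': 0, 'found': r['return'] == 0}

# e.g. remove_cache_entries(cm, "nvidia-harness,_preprocess_data,_sdxl")
# with --extra_cache_rm_tags, the extra tags are appended to cache_rm_tags before this call
```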
2 changes: 2 additions & 0 deletions script/download-file/customize.py
@@ -108,6 +108,8 @@ def preprocess(i):
elif "no such file" in checksum_result.stderr.lower():
#print(f"No file {env['CM_DOWNLOAD_FILENAME']}. Downloading through cmutil.")
cmutil_require_download = 1
elif checksum_result.returncode == 1:
return {"return":1, "error":f"Error while checking checksum: {checksum_result.stderr}"}
else:
print(f"File {env['CM_DOWNLOAD_FILENAME']} already present, original checksum and computed checksum matches! Skipping Download..")
else:
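A minimal sketch of the checksum decision that the diff above completes. The `md5sum -c` invocation and subprocess wrapper are assumptions; the three outcomes mirror the branches in download-file/customize.py:

```python
import subprocess

def check_existing_download(checksum_file, filename):
    """Return 'skip' or 'download', or raise on an unhandled md5sum error (illustrative)."""
    result = subprocess.run(["md5sum", "-c", checksum_file], capture_output=True, text=True)
    if "no such file" in result.stderr.lower():
        return "download"            # file not present yet: download through cmutil
    if result.returncode == 1:
        # Unhandled md5sum failure (e.g. checksum mismatch): fail instead of silently re-downloading
        raise RuntimeError(f"Error while checking checksum: {result.stderr}")
    return "skip"                    # file already present and checksum matches
```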