Bring back TIMM model inductor CI test (pytorch#87730)
Summary: pytorch#87588 has solved the inductor compilation speed
regression, so we can try running the TIMM models with fewer shards and
also enable pretrained model downloading, which should resolve the
flakiness we have seen previously.
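
As a quick sanity check of the new layout, the shard-to-partition
arithmetic works out as follows (illustrative shell only, not CI code;
it mirrors the id=$((SHARD_NUMBER-3)) line added to test.sh below):

    for SHARD_NUMBER in 3 4 5 6 7; do
      id=$((SHARD_NUMBER-3))
      echo "inductor shard ${SHARD_NUMBER} -> TIMM partition ${id} of 5"
    done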

cc @jansel @mlazos @soumith @voznesenskym @yanboliang @penguinwu @anijain2305
Pull Request resolved: pytorch#87730
Approved by: https://github.com/anijain2305
desertfire authored and pytorchmergebot committed Oct 26, 2022
1 parent 85ffbed commit 57b36bf
Showing 3 changed files with 16 additions and 5 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/inductor.yml
@@ -22,8 +22,13 @@ jobs:
       cuda-arch-list: 8.6
       test-matrix: |
         { include: [
-          { config: "inductor", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
-          { config: "inductor", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+          { config: "inductor", shard: 1, num_shards: 7, runner: "linux.g5.4xlarge.nvidia.gpu" },
+          { config: "inductor", shard: 2, num_shards: 7, runner: "linux.g5.4xlarge.nvidia.gpu" },
+          { config: "inductor", shard: 3, num_shards: 7, runner: "linux.g5.4xlarge.nvidia.gpu" },
+          { config: "inductor", shard: 4, num_shards: 7, runner: "linux.g5.4xlarge.nvidia.gpu" },
+          { config: "inductor", shard: 5, num_shards: 7, runner: "linux.g5.4xlarge.nvidia.gpu" },
+          { config: "inductor", shard: 6, num_shards: 7, runner: "linux.g5.4xlarge.nvidia.gpu" },
+          { config: "inductor", shard: 7, num_shards: 7, runner: "linux.g5.4xlarge.nvidia.gpu" },
         ]}
 
   linux-bionic-cuda11_6-py3_10-gcc7-inductor-test:
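
Each entry in the matrix above becomes one CI job. Roughly, and with
the caveat that the real harness sets more environment than this, a
job's config/shard/num_shards fields presumably end up as the variables
that .jenkins/pytorch/test.sh branches on:

    export TEST_CONFIG=inductor
    export SHARD_NUMBER=3
    export NUM_TEST_SHARDS=7
    .jenkins/pytorch/test.sh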
9 changes: 8 additions & 1 deletion .jenkins/pytorch/test.sh
@@ -281,7 +281,7 @@ test_inductor_timm_shard() {
   TEST_REPORTS_DIR=/tmp/test-reports
   mkdir -p "$TEST_REPORTS_DIR"
   python benchmarks/dynamo/timm_models.py --ci --training --accuracy \
-    --device cuda --inductor --float32 --total-partitions 8 --partition-id "$1" \
+    --device cuda --inductor --float32 --total-partitions 5 --partition-id "$1" \
     --output "$TEST_REPORTS_DIR"/inductor_timm_"$1".csv
   python benchmarks/dynamo/check_csv.py -f "$TEST_REPORTS_DIR"/inductor_timm_"$1".csv
 }
@@ -749,6 +749,13 @@ elif [[ "${TEST_CONFIG}" == *inductor* && "${SHARD_NUMBER}" == 2 && $NUM_TEST_SH
   install_triton
   install_huggingface
   test_inductor_huggingface_shard 0
+elif [[ "${TEST_CONFIG}" == *inductor* && $SHARD_NUMBER -lt 8 && $NUM_TEST_SHARDS -gt 1 ]]; then
+  install_torchvision
+  install_filelock
+  install_triton
+  install_timm
+  id=$((SHARD_NUMBER-3))
+  test_inductor_timm_shard $id
 elif [[ "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
   test_without_numpy
   install_torchvision
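
The new branch covers shards 3 through 7: id=$((SHARD_NUMBER-3)) maps
them onto partitions 0-4, matching --total-partitions 5 in
test_inductor_timm_shard above. To spot-check a single partition outside
CI (assuming a CUDA machine with torchvision, filelock, triton, and timm
installed, per the install steps above):

    python benchmarks/dynamo/timm_models.py --ci --training --accuracy \
      --device cuda --inductor --float32 --total-partitions 5 --partition-id 0 \
      --output /tmp/inductor_timm_0.csv
    python benchmarks/dynamo/check_csv.py -f /tmp/inductor_timm_0.csv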
3 changes: 1 addition & 2 deletions benchmarks/dynamo/timm_models.py
@@ -205,8 +205,7 @@ def load_model(
             drop_rate=0.0,
             drop_path_rate=None,
             drop_block_rate=None,
-            # Skip downloading pretrained models for speedy CI
-            pretrained=not self.args.ci,
+            pretrained=True,
             # global_pool=kwargs.pop('gp', 'fast'),
             # num_classes=kwargs.pop('num_classes', None),
             # drop_rate=kwargs.pop('drop', 0.),
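
The removed --ci guard used to skip weight downloads for speed; with
pretrained=True the benchmark always loads each model's released
weights, which is what the accuracy comparison needs. A minimal
illustration of the difference (assumes the timm package is installed;
resnet50 is an arbitrary example model, not one pinned by the suite):

    # pretrained=True fetches released weights instead of random init:
    python -c "import timm; m = timm.create_model('resnet50', pretrained=True); print(type(m).__name__)"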
