Azure · wangzelin007 · Apr 20, 2022 · Apr 2, 2022 · Apr 2, 2022 · Apr 2, 2022
diff --git a/.azure-pipelines/templates/automation_test.yml b/.azure-pipelines/templates/automation_test.yml
@@ -7,6 +7,11 @@ parameters:
 - name: profile
   type: string
   default: latest
+- name: instance
+  type: string
+  default: '1_1'
+  # 8_3 means there are 8 instances in total and modules are being scheduled on the third instance
+  # 1_1 means there is only 1 instance, so no module scheduling is needed
 - name: fullTest
   displayName: Run full test?
   type: boolean
@@ -56,7 +61,7 @@ steps:
         azdev test --no-exitfirst --repo=./ --src=HEAD --tgt=origin/$(System.PullRequest.TargetBranch) --cli-ci --profile ${{ parameters.profile }} --verbose --series --pytest-args "--durations=0"
       else
         echo "Running full test"
-        python scripts/ci/automation_full_test.py "${{ parameters.profile }}" "$serial_modules"
+        python scripts/ci/automation_full_test.py "${{ parameters.instance }}" "${{ parameters.profile }}" "$serial_modules"
       fi
     displayName: "azdev test"
     env:

diff --git a/azure-pipelines-full-tests.yml b/azure-pipelines-full-tests.yml
@@ -13,26 +13,6 @@ pr:
       - '*'
 
 jobs:
-- job: AutomationTest
-  displayName: Automation Test (Profile Latest)
-  timeoutInMinutes: 120
-  pool:
-    vmImage: 'ubuntu-20.04'
-  strategy:
-    matrix:
-      Python36:
-        python.version: '3.6'
-      Python38:
-        python.version: '3.8'
-      Python310:
-        python.version: '3.10'
-  steps:
-    - template: .azure-pipelines/templates/automation_test.yml
-      parameters:
-        pythonVersion: '$(python.version)'
-        profile: 'latest'
-        fullTest: true
-
 - job: AutomationTest20200901
   displayName: Automation Test (Profile 2020-09-01)
   timeoutInMinutes: 120
@@ -92,3 +72,99 @@ jobs:
         pythonVersion: '$(python.version)'
         profile: '2018-03-01-hybrid'
         fullTest: true
+
+- job: AutomationFullTestPython36ProfileLatest
+  displayName: Automation Full Test Python36 Profile Latest
+  timeoutInMinutes: 9999
+  strategy:
+    maxParallel: 8
+    matrix:
+      8_1:
+        Instance: 8_1
+      8_2:
+        Instance: 8_2
+      8_3:
+        Instance: 8_3
+      8_4:
+        Instance: 8_4
+      8_5:
+        Instance: 8_5
+      8_6:
+        Instance: 8_6
+      8_7:
+        Instance: 8_7
+      8_8:
+        Instance: 8_8
+  pool:
+    vmImage: 'ubuntu-20.04'
+  steps:
+    - template: .azure-pipelines/templates/automation_test.yml
+      parameters:
+        pythonVersion: '3.6'
+        profile: 'latest'
+        instance: $(Instance)
+        fullTest: true
+
+- job: AutomationFullTestPython38ProfileLatest
+  displayName: Automation Full Test Python38 Profile Latest
+  timeoutInMinutes: 9999
+  strategy:
+    maxParallel: 8
+    matrix:
+      8_1:
+        Instance: 8_1
+      8_2:
+        Instance: 8_2
+      8_3:
+        Instance: 8_3
+      8_4:
+        Instance: 8_4
+      8_5:
+        Instance: 8_5
+      8_6:
+        Instance: 8_6
+      8_7:
+        Instance: 8_7
+      8_8:
+        Instance: 8_8
+  pool:
+    vmImage: 'ubuntu-20.04'
+  steps:
+    - template: .azure-pipelines/templates/automation_test.yml
+      parameters:
+        pythonVersion: '3.8'
+        profile: 'latest'
+        instance: $(Instance)
+        fullTest: true
+
+- job: AutomationFullTestPython310ProfileLatest
+  displayName: Automation Full Test Python310 Profile Latest
+  timeoutInMinutes: 9999
+  strategy:
+    maxParallel: 8
+    matrix:
+      8_1:
+        Instance: 8_1
+      8_2:
+        Instance: 8_2
+      8_3:
+        Instance: 8_3
+      8_4:
+        Instance: 8_4
+      8_5:
+        Instance: 8_5
+      8_6:
+        Instance: 8_6
+      8_7:
+        Instance: 8_7
+      8_8:
+        Instance: 8_8
+  pool:
+    vmImage: 'ubuntu-20.04'
+  steps:
+    - template: .azure-pipelines/templates/automation_test.yml
+      parameters:
+        pythonVersion: '3.10'
+        profile: 'latest'
+        instance: $(Instance)
+        fullTest: true
@@ -15,28 +15,165 @@
 ch = logging.StreamHandler()
 ch.setLevel(logging.DEBUG)
 logger.addHandler(ch)
-profile = sys.argv[1]
-serial_modules = sys.argv[2].split()
+
+instance_cnt, instance_idx = [int(i) for i in sys.argv[1].split('_')]
+profile = sys.argv[2]
+serial_modules = sys.argv[3].split()
+jobs = {
+            'acr': 45,
+            'acs': 62,
+            'advisor': 18,
+            'ams': 136,
+            'apim': 30,
+            'appconfig': 41,
+            'appservice': 150,  # series
+            # 'appservice': 157,  # parallel
+            'aro': 33,
+            'backup': 76,
+            'batch': 21,
+            'batchai': 24,
+            'billing': 21,
+            'botservice': 25,  # series
+            # 'botservice': 28,  # parallel
+            'cdn': 36,
+            'cloud': 18,  # series
+            # 'cloud': 22,  # parallel
+            'cognitiveservices': 24,
+            'config': 21,
+            'configure': 17,
+            'consumption': 21,
+            'container': 19,
+            'cosmosdb': 45,
+            'databoxedge': 25,
+            'deploymentmanager': 18,
+            'dla': 19,
+            'dls': 22,
+            'dms': 22,
+            'eventgrid': 24,
+            'eventhubs': 24,
+            'extension': 0,
+            'feedback': 31,
+            'find': 22,
+            'hdinsight': 34,
+            'identity': 18,
+            'interactive': 18,
+            'iot': 57,
+            'keyvault': 39,
+            'kusto': 23,
+            'lab': 19,
+            'managedservices': 18,
+            'maps': 19,
+            'marketplaceordering': 18,
+            'monitor': 66,
+            'natgateway': 22,
+            'netappfiles': 48,
+            'network': 364,  # series
+            # 'network': 182,  # parallel
+            'policyinsights': 20,
+            'privatedns': 29,
+            'profile': 20,
+            'rdbms': 89,
+            'redis': 31,
+            'relay': 22,
+            'reservations': 20,
+            'resource': 101,
+            'role': 38,
+            'search': 34,
+            'security': 23,
+            'servicebus': 24,
+            'serviceconnector': 56,
+            'servicefabric': 49,
+            'signalr': 20,
+            'sql': 117,
+            'sqlvm': 31,
+            'storage': 108,
+            'synapse': 45,
+            'util': 18,
+            'vm': 313,
+            'azure-cli': 16,
+            'azure-cli-core': 26,
+            'azure-cli-telemetry': 18,
+            'azure-cli-testsdk': 20,
+        }
 
 
 class AutomaticScheduling(object):
 
     def __init__(self):
+        """
+        self.jobs: Record the test time of each module
+        self.modules: All modules and core, ignore extensions
+        self.serial_modules: All modules which need to execute in serial mode
+        self.works: Record which modules each worker needs to test
+        self.instance_cnt:
+        The total number of concurrent automation full test pipeline instances for a specific python version
+        Because we share the vm pool with the azure-sdk team, we can't set the concurrency arbitrarily
+        Best practice is to keep the number of concurrent tasks below 50
+        If you set a larger concurrency, many instances will be left in the waiting state
+        The network module has the most test cases and can only be tested serially for now, so setting instance_cnt = 8 is sufficient
+        Total concurrent number: AutomationTest20200901 * 3 + AutomationTest20190301 * 3 + AutomationTest20180301 * 3 + AutomationFullTest * 8 * 3 (python_version) = 33
+        self.instance_idx:
+        The 1-based index of this automation full test pipeline instance for a specific python version
+        For example:
+        8_3 means there are 8 instances in total and modules are being scheduled on the third instance
+        1_1 means there is only 1 instance, so no module scheduling is needed
+        """
+        self.jobs = []
         self.modules = {}
         self.serial_modules = serial_modules
+        self.works = []
+        self.instance_cnt = instance_cnt
+        self.instance_idx = instance_idx
+        for i in range(self.instance_cnt):
+            worker = {}
+            self.works.append(worker)
         self.profile = profile
 
     def get_all_modules(self):
         result = get_path_table()
         # only get modules and core, ignore extensions
         self.modules = {**result['mod'], **result['core']}
 
-    def run_modules(self):
-        # divide all modules into parallel or serial execution
+    def append_new_modules(self):
+        # If a new module has been added, use the average test time
+        avg_cost = int(sum(jobs.values()) / len(jobs.values()))
+        for module in self.modules:
+            if module not in jobs.keys():
+                jobs[module] = avg_cost
+        # sort jobs by time cost (desc)
+        self.jobs = sorted(jobs.items(), key=lambda item: -item[1])
+
+    def get_worker(self):
+        """
+        Use a greedy algorithm to distribute jobs to each worker
+        For each job, we assign it to the worker with the least total test time currently
+        :return: worker index
+        """
+        for idx, worker in enumerate(self.works):
+            tmp_time = sum(worker.values()) if sum(worker.values()) else 0
+            if idx == 0:
+                worker_time = tmp_time
+                worker_num = idx
+            if tmp_time < worker_time:
+                worker_time = tmp_time
+                worker_num = idx
+        return worker_num
+
+    def get_instance_modules(self):
+        # get modules which need to execute in the pipeline instance with specific index
+        for k, v in self.jobs:
+            idx = self.get_worker()
+            self.works[idx][k] = v
+        # instance_idx: 1~n, python list index: 0~n-1
+        self.instance_idx -= 1
+        return self.works[self.instance_idx]
+
+    def run_instance_modules(self, instance_modules):
+        # divide the modules that the current instance needs to execute into parallel or serial execution
         error_flag = False
         serial_tests = []
         parallel_tests = []
-        for k, v in self.modules.items():
+        for k, v in instance_modules.items():
             if k in self.serial_modules:
                 serial_tests.append(k)
             else:
@@ -64,7 +201,9 @@ def main():
     logger.info("Start automation full test ...\n")
     autoschduling = AutomaticScheduling()
     autoschduling.get_all_modules()
-    sys.exit(1) if autoschduling.run_modules() else sys.exit(0)
+    autoschduling.append_new_modules()
+    instance_modules = autoschduling.get_instance_modules()
+    sys.exit(1) if autoschduling.run_instance_modules(instance_modules) else sys.exit(0)
 
 
 if __name__ == '__main__':