test(integration): replica-auto-balance when disabled disk scheduling
ref: 6508

Signed-off-by: Chin-Ya Huang <chin-ya.huang@suse.com>
c3y1huang committed Dec 8, 2023
1 parent 20daee3 commit 61d6ce2
Showing 1 changed file with 79 additions and 0 deletions.
79 changes: 79 additions & 0 deletions manager/integration/tests/test_zone.py
@@ -8,6 +8,7 @@
from common import pvc, pod # NOQA
from common import volume_name # NOQA

from common import cleanup_node_disks
from common import get_self_host_id

from common import create_and_wait_pod
@@ -503,6 +504,84 @@ def test_replica_auto_balance_zone_best_effort(client, core_api, volume_name):
    assert z3_r_count == 2


def test_replica_auto_balance_when_disabled_disk_scheduling_in_zone(client, core_api, volume_name): # NOQA
"""
Scenario: replica auto-balance when disk scheduling is disabled on nodes
in a zone.
Issue: https://github.com/longhorn/longhorn/issues/6508
Given `replica-soft-anti-affinity` setting is `true`.
And node-1 is in zone-1.
node-2 is in zone-2.
node-3 is in zone-3.
And disk scheduling is disabled on node-3.
And create a volume with 3 replicas.
And attach the volume to test pod node.
And 3 replicas running in zone-1 and zone-2.
0 replicas running in zone-3.
When set `replica-auto-balance` to `best-effort`.
Then 3 replicas running in zone-1 and zone-2.
0 replicas running in zone-3.
And replica count remains stable across zones and nodes.
"""
    # Set `replica-soft-anti-affinity` to `true`.
    update_setting(client, SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY, "true")

    # Assign nodes to respective zones
    node1, node2, node3 = client.list_node()
    set_k8s_node_zone_label(core_api, node1.name, ZONE1)
    set_k8s_node_zone_label(core_api, node2.name, ZONE2)
    set_k8s_node_zone_label(core_api, node3.name, ZONE3)
    wait_longhorn_node_zone_updated(client)

    # Disable disk scheduling on node 3
    cleanup_node_disks(client, node3.name)

    # Create a volume with 3 replicas
    num_of_replicas = 3
    volume = client.create_volume(name=volume_name,
                                  numberOfReplicas=num_of_replicas)

    # Wait for the volume to be detached, then attach it to the test pod node
    volume = wait_for_volume_detached(client, volume_name)
    volume.attach(hostId=get_self_host_id())

    # Helper: retry until all running replicas are in zone-1 and zone-2 and
    # none are in zone-3. With is_stable=True, assert on every retry so the
    # distribution is also verified to stay unchanged.
    def assert_replica_count(is_stable=False):
        for _ in range(RETRY_COUNTS):
            time.sleep(RETRY_INTERVAL)

            zone3_replica_count = get_zone_replica_count(
                client, volume_name, ZONE3, chk_running=True)
            assert zone3_replica_count == 0

            total_replica_count = \
                get_zone_replica_count(
                    client, volume_name, ZONE1, chk_running=True) + \
                get_zone_replica_count(
                    client, volume_name, ZONE2, chk_running=True)

            if is_stable:
                assert total_replica_count == num_of_replicas
            elif total_replica_count == num_of_replicas:
                break

        assert total_replica_count == 3

    # Perform the initial assertion to ensure the replica count is as expected
    assert_replica_count()

    # Update the replica-auto-balance setting to `best-effort`
    update_setting(client, SETTING_REPLICA_AUTO_BALANCE, "best-effort")

    # Perform the final assertion to ensure the replica count is as expected,
    # and stable after the setting update
    assert_replica_count(is_stable=True)
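For context, a minimal sketch (not part of this commit) of running only this new case through pytest's Python API; it assumes the Longhorn integration test environment is already configured and pytest is installed, and the "-v" flag is purely illustrative:

# Hypothetical local invocation of just this test case.
import pytest

pytest.main([
    "manager/integration/tests/test_zone.py"
    "::test_replica_auto_balance_when_disabled_disk_scheduling_in_zone",
    "-v",
])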


def test_replica_auto_balance_when_replica_on_unschedulable_node(client, core_api, volume_name, request): # NOQA
"""
Scenario: replica auto-balance when replica already running on
