debug: improve migration test (#11655)
Add unstaking and staking back to the db_migration test, and check that
#11569 would cause this version of the test to fail.
It still does not cover the GC failure of EpochInfo, but this is a good
improvement on its own.

Also, refactor the test to run nodes using `start_cluster`, as in other
tests, instead of `subprocess.call`. This makes it possible to override
the genesis and node configs.
Longarithm authored Jun 24, 2024
1 parent 74ac5fe commit 4b92c88
Showing 2 changed files with 62 additions and 34 deletions.
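As context for the `start_cluster` refactor described in the commit message, the sketch below shows the general shape of a `start_cluster` call with genesis overrides and per-node client-config overrides, mirroring the call added to db_migration.py further down. It is only an illustration of the pytest framework API as used in this diff; the specific values and the `client_config` name are chosen for the example, not taken from the commit.

import pathlib
import sys

# Assumes the script lives under nearcore's pytest/tests/..., like db_migration.py.
sys.path.append(str(pathlib.Path(__file__).resolve().parents[2] / 'lib'))

import cluster

NUM_SHARDS = 4
EPOCH_LENGTH = 5

# Per-node config override: track every shard so the node can answer all
# view queries and does not need state sync when the validator set changes.
client_config = {"tracked_shards": list(range(NUM_SHARDS))}

# start_cluster(validating nodes, observer nodes, shards, binary config,
#               genesis overrides, per-node client config overrides)
nodes = cluster.start_cluster(
    2,
    0,
    NUM_SHARDS,
    None,  # None -> use the default local neard config
    [["epoch_length", EPOCH_LENGTH],
     ["block_producer_kickout_threshold", 0],
     ["chunk_producer_kickout_threshold", 0]],
    {0: client_config, 1: client_config},
)

nodes[0].kill()
nodes[1].kill()

Roughly, the list argument patches fields of the generated genesis, while the dict maps a node index to overrides applied to that node's config.json. This is what lets the test set `epoch_length` and the kickout thresholds directly, which the old `neard init` via `subprocess.call` flow could not do as easily.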
2 changes: 1 addition & 1 deletion pytest/lib/branches.py
@@ -63,7 +63,7 @@ class Executables(typing.NamedTuple):
    def node_config(self) -> typing.Dict[str, typing.Any]:
        return {
            'local': True,
-            'neard_root': self.root,
+            'near_root': self.root,
            'binary_name': self.neard.name
        }

94 changes: 61 additions & 33 deletions pytest/tests/sanity/db_migration.py
@@ -5,24 +5,27 @@
Makes sure that the node can still produce blocks.
"""

-import json
import logging
-import os
import sys
-import time
-import subprocess
-import base58
import pathlib

sys.path.append(str(pathlib.Path(__file__).resolve().parents[2] / 'lib'))

import branches
import cluster
-from transaction import sign_deploy_contract_tx, sign_function_call_tx
+from transaction import sign_deploy_contract_tx, sign_function_call_tx, sign_staking_tx
import utils

logging.basicConfig(level=logging.INFO)

+NUM_SHARDS = 4
+EPOCH_LENGTH = 5
+
+# Config to track all shards.
+node_config = {
+    "tracked_shards": list(range(NUM_SHARDS)),
+}
+

def deploy_contract(node):
    hash_ = node.get_latest_block().hash_bytes
@@ -51,42 +54,68 @@ def send_some_tx(node):
    utils.wait_for_blocks(node, count=3)


+# Unstake and restake validator running `node` to ensure that some validator
+# kickout is recorded on DB.
+# Reproduces issue #11569.
+def unstake_and_stake(node, tx_sender_node):
+    account = tx_sender_node.get_account(node.signer_key.account_id)['result']
+    full_balance = int(account['amount']) + int(account['locked'])
+
+    logging.info(f'Unstaking {node.signer_key.account_id}...')
+    nonce = tx_sender_node.get_nonce_for_pk(node.signer_key.account_id,
+                                            node.signer_key.pk) + 10
+
+    hash_ = tx_sender_node.get_latest_block().hash_bytes
+    tx = sign_staking_tx(node.signer_key, node.validator_key, 0, nonce, hash_)
+
+    nonce += 10
+    res = tx_sender_node.send_tx_and_wait(tx, timeout=15)
+    assert 'error' not in res, res
+    assert 'Failure' not in res['result']['status'], res
+    utils.wait_for_blocks(tx_sender_node, count=EPOCH_LENGTH * 2)
+
+    logging.info(f'Restaking {node.signer_key.account_id}...')
+    tx = sign_staking_tx(node.signer_key, node.validator_key, full_balance // 2,
+                         nonce, hash_)
+    nonce += 10
+    res = tx_sender_node.send_tx_and_wait(tx, timeout=15)
+    assert 'error' not in res, res
+    assert 'Failure' not in res['result']['status'], res
+    utils.wait_for_blocks(tx_sender_node, count=EPOCH_LENGTH * 2)
+
+
def main():
    executables = branches.prepare_ab_test()
    node_root = utils.get_near_tempdir('db_migration', clean=True)

    logging.info(f"The near root is {executables.stable.root}...")
    logging.info(f"The node root is {node_root}...")

-    # Init local node
-    subprocess.call((
-        executables.stable.neard,
-        "--home=%s" % node_root,
-        "init",
-        "--fast",
-    ))
-
-    # Adjust changes required since #7486. This is needed because current
-    # stable release populates the deprecated migration configuration options.
-    # TODO(mina86): Remove this once we get stable release which doesn’t
-    # populate those fields by default.
-    config_path = node_root / 'config.json'
-    data = json.loads(config_path.read_text(encoding='utf-8'))
-    data.pop('db_migration_snapshot_path', None)
-    data.pop('use_db_migration_snapshot', None)
-    config_path.write_text(json.dumps(data), encoding='utf-8')
-
-    # Run stable node for few blocks.
-    logging.info("Starting the stable node...")
    config = executables.stable.node_config()
-    node = cluster.spin_up_node(config, executables.stable.root, str(node_root),
-                                0)
+    logging.info("Starting stable nodes...")
+    nodes = cluster.start_cluster(
+        2,
+        0,
+        NUM_SHARDS,
+        config,
+        [['epoch_length', EPOCH_LENGTH], [
+            "block_producer_kickout_threshold", 0
+        ], ["chunk_producer_kickout_threshold", 0]],
+        # Make sure nodes track all shards to:
+        # 1. Avoid state sync after restaking
+        # 2. Respond to all view queries
+        {
+            0: node_config,
+            1: node_config,
+        })
+    node = nodes[0]

    logging.info("Running the stable node...")
-    utils.wait_for_blocks(node, count=20)
+    utils.wait_for_blocks(node, count=EPOCH_LENGTH)
    logging.info("Blocks are being produced, sending some tx...")
    deploy_contract(node)
    send_some_tx(node)
+    unstake_and_stake(nodes[1], node)

    node.kill()

@@ -95,25 +124,24 @@ def main():

    # Run new node and verify it runs for a few more blocks.
    logging.info("Starting the current node...")
-    config = executables.current.node_config()
    node.near_root = executables.current.root
    node.binary_name = executables.current.neard
    node.start(boot_node=node)

    logging.info("Running the current node...")
-    utils.wait_for_blocks(node, count=20)
+    utils.wait_for_blocks(node, count=EPOCH_LENGTH * 4)
    logging.info("Blocks are being produced, sending some tx...")
    send_some_tx(node)

    logging.info(
-        "Currnet node has produced blocks... Stopping the current node... ")
+        "Current node has produced blocks... Stopping the current node... ")

    node.kill()

    logging.info("Restarting the current node...")

    node.start(boot_node=node)
-    utils.wait_for_blocks(node, count=20)
+    utils.wait_for_blocks(node, count=EPOCH_LENGTH * 4)


if __name__ == "__main__":
