From 4b92c88e6e7c0bb277f4013085c2e125931eafe7 Mon Sep 17 00:00:00 2001
From: Aleksandr Logunov
Date: Mon, 24 Jun 2024 17:56:17 +0400
Subject: [PATCH] debug: improve migration test (#11655)

Add unstaking and subsequent restaking to the db_migration test.
Verified that #11569 would cause this version of the test to fail.

It still does not cover the GC failure of EpochInfo, but it is a good
improvement on its own.

Also refactored the test to run nodes using `start_cluster`, as in
other tests, instead of `subprocess.call`. This allows overriding the
genesis and node configs.
---
 pytest/lib/branches.py              |  2 +-
 pytest/tests/sanity/db_migration.py | 94 +++++++++++++++++++----------
 2 files changed, 62 insertions(+), 34 deletions(-)

diff --git a/pytest/lib/branches.py b/pytest/lib/branches.py
index 09fd857a0bb..3d435773811 100644
--- a/pytest/lib/branches.py
+++ b/pytest/lib/branches.py
@@ -63,7 +63,7 @@ class Executables(typing.NamedTuple):
     def node_config(self) -> typing.Dict[str, typing.Any]:
         return {
             'local': True,
-            'neard_root': self.root,
+            'near_root': self.root,
             'binary_name': self.neard.name
         }
 
diff --git a/pytest/tests/sanity/db_migration.py b/pytest/tests/sanity/db_migration.py
index d887efac60c..47a6d5dfaf2 100755
--- a/pytest/tests/sanity/db_migration.py
+++ b/pytest/tests/sanity/db_migration.py
@@ -5,24 +5,27 @@
 Makes sure that the node can still produce blocks.
 """
 
-import json
 import logging
-import os
 import sys
-import time
-import subprocess
-import base58
 import pathlib
 
 sys.path.append(str(pathlib.Path(__file__).resolve().parents[2] / 'lib'))
 
 import branches
 import cluster
-from transaction import sign_deploy_contract_tx, sign_function_call_tx
+from transaction import sign_deploy_contract_tx, sign_function_call_tx, sign_staking_tx
 import utils
 
 logging.basicConfig(level=logging.INFO)
 
+NUM_SHARDS = 4
+EPOCH_LENGTH = 5
+
+# Config to track all shards.
+node_config = {
+    "tracked_shards": list(range(NUM_SHARDS)),
+}
+
 
 def deploy_contract(node):
     hash_ = node.get_latest_block().hash_bytes
@@ -51,6 +54,36 @@ def send_some_tx(node):
     utils.wait_for_blocks(node, count=3)
 
 
+# Unstake and restake the validator running `node` to ensure that a validator
+# kickout is recorded in the DB.
+# Reproduces issue #11569.
+def unstake_and_stake(node, tx_sender_node):
+    account = tx_sender_node.get_account(node.signer_key.account_id)['result']
+    full_balance = int(account['amount']) + int(account['locked'])
+
+    logging.info(f'Unstaking {node.signer_key.account_id}...')
+    nonce = tx_sender_node.get_nonce_for_pk(node.signer_key.account_id,
+                                            node.signer_key.pk) + 10
+
+    hash_ = tx_sender_node.get_latest_block().hash_bytes
+    tx = sign_staking_tx(node.signer_key, node.validator_key, 0, nonce, hash_)
+
+    nonce += 10
+    res = tx_sender_node.send_tx_and_wait(tx, timeout=15)
+    assert 'error' not in res, res
+    assert 'Failure' not in res['result']['status'], res
+    utils.wait_for_blocks(tx_sender_node, count=EPOCH_LENGTH * 2)
+
+    logging.info(f'Restaking {node.signer_key.account_id}...')
+    tx = sign_staking_tx(node.signer_key, node.validator_key, full_balance // 2,
+                         nonce, hash_)
+    nonce += 10
+    res = tx_sender_node.send_tx_and_wait(tx, timeout=15)
+    assert 'error' not in res, res
+    assert 'Failure' not in res['result']['status'], res
+    utils.wait_for_blocks(tx_sender_node, count=EPOCH_LENGTH * 2)
+
+
 def main():
     executables = branches.prepare_ab_test()
     node_root = utils.get_near_tempdir('db_migration', clean=True)
@@ -58,35 +91,31 @@ def main():
     logging.info(f"The near root is {executables.stable.root}...")
     logging.info(f"The node root is {node_root}...")
 
-    # Init local node
-    subprocess.call((
-        executables.stable.neard,
-        "--home=%s" % node_root,
-        "init",
-        "--fast",
-    ))
-
-    # Adjust changes required since #7486. This is needed because current
-    # stable release populates the deprecated migration configuration options.
-    # TODO(mina86): Remove this once we get stable release which doesn’t
-    # populate those fields by default.
-    config_path = node_root / 'config.json'
-    data = json.loads(config_path.read_text(encoding='utf-8'))
-    data.pop('db_migration_snapshot_path', None)
-    data.pop('use_db_migration_snapshot', None)
-    config_path.write_text(json.dumps(data), encoding='utf-8')
-
-    # Run stable node for few blocks.
-    logging.info("Starting the stable node...")
     config = executables.stable.node_config()
-    node = cluster.spin_up_node(config, executables.stable.root, str(node_root),
-                                0)
+    logging.info("Starting stable nodes...")
+    nodes = cluster.start_cluster(
+        2,
+        0,
+        NUM_SHARDS,
+        config,
+        [['epoch_length', EPOCH_LENGTH], [
+            "block_producer_kickout_threshold", 0
+        ], ["chunk_producer_kickout_threshold", 0]],
+        # Make sure nodes track all shards to:
+        # 1. Avoid state sync after restaking
+        # 2. Respond to all view queries
+        {
+            0: node_config,
+            1: node_config,
+        })
+    node = nodes[0]
 
     logging.info("Running the stable node...")
-    utils.wait_for_blocks(node, count=20)
+    utils.wait_for_blocks(node, count=EPOCH_LENGTH)
 
     logging.info("Blocks are being produced, sending some tx...")
     deploy_contract(node)
     send_some_tx(node)
+    unstake_and_stake(nodes[1], node)
 
     node.kill()
 
@@ -95,25 +124,24 @@ def main():
 
     # Run new node and verify it runs for a few more blocks.
     logging.info("Starting the current node...")
 
-    config = executables.current.node_config()
     node.near_root = executables.current.root
    node.binary_name = executables.current.neard
     node.start(boot_node=node)
 
     logging.info("Running the current node...")
-    utils.wait_for_blocks(node, count=20)
+    utils.wait_for_blocks(node, count=EPOCH_LENGTH * 4)
 
     logging.info("Blocks are being produced, sending some tx...")
     send_some_tx(node)
 
     logging.info(
-        "Currnet node has produced blocks... Stopping the current node... ")
+        "Current node has produced blocks... Stopping the current node... ")
     node.kill()
 
     logging.info("Restarting the current node...")
     node.start(boot_node=node)
-    utils.wait_for_blocks(node, count=20)
+    utils.wait_for_blocks(node, count=EPOCH_LENGTH * 4)
 
 
 if __name__ == "__main__":
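For anyone adapting this test, here is a minimal sketch of the `start_cluster`
pattern the refactor introduces, with the same argument shape used in the diff
above. Assumptions: the sketch lives next to the test (e.g. under
pytest/tests/sanity/, so pytest/lib is importable), and it passes None for the
executables config to fall back to `start_cluster`'s defaults, whereas the test
itself passes `executables.stable.node_config()`.

    import pathlib
    import sys

    # Assumes this file sits two directories below pytest/, so that
    # pytest/lib ends up on the import path, as in the test above.
    sys.path.append(str(pathlib.Path(__file__).resolve().parents[2] / 'lib'))

    import cluster
    import utils

    EPOCH_LENGTH = 5
    NUM_SHARDS = 4

    nodes = cluster.start_cluster(
        2,  # validating nodes
        0,  # non-validating observer nodes
        NUM_SHARDS,
        None,  # assumption: fall back to the default executables config
        # Genesis overrides, given as [key, value] pairs.
        [['epoch_length', EPOCH_LENGTH],
         ['block_producer_kickout_threshold', 0],
         ['chunk_producer_kickout_threshold', 0]],
        # Per-node config overrides, keyed by node index; tracking all shards
        # lets either node serve view queries after the restake.
        {i: {'tracked_shards': list(range(NUM_SHARDS))} for i in range(2)})
    utils.wait_for_blocks(nodes[0], count=EPOCH_LENGTH)

Setting both kickout thresholds to zero keeps validators from being kicked for
missed block or chunk production, so the only kickout the test should record in
the DB is the one caused by the deliberate unstake.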