diff --git a/lib/charms/mongodb/v0/mongodb_backups.py b/lib/charms/mongodb/v0/mongodb_backups.py
index 71e16807f..05844a35f 100644
--- a/lib/charms/mongodb/v0/mongodb_backups.py
+++ b/lib/charms/mongodb/v0/mongodb_backups.py
@@ -8,10 +8,12 @@
 This user is named "backup".
 """
 
+import json
 import logging
+import re
 import subprocess
 import time
-from typing import Dict
+from typing import Dict, List
 
 from charms.data_platform_libs.v0.s3 import CredentialsChangedEvent, S3Requirer
 from charms.mongodb.v0.helpers import (
@@ -63,6 +65,8 @@
 REMAPPING_PATTERN = r"\ABackup doesn't match current cluster topology - it has different replica set names. Extra shards in the backup will cause this, for a simple example. The extra/unknown replica set names found in the backup are: ([^,\s]+)([.] Backup has no data for the config server or sole replicaset)?\Z"
 PBM_STATUS_CMD = ["status", "-o", "json"]
 MONGODB_SNAP_DATA_DIR = "/var/snap/charmed-mongodb/current"
+RESTORE_MAX_ATTEMPTS = 5
+RESTORE_ATTEMPT_COOLDOWN = 15
 
 
 class ResyncError(Exception):
@@ -77,6 +81,22 @@ class PBMBusyError(Exception):
     """Raised when PBM is busy and cannot run another operation."""
 
 
+class RestoreError(Exception):
+    """Raised when a backup operation fails."""
+
+
+def _restore_retry_before_sleep(retry_state) -> None:
+    logger.error(
+        f"Attempt {retry_state.attempt_number} failed. {RESTORE_MAX_ATTEMPTS - retry_state.attempt_number} attempts left. Retrying after {RESTORE_ATTEMPT_COOLDOWN} seconds."
+    )
+
+
+def _restore_stop_condition(retry_state) -> bool:
+    if isinstance(retry_state.outcome.exception(), RestoreError):
+        return True
+    return retry_state.attempt_number >= RESTORE_MAX_ATTEMPTS
+
+
 class MongoDBBackups(Object):
     """Manages MongoDB backups."""
 
@@ -143,7 +163,13 @@ def _on_create_backup_action(self, event) -> None:
             event.fail(f"Cannot create backup {pbm_status.message}.")
             return
 
-        # TODO create backup
+        try:
+            self.charm.run_pbm_command(["backup"])
+            event.set_results({"backup-status": "backup started"})
+            self.charm.unit.status = MaintenanceStatus("backup started/running")
+        except (subprocess.CalledProcessError, ExecError, Exception) as e:
+            event.fail(f"Failed to backup MongoDB with error: {str(e)}")
+            return
 
     def _on_list_backups_action(self, event) -> None:
         if self.model.get_relation(S3_RELATION) is None:
@@ -164,7 +190,12 @@ def _on_list_backups_action(self, event) -> None:
             event.fail(f"Cannot list backups: {pbm_status.message}.")
             return
 
-        # TODO list backups
+        try:
+            formatted_list = self._generate_backup_list_output()
+            event.set_results({"backups": formatted_list})
+        except (subprocess.CalledProcessError, ExecError) as e:
+            event.fail(f"Failed to list MongoDB backups with error: {str(e)}")
+            return
 
     def _on_restore_action(self, event) -> None:
         if self.model.get_relation(S3_RELATION) is None:
@@ -185,7 +216,6 @@ def _on_restore_action(self, event) -> None:
         # cannot restore backup if pbm is not ready. This could be due to: resyncing, incompatible,
         # options, incorrect credentials, creating a backup, or already performing a restore.
         pbm_status = self._get_pbm_status()
-        # TOD check status
         self.charm.unit.status = pbm_status
         if isinstance(pbm_status, MaintenanceStatus):
             event.fail("Please wait for current backup/restore to finish.")
             return
@@ -198,7 +228,15 @@ def _on_restore_action(self, event) -> None:
             event.fail(f"Cannot restore backup {pbm_status.message}.")
             return
 
-        # TODO restore backup
+        # sometimes when we are trying to restore, pbm can be resyncing, so we need to retry
+        try:
+            self._try_to_restore(backup_id)
+            event.set_results({"restore-status": "restore started"})
+            self.charm.unit.status = MaintenanceStatus("restore started/running")
+        except ResyncError:
+            raise
+        except RestoreError as restore_error:
+            event.fail(str(restore_error))
 
     # BEGIN: helper functions
 
@@ -357,3 +395,121 @@ def _get_pbm_status(self) -> StatusBase:
             # necessary to parse the output
             logger.error(f"Failed to get pbm status: {e}")
             return BlockedStatus("PBM error")
+
+    def _generate_backup_list_output(self) -> str:
+        """Generates a list of backups in a formatted table.
+
+        List contains successful, failed, and in progress backups in order of ascending time.
+
+        Raises ExecError if pbm command fails.
+ """ + backup_list = [] + pbm_status = self.charm.run_pbm_command(["status", "--out=json"]) + # processes finished and failed backups + pbm_status = json.loads(pbm_status) + backups = pbm_status["backups"]["snapshot"] or [] + for backup in backups: + backup_status = "finished" + if backup["status"] == "error": + # backups from a different cluster have an error status, but they should show as + # finished + if self._backup_from_different_cluster(backup.get("error", "")): + backup_status = "finished" + else: + # display reason for failure if available + backup_status = "failed: " + backup.get("error", "N/A") + if backup["status"] not in ["error", "done"]: + backup_status = "in progress" + backup_list.append((backup["name"], backup["type"], backup_status)) + + # process in progress backups + running_backup = pbm_status["running"] + if running_backup.get("type", None) == "backup": + # backups are sorted in reverse order + last_reported_backup = backup_list[0] + # pbm will occasionally report backups that are currently running as failed, so it is + # necessary to correct the backup list in this case. 
+ if last_reported_backup[0] == running_backup["name"]: + backup_list[0] = (last_reported_backup[0], last_reported_backup[1], "in progress") + else: + backup_list.append((running_backup["name"], "logical", "in progress")) + + # sort by time and return formatted output + return self._format_backup_list(sorted(backup_list, key=lambda pair: pair[0])) + + def _format_backup_list(self, backup_list: List[str]) -> str: + """Formats provided list of backups as a table.""" + backups = ["{:<21s} | {:<12s} | {:s}".format("backup-id", "backup-type", "backup-status")] + + backups.append("-" * len(backups[0])) + for backup_id, backup_type, backup_status in backup_list: + backups.append( + "{:<21s} | {:<12s} | {:s}".format(backup_id, backup_type, backup_status) + ) + + return "\n".join(backups) + + def _backup_from_different_cluster(self, backup_status: str) -> bool: + """Returns if a given backup was made on a different cluster.""" + return re.search(REMAPPING_PATTERN, backup_status) is not None + + def _try_to_restore(self, backup_id: str) -> None: + for attempt in Retrying( + stop=_restore_stop_condition, + wait=wait_fixed(RESTORE_ATTEMPT_COOLDOWN), + reraise=True, + before_sleep=_restore_retry_before_sleep, + ): + with attempt: + try: + remapping_args = self._remap_replicaset(backup_id) + self.charm.run_pbm_restore_command(backup_id, remapping_args) + except (subprocess.CalledProcessError, ExecError) as e: + if type(e) == subprocess.CalledProcessError: + error_message = e.output.decode("utf-8") + else: + error_message = str(e.stderr) + fail_message = f"Failed to restore MongoDB with error: {str(e)}" + + if "Resync" in error_message: + raise ResyncError + + if f"backup '{backup_id}' not found" in error_message: + fail_message = f"Backup id: {backup_id} does not exist in list of backups, please check list-backups for the available backup_ids." 
+ + raise RestoreError(fail_message) + + def _remap_replicaset(self, backup_id: str) -> str: + """Returns options for remapping a replica set during a cluster migration restore. + + Args: + backup_id: str of the backup to check for remapping + + Raises: CalledProcessError + """ + pbm_status = self.charm.run_pbm_command(PBM_STATUS_CMD) + pbm_status = json.loads(pbm_status) + + # grab the error status from the backup if present + backups = pbm_status["backups"]["snapshot"] or [] + backup_status = "" + for backup in backups: + if not backup_id == backup["name"]: + continue + + backup_status = backup.get("error", "") + break + + if not self._backup_from_different_cluster(backup_status): + return "" + + # TODO in the future when we support conf servers and shards this will need to be more + # comprehensive. + old_cluster_name = re.search(REMAPPING_PATTERN, backup_status).group(1) + current_cluster_name = self.charm.app.name + logger.debug( + "Replica set remapping is necessary for restore, old cluster name: %s ; new cluster name: %s", + old_cluster_name, + current_cluster_name, + ) + return f"--replset-remapping {current_cluster_name}={old_cluster_name}" diff --git a/src/charm.py b/src/charm.py index 1355b0fd5..26e8310b8 100755 --- a/src/charm.py +++ b/src/charm.py @@ -47,6 +47,7 @@ ActiveStatus, BlockedStatus, Container, + MaintenanceStatus, Relation, RelationDataContent, SecretNotFoundError, @@ -507,6 +508,12 @@ def _on_get_password(self, event: ActionEvent) -> None: def _on_set_password(self, event: ActionEvent) -> None: """Set the password for the specified user.""" + # changing the backup password while a backup/restore is in progress can be disastrous + pbm_status = self.backups._get_pbm_status() + if isinstance(pbm_status, MaintenanceStatus): + event.fail("Cannot change password while a backup/restore is in progress.") + return + # only leader can write the new password into peer relation. 
if not self.unit.is_leader(): event.fail("The action can be run only on leader unit.") @@ -685,7 +692,7 @@ def _get_mongodb_config_for_user( password = self.get_secret(APP_SCOPE, user.get_password_key_name()) if not password: raise MissingSecretError( - "Password for {APP_SCOPE}, {user.get_username()} couldn't be retrieved" + f"Password for '{APP_SCOPE}', '{user.get_username()}' couldn't be retrieved" ) else: return MongoDBConfiguration( @@ -1127,6 +1134,13 @@ def run_pbm_command(self, cmd: List[str]) -> str: stdout, _ = process.wait_output() return stdout + def run_pbm_restore_command(self, backup_id: str, remapping_args: str) -> str: + """Executes a restore command in the workload container.""" + restore_cmd = ["restore", backup_id] + if remapping_args: + restore_cmd = restore_cmd + remapping_args.split(" ") + return self.run_pbm_command(restore_cmd) + def set_pbm_config_file(self) -> None: """Sets the pbm config file.""" container = self.unit.get_container(Config.CONTAINER_NAME) diff --git a/tests/integration/backup_tests/test_backups.py b/tests/integration/backup_tests/test_backups.py index 8a4bceb41..1cfe58edf 100644 --- a/tests/integration/backup_tests/test_backups.py +++ b/tests/integration/backup_tests/test_backups.py @@ -2,12 +2,14 @@ # Copyright 2023 Canonical Ltd. # See LICENSE file for licensing details. 
import asyncio +import logging import secrets import string import time from pathlib import Path import pytest +import pytest_asyncio import yaml from pytest_operator.plugin import OpsTest from tenacity import RetryError, Retrying, stop_after_delay, wait_fixed @@ -19,27 +21,54 @@ TIMEOUT = 15 * 60 ENDPOINT = "s3-credentials" NEW_CLUSTER = "new-mongodb" -DATABASE_APP_NAME = "mongodb-k8s" METADATA = yaml.safe_load(Path("./metadata.yaml").read_text()) +DATABASE_APP_NAME = METADATA["name"] +logger = logging.getLogger(__name__) -@pytest.fixture() + +# TODO this should be refactored to remove duplication +@pytest_asyncio.fixture async def continuous_writes_to_db(ops_test: OpsTest): """Continuously writes to DB for the duration of the test.""" - await ha_helpers.start_continous_writes(ops_test, 1) + application_name = await ha_helpers.get_application_name(ops_test, "application") + + application_unit = ops_test.model.applications[application_name].units[0] + + clear_writes_action = await application_unit.run_action("clear-continuous-writes") + await clear_writes_action.wait() + + start_writes_action = await application_unit.run_action("start-continuous-writes") + await start_writes_action.wait() + yield - await ha_helpers.stop_continous_writes(ops_test) - await ha_helpers.clear_db_writes(ops_test) + + clear_writes_action = await application_unit.run_action("clear-continuous-writes") + await clear_writes_action.wait() -@pytest.fixture() +@pytest_asyncio.fixture async def add_writes_to_db(ops_test: OpsTest): """Adds writes to DB before test starts and clears writes at the end of the test.""" - await ha_helpers.start_continous_writes(ops_test, 1) + application_name = await ha_helpers.get_application_name(ops_test, "application") + + application_unit = ops_test.model.applications[application_name].units[0] + + clear_writes_action = await application_unit.run_action("clear-continuous-writes") + await clear_writes_action.wait() + + start_writes_action = await 
application_unit.run_action("start-continuous-writes") + await start_writes_action.wait() + time.sleep(20) - await ha_helpers.stop_continous_writes(ops_test) + + stop_writes_action = await application_unit.run_action("stop-continuous-writes") + await stop_writes_action.wait() + yield - await ha_helpers.clear_db_writes(ops_test) + + clear_writes_action = await application_unit.run_action("clear-continuous-writes") + await clear_writes_action.wait() @pytest.mark.abort_on_fail @@ -60,6 +89,14 @@ async def test_build_and_deploy(ops_test: OpsTest) -> None: # deploy the s3 integrator charm await ops_test.model.deploy(S3_APP_NAME, channel="edge") + # test application + application_name = await ha_helpers.get_application_name(ops_test, "application") + if not application_name: + application_name = await ha_helpers.deploy_and_scale_application(ops_test) + + db_app_name = await ha_helpers.get_application_name(ops_test, DATABASE_APP_NAME) + await ha_helpers.relate_mongodb_and_application(ops_test, db_app_name, application_name) + await ops_test.model.wait_for_idle() @@ -132,7 +169,6 @@ async def test_ready_correct_conf(ops_test: OpsTest) -> None: ) -@pytest.mark.skip("Not implemented yet") @pytest.mark.abort_on_fail async def test_create_and_list_backups(ops_test: OpsTest) -> None: db_unit = await helpers.get_leader_unit(ops_test) @@ -141,7 +177,6 @@ async def test_create_and_list_backups(ops_test: OpsTest) -> None: action = await db_unit.run_action(action_name="list-backups") list_result = await action.wait() backups = list_result.results["backups"] - assert backups, "backups not outputted" # verify backup is started action = await db_unit.run_action(action_name="create-backup") @@ -161,7 +196,6 @@ async def test_create_and_list_backups(ops_test: OpsTest) -> None: assert backups == 1, "Backup not created." 
-@pytest.mark.skip("Not implemented yet") @pytest.mark.abort_on_fail async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None: """With writes in the DB test creating a backup while another one is running. @@ -248,12 +282,11 @@ async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None: assert backups == 2, "Backup not created in bucket on AWS." -@pytest.mark.skip("Not implemented yet") @pytest.mark.abort_on_fail -async def test_restore(ops_test: OpsTest, add_writes_to_db) -> None: +async def test_restore(ops_test: OpsTest, continuous_writes_to_db) -> None: """Simple backup tests that verifies that writes are correctly restored.""" # count total writes - number_writes = await ha_helpers.count_writes(ops_test) + number_writes = await ha_helpers.get_total_writes(ops_test) assert number_writes > 0, "no writes to backup" # create a backup in the AWS bucket @@ -275,8 +308,12 @@ async def test_restore(ops_test: OpsTest, add_writes_to_db) -> None: # add writes to be cleared after restoring the backup. Note these are written to the same # collection that was backed up. - await helpers.insert_unwanted_data(ops_test) - new_number_of_writes = await ha_helpers.count_writes(ops_test) + application_name = await ha_helpers.get_application_name(ops_test, "application") + application_unit = ops_test.model.applications[application_name].units[0] + start_writes_action = await application_unit.run_action("start-continuous-writes") + await start_writes_action.wait() + time.sleep(20) + new_number_of_writes = await ha_helpers.get_total_writes(ops_test) assert new_number_of_writes > number_writes, "No writes to be cleared after restoring." 
# find most recent backup id and restore @@ -293,6 +330,7 @@ async def test_restore(ops_test: OpsTest, add_writes_to_db) -> None: ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), ) + number_writes_restored = number_writes # initialize extra write count # verify all writes are present try: for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(20)): @@ -303,11 +341,13 @@ async def test_restore(ops_test: OpsTest, add_writes_to_db) -> None: assert number_writes == number_writes_restored, "writes not correctly restored" -@pytest.mark.skip("Not implemented yet") +# TODO remove unstable mark once juju issue with secrets is resolved +@pytest.mark.unstable @pytest.mark.parametrize("cloud_provider", ["AWS", "GCP"]) -async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_provider): +async def test_restore_new_cluster(ops_test: OpsTest, continuous_writes_to_db, cloud_provider): # configure test for the cloud provider db_app_name = await helpers.app_name(ops_test) + leader_unit = await helpers.get_leader_unit(ops_test, db_app_name) await helpers.set_credentials(ops_test, cloud=cloud_provider) if cloud_provider == "AWS": configuration_parameters = { @@ -328,23 +368,38 @@ async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_pr ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), ) - # create a backup - writes_in_old_cluster = await ha_helpers.count_writes(ops_test, db_app_name) + # sleep to allow for writes to be made + time.sleep(30) + writes_in_old_cluster = await ha_helpers.get_total_writes(ops_test) assert writes_in_old_cluster > 0, "old cluster has no writes." - await helpers.create_and_verify_backup(ops_test) + + # create a backup + action = await leader_unit.run_action(action_name="create-backup") + latest_backup = await action.wait() + assert latest_backup.status == "completed", "Backup not started." 
# save old password, since after restoring we will need this password to authenticate. - old_password = await ha_helpers.get_password(ops_test, db_app_name) + + action = await leader_unit.run_action("get-password", **{"username": "operator"}) + action = await action.wait() + old_password = action.results["password"] + + # TODO remove this workaround once issue with juju secrets is fixed + NEW_CLUSTER = get_new_cluster_name(cloud_provider) # noqa: N806 # deploy a new cluster with a different name db_charm = await ops_test.build_charm(".") - await ops_test.model.deploy(db_charm, num_units=3, application_name=NEW_CLUSTER) + resources = {"mongodb-image": METADATA["resources"]["mongodb-image"]["upstream-source"]} + await ops_test.model.deploy( + db_charm, num_units=3, resources=resources, application_name=NEW_CLUSTER + ) + await asyncio.gather( ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20), ) - db_unit = await helpers.get_leader_unit(ops_test, db_app_name=NEW_CLUSTER) - action = await db_unit.run_action("set-password", **{"password": old_password}) + leader_unit = await helpers.get_leader_unit(ops_test, db_app_name=NEW_CLUSTER) + action = await leader_unit.run_action("set-password", **{"password": old_password}) action = await action.wait() assert action.status == "completed" @@ -362,19 +417,21 @@ async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_pr # verify that the listed backups from the old cluster are not listed as failed. 
assert ( - await helpers.count_failed_backups(db_unit) == 0 + await helpers.count_failed_backups(leader_unit) == 0 ), "Backups from old cluster are listed as failed" # find most recent backup id and restore - action = await db_unit.run_action(action_name="list-backups") + action = await leader_unit.run_action(action_name="list-backups") list_result = await action.wait() list_result = list_result.results["backups"] most_recent_backup = list_result.split("\n")[-1] backup_id = most_recent_backup.split()[0] - action = await db_unit.run_action(action_name="restore", **{"backup-id": backup_id}) + action = await leader_unit.run_action(action_name="restore", **{"backup-id": backup_id}) restore = await action.wait() assert restore.results["restore-status"] == "restore started", "restore not successful" + # initialize with old values + writes_in_new_cluster = writes_in_old_cluster # verify all writes are present try: for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(20)): @@ -388,15 +445,17 @@ async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_pr writes_in_new_cluster == writes_in_old_cluster ), "new cluster writes do not match old cluster writes after restore" - await helpers.destroy_cluster(ops_test, cluster_name=NEW_CLUSTER) + # TODO there is an issue with on stop and secrets that need to be resolved before + # we can cleanup the new cluster, otherwise the test will fail. 
+ + # await helpers.destroy_cluster(ops_test, cluster_name=NEW_CLUSTER) -@pytest.mark.skip("Not implemented yet") @pytest.mark.abort_on_fail async def test_update_backup_password(ops_test: OpsTest) -> None: """Verifies that after changing the backup password the pbm tool is updated and functional.""" db_app_name = await helpers.app_name(ops_test) - db_unit = await helpers.get_leader_unit(ops_test) + leader_unit = await helpers.get_leader_unit(ops_test) # wait for charm to be idle before setting password await asyncio.gather( @@ -404,7 +463,7 @@ async def test_update_backup_password(ops_test: OpsTest) -> None: ) parameters = {"username": "backup"} - action = await db_unit.run_action("set-password", **parameters) + action = await leader_unit.run_action("set-password", **parameters) action = await action.wait() assert action.status == "completed", "failed to set backup password" @@ -414,6 +473,12 @@ async def test_update_backup_password(ops_test: OpsTest) -> None: ) # verify we still have connection to pbm via creating a backup - action = await db_unit.run_action(action_name="create-backup") + action = await leader_unit.run_action(action_name="create-backup") backup_result = await action.wait() assert backup_result.results["backup-status"] == "backup started", "backup didn't start" + + +# TODO remove this workaround once issue with juju secrets is fixed +def get_new_cluster_name(cloud_provider: str) -> str: + """Generates a new cluster name.""" + return f"{NEW_CLUSTER}-{cloud_provider.lower()}" diff --git a/tests/unit/data/pbm_status.txt b/tests/unit/data/pbm_status.txt new file mode 100644 index 000000000..2bab378e4 --- /dev/null +++ b/tests/unit/data/pbm_status.txt @@ -0,0 +1 @@ 
+{"backups":{"type":"S3","path":"s3://pbm-test-bucket-1/mongodb-vm/demo","region":"us-west-2","snapshot":[{"name":"2000-02-14T14:09:43Z","size":465194,"status":"done","restoreTo":1676383789,"pbmVersion":"2.0.2","type":"logical"},{"name":"1900-02-14T13:59:14Z","size":464540,"status":"error","error":"internet not invented yet","type":"physical"}],"pitrChunks":{"size":0}},"cluster":[{"rs":"mongodb","nodes":[{"host":"mongodb/10.158.125.189:27017","agent":"v2.0.2","role":"P","ok":true}]}],"pitr":{"conf":false,"run":false},"running":{"type":"backup","name":"2023-02-14T17:06:38Z","startTS":1676394399,"status":"snapshot backup","opID":"63ebbf9e0b3268788994c963"}} diff --git a/tests/unit/data/pbm_status_duplicate_running.txt b/tests/unit/data/pbm_status_duplicate_running.txt new file mode 100644 index 000000000..ef6f0829d --- /dev/null +++ b/tests/unit/data/pbm_status_duplicate_running.txt @@ -0,0 +1 @@ +{"backups":{"type":"S3","path":"s3://pbm-test-bucket-1/mongodb-vm/demo","region":"us-west-2","snapshot":[{"name":"2023-02-14T17:06:38Z","status":"error","error":"get file 2023-02-14T17:06:38Z/mongodb/local.oplog.rs.bson.s2: no such file","restoreTo":1676394401,"pbmVersion":"2.0.2","type":"logical"},{"name":"2000-02-14T14:09:43Z","size":465194,"status":"done","restoreTo":1676383789,"pbmVersion":"2.0.2","type":"logical"},{"name":"1900-02-14T13:59:14Z","size":464540,"status":"error","error":"internet not invented yet","type":"physical"}],"pitrChunks":{"size":0}},"cluster":[{"rs":"mongodb","nodes":[{"host":"mongodb/10.158.125.189:27017","agent":"v2.0.2","role":"P","ok":true}]}],"pitr":{"conf":false,"run":false},"running":{"type":"backup","name":"2023-02-14T17:06:38Z","startTS":1676394399,"status":"snapshot backup","opID":"63ebbf9e0b3268788994c963"}} diff --git a/tests/unit/data/pbm_status_error_remap.txt b/tests/unit/data/pbm_status_error_remap.txt new file mode 100644 index 000000000..597e55ac9 --- /dev/null +++ b/tests/unit/data/pbm_status_error_remap.txt @@ -0,0 +1 @@ 
+{"backups":{"type":"S3","path":"s3://pbm-test-bucket-1/mongodb-vm/demo","region":"us-west-2","snapshot":[{"name":"2000-02-14T14:09:43Z","size":465194,"status":"done","restoreTo":1676383789,"pbmVersion":"2.0.2","type":"logical"},{"name":"1900-02-14T13:59:14Z","size":464540,"status":"error","error":"internet not invented yet","type":"physical"},{"name":"2001-02-14T13:59:14Z","size":464540,"status":"error","error":"Backup doesn't match current cluster topology - it has different replica set names. Extra shards in the backup will cause this, for a simple example. The extra/unknown replica set names found in the backup are: mongodb.THIS IS AN ERROR MESSAGE. Backup has no data for the config server or sole replicaset","type":"physical"},{"name":"2002-02-14T13:59:14Z","size":464540,"status":"error","error":"Backup doesn't match current cluster topology - it has different replica set names. Extra shards in the backup will cause this, for a simple example. The extra/unknown replica set names found in the backup are: old-cluster-name. 
Backup has no data for the config server or sole replicaset","type":"physical"}],"pitrChunks":{"size":0}},"cluster":[{"rs":"mongodb","nodes":[{"host":"mongodb/10.158.125.189:27017","agent":"v2.0.2","role":"P","ok":true}]}],"pitr":{"conf":false,"run":false},"running":{"type":"backup","name":"2023-02-14T17:06:38Z","startTS":1676394399,"status":"snapshot backup","opID":"63ebbf9e0b3268788994c963"}} \ No newline at end of file diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index 27918aa0c..77df3db13 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -8,7 +8,7 @@ import pytest from charms.mongodb.v0.helpers import CONF_DIR, DATA_DIR, KEY_FILE -from ops.model import ModelError +from ops.model import ActiveStatus, MaintenanceStatus, ModelError from ops.pebble import APIError, ExecError, PathError, ProtocolError from ops.testing import Harness from parameterized import parameterized @@ -745,15 +745,20 @@ def test_connect_to_mongo_exporter_on_set_password(self, connect_exporter, conne self.harness.charm._on_set_password(action_event) connect_exporter.assert_called() + @patch("charm.MongoDBBackups._get_pbm_status") + @patch("charm.MongoDBCharm.get_backup_service") @patch("charm.MongoDBConnection") @patch("charm.MongoDBCharm._connect_mongodb_exporter") - def test_event_set_password_secrets(self, connect_exporter, connection): + def test_event_set_password_secrets( + self, connect_exporter, connection, get_backup_service, get_pbm_status + ): """Test _connect_mongodb_exporter is called when the password is set for 'montior' user. 
Furthermore: in Juju 3.x we want to use secrets """ pw = "bla" - + get_backup_service.return_value = "pbm" + get_pbm_status.return_value = ActiveStatus() self.harness.set_leader(True) action_event = mock.Mock() @@ -772,15 +777,19 @@ def test_event_set_password_secrets(self, connect_exporter, connection): assert "password" in args_pw assert args_pw["password"] == pw + @patch("charm.MongoDBBackups._get_pbm_status") + @patch("charm.MongoDBCharm.get_backup_service") @patch("charm.MongoDBConnection") @patch("charm.MongoDBCharm._connect_mongodb_exporter") def test_event_auto_reset_password_secrets_when_no_pw_value_shipped( - self, connect_exporter, connection + self, connect_exporter, connection, get_backup_service, get_pbm_status ): """Test _connect_mongodb_exporter is called when the password is set for 'montior' user. Furthermore: in Juju 3.x we want to use secrets """ + get_backup_service.return_value = "pbm" + get_pbm_status.return_value = ActiveStatus() self._setup_secrets() self.harness.set_leader(True) @@ -916,3 +925,22 @@ def test_set_password_provided(self, connection): # verify app data is updated and results are reported to user self.assertEqual("canonical123", new_password) + + @patch_network_get(private_address="1.1.1.1") + @patch("charm.MongoDBCharm.get_backup_service") + @patch("charm.MongoDBBackups._get_pbm_status") + def test_set_backup_password_pbm_busy(self, pbm_status, get_backup_service): + """Tests changes to passwords fail when pbm is restoring/backing up.""" + self.harness.set_leader(True) + original_password = "pass123" + action_event = mock.Mock() + get_backup_service.return_value = "pbm" + + for username in ["backup", "monitor", "operator"]: + self.harness.charm.app_peer_data[f"{username}-password"] = original_password + action_event.params = {"username": username} + pbm_status.return_value = MaintenanceStatus("pbm") + self.harness.charm._on_set_password(action_event) + current_password = 
self.harness.charm.app_peer_data[f"{username}-password"] + action_event.fail.assert_called() + self.assertEqual(current_password, original_password) diff --git a/tests/unit/test_mongodb_backups.py b/tests/unit/test_mongodb_backups.py index 5f6d5f1b9..ec8869996 100644 --- a/tests/unit/test_mongodb_backups.py +++ b/tests/unit/test_mongodb_backups.py @@ -1,11 +1,11 @@ # Copyright 2023 Canonical Ltd. # See LICENSE file for licensing details. import unittest -from subprocess import CalledProcessError from unittest import mock from unittest.mock import patch import tenacity +from charms.mongodb.v0.helpers import current_pbm_op from charms.mongodb.v0.mongodb_backups import ( PBMBusyError, ResyncError, @@ -396,27 +396,28 @@ def test_s3_credentials_pbm_error( defer.assert_not_called() self.assertTrue(isinstance(self.harness.charm.unit.status, BlockedStatus)) - @unittest.skip("Not implemented yet") + @patch("charm.MongoDBCharm.get_backup_service") + @patch("charm.MongoDBCharm.run_pbm_command") @patch("charm.MongoDBBackups._get_pbm_status") - @patch("charm.snap.SnapCache") - def test_backup_failed(self, snap, pbm_status, output): + def test_backup_failed(self, pbm_status, pbm_command, service): """Verifies backup is fails if the pbm command failed.""" - mock_pbm_snap = mock.Mock() - mock_pbm_snap.present = True - snap.return_value = {"charmed-mongodb": mock_pbm_snap} + container = self.harness.model.unit.get_container("mongod") + self.harness.set_can_connect(container, True) + service.return_value = "pbm" + + pbm_command.side_effect = ExecError( + command=["/usr/bin/pbm", "status"], exit_code=1, stdout="status code: 42", stderr="" + ) action_event = mock.Mock() action_event.params = {} pbm_status.return_value = ActiveStatus("") - output.side_effect = CalledProcessError(cmd="charmed-mongodb.pbm backup", returncode=42) - self.harness.add_relation(RELATION_NAME, "s3-integrator") self.harness.charm.backups._on_create_backup_action(action_event) action_event.fail.assert_called() - 
@unittest.skip("Not implemented yet") def test_backup_list_without_rel(self): """Verifies no backup lists are attempted without s3 relation.""" action_event = mock.Mock() @@ -425,63 +426,65 @@ def test_backup_list_without_rel(self): self.harness.charm.backups._on_list_backups_action(action_event) action_event.fail.assert_called() - @unittest.skip("Not implemented yet") - @patch("charm.snap.SnapCache") - def test_backup_list_syncing(self, snap, output): + @patch("charm.MongoDBCharm.get_backup_service") + @patch("charm.MongoDBCharm.run_pbm_command") + def test_backup_list_syncing(self, pbm_command, service): """Verifies backup list is deferred if more time is needed to resync.""" - mock_pbm_snap = mock.Mock() - mock_pbm_snap.present = True - snap.return_value = {"charmed-mongodb": mock_pbm_snap} + container = self.harness.model.unit.get_container("mongod") + self.harness.set_can_connect(container, True) + service.return_value = "pbm" action_event = mock.Mock() action_event.params = {} - output.return_value = b"Currently running:\n====\nResync op" + + pbm_command.return_value = "Currently running:\n====\nResync op" self.harness.add_relation(RELATION_NAME, "s3-integrator") self.harness.charm.backups._on_list_backups_action(action_event) action_event.defer.assert_called() - @unittest.skip("Not implemented yet") - @patch("charm.snap.SnapCache") - def test_backup_list_wrong_cred(self, snap, output): + @patch("charm.MongoDBCharm.get_backup_service") + @patch("charm.MongoDBCharm.run_pbm_command") + def test_backup_list_wrong_cred(self, pbm_command, service): """Verifies backup list fails with wrong credentials.""" - mock_pbm_snap = mock.Mock() - mock_pbm_snap.present = True - snap.return_value = {"charmed-mongodb": mock_pbm_snap} - action_event = mock.Mock() action_event.params = {} - output.side_effect = CalledProcessError( - cmd="charmed-mongodb.pbm status", returncode=403, output=b"status code: 403" + + container = self.harness.model.unit.get_container("mongod") + 
self.harness.set_can_connect(container, True) + service.return_value = "pbm" + pbm_command.side_effect = ExecError( + command=["/usr/bin/pbm", "status"], exit_code=1, stdout="status code: 403", stderr="" ) self.harness.add_relation(RELATION_NAME, "s3-integrator") self.harness.charm.backups._on_list_backups_action(action_event) action_event.fail.assert_called() - @unittest.skip("Not implemented yet") + @patch("charm.MongoDBCharm.get_backup_service") + @patch("charm.MongoDBCharm.run_pbm_command") @patch("charm.MongoDBBackups._get_pbm_status") - @patch("charm.snap.SnapCache") - def test_backup_list_failed(self, snap, pbm_status, output): + def test_backup_list_failed(self, pbm_status, pbm_command, service): """Verifies backup list fails if the pbm command fails.""" mock_pbm_snap = mock.Mock() mock_pbm_snap.present = True - snap.return_value = {"charmed-mongodb": mock_pbm_snap} action_event = mock.Mock() action_event.params = {} pbm_status.return_value = ActiveStatus("") - output.side_effect = CalledProcessError(cmd="charmed-mongodb.pbm list", returncode=42) + pbm_command.side_effect = ExecError( + command=["/usr/bin/pbm", "list"], exit_code=1, stdout="status code: 403", stderr="" + ) self.harness.add_relation(RELATION_NAME, "s3-integrator") self.harness.charm.backups._on_list_backups_action(action_event) action_event.fail.assert_called() - @unittest.skip("Not implemented yet") - def test_generate_backup_list_output(self, check_output): + @patch("charm.MongoDBCharm.run_pbm_command") + def test_generate_backup_list_output(self, run_pbm_command): """Tests correct formation of backup list output. 
Specifically the spacing of the backups, the header, the backup order, and the backup @@ -492,7 +495,7 @@ def test_generate_backup_list_output(self, check_output): output_contents = f.readlines() output_contents = "".join(output_contents) - check_output.return_value = output_contents.encode("utf-8") + run_pbm_command.return_value = output_contents.encode("utf-8") formatted_output = self.harness.charm.backups._generate_backup_list_output() formatted_output = formatted_output.split("\n") header = formatted_output[0] @@ -514,7 +517,7 @@ def test_generate_backup_list_output(self, check_output): output_contents = f.readlines() output_contents = "".join(output_contents) - check_output.return_value = output_contents.encode("utf-8") + run_pbm_command.return_value = output_contents.encode("utf-8") formatted_output = self.harness.charm.backups._generate_backup_list_output() formatted_output = formatted_output.split("\n") header = formatted_output[0] @@ -533,7 +536,6 @@ def test_generate_backup_list_output(self, check_output): inprogress_backup = formatted_output[4] self.assertEqual(inprogress_backup, "2023-02-14T17:06:38Z | logical | in progress") - @unittest.skip("Not implemented yet") def test_restore_without_rel(self): """Verifies no restores are attempted without s3 relation.""" action_event = mock.Mock() @@ -542,73 +544,63 @@ def test_restore_without_rel(self): self.harness.charm.backups._on_restore_action(action_event) action_event.fail.assert_called() - @unittest.skip("Not implemented yet") - @patch("charm.snap.SnapCache") - def test_restore_syncing(self, snap, output): + @patch("charm.MongoDBCharm.get_backup_service") + @patch("charm.MongoDBCharm.run_pbm_command") + def test_restore_syncing(self, pbm_command, service): """Verifies restore is deferred if more time is needed to resync.""" - mock_pbm_snap = mock.Mock() - mock_pbm_snap.present = True - snap.return_value = {"charmed-mongodb": mock_pbm_snap} - action_event = mock.Mock() action_event.params = {"backup-id": 
"back-me-up"} - output.return_value = b"Currently running:\n====\nResync op" + service.return_value = "pbm" + pbm_command.return_value = "Currently running:\n====\nResync op" self.harness.add_relation(RELATION_NAME, "s3-integrator") self.harness.charm.backups._on_restore_action(action_event) action_event.defer.assert_called() - @unittest.skip("Not implemented yet") - @patch("charm.snap.SnapCache") - def test_restore_running_backup(self, snap, output): + @patch("charm.MongoDBCharm.get_backup_service") + @patch("charm.MongoDBCharm.run_pbm_command") + def test_restore_running_backup(self, pbm_command, service): """Verifies restore is fails if another backup is already running.""" - mock_pbm_snap = mock.Mock() - mock_pbm_snap.present = True - snap.return_value = {"charmed-mongodb": mock_pbm_snap} - action_event = mock.Mock() action_event.params = {"backup-id": "back-me-up"} - output.return_value = b"Currently running:\n====\nSnapshot backup" - + service.return_value = "pbm" + pbm_command.return_value = "Currently running:\n====\nSnapshot backup" self.harness.add_relation(RELATION_NAME, "s3-integrator") self.harness.charm.backups._on_restore_action(action_event) action_event.fail.assert_called() - @unittest.skip("Not implemented yet") - @patch("charm.snap.SnapCache") - def test_restore_wrong_cred(self, snap, output): + @patch("charm.MongoDBCharm.get_backup_service") + @patch("charm.MongoDBCharm.run_pbm_command") + @patch("charm.MongoDBBackups._get_pbm_status") + def test_restore_wrong_cred(self, pbm_status, pbm_command, service): """Verifies restore is fails if the credentials are incorrect.""" - mock_pbm_snap = mock.Mock() - mock_pbm_snap.present = True - snap.return_value = {"charmed-mongodb": mock_pbm_snap} - action_event = mock.Mock() action_event.params = {"backup-id": "back-me-up"} - output.side_effect = CalledProcessError( - cmd="charmed-mongodb.pbm status", returncode=403, output=b"status code: 403" + action_event = mock.Mock() + action_event.params = {} + 
pbm_status.return_value = ActiveStatus("") + + pbm_command.side_effect = ExecError( + command=["/usr/bin/pbm", "list"], exit_code=1, stdout="status code: 403", stderr="" ) self.harness.add_relation(RELATION_NAME, "s3-integrator") self.harness.charm.backups._on_restore_action(action_event) action_event.fail.assert_called() - @unittest.skip("Not implemented yet") + @patch("charm.MongoDBCharm.get_backup_service") + @patch("charm.MongoDBCharm.run_pbm_command") @patch("charm.MongoDBBackups._get_pbm_status") - @patch("charm.snap.SnapCache") - def test_restore_failed(self, snap, pbm_status, output): + def test_restore_failed(self, pbm_status, pbm_command, service): """Verifies restore is fails if the pbm command failed.""" - mock_pbm_snap = mock.Mock() - mock_pbm_snap.present = True - snap.return_value = {"charmed-mongodb": mock_pbm_snap} - action_event = mock.Mock() action_event.params = {"backup-id": "back-me-up"} pbm_status.return_value = ActiveStatus("") - output.side_effect = CalledProcessError( - cmd="charmed-mongodb.pbm backup", returncode=42, output=b"failed" + pbm_command.side_effect = ExecError( + command=["/usr/bin/pbm", "list"], exit_code=1, stdout="failed", stderr="" ) self.harness.add_relation(RELATION_NAME, "s3-integrator") @@ -616,25 +608,25 @@ def test_restore_failed(self, snap, pbm_status, output): action_event.fail.assert_called() - @unittest.skip("Not implemented yet") - def test_remap_replicaset_no_backup(self, check_output): + @patch("charm.MongoDBCharm.run_pbm_command") + def test_remap_replicaset_no_backup(self, run_pbm_command): """Test verifies that no remapping is given if the backup_id doesn't exist.""" with open("tests/unit/data/pbm_status.txt") as f: output_contents = f.readlines() output_contents = "".join(output_contents) - check_output.return_value = output_contents.encode("utf-8") + run_pbm_command.return_value = output_contents.encode("utf-8") remap = self.harness.charm.backups._remap_replicaset("this-id-doesnt-exist") 
self.assertEqual(remap, "") - @unittest.skip("Not implemented yet") - def test_remap_replicaset_no_remap_necessary(self, check_output): + @patch("charm.MongoDBCharm.run_pbm_command") + def test_remap_replicaset_no_remap_necessary(self, run_pbm_command): """Test verifies that no remapping is given if no remapping is necessary.""" with open("tests/unit/data/pbm_status_error_remap.txt") as f: output_contents = f.readlines() output_contents = "".join(output_contents) - check_output.return_value = output_contents.encode("utf-8") + run_pbm_command.return_value = output_contents.encode("utf-8") # first case is that the backup is not in the error state remap = self.harness.charm.backups._remap_replicaset("2000-02-14T14:09:43Z") @@ -649,86 +641,76 @@ def test_remap_replicaset_no_remap_necessary(self, check_output): remap = self.harness.charm.backups._remap_replicaset("2001-02-14T13:59:14Z") self.assertEqual(remap, "") - @unittest.skip("Not implemented yet") - def test_remap_replicaset_remap_necessary(self, check_output): + @patch("charm.MongoDBCharm.run_pbm_command") + def test_remap_replicaset_remap_necessary(self, run_pbm_command): """Test verifies that remapping is provided and correct when necessary.""" with open("tests/unit/data/pbm_status_error_remap.txt") as f: output_contents = f.readlines() output_contents = "".join(output_contents) - check_output.return_value = output_contents.encode("utf-8") + run_pbm_command.return_value = output_contents.encode("utf-8") self.harness.charm.app.name = "current-app-name" # first case is that the backup is not in the error state remap = self.harness.charm.backups._remap_replicaset("2002-02-14T13:59:14Z") self.assertEqual(remap, "--replset-remapping current-app-name=old-cluster-name") - @unittest.skip("Not implemented yet") - @patch("charm.snap.SnapCache") - def test_get_pbm_status_backup(self, snap, output): + @patch("charm.MongoDBCharm.get_backup_service") + @patch("charm.MongoDBCharm.run_pbm_command") + def 
test_get_pbm_status_backup(self, run_pbm_command, service): """Tests that when pbm running a backup that pbm is in maintenance state.""" - mock_pbm_snap = mock.Mock() - mock_pbm_snap.present = True - snap.return_value = {"charmed-mongodb": mock_pbm_snap} - output.return_value = b"Currently running:\n====\nSnapshot backup" + service.return_value = "pbm" + run_pbm_command.return_value = "Currently running:\n====\nSnapshot backup" self.assertTrue( isinstance(self.harness.charm.backups._get_pbm_status(), MaintenanceStatus) ) - @unittest.skip("Not implemented yet") def test_current_pbm_op(self): """Test if _current_pbm_op can identify the operation pbm is running.""" - action = self.harness.charm.backups._current_pbm_op( - "nothing\nCurrently running:\n====\nexpected action" - ) + action = current_pbm_op("nothing\nCurrently running:\n====\nexpected action") self.assertEqual(action, "expected action") - no_action = self.harness.charm.backups._current_pbm_op("pbm not started") + no_action = current_pbm_op("pbm not started") self.assertEqual(no_action, "") - @unittest.skip("Not implemented yet.") - @patch("charm.snap.SnapCache") - def test_backup_syncing(self, snap, output): + @patch("charm.MongoDBCharm.get_backup_service") + @patch("charm.MongoDBCharm.run_pbm_command") + def test_backup_syncing(self, run_pbm_command, service): """Verifies backup is deferred if more time is needed to resync.""" - mock_pbm_snap = mock.Mock() - mock_pbm_snap.present = True - snap.return_value = {"charmed-mongodb": mock_pbm_snap} - action_event = mock.Mock() action_event.params = {} - output.return_value = b"Currently running:\n====\nResync op" + service.return_value = "pbm" + run_pbm_command.return_value = "Currently running:\n====\nResync op" self.harness.add_relation(RELATION_NAME, "s3-integrator") self.harness.charm.backups._on_create_backup_action(action_event) action_event.defer.assert_called() - @unittest.skip("Not implemented yet.") - @patch("charm.snap.SnapCache") - def 
test_backup_running_backup(self, snap, output): + @patch("charm.MongoDBCharm.get_backup_service") + @patch("charm.MongoDBCharm.run_pbm_command") + def test_backup_running_backup(self, run_pbm_command, service): """Verifies backup is fails if another backup is already running.""" - mock_pbm_snap = mock.Mock() - mock_pbm_snap.present = True - snap.return_value = {"charmed-mongodb": mock_pbm_snap} - action_event = mock.Mock() action_event.params = {} - output.return_value = b"Currently running:\n====\nSnapshot backup" + service.return_value = "pbm" + run_pbm_command.return_value = "Currently running:\n====\nSnapshot backup" self.harness.add_relation(RELATION_NAME, "s3-integrator") self.harness.charm.backups._on_create_backup_action(action_event) action_event.fail.assert_called() - @unittest.skip("Not implemented yet") + @patch("charm.MongoDBCharm.get_backup_service") @patch("charm.MongoDBCharm.run_pbm_command") - def test_backup_wrong_cred(self, output): + def test_backup_wrong_cred(self, run_pbm_command, service): """Verifies backup is fails if the credentials are incorrect.""" container = self.harness.model.unit.get_container("mongod") self.harness.set_can_connect(container, True) action_event = mock.Mock() action_event.params = {} - output.side_effect = ExecError( + service.return_value = "pbm" + run_pbm_command.side_effect = ExecError( command=["/usr/bin/pbm config --set this_key=doesnt_exist"], exit_code=403, stdout="status code: 403", diff --git a/tox.ini b/tox.ini index 299189109..4ebe2b0db 100644 --- a/tox.ini +++ b/tox.ini @@ -140,7 +140,7 @@ pass_env = CI_PACKED_CHARMS commands_pre = poetry install --with integration - poetry run pip install juju==2.9.42.1 + poetry run pip install juju==3.2.0.1 commands = poetry run pytest -v --tb native --log-cli-level=INFO -s --durations=0 {posargs} {[vars]tests_path}/integration/backup_tests/test_backups.py