Skip to content

Commit

Permalink
[DPE-2414] Storage reuse (#194)
Browse files Browse the repository at this point in the history
* storage reuse tests WIP

* updating tests

* tests passing

* format + lint

* cleaning files

* PR comments

* resolving enrico comments
  • Loading branch information
MiaAltieri authored Aug 31, 2023
1 parent b3fed03 commit 547ed0c
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 0 deletions.
55 changes: 55 additions & 0 deletions tests/integration/ha_tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import subprocess
import tarfile
import tempfile
import time
from asyncio import gather
from datetime import datetime
from pathlib import Path
Expand Down Expand Up @@ -734,6 +735,60 @@ async def update_pebble_plans(ops_test: OpsTest, override: Dict[str, str]) -> No
assert ret_code == 0, f"Failed to replan for unit {unit.name}"


async def reused_storage(ops_test: "OpsTest", reused_unit: "Unit", removal_time: float) -> bool:
    """Verify that the mongo daemon re-used the storage attached to the given unit.

    MongoDB's startup messages indicate whether storage was reused:
    - a member transitioning STARTUP -> STARTUP2 is syncing/getting its data
      from the primary (fresh storage);
    - a member transitioning REMOVED -> STARTUP2 is re-using the storage we
      provided.

    Args:
        ops_test: the ops_test fixture used to run juju commands.
        reused_unit: the unit whose storage should have been reused.
        removal_time: POSIX epoch timestamp (seconds) of when the old unit was
            removed; only transitions logged after this time count as reuse.

    Returns:
        True if a REMOVED -> STARTUP2 transition was logged after
        ``removal_time``, False otherwise.
    """
    cat_cmd = [
        "ssh",
        "--container",
        MONGODB_CONTAINER_NAME,
        reused_unit.name,
        "cat /var/lib/mongodb/mongodb.log",
    ]

    return_code, logs, _ = await ops_test.juju(*cat_cmd)

    assert (
        return_code == 0
    ), f"Failed catting mongodb logs, unit={reused_unit.name}, container={MONGODB_CONTAINER_NAME}"

    # Only inspect log lines that record a REMOVED -> STARTUP2 transition.
    for log in filter(filter_logs, logs.split("\n")):
        item = json.loads(log)
        reuse_time = convert_time(item["t"]["$date"])
        if reuse_time > removal_time:
            return True

    return False


def filter_logs(log: str) -> bool:
    """Return True if the log line records a REMOVED -> STARTUP2 state transition.

    This transition is mongod's indication that it is re-using existing storage
    rather than re-syncing from the primary.
    """
    # `X in s` already yields a bool; no need for `True if ... else False`.
    return '"newState":"STARTUP2","oldState":"REMOVED"' in log


def convert_time(time_as_str: str) -> float:
    """Convert a mongod log timestamp to a POSIX epoch timestamp.

    The input is provided in this format: 'YYYY-MM-DDTHH:MM:SS.MMM+00:00'
    (timezone-aware). ``datetime.timestamp()`` honours the embedded UTC
    offset and keeps sub-second precision, whereas ``time.mktime`` would
    interpret the time tuple in the machine's local timezone and drop
    microseconds, skewing comparisons against UTC removal times.
    """
    d = datetime.strptime(time_as_str, "%Y-%m-%dT%H:%M:%S.%f%z")
    return d.timestamp()


def get_highest_unit(ops_test: "OpsTest", app_name: str) -> "Unit":
    """Retrieve the most recently added unit of the given application.

    Juju unit numbers increase monotonically, so with ``n`` units the newest
    unit is ``<app_name>/<n - 1>``.

    Args:
        ops_test: the ops_test fixture holding the current model.
        app_name: name of the application whose newest unit is wanted.

    Returns:
        The unit whose name matches the highest unit number, or None if no
        unit with that name exists in the model.
    """
    num_units = len(ops_test.model.applications[app_name].units)
    # Build the name from app_name instead of hard-coding "mongodb-k8s" so the
    # helper works for any application the test deploys.
    highest_unit_name = f"{app_name}/{num_units - 1}"
    for unit in ops_test.model.applications[app_name].units:
        if unit.name == highest_unit_name:
            return unit


async def are_all_db_processes_down(ops_test: OpsTest, process: str) -> bool:
"""Verifies that all units of the charm do not have the DB process running."""
app = await get_application_name(ops_test, APP_NAME)
Expand Down
49 changes: 49 additions & 0 deletions tests/integration/ha_tests/test_ha.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
find_record_in_collection,
find_unit,
get_application_name,
get_highest_unit,
get_mongo_client,
get_other_mongodb_direct_client,
get_process_pid,
Expand All @@ -42,6 +43,7 @@
remove_instance_isolation,
retrieve_current_mongod_command,
retrieve_entries,
reused_storage,
scale_application,
send_signal_to_pod_container_process,
set_log_level,
Expand Down Expand Up @@ -586,3 +588,50 @@ async def test_network_cut(ops_test: OpsTest, continuous_writes, chaos_mesh):

# verify that old primary is up to date.
await verify_writes(ops_test)


async def test_storage_re_use(ops_test, continuous_writes):
    """Verifies that database units with attached storage correctly repurpose storage.

    It is not enough to verify that Juju attaches the storage. Hence test checks that the mongod
    properly uses the storage that was provided. (ie. doesn't just re-sync everything from
    primary, but instead computes a diff between current storage and primary storage.)
    """
    app = await get_application_name(ops_test, APP_NAME)

    # removing the only replica can be disastrous, so make sure at least two units exist
    if len(ops_test.model.applications[app].units) < 2:
        await ops_test.model.applications[app].add_unit(count=1)
        await ops_test.model.wait_for_idle(apps=[app], status="active", timeout=1000)

    # remove a unit and attach its storage to a new unit
    current_number_units = len(ops_test.model.applications[app].units)
    await scale_application(ops_test, app, current_number_units - 1)
    await ops_test.model.wait_for_idle(
        apps=[app], status="active", timeout=1000, wait_for_exact_units=(current_number_units - 1)
    )

    # record the removal time so that only storage-reuse log entries written after this
    # point count; k8s will automatically use the old storage from the storage pool
    removal_time = datetime.now(timezone.utc).timestamp()
    await scale_application(ops_test, app, current_number_units)
    await ops_test.model.wait_for_idle(
        apps=[app], status="active", timeout=1000, wait_for_exact_units=(current_number_units)
    )

    # this test only scaled the application up by one unit, so the highest-numbered unit
    # is the newest unit
    new_unit = get_highest_unit(ops_test, app)
    assert await reused_storage(
        ops_test, new_unit, removal_time
    ), "attached storage not properly re-used by MongoDB."

    # verify presence of primary, replica set member configuration, and number of primaries
    hostnames = await get_units_hostnames(ops_test)
    member_hosts = await fetch_replica_set_members(ops_test)
    assert set(member_hosts) == set(hostnames)
    assert (
        await count_primaries(ops_test) == 1
    ), "there is more than one primary in the replica set."

    # verify all units are up to date.
    await verify_writes(ops_test)

0 comments on commit 547ed0c

Please sign in to comment.