[DPE-5237] implement revision check for sharding components (#331)
* update deps for revision checker

* copy over infra from vm

* adding infra for local charm revision

* update set status lib

* add integration tests

* fmt + lint

* update unit tests

* remove unused bits

* Apply suggestions from code review

Co-authored-by: Mehdi Bendriss <bendrissmehdi@gmail.com>

---------

Co-authored-by: Mehdi Bendriss <bendrissmehdi@gmail.com>
MiaAltieri and Mehdi-Bendriss authored Sep 13, 2024
1 parent aa27779 commit b2aa4b2
Showing 13 changed files with 299 additions and 130 deletions.
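
At a glance, the change wires a CrossAppVersionChecker (from the data-platform-helpers package) into the charm and surfaces revision mismatches as BlockedStatus on shards or WaitingStatus on config-servers. A condensed sketch of the flow, using only names that appear in the diff below (error handling omitted; the literal relation names are assumptions based on the integration test):

    # Condensed sketch of the revision-check flow introduced by this commit.
    # Identifiers are taken from the diff below; this is not the exact call sequence.
    from data_platform_helpers.version_check import CrossAppVersionChecker, get_charm_revision

    checker = CrossAppVersionChecker(
        charm,
        version=get_charm_revision(charm.unit, local_version=charm.get_charm_internal_revision),
        relations_to_check=["sharding", "config-server"],
    )
    if not checker.are_related_apps_valid():
        # set_status.py maps this to BlockedStatus (shard) or WaitingStatus (config-server)
        ...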
1 change: 1 addition & 0 deletions .gitignore
@@ -17,3 +17,4 @@ share/
 
 /requirements.txt
 /requirements-last-build.txt
+/charm_internal_version
5 changes: 5 additions & 0 deletions charmcraft.yaml
@@ -20,6 +20,11 @@ parts:
     override-build: |
       rustup default stable
       craftctl default
+  files:
+    plugin: dump
+    source: .
+    prime:
+      - charm_internal_version
 bases:
   - build-on:
       - name: "ubuntu"
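
The new files part uses charmcraft's dump plugin to prime a charm_internal_version file from the project root into the packed charm; the .gitignore entry above exists because the file is generated at build time. How it is generated is not part of this diff; a plausible pre-pack step (hypothetical, e.g. run in CI before charmcraft pack) could be:

    # Hypothetical pre-pack step: stamp the short git hash into charm_internal_version
    # so a locally built charm carries an identifiable internal revision.
    import subprocess
    from pathlib import Path

    sha = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"], text=True).strip()
    Path("charm_internal_version").write_text(f"{sha}\n")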
51 changes: 47 additions & 4 deletions lib/charms/mongodb/v0/set_status.py
@@ -7,6 +7,7 @@
 from typing import Optional, Tuple
 
 from charms.mongodb.v1.mongodb import MongoConfiguration, MongoDBConnection
+from data_platform_helpers.version_check import NoVersionError, get_charm_revision
 from ops.charm import CharmBase
 from ops.framework import Object
 from ops.model import ActiveStatus, BlockedStatus, StatusBase, WaitingStatus
@@ -22,7 +23,7 @@
 
 # Increment this PATCH version before using `charmcraft publish-lib` or reset
 # to 0 if you are raising the major API version
-LIBPATCH = 3
+LIBPATCH = 4
 
 AUTH_FAILED_CODE = 18
 UNAUTHORISED_CODE = 13
@@ -88,7 +89,7 @@ def is_status_related_to_mismatched_revision(self, status_type: str) -> bool:
         "goal state" which processes data differently and the other via the ".status" property.
         Hence we have to be flexible to handle each.
         """
-        if not self.charm.get_cluster_mismatched_revision_status():
+        if not self.get_cluster_mismatched_revision_status():
             return False
 
         if "waiting" in status_type and self.charm.is_role(Config.Role.CONFIG_SERVER):
@@ -153,7 +154,7 @@ def is_unit_status_ready_for_upgrade(self) -> bool:
         if isinstance(current_status, ActiveStatus):
             return True
 
-        if not isinstance(current_status, WaitingStatus):
+        if not isinstance(current_status, BlockedStatus):
             return False
 
         if status_message and "is not up-to date with config-server" in status_message:
@@ -235,7 +236,49 @@ def get_invalid_integration_status(self) -> Optional[StatusBase]:
                 "Relation to s3-integrator is not supported, config role must be config-server"
             )
 
-        return self.charm.get_cluster_mismatched_revision_status()
+        return self.get_cluster_mismatched_revision_status()
+
+    def get_cluster_mismatched_revision_status(self) -> Optional[StatusBase]:
+        """Returns a Status if the cluster has mismatched revisions."""
+        # check for invalid versions in sharding integrations, i.e. a shard running on
+        # revision 88 and a config-server running on revision 110
+        current_charms_version = get_charm_revision(
+            self.charm.unit, local_version=self.charm.get_charm_internal_revision
+        )
+        local_identifier = (
+            "-locally built"
+            if self.charm.version_checker.is_local_charm(self.charm.app.name)
+            else ""
+        )
+        try:
+            if self.charm.version_checker.are_related_apps_valid():
+                return
+        except NoVersionError as e:
+            # relations to shards/config-server are expected to provide a version number. If they
+            # do not, it is because they are from an earlier charm revision, i.e. pre-revision X.
+            logger.debug(e)
+            if self.charm.is_role(Config.Role.SHARD):
+                return BlockedStatus(
+                    f"Charm revision ({current_charms_version}{local_identifier}) is not up-to date with config-server."
+                )
+
+        if self.charm.is_role(Config.Role.SHARD):
+            config_server_revision = self.charm.version_checker.get_version_of_related_app(
+                self.get_config_server_name()
+            )
+            remote_local_identifier = (
+                "-locally built"
+                if self.charm.version_checker.is_local_charm(self.get_config_server_name())
+                else ""
+            )
+            return BlockedStatus(
+                f"Charm revision ({current_charms_version}{local_identifier}) is not up-to date with config-server ({config_server_revision}{remote_local_identifier})."
+            )
+
+        if self.charm.is_role(Config.Role.CONFIG_SERVER):
+            return WaitingStatus(
+                f"Waiting for shards to upgrade/downgrade to revision {current_charms_version}{local_identifier}."
+            )
 
 
 def build_unit_status(mongodb_config: MongoConfiguration, unit_host: str) -> StatusBase:
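
The new get_cluster_mismatched_revision_status has three outcomes: None when every related app reports a matching revision, BlockedStatus on a shard that differs from its config-server (or whose peer predates version reporting entirely), and WaitingStatus on a config-server whose shards still have to upgrade or downgrade. A minimal sketch of how the charm consumes it, mirroring the call site in src/charm.py below:

    # Minimal consumption sketch; see is_relation_feasible in src/charm.py below.
    if mismatch_status := self.status.get_cluster_mismatched_revision_status():
        self.status.set_and_share_status(mismatch_status)  # surfaces Blocked/Waiting to the operator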
191 changes: 96 additions & 95 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
@@ -21,7 +21,7 @@ tenacity = "^8.2.3"
 pyyaml = "^6.0.1"
 jinja2 = "^3.1.3"
 poetry-core = "^1.9.0"
-data-platform-helpers = "^0.1.2"
+data-platform-helpers = "^0.1.3"
 pyOpenSSL = "^24.2.1"
 setuptools = "^72.0.0"
 
@@ -72,7 +72,7 @@ juju = "^3.5.0"
 pytest = "^8.1.1"
 pytest-asyncio = "^0.21.1"
 pytest-mock = "^3.14.0"
-pytest-operator = "^0.34.0"
+pytest-operator = "^0.36.0"
 pytest-operator-cache = {git = "https://github.com/canonical/data-platform-workflows", tag = "v21.0.0", subdirectory = "python/pytest_plugins/pytest_operator_cache"}
 pytest-operator-groups = {git = "https://github.com/canonical/data-platform-workflows", tag = "v21.0.0", subdirectory = "python/pytest_plugins/pytest_operator_groups"}
 pytest-github-secrets = {git = "https://github.com/canonical/data-platform-workflows", tag = "v21.0.0", subdirectory = "python/pytest_plugins/github_secrets"}
29 changes: 20 additions & 9 deletions src/charm.py
@@ -38,6 +38,10 @@
     OperatorUser,
 )
 from charms.prometheus_k8s.v0.prometheus_scrape import MetricsEndpointProvider
+from data_platform_helpers.version_check import (
+    CrossAppVersionChecker,
+    get_charm_revision,
+)
 from ops.charm import (
     ActionEvent,
     CharmBase,
@@ -56,7 +60,6 @@
     ModelError,
     Relation,
     RelationDataContent,
-    StatusBase,
     Unit,
     WaitingStatus,
 )
@@ -140,6 +143,15 @@ def __init__(self, *args):
         self.config_server = ShardingProvider(self)
         self.cluster = ClusterProvider(self)
 
+        self.version_checker = CrossAppVersionChecker(
+            self,
+            version=get_charm_revision(self.unit, local_version=self.get_charm_internal_revision),
+            relations_to_check=[
+                Config.Relations.SHARDING_RELATIONS_NAME,
+                Config.Relations.CONFIG_SERVER_RELATIONS_NAME,
+            ],
+        )
+
     # BEGIN: properties
 
     @property
@@ -470,16 +482,15 @@ def primary(self) -> str | None:
 
         return None
 
+    @property
+    def get_charm_internal_revision(self) -> str:
+        """Returns the contents of the charm_internal_version file."""
+        with open(Config.CHARM_INTERNAL_VERSION_FILE, "r") as f:
+            return f.read().strip()
+
     # END: properties
 
     # BEGIN: generic helper methods
-    def get_cluster_mismatched_revision_status(self) -> Optional[StatusBase]:
-        """Returns a Status if the cluster has mismatched revisions.
-
-        TODO implement this method as a part of sharding upgrades.
-        """
-        return None
-
     def remote_mongos_config(self, hosts) -> MongoConfiguration:
         """Generates a MongoConfiguration object for mongos in the deployment of MongoDB."""
         # mongos that are part of the cluster have the same username and password, but different
@@ -1568,7 +1579,7 @@ def is_relation_feasible(self, rel_interface: str) -> bool:
             )
             return False
 
-        if revision_mismatch_status := self.get_cluster_mismatched_revision_status():
+        if revision_mismatch_status := self.status.get_cluster_mismatched_revision_status():
             self.status.set_and_share_status(revision_mismatch_status)
             return False
 
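
One subtlety worth noting: get_charm_internal_revision is a @property despite its verb-like name, so self.get_charm_internal_revision in the CrossAppVersionChecker call above already evaluates to the file's contents (a str), not to a bound method:

    # The property reads the file primed by charmcraft.yaml; hypothetical contents "abc1234\n"
    # would make charm.get_charm_internal_revision == "abc1234".
    local_version = self.get_charm_internal_revision  # a str, no call needed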
1 change: 1 addition & 0 deletions src/config.py
@@ -22,6 +22,7 @@ class Config:
     MONGOD_CONF_DIR = "/etc/mongod"
     MONGODB_LOG_FILENAME = "mongodb.log"
 
+    CHARM_INTERNAL_VERSION_FILE = "charm_internal_version"
     LICENSE_PATH = "/licenses/LICENSE"
     CONTAINER_NAME = "mongod"
     SERVICE_NAME = "mongod"
14 changes: 0 additions & 14 deletions src/exceptions.py
@@ -7,40 +7,26 @@
 class MongoError(Exception):
     """Common parent for Mongo errors, allowing to catch them all at once."""
 
-    pass
-
 
 class AdminUserCreationError(MongoError):
     """Raised when a command to create an admin user on MongoDB fails."""
 
-    pass
-
 
 class ApplicationHostNotFoundError(MongoError):
     """Raised when a queried host is not in the application peers or the current host."""
 
-    pass
-
 
 class MongoSecretError(MongoError):
     """Common parent for all Mongo Secret Exceptions."""
 
-    pass
-
 
 class SecretNotAddedError(MongoSecretError):
     """Raised when a Juju 3 secret couldn't be set or re-set."""
 
-    pass
-
 
 class MissingSecretError(MongoSecretError):
     """Could be raised when a Juju 3 mandatory secret couldn't be found."""
 
-    pass
-
 
 class SecretAlreadyExistsError(MongoSecretError):
     """A secret that we want to create already exists."""
-
-    pass
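
The deleted pass statements were redundant: a docstring is itself a complete class body, so the trimmed classes behave identically, e.g.:

    # A docstring alone is a valid class body; `pass` added nothing.
    class MongoError(Exception):
        """Common parent for Mongo errors, allowing to catch them all at once."""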
103 changes: 103 additions & 0 deletions tests/integration/upgrades/test_revision_check.py
@@ -0,0 +1,103 @@
#!/usr/bin/env python3
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
import pytest
from pytest_operator.plugin import OpsTest

from ..helpers import METADATA, wait_for_mongodb_units_blocked

MONGODB_K8S_CHARM = "mongodb-k8s"
SHARD_REL_NAME = "sharding"
CONFIG_SERVER_REL_NAME = "config-server"

LOCAL_SHARD_APP_NAME = "local-shard"
REMOTE_SHARD_APP_NAME = "remote-shard"
LOCAL_CONFIG_SERVER_APP_NAME = "local-config-server"
REMOTE_CONFIG_SERVER_APP_NAME = "remote-config-server"

CLUSTER_COMPONENTS = [
    LOCAL_SHARD_APP_NAME,
    REMOTE_SHARD_APP_NAME,
    LOCAL_CONFIG_SERVER_APP_NAME,
    REMOTE_CONFIG_SERVER_APP_NAME,
]


@pytest.mark.group(1)
@pytest.mark.abort_on_fail
async def test_build_and_deploy(ops_test: OpsTest) -> None:
    my_charm = await ops_test.build_charm(".")
    resources = {"mongodb-image": METADATA["resources"]["mongodb-image"]["upstream-source"]}

    await ops_test.model.deploy(
        MONGODB_K8S_CHARM,
        application_name=REMOTE_SHARD_APP_NAME,
        config={"role": "shard"},
        channel="edge",
    )

    await ops_test.model.deploy(
        MONGODB_K8S_CHARM,
        application_name=REMOTE_CONFIG_SERVER_APP_NAME,
        config={"role": "config-server"},
        channel="edge",
    )
    await ops_test.model.deploy(
        my_charm,
        resources=resources,
        config={"role": "config-server"},
        application_name=LOCAL_CONFIG_SERVER_APP_NAME,
    )
    await ops_test.model.deploy(
        my_charm,
        resources=resources,
        config={"role": "shard"},
        application_name=LOCAL_SHARD_APP_NAME,
    )

    await ops_test.model.wait_for_idle(apps=CLUSTER_COMPONENTS, idle_period=20)


@pytest.mark.group(1)
@pytest.mark.abort_on_fail
async def test_local_config_server_reports_remote_shard(ops_test: OpsTest) -> None:
    """Tests that the local config server reports the remote shard's mismatched revision."""
    await ops_test.model.integrate(
        f"{REMOTE_SHARD_APP_NAME}:{SHARD_REL_NAME}",
        f"{LOCAL_CONFIG_SERVER_APP_NAME}:{CONFIG_SERVER_REL_NAME}",
    )

    await ops_test.model.wait_for_idle(
        apps=[LOCAL_CONFIG_SERVER_APP_NAME],
        status="waiting",
        raise_on_blocked=False,
        idle_period=20,
    )

    config_server_unit = ops_test.model.applications[LOCAL_CONFIG_SERVER_APP_NAME].units[0]

    assert (
        "Waiting for shards to upgrade/downgrade to revision"
        in config_server_unit.workload_status_message
    ), "Config server does not correctly report mismatch in revision"


@pytest.mark.group(1)
@pytest.mark.abort_on_fail
async def test_local_shard_reports_remote_config_server(ops_test: OpsTest) -> None:
    """Tests that the local shard reports the remote config-server's mismatched revision."""
    await ops_test.model.integrate(
        f"{LOCAL_SHARD_APP_NAME}:{SHARD_REL_NAME}",
        f"{REMOTE_CONFIG_SERVER_APP_NAME}:{CONFIG_SERVER_REL_NAME}",
    )

    await wait_for_mongodb_units_blocked(
        ops_test,
        LOCAL_SHARD_APP_NAME,
        timeout=300,
    )

    shard_unit = ops_test.model.applications[LOCAL_SHARD_APP_NAME].units[0]
    assert (
        "is not up-to date with config-server." in shard_unit.workload_status_message
    ), "Shard does not correctly report mismatch in revision"
7 changes: 5 additions & 2 deletions tests/unit/test_charm.py
@@ -36,8 +36,9 @@
 
 
 class TestCharm(unittest.TestCase):
+    @patch("charm.get_charm_revision")
     @patch_network_get(private_address="1.1.1.1")
-    def setUp(self):
+    def setUp(self, *unused):
         self.maxDiff = None
         self.harness = Harness(MongoDBCharm)
         mongo_resource = {
@@ -583,9 +584,11 @@ def test_reconfigure_remove_member_failure(self, connection, defer):
         connection.return_value.__enter__.return_value.remove_replset_member.assert_called()
         defer.assert_called()
 
+    @patch("charms.mongodb.v0.set_status.get_charm_revision")
+    @patch("charm.CrossAppVersionChecker.is_local_charm")
     @patch("ops.framework.EventBase.defer")
     @patch("charm.MongoDBConnection")
-    def test_reconfigure_peer_not_ready(self, connection, defer):
+    def test_reconfigure_peer_not_ready(self, connection, defer, *unused):
         """Tests reconfigure does not proceed when the adding member is not ready.
 
         Verifies in relation joined events, that when the adding member is not ready that the event
3 changes: 2 additions & 1 deletion tests/unit/test_mongodb_backups.py
@@ -31,8 +31,9 @@
 
 
 class TestMongoBackups(unittest.TestCase):
+    @patch("charm.get_charm_revision")
     @patch_network_get(private_address="1.1.1.1")
-    def setUp(self):
+    def setUp(self, *unused):
         self.harness = Harness(MongoDBCharm)
         self.harness.begin()
         self.harness.add_relation("database-peers", "database-peers")
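
Both setUp patches follow the same pattern: the test harness has neither a Juju-assigned revision nor a packed charm_internal_version file, so revision lookups are stubbed out. A minimal sketch for any new test that triggers status evaluation (hypothetical test name; mock values are illustrative):

    # Sketch: stub revision lookups that would otherwise hit Juju or the version file.
    # With stacked @patch decorators, the bottom-most patch supplies the first mock argument.
    @patch("charms.mongodb.v0.set_status.get_charm_revision")
    @patch("charm.CrossAppVersionChecker.is_local_charm")
    def test_example(self, is_local_charm, get_charm_revision):
        is_local_charm.return_value = False  # pretend a store-released charm
        get_charm_revision.return_value = 123  # hypothetical revision number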