Skip to content

Commit

Permalink
add cos integration tests for alert rules
Browse files Browse the repository at this point in the history
  • Loading branch information
rgildein committed Aug 20, 2024
1 parent 367e9e7 commit 28b19bc
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 95 deletions.
3 changes: 3 additions & 0 deletions requirements-integration.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ requests
selenium
selenium-wire
tenacity
# Require >=0.4.0 because the reusable test infrastructure is only available in that version and above.
# This lower bound prevents pip-compile from resolving to an earlier version.
charmed-kubeflow-chisme>=0.4.0
71 changes: 64 additions & 7 deletions requirements-integration.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
#
# pip-compile requirements-integration.in
#
appnope==0.1.4
# via ipython
anyio==4.4.0
# via httpx
asttokens==2.4.0
# via stack-data
attrs==23.1.0
# via
# jsonschema
# outcome
# trio
backcall==0.2.0
Expand All @@ -24,6 +25,8 @@ cachetools==5.3.1
# via google-auth
certifi==2023.7.22
# via
# httpcore
# httpx
# kubernetes
# requests
# selenium
Expand All @@ -32,6 +35,8 @@ cffi==1.15.1
# via
# cryptography
# pynacl
charmed-kubeflow-chisme==0.4.3
# via -r requirements-integration.in
charset-normalizer==3.2.0
# via requests
cryptography==41.0.3
Expand All @@ -42,8 +47,11 @@ decorator==5.1.1
# via
# ipdb
# ipython
deepdiff==6.2.1
# via charmed-kubeflow-chisme
exceptiongroup==1.1.3
# via
# anyio
# pytest
# trio
# trio-websocket
Expand All @@ -52,11 +60,17 @@ executing==1.2.0
google-auth==2.17.3
# via kubernetes
h11==0.14.0
# via wsproto
# via
# httpcore
# wsproto
h2==4.1.0
# via selenium-wire
hpack==4.0.0
# via h2
httpcore==1.0.5
# via httpx
httpx==0.27.0
# via lightkube
hvac==1.2.0
# via juju
hyperframe==6.0.1
Expand All @@ -65,8 +79,12 @@ hyperframe==6.0.1
# selenium-wire
idna==3.4
# via
# anyio
# httpx
# requests
# trio
importlib-resources==6.4.3
# via jsonschema
iniconfig==2.0.0
# via pytest
ipdb==0.13.13
Expand All @@ -76,15 +94,24 @@ ipython==8.12.2
jedi==0.19.0
# via ipython
jinja2==3.1.2
# via pytest-operator
# via
# charmed-kubeflow-chisme
# pytest-operator
jsonschema==4.17.3
# via serialized-data-interface
juju==3.2.2
# via
# -r requirements-integration.in
# charmed-kubeflow-chisme
# pytest-operator
kaitaistruct==0.10
# via selenium-wire
kubernetes==27.2.0
# via juju
lightkube==0.15.3
# via charmed-kubeflow-chisme
lightkube-models==1.30.0.8
# via lightkube
macaroonbakery==1.3.1
# via juju
markupsafe==2.1.3
Expand All @@ -97,6 +124,12 @@ oauthlib==3.2.2
# via
# kubernetes
# requests-oauthlib
ops==2.15.0
# via
# charmed-kubeflow-chisme
# serialized-data-interface
ordered-set==4.1.0
# via deepdiff
outcome==1.2.0
# via trio
packaging==23.1
Expand All @@ -109,6 +142,8 @@ pexpect==4.8.0
# via ipython
pickleshare==0.7.5
# via ipython
pkgutil-resolve-name==1.3.10
# via jsonschema
pluggy==1.3.0
# via pytest
prompt-toolkit==3.0.39
Expand Down Expand Up @@ -148,6 +183,8 @@ pyrfc3339==1.1
# via
# juju
# macaroonbakery
pyrsistent==0.20.0
# via jsonschema
pysocks==1.7.1
# via
# selenium-wire
Expand All @@ -170,24 +207,34 @@ pyyaml==6.0.1
# -r requirements-integration.in
# juju
# kubernetes
# lightkube
# ops
# pytest-operator
# serialized-data-interface
requests==2.31.0
# via
# -r requirements-integration.in
# hvac
# kubernetes
# macaroonbakery
# requests-oauthlib
# serialized-data-interface
requests-oauthlib==1.3.1
# via kubernetes
rsa==4.9
# via google-auth
ruamel-yaml==0.18.6
# via charmed-kubeflow-chisme
ruamel-yaml-clib==0.2.8
# via ruamel-yaml
selenium==4.12.0
# via
# -r requirements-integration.in
# selenium-wire
selenium-wire==5.1.0
# via -r requirements-integration.in
serialized-data-interface==0.7.0
# via charmed-kubeflow-chisme
six==1.16.0
# via
# asttokens
Expand All @@ -198,13 +245,18 @@ six==1.16.0
# pymacaroons
# python-dateutil
sniffio==1.3.0
# via trio
# via
# anyio
# httpx
# trio
sortedcontainers==2.4.0
# via trio
stack-data==0.6.2
# via ipython
tenacity==8.2.3
# via -r requirements-integration.in
# via
# -r requirements-integration.in
# charmed-kubeflow-chisme
tomli==2.0.1
# via
# ipdb
Expand All @@ -223,6 +275,7 @@ trio-websocket==0.10.4
# via selenium
typing-extensions==4.7.1
# via
# anyio
# ipython
# typing-inspect
typing-inspect==0.9.0
Expand All @@ -235,12 +288,16 @@ urllib3[socks]==2.0.4
wcwidth==0.2.6
# via prompt-toolkit
websocket-client==1.6.2
# via kubernetes
# via
# kubernetes
# ops
websockets==8.1
# via juju
wsproto==1.2.0
# via
# selenium-wire
# trio-websocket
zipp==3.20.0
# via importlib-resources
zstandard==0.21.0
# via selenium-wire
128 changes: 40 additions & 88 deletions tests/integration/test_charm.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
# Copyright 2023 Canonical Ltd.
# See LICENSE file for licensing details.

import json
import logging
from pathlib import Path

import pytest
import requests
import yaml
from charmed_kubeflow_chisme.testing import (
assert_alert_rules,
assert_grafana_dashboards,
assert_metrics_endpoint,
deploy_and_assert_grafana_agent,
get_alert_rules,
get_grafana_dashboards,
)
from pytest_operator.plugin import OpsTest
from tenacity import Retrying, stop_after_attempt, stop_after_delay, wait_exponential
from tenacity import Retrying, stop_after_delay, wait_exponential

log = logging.getLogger(__name__)

Expand All @@ -22,9 +28,6 @@

# Name under which the charm under test is looked up in the Juju model
# (see the ops_test.model.applications[APP_NAME] lookups in the tests).
APP_NAME = "minio"
# Charm source location; presumably relative to the directory the tests are
# run from -- its use is not visible here, TODO confirm.
CHARM_ROOT = "."
# Charm names of the observability stack. NOTE(review): the visible
# Prometheus/Grafana test re-declares the same values as locals, so these
# constants may be unused -- confirm before relying on them.
PROMETHEUS = "prometheus-k8s"
GRAFANA = "grafana-k8s"
PROMETHEUS_SCRAPE = "prometheus-scrape-config-k8s"


@pytest.mark.abort_on_fail
Expand All @@ -42,6 +45,37 @@ async def test_build_and_deploy(ops_test: OpsTest):
)
await ops_test.model.wait_for_idle(timeout=60 * 10)

# Deploying grafana-agent-k8s and add all relations
await deploy_and_assert_grafana_agent(
ops_test.model, APP_NAME, metrics=True, dashboard=True, logging=False
)


async def test_metrics_enpoint(ops_test: OpsTest):
    """Verify the metrics endpoint advertised by the charm.

    The deployed application is handed to ``assert_metrics_endpoint`` together
    with the port and path the endpoint is expected to use. According to the
    helper's stated purpose, it collects the metrics endpoints from the relation
    data bag, checks that they are reachable from the grafana-agent-k8s charm and
    compares them with the values supplied here.
    """
    minio_app = ops_test.model.applications[APP_NAME]
    expected_endpoint = {
        "metrics_port": 9000,
        "metrics_path": "/minio/v2/metrics/cluster",
    }
    await assert_metrics_endpoint(minio_app, **expected_endpoint)


async def test_alert_rules(ops_test: OpsTest):
    """Check the charm's alert rules against the rules in the relation data bag.

    The rules returned by ``get_alert_rules`` are logged and then passed, with
    the deployed application, to ``assert_alert_rules`` for verification.
    """
    charm_app = ops_test.model.applications[APP_NAME]
    expected_rules = get_alert_rules()
    log.info("found alert_rules: %s", expected_rules)
    await assert_alert_rules(charm_app, expected_rules)


async def test_grafana_dashboards(ops_test: OpsTest):
    """Check that the Grafana dashboards are present in the relation data bag.

    The dashboards returned by ``get_grafana_dashboards`` are logged and then
    passed, with the deployed application, to ``assert_grafana_dashboards``.
    """
    charm_app = ops_test.model.applications[APP_NAME]
    expected_dashboards = get_grafana_dashboards()
    log.info("found dashboards: %s", expected_dashboards)
    await assert_grafana_dashboards(charm_app, expected_dashboards)


async def connect_client_to_server(
ops_test: OpsTest, application, access_key=None, secret_key=None
Expand Down Expand Up @@ -200,85 +234,3 @@ async def test_refresh_credentials(ops_test: OpsTest):
access_key=config["access-key"],
secret_key=config["secret-key"],
)


async def test_prometheus_grafana_integration(ops_test: OpsTest):
    """Deploy prometheus, grafana and required relations, then test the metrics.

    Steps:
      1. Deploy prometheus-k8s and grafana-k8s (pinned revisions) plus
         prometheus-scrape-config-k8s, and relate them with the charm under test.
      2. Wait for the whole model to become active/idle.
      3. Query Prometheus for the ``up`` series of the charm under test and
         assert that the unit is reported as available, retrying according to
         ``retry_for_5_attempts``.

    Args:
        ops_test: pytest-operator fixture giving access to the Juju model.

    Raises:
        AssertionError: if Prometheus does not report the application as up.
        requests.RequestException: if Prometheus cannot be reached in time.
    """
    prometheus = "prometheus-k8s"
    grafana = "grafana-k8s"
    prometheus_scrape = "prometheus-scrape-config-k8s"
    scrape_config = {"scrape_interval": "30s"}

    # Deploy and relate prometheus
    # FIXME: Unpin revision once https://github.com/canonical/bundle-kubeflow/issues/688 is closed
    await ops_test.juju(
        "deploy",
        prometheus,
        "--channel",
        "latest/edge",
        "--revision",
        "137",
        "--trust",
        check=True,
    )
    # FIXME: Unpin revision once https://github.com/canonical/bundle-kubeflow/issues/690 is closed
    await ops_test.juju(
        "deploy",
        grafana,
        "--channel",
        "latest/edge",
        "--revision",
        "89",
        "--trust",
        check=True,
    )
    await ops_test.model.deploy(prometheus_scrape, channel="latest/beta", config=scrape_config)

    await ops_test.model.add_relation(APP_NAME, prometheus_scrape)
    await ops_test.model.add_relation(
        f"{prometheus}:grafana-dashboard", f"{grafana}:grafana-dashboard"
    )
    await ops_test.model.add_relation(
        f"{APP_NAME}:grafana-dashboard", f"{grafana}:grafana-dashboard"
    )
    await ops_test.model.add_relation(
        f"{prometheus}:metrics-endpoint", f"{prometheus_scrape}:metrics-endpoint"
    )

    await ops_test.model.wait_for_idle(status="active", timeout=60 * 20)

    status = await ops_test.model.get_status()
    prometheus_unit_ip = status["applications"][prometheus]["units"][f"{prometheus}/0"]["address"]
    log.info(f"Prometheus available at http://{prometheus_unit_ip}:9090")

    for attempt in retry_for_5_attempts:
        log.info(
            "Testing prometheus deployment (attempt %s)", attempt.retry_state.attempt_number
        )
        with attempt:
            # An explicit timeout is required: without it a stalled connection
            # blocks forever and the retry policy's stop conditions are never
            # evaluated.
            r = requests.get(
                f"http://{prometheus_unit_ip}:9090/api/v1/query",
                params={"query": f'up{{juju_application="{APP_NAME}"}}'},
                timeout=10,
            )
            response = r.json()
            response_status = response["status"]
            log.info(f"Response status is {response_status}")
            assert response_status == "success"

            # Fail with a readable message (and let the retry kick in) when
            # Prometheus has not scraped the application yet.
            results = response["data"]["result"]
            assert results, f"Prometheus returned no 'up' series for {APP_NAME}"

            response_metric = results[0]["metric"]
            assert response_metric["juju_application"] == APP_NAME
            assert response_metric["juju_model"] == ops_test.model_name

            # Assert the unit is available by checking the query result
            # The data is presented as a list [1707357912.349, '1'], where the
            # first value is a timestamp and the second value is the state of the unit
            # 1 means available, 0 means unavailable
            assert results[0]["value"][1] == "1"


# Helper to retry calling a function over 30 seconds or 5 attempts.
# The two stop conditions are OR-ed, so retrying ends as soon as either limit
# is reached. The wait between attempts grows exponentially and is bounded
# between 1 and 10 seconds. With reraise=True the last underlying exception
# (e.g. an AssertionError) is raised instead of tenacity's RetryError.
retry_for_5_attempts = Retrying(
stop=(stop_after_attempt(5) | stop_after_delay(30)),
wait=wait_exponential(multiplier=1, min=1, max=10),
reraise=True,
)

0 comments on commit 28b19bc

Please sign in to comment.