Skip to content

Commit

Permalink
Add metrics relation (#261)
Browse files Browse the repository at this point in the history
* Add metrics relation

Expose the metrics port and provide the metrics-endpoint relation. With these
changes, the charm can be monitored with Prometheus.

* fix ConfigMaps in integration tests

* revert changes made only for testing purposes
  • Loading branch information
rgildein authored Aug 7, 2024
1 parent e10a2e1 commit cde23b0
Show file tree
Hide file tree
Showing 9 changed files with 2,871 additions and 10 deletions.

Large diffs are not rendered by default.

2,378 changes: 2,378 additions & 0 deletions charms/kserve-controller/lib/charms/prometheus_k8s/v0/prometheus_scrape.py

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions charms/kserve-controller/metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ resources:
type: oci-image
description: OCI image for kube rbac proxy
upstream-source: gcr.io/kubebuilder/kube-rbac-proxy:v0.13.1
provides:
metrics-endpoint:
interface: prometheus_scrape
requires:
object-storage:
interface: object-storage
Expand Down
21 changes: 19 additions & 2 deletions charms/kserve-controller/src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,16 @@
GatewayRequirer,
)
from charms.loki_k8s.v1.loki_push_api import LogForwarder
from charms.observability_libs.v1.kubernetes_service_patch import KubernetesServicePatch
from charms.prometheus_k8s.v0.prometheus_scrape import MetricsEndpointProvider
from charms.resource_dispatcher.v0.kubernetes_manifests import (
KubernetesManifest,
KubernetesManifestRequirerWrapper,
)
from jinja2 import Template
from jsonschema import ValidationError
from lightkube import ApiError
from lightkube.models.core_v1 import ServicePort
from ops.charm import CharmBase
from ops.framework import StoredState
from ops.main import main
Expand Down Expand Up @@ -89,6 +92,8 @@
]
NO_MINIO_RELATION_DATA = {}

METRICS_PORT = 8080


def parse_images_config(config: str) -> Dict:
"""
Expand Down Expand Up @@ -166,6 +171,18 @@ def __init__(self, *args):

self._logging = LogForwarder(charm=self)

# metrics relation configuration
metrics_port = ServicePort(
port=METRICS_PORT, targetPort=METRICS_PORT, name=f"{self.app.name}-metrics"
)
self.service_patcher = KubernetesServicePatch(
self, [metrics_port], service_name=f"{self.model.app.name}"
)
self.prometheus_provider = MetricsEndpointProvider(
self,
jobs=[{"static_configs": [{"targets": [f"*:{METRICS_PORT}"]}]}],
)

@property
def _context(self):
"""Returns a dictionary containing context to be used for rendering."""
Expand Down Expand Up @@ -236,7 +253,7 @@ def _controller_pebble_layer(self):
self._controller_container_name: {
"override": "replace",
"summary": "KServe Controller",
"command": "/manager --metrics-addr=:8080",
"command": f"/manager --metrics-addr=:{METRICS_PORT}",
"startup": "enabled",
"environment": {
"POD_NAMESPACE": self.model.name,
Expand All @@ -256,7 +273,7 @@ def _rbac_proxy_pebble_layer(self):
self._rbac_proxy_container_name: {
"override": "replace",
"summary": "Kube Rbac Proxy",
"command": "/usr/local/bin/kube-rbac-proxy --secure-listen-address=0.0.0.0:8443 --upstream=http://127.0.0.1:8080 --logtostderr=true --v=10", # noqa E501
"command": f"/usr/local/bin/kube-rbac-proxy --secure-listen-address=0.0.0.0:8443 --upstream=http://127.0.0.1:{METRICS_PORT} --logtostderr=true --v=10", # noqa E501
"startup": "enabled",
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ data:
}
metricsAggregator: |-
{
"enableMetricAggregation": "false",
"enablePrometheusScraping" : "false"
"enableMetricAggregation": "true",
"enablePrometheusScraping" : "true"
}
router: |-
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ logger: |-
}
metricsAggregator: |-
{
"enableMetricAggregation": "false",
"enablePrometheusScraping" : "false"
"enableMetricAggregation": "true",
"enablePrometheusScraping" : "true"
}
router: |-
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ logger: |-
}
metricsAggregator: |-
{
"enableMetricAggregation": "false",
"enablePrometheusScraping" : "false"
"enableMetricAggregation": "true",
"enablePrometheusScraping" : "true"
}
router: |-
{
Expand Down
13 changes: 12 additions & 1 deletion charms/kserve-controller/tests/integration/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from charmed_kubeflow_chisme.kubernetes import KubernetesResourceHandler
from charmed_kubeflow_chisme.testing import (
assert_logging,
assert_metrics_endpoint,
deploy_and_assert_grafana_agent,
)
from lightkube.core.exceptions import ApiError
Expand Down Expand Up @@ -247,7 +248,7 @@ async def test_build_and_deploy(ops_test: OpsTest):

# Deploying grafana-agent-k8s and add all relations
await deploy_and_assert_grafana_agent(
ops_test.model, APP_NAME, metrics=False, dashboard=False, logging=True
ops_test.model, APP_NAME, metrics=True, dashboard=False, logging=True
)


Expand Down Expand Up @@ -328,6 +329,16 @@ async def test_logging(ops_test: OpsTest):
await assert_logging(app)


async def test_metrics_enpoint(ops_test):
    """Check the charm's metrics endpoint through the shared chisme helper.

    The deployed application is looked up in the test model by ``APP_NAME`` and handed to
    ``assert_metrics_endpoint``, which reads the metrics endpoints from the relation data
    bag, checks that they are reachable from the grafana-agent-k8s charm, and compares them
    with the port and path supplied here.
    """
    application = ops_test.model.applications[APP_NAME]
    await assert_metrics_endpoint(
        application,
        metrics_port=8080,
        metrics_path="/metrics",
    )


# # Remove the InferenceService deployed in RawDeployment mode
# lightkube_client.delete(
# inference_service_resource, name=inf_svc_name, namespace=rawdeployment_mode_namespace
Expand Down
21 changes: 20 additions & 1 deletion charms/kserve-controller/tests/unit/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ def harness():
harness.set_leader(True)
harness.set_can_connect("kserve-controller", True)
harness.set_can_connect("kube-rbac-proxy", True)
return harness
with patch("charm.ServicePort"), patch("charm.KubernetesServicePatch"):
yield harness


@pytest.fixture()
Expand All @@ -91,6 +92,24 @@ def mocked_lightkube_client(mocker, mocked_resource_handler):
yield mocked_resource_handler.lightkube_client


def test_metrics(harness):
    """Verify the metrics-related objects are built when the charm starts.

    ``MetricsEndpointProvider``, ``KubernetesServicePatch`` and ``ServicePort`` are replaced
    by mocks in the ``charm`` module, the charm is started with ``harness.begin()``, and
    each mock is then checked for exactly one call with the expected arguments.
    """
    with patch("charm.MetricsEndpointProvider") as provider_cls, patch(
        "charm.KubernetesServicePatch"
    ) as patcher_cls, patch("charm.ServicePort") as port_cls:
        harness.begin()

        # The provider receives a single static scrape job for the metrics port.
        expected_jobs = [{"static_configs": [{"targets": ["*:8080"]}]}]
        provider_cls.assert_called_once_with(harness.charm, jobs=expected_jobs)

        # The service port is built once and named after the application.
        port_cls.assert_called_once_with(
            port=8080,
            targetPort=8080,
            name="kserve-controller-metrics",
        )

        # The service patcher gets the exact ServicePort object built above.
        expected_ports = [port_cls.return_value]
        patcher_cls.assert_called_once_with(
            harness.charm,
            expected_ports,
            service_name="kserve-controller",
        )


def test_log_forwarding(harness):
"""Test LogForwarder initialization."""
with patch("charm.LogForwarder") as mock_logging:
Expand Down

0 comments on commit cde23b0

Please sign in to comment.