Skip to content

Commit

Permalink
Add discovery response metrics to Agent (#665)
Browse files Browse the repository at this point in the history
* add Akri Discovery Response Result metric

Signed-off-by: Johnson Shih <jshih@microsoft.com>

* add Akri Discovery Response latency metric

Signed-off-by: Johnson Shih <jshih@microsoft.com>

* separate metrics data to a file

Signed-off-by: Johnson Shih <jshih@microsoft.com>

* Update patch version

Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>

* update patch version

Signed-off-by: Johnson Shih <jshih@microsoft.com>

---------

Signed-off-by: Johnson Shih <jshih@microsoft.com>
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
  • Loading branch information
johnsonshih and github-actions[bot] authored Oct 10, 2023
1 parent 632035e commit cc29959
Show file tree
Hide file tree
Showing 21 changed files with 98 additions and 45 deletions.
28 changes: 14 additions & 14 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion agent/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "agent"
version = "0.12.11"
version = "0.12.12"
license = "Apache-2.0"
authors = ["Kate Goldenring <kate.goldenring@microsoft.com>", "<bfjelds@microsoft.com>"]
edition = "2018"
Expand Down
10 changes: 0 additions & 10 deletions agent/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
extern crate hyper;
#[macro_use]
extern crate lazy_static;
#[macro_use]
extern crate log;
#[macro_use]
extern crate serde_derive;
mod util;

use akri_shared::akri::{metrics::run_metrics_server, API_NAMESPACE};
use log::{info, trace};
use prometheus::{HistogramVec, IntGaugeVec};
use std::{
collections::HashMap,
env,
Expand All @@ -28,13 +25,6 @@ use util::{
slot_reconciliation::periodic_slot_reconciliation,
};

lazy_static! {
// Reports the number of Instances visible to this node, grouped by Configuration and whether it is shared
pub static ref INSTANCE_COUNT_METRIC: IntGaugeVec = prometheus::register_int_gauge_vec!("akri_instance_count", "Akri Instance Count", &["configuration", "is_shared"]).unwrap();
// Reports the time to get discovery results, grouped by Configuration
pub static ref DISCOVERY_RESPONSE_TIME_METRIC: HistogramVec = prometheus::register_histogram_vec!("akri_discovery_response_time", "Akri Discovery Response Time", &["configuration"]).unwrap();
}

/// This is the entry point for the Akri Agent.
/// It must be built on unix systems, since the underlying libraries for the `DevicePluginService` unix socket connection are unix only.
#[cfg(unix)]
Expand Down
42 changes: 38 additions & 4 deletions agent/src/util/discovery_operator.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use super::super::INSTANCE_COUNT_METRIC;
#[cfg(any(test, feature = "agent-full"))]
use super::embedded_discovery_handlers::get_discovery_handler;
use super::metrics::INSTANCE_COUNT_METRIC;
use super::{
config_action::ConfigId,
constants::SHARED_INSTANCE_OFFLINE_GRACE_PERIOD_SECS,
Expand Down Expand Up @@ -68,6 +68,8 @@ pub struct DiscoveryOperator {
config: Configuration,
/// Akri Instances discovered by this `DiscoveryOperator`
device_plugin_context: Arc<RwLock<DevicePluginContext>>,
/// Timestamp of when this `DiscoveryOperator` was created, which happens when its config is created or updated
config_timestamp: Instant,
}

#[cfg_attr(test, automock)]
Expand All @@ -81,6 +83,7 @@ impl DiscoveryOperator {
discovery_handler_map,
config,
device_plugin_context,
config_timestamp: Instant::now(),
}
}
fn get_config_id(&self) -> ConfigId {
Expand All @@ -105,6 +108,11 @@ impl DiscoveryOperator {
pub fn get_device_plugin_context(&self) -> Arc<RwLock<DevicePluginContext>> {
self.device_plugin_context.clone()
}
/// Returns the `config_timestamp` field: the `Instant` captured when this
/// `DiscoveryOperator` was constructed.
///
/// Exposed as a method (rather than reading the field directly) so that the
/// value can be stubbed when the struct is mocked.
// `dead_code` is allowed because not every build configuration calls this getter.
#[allow(dead_code)]
pub fn get_config_timestamp(&self) -> Instant {
self.config_timestamp
}
#[allow(dead_code)]
pub async fn stop_all_discovery(&self) {
let mut discovery_handler_map = self.discovery_handler_map.lock().unwrap().clone();
Expand Down Expand Up @@ -794,6 +802,7 @@ async fn get_discovery_property_value_from_config_map(
}

pub mod start_discovery {
use super::super::metrics::{DISCOVERY_RESPONSE_RESULT_METRIC, DISCOVERY_RESPONSE_TIME_METRIC};
use super::super::registration::{DiscoveryDetails, DiscoveryHandlerEndpoint};
// Use this `mockall` macro to automate importing a mock type in test mode, or a real type otherwise.
use super::super::device_plugin_builder::{DevicePluginBuilder, DevicePluginBuilderInterface};
Expand Down Expand Up @@ -1038,11 +1047,26 @@ pub mod start_discovery {
dh_details: &'a DiscoveryDetails,
node_name: String,
) -> anyhow::Result<()> {
// get discovery handler name for metric use
let dh_name = discovery_operator.get_config().spec.discovery_handler.name;
let (_config_namespace, config_name) = discovery_operator.get_config_id();
let mut first_call = true;
loop {
if let Some(stream_type) = discovery_operator
let stream_type = discovery_operator
.get_stream(kube_interface.clone(), endpoint)
.await
{
.await;
let request_result = stream_type.as_ref().map(|_| "Success").unwrap_or("Fail");
DISCOVERY_RESPONSE_RESULT_METRIC
.with_label_values(&[&dh_name, request_result])
.inc();
if first_call {
first_call = false;
let start_time = discovery_operator.get_config_timestamp();
DISCOVERY_RESPONSE_TIME_METRIC
.with_label_values(&[&config_name])
.observe(start_time.elapsed().as_secs_f64());
}
if let Some(stream_type) = stream_type {
match stream_type {
StreamType::External(mut stream) => {
match discovery_operator
Expand Down Expand Up @@ -1574,6 +1598,11 @@ pub mod tests {
.unwrap();
});
}
// `get_config_timestamp` should be called exactly once
mock_discovery_operator
.expect_get_config_timestamp()
.times(1)
.returning(Instant::now);
let (mut finished_discovery_sender, finished_discovery_receiver) =
tokio::sync::mpsc::channel(2);
let (new_dh_sender, _) = broadcast::channel(2);
Expand Down Expand Up @@ -1636,6 +1665,11 @@ pub mod tests {
.expect_internal_do_discover()
.times(1)
.returning(|_, _, _, _| Ok(()));
// `get_config_timestamp` should be called exactly once
mock_discovery_operator
.expect_get_config_timestamp()
.times(1)
.returning(Instant::now);
let mock_kube_interface: Arc<dyn k8s::KubeInterface> = Arc::new(MockKubeInterface::new());
start_discovery::do_discover(
Arc::new(mock_discovery_operator),
Expand Down
28 changes: 28 additions & 0 deletions agent/src/util/metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
use lazy_static::lazy_static;
use prometheus::{opts, register_int_counter_vec, HistogramVec, IntCounterVec, IntGaugeVec};

// Bucket boundaries (in seconds) for the discovery response time histogram,
// `DISCOVERY_RESPONSE_TIME_METRIC`. The values are passed to
// `register_histogram_vec!` as the histogram's buckets.
const DISCOVERY_RESPONSE_TIME_BUCKETS: &[f64; 9] =
&[0.25, 0.5, 1.0, 1.5, 2.0, 3.0, 5.0, 10.0, 60.0];

// Prometheus metrics reported by the Agent.
// Each static is built by a `prometheus` `register_*` macro inside `lazy_static!`,
// so it is created the first time it is dereferenced. If creation/registration
// fails, the `expect` call panics with the message naming the metric.
lazy_static! {
// Reports the number of Instances visible to this node, grouped by Configuration and whether it is shared
// Labels: `configuration`, `is_shared`.
pub static ref INSTANCE_COUNT_METRIC: IntGaugeVec = prometheus::register_int_gauge_vec!(
"akri_instance_count",
"Akri Instance Count",
&["configuration", "is_shared"])
.expect("akri_instance_count metric can be created");
// Reports the time to get discovery results, grouped by Configuration
// Label: `configuration`. Uses the bucket boundaries (in seconds) from
// `DISCOVERY_RESPONSE_TIME_BUCKETS` instead of the library defaults.
pub static ref DISCOVERY_RESPONSE_TIME_METRIC: HistogramVec = prometheus::register_histogram_vec!(
"akri_discovery_response_time",
"Akri Discovery Response Time",
&["configuration"],
DISCOVERY_RESPONSE_TIME_BUCKETS.to_vec()
)
.expect("akri_discovery_response_time metric can be created");
// Counts the results of discovery requests, grouped by Discovery Handler name and whether the request succeeded
// Labels: `discovery_handler_name`, `result`.
pub static ref DISCOVERY_RESPONSE_RESULT_METRIC: IntCounterVec = register_int_counter_vec!(
opts!("akri_discovery_response_result", "Akri Discovery Response Result"),
&["discovery_handler_name", "result"])
.expect("akri_discovery_response_result metric can be created");
}
1 change: 1 addition & 0 deletions agent/src/util/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ mod device_plugin_service;
pub mod discovery_operator;
#[cfg(any(test, feature = "agent-full"))]
pub mod embedded_discovery_handlers;
mod metrics;
pub mod registration;
pub mod slot_reconciliation;
pub mod streaming_extension;
Expand Down
2 changes: 1 addition & 1 deletion controller/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "controller"
version = "0.12.11"
version = "0.12.12"
license = "Apache-2.0"
authors = ["<bfjelds@microsoft.com>", "<kagold@microsoft.com>"]
edition = "2018"
Expand Down
4 changes: 2 additions & 2 deletions deployment/helm/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.12.11
version: 0.12.12

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
appVersion: 0.12.11
appVersion: 0.12.12
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "debug-echo-discovery-handler"
version = "0.12.11"
version = "0.12.12"
license = "Apache-2.0"
authors = ["Kate Goldenring <kate.goldenring@microsoft.com>"]
edition = "2018"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "onvif-discovery-handler"
version = "0.12.11"
version = "0.12.12"
license = "Apache-2.0"
authors = ["Kate Goldenring <kate.goldenring@microsoft.com>"]
edition = "2018"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "opcua-discovery-handler"
version = "0.12.11"
version = "0.12.12"
license = "Apache-2.0"
authors = ["Kate Goldenring <kate.goldenring@microsoft.com>"]
edition = "2018"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "udev-discovery-handler"
version = "0.12.11"
version = "0.12.12"
license = "Apache-2.0"
authors = ["Kate Goldenring <kate.goldenring@microsoft.com>"]
edition = "2018"
Expand Down
2 changes: 1 addition & 1 deletion discovery-handlers/debug-echo/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "akri-debug-echo"
version = "0.12.11"
version = "0.12.12"
license = "Apache-2.0"
authors = ["Kate Goldenring <kate.goldenring@microsoft.com>"]
edition = "2018"
Expand Down
2 changes: 1 addition & 1 deletion discovery-handlers/onvif/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "akri-onvif"
version = "0.12.11"
version = "0.12.12"
license = "Apache-2.0"
authors = ["Kate Goldenring <kate.goldenring@microsoft.com>"]
edition = "2018"
Expand Down
2 changes: 1 addition & 1 deletion discovery-handlers/opcua/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "akri-opcua"
version = "0.12.11"
version = "0.12.12"
license = "Apache-2.0"
authors = ["Kate Goldenring <kate.goldenring@microsoft.com>"]
edition = "2018"
Expand Down
2 changes: 1 addition & 1 deletion discovery-handlers/udev/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "akri-udev"
version = "0.12.11"
version = "0.12.12"
license = "Apache-2.0"
authors = ["Kate Goldenring <kate.goldenring@microsoft.com>"]
edition = "2018"
Expand Down
2 changes: 1 addition & 1 deletion discovery-utils/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "akri-discovery-utils"
version = "0.12.11"
version = "0.12.12"
license = "Apache-2.0"
authors = ["Kate Goldenring <kate.goldenring@microsoft.com>"]
edition = "2018"
Expand Down
2 changes: 1 addition & 1 deletion samples/brokers/udev-video-broker/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "udev-video-broker"
version = "0.12.11"
version = "0.12.12"
license = "Apache-2.0"
authors = ["Kate Goldenring <kate.goldenring@microsoft.com>", "<bfjelds@microsoft.com>"]
edition = "2018"
Expand Down
Loading

0 comments on commit cc29959

Please sign in to comment.