Skip to content
This repository has been archived by the owner on Jul 26, 2024. It is now read-only.

feat: add more metrics #130

Merged
merged 11 commits into from
Jun 7, 2021
12 changes: 11 additions & 1 deletion src/adm/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use url::Url;
use super::{tiles::AdmTile, AdmAdvertiserFilterSettings, Tile, DEFAULT};
use crate::{
error::{HandlerError, HandlerErrorKind, HandlerResult},
metrics::Metrics,
tags::Tags,
web::middleware::sentry as l_sentry,
};
Expand Down Expand Up @@ -208,7 +209,12 @@ impl AdmFilter {
///
/// - Returns None for tiles that shouldn't be shown to the client
/// - Modifies tiles for output to the client (adding additional fields, etc.)
pub fn filter_and_process(&self, mut tile: AdmTile, tags: &mut Tags) -> Option<Tile> {
pub fn filter_and_process(
&self,
mut tile: AdmTile,
tags: &mut Tags,
metrics: &Metrics,
) -> Option<Tile> {
// Use strict matching for now, eventually, we may want to use backwards expanding domain
// searches, (.e.g "xyz.example.com" would match "example.com")
match self.filter_set.get(&tile.name.to_lowercase()) {
Expand Down Expand Up @@ -238,16 +244,19 @@ impl AdmFilter {
};
if let Err(e) = self.check_advertiser(adv_filter, &mut tile, tags) {
dbg!("bad adv");
metrics.incr_with_tags("tag.err.invalid_advertiser", Some(tags));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what does "tag" in this name mean?

Also related, dunno if you saw this comment: #130 (review)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah, I think that was the product of a bad replace.
I'll correct the names (and I'll also add adm)

self.report(&e, tags);
return None;
}
if let Err(e) = self.check_click(click_filter, &mut tile, tags) {
dbg!("bad click");
metrics.incr_with_tags("tag.err.invalid_click", Some(tags));
self.report(&e, tags);
return None;
}
if let Err(e) = self.check_impression(impression_filter, &mut tile, tags) {
dbg!("bad imp");
metrics.incr_with_tags("tag.err.invalid_impression", Some(tags));
self.report(&e, tags);
return None;
}
Expand All @@ -260,6 +269,7 @@ impl AdmFilter {
))
}
None => {
metrics.incr_with_tags("tag.err.unexpected_advertiser", Some(tags));
self.report(
&HandlerErrorKind::UnexpectedAdvertiser(tile.name).into(),
tags,
Expand Down
4 changes: 3 additions & 1 deletion src/adm/tiles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use url::Url;
use crate::{
adm::DEFAULT,
error::{HandlerError, HandlerErrorKind, HandlerResult},
metrics::Metrics,
server::{location::LocationResult, ServerState},
settings::Settings,
tags::Tags,
Expand Down Expand Up @@ -115,6 +116,7 @@ pub async fn get_tiles(
form_factor: FormFactor,
state: &ServerState,
tags: &mut Tags,
metrics: &Metrics,
headers: Option<&HeaderMap>,
) -> Result<TileResponse, HandlerError> {
// XXX: Assumes adm_endpoint_url includes
Expand Down Expand Up @@ -177,7 +179,7 @@ pub async fn get_tiles(
let tiles = response
.tiles
.into_iter()
.filter_map(|tile| state.filter.filter_and_process(tile, tags))
.filter_map(|tile| state.filter.filter_and_process(tile, tags, metrics))
.take(settings.adm_max_tiles as usize)
.collect();
Ok(TileResponse { tiles })
Expand Down
4 changes: 2 additions & 2 deletions src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,12 @@ impl Metrics {
}

/// Increment a given metric with optional [crate::tags::Tags]
pub fn incr_with_tags(&self, label: &str, tags: Option<Tags>) {
pub fn incr_with_tags(&self, label: &str, tags: Option<&Tags>) {
if let Some(client) = self.client.as_ref() {
let mut tagged = client.incr_with_tags(label);
let mut mtags = self.tags.clone().unwrap_or_default();
if let Some(tags) = tags {
mtags.extend(tags);
mtags.extend(tags.clone());
}
for key in mtags.tags.keys().clone() {
if let Some(val) = mtags.tags.get(key) {
Expand Down
46 changes: 38 additions & 8 deletions src/server/cache.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
//! Tile cache manager
use std::{collections::HashMap, fmt::Debug, ops::Deref, sync::Arc, time::Duration};
use std::{
collections::HashMap,
fmt::Debug,
ops::Deref,
sync::Arc,
time::{Duration, SystemTime},
};

use cadence::Counted;
use tokio::sync::RwLock;

use crate::{
adm,
metrics::Metrics,
server::location::LocationResult,
server::ServerState,
tags::Tags,
Expand All @@ -32,6 +38,7 @@ pub struct AudienceKey {
#[derive(Debug)]
pub struct Tiles {
pub json: String,
pub ttl: SystemTime,
}

/// The simple tile Cache
Expand Down Expand Up @@ -71,14 +78,25 @@ async fn tile_cache_updater(state: &ServerState) {
tiles_cache,
reqwest_client,
adm_endpoint_url,
metrics,
settings,
..
} = state;

trace!("tile_cache_updater..");
let keys: Vec<_> = tiles_cache.read().await.keys().cloned().collect();
let tiles = tiles_cache.read().await;
jrconlin marked this conversation as resolved.
Show resolved Hide resolved
let keys: Vec<_> = tiles.keys().cloned().collect();
let mut cache_size = 0;
let mut cache_count: i64 = 0;
for key in keys {
// proactively remove expired tiles from the cache, since we only
// write new ones (or ones which return a value)
if let Some(tile) = tiles.get(&key) {
if tile.ttl <= SystemTime::now() {
tiles_cache.write().await.remove(&key);
}
}
let mut tags = Tags::default();
let metrics = Metrics::from(state);
let result = adm::get_tiles(
reqwest_client,
adm_endpoint_url,
Expand All @@ -92,6 +110,7 @@ async fn tile_cache_updater(state: &ServerState) {
key.form_factor,
state,
&mut tags,
&metrics,
None,
)
.await;
Expand All @@ -103,10 +122,12 @@ async fn tile_cache_updater(state: &ServerState) {
Ok(tiles) => tiles,
Err(e) => {
error!("tile_cache_updater: response error {}", e);
metrics.incr_with_tags("tile_cache_updater.error").send();
metrics.incr_with_tags("tile_cache_updater.error", Some(&tags));
continue;
}
};
cache_size += tiles.len();
cache_count += 1;
// XXX: not a great comparison (comparing json Strings)..
let new_tiles = {
tiles_cache
Expand All @@ -117,14 +138,23 @@ async fn tile_cache_updater(state: &ServerState) {
};
if new_tiles {
trace!("tile_cache_updater updating: {:?}", &key);
tiles_cache.write().await.insert(key, Tiles { json: tiles });
metrics.incr_with_tags("tile_cache_updater.update").send();
tiles_cache.write().await.insert(
key,
Tiles {
json: tiles,
ttl: SystemTime::now() + Duration::from_secs(settings.tiles_ttl as u64),
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm debating if we should make this TTL * 2, to cover potential issues with ADM returning nothing, or an error. To be fair, we should probably also exit if adM returns anything other than a 200, and assume that they're having problems.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Initially I leaned towards hiding things from the client like this but #92 suggests the client wants more feedback from the other end.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, if a background refresh fails, how should we notify the client?

},
);
metrics.incr_with_tags("tile_cache_updater.update", Some(&tags));
}
}
Err(e) => {
error!("tile_cache_updater error: {}", e);
metrics.incr_with_tags("tile_cache_updater.error").send();
metrics.incr_with_tags("tile_cache_updater.error", Some(&tags));
}
}
}
let metrics = Metrics::from(state);
metrics.count("tile_cache_updater.size", cache_size as i64);
metrics.count("tile_cache_updater.count", cache_count);
}
27 changes: 25 additions & 2 deletions src/server/location.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ use maxminddb::{self, geoip2::City, MaxMindDBError};
use serde::{self, Serialize};

use crate::error::{HandlerErrorKind, HandlerResult};
use crate::metrics::Metrics;
use crate::settings::Settings;
use crate::tags::Tags;

const GOOG_LOC_HEADER: &str = "x-client-geo-location";

Expand Down Expand Up @@ -255,6 +257,7 @@ impl Location {
&self,
ip_addr: IpAddr,
preferred_languages: &[String],
metrics: &Metrics,
) -> HandlerResult<Option<LocationResult>> {
if self.iploc.is_none() {
return Ok(None);
Expand Down Expand Up @@ -302,9 +305,19 @@ impl Location {
Ok(location) => {
if let Some(names) = location.city.and_then(|c| c.names) {
result.city = get_preferred_language_element(&preferred_languages, &names)
} else {
metrics.incr_with_tags(
"location.unknown.city",
Some(&Tags::from_extra(vec![("ip", ip_addr.to_string())])),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extras are only sent to sentry, so all these one offs in this module are unnecessary for metrics calls (I guess that applies to the new from_extra method too)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I debated adding these as proper tags, but I figured we'd hit cardinality issues pretty fast. I'll remove the tag from this for now.

As for keeping the from::extra, I can see that potentially being useful in the future.

);
};
if let Some(names) = location.country.and_then(|c| c.names) {
result.country = get_preferred_language_element(&preferred_languages, &names)
} else {
jrconlin marked this conversation as resolved.
Show resolved Hide resolved
metrics.incr_with_tags(
"location.unknown.country",
Some(&Tags::from_extra(vec![("ip", ip_addr.to_string())])),
);
};
if let Some(divs) = location.subdivisions {
if let Some(subdivision) = divs.get(0) {
Expand All @@ -313,6 +326,11 @@ impl Location {
get_preferred_language_element(&preferred_languages, names);
}
}
} else {
metrics.incr_with_tags(
"location.unknown.subdivision",
Some(&Tags::from_extra(vec![("ip", ip_addr.to_string())])),
)
}
if let Some(location) = location.location {
result.dma = location.metro_code;
Expand Down Expand Up @@ -396,9 +414,13 @@ mod test {
..Default::default()
};
let location = Location::from(&settings);
let metrics = Metrics::noop();
if location.is_available() {
// TODO: either mock maxminddb::Reader or pass it in as a wrapped impl
let result = location.mmdb_locate(test_ip, &langs).await?.unwrap();
let result = location
.mmdb_locate(test_ip, &langs, &metrics)
.await?
.unwrap();
assert_eq!(result.city, Some("Milton".to_owned()));
assert_eq!(result.subdivision, Some("Washington".to_owned()));
assert_eq!(result.country, Some("United States".to_owned()));
Expand All @@ -417,8 +439,9 @@ mod test {
..Default::default()
};
let location = Location::from(&settings);
let metrics = Metrics::noop();
if location.is_available() {
let result = location.mmdb_locate(test_ip, &langs).await?;
let result = location.mmdb_locate(test_ip, &langs, &metrics).await?;
assert!(result.is_none());
} else {
println!("⚠Location Database not found, cannot test location, skipping");
Expand Down
16 changes: 15 additions & 1 deletion src/tags.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,21 @@ impl From<HttpRequest> for Tags {
}
}

/// Tags are extra data to be recorded in metric and logging calls.
/// Convenience function to bulk load `extra`
impl Tags {
pub fn from_extra(map: Vec<(&'static str, String)>) -> Self {
let mut extra = HashMap::new();
for (key, val) in map {
extra.insert(key.to_owned(), val);
}
Self {
tags: HashMap::new(),
extra,
}
}
}

// Tags are extra data to be recorded in metric and logging calls.
/// If additional tags are required or desired, you will need to add them to the
/// mutable extensions, e.g.
/// ```compile_fail
Expand Down
17 changes: 12 additions & 5 deletions src/web/handlers.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
//! API Handlers
use std::time::{Duration, SystemTime};

use actix_web::{web, HttpRequest, HttpResponse};

use super::user_agent;
Expand All @@ -24,6 +26,7 @@ pub async fn get_tiles(
trace!("get_tiles");

let cinfo = request.connection_info();
let settings = &state.settings;
let ip_addr_str = cinfo.remote_addr().unwrap_or({
let default = state
.adm_country_ip_map
Expand All @@ -47,11 +50,11 @@ pub async fn get_tiles(
};
state
.mmdb
.mmdb_locate(addr, &["en".to_owned()])
.mmdb_locate(addr, &["en".to_owned()], &metrics)
.await?
.unwrap_or_else(|| LocationResult::from_header(header, &state.settings))
.unwrap_or_else(|| LocationResult::from_header(header, settings))
} else {
LocationResult::from_header(header, &state.settings)
LocationResult::from_header(header, settings)
};

let mut tags = Tags::default();
Expand All @@ -69,7 +72,7 @@ pub async fn get_tiles(
form_factor,
os_family,
};
if !state.settings.test_mode {
if !settings.test_mode {
if let Some(tiles) = state.tiles_cache.read().await.get(&audience_key) {
trace!("get_tiles: cache hit: {:?}", audience_key);
metrics.incr("tiles_cache.hit");
Expand All @@ -86,8 +89,9 @@ pub async fn get_tiles(
form_factor,
&state,
&mut tags,
&metrics,
// be aggressive about not passing headers unless we absolutely need to
if state.settings.test_mode {
if settings.test_mode {
Some(request.head().headers())
} else {
None
Expand All @@ -98,6 +102,7 @@ pub async fn get_tiles(
Ok(response) => {
// adM sometimes returns an invalid response. We don't want to cache that.
if response.tiles.is_empty() {
metrics.incr_with_tags("tiles.empty", Some(&tags));
return Ok(HttpResponse::NoContent().finish());
};
let tiles = serde_json::to_string(&response).map_err(|e| {
Expand All @@ -109,13 +114,15 @@ pub async fn get_tiles(
audience_key,
cache::Tiles {
json: tiles.clone(),
ttl: SystemTime::now() + Duration::from_secs(settings.tiles_ttl as u64),
},
);
tiles
}
Err(e) => match e.kind() {
HandlerErrorKind::BadAdmResponse(es) => {
warn!("Bad response from ADM: {:?}", e);
metrics.incr_with_tags("tiles.invalid", Some(&tags));
// Report directly to sentry
// (This is starting to become a pattern. 🤔)
let mut tags = Tags::from(request.head());
Expand Down