Skip to content

Add new self-profiling event to cheaply aggregate query cache hit counts #142978

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 87 additions & 7 deletions compiler/rustc_data_structures/src/profiling.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ use std::fmt::Display;
use std::intrinsics::unlikely;
use std::path::Path;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{Duration, Instant};
use std::{fs, process};

Expand All @@ -105,6 +106,8 @@ bitflags::bitflags! {
struct EventFilter: u16 {
const GENERIC_ACTIVITIES = 1 << 0;
const QUERY_PROVIDERS = 1 << 1;
/// Store detailed instant events, including timestamp and thread ID,
/// for each query cache hit. Note that this is quite expensive.
const QUERY_CACHE_HITS = 1 << 2;
const QUERY_BLOCKED = 1 << 3;
const INCR_CACHE_LOADS = 1 << 4;
Expand All @@ -113,16 +116,20 @@ bitflags::bitflags! {
const FUNCTION_ARGS = 1 << 6;
const LLVM = 1 << 7;
const INCR_RESULT_HASHING = 1 << 8;
const ARTIFACT_SIZES = 1 << 9;
const ARTIFACT_SIZES = 1 << 9;
/// Store aggregated counts of cache hits per query invocation.
const QUERY_CACHE_HIT_COUNTS = 1 << 10;

const DEFAULT = Self::GENERIC_ACTIVITIES.bits() |
Self::QUERY_PROVIDERS.bits() |
Self::QUERY_BLOCKED.bits() |
Self::INCR_CACHE_LOADS.bits() |
Self::INCR_RESULT_HASHING.bits() |
Self::ARTIFACT_SIZES.bits();
Self::ARTIFACT_SIZES.bits() |
Self::QUERY_CACHE_HIT_COUNTS.bits();

const ARGS = Self::QUERY_KEYS.bits() | Self::FUNCTION_ARGS.bits();
const QUERY_CACHE_HIT_COMBINED = Self::QUERY_CACHE_HITS.bits() | Self::QUERY_CACHE_HIT_COUNTS.bits();
}
}

Expand All @@ -134,6 +141,7 @@ const EVENT_FILTERS_BY_NAME: &[(&str, EventFilter)] = &[
("generic-activity", EventFilter::GENERIC_ACTIVITIES),
("query-provider", EventFilter::QUERY_PROVIDERS),
("query-cache-hit", EventFilter::QUERY_CACHE_HITS),
// This filter name must map to the cheap aggregated-count flag, not to the
// expensive per-hit instant-event flag (`QUERY_CACHE_HITS`), which has its own
// `query-cache-hit` name above.
("query-cache-hit-count", EventFilter::QUERY_CACHE_HIT_COUNTS),
("query-blocked", EventFilter::QUERY_BLOCKED),
("incr-cache-load", EventFilter::INCR_CACHE_LOADS),
("query-keys", EventFilter::QUERY_KEYS),
Expand Down Expand Up @@ -411,13 +419,24 @@ impl SelfProfilerRef {
// Kept out-of-line and marked cold so the hot query path only pays for a single
// branch on the (combined) event filter mask; see the caller below.
#[inline(never)]
#[cold]
fn cold_call(profiler_ref: &SelfProfilerRef, query_invocation_id: QueryInvocationId) {
// Cheap aggregated counting: just bumps an atomic counter per query invocation.
// This flag is part of `EventFilter::DEFAULT`, so this branch is the common one.
if profiler_ref.event_filter_mask.contains(EventFilter::QUERY_CACHE_HIT_COUNTS) {
// The profiler must be present, otherwise the event filter mask would be empty.
profiler_ref
.profiler
.as_ref()
.unwrap()
.increment_query_cache_hit_counters(QueryInvocationId(query_invocation_id.0));
}
// Expensive opt-in path: records a full instant event (timestamp + thread ID)
// for every single cache hit.
if unlikely(profiler_ref.event_filter_mask.contains(EventFilter::QUERY_CACHE_HITS)) {
profiler_ref.instant_query_event(
|profiler| profiler.query_cache_hit_event_kind,
query_invocation_id,
);
}
}

if unlikely(self.event_filter_mask.contains(EventFilter::QUERY_CACHE_HITS)) {
// We check both kinds of query cache hit events at once, to reduce overhead in the
// common case (with self-profile disabled).
if unlikely(self.event_filter_mask.intersects(EventFilter::QUERY_CACHE_HIT_COMBINED)) {
cold_call(self, query_invocation_id);
}
}
Expand Down Expand Up @@ -489,6 +508,30 @@ impl SelfProfilerRef {
self.profiler.as_ref().map(|p| p.get_or_alloc_cached_string(s))
}

/// Store the aggregated query cache hit counts to the self-profile log.
/// Should be called once at the end of the compilation session.
///
/// The cache hits are stored per **query invocation**, not **per query kind/type**.
/// `analyzeme` can later deduplicate individual query labels from the QueryInvocationId event
/// IDs.
pub fn store_query_cache_hits(&self) {
    if !self.event_filter_mask.contains(EventFilter::QUERY_CACHE_HIT_COUNTS) {
        return;
    }
    // The profiler must exist if the event filter mask is non-empty.
    let profiler = self.profiler.as_ref().unwrap();
    let event_id_builder = EventIdBuilder::new(&profiler.profiler);
    let thread_id = get_thread_id();
    let counters = profiler.query_hits.read();
    for (invocation_index, counter) in counters.iter().enumerate() {
        // The vector index *is* the query invocation ID; encode it as a virtual
        // string ID so `analyzeme` can resolve it back to a query label.
        let event_id = event_id_builder.from_label(StringId::new_virtual(invocation_index as u64));
        profiler.profiler.record_integer_event(
            profiler.query_cache_hit_count_event_kind,
            event_id,
            thread_id,
            counter.load(Ordering::Relaxed),
        );
    }
}

#[inline]
pub fn enabled(&self) -> bool {
self.profiler.is_some()
Expand Down Expand Up @@ -537,13 +580,28 @@ pub struct SelfProfiler {

string_cache: RwLock<FxHashMap<String, StringId>>,

/// Recording individual query cache hits as "instant" measureme events
/// is incredibly expensive. Instead of doing that, we simply aggregate
/// cache hit *counts* per query invocation, and then store the final count
/// of cache hits per invocation at the end of the compilation session.
///
/// With this approach, we don't know the individual thread IDs and timestamps
/// of cache hits, but it has very little overhead on top of `-Zself-profile`.
/// Recording the cache hits as individual events made compilation 3-5x slower.
///
/// Query invocation IDs should be monotonic integers, so we can store them in a vec,
/// rather than using a hashmap.
query_hits: RwLock<Vec<AtomicU64>>,

query_event_kind: StringId,
generic_activity_event_kind: StringId,
incremental_load_result_event_kind: StringId,
incremental_result_hashing_event_kind: StringId,
query_blocked_event_kind: StringId,
query_cache_hit_event_kind: StringId,
artifact_size_event_kind: StringId,
/// Total cache hits per query invocation
query_cache_hit_count_event_kind: StringId,
}

impl SelfProfiler {
Expand Down Expand Up @@ -573,6 +631,7 @@ impl SelfProfiler {
let query_blocked_event_kind = profiler.alloc_string("QueryBlocked");
let query_cache_hit_event_kind = profiler.alloc_string("QueryCacheHit");
let artifact_size_event_kind = profiler.alloc_string("ArtifactSize");
let query_cache_hit_count_event_kind = profiler.alloc_string("QueryCacheHitCount");

let mut event_filter_mask = EventFilter::empty();

Expand Down Expand Up @@ -618,6 +677,8 @@ impl SelfProfiler {
query_blocked_event_kind,
query_cache_hit_event_kind,
artifact_size_event_kind,
query_cache_hit_count_event_kind,
query_hits: Default::default(),
})
}

Expand All @@ -627,6 +688,25 @@ impl SelfProfiler {
self.profiler.alloc_string(s)
}

/// Store a cache hit of a query invocation.
///
/// The fast path takes a *shared* read lock, so concurrent cache hits on different
/// threads do not serialize against each other; the per-slot `AtomicU64` counters
/// only need `Relaxed` increments. (An upgradable read lock would not work here:
/// upgradable reads are mutually exclusive with each other, which would serialize
/// every cache hit in the compiler on this lock.)
pub fn increment_query_cache_hit_counters(&self, id: QueryInvocationId) {
    let index = id.0 as usize;
    // Fast path: the invocation was already observed, so its slot exists and we
    // only need an atomic increment while holding the shared read lock.
    {
        let query_hits = self.query_hits.read();
        if let Some(counter) = query_hits.get(index) {
            counter.fetch_add(1, Ordering::Relaxed);
            return;
        }
    }
    // Slow path: extend the vector up to the highest observed invocation ID.
    // Query invocation IDs should be monotonic integers, so a vector indexed by
    // the ID works and stays dense.
    let mut query_hits = self.query_hits.write();
    // Another thread may have extended the vector (and even recorded hits for this
    // slot) between releasing the read lock and acquiring the write lock, so
    // re-check the length and *increment* the slot rather than overwriting it.
    if query_hits.len() <= index {
        query_hits.resize_with(index + 1, || AtomicU64::new(0));
    }
    query_hits[index].fetch_add(1, Ordering::Relaxed);
}

/// Gets a `StringId` for the given string. This method makes sure that
/// any strings going through it will only be allocated once in the
/// profiling data.
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_query_impl/src/profiling_support.rs
Original file line number Diff line number Diff line change
Expand Up @@ -259,4 +259,5 @@ pub fn alloc_self_profile_query_strings(tcx: TyCtxt<'_>) {
for alloc in super::ALLOC_SELF_PROFILE_QUERY_STRINGS.iter() {
alloc(tcx, &mut string_cache)
}
tcx.sess.prof.store_query_cache_hits();
}
Loading