[WIP] Split query execution into hot and cold paths #69109

Closed · wants to merge 2 commits

1 change: 1 addition & 0 deletions src/librustc/dep_graph/graph.rs
@@ -1122,6 +1122,7 @@ impl CurrentDepGraph {
}

impl DepGraphData {
#[inline]
fn read_index(&self, source: DepNodeIndex) {
ty::tls::with_context_opt(|icx| {
let icx = if let Some(icx) = icx { icx } else { return };
1 change: 1 addition & 0 deletions src/librustc/ty/context.rs
@@ -1688,6 +1688,7 @@ pub mod tls {

/// Gets the pointer to the current `ImplicitCtxt`.
#[cfg(not(parallel_compiler))]
#[inline]
fn get_tlv() -> usize {
TLV.with(|tlv| tlv.get())
}
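
The two one-line hunks above just add `#[inline]` to small, frequently-called functions (`DepGraphData::read_index` and `tls::get_tlv`) so the query fast path can inline them across crate boundaries. As a rough illustration of the `get_tlv` pattern only, here is a self-contained sketch using plain std; everything outside the two accessors is invented for the example and is not rustc code.

use std::cell::Cell;

thread_local! {
    // Thread-local slot, analogous to the TLV value read by `get_tlv`.
    static TLV: Cell<usize> = Cell::new(0);
}

// Small accessor marked #[inline] so callers in other crates can inline it.
#[inline]
fn get_tlv() -> usize {
    TLV.with(|tlv| tlv.get())
}

#[inline]
fn set_tlv(value: usize) {
    TLV.with(|tlv| tlv.set(value))
}

fn main() {
    set_tlv(7);
    assert_eq!(get_tlv(), 7);
}
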
268 changes: 163 additions & 105 deletions src/librustc/ty/query/plumbing.rs
@@ -12,10 +12,8 @@ use crate::ty::{self, TyCtxt};
#[cfg(not(parallel_compiler))]
use rustc_data_structures::cold_path;
use rustc_data_structures::fx::{FxHashMap, FxHasher};
#[cfg(parallel_compiler)]
use rustc_data_structures::profiling::TimingGuard;
use rustc_data_structures::sharded::Sharded;
use rustc_data_structures::sync::Lock;
use rustc_data_structures::sync::{Lock, LockGuard};
use rustc_data_structures::thin_vec::ThinVec;
use rustc_errors::{struct_span_err, Diagnostic, DiagnosticBuilder, FatalError, Handler, Level};
use rustc_span::source_map::DUMMY_SP;
@@ -70,6 +68,12 @@ impl<'tcx, M: QueryConfig<'tcx>> Default for QueryCache<'tcx, M> {
}
}

/// Values used when checking a query cache which can be reused on a cache-miss to execute the query.
pub(super) struct QueryLookup<'tcx, Q: QueryDescription<'tcx>> {
shard: usize,
lock: LockGuard<'tcx, QueryCache<'tcx, Q>>,
}
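
A `QueryLookup` carries the shard index and the still-held shard lock from a cache probe that missed, so the cold path can start the query without hashing the key or locking the shard a second time. The following standalone sketch shows the same shape with std types only; `ShardedCache`, `Lookup`, the fixed shard count, and the string values are all invented for illustration, and rustc's real version additionally reuses the computed hash for the inner `FxHashMap` lookup via the raw-entry API, which this sketch skips.

use std::collections::HashMap;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::sync::{Mutex, MutexGuard};

const SHARDS: usize = 4;

struct ShardedCache {
    shards: [Mutex<HashMap<u64, String>>; SHARDS],
}

// State from a failed probe that the caller reuses to run the query:
// the shard index and the still-held guard for that shard.
struct Lookup<'a> {
    shard: usize,
    guard: MutexGuard<'a, HashMap<u64, String>>,
}

impl ShardedCache {
    // Hash the key once, use the hash to pick a shard, then probe that shard.
    // On a miss, hand the locked shard back to the caller instead of dropping
    // the lock and redoing the work later.
    fn try_get(&self, key: u64) -> Result<String, Lookup<'_>> {
        let mut hasher = DefaultHasher::new();
        key.hash(&mut hasher);
        let shard = (hasher.finish() as usize) % SHARDS;

        let guard = self.shards[shard].lock().unwrap();
        if let Some(value) = guard.get(&key) {
            return Ok(value.clone());
        }
        Err(Lookup { shard, guard })
    }
}

fn main() {
    let cache = ShardedCache { shards: std::array::from_fn(|_| Mutex::new(HashMap::new())) };
    match cache.try_get(42) {
        Ok(value) => println!("hit: {}", value),
        Err(mut lookup) => {
            // Cold path: compute and insert while still holding the shard lock.
            println!("miss in shard {}", lookup.shard);
            lookup.guard.insert(42, "computed".to_string());
        }
    }
}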

/// A type representing the responsibility to execute the job in the `job` field.
/// This will poison the relevant query if dropped.
pub(super) struct JobOwner<'a, 'tcx, Q: QueryDescription<'tcx>> {
@@ -81,119 +85,91 @@ pub(super) struct JobOwner<'a, 'tcx, Q: QueryDescription<'tcx>> {
impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
/// Either gets a `JobOwner` corresponding to the query, allowing us to
/// start executing the query, or returns with the result of the query.
/// If the query is executing elsewhere, this will wait for it.
/// This function assumes that `try_get_cached` is already called and returned `lookup`.
/// If the query is executing elsewhere, this will wait for it and return the result.
/// If the query panicked, this will silently panic.
///
/// This function is inlined because that results in a noticeable speed-up
/// for some compile-time benchmarks.
#[inline(always)]
pub(super) fn try_get(tcx: TyCtxt<'tcx>, span: Span, key: &Q::Key) -> TryGetJob<'a, 'tcx, Q> {
// Handling the `query_blocked_prof_timer` is a bit weird because of the
// control flow in this function: Blocking is implemented by
// awaiting a running job and, once that is done, entering the loop below
// again from the top. In that second iteration we will hit the
// cache which provides us with the information we need for
// finishing the "query-blocked" event.
//
// We thus allocate `query_blocked_prof_timer` outside the loop,
// initialize it during the first iteration and finish it during the
// second iteration.
#[cfg(parallel_compiler)]
let mut query_blocked_prof_timer: Option<TimingGuard<'_>> = None;

let cache = Q::query_cache(tcx);
loop {
// We compute the key's hash once and then use it for both the
// shard lookup and the hashmap lookup. This relies on the fact
// that both of them use `FxHasher`.
let mut state = FxHasher::default();
key.hash(&mut state);
let key_hash = state.finish();

let shard = cache.get_shard_index_by_hash(key_hash);
let mut lock_guard = cache.get_shard_by_index(shard).lock();
let lock = &mut *lock_guard;

if let Some((_, value)) =
lock.results.raw_entry().from_key_hashed_nocheck(key_hash, key)
{
if unlikely!(tcx.prof.enabled()) {
tcx.prof.query_cache_hit(value.index.into());
pub(super) fn try_start(
tcx: TyCtxt<'tcx>,
span: Span,
key: &Q::Key,
mut lookup: QueryLookup<'tcx, Q>,
) -> TryGetJob<'a, 'tcx, Q> {
let lock = &mut *lookup.lock;

let (latch, mut _query_blocked_prof_timer) = match lock.active.entry((*key).clone()) {
Entry::Occupied(mut entry) => {
match entry.get_mut() {
QueryResult::Started(job) => {
// For parallel queries, we'll block and wait until the query running
// in another thread has completed. Record how long we wait in the
// self-profiler.
let _query_blocked_prof_timer = if cfg!(parallel_compiler) {
Some(tcx.prof.query_blocked())
} else {
None
};

#[cfg(parallel_compiler)]
{
if let Some(prof_timer) = query_blocked_prof_timer.take() {
prof_timer.finish_with_query_invocation_id(value.index.into());
}
}
}
// Create the id of the job we're waiting for
let id = QueryJobId::new(job.id, lookup.shard, Q::dep_kind());

let result = (value.value.clone(), value.index);
#[cfg(debug_assertions)]
{
lock.cache_hits += 1;
(job.latch(id), _query_blocked_prof_timer)
}
QueryResult::Poisoned => FatalError.raise(),
}
return TryGetJob::JobCompleted(result);
}
Entry::Vacant(entry) => {
// No job entry for this query. Return a new one to be started later.

let latch = match lock.active.entry((*key).clone()) {
Entry::Occupied(mut entry) => {
match entry.get_mut() {
QueryResult::Started(job) => {
// For parallel queries, we'll block and wait until the query running
// in another thread has completed. Record how long we wait in the
// self-profiler.
#[cfg(parallel_compiler)]
{
query_blocked_prof_timer = Some(tcx.prof.query_blocked());
}

// Create the id of the job we're waiting for
let id = QueryJobId::new(job.id, shard, Q::dep_kind());
// Generate an id unique within this shard.
let id = lock.jobs.checked_add(1).unwrap();
lock.jobs = id;
let id = QueryShardJobId(NonZeroU32::new(id).unwrap());

job.latch(id)
}
QueryResult::Poisoned => FatalError.raise(),
}
}
Entry::Vacant(entry) => {
// No job entry for this query. Return a new one to be started later.
let global_id = QueryJobId::new(id, lookup.shard, Q::dep_kind());

// Generate an id unique within this shard.
let id = lock.jobs.checked_add(1).unwrap();
lock.jobs = id;
let id = QueryShardJobId(NonZeroU32::new(id).unwrap());
let job = tls::with_related_context(tcx, |icx| QueryJob::new(id, span, icx.query));

let global_id = QueryJobId::new(id, shard, Q::dep_kind());
entry.insert(QueryResult::Started(job));

let job =
tls::with_related_context(tcx, |icx| QueryJob::new(id, span, icx.query));
let owner =
JobOwner { cache: Q::query_cache(tcx), id: global_id, key: (*key).clone() };
return TryGetJob::NotYetStarted(owner);
}
};
mem::drop(lookup.lock);

entry.insert(QueryResult::Started(job));
// If we are single-threaded we know that we have a cycle error,
// so we just return the error.
#[cfg(not(parallel_compiler))]
return TryGetJob::Cycle(cold_path(|| {
Q::handle_cycle_error(tcx, latch.find_cycle_in_stack(tcx, span))
}));

let owner = JobOwner { cache, id: global_id, key: (*key).clone() };
return TryGetJob::NotYetStarted(owner);
}
};
mem::drop(lock_guard);
// With parallel queries we might just have to wait on some other
// thread.
#[cfg(parallel_compiler)]
{
let result = latch.wait_on(tcx, span);

// If we are single-threaded we know that we have a cycle error,
// so we just return the error.
#[cfg(not(parallel_compiler))]
return TryGetJob::Cycle(cold_path(|| {
Q::handle_cycle_error(tcx, latch.find_cycle_in_stack(tcx, span))
}));
if let Err(cycle) = result {
return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle));
}

// With parallel queries we might just have to wait on some other
// thread.
#[cfg(parallel_compiler)]
{
let result = latch.wait_on(tcx, span);
let cached = tcx.try_get_cached::<Q, _, _, _>(
key,
|(value, index)| (value.clone(), index),
|_, _| panic!("value must be in cache after waiting"),
);

if let Err(cycle) = result {
return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle));
}
if let Some(prof_timer) = _query_blocked_prof_timer.take() {
prof_timer.finish_with_query_invocation_id(cached.1.into());
}

return TryGetJob::JobCompleted(cached);
}
}
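
In the parallel case above, a thread that finds a `QueryResult::Started` entry records the job's latch, drops the shard lock, waits, and then re-reads the cache, which by then must contain the value. Below is a rough, self-contained sketch of that wait-on-a-latch shape using a `Condvar`; the `Latch` and `QueryState` types and the `"layout_of"` key are invented, and rustc's `QueryLatch` additionally performs cycle detection, which is omitted here.

use std::collections::HashMap;
use std::sync::{Arc, Condvar, Mutex};
use std::thread;
use std::time::Duration;

#[derive(Default)]
struct Latch {
    done: Mutex<bool>,
    cond: Condvar,
}

impl Latch {
    fn wait(&self) {
        let mut done = self.done.lock().unwrap();
        while !*done {
            done = self.cond.wait(done).unwrap();
        }
    }

    fn set(&self) {
        *self.done.lock().unwrap() = true;
        self.cond.notify_all();
    }
}

enum QueryState {
    Started(Arc<Latch>),
    Finished(u64),
}

fn main() {
    let state = Arc::new(Mutex::new(HashMap::<&'static str, QueryState>::new()));

    // Thread A claims the job and publishes a latch other threads can wait on.
    let latch = Arc::new(Latch::default());
    state.lock().unwrap().insert("layout_of", QueryState::Started(Arc::clone(&latch)));

    let state_b = Arc::clone(&state);
    let waiter = thread::spawn(move || {
        // Thread B sees the job is already started, blocks on its latch, and
        // then re-reads the cache, which must now contain the value.
        let maybe_latch = match state_b.lock().unwrap().get("layout_of") {
            Some(QueryState::Started(latch)) => Some(Arc::clone(latch)),
            _ => None,
        };
        if let Some(latch) = maybe_latch {
            latch.wait();
        }
        match state_b.lock().unwrap().get("layout_of") {
            Some(QueryState::Finished(value)) => println!("waited, got {}", value),
            _ => panic!("value must be in cache after waiting"),
        }
    });

    // Thread A "executes the query", publishes the result, then releases waiters.
    thread::sleep(Duration::from_millis(10));
    state.lock().unwrap().insert("layout_of", QueryState::Finished(42));
    latch.set();

    waiter.join().unwrap();
}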

@@ -269,6 +245,7 @@ pub(super) enum TryGetJob<'a, 'tcx, D: QueryDescription<'tcx>> {
/// The query was already completed.
/// Returns the result of the query and its dep-node index
/// if it succeeded or a cycle error if it failed.
#[cfg(parallel_compiler)]
JobCompleted((D::Value, DepNodeIndex)),

/// Trying to execute the query resulted in a cycle.
@@ -396,13 +373,78 @@ impl<'tcx> TyCtxt<'tcx> {
eprintln!("end of query stack");
}

/// Checks if the query is already computed and in the cache.
/// It returns the shard index and a lock guard to the shard,
/// which will be used if the query is not in the cache and we need
/// to compute it.
#[inline(always)]
fn try_get_cached<Q, R, OnHit, OnMiss>(
self,
key: Q::Key,
// `on_hit` can be called while holding a lock to the query cache
on_hit: OnHit,
on_miss: OnMiss,
) -> R
where
Q: QueryDescription<'tcx> + 'tcx,
OnHit: FnOnce(&Q::Value, DepNodeIndex) -> R,
OnMiss: FnOnce(Q::Key, QueryLookup<'tcx, Q>) -> R,
{
let cache = Q::query_cache(self);

// We compute the key's hash once and then use it for both the
// shard lookup and the hashmap lookup. This relies on the fact
// that both of them use `FxHasher`.
let mut state = FxHasher::default();
key.hash(&mut state);
let key_hash = state.finish();

let shard = cache.get_shard_index_by_hash(key_hash);
let mut lock_guard = cache.get_shard_by_index(shard).lock();
let lock = &mut *lock_guard;

let result = lock.results.raw_entry().from_key_hashed_nocheck(key_hash, &key);

if let Some((_, value)) = result {
if unlikely!(self.prof.enabled()) {
self.prof.query_cache_hit(value.index.into());
}

on_hit(&value.value, value.index)
} else {
on_miss(key, QueryLookup { lock: lock_guard, shard })
}
}
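
`try_get_cached` above is the hot path in a nutshell: hash once, lock one shard, and then call either the `on_hit` or the `on_miss` continuation. A stripped-down sketch of that callback shape follows, with a single unsharded map and invented names; in rustc the miss continuation receives the key together with the `QueryLookup` described earlier.

use std::collections::HashMap;
use std::sync::{Mutex, MutexGuard};

struct Cache {
    map: Mutex<HashMap<String, u32>>,
}

impl Cache {
    // Probe the cache once. On a hit, run `on_hit` while the lock is still
    // held; on a miss, pass the locked map to `on_miss` so the caller can
    // compute and insert without a second lookup.
    fn try_get_cached<'a, R>(
        &'a self,
        key: &str,
        on_hit: impl FnOnce(&u32) -> R,
        on_miss: impl FnOnce(&str, MutexGuard<'a, HashMap<String, u32>>) -> R,
    ) -> R {
        let guard = self.map.lock().unwrap();
        if let Some(value) = guard.get(key) {
            return on_hit(value);
        }
        on_miss(key, guard)
    }
}

fn main() {
    let cache = Cache { map: Mutex::new(HashMap::new()) };

    let value = cache.try_get_cached(
        "type_of(foo)",
        |v| *v, // hot path: copy the cached value out
        |key, mut guard| {
            // Cold path: "run the query" and record the result.
            let computed = key.len() as u32;
            guard.insert(key.to_string(), computed);
            computed
        },
    );
    println!("value = {}", value);
}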

#[inline(never)]
pub(super) fn get_query<Q: QueryDescription<'tcx>>(self, span: Span, key: Q::Key) -> Q::Value {
pub(super) fn get_query<Q: QueryDescription<'tcx> + 'tcx>(
self,
span: Span,
key: Q::Key,
) -> Q::Value {
debug!("ty::query::get_query<{}>(key={:?}, span={:?})", Q::NAME, key, span);

let job = match JobOwner::try_get(self, span, &key) {
self.try_get_cached::<Q, _, _, _>(
key,
|value, index| {
self.dep_graph.read_index(index);
value.clone()
},
|key, lookup| self.try_execute_query(span, key, lookup),
)
}
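
This is the split the PR title refers to: `get_query` now does only the cache probe plus a dep-graph read on a hit, and everything else sits behind `try_execute_query`. The sketch below shows the general hot/cold pattern with plain std and no dep graph or job tracking; `QueryEngine` and both methods are invented, and the exact attribute placement in the diff differs (it marks `get_query` `#[inline(never)]` and leans on `try_get_cached` being `#[inline(always)]`), so treat this only as an illustration of keeping miss handling out of the inlined fast path.

use std::collections::HashMap;
use std::sync::Mutex;

struct QueryEngine {
    cache: Mutex<HashMap<u32, u64>>,
}

impl QueryEngine {
    // Hot path: small enough to inline at every call site; it only probes the
    // cache and bails out to the cold function on a miss.
    #[inline(always)]
    fn get(&self, key: u32) -> u64 {
        if let Some(&value) = self.cache.lock().unwrap().get(&key) {
            return value; // cache hit: the common, cheap case
        }
        self.execute_cold(key)
    }

    // Cold path: #[cold] and #[inline(never)] keep the expensive provider code
    // (and its larger stack frame) out of the inlined fast path.
    #[cold]
    #[inline(never)]
    fn execute_cold(&self, key: u32) -> u64 {
        // Stand-in for the real query provider.
        let value: u64 = (0..=u64::from(key)).sum();
        self.cache.lock().unwrap().insert(key, value);
        value
    }
}

fn main() {
    let engine = QueryEngine { cache: Mutex::new(HashMap::new()) };
    assert_eq!(engine.get(10), 55); // miss: takes the cold path once
    assert_eq!(engine.get(10), 55); // hit: stays on the inlined fast path
    println!("ok");
}

With the probe inlined, a hit costs only the lock and the map lookup at the call site, while the code for the miss path is emitted once.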

#[inline(always)]
pub(super) fn try_execute_query<Q: QueryDescription<'tcx>>(
self,
span: Span,
key: Q::Key,
lookup: QueryLookup<'tcx, Q>,
) -> Q::Value {
let job = match JobOwner::try_start(self, span, &key, lookup) {
TryGetJob::NotYetStarted(job) => job,
TryGetJob::Cycle(result) => return result,
#[cfg(parallel_compiler)]
TryGetJob::JobCompleted((v, index)) => {
self.dep_graph.read_index(index);
return v;
@@ -615,7 +657,7 @@ impl<'tcx> TyCtxt<'tcx> {
/// side-effects -- e.g., in order to report errors for erroneous programs.
///
/// Note: The optimization is only available during incr. comp.
pub(super) fn ensure_query<Q: QueryDescription<'tcx>>(self, key: Q::Key) -> () {
pub(super) fn ensure_query<Q: QueryDescription<'tcx> + 'tcx>(self, key: Q::Key) -> () {
if Q::EVAL_ALWAYS {
let _ = self.get_query::<Q>(DUMMY_SP, key);
return;
@@ -643,14 +685,30 @@ impl<'tcx> TyCtxt<'tcx> {
}

#[allow(dead_code)]
fn force_query<Q: QueryDescription<'tcx>>(self, key: Q::Key, span: Span, dep_node: DepNode) {
fn force_query<Q: QueryDescription<'tcx> + 'tcx>(
self,
key: Q::Key,
span: Span,
dep_node: DepNode,
) {
// We may be concurrently trying both execute and force a query.
// Ensure that only one of them runs the query.
let job = match JobOwner::try_get(self, span, &key) {
TryGetJob::NotYetStarted(job) => job,
TryGetJob::Cycle(_) | TryGetJob::JobCompleted(_) => return,
};
self.force_query_with_job::<Q>(key, job, dep_node);

self.try_get_cached::<Q, _, _, _>(
key,
|_, _| {
// Cache hit, do nothing
},
|key, lookup| {
let job = match JobOwner::try_start(self, span, &key, lookup) {
TryGetJob::NotYetStarted(job) => job,
TryGetJob::Cycle(_) => return,
#[cfg(parallel_compiler)]
TryGetJob::JobCompleted(_) => return,
};
self.force_query_with_job::<Q>(key, job, dep_node);
},
);
}
}
