From aaff05bd1c420a46c91f61f83c256b9f83605c43 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 19 Sep 2019 16:51:41 +1000 Subject: [PATCH 1/7] Reorder the state handling in `process_cycles()`. This gives a slight speed-up. --- .../obligation_forest/mod.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/librustc_data_structures/obligation_forest/mod.rs b/src/librustc_data_structures/obligation_forest/mod.rs index 98ae1a5832447..37f4537f4b770 100644 --- a/src/librustc_data_structures/obligation_forest/mod.rs +++ b/src/librustc_data_structures/obligation_forest/mod.rs @@ -484,13 +484,16 @@ impl ObligationForest { debug!("process_cycles()"); for (index, node) in self.nodes.iter().enumerate() { - // For rustc-benchmarks/inflate-0.1.0 this state test is extremely - // hot and the state is almost always `Pending` or `Waiting`. It's - // a win to handle the no-op cases immediately to avoid the cost of - // the function call. + // For some benchmarks this state test is extremely + // hot. It's a win to handle the no-op cases immediately to avoid + // the cost of the function call. match node.state.get() { - NodeState::Waiting | NodeState::Pending | NodeState::Done | NodeState::Error => {}, - _ => self.find_cycles_from_node(&mut stack, processor, index), + // Match arms are in order of frequency. Pending, Success and + // Waiting dominate; the others are rare. + NodeState::Pending => {}, + NodeState::Success => self.find_cycles_from_node(&mut stack, processor, index), + NodeState::Waiting | NodeState::Done | NodeState::Error => {}, + NodeState::OnDfsStack => self.find_cycles_from_node(&mut stack, processor, index), } } From acf7d4dcdba4046917c61aab141c1dec25669ce9 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 18 Sep 2019 14:50:48 +1000 Subject: [PATCH 2/7] Specialize the `stalled_on` handling in `process_obligation()`. Optimizing for the common numbers of entries in `stalled_on` wins about 4% on `keccak` and `inflate`. --- src/librustc/infer/mod.rs | 4 +-- src/librustc/traits/fulfill.rs | 55 ++++++++++++++++++++++------------ 2 files changed, 38 insertions(+), 21 deletions(-) diff --git a/src/librustc/infer/mod.rs b/src/librustc/infer/mod.rs index c5712cc9941a9..eaef2198e3bcf 100644 --- a/src/librustc/infer/mod.rs +++ b/src/librustc/infer/mod.rs @@ -1600,8 +1600,8 @@ impl<'a, 'tcx> ShallowResolver<'a, 'tcx> { // `resolver.shallow_resolve_changed(ty)` is equivalent to // `resolver.shallow_resolve(ty) != ty`, but more efficient. It's always - // inlined, despite being large, because it has a single call site that is - // extremely hot. + // inlined, despite being large, because it has only two call sites that + // are extremely hot. #[inline(always)] pub fn shallow_resolve_changed(&mut self, typ: Ty<'tcx>) -> bool { match typ.sty { diff --git a/src/librustc/traits/fulfill.rs b/src/librustc/traits/fulfill.rs index 805727b6ce0d7..6c421e9df6800 100644 --- a/src/librustc/traits/fulfill.rs +++ b/src/librustc/traits/fulfill.rs @@ -256,29 +256,46 @@ impl<'a, 'b, 'tcx> ObligationProcessor for FulfillProcessor<'a, 'b, 'tcx> { &mut self, pending_obligation: &mut Self::Obligation, ) -> ProcessResult { - // If we were stalled on some unresolved variables, first check - // whether any of them have been resolved; if not, don't bother - // doing more work yet - if !pending_obligation.stalled_on.is_empty() { - let mut changed = false; - // This `for` loop was once a call to `all()`, but this lower-level - // form was a perf win. See #64545 for details. - for &ty in &pending_obligation.stalled_on { - if ShallowResolver::new(self.selcx.infcx()).shallow_resolve_changed(ty) { - changed = true; - break; - } + // If we were stalled on some unresolved variables, first check whether + // any of them have been resolved; if not, don't bother doing more work + // yet. + let change = match pending_obligation.stalled_on.len() { + // Match arms are in order of frequency, which matters because this + // code is so hot. 1 and 0 dominate; 2+ is fairly rare. + 1 => { + let ty = pending_obligation.stalled_on[0]; + ShallowResolver::new(self.selcx.infcx()).shallow_resolve_changed(ty) + } + 0 => { + // In this case we haven't changed, but wish to make a change. + true } - if !changed { - debug!("process_predicate: pending obligation {:?} still stalled on {:?}", - self.selcx.infcx() - .resolve_vars_if_possible(&pending_obligation.obligation), - pending_obligation.stalled_on); - return ProcessResult::Unchanged; + _ => { + // This `for` loop was once a call to `all()`, but this lower-level + // form was a perf win. See #64545 for details. + (|| { + for &ty in &pending_obligation.stalled_on { + if ShallowResolver::new(self.selcx.infcx()).shallow_resolve_changed(ty) { + return true; + } + } + false + })() } - pending_obligation.stalled_on = vec![]; + }; + + if !change { + debug!("process_predicate: pending obligation {:?} still stalled on {:?}", + self.selcx.infcx() + .resolve_vars_if_possible(&pending_obligation.obligation), + pending_obligation.stalled_on); + return ProcessResult::Unchanged; } + // This part of the code is much colder. + + pending_obligation.stalled_on.truncate(0); + let obligation = &mut pending_obligation.obligation; if obligation.predicate.has_infer_types() { From 3eae7f6291677fea37bcf91fd42e435803eabebf Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 20 Sep 2019 11:23:47 +1000 Subject: [PATCH 3/7] Upgrade to ena-0.13.1 and use the new `inlined_probe_value` function. This is a big speed win for `keccak` and `inflate`. --- Cargo.lock | 4 ++-- src/librustc/infer/mod.rs | 5 +++-- src/librustc/infer/type_variable.rs | 8 +++++++- src/librustc_data_structures/Cargo.toml | 2 +- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f52e9738da8f9..78de3af2631d2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -910,9 +910,9 @@ dependencies = [ [[package]] name = "ena" -version = "0.13.0" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dc01d68e08ca384955a3aeba9217102ca1aa85b6e168639bf27739f1d749d87" +checksum = "8944dc8fa28ce4a38f778bd46bf7d923fe73eed5a439398507246c8e017e6f36" dependencies = [ "log", ] diff --git a/src/librustc/infer/mod.rs b/src/librustc/infer/mod.rs index eaef2198e3bcf..5d556485c15f3 100644 --- a/src/librustc/infer/mod.rs +++ b/src/librustc/infer/mod.rs @@ -1609,20 +1609,21 @@ impl<'a, 'tcx> ShallowResolver<'a, 'tcx> { use self::type_variable::TypeVariableValue; // See the comment in `shallow_resolve()`. - match self.infcx.type_variables.borrow_mut().probe(v) { + match self.infcx.type_variables.borrow_mut().inlined_probe(v) { TypeVariableValue::Known { value: t } => self.fold_ty(t) != typ, TypeVariableValue::Unknown { .. } => false, } } ty::Infer(ty::IntVar(v)) => { - match self.infcx.int_unification_table.borrow_mut().probe_value(v) { + match self.infcx.int_unification_table.borrow_mut().inlined_probe_value(v) { Some(v) => v.to_type(self.infcx.tcx) != typ, None => false, } } ty::Infer(ty::FloatVar(v)) => { + // Not `inlined_probe_value(v)` because this call site is colder. match self.infcx.float_unification_table.borrow_mut().probe_value(v) { Some(v) => v.to_type(self.infcx.tcx) != typ, None => false, diff --git a/src/librustc/infer/type_variable.rs b/src/librustc/infer/type_variable.rs index e30e86998a8c6..dd09e9a8f58a4 100644 --- a/src/librustc/infer/type_variable.rs +++ b/src/librustc/infer/type_variable.rs @@ -234,7 +234,13 @@ impl<'tcx> TypeVariableTable<'tcx> { /// Retrieves the type to which `vid` has been instantiated, if /// any. pub fn probe(&mut self, vid: ty::TyVid) -> TypeVariableValue<'tcx> { - self.eq_relations.probe_value(vid) + self.inlined_probe(vid) + } + + /// An always-inlined variant of `probe`, for very hot call sites. + #[inline(always)] + pub fn inlined_probe(&mut self, vid: ty::TyVid) -> TypeVariableValue<'tcx> { + self.eq_relations.inlined_probe_value(vid) } /// If `t` is a type-inference variable, and it has been diff --git a/src/librustc_data_structures/Cargo.toml b/src/librustc_data_structures/Cargo.toml index be9f79c83bb5a..ae3403cf0ce9f 100644 --- a/src/librustc_data_structures/Cargo.toml +++ b/src/librustc_data_structures/Cargo.toml @@ -10,7 +10,7 @@ path = "lib.rs" doctest = false [dependencies] -ena = "0.13" +ena = "0.13.1" indexmap = "1" log = "0.4" jobserver_crate = { version = "0.1.13", package = "jobserver" } From 8d73faf9ab471ffa927360040247deb082a65a43 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 20 Sep 2019 11:25:16 +1000 Subject: [PATCH 4/7] Remove some unnecessary `backtrace` intermediate variables. --- src/librustc_data_structures/obligation_forest/mod.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/librustc_data_structures/obligation_forest/mod.rs b/src/librustc_data_structures/obligation_forest/mod.rs index 37f4537f4b770..bcd041afc055c 100644 --- a/src/librustc_data_structures/obligation_forest/mod.rs +++ b/src/librustc_data_structures/obligation_forest/mod.rs @@ -355,10 +355,9 @@ impl ObligationForest { let mut errors = vec![]; for (index, node) in self.nodes.iter().enumerate() { if let NodeState::Pending = node.state.get() { - let backtrace = self.error_at(index); errors.push(Error { error: error.clone(), - backtrace, + backtrace: self.error_at(index), }); } } @@ -439,10 +438,9 @@ impl ObligationForest { } ProcessResult::Error(err) => { stalled = false; - let backtrace = self.error_at(index); errors.push(Error { error: err, - backtrace, + backtrace: self.error_at(index), }); } } From 75fd4f754e344df63353eb530f31500455b904d8 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 20 Sep 2019 11:26:07 +1000 Subject: [PATCH 5/7] Rename a loop variable. We normally use `index` for indices to `ObligationForest::nodes`, but this is a `Nodes::dependents` index. --- .../obligation_forest/mod.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/librustc_data_structures/obligation_forest/mod.rs b/src/librustc_data_structures/obligation_forest/mod.rs index bcd041afc055c..be8245e158e3d 100644 --- a/src/librustc_data_structures/obligation_forest/mod.rs +++ b/src/librustc_data_structures/obligation_forest/mod.rs @@ -698,18 +698,18 @@ impl ObligationForest { let nodes_len = node_rewrites.len(); for node in &mut self.nodes { - let mut index = 0; - while index < node.dependents.len() { - let new_index = node_rewrites[node.dependents[index]]; + let mut i = 0; + while i < node.dependents.len() { + let new_index = node_rewrites[node.dependents[i]]; if new_index >= nodes_len { - node.dependents.swap_remove(index); - if index == 0 && node.has_parent { + node.dependents.swap_remove(i); + if i == 0 && node.has_parent { // We just removed the parent. node.has_parent = false; } } else { - node.dependents[index] = new_index; - index += 1; + node.dependents[i] = new_index; + i += 1; } } } From 27c7c23840290fed94aef069b47169b26b7c3625 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 20 Sep 2019 11:29:45 +1000 Subject: [PATCH 6/7] Rename `waiting_cache`. The name `waiting_cache` sounds like it is related to the states `NodeState::Waiting`, but it's not; the cache holds nodes of various states. This commit changes it to `active_state`. --- .../obligation_forest/mod.rs | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/librustc_data_structures/obligation_forest/mod.rs b/src/librustc_data_structures/obligation_forest/mod.rs index be8245e158e3d..2b7c44cdac35b 100644 --- a/src/librustc_data_structures/obligation_forest/mod.rs +++ b/src/librustc_data_structures/obligation_forest/mod.rs @@ -149,7 +149,7 @@ pub struct ObligationForest { /// A cache of the nodes in `nodes`, indexed by predicate. Unfortunately, /// its contents are not guaranteed to match those of `nodes`. See the /// comments in `process_obligation` for details. - waiting_cache: FxHashMap, + active_cache: FxHashMap, /// A scratch vector reused in various operations, to avoid allocating new /// vectors. @@ -278,7 +278,7 @@ impl ObligationForest { ObligationForest { nodes: vec![], done_cache: Default::default(), - waiting_cache: Default::default(), + active_cache: Default::default(), scratch: RefCell::new(vec![]), obligation_tree_id_generator: (0..).map(ObligationTreeId), error_cache: Default::default(), @@ -303,15 +303,15 @@ impl ObligationForest { return Ok(()); } - match self.waiting_cache.entry(obligation.as_predicate().clone()) { + match self.active_cache.entry(obligation.as_predicate().clone()) { Entry::Occupied(o) => { debug!("register_obligation_at({:?}, {:?}) - duplicate of {:?}!", obligation, parent, o.get()); let node = &mut self.nodes[*o.get()]; if let Some(parent_index) = parent { - // If the node is already in `waiting_cache`, it has - // already had its chance to be marked with a parent. So if - // it's not already present, just dump `parent` into the + // If the node is already in `active_cache`, it has already + // had its chance to be marked with a parent. So if it's + // not already present, just dump `parent` into the // dependents as a non-parent. if !node.dependents.contains(&parent_index) { node.dependents.push(parent_index); @@ -405,8 +405,8 @@ impl ObligationForest { // `processor.process_obligation` can modify the predicate within // `node.obligation`, and that predicate is the key used for - // `self.waiting_cache`. This means that `self.waiting_cache` can - // get out of sync with `nodes`. It's not very common, but it does + // `self.active_cache`. This means that `self.active_cache` can get + // out of sync with `nodes`. It's not very common, but it does // happen, and code in `compress` has to allow for it. let result = match node.state.get() { NodeState::Pending => processor.process_obligation(&mut node.obligation), @@ -637,11 +637,11 @@ impl ObligationForest { } NodeState::Done => { // This lookup can fail because the contents of - // `self.waiting_cache` is not guaranteed to match those of + // `self.active_cache` is not guaranteed to match those of // `self.nodes`. See the comment in `process_obligation` // for more details. - if let Some((predicate, _)) = self.waiting_cache - .remove_entry(node.obligation.as_predicate()) + if let Some((predicate, _)) = + self.active_cache.remove_entry(node.obligation.as_predicate()) { self.done_cache.insert(predicate); } else { @@ -654,7 +654,7 @@ impl ObligationForest { // We *intentionally* remove the node from the cache at this point. Otherwise // tests must come up with a different type on every type error they // check against. - self.waiting_cache.remove(node.obligation.as_predicate()); + self.active_cache.remove(node.obligation.as_predicate()); node_rewrites[index] = nodes_len; dead_nodes += 1; self.insert_into_error_cache(index); @@ -714,9 +714,9 @@ impl ObligationForest { } } - // This updating of `self.waiting_cache` is necessary because the + // This updating of `self.active_cache` is necessary because the // removal of nodes within `compress` can fail. See above. - self.waiting_cache.retain(|_predicate, index| { + self.active_cache.retain(|_predicate, index| { let new_index = node_rewrites[*index]; if new_index >= nodes_len { false From aa10abb2119f0740aac704a78d6eebd800ddb1da Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 20 Sep 2019 12:53:22 +1000 Subject: [PATCH 7/7] Rename a variable. Because the meaning of this `index` variable is quite different to all the other `index` variables in this file. --- src/librustc_data_structures/obligation_forest/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/librustc_data_structures/obligation_forest/mod.rs b/src/librustc_data_structures/obligation_forest/mod.rs index 2b7c44cdac35b..1c7109fe500e0 100644 --- a/src/librustc_data_structures/obligation_forest/mod.rs +++ b/src/librustc_data_structures/obligation_forest/mod.rs @@ -507,8 +507,8 @@ impl ObligationForest { let node = &self.nodes[index]; match node.state.get() { NodeState::OnDfsStack => { - let index = stack.iter().rposition(|&n| n == index).unwrap(); - processor.process_backedge(stack[index..].iter().map(GetObligation(&self.nodes)), + let rpos = stack.iter().rposition(|&n| n == index).unwrap(); + processor.process_backedge(stack[rpos..].iter().map(GetObligation(&self.nodes)), PhantomData); } NodeState::Success => {