Skip to content

Commit ebe9682

Browse files
committed
Auto merge of #39020 - michaelwoerister:dep-graph-dfs-caching, r=nikomatsakis
incr.comp.: Add some caching to Predecessors construction. This speeds up the "serialize dep graph" pass for libsyntax from 45 secs to 15 secs on my machine. Still far from ideal, but things will get better when we change the metadata hashing strategy. The `CACHING_THRESHOLD` value of 60 has been arrived at experimentally. It seemed to give the best speedup. r? @nikomatsakis
2 parents 93e70ec + 482fa0f commit ebe9682

File tree

3 files changed

+317
-9
lines changed

3 files changed

+317
-9
lines changed

src/librustc_data_structures/bitvec.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,23 +17,27 @@ pub struct BitVector {
1717
}
1818

1919
impl BitVector {
20+
#[inline]
2021
pub fn new(num_bits: usize) -> BitVector {
2122
let num_words = u64s(num_bits);
2223
BitVector { data: vec![0; num_words] }
2324
}
2425

26+
#[inline]
2527
pub fn clear(&mut self) {
2628
for p in &mut self.data {
2729
*p = 0;
2830
}
2931
}
3032

33+
#[inline]
3134
pub fn contains(&self, bit: usize) -> bool {
3235
let (word, mask) = word_mask(bit);
3336
(self.data[word] & mask) != 0
3437
}
3538

3639
/// Returns true if the bit has changed.
40+
#[inline]
3741
pub fn insert(&mut self, bit: usize) -> bool {
3842
let (word, mask) = word_mask(bit);
3943
let data = &mut self.data[word];
@@ -43,6 +47,7 @@ impl BitVector {
4347
new_value != value
4448
}
4549

50+
#[inline]
4651
pub fn insert_all(&mut self, all: &BitVector) -> bool {
4752
assert!(self.data.len() == all.data.len());
4853
let mut changed = false;
@@ -56,6 +61,7 @@ impl BitVector {
5661
changed
5762
}
5863

64+
#[inline]
5965
pub fn grow(&mut self, num_bits: usize) {
6066
let num_words = u64s(num_bits);
6167
if self.data.len() < num_words {
@@ -64,6 +70,7 @@ impl BitVector {
6470
}
6571

6672
/// Iterates over indexes of set bits in a sorted order
73+
#[inline]
6774
pub fn iter<'a>(&'a self) -> BitVectorIter<'a> {
6875
BitVectorIter {
6976
iter: self.data.iter(),
@@ -226,10 +233,12 @@ impl BitMatrix {
226233
}
227234
}
228235

236+
/// Returns the number of 64-bit words needed to hold `elements` bits,
/// i.e. `elements / 64` rounded up.
///
/// The result is derived from the quotient and remainder rather than from
/// `(elements + 63) / 64`, because that addition overflows when `elements`
/// is within 63 of `usize::MAX`.
#[inline]
fn u64s(elements: usize) -> usize {
    let full_words = elements / 64;
    if elements % 64 == 0 {
        full_words
    } else {
        // A partially used trailing word is still needed for the leftover bits.
        full_words + 1
    }
}
232240

241+
#[inline]
233242
fn word_mask(index: usize) -> (usize, u64) {
234243
let word = index / 64;
235244
let mask = 1 << (index % 64);

src/librustc_incremental/persist/preds.rs

Lines changed: 297 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
use rustc::dep_graph::{DepGraphQuery, DepNode};
1212
use rustc::hir::def_id::DefId;
1313
use rustc_data_structures::fx::FxHashMap;
14-
use rustc_data_structures::graph::{DepthFirstTraversal, INCOMING, NodeIndex};
14+
use rustc_data_structures::bitvec::BitVector;
15+
use rustc_data_structures::graph::{NodeIndex, Graph};
1516

1617
use super::hash::*;
1718
use ich::Fingerprint;
@@ -33,11 +34,21 @@ pub struct Predecessors<'query> {
3334
impl<'q> Predecessors<'q> {
3435
pub fn new(query: &'q DepGraphQuery<DefId>, hcx: &mut HashContext) -> Self {
3536
// Find nodes for which we want to know the full set of preds
36-
let mut dfs = DepthFirstTraversal::new(&query.graph, INCOMING);
37-
let all_nodes = query.graph.all_nodes();
3837
let tcx = hcx.tcx;
38+
let node_count = query.graph.len_nodes();
3939

40-
let inputs: FxHashMap<_, _> = all_nodes.iter()
40+
// Set up some data structures the cache predecessor search needs:
41+
let mut visit_counts: Vec<u32> = Vec::new();
42+
let mut node_cache: Vec<Option<Box<[u32]>>> = Vec::new();
43+
visit_counts.resize(node_count, 0);
44+
node_cache.resize(node_count, None);
45+
let mut dfs_workspace1 = DfsWorkspace::new(node_count);
46+
let mut dfs_workspace2 = DfsWorkspace::new(node_count);
47+
48+
let inputs: FxHashMap<_, _> = query
49+
.graph
50+
.all_nodes()
51+
.iter()
4152
.enumerate()
4253
.filter(|&(_, node)| match node.data {
4354
DepNode::WorkProduct(_) => true,
@@ -51,11 +62,18 @@ impl<'q> Predecessors<'q> {
5162
_ => false,
5263
})
5364
.map(|(node_index, node)| {
54-
dfs.reset(NodeIndex(node_index));
55-
let inputs: Vec<_> = dfs.by_ref()
56-
.map(|i| &all_nodes[i.node_id()].data)
57-
.filter(|d| HashContext::is_hashable(d))
58-
.collect();
65+
find_roots(&query.graph,
66+
node_index as u32,
67+
&mut visit_counts,
68+
&mut node_cache[..],
69+
HashContext::is_hashable,
70+
&mut dfs_workspace1,
71+
Some(&mut dfs_workspace2));
72+
73+
let inputs: Vec<_> = dfs_workspace1.output.nodes.iter().map(|&i| {
74+
query.graph.node_data(NodeIndex(i as usize))
75+
}).collect();
76+
5977
(&node.data, inputs)
6078
})
6179
.collect();
@@ -72,3 +90,273 @@ impl<'q> Predecessors<'q> {
7290
}
7391
}
7492
}
93+
94+
// Number of (uncached) visits a node must exceed before `find_roots` builds
// a cache entry holding that node's roots. The value was chosen
// experimentally as the one giving the best speedup.
const CACHING_THRESHOLD: u32 = 60;
95+
96+
// Starting at `start_node`, this function finds this node's "roots", that is,
97+
// anything that is hashable, in the dep-graph. It uses a simple depth-first
98+
// search to achieve that. However, since some sub-graphs are traversed over
99+
// and over again, the function also has some caching built into it: Each time it
100+
// visits a node it increases a counter for that node. If a node has been
101+
// visited more often than CACHING_THRESHOLD, the function will allocate a
102+
// cache entry in the `cache` array. This cache entry contains a flat list of
103+
// all roots reachable from the given node. The next time the node is visited,
104+
// the search can just add the contents of this array to the output instead of
105+
// recursing further.
106+
//
107+
// The function takes two `DfsWorkspace` arguments. These contain some data
108+
// structures that would be expensive to re-allocate all the time, so they are
109+
// allocated once up-front. There are two of them because building a cache entry
110+
// requires a recursive invocation of this function. Two are enough though,
111+
// since the function never recurses more than once.
112+
fn find_roots<T, F>(graph: &Graph<T, ()>,
113+
start_node: u32,
114+
visit_counts: &mut [u32],
115+
cache: &mut [Option<Box<[u32]>>],
116+
is_root: F,
117+
workspace: &mut DfsWorkspace,
118+
mut sub_workspace: Option<&mut DfsWorkspace>)
119+
where F: Copy + Fn(&T) -> bool,
120+
T: ::std::fmt::Debug,
121+
{
122+
workspace.visited.clear();
123+
workspace.output.clear();
124+
workspace.stack.clear();
125+
workspace.stack.push(start_node);
126+
127+
loop {
128+
let node = match workspace.stack.pop() {
129+
Some(node) => node,
130+
None => return,
131+
};
132+
133+
if !workspace.visited.insert(node as usize) {
134+
continue
135+
}
136+
137+
if is_root(graph.node_data(NodeIndex(node as usize))) {
138+
// If this is a root, just add it to the output.
139+
workspace.output.insert(node);
140+
} else {
141+
if let Some(ref cached) = cache[node as usize] {
142+
for &n in &cached[..] {
143+
workspace.output.insert(n);
144+
}
145+
// No need to recurse further from this node
146+
continue
147+
}
148+
149+
visit_counts[node as usize] += 1;
150+
151+
// If this node has been visited often enough to be cached ...
152+
if visit_counts[node as usize] > CACHING_THRESHOLD {
153+
// ... we are actually allowed to cache something, do so:
154+
if let Some(ref mut sub_workspace) = sub_workspace {
155+
// Note that the following recursive invocation does never
156+
// write to the cache (since we pass None as sub_workspace).
157+
// This is intentional: The graph we are working with
158+
// contains cycles and this prevent us from simply building
159+
// our caches recursively on-demand.
160+
// However, we can just do a regular, non-caching DFS to
161+
// yield the set of roots and cache that.
162+
find_roots(graph,
163+
node,
164+
visit_counts,
165+
cache,
166+
is_root,
167+
sub_workspace,
168+
None);
169+
170+
for &n in &sub_workspace.output.nodes {
171+
workspace.output.insert(n);
172+
}
173+
174+
cache[node as usize] = Some(sub_workspace.output
175+
.nodes
176+
.clone()
177+
.into_boxed_slice());
178+
// No need to recurse further from this node
179+
continue
180+
}
181+
}
182+
183+
for pred in graph.predecessor_nodes(NodeIndex(node as usize)) {
184+
workspace.stack.push(pred.node_id() as u32);
185+
}
186+
}
187+
}
188+
}
189+
190+
struct DfsWorkspace {
191+
stack: Vec<u32>,
192+
visited: BitVector,
193+
output: NodeIndexSet,
194+
}
195+
196+
impl DfsWorkspace {
197+
fn new(total_node_count: usize) -> DfsWorkspace {
198+
DfsWorkspace {
199+
stack: Vec::new(),
200+
visited: BitVector::new(total_node_count),
201+
output: NodeIndexSet::new(total_node_count),
202+
}
203+
}
204+
}
205+
206+
struct NodeIndexSet {
207+
bitset: BitVector,
208+
nodes: Vec<u32>,
209+
}
210+
211+
impl NodeIndexSet {
212+
fn new(total_node_count: usize) -> NodeIndexSet {
213+
NodeIndexSet {
214+
bitset: BitVector::new(total_node_count),
215+
nodes: Vec::new(),
216+
}
217+
}
218+
219+
#[inline]
220+
fn clear(&mut self) {
221+
self.bitset.clear();
222+
self.nodes.clear();
223+
}
224+
225+
#[inline]
226+
fn insert(&mut self, node: u32) {
227+
if self.bitset.insert(node as usize) {
228+
self.nodes.push(node)
229+
}
230+
}
231+
}
232+
233+
#[test]
fn test_cached_dfs_acyclic() {
    // Acyclic graph with ten nodes. Nodes 0 to 6 are ordinary nodes, nodes
    // 7, 8 and 9 are roots. Edges are listed as (source, target);
    // `find_roots` walks them backwards, from a node to its predecessors.
    let mut g: Graph<bool, ()> = Graph::new();
    for index in 0 .. 10 {
        g.add_node(index >= 7);
    }

    let edges: Vec<(usize, usize)> = vec![
        (3, 0),
        (4, 3),
        (7, 4),
        (5, 3),
        (7, 5),
        (8, 5),
        (8, 6),
        (9, 6),
        (6, 1),
        (6, 2),
    ];
    for &(source, target) in edges.iter() {
        g.add_edge(NodeIndex(source), NodeIndex(target), ());
    }

    let mut ws1 = DfsWorkspace::new(g.len_nodes());
    let mut ws2 = DfsWorkspace::new(g.len_nodes());
    let mut visit_counts: Vec<_> = g.all_nodes().iter().map(|_| 0u32).collect();
    let mut cache: Vec<Option<Box<[u32]>>> = g.all_nodes().iter().map(|_| None).collect();

    fn is_root(x: &bool) -> bool { *x }

    // (start node, expected roots in ascending order)
    let expectations: Vec<(u32, Vec<u32>)> = vec![
        (5, vec![7, 8]),
        (6, vec![8, 9]),
        (0, vec![7, 8]),
        (1, vec![8, 9]),
        (2, vec![8, 9]),
        (3, vec![7, 8]),
        (4, vec![7]),
    ];

    // Repeat often enough for visit counts to pass the caching threshold, so
    // that both the uncached and the cached code paths get exercised.
    for _ in 0 .. CACHING_THRESHOLD + 1 {
        for &(start, ref expected) in expectations.iter() {
            find_roots(&g,
                       start,
                       &mut visit_counts,
                       &mut cache[..],
                       is_root,
                       &mut ws1,
                       Some(&mut ws2));
            ws1.output.nodes.sort();
            assert_eq!(ws1.output.nodes, *expected);
        }
    }
}
306+
307+
#[test]
fn test_cached_dfs_cyclic() {
    // Graph with twelve nodes containing the cycle 5 -> 6 -> 2 -> 1 -> 5.
    // Nodes 0 to 7 are ordinary nodes, nodes 8 to 11 are roots. Edges are
    // listed as (source, target); `find_roots` walks them backwards, from a
    // node to its predecessors.
    let mut g: Graph<bool, ()> = Graph::new();
    for index in 0 .. 12 {
        g.add_node(index >= 8);
    }

    let edges: Vec<(usize, usize)> = vec![
        (4, 0),
        (8, 4),
        (4, 5),
        (1, 5),
        (9, 5),
        (5, 6),
        (6, 2),
        (2, 1),
        (10, 6),
        (6, 7),
        (11, 7),
        (7, 3),
    ];
    for &(source, target) in edges.iter() {
        g.add_edge(NodeIndex(source), NodeIndex(target), ());
    }

    let mut ws1 = DfsWorkspace::new(g.len_nodes());
    let mut ws2 = DfsWorkspace::new(g.len_nodes());
    let mut visit_counts: Vec<_> = g.all_nodes().iter().map(|_| 0u32).collect();
    let mut cache: Vec<Option<Box<[u32]>>> = g.all_nodes().iter().map(|_| None).collect();

    fn is_root(x: &bool) -> bool { *x }

    // (start node, expected roots in ascending order)
    let expectations: Vec<(u32, Vec<u32>)> = vec![
        (2, vec![8, 9, 10]),
        (3, vec![8, 9, 10, 11]),
    ];

    // Repeat often enough for visit counts to pass the caching threshold, so
    // that both the uncached and the cached code paths get exercised.
    for _ in 0 .. CACHING_THRESHOLD + 1 {
        for &(start, ref expected) in expectations.iter() {
            find_roots(&g,
                       start,
                       &mut visit_counts,
                       &mut cache[..],
                       is_root,
                       &mut ws1,
                       Some(&mut ws2));
            ws1.output.nodes.sort();
            assert_eq!(ws1.output.nodes, *expected);
        }
    }
}

0 commit comments

Comments
 (0)