Skip to content

Commit 235ea68

Browse files
committed
Use a custom hash set for interning
1 parent 7d576f2 commit 235ea68

File tree

4 files changed

+294
-43
lines changed

4 files changed

+294
-43
lines changed

src/librustc/ty/context.rs

Lines changed: 32 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ use ty::BindingMode;
5252
use ty::CanonicalTy;
5353
use util::nodemap::{DefIdSet, ItemLocalMap};
5454
use util::nodemap::{FxHashMap, FxHashSet};
55+
use rustc_data_structures::fx::FxInterner;
5556
use rustc_data_structures::accumulate_vec::AccumulateVec;
5657
use rustc_data_structures::stable_hasher::{HashStable, hash_stable_hashmap,
5758
StableHasher, StableHasherResult,
@@ -132,7 +133,7 @@ pub struct CtxtInterners<'tcx> {
132133

133134
/// Specifically use a speedy hash algorithm for these hash sets,
134135
/// they're accessed quite often.
135-
type_: InternedSet<'tcx, TyS<'tcx>>,
136+
type_: Lock<FxInterner<Interned<'tcx, TyS<'tcx>>>>,
136137
type_list: InternedSet<'tcx, Slice<Ty<'tcx>>>,
137138
substs: InternedSet<'tcx, Substs<'tcx>>,
138139
canonical_var_infos: InternedSet<'tcx, Slice<CanonicalVarInfo>>,
@@ -173,51 +174,39 @@ impl<'gcx: 'tcx, 'tcx> CtxtInterners<'tcx> {
173174
// determine that all contents are in the global tcx.
174175
// See comments on Lift for why we can't use that.
175176
if flags.flags.intersects(ty::TypeFlags::KEEP_IN_LOCAL_TCX) {
176-
let mut interner = local.type_.borrow_mut();
177-
if let Some(&Interned(ty)) = interner.get(&st) {
178-
return ty;
179-
}
180-
181-
let ty_struct = TyS {
182-
sty: st,
183-
flags: flags.flags,
184-
outer_exclusive_binder: flags.outer_exclusive_binder,
185-
};
177+
local.type_.borrow_mut().intern(st, |st| {
178+
let ty_struct = TyS {
179+
sty: st,
180+
flags: flags.flags,
181+
outer_exclusive_binder: flags.outer_exclusive_binder,
182+
};
186183

187-
// Make sure we don't end up with inference
188-
// types/regions in the global interner
189-
if local as *const _ as usize == global as *const _ as usize {
190-
bug!("Attempted to intern `{:?}` which contains \
191-
inference types/regions in the global type context",
192-
&ty_struct);
193-
}
184+
// Make sure we don't end up with inference
185+
// types/regions in the global interner
186+
if local as *const _ as usize == global as *const _ as usize {
187+
bug!("Attempted to intern `{:?}` which contains \
188+
inference types/regions in the global type context",
189+
&ty_struct);
190+
}
194191

195-
// Don't be &mut TyS.
196-
let ty: Ty<'tcx> = local.arena.alloc(ty_struct);
197-
interner.insert(Interned(ty));
198-
ty
192+
Interned(local.arena.alloc(ty_struct))
193+
}).0
199194
} else {
200-
let mut interner = global.type_.borrow_mut();
201-
if let Some(&Interned(ty)) = interner.get(&st) {
202-
return ty;
203-
}
204-
205-
let ty_struct = TyS {
206-
sty: st,
207-
flags: flags.flags,
208-
outer_exclusive_binder: flags.outer_exclusive_binder,
209-
};
195+
global.type_.borrow_mut().intern(st, |st| {
196+
let ty_struct = TyS {
197+
sty: st,
198+
flags: flags.flags,
199+
outer_exclusive_binder: flags.outer_exclusive_binder,
200+
};
210201

211-
// This is safe because all the types the ty_struct can point to
212-
// already is in the global arena
213-
let ty_struct: TyS<'gcx> = unsafe {
214-
mem::transmute(ty_struct)
215-
};
202+
// This is safe because all the types the ty_struct can point to
203+
// already is in the global arena
204+
let ty_struct: TyS<'gcx> = unsafe {
205+
mem::transmute(ty_struct)
206+
};
216207

217-
// Don't be &mut TyS.
218-
let ty: Ty<'gcx> = global.arena.alloc(ty_struct);
219-
interner.insert(Interned(ty));
220-
ty
208+
Interned(global.arena.alloc(ty_struct))
209+
}).0
221210
}
222211
}
223212
}
@@ -1929,7 +1918,7 @@ macro_rules! sty_debug_print {
19291918
};
19301919
$(let mut $variant = total;)*
19311920

1932-
1921+
/*
19331922
for &Interned(t) in tcx.interners.type_.borrow().iter() {
19341923
let variant = match t.sty {
19351924
ty::TyBool | ty::TyChar | ty::TyInt(..) | ty::TyUint(..) |
@@ -1945,7 +1934,7 @@ macro_rules! sty_debug_print {
19451934
if region { total.region_infer += 1; variant.region_infer += 1 }
19461935
if ty { total.ty_infer += 1; variant.ty_infer += 1 }
19471936
if region && ty { total.both_infer += 1; variant.both_infer += 1 }
1948-
}
1937+
}*/
19491938
println!("Ty interner total ty region both");
19501939
$(println!(" {:18}: {uses:6} {usespc:4.1}%, \
19511940
{ty:4.1}% {region:5.1}% {both:4.1}%",

src/librustc_data_structures/fx.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,12 @@
1111
use std::collections::{HashMap, HashSet};
1212
use std::default::Default;
1313
use std::hash::Hash;
14+
use interner;
1415

1516
pub use rustc_hash::FxHashMap;
1617
pub use rustc_hash::FxHashSet;
1718
pub use rustc_hash::FxHasher;
19+
pub type FxInterner<V> = interner::Interner<V, BuildHasherDefault<FxHasher>>;
1820

1921
#[allow(non_snake_case)]
2022
pub fn FxHashMap<K: Hash + Eq, V>() -> FxHashMap<K, V> {
Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
use std::marker::PhantomData;
2+
use std::hash::Hash;
3+
use std::hash::Hasher;
4+
use std::hash::BuildHasher;
5+
use std::mem::{self, size_of};
6+
use std::ptr::{Unique, NonNull};
7+
use std::alloc::{Global, Alloc};
8+
use std::collections::hash_map::RandomState;
9+
use std::borrow::Borrow;
10+
11+
// Number of (hash, value) slots held by one probe group.
const ENTRIES_PER_GROUP: usize = 5;

// One probe group of the open-addressing table. `repr(align(64), C)`
// makes it exactly one 64-byte cache line (5*4 hash bytes + 4 size
// bytes + 5*8 value bytes = 64, asserted in Table::new_uninitialized),
// so each probe step touches a single line.
#[repr(align(64), C)]
pub struct Group {
    // 32-bit hash tags; 0 marks an empty slot (make_hash forces bit 31
    // on, so a real tag can never be 0).
    hashes: [u32; ENTRIES_PER_GROUP],
    // Number of occupied slots; slots fill densely from index 0.
    size: u32,
    // Raw value bits; reinterpreted as `&K` by callers (K is asserted
    // to be exactly 8 bytes in Interner's Default impl).
    values: [u64; ENTRIES_PER_GROUP],
}
19+
20+
impl Group {
    /// Returns the index of the first free slot, or `None` when the
    /// group is full. Because slots fill densely from index 0, the
    /// first free slot is always at index `size`.
    #[inline(always)]
    fn search_for_empty(&self) -> Option<usize> {
        if self.size != ENTRIES_PER_GROUP as u32 {
            Some(self.size as usize)
        } else {
            None
        }
    }

    /// Scans the group for an entry whose tag equals `hash` and whose
    /// value satisfies `eq`. Returns `Some((slot, false))` on a match,
    /// `Some((slot, true))` for the free slot the key could take, and
    /// `None` when the group is full with no match (probing continues
    /// in the next group).
    ///
    /// NOTE(review): the `transmute` reinterprets `&u64` as `&K`; this
    /// is only sound because `size_of::<K>() == 8` is asserted when the
    /// interner is constructed.
    #[inline(always)]
    fn search_with<K, F: FnMut(&K) -> bool>(&self, eq: &mut F, hash: u32) -> Option<(usize, bool)> {
        for i in 0..ENTRIES_PER_GROUP {
            let h = unsafe { *self.hashes.get_unchecked(i) };
            if h == hash && eq(unsafe { mem::transmute(self.values.get_unchecked(i)) }) {
                return Some((i, false))
            }
        }
        self.search_for_empty().map(|i| (i, true))
    }

    /// Writes `hash`/`value` into slot `pos`. Callers must ensure
    /// `pos < ENTRIES_PER_GROUP` and are responsible for bumping
    /// `size` themselves — this method does not.
    #[inline(always)]
    fn set(&mut self, pos: usize, hash: u32, value: u64) {
        unsafe {
            *self.hashes.get_unchecked_mut(pos) = hash;
            *self.values.get_unchecked_mut(pos) = value;
        }
    }

    /// Invokes `f(hash_tag, value_bits)` for every live slot (a slot is
    /// live iff its hash tag is nonzero).
    #[inline(always)]
    fn iter<F: FnMut(u32, u64)>(&self, f: &mut F) {
        for i in 0..ENTRIES_PER_GROUP {
            unsafe {
                let h = *self.hashes.get_unchecked(i);
                if h != 0 {
                    f(h, *self.values.get_unchecked(i))
                }
            }
        }
    }
}
61+
62+
// Open-addressing table made of cache-line-sized groups.
// Invariant: `group_mask == group_count - 1` with group_count a power
// of two, or 0 for the unallocated placeholder built by
// `Interner::default()` (Drop skips deallocation for that case).
pub struct Table {
    group_mask: usize,
    // Number of live entries.
    size: usize,
    // Resize threshold: max entries before `expand` is triggered
    // (kept strictly below the total slot count — see
    // `new_uninitialized` — so probe loops always terminate).
    capacity: usize,
    groups: Unique<Group>,
}
68+
69+
// Result of a table probe: the group and slot where the key was found
// (`empty: false`) or where it should be inserted (`empty: true`).
pub struct RawEntry {
    group: *mut Group,
    pos: usize,
    empty: bool
}
74+
75+
impl Drop for Table {
76+
fn drop(&mut self) {
77+
if self.group_mask == 0 {
78+
return;
79+
}
80+
81+
unsafe {
82+
Global.dealloc_array(
83+
NonNull::new_unchecked(self.groups.as_ptr()),
84+
self.group_mask + 1
85+
).unwrap();
86+
}
87+
}
88+
}
89+
90+
impl Table {
    /// Allocates a table with `group_count` groups (a power of two) and
    /// clears every group's hash tags and size. Value slots are left
    /// uninitialized: a slot only becomes live once its tag is set to a
    /// nonzero hash.
    unsafe fn new_uninitialized(group_count: usize) -> Table {
        // Layout invariant: one Group is exactly one 64-byte cache line.
        assert!(size_of::<Group>() == 64);
        let groups: NonNull<Group> = Global.alloc_array(group_count).unwrap();
        let capacity2 = group_count * ENTRIES_PER_GROUP;
        let capacity1 = capacity2 - 1;
        // Resize threshold: roughly 10/13 (~77%) of the slots, rounded
        // up. Keeping it strictly below the total slot count (asserted
        // below) guarantees the probe loops always find a free slot.
        //let capacity = (capacity1 * 10 + 10 - 1) / 11;
        let capacity = (capacity1 * 10 + 10 - 1) / 13;
        //println!("capacity1 {} capacity {}", capacity1, capacity);
        assert!(capacity < capacity2);

        for i in 0..group_count {
            let group = unsafe {
                &mut (*groups.as_ptr().offset(i as isize))
            };
            group.hashes = [0; ENTRIES_PER_GROUP];
            group.size = 0;
        }

        Table {
            // group_count is a power of two, so this is the index mask.
            group_mask: group_count.wrapping_sub(1),
            size: 0,
            capacity,
            groups: Unique::new_unchecked(groups.as_ptr()),
        }
    }

    /// Linearly probes whole groups — starting at the group selected by
    /// the low 32 bits of `hash` — for the first free slot. Must only
    /// be called while `size < capacity`; on a full table this loop
    /// would never terminate.
    fn search_for_empty(&self, hash: u64) -> RawEntry {
        let group_idx = hash as u32 as usize;
        let mask = self.group_mask;
        let mut group_idx = group_idx & mask;

        loop {
            let group_ptr = unsafe {
                self.groups.as_ptr().offset(group_idx as isize)
            };
            let group = unsafe {
                &(*group_ptr)
            };
            match unsafe { group.search_for_empty() } {
                Some(pos) => return RawEntry {
                    group: group_ptr,
                    pos,
                    empty: true,
                },
                None => (),
            }
            // Wrap-around linear probing, one group at a time.
            group_idx = (group_idx + 1) & mask;
        }
    }

    /// Probes for an entry matching `hash`/`eq`. Returns the matching
    /// slot (`empty: false`) or the free slot where the key should be
    /// inserted (`empty: true`). Like `search_for_empty`, this requires
    /// a free slot somewhere in the table to terminate.
    fn search_with<K, F: FnMut(&K) -> bool>(&self, mut eq: F, hash: u64) -> RawEntry {
        let group_idx = hash as u32 as usize;
        let mask = self.group_mask;
        let mut group_idx = group_idx & mask;

        loop {
            let group_ptr = unsafe {
                self.groups.as_ptr().offset(group_idx as isize)
            };
            let group = unsafe {
                &(*group_ptr)
            };
            // Only the low 32 bits of the hash are stored as the tag.
            let r = unsafe { group.search_with(&mut eq, hash as u32) } ;
            match r {
                Some((pos, empty)) => return RawEntry {
                    group: group_ptr,
                    pos,
                    empty,
                },
                None => (),
            }
            group_idx = (group_idx + 1) & mask;
        }
    }

    /// Calls `f(hash_tag, value_bits)` for every live entry. The
    /// placeholder table (`group_mask == 0`) owns no groups and is
    /// skipped entirely.
    fn iter<F: FnMut(u32, u64)>(&self, mut f: F) {
        if self.group_mask == 0 {
            return;
        }
        for i in 0..(self.group_mask + 1) {
            let group = unsafe {
                &(*self.groups.as_ptr().offset(i as isize))
            };
            group.iter(&mut f);
        }
    }
}
178+
179+
/// A hash set specialized for interning: lookup-or-insert of 8-byte
/// keys (e.g. references or newtypes over references), stored as raw
/// u64 bit patterns inside cache-line-sized groups.
pub struct Interner<K: Eq + Hash, S = RandomState> {
    hash_builder: S,
    table: Table,
    // The table erases K to u64 bits; PhantomData ties K back to this
    // type for the compiler.
    marker: PhantomData<K>,
}
184+
185+
impl<K: Eq + Hash, S: Default> Default for Interner<K, S> {
    /// Creates an empty interner without allocating: the placeholder
    /// table has `group_mask == 0`, a dangling group pointer, and zero
    /// capacity, so the first `intern` call grows it before any probe.
    fn default() -> Self {
        // Keys are stored as raw u64 bit patterns, so only 8-byte key
        // types are supported.
        assert!(size_of::<K>() == 8);
        Interner {
            hash_builder: S::default(),
            table: Table {
                group_mask: 0,
                size: 0,
                capacity: 0,
                // Never dereferenced while group_mask == 0; Drop also
                // skips deallocation in that case.
                groups: unsafe { Unique::new_unchecked(NonNull::dangling().as_ptr()) },
            },
            marker: PhantomData,
        }
    }
}
200+
201+
/// Hashes `t` with `hash_state`, forcing bit 31 of the result on.
///
/// The table keeps only the low 32 bits of this value as a slot's tag
/// and reserves a tag of 0 to mean "empty slot", so the truncated tag
/// must never be zero; setting bit 31 guarantees that while perturbing
/// just a single bit of the hash.
pub fn make_hash<T: ?Sized, S>(hash_state: &S, t: &T) -> u64
    where T: Hash,
          S: BuildHasher
{
    let mut hasher = hash_state.build_hasher();
    t.hash(&mut hasher);
    let raw = hasher.finish();
    raw | (1 << 31)
}
209+
210+
impl<K: Eq + Hash, S: BuildHasher> Interner<K, S> {
    /// Doubles the group count (or allocates the first 2 groups when
    /// growing the placeholder table, since (0 + 1) << 1 == 2) and
    /// rehashes every live entry into the new table. The old
    /// allocation is released when `self.table` is overwritten, via
    /// `Table`'s `Drop`.
    #[inline(never)]
    #[cold]
    fn expand(&mut self) {
        let mut new_table = unsafe {
            Table::new_uninitialized((self.table.group_mask + 1) << 1)
        };
        new_table.size = self.table.size;
        self.table.iter(|h, v| {
            // Re-probe with the stored 32-bit tag; group selection only
            // ever uses the low 32 bits of the hash, so this places
            // entries consistently with fresh inserts.
            let spot = new_table.search_for_empty(h as u64);
            unsafe {
                (*spot.group).size += 1;
                (*spot.group).set(spot.pos, h, v);
            }
        });
        self.table = new_table;
    }

    /// Grows the table if one more insertion would exceed `capacity`.
    /// NOTE(review): this runs before the lookup in `intern`, so
    /// interning an already-present key can still trigger a (harmless,
    /// but wasted) resize.
    #[inline(always)]
    fn incr(&mut self) {
        if self.table.size + 1 > self.table.capacity {
            self.expand()
        }
    }

    /// Number of interned entries.
    pub fn len(&self) -> usize {
        self.table.size
    }

    /// Returns a reference to the interned key equal to `value`,
    /// building and inserting it with `make(value)` if absent.
    ///
    /// NOTE(review): the freshly made key's 8 bytes are copied into the
    /// table with a raw pointer read while the local `key` still owns
    /// them (no `mem::forget`); this looks sound only for keys without
    /// a meaningful `Drop` (e.g. `Interned` wrappers around arena
    /// references) — confirm before using with any other key type.
    #[inline]
    pub fn intern<Q, F: FnOnce(Q) -> K>(&mut self, value: Q, make: F) -> &K
        where K: Borrow<Q>,
              Q: Hash + Eq
    {
        self.incr();
        let hash = make_hash(&self.hash_builder, &value);
        let spot = self.table.search_with::<K, _>(|k| value.eq(k.borrow()), hash);
        unsafe {
            if spot.empty {
                self.table.size += 1;
                (*spot.group).size += 1;
                let key = make(value);
                (*spot.group).set(spot.pos, hash as u32, *(&key as *const _ as *const u64));
            }
            // Reinterpret the stored u64 slot as &K (size_of::<K>() == 8
            // is asserted at construction).
            &*((*spot.group).values.get_unchecked(spot.pos) as *const _ as *const K)
        }
    }
}

0 commit comments

Comments (0)