Skip to content

Commit 09cebc2

Browse files
committed
auto merge of #16999 : brson/rust/fannkuch, r=alexcrichton
Based on the discussion on Reddit: http://www.reddit.com/r/rust/comments/2fenlg/benchmark_improvement_fannkuchredux/ This adds two variants: a primary one that uses an unsafe block, and a secondary one that is completely safe. The variant with the unsafe block matches clang's performance and beats gcc's.
2 parents 38eb0e5 + 7e12e67 commit 09cebc2

File tree

2 files changed

+137
-49
lines changed

2 files changed

+137
-49
lines changed

src/libcore/slice.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -806,7 +806,12 @@ impl<'a,T> MutableSlice<'a, T> for &'a mut [T] {
806806
let mut i: uint = 0;
807807
let ln = self.len();
808808
while i < ln / 2 {
809-
self.swap(i, ln - i - 1);
809+
// Unsafe swap to avoid the bounds check in safe swap.
810+
unsafe {
811+
let pa: *mut T = self.unsafe_mut_ref(i);
812+
let pb: *mut T = self.unsafe_mut_ref(ln - i - 1);
813+
ptr::swap(pa, pb);
814+
}
810815
i += 1;
811816
}
812817
}

src/test/bench/shootout-fannkuch-redux.rs

Lines changed: 131 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -38,68 +38,151 @@
3838
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
3939
// OF THE POSSIBILITY OF SUCH DAMAGE.
4040

41-
use std::cmp::max;
41+
use std::{cmp, iter, mem};
42+
use std::sync::Future;
4243

43-
fn fact(n: uint) -> uint {
44-
range(1, n + 1).fold(1, |accu, i| accu * i)
44+
// Rotates `x` left by one position in place: every element moves one slot
// towards the front and the original first element ends up in the last slot.
// `x[0]` is read unconditionally, so `x` must not be empty.
fn rotate(x: &mut [i32]) {
45+
let mut prev = x[0];
46+
// Walk from the back: each slot receives the value that used to follow it,
// and the value it held is carried on to the slot in front of it.
for place in x.mut_iter().rev() {
47+
prev = mem::replace(place, prev)
48+
}
4549
}
4650

47-
fn fannkuch(n: uint, i: uint) -> (int, int) {
48-
let mut perm = Vec::from_fn(n, |e| ((n + e - i) % n + 1) as i32);
49-
let mut tperm = perm.clone();
50-
let mut count = Vec::from_elem(n, 0u);
51-
let mut perm_count = 0i;
52-
let mut checksum = 0;
51+
// Advances `perm` in place to the next permutation of the rotation-based
// enumeration, using `count` as one counter per prefix length.
//
// For i = 1, 2, ... the first i + 1 elements are rotated left by one. If
// `count[i]` has already reached i it is reset to 0 and the next longer prefix
// is processed as well; otherwise it is incremented and the function stops.
// `count` is indexed with the same `i` as `perm`, so it must be at least as
// long as `perm`.
fn next_permutation(perm: &mut [i32], count: &mut [i32]) {
52+
for i in range(1, perm.len()) {
53+
rotate(perm.mut_slice_to(i + 1));
54+
let count_i = &mut count[i];
55+
if *count_i >= i as i32 {
56+
*count_i = 0;
57+
} else {
58+
*count_i += 1;
59+
break
60+
}
61+
}
62+
}
63+
64+
// Fixed-size buffer of 16 `i32` slots holding one permutation. `Perm::get`
// fills it with the values 1..=16 before rearranging the first `n` of them.
struct P {
65+
p: [i32, .. 16],
66+
}
67+
68+
// State of the permutation generator used by the benchmark.
struct Perm {
69+
// Per-position counters: written by `get` and passed as `count` to
// `next_permutation` by `next`.
cnt: [i32, .. 16],
70+
// Factorial table filled in by `new`: fact[i] = i! for i in 0..=n.
fact: [u32, .. 16],
71+
// Number of elements being permuted.
n: u32,
72+
// Index of the current permutation: set by `get`, incremented by `next`.
permcount: u32,
73+
// The current permutation.
perm: P,
74+
}
75+
76+
// Methods of the permutation generator. In this diff rendering the added
// methods are interleaved with removed (`-`) lines of the old `fannkuch`
// function; the notes below describe the added (`+`) code only.
impl Perm {
77+
// Creates a generator for permutations of `n` elements and precomputes the
// factorials 0! ..= n!. The tables have 16 slots, so `n` must be at most 15.
// NOTE(review): `fact` is `u32`, and 13! no longer fits in 32 bits, so the
// usable range of `n` appears to stop at 12 - confirm.
fn new(n: u32) -> Perm {
78+
let mut fact = [1, .. 16];
79+
for i in range(1, n as uint + 1) {
80+
fact[i] = fact[i - 1] * i as u32;
81+
}
82+
Perm {
83+
cnt: [0, .. 16],
84+
fact: fact,
85+
n: n,
86+
permcount: 0,
87+
perm: P { p: [0, .. 16 ] }
88+
}
89+
}
90+
91+
// Positions the generator on the permutation with index `idx` and returns it.
// `permcount` is set to `idx`, the buffer is reset to 1, 2, ..., 16, and then
// `idx` is decomposed with the factorial table: for each i from n - 1 down to
// 1 the digit d = idx / i! is stored in `cnt[i]` and the first i + 1 elements
// are rotated left by d positions.
fn get(&mut self, mut idx: i32) -> P {
92+
// Scratch copy of the prefix that is being rotated.
let mut pp = [0u8, .. 16];
93+
self.permcount = idx as u32;
94+
for (i, place) in self.perm.p.mut_iter().enumerate() {
95+
*place = i as i32 + 1;
96+
}
5397

54-
for countdown in range(1, fact(n - 1) + 1).rev() {
55-
for i in range(1, n) {
56-
let perm0 = *perm.get(0);
57-
for j in range(0, i) {
58-
*perm.get_mut(j) = *perm.get(j + 1);
98+
for i in range(1, self.n as uint).rev() {
99+
let d = idx / self.fact[i] as i32;
100+
self.cnt[i] = d;
101+
idx %= self.fact[i] as i32;
102+
// Snapshot the first i + 1 elements so the rotation below can read from them.
for (place, val) in pp.mut_iter().zip(self.perm.p.slice_to(i + 1).iter()) {
103+
*place = (*val) as u8
59104
}
60-
*perm.get_mut(i) = perm0;
61-
62-
let count_i = count.get_mut(i);
63-
if *count_i >= i {
64-
*count_i = 0;
65-
} else {
66-
*count_i += 1;
67-
break;
105+
106+
let d = d as uint;
107+
for j in range(0, i + 1) {
108+
// Element j takes the value d places further on, wrapping around at i.
self.perm.p[j] = if j + d <= i {pp[j + d]} else {pp[j+d-i-1]} as i32;
68109
}
69110
}
70111

71-
tperm.clone_from(&perm);
72-
let mut flips_count = 0;
73-
loop {
74-
let k = *tperm.get(0);
75-
if k == 1 { break; }
76-
tperm.mut_slice_to(k as uint).reverse();
77-
flips_count += 1;
112+
self.perm
113+
}
114+
115+
// Index of the current permutation.
fn count(&self) -> u32 { self.permcount }
116+
// Total number of permutations, n!.
fn max(&self) -> u32 { self.fact[self.n as uint] }
117+
118+
// Steps to the following permutation via `next_permutation`, bumps the
// permutation index and returns the new permutation.
fn next(&mut self) -> P {
119+
next_permutation(self.perm.p, self.cnt);
120+
self.permcount += 1;
121+
122+
self.perm
123+
}
124+
}
125+
126+
127+
// Reverses the first `k` elements of `tperm` in place (one "flip").
// NOTE(review): `k` is never reassigned here, so the `mut` on the parameter
// looks unnecessary.
fn reverse(tperm: &mut [i32], mut k: uint) {
128+
tperm.mut_slice_to(k).reverse()
129+
}
130+
131+
// Processes the permutations whose indices lie in [n, max) and returns
// `(checksum, maxflips)` for that range. Note that `n` here is the index of
// the first permutation to process (it is passed to `perm.get`), not the
// number of elements. In this diff rendering the function is interleaved with
// removed (`-`) lines of the old implementation; the notes describe the added
// (`+`) code only.
fn work(mut perm: Perm, n: uint, max: uint) -> (i32, i32) {
132+
let mut checksum = 0;
133+
let mut maxflips = 0;
134+
135+
let mut p = perm.get(n as i32);
136+
137+
while perm.count() < max as u32 {
138+
let mut flips = 0;
139+
140+
// Keep reversing the first p[0] elements until the first element is 1,
// counting the reversals.
while p.p[0] != 1 {
141+
let k = p.p[0] as uint;
142+
reverse(p.p, k);
143+
flips += 1;
78144
}
79-
perm_count = max(perm_count, flips_count);
80-
checksum += if countdown & 1 == 1 {flips_count} else {-flips_count}
145+
146+
// Flip counts are added for even permutation indices and subtracted for odd ones.
checksum += if perm.count() % 2 == 0 {flips} else {-flips};
147+
maxflips = cmp::max(maxflips, flips);
148+
149+
p = perm.next();
81150
}
82-
(checksum, perm_count)
151+
152+
(checksum, maxflips)
83153
}
84154

85-
fn main() {
86-
let n = std::os::args().as_slice()
87-
.get(1)
88-
.and_then(|arg| from_str(arg.as_slice()))
89-
.unwrap_or(2u);
90-
91-
let (tx, rx) = channel();
92-
for i in range(0, n) {
93-
let tx = tx.clone();
94-
spawn(proc() tx.send(fannkuch(n, i)));
155+
// Runs the benchmark for `n` elements: the n! permutation indices are split
// into N = 4 consecutive ranges of k = n! / N indices each, one `Future` is
// spawned per range to run `work`, and the partial results are combined
// (checksums summed, maximum flip count kept). In this diff rendering the
// function is interleaved with removed (`-`) lines of the old `main`; the
// notes describe the added (`+`) code only.
// NOTE(review): `k` is computed with integer division, so when n! is not a
// multiple of N the last n! - N * k indices are never assigned to a task;
// for n = 2 (the default in `main`) k is 0 and every range is empty - confirm
// whether n < 4 is meant to be supported.
fn fannkuch(n: i32) -> (i32, i32) {
156+
let perm = Perm::new(n as u32);
157+
158+
let N = 4;
159+
let mut futures = vec![];
160+
let k = perm.max() / N;
161+
162+
// `j` is the first permutation index of each range (0, k, 2k, ...).
// NOTE(review): `i` is not used in the loop body.
for (i, j) in range(0, N).zip(iter::count(0, k)) {
163+
let max = cmp::min(j+k, perm.max());
164+
165+
futures.push(Future::spawn(proc() {
166+
work(perm, j as uint, max as uint)
167+
}))
95168
}
96-
drop(tx);
97169

98170
let mut checksum = 0;
99-
let mut perm = 0;
100-
for (cur_cks, cur_perm) in rx.iter() {
101-
checksum += cur_cks;
102-
perm = max(perm, cur_perm);
171+
let mut maxflips = 0;
172+
for fut in futures.mut_iter() {
173+
let (cs, mf) = fut.get();
174+
checksum += cs;
175+
maxflips = cmp::max(maxflips, mf);
103176
}
104-
println!("{}\nPfannkuchen({}) = {}", checksum, n, perm);
177+
(checksum, maxflips)
178+
}
179+
180+
// Entry point: takes `n` from the first command-line argument (falling back
// to 2 when it is missing or does not parse as an `i32`), runs `fannkuch` and
// prints the checksum on one line followed by "Pfannkuchen(n) = maxflips".
fn main() {
181+
let n = std::os::args().as_slice()
182+
.get(1)
183+
.and_then(|arg| from_str(arg.as_slice()))
184+
.unwrap_or(2i32);
185+
186+
let (checksum, maxflips) = fannkuch(n);
187+
println!("{}\nPfannkuchen({}) = {}", checksum, n, maxflips);
105188
}

0 commit comments

Comments
 (0)