Closed
Description
Consider the following benchmark:
#![feature(asm)]
extern crate test;
use test::{Bencher};
#[inline(never)]
fn gen() -> Vec<u8> {
Vec::from_elem(1024 * 65, 0)
}
#[bench]
fn position(b: &mut Bencher) {
let v = gen();
b.iter(|| {
test::black_box(v.as_slice().iter().position(|&c| c == 1));
});
}
#[bench]
fn iter(b: &mut Bencher) {
let v = gen();
b.iter(|| {
let mut res = None;
let mut i = 0u;
for &b in v.as_slice().iter() {
if b == 1 {
res = Some(i);
break;
}
i += 1;
}
test::black_box(res);
});
}
#[bench]
fn enumerate(b: &mut Bencher) {
let v = gen();
b.iter(|| {
let mut res = None;
for (i, &b) in v.as_slice().iter().enumerate() {
if b == 1 {
res = Some(i);
break;
}
}
test::black_box(res);
});
}
#[bench]
fn _range(b: &mut Bencher) {
let v = gen();
b.iter(|| {
let mut res = None;
for i in range(0, v.len()) {
if v[i] == 1 {
res = Some(i);
break;
}
}
test::black_box(res);
});
}
#[bench]
fn assembly(b: &mut Bencher) {
let v = gen();
b.iter(|| {
unsafe {
let mut start = v.as_ptr();
let end = start.offset(v.len() as int);
asm!("
dec $0
.align 16, 0x90
AGAIN:
inc $0
cmp $0, $1
je EXIT
cmpb $$1, ($0)
jne AGAIN
EXIT:
" : "+r"(start) : "r"(end));
if start < end {
test::black_box(Some(start as uint - v.as_ptr() as uint));
} else {
test::black_box(None::<u8>);
}
}
});
}
Which produces the following output:
test _range ... bench: 65200 ns/iter (+/- 1033)
test assembly ... bench: 60802 ns/iter (+/- 248)
test enumerate ... bench: 64441 ns/iter (+/- 566)
test iter ... bench: 91170 ns/iter (+/- 465)
test position ... bench: 91112 ns/iter (+/- 384)
position
is the correct abstraction for this but its code is 50% slower than the naive assembly implementation and 40% slower than enumerate
.
Metadata
Metadata
Assignees
Labels
No labels