Skip to content

Speedup int log10 branchless #88788

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions library/core/benches/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// wasm32 does not support benches (no time).
#![cfg(not(target_arch = "wasm32"))]
#![feature(flt2dec)]
#![feature(int_log)]
#![feature(test)]

extern crate test;
Expand Down
58 changes: 58 additions & 0 deletions library/core/benches/num/int_log/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
use rand::Rng;
use test::{black_box, Bencher};

// Generates three `log10` benchmarks for the unsigned integer type `$t`:
//   * `$predictable`  - a fixed, repeating sequence of inputs,
//   * `$random`       - random inputs drawn from the whole range of `$t`,
//   * `$random_small` - random inputs that fit in a `u8`, widened to `$t`.
// Every input is non-zero, and inputs/results pass through `black_box` so the
// optimizer cannot fold the measured work away.
macro_rules! int_log_bench {
    ($t:ty, $predictable:ident, $random:ident, $random_small:ident) => {
        #[bench]
        fn $predictable(b: &mut Bencher) {
            b.iter(|| {
                // Walk the values 1..=100 through every byte position of `$t`.
                for byte in 0..(<$t>::BITS / 8) {
                    for base in 1..=(100 as $t) {
                        let input = black_box(base << (byte * 8));
                        black_box(input.log10());
                    }
                }
            });
        }

        #[bench]
        fn $random(b: &mut Bencher) {
            let mut rng = rand::thread_rng();
            // A random value of `$t` shifted right by a random bit count, so
            // the samples cover small and large magnitudes of the type alike.
            let mut samples: Vec<$t> = Vec::with_capacity(256);
            for _ in 0..256 {
                let raw = rng.gen::<$t>();
                let shift = rng.gen_range(0, <$t>::BITS);
                let shifted = raw >> shift;
                // A zero sample is replaced by 1.
                samples.push(if shifted == 0 { 1 } else { shifted });
            }
            b.iter(|| {
                for sample in samples.iter() {
                    black_box(black_box(sample).log10());
                }
            });
        }

        #[bench]
        fn $random_small(b: &mut Bencher) {
            let mut rng = rand::thread_rng();
            // Same construction as above, but drawn as a `u8` first so every
            // sample lies in 1..256 regardless of how wide `$t` is.
            let mut samples: Vec<$t> = Vec::with_capacity(256);
            for _ in 0..256 {
                let raw = rng.gen::<u8>();
                let shift = rng.gen_range(0, u8::BITS);
                let shifted = (raw >> shift) as $t;
                // A zero sample is replaced by 1.
                samples.push(if shifted == 0 { 1 } else { shifted });
            }
            b.iter(|| {
                for sample in samples.iter() {
                    black_box(black_box(sample).log10());
                }
            });
        }
    };
}

int_log_bench! {u8, u8_log10_predictable, u8_log10_random, u8_log10_random_small}
int_log_bench! {u16, u16_log10_predictable, u16_log10_random, u16_log10_random_small}
int_log_bench! {u32, u32_log10_predictable, u32_log10_random, u32_log10_random_small}
int_log_bench! {u64, u64_log10_predictable, u64_log10_random, u64_log10_random_small}
int_log_bench! {u128, u128_log10_predictable, u128_log10_random, u128_log10_random_small}
1 change: 1 addition & 0 deletions library/core/benches/num/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
mod dec2flt;
mod flt2dec;
mod int_log;

use std::str::FromStr;
use test::Bencher;
Expand Down
107 changes: 51 additions & 56 deletions library/core/src/num/int_log10.rs
Original file line number Diff line number Diff line change
@@ -1,76 +1,71 @@
mod unchecked {
// Base-10 logarithm of `val`, rounded down, computed without branches.
//
// Expected input range: 0 < val <= u8::MAX.
// Returns 0 for 1..=9, 1 for 10..=99 and 2 for 100..=255.
pub const fn u8(val: u8) -> u32 {
    // Widen first so the additions below have room above the low 8 bits.
    let val = val as u32;

    // For better performance, avoid branches by assembling the solution
    // in the bits above the low 8 bits.

    // Adding C1 to val gives 10 in the top bits for val < 10, 11 for val >= 10
    const C1: u32 = 0b11_00000000 - 10; // 758
    // Adding C2 to val gives 01 in the top bits for val < 100, 10 for val >= 100
    const C2: u32 = 0b10_00000000 - 100; // 412

    // Value of top bits:
    //            +C1  +C2  1&2
    //     0..=9   10   01   00 = 0
    //   10..=99   11   01   01 = 1
    // 100..=255   11   10   10 = 2
    //
    // Shifting right by 8 drops the low bits and leaves only the answer.
    ((val + C1) & (val + C2)) >> 8
}

// Base-10 logarithm of `val`, rounded down, computed without branches.
//
// Expected input range: 0 < val < 100_000 (i.e. at most five decimal digits).
const fn less_than_5(val: u32) -> u32 {
    // Similar to u8, when adding one of these constants to val,
    // we get two possible bit patterns above the low 17 bits,
    // depending on whether val is below or above the threshold.
    const C1: u32 = 0b011_00000000000000000 - 10; // 393206
    const C2: u32 = 0b100_00000000000000000 - 100; // 524188
    const C3: u32 = 0b111_00000000000000000 - 1000; // 916504
    const C4: u32 = 0b100_00000000000000000 - 10000; // 514288

    // Value of top bits:
    //                +C1  +C2  1&2  +C3  +C4  3&4   ^
    //         0..=9  010  011  010  110  011  010  000 = 0
    //       10..=99  011  011  011  110  011  010  001 = 1
    //     100..=999  011  100  000  110  011  010  010 = 2
    //   1000..=9999  011  100  000  111  011  011  011 = 3
    // 10000..=99999  011  100  000  111  100  100  100 = 4
    //
    // Shifting right by 17 drops the low bits and leaves only the answer.
    (((val + C1) & (val + C2)) ^ ((val + C3) & (val + C4))) >> 17
}

// Base-10 logarithm of `val`, rounded down.
//
// Expected input range: 0 < val <= u16::MAX.
// Every u16 is below 100_000, so the five-digit helper covers the whole type.
pub const fn u16(val: u16) -> u32 {
    less_than_5(val as u32)
}

// Base-10 logarithm of `val`, rounded down.
//
// Expected input range: 0 < val <= u32::MAX.
pub const fn u32(mut val: u32) -> u32 {
    let mut log = 0;
    // Strip five digits if present; the quotient of a u32 by 100_000 is at
    // most 42_949, which is within the range of `less_than_5`.
    if val >= 100_000 {
        val /= 100_000;
        log += 5;
    }
    log + less_than_5(val)
}

// Base-10 logarithm of `val`, rounded down.
//
// Expected input range: 0 < val <= u64::MAX.
pub const fn u64(mut val: u64) -> u32 {
    let mut log = 0;
    // Strip ten digits if present; what remains is below 10_000_000_000.
    if val >= 10_000_000_000 {
        val /= 10_000_000_000;
        log += 10;
    }
    // Strip five more digits if present; what remains is below 100_000,
    // so the narrowing cast below cannot lose information.
    if val >= 100_000 {
        val /= 100_000;
        log += 5;
    }
    log + less_than_5(val as u32)
}

// 0 < val <= u128::MAX
Expand All @@ -79,13 +74,13 @@ mod unchecked {
if val >= 100_000_000_000_000_000_000_000_000_000_000 {
val /= 100_000_000_000_000_000_000_000_000_000_000;
log += 32;
return log + less_than_8(val as u32);
return log + u32(val as u32);
}
if val >= 10_000_000_000_000_000 {
val /= 10_000_000_000_000_000;
log += 16;
}
log + less_than_16(val as u64)
log + u64(val as u64)
}

// 0 < val <= i8::MAX
Expand Down
3 changes: 3 additions & 0 deletions library/core/tests/num/int_log.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ fn checked_log10() {
for i in 1..=u16::MAX {
assert_eq!(i.checked_log10(), Some((i as f32).log10() as u32));
}
for i in 1..=100_000u32 {
assert_eq!(i.checked_log10(), Some((i as f32).log10() as u32));
}
}

macro_rules! log10_loop {
Expand Down