Skip to content

Commit 12fc8d9

Browse files
authored
Merge pull request #100 from timClicks/inline-functions
Increase the #[inline] opportunities - 15-40% performance improvements
2 parents: 87624ad + 9310f0f; commit: 12fc8d9

File tree

3 files changed

+38
-31
lines changed

3 files changed

+38
-31
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ no_std = [] # This is a no-op, preserved for backward compatibility only.
2424

2525
[dev-dependencies]
2626
quickcheck = "0.7"
27-
bencher = "0.1"
27+
criterion = "0.3"
2828

2929
[[bench]]
3030
name = "graphemes"

benches/graphemes.rs

Lines changed: 29 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -1,55 +1,54 @@
1-
#[macro_use]
2-
extern crate bencher;
3-
extern crate unicode_segmentation;
1+
use criterion::{black_box, criterion_group, criterion_main, Criterion};
2+
use unicode_segmentation;
43

5-
use bencher::Bencher;
6-
use unicode_segmentation::UnicodeSegmentation;
74
use std::fs;
5+
use unicode_segmentation::UnicodeSegmentation;
86

9-
fn graphemes(bench: &mut Bencher, path: &str) {
7+
fn graphemes(c: &mut Criterion, lang: &str, path: &str) {
108
let text = fs::read_to_string(path).unwrap();
11-
bench.iter(|| {
12-
for g in UnicodeSegmentation::graphemes(&*text, true) {
13-
bencher::black_box(g);
14-
}
15-
});
169

17-
bench.bytes = text.len() as u64;
10+
c.bench_function(&format!("graphemes_{}",lang), |bench| {
11+
bench.iter(|| {
12+
for g in UnicodeSegmentation::graphemes(black_box(&*text), true) {
13+
black_box(g);
14+
}
15+
})
16+
});
1817
}
1918

20-
fn graphemes_arabic(bench: &mut Bencher) {
21-
graphemes(bench, "benches/texts/arabic.txt");
19+
fn graphemes_arabic(c: &mut Criterion) {
20+
graphemes(c, "arabic" ,"benches/texts/arabic.txt");
2221
}
2322

24-
fn graphemes_english(bench: &mut Bencher) {
25-
graphemes(bench, "benches/texts/english.txt");
23+
fn graphemes_english(c: &mut Criterion) {
24+
graphemes(c, "english" ,"benches/texts/english.txt");
2625
}
2726

28-
fn graphemes_hindi(bench: &mut Bencher) {
29-
graphemes(bench, "benches/texts/hindi.txt");
27+
fn graphemes_hindi(c: &mut Criterion) {
28+
graphemes(c, "hindi" ,"benches/texts/hindi.txt");
3029
}
3130

32-
fn graphemes_japanese(bench: &mut Bencher) {
33-
graphemes(bench, "benches/texts/japanese.txt");
31+
fn graphemes_japanese(c: &mut Criterion) {
32+
graphemes(c, "japanese" ,"benches/texts/japanese.txt");
3433
}
3534

36-
fn graphemes_korean(bench: &mut Bencher) {
37-
graphemes(bench, "benches/texts/korean.txt");
35+
fn graphemes_korean(c: &mut Criterion) {
36+
graphemes(c, "korean" ,"benches/texts/korean.txt");
3837
}
3938

40-
fn graphemes_mandarin(bench: &mut Bencher) {
41-
graphemes(bench, "benches/texts/mandarin.txt");
39+
fn graphemes_mandarin(c: &mut Criterion) {
40+
graphemes(c, "mandarin" ,"benches/texts/mandarin.txt");
4241
}
4342

44-
fn graphemes_russian(bench: &mut Bencher) {
45-
graphemes(bench, "benches/texts/russian.txt");
43+
fn graphemes_russian(c: &mut Criterion) {
44+
graphemes(c, "russian" ,"benches/texts/russian.txt");
4645
}
4746

48-
fn graphemes_source_code(bench: &mut Bencher) {
49-
graphemes(bench, "benches/texts/source_code.txt");
47+
fn graphemes_source_code(c: &mut Criterion) {
48+
graphemes(c, "source_code","benches/texts/source_code.txt");
5049
}
5150

52-
benchmark_group!(
51+
criterion_group!(
5352
benches,
5453
graphemes_arabic,
5554
graphemes_english,
@@ -61,4 +60,4 @@ benchmark_group!(
6160
graphemes_source_code,
6261
);
6362

64-
benchmark_main!(benches);
63+
criterion_main!(benches);

src/grapheme.rs

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -228,6 +228,7 @@ enum PairResult {
228228
Emoji, // a break if preceded by emoji base and (Extend)*
229229
}
230230

231+
#[inline]
231232
fn check_pair(before: GraphemeCat, after: GraphemeCat) -> PairResult {
232233
use crate::tables::grapheme::GraphemeCat::*;
233234
use self::PairResult::*;
@@ -407,6 +408,7 @@ impl GraphemeCursor {
407408
}
408409
}
409410

411+
#[inline]
410412
fn decide(&mut self, is_break: bool) {
411413
self.state = if is_break {
412414
GraphemeState::Break
@@ -415,11 +417,13 @@ impl GraphemeCursor {
415417
};
416418
}
417419

420+
#[inline]
418421
fn decision(&mut self, is_break: bool) -> Result<bool, GraphemeIncomplete> {
419422
self.decide(is_break);
420423
Ok(is_break)
421424
}
422425

426+
#[inline]
423427
fn is_boundary_result(&self) -> Result<bool, GraphemeIncomplete> {
424428
if self.state == GraphemeState::Break {
425429
Ok(true)
@@ -432,6 +436,7 @@ impl GraphemeCursor {
432436
}
433437
}
434438

439+
#[inline]
435440
fn handle_regional(&mut self, chunk: &str, chunk_start: usize) {
436441
use crate::tables::grapheme as gr;
437442
let mut ris_count = self.ris_count.unwrap_or(0);
@@ -452,6 +457,7 @@ impl GraphemeCursor {
452457
self.state = GraphemeState::Regional;
453458
}
454459

460+
#[inline]
455461
fn handle_emoji(&mut self, chunk: &str, chunk_start: usize) {
456462
use crate::tables::grapheme as gr;
457463
let mut iter = chunk.chars().rev();
@@ -482,6 +488,7 @@ impl GraphemeCursor {
482488
self.state = GraphemeState::Emoji;
483489
}
484490

491+
#[inline]
485492
/// Determine whether the current cursor location is a grapheme cluster boundary.
486493
/// Only a part of the string need be supplied. If `chunk_start` is nonzero or
487494
/// the length of `chunk` is not equal to `len` on creation, then this method
@@ -563,6 +570,7 @@ impl GraphemeCursor {
563570
}
564571
}
565572

573+
#[inline]
566574
/// Find the next boundary after the current cursor position. Only a part of
567575
/// the string need be supplied. If the chunk is incomplete, then this
568576
/// method might return `GraphemeIncomplete::PreContext` or

0 commit comments

Comments
 (0)