Skip to content

Commit 088d7cb

Browse files
committed
Add Wikipedia benchmarks & instructions
1 parent 0de9001 commit 088d7cb

File tree

1 file changed

+18
-2
lines changed

1 file changed

+18
-2
lines changed

src/tests.rs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use std::iter;
1313
#[cfg(feature = "bench")]
1414
use test::{self, Bencher};
1515
#[cfg(feature = "bench")]
16-
use super::UnicodeWidthChar;
16+
use super::{UnicodeWidthChar, UnicodeWidthStr};
1717

1818
use std::prelude::v1::*;
1919

@@ -93,7 +93,23 @@ fn simple_width_match(c: char) -> Option<usize> {
9393
_ => UnicodeWidthChar::width(c)
9494
}
9595
}
96-
96+
/// Benchmarks `UnicodeWidthStr::width` over the contents of the file at
/// `bench_data/enwik8`, read into a single `String`.
///
/// Compiled only when the `bench` feature is enabled and the `no_std`
/// feature is not (the body uses `std::fs`).
#[cfg(all(feature = "bench", not(feature = "no_std")))]
97+
#[bench]
98+
fn enwik8(b: &mut Bencher) {
99+
// To benchmark, download & unzip `enwik8` from https://data.deepai.org/enwik8.zip
100+
// Relative path: resolved against the working directory of the bench process.
let data_path = "bench_data/enwik8";
101+
// NOTE: a failed read (e.g. the data file was never downloaded) is swallowed by
// `unwrap_or_default`, so the benchmark then measures an empty string instead of failing.
let string = std::fs::read_to_string(data_path).unwrap_or_default();
102+
// The result is passed through `black_box`, presumably so the width computation
// is not optimized away inside the timed closure.
b.iter(|| test::black_box(UnicodeWidthStr::width(string.as_str())));
103+
}
104+
/// Benchmarks `UnicodeWidthStr::width` over the contents of the file at
/// `bench_data/jawiki-20220501-pages-articles-multistream-index.txt`, read
/// into a single `String`.
///
/// Compiled only when the `bench` feature is enabled and the `no_std`
/// feature is not (the body uses `std::fs`).
#[cfg(all(feature = "bench", not(feature = "no_std")))]
105+
#[bench]
106+
fn jawiki(b: &mut Bencher) {
107+
// To benchmark, download & extract `jawiki-20220501-pages-articles-multistream-index.txt` from
108+
// https://dumps.wikimedia.org/jawiki/20220501/jawiki-20220501-pages-articles-multistream-index.txt.bz2
109+
// Relative path: resolved against the working directory of the bench process.
let data_path = "bench_data/jawiki-20220501-pages-articles-multistream-index.txt";
110+
// NOTE: a failed read (e.g. the data file was never downloaded) is swallowed by
// `unwrap_or_default`, so the benchmark then measures an empty string instead of failing.
let string = std::fs::read_to_string(data_path).unwrap_or_default();
111+
// The result is passed through `black_box`, presumably so the width computation
// is not optimized away inside the timed closure.
b.iter(|| test::black_box(UnicodeWidthStr::width(string.as_str())));
112+
}
97113
#[test]
98114
fn test_str() {
99115
use super::UnicodeWidthStr;

0 commit comments

Comments
 (0)