@@ -152,9 +152,17 @@ fn main() {
152152 std:: process:: exit ( 1 ) ;
153153 } ) ;
154154
155+ // Optional test path, which is a Rust source file testing that the unicode
156+ // property lookups are correct.
157+ let test_path = std:: env:: args ( ) . nth ( 2 ) ;
158+
155159 let unicode_data = load_data ( ) ;
156160 let ranges_by_property = & unicode_data. ranges ;
157161
162+ if let Some ( path) = test_path {
163+ std:: fs:: write ( & path, generate_tests ( & write_location, & ranges_by_property) ) . unwrap ( ) ;
164+ }
165+
158166 let mut total_bytes = 0 ;
159167 let mut modules = Vec :: new ( ) ;
160168 for ( property, ranges) in ranges_by_property {
@@ -236,6 +244,99 @@ fn fmt_list<V: std::fmt::Debug>(values: impl IntoIterator<Item = V>) -> String {
236244 out
237245}
238246
247+ fn generate_tests ( data_path : & str , ranges : & [ ( & str , Vec < Range < u32 > > ) ] ) -> String {
248+ let mut s = String :: new ( ) ;
249+ s. push_str ( "#![allow(incomplete_features, unused)]\n " ) ;
250+ s. push_str ( "#![feature(const_generics)]\n \n " ) ;
251+ s. push_str ( & format ! ( "#[path = \" {}\" ]\n " , data_path) ) ;
252+ s. push_str ( "mod unicode_data;\n \n " ) ;
253+
254+ s. push_str (
255+ "
256+ #[inline(always)]
257+ fn range_search<const N: usize, const CHUNK_SIZE: usize, const N1: usize, const N2: usize>(
258+ needle: u32,
259+ chunk_idx_map: &[u8; N],
260+ (last_chunk_idx, last_chunk_mapping): (u16, u8),
261+ bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
262+ bitset: &[u64; N2],
263+ ) -> bool {
264+ let bucket_idx = (needle / 64) as usize;
265+ let chunk_map_idx = bucket_idx / CHUNK_SIZE;
266+ let chunk_piece = bucket_idx % CHUNK_SIZE;
267+ let chunk_idx = if chunk_map_idx >= N {
268+ if chunk_map_idx == last_chunk_idx as usize {
269+ last_chunk_mapping
270+ } else {
271+ return false;
272+ }
273+ } else {
274+ chunk_idx_map[chunk_map_idx]
275+ };
276+ let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece];
277+ let word = bitset[(idx as usize)];
278+ (word & (1 << (needle % 64) as u64)) != 0
279+ }
280+ " ,
281+ ) ;
282+
283+ s. push_str ( "\n fn main() {\n " ) ;
284+
285+ for ( property, ranges) in ranges {
286+ s. push_str ( & format ! ( r#" println!("Testing {}");"# , property) ) ;
287+ s. push ( '\n' ) ;
288+ s. push_str ( & format ! ( " {}();\n " , property. to_lowercase( ) ) ) ;
289+ let mut is_true = Vec :: new ( ) ;
290+ let mut is_false = Vec :: new ( ) ;
291+ for ch_num in 0 ..( std:: char:: MAX as u32 ) {
292+ if std:: char:: from_u32 ( ch_num) . is_none ( ) {
293+ continue ;
294+ }
295+ if ranges. iter ( ) . any ( |r| r. contains ( & ch_num) ) {
296+ is_true. push ( ch_num) ;
297+ } else {
298+ is_false. push ( ch_num) ;
299+ }
300+ }
301+
302+ s. push_str ( & format ! ( " fn {}() {{\n " , property. to_lowercase( ) ) ) ;
303+ generate_asserts ( & mut s, property, & is_true, true ) ;
304+ generate_asserts ( & mut s, property, & is_false, false ) ;
305+ s. push_str ( " }\n \n " ) ;
306+ }
307+
308+ s. push_str ( "}" ) ;
309+ s
310+ }
311+
312+ fn generate_asserts ( s : & mut String , property : & str , points : & [ u32 ] , truthy : bool ) {
313+ for range in ranges_from_set ( points) {
314+ if range. end == range. start + 1 {
315+ s. push_str ( & format ! (
316+ " assert!({}unicode_data::{}::lookup(std::char::from_u32({}).unwrap()), \" {}\" );\n " ,
317+ if truthy { "" } else { "!" } ,
318+ property. to_lowercase( ) ,
319+ range. start,
320+ std:: char :: from_u32( range. start) . unwrap( ) ,
321+ ) ) ;
322+ } else {
323+ s. push_str ( & format ! ( " for chn in {:?}u32 {{\n " , range) ) ;
324+ s. push_str ( & format ! (
325+ " assert!({}unicode_data::{}::lookup(std::char::from_u32(chn).unwrap()), \" {{:?}}\" , chn);\n " ,
326+ if truthy { "" } else { "!" } ,
327+ property. to_lowercase( ) ,
328+ ) ) ;
329+ s. push_str ( " }\n " ) ;
330+ }
331+ }
332+ }
333+
334+ fn ranges_from_set ( set : & [ u32 ] ) -> Vec < Range < u32 > > {
335+ let mut ranges = set. iter ( ) . map ( |e| ( * e) ..( * e + 1 ) ) . collect :: < Vec < Range < u32 > > > ( ) ;
336+ merge_ranges ( & mut ranges) ;
337+ ranges
338+ }
339+
239340fn merge_ranges ( ranges : & mut Vec < Range < u32 > > ) {
240341 loop {
241342 let mut new_ranges = Vec :: new ( ) ;
0 commit comments