@@ -207,26 +207,23 @@ fn load_data() -> UnicodeData {
207207}
208208
209209fn main ( ) {
210- let write_location = std:: env:: args ( ) . nth ( 1 ) . unwrap_or_else ( || {
211- eprintln ! ( "Must provide path to write unicode tables to" ) ;
210+ let args = std:: env:: args ( ) . collect :: < Vec < _ > > ( ) ;
211+
212+ if args. len ( ) != 3 {
213+ eprintln ! ( "Must provide paths to write unicode tables and tests to" ) ;
212214 eprintln ! (
213- "e.g. {} library/core/src/unicode/unicode_data.rs" ,
214- std :: env :: args( ) . next ( ) . unwrap_or_default ( )
215+ "e.g. {} library/core/src/unicode/unicode_data.rs library/coretests/tests/char/unicode_data.rs " ,
216+ args[ 0 ]
215217 ) ;
216218 std:: process:: exit ( 1 ) ;
217- } ) ;
219+ }
218220
219- // Optional test path, which is a Rust source file testing that the unicode
220- // property lookups are correct.
221- let test_path = std:: env:: args ( ) . nth ( 2 ) ;
221+ let data_path = & args[ 1 ] ;
222+ let test_path = & args[ 2 ] ;
222223
223224 let unicode_data = load_data ( ) ;
224225 let ranges_by_property = & unicode_data. ranges ;
225226
226- if let Some ( path) = test_path {
227- std:: fs:: write ( & path, generate_tests ( & unicode_data) ) . unwrap ( ) ;
228- }
229-
230227 let mut table_file = String :: new ( ) ;
231228 writeln ! (
232229 table_file,
@@ -279,8 +276,12 @@ fn main() {
279276 writeln ! ( table_file, "}}\n " ) ;
280277 }
281278
282- std:: fs:: write ( & write_location, table_file) . unwrap ( ) ;
283- rustfmt ( & write_location) ;
279+ let test_file = generate_tests ( & unicode_data) ;
280+
281+ std:: fs:: write ( & test_path, test_file) . unwrap ( ) ;
282+ std:: fs:: write ( & data_path, table_file) . unwrap ( ) ;
283+ rustfmt ( & data_path) ;
284+ rustfmt ( & test_path) ;
284285}
285286
286287fn rustfmt ( path : & str ) {
@@ -303,79 +304,134 @@ fn version() -> String {
303304}
304305
305306fn generate_tests ( data : & UnicodeData ) -> String {
306- let mut s = format ! (
307- "#![feature(core_intrinsics)]
308- #![allow(internal_features, dead_code)]
309- // ignore-tidy-filelength
310- mod rt;
311- mod unicode_data;
312- fn main() {{"
307+ let mut s = String :: new ( ) ;
308+ writeln ! (
309+ s,
310+ "//! This file is generated by `./x run src/tools/unicode-table-generator`; do not edit manually!\n "
313311 ) ;
312+ writeln ! ( s, "use std::ops::RangeInclusive;\n " ) ;
313+ writeln ! ( s, "use core::unicode::unicode_data;\n " ) ;
314314 for ( property, ranges) in & data. ranges {
315- let prop = property. to_lowercase ( ) ;
315+ let prop_lower = property. to_lowercase ( ) ;
316+ let prop_upper = property. to_uppercase ( ) ;
316317 let ( is_true, is_false) : ( Vec < _ > , Vec < _ > ) = ( char:: MIN ..=char:: MAX )
317318 . filter ( |c| !c. is_ascii ( ) )
318319 . map ( u32:: from)
319320 . partition ( |c| ranges. iter ( ) . any ( |r| r. contains ( c) ) ) ;
320321
322+ let is_true = ranges_from_set ( & is_true) ;
323+ let is_false = ranges_from_set ( & is_false) ;
324+
325+ let is_true = is_true
326+ . iter ( )
327+ . map ( |r| {
328+ let start = char:: from_u32 ( r. start ) . unwrap ( ) ;
329+ let end = char:: from_u32 ( r. end - 1 ) . unwrap ( ) ;
330+ start..=end
331+ } )
332+ . collect :: < Vec < _ > > ( ) ;
333+ let is_false = is_false
334+ . iter ( )
335+ . map ( |r| {
336+ let start = char:: from_u32 ( r. start ) . unwrap ( ) ;
337+ let end = char:: from_u32 ( r. end - 1 ) . unwrap ( ) ;
338+ start..=end
339+ } )
340+ . collect :: < Vec < _ > > ( ) ;
341+
321342 writeln ! (
322343 s,
323- "println!(\" Testing {prop}\" );
324- {prop}_true();
325- {prop}_false();
326- fn {prop}_true() {{\n {}\n }}
327- fn {prop}_false() {{\n {}\n }}" ,
328- generate_asserts( & prop, & is_true, true ) ,
329- generate_asserts( & prop, & is_false, false )
344+ "
345+ #[test]
346+ #[cfg_attr(miri, ignore)]
347+ fn {prop_lower}_true() {{
348+ for range in {prop_upper}_TRUE {{
349+ for c in range.clone() {{
350+ assert!(unicode_data::{prop_lower}::lookup(c), \" {{c:?}}\" );
351+ }}
352+ }}
353+ }}
354+ #[rustfmt::skip]
355+ static {prop_upper}_TRUE: &[RangeInclusive<char>; {is_true_len}] = &[{is_true}];
356+
357+ #[test]
358+ #[cfg_attr(miri, ignore)]
359+ fn {prop_lower}_false() {{
360+ for range in {prop_upper}_FALSE {{
361+ for c in range.clone() {{
362+ assert!(!unicode_data::{prop_lower}::lookup(c), \" {{c:?}}\" );
363+ }}
364+ }}
365+ }}
366+ #[rustfmt::skip]
367+ static {prop_upper}_FALSE: &[RangeInclusive<char>; {is_false_len}] = &[{is_false}];
368+ " ,
369+ is_true_len = is_true. len( ) ,
370+ is_false_len = is_false. len( ) ,
371+ is_true = fmt_list( is_true) ,
372+ is_false = fmt_list( is_false) ,
330373 ) ;
331374 }
332375
333- for ( name, conversion) in [ "to_lower" , "to_upper" ] . iter ( ) . zip ( [ & data. to_lower , & data. to_upper ] )
376+ for ( prop_lower, conversion) in
377+ [ "to_lower" , "to_upper" ] . iter ( ) . zip ( [ & data. to_lower , & data. to_upper ] )
334378 {
335- writeln ! ( s, r#"println!("Testing {name}");"# ) ;
336- for ( c, mapping) in conversion {
337- let c = char:: from_u32 ( * c) . unwrap ( ) ;
338- let mapping = mapping. map ( |c| char:: from_u32 ( c) . unwrap ( ) ) ;
339- writeln ! ( s, "assert_eq!(unicode_data::conversions::{name}({c:?}), {mapping:?});" ) ;
340- }
379+ let prop_upper = prop_lower. to_uppercase ( ) ;
380+
381+ let mapped = conversion
382+ . iter ( )
383+ . map ( |( c, chars) | {
384+ ( char:: from_u32 ( * c) . unwrap ( ) , chars. map ( |c| char:: from_u32 ( c) . unwrap ( ) ) )
385+ } )
386+ . collect :: < Vec < _ > > ( ) ;
387+
341388 let unmapped: Vec < _ > = ( char:: MIN ..=char:: MAX )
342389 . filter ( |c| !c. is_ascii ( ) )
343390 . map ( u32:: from)
344391 . filter ( |c| !conversion. contains_key ( c) )
345392 . collect ( ) ;
346- let unmapped_ranges = ranges_from_set ( & unmapped) ;
347- for range in unmapped_ranges {
348- let start = char:: from_u32 ( range. start ) . unwrap ( ) ;
349- let end = char:: from_u32 ( range. end - 1 ) . unwrap ( ) ;
350- writeln ! (
351- s,
352- r#"for c in {start:?}..={end:?} {{
353- assert_eq!(unicode_data::conversions::{name}(c), [c, '\0', '\0']);
354- }}"#
355- ) ;
356- }
357- }
358-
359- writeln ! ( s, "}}" ) ;
360- s
361- }
393+ let unmapped = ranges_from_set ( & unmapped) ;
394+ let unmapped = unmapped
395+ . iter ( )
396+ . map ( |r| {
397+ let start = char:: from_u32 ( r. start ) . unwrap ( ) ;
398+ let end = char:: from_u32 ( r. end - 1 ) . unwrap ( ) ;
399+ start..=end
400+ } )
401+ . collect :: < Vec < _ > > ( ) ;
362402
363- fn generate_asserts ( prop : & str , points : & [ u32 ] , truthy : bool ) -> String {
364- let mut s = String :: new ( ) ;
365- let truthy = if truthy { "" } else { "!" } ;
366- for range in ranges_from_set ( points) {
367- let start = char:: from_u32 ( range. start ) . unwrap ( ) ;
368- let end = char:: from_u32 ( range. end - 1 ) . unwrap ( ) ;
369- match range. len ( ) {
370- 1 => writeln ! ( s, "assert!({truthy}unicode_data::{prop}::lookup({start:?}));" ) ,
371- _ => writeln ! (
372- s,
373- "for c in {start:?}..={end:?} {{
374- assert!({truthy}unicode_data::{prop}::lookup(c));
375- }}"
376- ) ,
377- }
403+ writeln ! (
404+ s,
405+ r#"
406+ #[test]
407+ #[cfg_attr(miri, ignore)]
408+ fn {prop_lower}_mapped() {{
409+ for (c, chars) in {prop_upper}_MAPPED {{
410+ assert_eq!(unicode_data::conversions::{prop_lower}(*c), *chars, "{{c:?}}");
411+ }}
412+ }}
413+ #[rustfmt::skip]
414+ static {prop_upper}_MAPPED: &[(char, [char; 3]); {mapped_len}] = &[{mapped}];
415+
416+ #[test]
417+ #[cfg_attr(miri, ignore)]
418+ fn {prop_lower}_unmapped() {{
419+ for range in {prop_upper}_UNMAPPED {{
420+ for c in range.clone() {{
421+ assert_eq!(unicode_data::conversions::{prop_lower}(c), [c, '\0', '\0'], "{{c:?}}");
422+ }}
423+ }}
424+ }}
425+ #[rustfmt::skip]
426+ static {prop_upper}_UNMAPPED: &[RangeInclusive<char>; {unmapped_len}] = &[{unmapped}];
427+ "# ,
428+ mapped_len = mapped. len( ) ,
429+ unmapped_len = unmapped. len( ) ,
430+ mapped = fmt_list( mapped) ,
431+ unmapped = fmt_list( unmapped) ,
432+ ) ;
378433 }
434+
379435 s
380436}
381437
0 commit comments