@@ -152,9 +152,17 @@ fn main() {
152
152
std:: process:: exit ( 1 ) ;
153
153
} ) ;
154
154
155
+ // Optional test path, which is a Rust source file testing that the unicode
156
+ // property lookups are correct.
157
+ let test_path = std:: env:: args ( ) . nth ( 2 ) ;
158
+
155
159
let unicode_data = load_data ( ) ;
156
160
let ranges_by_property = & unicode_data. ranges ;
157
161
162
+ if let Some ( path) = test_path {
163
+ std:: fs:: write ( & path, generate_tests ( & write_location, & ranges_by_property) ) . unwrap ( ) ;
164
+ }
165
+
158
166
let mut total_bytes = 0 ;
159
167
let mut modules = Vec :: new ( ) ;
160
168
for ( property, ranges) in ranges_by_property {
@@ -236,6 +244,99 @@ fn fmt_list<V: std::fmt::Debug>(values: impl IntoIterator<Item = V>) -> String {
236
244
out
237
245
}
238
246
247
+ fn generate_tests ( data_path : & str , ranges : & [ ( & str , Vec < Range < u32 > > ) ] ) -> String {
248
+ let mut s = String :: new ( ) ;
249
+ s. push_str ( "#![allow(incomplete_features, unused)]\n " ) ;
250
+ s. push_str ( "#![feature(const_generics)]\n \n " ) ;
251
+ s. push_str ( & format ! ( "#[path = \" {}\" ]\n " , data_path) ) ;
252
+ s. push_str ( "mod unicode_data;\n \n " ) ;
253
+
254
+ s. push_str (
255
+ "
256
+ #[inline(always)]
257
+ fn range_search<const N: usize, const CHUNK_SIZE: usize, const N1: usize, const N2: usize>(
258
+ needle: u32,
259
+ chunk_idx_map: &[u8; N],
260
+ (last_chunk_idx, last_chunk_mapping): (u16, u8),
261
+ bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
262
+ bitset: &[u64; N2],
263
+ ) -> bool {
264
+ let bucket_idx = (needle / 64) as usize;
265
+ let chunk_map_idx = bucket_idx / CHUNK_SIZE;
266
+ let chunk_piece = bucket_idx % CHUNK_SIZE;
267
+ let chunk_idx = if chunk_map_idx >= N {
268
+ if chunk_map_idx == last_chunk_idx as usize {
269
+ last_chunk_mapping
270
+ } else {
271
+ return false;
272
+ }
273
+ } else {
274
+ chunk_idx_map[chunk_map_idx]
275
+ };
276
+ let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece];
277
+ let word = bitset[(idx as usize)];
278
+ (word & (1 << (needle % 64) as u64)) != 0
279
+ }
280
+ " ,
281
+ ) ;
282
+
283
+ s. push_str ( "\n fn main() {\n " ) ;
284
+
285
+ for ( property, ranges) in ranges {
286
+ s. push_str ( & format ! ( r#" println!("Testing {}");"# , property) ) ;
287
+ s. push ( '\n' ) ;
288
+ s. push_str ( & format ! ( " {}();\n " , property. to_lowercase( ) ) ) ;
289
+ let mut is_true = Vec :: new ( ) ;
290
+ let mut is_false = Vec :: new ( ) ;
291
+ for ch_num in 0 ..( std:: char:: MAX as u32 ) {
292
+ if std:: char:: from_u32 ( ch_num) . is_none ( ) {
293
+ continue ;
294
+ }
295
+ if ranges. iter ( ) . any ( |r| r. contains ( & ch_num) ) {
296
+ is_true. push ( ch_num) ;
297
+ } else {
298
+ is_false. push ( ch_num) ;
299
+ }
300
+ }
301
+
302
+ s. push_str ( & format ! ( " fn {}() {{\n " , property. to_lowercase( ) ) ) ;
303
+ generate_asserts ( & mut s, property, & is_true, true ) ;
304
+ generate_asserts ( & mut s, property, & is_false, false ) ;
305
+ s. push_str ( " }\n \n " ) ;
306
+ }
307
+
308
+ s. push_str ( "}" ) ;
309
+ s
310
+ }
311
+
312
+ fn generate_asserts ( s : & mut String , property : & str , points : & [ u32 ] , truthy : bool ) {
313
+ for range in ranges_from_set ( points) {
314
+ if range. end == range. start + 1 {
315
+ s. push_str ( & format ! (
316
+ " assert!({}unicode_data::{}::lookup(std::char::from_u32({}).unwrap()), \" {}\" );\n " ,
317
+ if truthy { "" } else { "!" } ,
318
+ property. to_lowercase( ) ,
319
+ range. start,
320
+ std:: char :: from_u32( range. start) . unwrap( ) ,
321
+ ) ) ;
322
+ } else {
323
+ s. push_str ( & format ! ( " for chn in {:?}u32 {{\n " , range) ) ;
324
+ s. push_str ( & format ! (
325
+ " assert!({}unicode_data::{}::lookup(std::char::from_u32(chn).unwrap()), \" {{:?}}\" , chn);\n " ,
326
+ if truthy { "" } else { "!" } ,
327
+ property. to_lowercase( ) ,
328
+ ) ) ;
329
+ s. push_str ( " }\n " ) ;
330
+ }
331
+ }
332
+ }
333
+
334
+ fn ranges_from_set ( set : & [ u32 ] ) -> Vec < Range < u32 > > {
335
+ let mut ranges = set. iter ( ) . map ( |e| ( * e) ..( * e + 1 ) ) . collect :: < Vec < Range < u32 > > > ( ) ;
336
+ merge_ranges ( & mut ranges) ;
337
+ ranges
338
+ }
339
+
239
340
fn merge_ranges ( ranges : & mut Vec < Range < u32 > > ) {
240
341
loop {
241
342
let mut new_ranges = Vec :: new ( ) ;
0 commit comments