@@ -404,29 +404,163 @@ pub fn len_utf8_bytes(c: char) -> uint {
404
404
}
405
405
}
406
406
407
- # [ allow ( missing_doc ) ]
407
+ /// Useful functions for Unicode characters.
408
408
pub trait Char {
409
+ /// Returns whether the specified character is considered a Unicode
410
+ /// alphabetic code point.
409
411
fn is_alphabetic ( & self ) -> bool ;
412
+
413
+ /// Returns whether the specified character satisfies the 'XID_Start'
414
+ /// Unicode property.
415
+ ///
416
+ /// 'XID_Start' is a Unicode Derived Property specified in
417
+ /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
418
+ /// mostly similar to 'ID_Start' but modified for closure under NFKx.
410
419
fn is_XID_start ( & self ) -> bool ;
420
+
421
+ /// Returns whether the specified `char` satisfies the 'XID_Continue'
422
+ /// Unicode property.
423
+ ///
424
+ /// 'XID_Continue' is a Unicode Derived Property specified in
425
+ /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
426
+ /// mostly similar to 'ID_Continue' but modified for closure under NFKx.
411
427
fn is_XID_continue ( & self ) -> bool ;
428
+
429
+
430
+ /// Indicates whether a character is in lowercase.
431
+ ///
432
+ /// This is defined according to the terms of the Unicode Derived Core
433
+ /// Property `Lowercase`.
412
434
fn is_lowercase ( & self ) -> bool ;
435
+
436
+ /// Indicates whether a character is in uppercase.
437
+ ///
438
+ /// This is defined according to the terms of the Unicode Derived Core
439
+ /// Property `Uppercase`.
413
440
fn is_uppercase ( & self ) -> bool ;
441
+
442
+ /// Indicates whether a character is whitespace.
443
+ ///
444
+ /// Whitespace is defined in terms of the Unicode Property `White_Space`.
414
445
fn is_whitespace ( & self ) -> bool ;
446
+
447
+ /// Indicates whether a character is alphanumeric.
448
+ ///
449
+ /// Alphanumericness is defined in terms of the Unicode General Categories
450
+ /// 'Nd', 'Nl', 'No' and the Derived Core Property 'Alphabetic'.
415
451
fn is_alphanumeric ( & self ) -> bool ;
452
+
453
+ /// Indicates whether a character is a control code point.
454
+ ///
455
+ /// Control code points are defined in terms of the Unicode General
456
+ /// Category `Cc`.
416
457
fn is_control ( & self ) -> bool ;
458
+
459
+ /// Indicates whether the character is numeric (Nd, Nl, or No).
417
460
fn is_digit ( & self ) -> bool ;
461
+
462
+ /// Checks if a `char` parses as a numeric digit in the given radix.
463
+ ///
464
+ /// Compared to `is_digit()`, this function only recognizes the characters
465
+ /// `0-9`, `a-z` and `A-Z`.
466
+ ///
467
+ /// # Return value
468
+ ///
469
+ /// Returns `true` if `c` is a valid digit under `radix`, and `false`
470
+ /// otherwise.
471
+ ///
472
+ /// # Failure
473
+ ///
474
+ /// Fails if given a radix > 36.
418
475
fn is_digit_radix ( & self , radix : uint ) -> bool ;
476
+
477
+ /// Converts a character to the corresponding digit.
478
+ ///
479
+ /// # Return value
480
+ ///
481
+ /// If `c` is between '0' and '9', the corresponding value between 0 and
482
+ /// 9. If `c` is 'a' or 'A', 10. If `c` is 'b' or 'B', 11, etc. Returns
483
+ /// `None` if the character does not refer to a digit in the given radix.
484
+ ///
485
+ /// # Failure
486
+ ///
487
+ /// Fails if given a radix > 36.
419
488
fn to_digit ( & self , radix : uint ) -> Option < uint > ;
489
+
490
+ /// Converts a character to its lowercase equivalent.
491
+ ///
492
+ /// The case-folding performed is the common or simple mapping. See
493
+ /// `to_uppercase()` for references and more information.
494
+ ///
495
+ /// # Return value
496
+ ///
497
+ /// Returns the lowercase equivalent of the character, or the character
498
+ /// itself if no conversion is possible.
420
499
fn to_lowercase ( & self ) -> char ;
500
+
501
+ /// Converts a character to its uppercase equivalent.
502
+ ///
503
+ /// The case-folding performed is the common or simple mapping: it maps
504
+ /// one Unicode code point (one character in Rust) to its uppercase
505
+ /// equivalent according to the Unicode database [1]. The additional
506
+ /// `SpecialCasing.txt` is not considered here, as it expands to multiple
507
+ /// code points in some cases.
508
+ ///
509
+ /// A full reference can be found here [2].
510
+ ///
511
+ /// # Return value
512
+ ///
513
+ /// Returns the uppercase equivalent of the character, or the character
514
+ /// itself if no conversion was made.
515
+ ///
516
+ /// [1]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
517
+ ///
518
+ /// [2]: http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf#G33992
421
519
fn to_uppercase ( & self ) -> char ;
520
+
521
+ /// Converts a number to the character representing it.
522
+ ///
523
+ /// # Return value
524
+ ///
525
+ /// Returns `Some(char)` if `num` represents one digit under `radix`,
526
+ /// using one character of `0-9` or `a-z`, or `None` if it doesn't.
527
+ ///
528
+ /// # Failure
529
+ ///
530
+ /// Fails if given a radix > 36.
422
531
fn from_digit ( num : uint , radix : uint ) -> Option < char > ;
532
+
533
+ /// Returns the hexadecimal Unicode escape of a character.
534
+ ///
535
+ /// The rules are as follows:
536
+ ///
537
+ /// * Characters in [0,0xff] get 2-digit escapes: `\\xNN`.
538
+ /// * Characters in [0x100,0xffff] get 4-digit escapes: `\\uNNNN`.
539
+ /// * Characters above 0xffff get 8-digit escapes: `\\UNNNNNNNN`.
423
540
fn escape_unicode ( & self , f: |char|) ;
541
+
542
+ /// Returns a 'default' ASCII and C++11-like literal escape of a
543
+ /// character.
544
+ ///
545
+ /// The default is chosen with a bias toward producing literals that are
546
+ /// legal in a variety of languages, including C++11 and similar C-family
547
+ /// languages. The exact rules are:
548
+ ///
549
+ /// * Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively.
550
+ /// * Single-quote, double-quote and backslash chars are backslash-
551
+ /// escaped.
552
+ /// * Any other chars in the range [0x20,0x7e] are not escaped.
553
+ /// * Any other chars are given hex unicode escapes; see `escape_unicode`.
424
554
fn escape_default ( & self , f: |char|) ;
555
+
556
+ /// Returns the number of bytes this character would need if encoded in
557
+ /// UTF-8.
425
558
fn len_utf8_bytes ( & self ) -> uint ;
426
559
427
- /// Encodes this `char` as utf -8 into the provided byte- buffer
560
+ /// Encodes this character as UTF-8 into the provided byte buffer.
428
561
///
429
- /// The buffer must be at least 4 bytes long or a runtime failure will occur.
562
+ /// The buffer must be at least 4 bytes long or a runtime failure will
563
+ /// occur.
430
564
///
431
565
/// This will then return the number of bytes written to the slice.
432
566
fn encode_utf8 ( & self , dst : & mut [ u8 ] ) -> uint ;
0 commit comments