core/char/
methods.rs

1//! impl char {}
2
3use super::*;
4use crate::panic::const_panic;
5use crate::slice;
6use crate::str::from_utf8_unchecked_mut;
7use crate::ub_checks::assert_unsafe_precondition;
8use crate::unicode::{self, conversions};
9
10impl char {
11    /// The lowest valid code point a `char` can have, `'\0'`.
12    ///
13    /// Unlike integer types, `char` actually has a gap in the middle,
14    /// meaning that the range of possible `char`s is smaller than you
15    /// might expect. Ranges of `char` will automatically hop this gap
16    /// for you:
17    ///
18    /// ```
19    /// let dist = u32::from(char::MAX) - u32::from(char::MIN);
20    /// let size = (char::MIN..=char::MAX).count() as u32;
21    /// assert!(size < dist);
22    /// ```
23    ///
24    /// Despite this gap, the `MIN` and [`MAX`] values can be used as bounds for
25    /// all `char` values.
26    ///
27    /// [`MAX`]: char::MAX
28    ///
29    /// # Examples
30    ///
31    /// ```
32    /// # fn something_which_returns_char() -> char { 'a' }
33    /// let c: char = something_which_returns_char();
34    /// assert!(char::MIN <= c);
35    ///
36    /// let value_at_min = u32::from(char::MIN);
37    /// assert_eq!(char::from_u32(value_at_min), Some('\0'));
38    /// ```
39    #[stable(feature = "char_min", since = "1.83.0")]
    pub const MIN: char = '\0'; // U+0000
41
42    /// The highest valid code point a `char` can have, `'\u{10FFFF}'`.
43    ///
44    /// Unlike integer types, `char` actually has a gap in the middle,
45    /// meaning that the range of possible `char`s is smaller than you
46    /// might expect. Ranges of `char` will automatically hop this gap
47    /// for you:
48    ///
49    /// ```
50    /// let dist = u32::from(char::MAX) - u32::from(char::MIN);
51    /// let size = (char::MIN..=char::MAX).count() as u32;
52    /// assert!(size < dist);
53    /// ```
54    ///
55    /// Despite this gap, the [`MIN`] and `MAX` values can be used as bounds for
56    /// all `char` values.
57    ///
58    /// [`MIN`]: char::MIN
59    ///
60    /// # Examples
61    ///
62    /// ```
63    /// # fn something_which_returns_char() -> char { 'a' }
64    /// let c: char = something_which_returns_char();
65    /// assert!(c <= char::MAX);
66    ///
67    /// let value_at_max = u32::from(char::MAX);
68    /// assert_eq!(char::from_u32(value_at_max), Some('\u{10FFFF}'));
69    /// assert_eq!(char::from_u32(value_at_max + 1), None);
70    /// ```
71    #[stable(feature = "assoc_char_consts", since = "1.52.0")]
    pub const MAX: char = '\u{10FFFF}'; // `from_u32` returns `None` for anything above this
73
74    /// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
75    /// UTF-8 encoding.
76    #[stable(feature = "char_max_len_assoc", since = "1.93.0")]
    pub const MAX_LEN_UTF8: usize = 4; // same upper bound as documented on `len_utf8`
78
79    /// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
80    /// to UTF-16 encoding.
81    #[stable(feature = "char_max_len_assoc", since = "1.93.0")]
    pub const MAX_LEN_UTF16: usize = 2; // same upper bound as documented on `len_utf16`
83
84    /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
85    /// decoding error.
86    ///
87    /// It can occur, for example, when giving ill-formed UTF-8 bytes to
88    /// [`String::from_utf8_lossy`](../std/string/struct.String.html#method.from_utf8_lossy).
89    #[stable(feature = "assoc_char_consts", since = "1.52.0")]
    pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}'; // U+FFFD
91
92    /// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
93    /// `char` and `str` methods are based on.
94    ///
95    /// New versions of Unicode are released regularly, and subsequently all methods
96    /// in the standard library depending on Unicode are updated. Therefore, the
97    /// behavior of some `char` and `str` methods, and the value of this constant,
98    /// change over time (within the boundaries of Unicode's [stability policies]).
99    /// This is *not* considered to be a breaking change.
100    ///
101    /// [stability policies]: https://www.unicode.org/policies/stability_policy.html
102    ///
103    /// The version numbering scheme is explained in
104    /// [Section 3.1 (Version Numbering)] of the Unicode Standard.
105    ///
106    /// [Section 3.1 (Version Numbering)]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G49512
107    #[stable(feature = "assoc_char_consts", since = "1.52.0")]
    // Re-exported unchanged: the value itself is defined in the `unicode` module.
    pub const UNICODE_VERSION: (u8, u8, u8) = crate::unicode::UNICODE_VERSION;
109
110    /// Creates an iterator over the native endian UTF-16 encoded code points in `iter`,
111    /// returning unpaired surrogates as `Err`s.
112    ///
113    /// # Examples
114    ///
115    /// Basic usage:
116    ///
117    /// ```
118    /// // 𝄞mus<invalid>ic<invalid>
119    /// let v = [
120    ///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
121    /// ];
122    ///
123    /// assert_eq!(
124    ///     char::decode_utf16(v)
125    ///         .map(|r| r.map_err(|e| e.unpaired_surrogate()))
126    ///         .collect::<Vec<_>>(),
127    ///     vec![
128    ///         Ok('𝄞'),
129    ///         Ok('m'), Ok('u'), Ok('s'),
130    ///         Err(0xDD1E),
131    ///         Ok('i'), Ok('c'),
132    ///         Err(0xD834)
133    ///     ]
134    /// );
135    /// ```
136    ///
137    /// A lossy decoder can be obtained by replacing `Err` results with the replacement character:
138    ///
139    /// ```
140    /// // 𝄞mus<invalid>ic<invalid>
141    /// let v = [
142    ///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
143    /// ];
144    ///
145    /// assert_eq!(
146    ///     char::decode_utf16(v)
147    ///        .map(|r| r.unwrap_or(char::REPLACEMENT_CHARACTER))
148    ///        .collect::<String>(),
149    ///     "𝄞mus�ic�"
150    /// );
151    /// ```
152    #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
153    #[inline]
    pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
        // Inherent-method entry point only: the decoder is built by the free
        // function of the same name in the sibling `decode` module, which
        // receives `iter` exactly as passed in.
        super::decode::decode_utf16(iter)
    }
157
158    /// Converts a `u32` to a `char`.
159    ///
160    /// Note that all `char`s are valid [`u32`]s, and can be cast to one with
161    /// [`as`](../std/keyword.as.html):
162    ///
163    /// ```
164    /// let c = '💯';
165    /// let i = c as u32;
166    ///
167    /// assert_eq!(128175, i);
168    /// ```
169    ///
170    /// However, the reverse is not true: not all valid [`u32`]s are valid
171    /// `char`s. `from_u32()` will return `None` if the input is not a valid value
172    /// for a `char`.
173    ///
174    /// For an unsafe version of this function which ignores these checks, see
175    /// [`from_u32_unchecked`].
176    ///
177    /// [`from_u32_unchecked`]: #method.from_u32_unchecked
178    ///
179    /// # Examples
180    ///
181    /// Basic usage:
182    ///
183    /// ```
184    /// let c = char::from_u32(0x2764);
185    ///
186    /// assert_eq!(Some('❤'), c);
187    /// ```
188    ///
189    /// Returning `None` when the input is not a valid `char`:
190    ///
191    /// ```
192    /// let c = char::from_u32(0x110000);
193    ///
194    /// assert_eq!(None, c);
195    /// ```
196    #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
197    #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
198    #[must_use]
199    #[inline]
    pub const fn from_u32(i: u32) -> Option<char> {
        // No validation is done in this method itself; deciding whether `i` is
        // a valid `char` is left entirely to `convert::from_u32`.
        super::convert::from_u32(i)
    }
203
204    /// Converts a `u32` to a `char`, ignoring validity.
205    ///
206    /// Note that all `char`s are valid [`u32`]s, and can be cast to one with
207    /// `as`:
208    ///
209    /// ```
210    /// let c = '💯';
211    /// let i = c as u32;
212    ///
213    /// assert_eq!(128175, i);
214    /// ```
215    ///
216    /// However, the reverse is not true: not all valid [`u32`]s are valid
217    /// `char`s. `from_u32_unchecked()` will ignore this, and blindly cast to
218    /// `char`, possibly creating an invalid one.
219    ///
220    /// # Safety
221    ///
222    /// This function is unsafe, as it may construct invalid `char` values.
223    ///
224    /// For a safe version of this function, see the [`from_u32`] function.
225    ///
226    /// [`from_u32`]: #method.from_u32
227    ///
228    /// # Examples
229    ///
230    /// Basic usage:
231    ///
232    /// ```
233    /// let c = unsafe { char::from_u32_unchecked(0x2764) };
234    ///
235    /// assert_eq!('❤', c);
236    /// ```
237    #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
238    #[rustc_const_stable(feature = "const_char_from_u32_unchecked", since = "1.81.0")]
239    #[must_use]
240    #[inline]
    pub const unsafe fn from_u32_unchecked(i: u32) -> char {
        // Nothing is checked in this method: `i` is handed to the `convert`
        // helper exactly as received.
        // SAFETY: the safety contract must be upheld by the caller.
        unsafe { super::convert::from_u32_unchecked(i) }
    }
245
246    /// Converts a digit in the given radix to a `char`.
247    ///
248    /// A 'radix' here is sometimes also called a 'base'. A radix of two
249    /// indicates a binary number, a radix of ten, decimal, and a radix of
250    /// sixteen, hexadecimal, to give some common values. Arbitrary
251    /// radices are supported.
252    ///
253    /// `from_digit()` will return `None` if the input is not a digit in
254    /// the given radix.
255    ///
256    /// # Panics
257    ///
258    /// Panics if given a radix larger than 36.
259    ///
260    /// # Examples
261    ///
262    /// Basic usage:
263    ///
264    /// ```
265    /// let c = char::from_digit(4, 10);
266    ///
267    /// assert_eq!(Some('4'), c);
268    ///
269    /// // Decimal 11 is a single digit in base 16
270    /// let c = char::from_digit(11, 16);
271    ///
272    /// assert_eq!(Some('b'), c);
273    /// ```
274    ///
275    /// Returning `None` when the input is not a digit:
276    ///
277    /// ```
278    /// let c = char::from_digit(20, 10);
279    ///
280    /// assert_eq!(None, c);
281    /// ```
282    ///
283    /// Passing a large radix, causing a panic:
284    ///
285    /// ```should_panic
286    /// // this panics
287    /// let _c = char::from_digit(1, 37);
288    /// ```
289    #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
290    #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
291    #[must_use]
292    #[inline]
    pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
        // This method adds no logic of its own, so both the digit lookup and
        // the documented panic for an oversized radix come from
        // `convert::from_digit`.
        super::convert::from_digit(num, radix)
    }
296
297    /// Checks if a `char` is a digit in the given radix.
298    ///
299    /// A 'radix' here is sometimes also called a 'base'. A radix of two
300    /// indicates a binary number, a radix of ten, decimal, and a radix of
301    /// sixteen, hexadecimal, to give some common values. Arbitrary
302    /// radices are supported.
303    ///
304    /// Compared to [`is_numeric()`], this function only recognizes the characters
305    /// `0-9`, `a-z` and `A-Z`.
306    ///
307    /// 'Digit' is defined to be only the following characters:
308    ///
309    /// * `0-9`
310    /// * `a-z`
311    /// * `A-Z`
312    ///
313    /// For a more comprehensive understanding of 'digit', see [`is_numeric()`].
314    ///
315    /// [`is_numeric()`]: #method.is_numeric
316    ///
317    /// # Panics
318    ///
319    /// Panics if given a radix smaller than 2 or larger than 36.
320    ///
321    /// # Examples
322    ///
323    /// Basic usage:
324    ///
325    /// ```
326    /// assert!('1'.is_digit(10));
327    /// assert!('f'.is_digit(16));
328    /// assert!(!'f'.is_digit(10));
329    /// ```
330    ///
331    /// Passing a large radix, causing a panic:
332    ///
333    /// ```should_panic
334    /// // this panics
335    /// '1'.is_digit(37);
336    /// ```
337    ///
338    /// Passing a small radix, causing a panic:
339    ///
340    /// ```should_panic
341    /// // this panics
342    /// '1'.is_digit(1);
343    /// ```
344    #[stable(feature = "rust1", since = "1.0.0")]
345    #[rustc_const_stable(feature = "const_char_classify", since = "1.87.0")]
346    #[inline]
    pub const fn is_digit(self, radix: u32) -> bool {
        // A char is a digit in `radix` exactly when `to_digit` produces a
        // value. Delegating also means the radix assertion inside `to_digit`
        // is what raises the documented panic for radices outside 2..=36.
        self.to_digit(radix).is_some()
    }
350
351    /// Converts a `char` to a digit in the given radix.
352    ///
353    /// A 'radix' here is sometimes also called a 'base'. A radix of two
354    /// indicates a binary number, a radix of ten, decimal, and a radix of
355    /// sixteen, hexadecimal, to give some common values. Arbitrary
356    /// radices are supported.
357    ///
358    /// 'Digit' is defined to be only the following characters:
359    ///
360    /// * `0-9`
361    /// * `a-z`
362    /// * `A-Z`
363    ///
364    /// # Errors
365    ///
366    /// Returns `None` if the `char` does not refer to a digit in the given radix.
367    ///
368    /// # Panics
369    ///
370    /// Panics if given a radix smaller than 2 or larger than 36.
371    ///
372    /// # Examples
373    ///
374    /// Basic usage:
375    ///
376    /// ```
377    /// assert_eq!('1'.to_digit(10), Some(1));
378    /// assert_eq!('f'.to_digit(16), Some(15));
379    /// ```
380    ///
381    /// Passing a non-digit results in failure:
382    ///
383    /// ```
384    /// assert_eq!('f'.to_digit(10), None);
385    /// assert_eq!('z'.to_digit(16), None);
386    /// ```
387    ///
388    /// Passing a large radix, causing a panic:
389    ///
390    /// ```should_panic
391    /// // this panics
392    /// let _ = '1'.to_digit(37);
393    /// ```
    ///
    /// Passing a small radix, causing a panic:
395    ///
396    /// ```should_panic
397    /// // this panics
398    /// let _ = '1'.to_digit(1);
399    /// ```
400    #[stable(feature = "rust1", since = "1.0.0")]
401    #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
402    #[rustc_diagnostic_item = "char_to_digit"]
403    #[must_use = "this returns the result of the operation, \
404                  without modifying the original"]
405    #[inline]
406    pub const fn to_digit(self, radix: u32) -> Option<u32> {
407        assert!(
408            radix >= 2 && radix <= 36,
409            "to_digit: invalid radix -- radix must be in the range 2 to 36 inclusive"
410        );
411        // check radix to remove letter handling code when radix is a known constant
412        let value = if self > '9' && radix > 10 {
413            // mask to convert ASCII letters to uppercase
414            const TO_UPPERCASE_MASK: u32 = !0b0010_0000;
415            // Converts an ASCII letter to its corresponding integer value:
416            // A-Z => 10-35, a-z => 10-35. Other characters produce values >= 36.
417            //
418            // Add Overflow Safety:
419            // By applying the mask after the subtraction, the first addendum is
420            // constrained such that it never exceeds u32::MAX - 0x20.
421            ((self as u32).wrapping_sub('A' as u32) & TO_UPPERCASE_MASK) + 10
422        } else {
423            // convert digit to value, non-digits wrap to values > 36
424            (self as u32).wrapping_sub('0' as u32)
425        };
426        // FIXME(const-hack): once then_some is const fn, use it here
427        if value < radix { Some(value) } else { None }
428    }
429
430    /// Returns an iterator that yields the hexadecimal Unicode escape of a
431    /// character as `char`s.
432    ///
433    /// This will escape characters with the Rust syntax of the form
434    /// `\u{NNNNNN}` where `NNNNNN` is a hexadecimal representation.
435    ///
436    /// # Examples
437    ///
438    /// As an iterator:
439    ///
440    /// ```
441    /// for c in '❤'.escape_unicode() {
442    ///     print!("{c}");
443    /// }
444    /// println!();
445    /// ```
446    ///
447    /// Using `println!` directly:
448    ///
449    /// ```
450    /// println!("{}", '❤'.escape_unicode());
451    /// ```
452    ///
453    /// Both are equivalent to:
454    ///
455    /// ```
456    /// println!("\\u{{2764}}");
457    /// ```
458    ///
459    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
460    ///
461    /// ```
462    /// assert_eq!('❤'.escape_unicode().to_string(), "\\u{2764}");
463    /// ```
464    #[must_use = "this returns the escaped char as an iterator, \
465                  without modifying the original"]
466    #[stable(feature = "rust1", since = "1.0.0")]
467    #[inline]
    pub fn escape_unicode(self) -> EscapeUnicode {
        // The iterator is constructed by `EscapeUnicode::new`; this method
        // only passes the char through.
        EscapeUnicode::new(self)
    }
471
472    /// An extended version of `escape_debug` that optionally permits escaping
473    /// Extended Grapheme codepoints, single quotes, and double quotes. This
474    /// allows us to format characters like nonspacing marks better when they're
475    /// at the start of a string, and allows escaping single quotes in
476    /// characters, and double quotes in strings.
477    #[inline]
478    pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> EscapeDebug {
479        match self {
480            // Special escapes
481            '\"' if args.escape_double_quote => EscapeDebug::backslash(ascii::Char::QuotationMark),
482            '\'' if args.escape_single_quote => EscapeDebug::backslash(ascii::Char::Apostrophe),
483            '\\' => EscapeDebug::backslash(ascii::Char::ReverseSolidus),
484            '\n' => EscapeDebug::backslash(ascii::Char::SmallN),
485            '\t' => EscapeDebug::backslash(ascii::Char::SmallT),
486            '\r' => EscapeDebug::backslash(ascii::Char::SmallR),
487            '\0' => EscapeDebug::backslash(ascii::Char::Digit0),
488
489            // ASCII fast path
490            '\x20'..='\x7E' => EscapeDebug::printable(self),
491
492            _ if self.is_control()
493                || self.is_private_use()
494                || self.is_whitespace()
495                || args.escape_grapheme_extender && self.is_grapheme_extender()
496                || self.is_default_ignorable()
497                || self.is_format_control()
498                || !self.is_assigned() =>
499            {
500                EscapeDebug::unicode(self)
501            }
502
503            _ => EscapeDebug::printable(self),
504        }
505    }
506
507    /// Returns an iterator that yields the literal escape code of a character
508    /// as `char`s.
509    ///
510    /// This will escape the characters similar to the [`Debug`](core::fmt::Debug) implementations
511    /// of `str` or `char`.
512    ///
513    /// # Examples
514    ///
515    /// As an iterator:
516    ///
517    /// ```
518    /// for c in '\n'.escape_debug() {
519    ///     print!("{c}");
520    /// }
521    /// println!();
522    /// ```
523    ///
524    /// Using `println!` directly:
525    ///
526    /// ```
527    /// println!("{}", '\n'.escape_debug());
528    /// ```
529    ///
530    /// Both are equivalent to:
531    ///
532    /// ```
533    /// println!("\\n");
534    /// ```
535    ///
536    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
537    ///
538    /// ```
539    /// assert_eq!('\n'.escape_debug().to_string(), "\\n");
540    /// ```
541    #[must_use = "this returns the escaped char as an iterator, \
542                  without modifying the original"]
543    #[stable(feature = "char_escape_debug", since = "1.20.0")]
544    #[inline]
    pub fn escape_debug(self) -> EscapeDebug {
        // Public front end for `escape_debug_ext`, called with the `ESCAPE_ALL`
        // argument preset.
        // NOTE(review): presumably that preset enables every optional escape
        // (both quote kinds and grapheme extenders) — confirm against the
        // definition of `EscapeDebugExtArgs::ESCAPE_ALL`.
        self.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL)
    }
548
549    /// Returns an iterator that yields the literal escape code of a character
550    /// as `char`s.
551    ///
552    /// The default is chosen with a bias toward producing literals that are
553    /// legal in a variety of languages, including C++11 and similar C-family
554    /// languages. The exact rules are:
555    ///
556    /// * Tab is escaped as `\t`.
557    /// * Carriage return is escaped as `\r`.
558    /// * Line feed is escaped as `\n`.
559    /// * Single quote is escaped as `\'`.
560    /// * Double quote is escaped as `\"`.
561    /// * Backslash is escaped as `\\`.
562    /// * Any character in the 'printable ASCII' range `0x20` .. `0x7e`
563    ///   inclusive is not escaped.
564    /// * All other characters are given hexadecimal Unicode escapes; see
565    ///   [`escape_unicode`].
566    ///
567    /// [`escape_unicode`]: #method.escape_unicode
568    ///
569    /// # Examples
570    ///
571    /// As an iterator:
572    ///
573    /// ```
574    /// for c in '"'.escape_default() {
575    ///     print!("{c}");
576    /// }
577    /// println!();
578    /// ```
579    ///
580    /// Using `println!` directly:
581    ///
582    /// ```
583    /// println!("{}", '"'.escape_default());
584    /// ```
585    ///
586    /// Both are equivalent to:
587    ///
588    /// ```
589    /// println!("\\\"");
590    /// ```
591    ///
592    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
593    ///
594    /// ```
595    /// assert_eq!('"'.escape_default().to_string(), "\\\"");
596    /// ```
597    #[must_use = "this returns the escaped char as an iterator, \
598                  without modifying the original"]
599    #[stable(feature = "rust1", since = "1.0.0")]
600    #[inline]
601    pub fn escape_default(self) -> EscapeDefault {
602        match self {
603            '\t' => EscapeDefault::backslash(ascii::Char::SmallT),
604            '\r' => EscapeDefault::backslash(ascii::Char::SmallR),
605            '\n' => EscapeDefault::backslash(ascii::Char::SmallN),
606            '\\' | '\'' | '\"' => EscapeDefault::backslash(self.as_ascii().unwrap()),
607            '\x20'..='\x7e' => EscapeDefault::printable(self.as_ascii().unwrap()),
608            _ => EscapeDefault::unicode(self),
609        }
610    }
611
612    /// Returns the number of bytes this `char` would need if encoded in UTF-8.
613    ///
614    /// That number of bytes is always between 1 and 4, inclusive.
615    ///
616    /// # Examples
617    ///
618    /// Basic usage:
619    ///
620    /// ```
621    /// let len = 'A'.len_utf8();
622    /// assert_eq!(len, 1);
623    ///
624    /// let len = 'ß'.len_utf8();
625    /// assert_eq!(len, 2);
626    ///
627    /// let len = 'ℝ'.len_utf8();
628    /// assert_eq!(len, 3);
629    ///
630    /// let len = '💣'.len_utf8();
631    /// assert_eq!(len, 4);
632    /// ```
633    ///
634    /// The `&str` type guarantees that its contents are UTF-8, and so we can compare the length it
635    /// would take if each code point was represented as a `char` vs in the `&str` itself:
636    ///
637    /// ```
638    /// // as chars
639    /// let eastern = '東';
640    /// let capital = '京';
641    ///
642    /// // both can be represented as three bytes
643    /// assert_eq!(3, eastern.len_utf8());
644    /// assert_eq!(3, capital.len_utf8());
645    ///
646    /// // as a &str, these two are encoded in UTF-8
647    /// let tokyo = "東京";
648    ///
649    /// let len = eastern.len_utf8() + capital.len_utf8();
650    ///
651    /// // we can see that they take six bytes total...
652    /// assert_eq!(6, tokyo.len());
653    ///
654    /// // ... just like the &str
655    /// assert_eq!(len, tokyo.len());
656    /// ```
657    #[stable(feature = "rust1", since = "1.0.0")]
658    #[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
659    #[inline]
660    #[must_use]
    pub const fn len_utf8(self) -> usize {
        // The unqualified `len_utf8` is a free function taking the scalar value
        // as a `u32`, not a recursive call: this inherent method could only be
        // named via `Self::`/`char::` or method-call syntax.
        len_utf8(self as u32)
    }
664
665    /// Returns the number of 16-bit code units this `char` would need if
666    /// encoded in UTF-16.
667    ///
668    /// That number of code units is always either 1 or 2, for unicode scalar values in
669    /// the [basic multilingual plane] or [supplementary planes] respectively.
670    ///
671    /// See the documentation for [`len_utf8()`] for more explanation of this
672    /// concept. This function is a mirror, but for UTF-16 instead of UTF-8.
673    ///
674    /// [basic multilingual plane]: http://www.unicode.org/glossary/#basic_multilingual_plane
675    /// [supplementary planes]: http://www.unicode.org/glossary/#supplementary_planes
676    /// [`len_utf8()`]: #method.len_utf8
677    ///
678    /// # Examples
679    ///
680    /// Basic usage:
681    ///
682    /// ```
683    /// let n = 'ß'.len_utf16();
684    /// assert_eq!(n, 1);
685    ///
686    /// let len = '💣'.len_utf16();
687    /// assert_eq!(len, 2);
688    /// ```
689    #[stable(feature = "rust1", since = "1.0.0")]
690    #[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
691    #[inline]
692    #[must_use]
    pub const fn len_utf16(self) -> usize {
        // The unqualified `len_utf16` is a free function taking the scalar
        // value as a `u32`, not a recursive call: this inherent method could
        // only be named via `Self::`/`char::` or method-call syntax.
        len_utf16(self as u32)
    }
696
697    /// Encodes this character as UTF-8 into the provided byte buffer,
698    /// and then returns the subslice of the buffer that contains the encoded character.
699    ///
700    /// # Panics
701    ///
702    /// Panics if the buffer is not large enough.
703    /// A buffer of length four is large enough to encode any `char`.
704    ///
705    /// # Examples
706    ///
707    /// In both of these examples, 'ß' takes two bytes to encode.
708    ///
709    /// ```
710    /// let mut b = [0; 2];
711    ///
712    /// let result = 'ß'.encode_utf8(&mut b);
713    ///
714    /// assert_eq!(result, "ß");
715    ///
716    /// assert_eq!(result.len(), 2);
717    /// ```
718    ///
719    /// A buffer that's too small:
720    ///
721    /// ```should_panic
722    /// let mut b = [0; 1];
723    ///
724    /// // this panics
725    /// 'ß'.encode_utf8(&mut b);
726    /// ```
727    #[stable(feature = "unicode_encode_char", since = "1.15.0")]
728    #[rustc_const_stable(feature = "const_char_encode_utf8", since = "1.83.0")]
729    #[inline]
730    pub const fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
731        // SAFETY: `char` is not a surrogate, so this is valid UTF-8.
732        unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) }
733    }
734
735    /// Encodes this character as native endian UTF-16 into the provided `u16` buffer,
736    /// and then returns the subslice of the buffer that contains the encoded character.
737    ///
738    /// # Panics
739    ///
740    /// Panics if the buffer is not large enough.
741    /// A buffer of length 2 is large enough to encode any `char`.
742    ///
743    /// # Examples
744    ///
745    /// In both of these examples, '𝕊' takes two `u16`s to encode.
746    ///
747    /// ```
748    /// let mut b = [0; 2];
749    ///
750    /// let result = '𝕊'.encode_utf16(&mut b);
751    ///
752    /// assert_eq!(result.len(), 2);
753    /// ```
754    ///
755    /// A buffer that's too small:
756    ///
757    /// ```should_panic
758    /// let mut b = [0; 1];
759    ///
760    /// // this panics
761    /// '𝕊'.encode_utf16(&mut b);
762    /// ```
763    #[stable(feature = "unicode_encode_char", since = "1.15.0")]
764    #[rustc_const_stable(feature = "const_char_encode_utf16", since = "1.84.0")]
765    #[inline]
    pub const fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
        // This method adds no logic of its own: the encoding, and with it the
        // documented panic for a too-small `dst`, comes from
        // `encode_utf16_raw`, which receives the scalar value as a `u32`.
        encode_utf16_raw(self as u32, dst)
    }
769
770    /// Returns `true` if this `char` has the `Alphabetic` property.
771    ///
772    /// `Alphabetic` is [described] in Chapter 4 (Character Properties) of the Unicode Standard, and
773    /// [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
774    ///
775    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G32524
776    /// [specified]: https://www.unicode.org/reports/tr44/#Alphabetic
777    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
778    ///
779    /// # Examples
780    ///
781    /// Basic usage:
782    ///
783    /// ```
784    /// assert!('a'.is_alphabetic());
785    /// assert!('京'.is_alphabetic());
786    ///
787    /// let c = '💝';
788    /// // love is many things, but it is not alphabetic
789    /// assert!(!c.is_alphabetic());
790    /// ```
791    #[must_use]
792    #[stable(feature = "rust1", since = "1.0.0")]
793    #[inline]
794    pub fn is_alphabetic(self) -> bool {
795        match self {
796            'a'..='z' | 'A'..='Z' => true,
797            '\0'..='\u{A9}' => false,
798            _ => unicode::Alphabetic(self),
799        }
800    }
801
802    /// Returns `true` if this `char` has the `Cased` property.
803    /// A character is cased if and only if it is uppercase, lowercase, or titlecase.
804    ///
    /// `Cased` is [described] in Chapter 3 (Conformance) of the Unicode Standard and
806    /// [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
807    ///
808    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G44595
809    /// [specified]: https://www.unicode.org/reports/tr44/#Cased
810    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
811    ///
812    /// # Examples
813    ///
814    /// Basic usage:
815    ///
816    /// ```
817    /// #![feature(titlecase)]
818    /// assert!('A'.is_cased());
819    /// assert!('a'.is_cased());
820    /// assert!(!'京'.is_cased());
821    /// ```
822    #[must_use]
823    #[unstable(feature = "titlecase", issue = "153892")]
824    #[inline]
825    pub fn is_cased(self) -> bool {
826        match self {
827            'a'..='z' | 'A'..='Z' => true,
828            '\0'..='\u{A9}' => false,
829            _ => unicode::Lowercase(self) || unicode::Uppercase(self) || unicode::Lt(self),
830        }
831    }
832
833    /// Returns the case of this character:
834    /// [`Some(CharCase::Upper)`][`CharCase::Upper`] if [`self.is_uppercase()`][`char::is_uppercase`],
835    /// [`Some(CharCase::Lower)`][`CharCase::Lower`] if [`self.is_lowercase()`][`char::is_lowercase`],
836    /// [`Some(CharCase::Title)`][`CharCase::Title`] if [`self.is_titlecase()`][`char::is_titlecase`], and
837    /// `None` if [`!self.is_cased()`][`char::is_cased`].
838    ///
839    /// # Examples
840    ///
841    /// ```
842    /// #![feature(titlecase)]
843    /// use core::char::CharCase;
844    /// assert_eq!('a'.case(), Some(CharCase::Lower));
845    /// assert_eq!('δ'.case(), Some(CharCase::Lower));
846    /// assert_eq!('A'.case(), Some(CharCase::Upper));
847    /// assert_eq!('Δ'.case(), Some(CharCase::Upper));
848    /// assert_eq!('ǅ'.case(), Some(CharCase::Title));
849    /// assert_eq!('中'.case(), None);
850    /// ```
851    #[must_use]
852    #[unstable(feature = "titlecase", issue = "153892")]
853    #[inline]
854    pub fn case(self) -> Option<CharCase> {
855        match self {
856            'a'..='z' => Some(CharCase::Lower),
857            'A'..='Z' => Some(CharCase::Upper),
858            '\0'..='\u{A9}' => None,
859            _ if unicode::Lowercase(self) => Some(CharCase::Lower),
860            _ if unicode::Uppercase(self) => Some(CharCase::Upper),
861            _ if unicode::Lt(self) => Some(CharCase::Title),
862            _ => None,
863        }
864    }
865
866    /// Returns `true` if this `char` has the `Lowercase` property.
867    ///
868    /// `Lowercase` is [described] in Chapter 4 (Character Properties) of the Unicode Standard, and
869    /// [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
870    ///
871    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G136255
872    /// [specified]: https://www.unicode.org/reports/tr44/#Lowercase
873    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
874    ///
875    /// # Examples
876    ///
877    /// Basic usage:
878    ///
879    /// ```
880    /// assert!('a'.is_lowercase());
881    /// assert!('δ'.is_lowercase());
882    /// assert!(!'A'.is_lowercase());
883    /// assert!(!'Δ'.is_lowercase());
884    ///
885    /// // The various Chinese scripts and punctuation do not have case, and so:
886    /// assert!(!'中'.is_lowercase());
887    /// assert!(!' '.is_lowercase());
888    /// ```
889    ///
890    /// In a const context:
891    ///
892    /// ```
893    /// const CAPITAL_DELTA_IS_LOWERCASE: bool = 'Δ'.is_lowercase();
894    /// assert!(!CAPITAL_DELTA_IS_LOWERCASE);
895    /// ```
896    #[must_use]
897    #[stable(feature = "rust1", since = "1.0.0")]
898    #[rustc_const_stable(feature = "const_unicode_case_lookup", since = "1.84.0")]
899    #[inline]
900    pub const fn is_lowercase(self) -> bool {
901        match self {
902            'a'..='z' => true,
903            '\0'..='\u{A9}' => false,
904            _ => unicode::Lowercase(self),
905        }
906    }
907
908    /// Returns `true` if this `char` is in the general category for titlecase letters.
909    /// Conceptually, these characters consist of an uppercase portion followed by a lowercase portion.
910    ///
911    /// Titlecase letters (code points with the general category of `Lt`) are [described] in Chapter 4
912    /// (Character Properties) of the Unicode Standard, and [specified] in the Unicode Character
913    /// Database [`UnicodeData.txt`].
914    ///
915    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G124722
916    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
917    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
918    ///
919    /// # Examples
920    ///
921    /// Basic usage:
922    ///
923    /// ```
924    /// #![feature(titlecase)]
925    /// assert!('ǅ'.is_titlecase());
926    /// assert!('ῼ'.is_titlecase());
927    /// assert!(!'D'.is_titlecase());
928    /// assert!(!'z'.is_titlecase());
929    /// assert!(!'中'.is_titlecase());
930    /// assert!(!' '.is_titlecase());
931    /// ```
932    #[must_use]
933    #[unstable(feature = "titlecase", issue = "153892")]
934    #[inline]
935    pub fn is_titlecase(self) -> bool {
936        match self {
937            '\0'..='\u{01C4}' => false,
938            _ => unicode::Lt(self),
939        }
940    }
941
942    /// Returns `true` if this `char` has the `Uppercase` property.
943    ///
944    /// `Uppercase` is [described] in Chapter 4 (Character Properties) of the Unicode Standard, and
945    /// [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
946    ///
947    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G136255
948    /// [specified]: https://www.unicode.org/reports/tr44/#Uppercase
949    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
950    ///
951    /// # Examples
952    ///
953    /// Basic usage:
954    ///
955    /// ```
956    /// assert!(!'a'.is_uppercase());
957    /// assert!(!'δ'.is_uppercase());
958    /// assert!('A'.is_uppercase());
959    /// assert!('Δ'.is_uppercase());
960    ///
961    /// // The various Chinese scripts and punctuation do not have case, and so:
962    /// assert!(!'中'.is_uppercase());
963    /// assert!(!' '.is_uppercase());
964    /// ```
965    ///
966    /// In a const context:
967    ///
968    /// ```
969    /// const CAPITAL_DELTA_IS_UPPERCASE: bool = 'Δ'.is_uppercase();
970    /// assert!(CAPITAL_DELTA_IS_UPPERCASE);
971    /// ```
972    #[must_use]
973    #[stable(feature = "rust1", since = "1.0.0")]
974    #[rustc_const_stable(feature = "const_unicode_case_lookup", since = "1.84.0")]
975    #[inline]
976    pub const fn is_uppercase(self) -> bool {
977        match self {
978            'A'..='Z' => true,
979            '\0'..='\u{BF}' => false,
980            _ => unicode::Uppercase(self),
981        }
982    }
983
984    /// Returns `true` if this `char` has one of the general categories for numbers.
985    ///
986    /// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric
987    /// characters, and `No` for other numeric characters) are [specified] in the Unicode Character
988    /// Database [`UnicodeData.txt`].
989    ///
990    /// This method doesn't cover everything that could be considered a number, e.g. ideographic numbers like '三'.
991    /// If you want everything including characters with overlapping purposes, then you might want to use
992    /// a Unicode or language-processing library that exposes the appropriate character properties
993    /// (e.g. [`Numeric_Type`]) instead of looking at the Unicode categories.
994    ///
995    /// If you want to parse ASCII decimal digits (0-9) or ASCII base-N, use
996    /// `is_ascii_digit` or `is_digit` instead.
997    ///
998    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
999    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1000    /// [`Numeric_Type`]: https://www.unicode.org/reports/tr44/#Numeric_Type
1001    ///
1002    /// # Examples
1003    ///
1004    /// Basic usage:
1005    ///
1006    /// ```
1007    /// assert!('٣'.is_numeric());
1008    /// assert!('7'.is_numeric());
1009    /// assert!('৬'.is_numeric());
1010    /// assert!('¾'.is_numeric());
1011    /// assert!('①'.is_numeric());
1012    /// assert!(!'K'.is_numeric());
1013    /// assert!(!'و'.is_numeric());
1014    /// assert!(!'藏'.is_numeric());
1015    /// assert!(!'三'.is_numeric());
1016    /// ```
1017    #[must_use]
1018    #[stable(feature = "rust1", since = "1.0.0")]
1019    #[inline]
1020    pub fn is_numeric(self) -> bool {
1021        match self {
1022            '0'..='9' => true,
1023            '\0'..='\u{B1}' => false,
1024            _ => unicode::N(self),
1025        }
1026    }
1027
1028    /// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
1029    ///
1030    /// [`is_alphabetic()`]: Self::is_alphabetic
1031    /// [`is_numeric()`]: Self::is_numeric
1032    ///
1033    /// # Examples
1034    ///
1035    /// Basic usage:
1036    ///
1037    /// ```
1038    /// assert!('٣'.is_alphanumeric());
1039    /// assert!('7'.is_alphanumeric());
1040    /// assert!('৬'.is_alphanumeric());
1041    /// assert!('¾'.is_alphanumeric());
1042    /// assert!('①'.is_alphanumeric());
1043    /// assert!('K'.is_alphanumeric());
1044    /// assert!('و'.is_alphanumeric());
1045    /// assert!('藏'.is_alphanumeric());
1046    /// ```
1047    #[must_use]
1048    #[stable(feature = "rust1", since = "1.0.0")]
1049    #[inline]
1050    pub fn is_alphanumeric(self) -> bool {
1051        match self {
1052            'a'..='z' | 'A'..='Z' | '0'..='9' => true,
1053            '\0'..='\u{A9}' => false,
1054            _ => unicode::Alphabetic(self) || unicode::N(self),
1055        }
1056    }
1057
1058    /// Returns `true` if this `char` has the `White_Space` property.
1059    ///
1060    /// `White_Space` is [specified] in the Unicode Character Database [`PropList.txt`].
1061    ///
1062    /// [specified]: https://www.unicode.org/reports/tr44/#White_Space
1063    /// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
1064    ///
1065    /// # Examples
1066    ///
1067    /// Basic usage:
1068    ///
1069    /// ```
1070    /// assert!(' '.is_whitespace());
1071    ///
1072    /// // line break
1073    /// assert!('\n'.is_whitespace());
1074    ///
1075    /// // a non-breaking space
1076    /// assert!('\u{A0}'.is_whitespace());
1077    ///
1078    /// assert!(!'越'.is_whitespace());
1079    /// ```
1080    #[must_use]
1081    #[stable(feature = "rust1", since = "1.0.0")]
1082    #[rustc_const_stable(feature = "const_char_classify", since = "1.87.0")]
1083    #[inline]
1084    pub const fn is_whitespace(self) -> bool {
1085        match self {
1086            ' ' | '\x09'..='\x0d' => true,
1087            '\0'..='\u{84}' => false,
1088            _ => unicode::White_Space(self),
1089        }
1090    }
1091
1092    /// Returns `true` if this `char` has the general category for control codes.
1093    ///
1094    /// Control codes (code points with the general category of `Cc`) are [described] in Chapter 23
1095    /// (Special Areas and Format Characters) of the Unicode Standard, and [specified] in the Unicode Character
1096    /// Database [`UnicodeData.txt`]. The full set of Unicode control codes is
1097    /// `'\0'..='\x1f' | '\x7f'..='\u{9f}'`, and will never change.
1098    ///
1099    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-23/#G20365
1100    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
1101    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1102    ///
1103    /// # Examples
1104    ///
1105    /// Basic usage:
1106    ///
1107    /// ```
1108    /// assert!('\t'.is_control());
1109    /// assert!('\n'.is_control());
1110    /// assert!('\u{9C}'.is_control()); // STRING TERMINATOR
1111    /// assert!(!'q'.is_control());
1112    /// ```
1113    #[must_use]
1114    #[stable(feature = "rust1", since = "1.0.0")]
1115    #[rustc_const_stable(feature = "const_is_control", since = "1.97.0")]
1116    #[inline]
1117    pub const fn is_control(self) -> bool {
1118        // According to
1119        // https://www.unicode.org/policies/stability_policy.html#Property_Value,
1120        // the set of codepoints in `Cc` will never change.
1121        // So we can just hard-code the patterns to match against instead of using a table.
1122        matches!(self, '\0'..='\x1f' | '\x7f'..='\u{9f}')
1123    }
1124
1125    /// Returns `true` if this `char` has the general category for [private-use characters].
1126    /// These characters do not have an interpretation specified by Unicode; individual programs
1127    /// and users are free to assign them whatever meaning they like.
1128    ///
1129    /// [private-use characters]: https://www.unicode.org/faq/private_use#private_use
1130    ///
1131    /// Private-use characters (code points with the general category of `Co`) are [described] in Chapter 23
1132    /// (Special Areas and Format Characters) of the Unicode Standard, and [specified] in the
1133    /// Unicode Character Database [`UnicodeData.txt`]. The full set of private-use characters is
1134    /// `'\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}'`,
1135    /// and will never change.
1136    ///
1137    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-23/#G19184
1138    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
1139    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1140    ///
1141    #[must_use]
1142    #[unstable(feature = "char_unassigned_private_use", issue = "158322")]
1143    #[inline]
1144    pub const fn is_private_use(self) -> bool {
1145        // According to
1146        // https://www.unicode.org/policies/stability_policy.html#Property_Value,
1147        // the set of codepoints in `Co` will never change.
1148        // So we can just hard-code the patterns to match against instead of using a table.
1149        matches!(self, '\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}')
1150    }
1151
1152    /// Returns `true` if this `char` has the general category for format control characters.
1153    ///
1154    /// Format controls (code points with the general category of `Cf`) are [described] in Chapter 4
1155    /// (Character Properties) of the Unicode Standard, and [specified] in the Unicode Character
1156    /// Database [`UnicodeData.txt`].
1157    ///
1158    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G134153
1159    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
1160    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1161    ///
1162    /// # Examples
1163    ///
1164    /// Basic usage:
1165    ///
1166    /// ```ignore(private)
1167    /// assert!('\u{AD}'.is_format_control()); // SOFT HYPHEN
1168    /// assert!('\u{200B}'.is_format_control()); // ZERO WIDTH SPACE
1169    /// assert!('\u{E0041}'.is_format_control()); // TAG LATIN CAPITAL LETTER A
1170    /// assert!('۝'.is_format_control()); // ARABIC END OF AYAH
1171    /// assert!('𓐲'.is_format_control()); // EGYPTIAN HIEROGLYPH INSERT AT TOP START
1172    /// assert!(!'q'.is_format_control());
1173    /// ```
1174    #[must_use]
1175    #[inline]
1176    fn is_format_control(self) -> bool {
1177        self > '\u{AC}' && unicode::Cf(self)
1178    }
1179
1180    /// Returns `true` if this `char` has been assigned a meaning by Unicode, as of
1181    /// [`UNICODE_VERSION`].
1182    ///
1183    /// [`UNICODE_VERSION`]: Self::UNICODE_VERSION
1184    ///
1185    /// Many of Unicode's [stability policies] apply only to assigned characters.
1186    ///
1187    /// [stability policies]: https://www.unicode.org/policies/stability_policy.html
1188    ///
1189    /// Currently unassigned characters (characters for which this method returns `false`)
1190    /// may have a meaning assigned in a future version of Unicode,
1191    /// except for the 66 [noncharacters] which will never be assigned a meaning.
1192    ///
1193    /// [noncharacters]: https://www.unicode.org/faq/private_use.html#noncharacters
1194    ///
1195    /// A character is considered assigned if it is present in [`UnicodeData.txt`].
1196    /// Unassigned characters have general category `Cn`, as [described] in Chapter 4
1197    /// (Character Properties) of the Unicode Standard.
1198    ///
1199    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1200    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G134153
1201    ///
1202    /// # Examples
1203    ///
1204    /// Basic usage:
1205    ///
1206    /// ```
1207    /// #![feature(char_unassigned_private_use)]
1208    /// assert!('γ'.is_assigned()); // once a character is assigned, it stays assigned forever
1209    /// assert!(!'\u{FFFE}'.is_assigned()); // noncharacter, will never be assigned
1210    ///
1211    /// // Not currently assigned, but may be in the future,
1212    /// // so we shouldn't rely on the current status
1213    /// /* assert!(!'\u{7AAAA}'.is_assigned()); */
1214    /// ```
1215    #[must_use]
1216    #[unstable(feature = "char_unassigned_private_use", issue = "158322")]
1217    #[inline]
1218    pub fn is_assigned(self) -> bool {
1219        match self {
1220            '\0'..='\u{377}' => true,
1221            '\u{378}'..='\u{3FFFD}' => !unicode::Cn_planes_0_3(self),
1222            // Assigned character ranges in planes 4 and above.
1223            // `src/tools/unicode-table-generator/src/main.rs` asserts that this is correct
1224            '\u{E0001}'
1225            | '\u{E0020}'..='\u{E007F}'
1226            | '\u{E0100}'..='\u{E01EF}'
1227            | '\u{F0000}'..='\u{FFFFD}'
1228            | '\u{100000}'..='\u{10FFFD}' => true,
1229            _ => false,
1230        }
1231    }
1232
1233    /// Returns `true` if this `char` has the `Default_Ignorable_Code_Point` property.
1234    /// These characters [should be displayed as invisible in fallback rendering](https://www.unicode.org/faq/unsup_char#3).
1235    ///
1236    /// `Default_Ignorable_Code_Point` is [described] in Chapter 5 (Implementation Guidelines) of the Unicode Standard,
1237    /// and [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
1238    ///
1239    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-5/#G40120
1240    /// [specified]: https://www.unicode.org/reports/tr44/#Default_Ignorable_Code_Point
1241    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
1242    ///
1243    /// # Examples
1244    ///
1245    /// Basic usage:
1246    ///
1247    /// ```ignore(private)
1248    /// assert!('\u{AD}'.is_default_ignorable()); // SOFT HYPHEN
1249    /// assert!('\u{115F}'.is_default_ignorable()); // HANGUL CHOSEONG FILLER
1250    /// assert!('\u{200B}'.is_default_ignorable()); // ZERO WIDTH SPACE
1251    /// assert!('\u{E0041}'.is_default_ignorable()); // TAG LATIN CAPITAL LETTER A
1252    /// assert!(!'۝'.is_default_ignorable()); // ARABIC END OF AYAH
1253    /// assert!(!'𓐲'.is_default_ignorable()); // EGYPTIAN HIEROGLYPH INSERT AT TOP START
1254    /// assert!(!' '.is_default_ignorable());
1255    /// assert!(!'\n'.is_default_ignorable());
1256    /// assert!(!'\0'.is_default_ignorable());
1257    /// assert!(!'q'.is_default_ignorable());
1258    #[must_use]
1259    #[inline]
1260    fn is_default_ignorable(self) -> bool {
1261        self > '\u{AC}' && unicode::Default_Ignorable_Code_Point(self)
1262    }
1263
1264    /// Returns `true` if this `char` has the `Grapheme_Extend` property.
1265    ///
1266    /// `Grapheme_Extend` is [described] in Chapter 3 (Conformance) of the Unicode Standard,
1267    /// and [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
1268    ///
1269    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G41165
1270    /// [specified]: https://www.unicode.org/reports/tr44/#Grapheme_Extend
1271    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
1272    #[must_use]
1273    #[inline]
1274    fn is_grapheme_extender(self) -> bool {
1275        self > '\u{02FF}' && unicode::Grapheme_Extend(self)
1276    }
1277
1278    /// Returns `true` if this `char` has the `Case_Ignorable` property. This narrow-use property
1279    /// is used to implement context-dependent casing for the Greek letter sigma (uppercase 'Σ'),
1280    /// which has two lowercase forms.
1281    ///
1282    /// `Case_Ignorable` is [described] in Chapter 3 (Conformance) of the Unicode Core Specification,
1283    /// and [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
1284    /// See those resources, as well as [`to_lowercase()`]'s documentation, for more information.
1285    ///
1286    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G63116
1287    /// [specified]: https://www.unicode.org/reports/tr44/#Case_Ignorable
1288    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
1289    /// [`to_lowercase()`]: Self::to_lowercase()
1290    #[must_use]
1291    #[inline]
1292    #[unstable(feature = "case_ignorable", issue = "154848")]
1293    pub fn is_case_ignorable(self) -> bool {
1294        if self.is_ascii() {
1295            matches!(self, '\'' | '.' | ':' | '^' | '`')
1296        } else {
1297            unicode::Case_Ignorable(self)
1298        }
1299    }
1300
1301    /// Returns an iterator that yields the lowercase mapping of this `char` as one or more
1302    /// `char`s.
1303    ///
1304    /// If this `char` does not have a lowercase mapping, the iterator yields the same `char`.
1305    ///
1306    /// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character
1307    /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1308    ///
1309    /// [ucd]: https://www.unicode.org/reports/tr44/
1310    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1311    ///
1312    /// If this `char` expands to multiple `char`s, the iterator yields the `char`s given by
1313    /// [`SpecialCasing.txt`]. The maximum number of `char`s in a case mapping is 3.
1314    ///
1315    /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1316    /// is independent of context and language. See [below](#notes-on-context-and-locale)
1317    /// for more information.
1318    ///
1319    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1320    /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1321    ///
1322    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1323    ///
1324    /// # Examples
1325    ///
1326    /// As an iterator:
1327    ///
1328    /// ```
1329    /// for c in 'İ'.to_lowercase() {
1330    ///     print!("{c}");
1331    /// }
1332    /// println!();
1333    /// ```
1334    ///
1335    /// Using `println!` directly:
1336    ///
1337    /// ```
1338    /// println!("{}", 'İ'.to_lowercase());
1339    /// ```
1340    ///
1341    /// Both are equivalent to:
1342    ///
1343    /// ```
1344    /// println!("i\u{307}");
1345    /// ```
1346    ///
1347    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1348    ///
1349    /// ```
1350    /// assert_eq!('C'.to_lowercase().to_string(), "c");
1351    ///
1352    /// // Sometimes the result is more than one character:
1353    /// assert_eq!('İ'.to_lowercase().to_string(), "i\u{307}");
1354    ///
1355    /// // Characters that do not have both uppercase and lowercase
1356    /// // convert into themselves.
1357    /// assert_eq!('山'.to_lowercase().to_string(), "山");
1358    /// ```
1359    /// # Notes on context and locale
1360    ///
1361    /// As stated earlier, this method does not take into account language or context.
1362    /// Below is a non-exhaustive list of situations where this can be relevant.
    /// If you need to handle locale-dependent casing in your code, consider using
1364    /// an external crate, like [`icu_casemap`](https://crates.io/crates/icu_casemap)
1365    /// which is developed by Unicode.
1366    ///
1367    /// ## Greek sigma
1368    ///
    /// In Greek, the letter sigma (uppercase 'Σ') has two lowercase forms:
1370    /// 'σ' which is used in most situations, and 'ς' which appears only
1371    /// at the end of a word. [`char::to_lowercase()`] always uses the first form:
1372    ///
1373    /// ```
1374    /// assert_eq!('Σ'.to_lowercase().to_string(), "σ");
1375    /// ```
1376    ///
1377    /// `str::to_lowercase()` (only available with the `alloc` crate)
1378    /// *does* properly handle this contextual mapping,
1379    /// so prefer using that method if you can. Alternatively, you can use
1380    /// [`is_cased()`] and [`is_case_ignorable()`] to implement it yourself.
1381    /// See `Final_Sigma` in [Table 3.17] of the Unicode Standard,
1382    /// along with [`SpecialCasing.txt`], for more details.
1383    ///
1384    /// [`is_cased()`]: Self::is_cased()
1385    /// [`is_case_ignorable()`]: Self::is_case_ignorable()
1386    /// [Table 3.17]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G54277
1387    ///
1388    /// ## Turkish and Azeri I/ı/İ/i
1389    ///
1390    /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
1391    ///
1392    /// * 'Dotless': I / ı, sometimes written ï
1393    /// * 'Dotted': İ / i
1394    ///
1395    /// Note that the uppercase undotted 'I' is the same codepoint as the Latin. Therefore:
1396    ///
1397    /// ```
1398    /// let lower_i = 'I'.to_lowercase().to_string();
1399    /// ```
1400    ///
1401    /// `'I'`'s correct lowercase relies on the language of the text: if we're
1402    /// in `en-US`, it should be `"i"`, but if we're in `tr-TR` or `az-AZ`, it should
1403    /// be `"ı"`. `to_lowercase()` does not take this into account, and so:
1404    ///
1405    /// ```
1406    /// let lower_i = 'I'.to_lowercase().to_string();
1407    ///
1408    /// assert_eq!(lower_i, "i");
1409    /// ```
1410    ///
1411    /// holds across languages.
1412    ///
1413    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1414    #[must_use = "this returns the lowercased character as a new iterator, \
1415                  without modifying the original"]
1416    #[stable(feature = "rust1", since = "1.0.0")]
1417    #[inline]
1418    pub fn to_lowercase(self) -> ToLowercase {
1419        ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
1420    }
1421
1422    /// Returns an iterator that yields the titlecase mapping of this `char` as one or more
1423    /// `char`s.
1424    ///
1425    /// This is usually, but not always, equivalent to the uppercase mapping
1426    /// returned by [`to_uppercase()`]. Prefer this method when seeking to capitalize
1427    /// Only The First Letter of a word, but use [`to_uppercase()`] for ALL CAPS.
1428    /// See [below](#difference-from-uppercase) for a thorough explanation
1429    /// of the difference between the two methods.
1430    ///
1431    /// If this `char` does not have a titlecase mapping, the iterator yields the same `char`.
1432    ///
1433    /// If this `char` has a one-to-one titlecase mapping given by the [Unicode Character
1434    /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1435    ///
1436    /// [ucd]: https://www.unicode.org/reports/tr44/
1437    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1438    ///
1439    /// If this `char` expands to multiple `char`s, the iterator yields the `char`s given by
1440    /// [`SpecialCasing.txt`]. The maximum number of `char`s in a case mapping is 3.
1441    ///
1442    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1443    ///
1444    /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1445    /// is independent of context and language. See [below](#note-on-locale)
1446    /// for more information.
1447    ///
1448    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1449    /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1450    ///
1451    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1452    ///
1453    /// # Examples
1454    ///
1455    /// As an iterator:
1456    ///
1457    /// ```
1458    /// #![feature(titlecase)]
1459    /// for c in 'ß'.to_titlecase() {
1460    ///     print!("{c}");
1461    /// }
1462    /// println!();
1463    /// ```
1464    ///
1465    /// Using `println!` directly:
1466    ///
1467    /// ```
1468    /// #![feature(titlecase)]
1469    /// println!("{}", 'ß'.to_titlecase());
1470    /// ```
1471    ///
1472    /// Both are equivalent to:
1473    ///
1474    /// ```
1475    /// println!("Ss");
1476    /// ```
1477    ///
1478    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1479    ///
1480    /// ```
1481    /// #![feature(titlecase)]
1482    /// assert_eq!('c'.to_titlecase().to_string(), "C");
1483    /// assert_eq!('ა'.to_titlecase().to_string(), "ა");
1484    /// assert_eq!('ǆ'.to_titlecase().to_string(), "ǅ");
1485    /// assert_eq!('ᾨ'.to_titlecase().to_string(), "ᾨ");
1486    ///
1487    /// // Sometimes the result is more than one character:
1488    /// assert_eq!('ß'.to_titlecase().to_string(), "Ss");
1489    ///
1490    /// // Characters that do not have separate cased forms
1491    /// // convert into themselves.
1492    /// assert_eq!('山'.to_titlecase().to_string(), "山");
1493    /// ```
1494    ///
1495    /// # Difference from uppercase
1496    ///
1497    /// Currently, there are three classes of characters where [`to_uppercase()`]
1498    /// and `to_titlecase()` give different results:
1499    ///
1500    /// ## Georgian script
1501    ///
1502    /// Each letter in the modern Georgian alphabet can be written in one of two forms:
1503    /// the typical lowercase-like "mkhedruli" form, and a variant uppercase-like "mtavruli"
1504    /// form. However, unlike uppercase in most cased scripts, mtavruli is not typically used
1505    /// to start sentences, denote proper nouns, or for any other purpose
1506    /// in running text. It is instead confined to titles and headings, which are written entirely
1507    /// in mtavruli. For this reason, [`to_uppercase()`] applied to a Georgian letter
1508    /// will return the mtavruli form, but `to_titlecase()` will return the mkhedruli form.
1509    ///
1510    /// ```
1511    /// #![feature(titlecase)]
1512    /// let ani = 'ა'; // First letter of the Georgian alphabet, in mkhedruli form
1513    ///
1514    /// // Titlecasing mkhedruli maps it to itself...
1515    /// assert_eq!(ani.to_titlecase().to_string(), ani.to_string());
1516    ///
1517    /// // but uppercasing it maps it to mtavruli
1518    /// assert_eq!(ani.to_uppercase().to_string(), "Ა");
1519    /// ```
1520    ///
1521    /// ## Compatibility digraphs for Latin-alphabet Serbo-Croatian
1522    ///
1523    /// The standard Latin alphabet for the Serbo-Croatian language
1524    /// (Bosnian, Croatian, Montenegrin, and Serbian) contains
1525    /// three digraphs: Dž, Lj, and Nj. These are usually represented as
1526    /// two characters. However, for compatibility with older character sets,
1527    /// Unicode includes single-character versions of these digraphs.
    /// Each has an uppercase, titlecase, and lowercase version:
1529    ///
1530    /// - `'Ǆ'`, `'ǅ'`, `'ǆ'`
1531    /// - `'Ǉ'`, `'ǈ'`, `'ǉ'`
1532    /// - `'Ǌ'`, `'ǋ'`, `'ǌ'`
1533    ///
1534    /// Unicode additionally encodes a casing triad for the Dz digraph
1535    /// without the caron: `'Ǳ'`, `'ǲ'`, `'ǳ'`.
1536    ///
    /// ## Iota-subscripted Greek vowels
1538    ///
1539    /// In ancient Greek, the long vowels alpha (α), eta (η), and omega (ω)
1540    /// were sometimes followed by an iota (ι), forming a diphthong. Over time,
1541    /// the diphthong pronunciation was slowly lost, with the iota becoming mute.
1542    /// Eventually, the ι disappeared from the spelling as well.
1543    /// However, there remains a need to represent ancient texts faithfully.
1544    ///
1545    /// Modern editions of ancient Greek texts commonly use a reduced-sized
1546    /// ι symbol to denote mute iotas, while distinguishing them from ιs
1547    /// which continued to affect pronunciation. The exact standard differs
1548    /// between different publications. Some render the mute ι below its associated
1549    /// vowel (subscript), while others place it to the right of said vowel (adscript).
1550    /// The interaction of mute ι symbols with casing also varies.
1551    ///
1552    /// The Unicode Standard, for its default casing rules, chose to make lowercase
    /// Greek vowels with iota subscript (e.g. `'ᾠ'`) titlecase to the uppercase vowel
1554    /// with iota subscript (`'ᾨ'`) but uppercase to the uppercase vowel followed by
1555    /// full-size uppercase iota (`"ὨΙ"`). This is just one convention among many
1556    /// in common use, but it is the one Unicode settled on,
1557    /// so it is what this method does also.
1558    ///
1559    /// # Note on locale
1560    ///
1561    /// As stated above, this method is locale-insensitive.
1562    /// If you need locale support, consider using an external crate,
1563    /// like [`icu_casemap`](https://crates.io/crates/icu_casemap)
1564    /// which is developed by Unicode. A description of one common
1565    /// locale-dependent casing issue follows (there are others):
1566    ///
1567    /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
1568    ///
1569    /// * 'Dotless': I / ı, sometimes written ï
1570    /// * 'Dotted': İ / i
1571    ///
1572    /// Note that the lowercase dotted 'i' is the same codepoint as the Latin. Therefore:
1573    ///
1574    /// ```
1575    /// #![feature(titlecase)]
1576    /// let upper_i = 'i'.to_titlecase().to_string();
1577    /// ```
1578    ///
1579    /// `'i'`'s correct titlecase relies on the language of the text: if we're
1580    /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
1581    /// be `"İ"`. `to_titlecase()` does not take this into account, and so:
1582    ///
1583    /// ```
1584    /// #![feature(titlecase)]
1585    /// let upper_i = 'i'.to_titlecase().to_string();
1586    ///
1587    /// assert_eq!(upper_i, "I");
1588    /// ```
1589    ///
1590    /// holds across languages.
1591    ///
1592    /// [`to_uppercase()`]: Self::to_uppercase()
1593    #[must_use = "this returns the titlecased character as a new iterator, \
1594                  without modifying the original"]
1595    #[unstable(feature = "titlecase", issue = "153892")]
1596    #[inline]
1597    pub fn to_titlecase(self) -> ToTitlecase {
1598        ToTitlecase(CaseMappingIter::new(conversions::to_title(self)))
1599    }
1600
1601    /// Returns an iterator that yields the uppercase mapping of this `char` as one or more
1602    /// `char`s.
1603    ///
1604    /// Prefer this method when converting a word into ALL CAPS, but consider [`to_titlecase()`]
1605    /// instead if you seek to capitalize Only The First Letter. See that method's documentation
1606    /// for more information on the difference between the two.
1607    ///
1608    /// If this `char` does not have an uppercase mapping, the iterator yields the same `char`.
1609    ///
1610    /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character
1611    /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1612    ///
1613    /// [ucd]: https://www.unicode.org/reports/tr44/
1614    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1615    ///
1616    /// If this `char` expands to multiple `char`s, the iterator yields the `char`s given by
1617    /// [`SpecialCasing.txt`]. The maximum number of `char`s in a case mapping is 3.
1618    ///
1619    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1620    ///
1621    /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1622    /// is independent of context and language. See [below](#note-on-locale)
1623    /// for more information.
1624    ///
1625    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1626    /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1627    ///
1628    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1629    ///
1630    /// # Examples
1631    ///
1632    /// `'ﬅ'` (U+FB05) is a single Unicode code point (a ligature) that maps to "ST" in uppercase.
1633    ///
1634    /// As an iterator:
1635    ///
1636    /// ```
1637    /// for c in 'ﬅ'.to_uppercase() {
1638    ///     print!("{c}");
1639    /// }
1640    /// println!();
1641    /// ```
1642    ///
1643    /// Using `println!` directly:
1644    ///
1645    /// ```
1646    /// println!("{}", 'ﬅ'.to_uppercase());
1647    /// ```
1648    ///
1649    /// Both are equivalent to:
1650    ///
1651    /// ```
1652    /// println!("ST");
1653    /// ```
1654    ///
1655    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1656    ///
1657    /// ```
1658    /// assert_eq!('c'.to_uppercase().to_string(), "C");
1659    /// assert_eq!('ა'.to_uppercase().to_string(), "Ა");
1660    /// assert_eq!('ǆ'.to_uppercase().to_string(), "Ǆ");
1661    ///
1662    /// // Sometimes the result is more than one character:
1663    /// assert_eq!('ﬅ'.to_uppercase().to_string(), "ST");
1664    /// assert_eq!('ᾨ'.to_uppercase().to_string(), "ὨΙ");
1665    ///
1666    /// // Characters that do not have both uppercase and lowercase
1667    /// // convert into themselves.
1668    /// assert_eq!('山'.to_uppercase().to_string(), "山");
1669    /// ```
1670    ///
1671    /// # Note on locale
1672    ///
1673    /// As stated above, this method is locale-insensitive.
1674    /// If you need locale support, consider using an external crate,
1675    /// like [`icu_casemap`](https://crates.io/crates/icu_casemap)
1676    /// which is developed by Unicode. A description of one common
1677    /// locale-dependent casing issue follows (there are others):
1678    ///
1679    /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
1680    ///
1681    /// * 'Dotless': I / ı, sometimes written ï
1682    /// * 'Dotted': İ / i
1683    ///
1684    /// Note that the lowercase dotted 'i' is the same codepoint as the Latin. Therefore:
1685    ///
1686    /// ```
1687    /// let upper_i = 'i'.to_uppercase().to_string();
1688    /// ```
1689    ///
1690    /// `'i'`'s correct uppercase relies on the language of the text: if we're
1691    /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
1692    /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
1693    ///
1694    /// ```
1695    /// let upper_i = 'i'.to_uppercase().to_string();
1696    ///
1697    /// assert_eq!(upper_i, "I");
1698    /// ```
1699    ///
1700    /// holds across languages.
1701    ///
1702    /// [`to_titlecase()`]: Self::to_titlecase()
1703    #[must_use = "this returns the uppercased character as a new iterator, \
1704                  without modifying the original"]
1705    #[stable(feature = "rust1", since = "1.0.0")]
1706    #[inline]
1707    pub fn to_uppercase(self) -> ToUppercase {
1708        ToUppercase(CaseMappingIter::new(conversions::to_upper(self)))
1709    }
1710
1711    /// Returns an iterator that yields the case folding of this `char` as one or more
1712    /// `char`s.
1713    ///
1714    /// Case folding is meant to be used when performing case-insensitive string comparisons.
1715    /// Case-folded strings should not usually be exposed directly to users. For most,
1716    /// but not all, characters, the casefold mapping is identical to the lowercase one.
1717    ///
1718    /// This iterator yields the `char`(s) in the common or full case folding for this `char`,
1719    /// as given by the [Unicode Character Database][ucd] [`CaseFolding.txt`].
1720    /// The maximum number of `char`s in a case folding is 3.
1721    ///
1722    /// [ucd]: https://www.unicode.org/reports/tr44/
1723    /// [`CaseFolding.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt
1724    ///
1725    ///
1726    /// No [normalization] (e.g. NFC) is performed, so visually and semantically identical characters
1727    /// might still casefold differently. For example, `'ά'` (U+03AC GREEK SMALL LETTER ALPHA WITH TONOS)
1728    /// is considered distinct from `'ά'` (U+1F71 GREEK SMALL LETTER ALPHA WITH OXIA),
1729    /// even though Unicode considers them canonically equivalent.
1730    ///
1731    /// In addition, this method is independent of language/locale,
1732    /// so the special behavior of I/ı/İ/i in Turkish and Azeri is not handled.
1733    ///
1734    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case folding in
1735    /// general and Chapter 3 (Conformance) discusses the default algorithm for case folding.
1736    ///
1737    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1738    ///
1739    /// # Examples
1740    ///
1741    /// The German sharp S `'ß'` (U+DF) is a single Unicode code point
1742    /// that casefolds to `"ss"`. Its uppercase variant '`ẞ`' (U+1E9E)
1743    /// has the same case-folding.
1744    ///
1745    /// As an iterator:
1746    ///
1747    /// ```
1748    /// #![feature(casefold)]
1749    /// assert!('ß'.to_casefold_unnormalized().eq(['s', 's']));
1750    /// assert!('ẞ'.to_casefold_unnormalized().eq(['s', 's']));
1751    /// ```
1752    ///
1753    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1754    ///
1755    /// ```
1756    /// #![feature(casefold)]
1757    /// assert_eq!('ß'.to_casefold_unnormalized().to_string(), "ss");
1758    /// assert_eq!('ẞ'.to_casefold_unnormalized().to_string(), "ss");
1759    /// ```
1760    ///
1761    /// No [normalization] is performed:
1762    ///
1763    /// ```rust
1764    /// #![feature(casefold)]
1765    /// // These two characters are visually and semantically identical;
1766    /// // Unicode considers them to be canonically equivalent.
1767    /// let alpha_tonos = 'ά';
1768    /// let alpha_oxia = 'ά';
1769    ///
1770    /// // However, they are different codepoints:
1771    /// assert_eq!(alpha_tonos, '\u{03AC}');
1772    /// assert_eq!(alpha_oxia, '\u{1F71}');
1773    ///
1774    /// // Their case-foldings are likewise unequal:
1775    /// assert!(alpha_tonos.to_casefold_unnormalized().eq(['\u{03AC}']));
1776    /// assert!(alpha_oxia.to_casefold_unnormalized().eq(['\u{1F71}']));
1777    /// ```
1778    ///
1779    /// # Note on locale
1780    ///
1781    /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
1782    ///
1783    /// * 'Dotless': I / ı, sometimes written ï
1784    /// * 'Dotted': İ / i
1785    ///
1786    /// Note that the uppercase undotted 'I' is the same codepoint as the Latin. Therefore:
1787    ///
1788    /// ```
1789    /// #![feature(casefold)]
1790    /// let casefold_i = 'I'.to_casefold_unnormalized().to_string();
1791    /// ```
1792    ///
1793    /// `'I'`'s correct case folding relies on the language of the text: if we're
1794    /// in `en-US`, it should be `"i"`, but if we're in `tr-TR` or `az-AZ`, it should
1795    /// be `"ı"`. `to_casefold_unnormalized()` does not take this into account, and so:
1796    ///
1797    /// ```
1798    /// #![feature(casefold)]
1799    /// let casefold_i = 'I'.to_casefold_unnormalized().to_string();
1800    ///
1801    /// assert_eq!(casefold_i, "i");
1802    /// ```
1803    ///
1804    /// holds across languages.
1805    ///
1806    /// [normalization]: https://www.unicode.org/faq/normalization.html
1807    #[must_use = "this returns the case-folded character as a new iterator, \
1808                  without modifying the original"]
1809    #[unstable(feature = "casefold", issue = "154742")]
1810    #[inline]
1811    pub fn to_casefold_unnormalized(self) -> ToCasefold {
1812        ToCasefold(CaseMappingIter::new(conversions::to_casefold(self)))
1813    }
1814
1815    /// Returns the code point value as a `u32`.
1816    ///
1817    /// # Examples
1818    ///
1819    /// ```
1820    /// #![feature(char_to_u32)]
1821    ///
1822    /// let ascii = 'a';
1823    /// let heart = '❤';
1824    ///
1825    /// assert_eq!(ascii.to_u32(), 97_u32);
1826    /// assert_eq!(heart.to_u32(), 0x2764_u32);
1827    /// ```
1828    #[must_use = "this returns the result of the operation, \
1829                  without modifying the original"]
1830    #[unstable(feature = "char_to_u32", issue = "158938")]
1831    #[rustc_const_unstable(feature = "char_to_u32", issue = "158938")]
1832    #[inline(always)]
1833    pub const fn to_u32(self) -> u32 {
1834        self as u32
1835    }
1836
1837    /// Checks if the value is within the ASCII range.
1838    ///
1839    /// # Examples
1840    ///
1841    /// ```
1842    /// let ascii = 'a';
1843    /// let non_ascii = '❤';
1844    ///
1845    /// assert!(ascii.is_ascii());
1846    /// assert!(!non_ascii.is_ascii());
1847    /// ```
1848    #[must_use]
1849    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1850    #[rustc_const_stable(feature = "const_char_is_ascii", since = "1.32.0")]
1851    #[rustc_diagnostic_item = "char_is_ascii"]
1852    #[inline]
1853    pub const fn is_ascii(&self) -> bool {
1854        *self as u32 <= 0x7F
1855    }
1856
1857    /// Returns `Some` if the value is within the ASCII range,
1858    /// or `None` if it's not.
1859    ///
1860    /// This is preferred to [`Self::is_ascii`] when you're passing the value
1861    /// along to something else that can take [`ascii::Char`] rather than
1862    /// needing to check again for itself whether the value is in ASCII.
1863    #[must_use]
1864    #[unstable(feature = "ascii_char", issue = "110998")]
1865    #[inline]
1866    pub const fn as_ascii(&self) -> Option<ascii::Char> {
1867        if self.is_ascii() {
1868            // SAFETY: Just checked that this is ASCII.
1869            Some(unsafe { ascii::Char::from_u8_unchecked(*self as u8) })
1870        } else {
1871            None
1872        }
1873    }
1874
1875    /// Converts this char into an [ASCII character](`ascii::Char`), without
1876    /// checking whether it is valid.
1877    ///
1878    /// # Safety
1879    ///
1880    /// This char must be within the ASCII range, or else this is UB.
1881    #[must_use]
1882    #[unstable(feature = "ascii_char", issue = "110998")]
1883    #[inline]
1884    pub const unsafe fn as_ascii_unchecked(&self) -> ascii::Char {
1885        assert_unsafe_precondition!(
1886            check_library_ub,
1887            "as_ascii_unchecked requires that the char is valid ASCII",
1888            (it: &char = self) => it.is_ascii()
1889        );
1890
1891        // SAFETY: the caller promised that this char is ASCII.
1892        unsafe { ascii::Char::from_u8_unchecked(*self as u8) }
1893    }
1894
1895    /// Makes a copy of the value in its ASCII upper case equivalent.
1896    ///
1897    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1898    /// but non-ASCII letters are unchanged.
1899    ///
1900    /// To uppercase the value in-place, use [`make_ascii_uppercase()`].
1901    ///
1902    /// To uppercase ASCII characters in addition to non-ASCII characters, use
1903    /// [`to_uppercase()`].
1904    ///
1905    /// # Examples
1906    ///
1907    /// ```
1908    /// let ascii = 'a';
1909    /// let non_ascii = '❤';
1910    ///
1911    /// assert_eq!('A', ascii.to_ascii_uppercase());
1912    /// assert_eq!('❤', non_ascii.to_ascii_uppercase());
1913    /// ```
1914    ///
1915    /// [`make_ascii_uppercase()`]: #method.make_ascii_uppercase
1916    /// [`to_uppercase()`]: #method.to_uppercase
1917    #[must_use = "to uppercase the value in-place, use `make_ascii_uppercase()`"]
1918    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1919    #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1920    #[inline]
1921    pub const fn to_ascii_uppercase(&self) -> char {
1922        if self.is_ascii_lowercase() {
1923            (*self as u8).ascii_change_case_unchecked() as char
1924        } else {
1925            *self
1926        }
1927    }
1928
1929    /// Makes a copy of the value in its ASCII lower case equivalent.
1930    ///
1931    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1932    /// but non-ASCII letters are unchanged.
1933    ///
1934    /// To lowercase the value in-place, use [`make_ascii_lowercase()`].
1935    ///
1936    /// To lowercase ASCII characters in addition to non-ASCII characters, use
1937    /// [`to_lowercase()`].
1938    ///
1939    /// # Examples
1940    ///
1941    /// ```
1942    /// let ascii = 'A';
1943    /// let non_ascii = '❤';
1944    ///
1945    /// assert_eq!('a', ascii.to_ascii_lowercase());
1946    /// assert_eq!('❤', non_ascii.to_ascii_lowercase());
1947    /// ```
1948    ///
1949    /// [`make_ascii_lowercase()`]: #method.make_ascii_lowercase
1950    /// [`to_lowercase()`]: #method.to_lowercase
1951    #[must_use = "to lowercase the value in-place, use `make_ascii_lowercase()`"]
1952    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1953    #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1954    #[inline]
1955    pub const fn to_ascii_lowercase(&self) -> char {
1956        if self.is_ascii_uppercase() {
1957            (*self as u8).ascii_change_case_unchecked() as char
1958        } else {
1959            *self
1960        }
1961    }
1962
1963    /// Checks that two values are an ASCII case-insensitive match.
1964    ///
1965    /// Equivalent to <code>[to_ascii_lowercase]\(a) == [to_ascii_lowercase]\(b)</code>.
1966    ///
1967    /// # Examples
1968    ///
1969    /// ```
1970    /// let upper_a = 'A';
1971    /// let lower_a = 'a';
1972    /// let lower_z = 'z';
1973    ///
1974    /// assert!(upper_a.eq_ignore_ascii_case(&lower_a));
1975    /// assert!(upper_a.eq_ignore_ascii_case(&upper_a));
1976    /// assert!(!upper_a.eq_ignore_ascii_case(&lower_z));
1977    /// ```
1978    ///
1979    /// [to_ascii_lowercase]: #method.to_ascii_lowercase
1980    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1981    #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1982    #[inline]
1983    pub const fn eq_ignore_ascii_case(&self, other: &char) -> bool {
1984        self.to_ascii_lowercase() == other.to_ascii_lowercase()
1985    }
1986
1987    /// Converts this type to its ASCII upper case equivalent in-place.
1988    ///
1989    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1990    /// but non-ASCII letters are unchanged.
1991    ///
1992    /// To return a new uppercased value without modifying the existing one, use
1993    /// [`to_ascii_uppercase()`].
1994    ///
1995    /// # Examples
1996    ///
1997    /// ```
1998    /// let mut ascii = 'a';
1999    ///
2000    /// ascii.make_ascii_uppercase();
2001    ///
2002    /// assert_eq!('A', ascii);
2003    /// ```
2004    ///
2005    /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase
2006    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
2007    #[rustc_const_stable(feature = "const_make_ascii", since = "1.84.0")]
2008    #[inline]
2009    pub const fn make_ascii_uppercase(&mut self) {
2010        *self = self.to_ascii_uppercase();
2011    }
2012
2013    /// Converts this type to its ASCII lower case equivalent in-place.
2014    ///
2015    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
2016    /// but non-ASCII letters are unchanged.
2017    ///
2018    /// To return a new lowercased value without modifying the existing one, use
2019    /// [`to_ascii_lowercase()`].
2020    ///
2021    /// # Examples
2022    ///
2023    /// ```
2024    /// let mut ascii = 'A';
2025    ///
2026    /// ascii.make_ascii_lowercase();
2027    ///
2028    /// assert_eq!('a', ascii);
2029    /// ```
2030    ///
2031    /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase
2032    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
2033    #[rustc_const_stable(feature = "const_make_ascii", since = "1.84.0")]
2034    #[inline]
2035    pub const fn make_ascii_lowercase(&mut self) {
2036        *self = self.to_ascii_lowercase();
2037    }
2038
2039    /// Checks if the value is an ASCII alphabetic character:
2040    ///
2041    /// - U+0041 'A' ..= U+005A 'Z', or
2042    /// - U+0061 'a' ..= U+007A 'z'.
2043    ///
2044    /// # Examples
2045    ///
2046    /// ```
2047    /// let uppercase_a = 'A';
2048    /// let uppercase_g = 'G';
2049    /// let a = 'a';
2050    /// let g = 'g';
2051    /// let zero = '0';
2052    /// let percent = '%';
2053    /// let space = ' ';
2054    /// let lf = '\n';
2055    /// let esc = '\x1b';
2056    ///
2057    /// assert!(uppercase_a.is_ascii_alphabetic());
2058    /// assert!(uppercase_g.is_ascii_alphabetic());
2059    /// assert!(a.is_ascii_alphabetic());
2060    /// assert!(g.is_ascii_alphabetic());
2061    /// assert!(!zero.is_ascii_alphabetic());
2062    /// assert!(!percent.is_ascii_alphabetic());
2063    /// assert!(!space.is_ascii_alphabetic());
2064    /// assert!(!lf.is_ascii_alphabetic());
2065    /// assert!(!esc.is_ascii_alphabetic());
2066    /// ```
2067    #[must_use]
2068    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2069    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2070    #[inline]
2071    pub const fn is_ascii_alphabetic(&self) -> bool {
2072        matches!(*self, 'a'..='z' | 'A'..='Z')
2073    }
2074
2075    /// Checks if the value is an ASCII uppercase character:
2076    /// U+0041 'A' ..= U+005A 'Z'.
2077    ///
2078    /// # Examples
2079    ///
2080    /// ```
2081    /// let uppercase_a = 'A';
2082    /// let uppercase_g = 'G';
2083    /// let a = 'a';
2084    /// let g = 'g';
2085    /// let zero = '0';
2086    /// let percent = '%';
2087    /// let space = ' ';
2088    /// let lf = '\n';
2089    /// let esc = '\x1b';
2090    ///
2091    /// assert!(uppercase_a.is_ascii_uppercase());
2092    /// assert!(uppercase_g.is_ascii_uppercase());
2093    /// assert!(!a.is_ascii_uppercase());
2094    /// assert!(!g.is_ascii_uppercase());
2095    /// assert!(!zero.is_ascii_uppercase());
2096    /// assert!(!percent.is_ascii_uppercase());
2097    /// assert!(!space.is_ascii_uppercase());
2098    /// assert!(!lf.is_ascii_uppercase());
2099    /// assert!(!esc.is_ascii_uppercase());
2100    /// ```
2101    #[must_use]
2102    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2103    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2104    #[inline]
2105    pub const fn is_ascii_uppercase(&self) -> bool {
2106        matches!(*self, 'A'..='Z')
2107    }
2108
2109    /// Checks if the value is an ASCII lowercase character:
2110    /// U+0061 'a' ..= U+007A 'z'.
2111    ///
2112    /// # Examples
2113    ///
2114    /// ```
2115    /// let uppercase_a = 'A';
2116    /// let uppercase_g = 'G';
2117    /// let a = 'a';
2118    /// let g = 'g';
2119    /// let zero = '0';
2120    /// let percent = '%';
2121    /// let space = ' ';
2122    /// let lf = '\n';
2123    /// let esc = '\x1b';
2124    ///
2125    /// assert!(!uppercase_a.is_ascii_lowercase());
2126    /// assert!(!uppercase_g.is_ascii_lowercase());
2127    /// assert!(a.is_ascii_lowercase());
2128    /// assert!(g.is_ascii_lowercase());
2129    /// assert!(!zero.is_ascii_lowercase());
2130    /// assert!(!percent.is_ascii_lowercase());
2131    /// assert!(!space.is_ascii_lowercase());
2132    /// assert!(!lf.is_ascii_lowercase());
2133    /// assert!(!esc.is_ascii_lowercase());
2134    /// ```
2135    #[must_use]
2136    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2137    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2138    #[inline]
2139    pub const fn is_ascii_lowercase(&self) -> bool {
2140        matches!(*self, 'a'..='z')
2141    }
2142
2143    /// Checks if the value is an ASCII alphanumeric character:
2144    ///
2145    /// - U+0041 'A' ..= U+005A 'Z', or
2146    /// - U+0061 'a' ..= U+007A 'z', or
2147    /// - U+0030 '0' ..= U+0039 '9'.
2148    ///
2149    /// # Examples
2150    ///
2151    /// ```
2152    /// let uppercase_a = 'A';
2153    /// let uppercase_g = 'G';
2154    /// let a = 'a';
2155    /// let g = 'g';
2156    /// let zero = '0';
2157    /// let percent = '%';
2158    /// let space = ' ';
2159    /// let lf = '\n';
2160    /// let esc = '\x1b';
2161    ///
2162    /// assert!(uppercase_a.is_ascii_alphanumeric());
2163    /// assert!(uppercase_g.is_ascii_alphanumeric());
2164    /// assert!(a.is_ascii_alphanumeric());
2165    /// assert!(g.is_ascii_alphanumeric());
2166    /// assert!(zero.is_ascii_alphanumeric());
2167    /// assert!(!percent.is_ascii_alphanumeric());
2168    /// assert!(!space.is_ascii_alphanumeric());
2169    /// assert!(!lf.is_ascii_alphanumeric());
2170    /// assert!(!esc.is_ascii_alphanumeric());
2171    /// ```
2172    #[must_use]
2173    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2174    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2175    #[inline]
2176    pub const fn is_ascii_alphanumeric(&self) -> bool {
2177        matches!(*self, '0'..='9') | matches!(*self, 'A'..='Z') | matches!(*self, 'a'..='z')
2178    }
2179
2180    /// Checks if the value is an ASCII decimal digit:
2181    /// U+0030 '0' ..= U+0039 '9'.
2182    ///
2183    /// # Examples
2184    ///
2185    /// ```
2186    /// let uppercase_a = 'A';
2187    /// let uppercase_g = 'G';
2188    /// let a = 'a';
2189    /// let g = 'g';
2190    /// let zero = '0';
2191    /// let percent = '%';
2192    /// let space = ' ';
2193    /// let lf = '\n';
2194    /// let esc = '\x1b';
2195    ///
2196    /// assert!(!uppercase_a.is_ascii_digit());
2197    /// assert!(!uppercase_g.is_ascii_digit());
2198    /// assert!(!a.is_ascii_digit());
2199    /// assert!(!g.is_ascii_digit());
2200    /// assert!(zero.is_ascii_digit());
2201    /// assert!(!percent.is_ascii_digit());
2202    /// assert!(!space.is_ascii_digit());
2203    /// assert!(!lf.is_ascii_digit());
2204    /// assert!(!esc.is_ascii_digit());
2205    /// ```
2206    #[must_use]
2207    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2208    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2209    #[inline]
2210    pub const fn is_ascii_digit(&self) -> bool {
2211        matches!(*self, '0'..='9')
2212    }
2213
2214    /// Checks if the value is an ASCII octal digit:
2215    /// U+0030 '0' ..= U+0037 '7'.
2216    ///
2217    /// # Examples
2218    ///
2219    /// ```
2220    /// #![feature(is_ascii_octdigit)]
2221    ///
2222    /// let uppercase_a = 'A';
2223    /// let a = 'a';
2224    /// let zero = '0';
2225    /// let seven = '7';
2226    /// let nine = '9';
2227    /// let percent = '%';
2228    /// let lf = '\n';
2229    ///
2230    /// assert!(!uppercase_a.is_ascii_octdigit());
2231    /// assert!(!a.is_ascii_octdigit());
2232    /// assert!(zero.is_ascii_octdigit());
2233    /// assert!(seven.is_ascii_octdigit());
2234    /// assert!(!nine.is_ascii_octdigit());
2235    /// assert!(!percent.is_ascii_octdigit());
2236    /// assert!(!lf.is_ascii_octdigit());
2237    /// ```
2238    #[must_use]
2239    #[unstable(feature = "is_ascii_octdigit", issue = "101288")]
2240    #[inline]
2241    pub const fn is_ascii_octdigit(&self) -> bool {
2242        matches!(*self, '0'..='7')
2243    }
2244
2245    /// Checks if the value is an ASCII hexadecimal digit:
2246    ///
2247    /// - U+0030 '0' ..= U+0039 '9', or
2248    /// - U+0041 'A' ..= U+0046 'F', or
2249    /// - U+0061 'a' ..= U+0066 'f'.
2250    ///
2251    /// # Examples
2252    ///
2253    /// ```
2254    /// let uppercase_a = 'A';
2255    /// let uppercase_g = 'G';
2256    /// let a = 'a';
2257    /// let g = 'g';
2258    /// let zero = '0';
2259    /// let percent = '%';
2260    /// let space = ' ';
2261    /// let lf = '\n';
2262    /// let esc = '\x1b';
2263    ///
2264    /// assert!(uppercase_a.is_ascii_hexdigit());
2265    /// assert!(!uppercase_g.is_ascii_hexdigit());
2266    /// assert!(a.is_ascii_hexdigit());
2267    /// assert!(!g.is_ascii_hexdigit());
2268    /// assert!(zero.is_ascii_hexdigit());
2269    /// assert!(!percent.is_ascii_hexdigit());
2270    /// assert!(!space.is_ascii_hexdigit());
2271    /// assert!(!lf.is_ascii_hexdigit());
2272    /// assert!(!esc.is_ascii_hexdigit());
2273    /// ```
2274    #[must_use]
2275    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2276    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2277    #[inline]
2278    pub const fn is_ascii_hexdigit(&self) -> bool {
2279        matches!(*self, '0'..='9') | matches!(*self, 'A'..='F') | matches!(*self, 'a'..='f')
2280    }
2281
2282    /// Checks if the value is an ASCII punctuation or symbol character
2283    /// (i.e. not alphanumeric, whitespace, or control):
2284    ///
2285    /// - U+0021 ..= U+002F `! " # $ % & ' ( ) * + , - . /`, or
2286    /// - U+003A ..= U+0040 `: ; < = > ? @`, or
2287    /// - U+005B ..= U+0060 ``[ \ ] ^ _ ` ``, or
2288    /// - U+007B ..= U+007E `{ | } ~`
2289    ///
2290    /// # Examples
2291    ///
2292    /// ```
2293    /// let uppercase_a = 'A';
2294    /// let uppercase_g = 'G';
2295    /// let a = 'a';
2296    /// let g = 'g';
2297    /// let zero = '0';
2298    /// let percent = '%';
2299    /// let space = ' ';
2300    /// let lf = '\n';
2301    /// let esc = '\x1b';
2302    ///
2303    /// assert!(!uppercase_a.is_ascii_punctuation());
2304    /// assert!(!uppercase_g.is_ascii_punctuation());
2305    /// assert!(!a.is_ascii_punctuation());
2306    /// assert!(!g.is_ascii_punctuation());
2307    /// assert!(!zero.is_ascii_punctuation());
2308    /// assert!(percent.is_ascii_punctuation());
2309    /// assert!(!space.is_ascii_punctuation());
2310    /// assert!(!lf.is_ascii_punctuation());
2311    /// assert!(!esc.is_ascii_punctuation());
2312    /// ```
2313    #[must_use]
2314    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2315    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2316    #[inline]
2317    pub const fn is_ascii_punctuation(&self) -> bool {
2318        matches!(*self, '!'..='/')
2319            | matches!(*self, ':'..='@')
2320            | matches!(*self, '['..='`')
2321            | matches!(*self, '{'..='~')
2322    }
2323
2324    /// Checks if the value is an ASCII graphic character
2325    /// (i.e. not whitespace or control):
2326    /// U+0021 '!' ..= U+007E '~'.
2327    ///
2328    /// # Examples
2329    ///
2330    /// ```
2331    /// let uppercase_a = 'A';
2332    /// let uppercase_g = 'G';
2333    /// let a = 'a';
2334    /// let g = 'g';
2335    /// let zero = '0';
2336    /// let percent = '%';
2337    /// let space = ' ';
2338    /// let lf = '\n';
2339    /// let esc = '\x1b';
2340    ///
2341    /// assert!(uppercase_a.is_ascii_graphic());
2342    /// assert!(uppercase_g.is_ascii_graphic());
2343    /// assert!(a.is_ascii_graphic());
2344    /// assert!(g.is_ascii_graphic());
2345    /// assert!(zero.is_ascii_graphic());
2346    /// assert!(percent.is_ascii_graphic());
2347    /// assert!(!space.is_ascii_graphic());
2348    /// assert!(!lf.is_ascii_graphic());
2349    /// assert!(!esc.is_ascii_graphic());
2350    /// ```
2351    #[must_use]
2352    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2353    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2354    #[inline]
2355    pub const fn is_ascii_graphic(&self) -> bool {
2356        matches!(*self, '!'..='~')
2357    }
2358
2359    /// Checks if the value is an ASCII whitespace character:
2360    /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
2361    /// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
2362    ///
2363    /// **Warning:** Because the list above excludes U+000B VERTICAL TAB,
2364    /// `c.is_ascii_whitespace()` is **not** equivalent to `c.is_ascii() && c.is_whitespace()`.
2365    ///
2366    /// Rust uses the WhatWG Infra Standard's [definition of ASCII
2367    /// whitespace][infra-aw]. There are several other definitions in
2368    /// wide use. For instance, [the POSIX locale][pct] includes
2369    /// U+000B VERTICAL TAB as well as all the above characters,
2370    /// but—from the very same specification—[the default rule for
2371    /// "field splitting" in the Bourne shell][bfs] considers *only*
2372    /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace.
2373    ///
2374    /// If you are writing a program that will process an existing
2375    /// file format, check what that format's definition of whitespace is
2376    /// before using this function.
2377    ///
2378    /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace
2379    /// [pct]: https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap07.html#tag_07_03_01
2380    /// [bfs]: https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html#tag_19_06_05
2381    ///
2382    /// # Examples
2383    ///
2384    /// ```
2385    /// let uppercase_a = 'A';
2386    /// let uppercase_g = 'G';
2387    /// let a = 'a';
2388    /// let g = 'g';
2389    /// let zero = '0';
2390    /// let percent = '%';
2391    /// let space = ' ';
2392    /// let lf = '\n';
2393    /// let esc = '\x1b';
2394    ///
2395    /// assert!(!uppercase_a.is_ascii_whitespace());
2396    /// assert!(!uppercase_g.is_ascii_whitespace());
2397    /// assert!(!a.is_ascii_whitespace());
2398    /// assert!(!g.is_ascii_whitespace());
2399    /// assert!(!zero.is_ascii_whitespace());
2400    /// assert!(!percent.is_ascii_whitespace());
2401    /// assert!(space.is_ascii_whitespace());
2402    /// assert!(lf.is_ascii_whitespace());
2403    /// assert!(!esc.is_ascii_whitespace());
2404    /// ```
2405    #[must_use]
2406    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2407    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2408    #[inline]
2409    pub const fn is_ascii_whitespace(&self) -> bool {
2410        matches!(*self, '\t' | '\n' | '\x0C' | '\r' | ' ')
2411    }
2412
2413    /// Checks if the value is an ASCII control character:
2414    /// U+0000 NUL ..= U+001F UNIT SEPARATOR, or U+007F DELETE.
2415    /// Note that most ASCII whitespace characters are control
2416    /// characters, but SPACE is not.
2417    ///
2418    /// # Examples
2419    ///
2420    /// ```
2421    /// let uppercase_a = 'A';
2422    /// let uppercase_g = 'G';
2423    /// let a = 'a';
2424    /// let g = 'g';
2425    /// let zero = '0';
2426    /// let percent = '%';
2427    /// let space = ' ';
2428    /// let lf = '\n';
2429    /// let esc = '\x1b';
2430    ///
2431    /// assert!(!uppercase_a.is_ascii_control());
2432    /// assert!(!uppercase_g.is_ascii_control());
2433    /// assert!(!a.is_ascii_control());
2434    /// assert!(!g.is_ascii_control());
2435    /// assert!(!zero.is_ascii_control());
2436    /// assert!(!percent.is_ascii_control());
2437    /// assert!(!space.is_ascii_control());
2438    /// assert!(lf.is_ascii_control());
2439    /// assert!(esc.is_ascii_control());
2440    /// ```
2441    #[must_use]
2442    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2443    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2444    #[inline]
2445    pub const fn is_ascii_control(&self) -> bool {
2446        matches!(*self, '\0'..='\x1F' | '\x7F')
2447    }
2448}
2449
/// Switches for the optional escape categories; each flag turns one
/// category on or off independently of the others.
pub(crate) struct EscapeDebugExtArgs {
    /// Whether Grapheme Extender code points are escaped.
    pub(crate) escape_grapheme_extender: bool,

    /// Whether single quotes are escaped.
    pub(crate) escape_single_quote: bool,

    /// Whether double quotes are escaped.
    pub(crate) escape_double_quote: bool,
}

impl EscapeDebugExtArgs {
    /// Preset with every escape category switched on.
    pub(crate) const ESCAPE_ALL: EscapeDebugExtArgs = EscapeDebugExtArgs {
        escape_grapheme_extender: true,
        escape_single_quote: true,
        escape_double_quote: true,
    };
}
2468
2469#[inline]
2470#[must_use]
2471const fn len_utf8(code: u32) -> usize {
2472    match code {
2473        ..MAX_ONE_B => 1,
2474        ..MAX_TWO_B => 2,
2475        ..MAX_THREE_B => 3,
2476        _ => 4,
2477    }
2478}
2479
/// Returns how many `u16` units the UTF-16 encoding of the raw code point
/// `code` takes: one if the value fits in 16 bits, two otherwise.
#[inline]
#[must_use]
const fn len_utf16(code: u32) -> usize {
    match code {
        0..=0xFFFF => 1,
        _ => 2,
    }
}
2485
2486/// Encodes a raw `u32` value as UTF-8 into the provided byte buffer,
2487/// and then returns the subslice of the buffer that contains the encoded character.
2488///
2489/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
2490/// (Creating a `char` in the surrogate range is UB.)
2491/// The result is valid [generalized UTF-8] but not valid UTF-8.
2492///
2493/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
2494///
2495/// # Panics
2496///
2497/// Panics if the buffer is not large enough.
2498/// A buffer of length four is large enough to encode any `char`.
2499#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
2500#[doc(hidden)]
2501#[inline]
2502pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
2503    let len = len_utf8(code);
2504    if dst.len() < len {
2505        const_panic!(
2506            "encode_utf8: buffer does not have enough bytes to encode code point",
2507            "encode_utf8: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}",
2508            code: u32 = code,
2509            len: usize = len,
2510            dst_len: usize = dst.len(),
2511        );
2512    }
2513
2514    // SAFETY: `dst` is checked to be at least the length needed to encode the codepoint.
2515    unsafe { encode_utf8_raw_unchecked(code, dst.as_mut_ptr()) };
2516
2517    // SAFETY: `<&mut [u8]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
2518    unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
2519}
2520
2521/// Encodes a raw `u32` value as UTF-8 into the byte buffer pointed to by `dst`.
2522///
2523/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
2524/// (Creating a `char` in the surrogate range is UB.)
2525/// The result is valid [generalized UTF-8] but not valid UTF-8.
2526///
2527/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
2528///
2529/// # Safety
2530///
2531/// The behavior is undefined if the buffer pointed to by `dst` is not
2532/// large enough to hold the encoded codepoint. A buffer of length four
2533/// is large enough to encode any `char`.
2534///
2535/// For a safe version of this function, see the [`encode_utf8_raw`] function.
2536#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
2537#[doc(hidden)]
2538#[inline]
2539pub const unsafe fn encode_utf8_raw_unchecked(code: u32, dst: *mut u8) {
2540    let len = len_utf8(code);
2541    // SAFETY: The caller must guarantee that the buffer pointed to by `dst`
2542    // is at least `len` bytes long.
2543    unsafe {
2544        if len == 1 {
2545            *dst = code as u8;
2546            return;
2547        }
2548
2549        let last1 = (code >> 0 & 0x3F) as u8 | TAG_CONT;
2550        let last2 = (code >> 6 & 0x3F) as u8 | TAG_CONT;
2551        let last3 = (code >> 12 & 0x3F) as u8 | TAG_CONT;
2552        let last4 = (code >> 18 & 0x3F) as u8 | TAG_FOUR_B;
2553
2554        if len == 2 {
2555            *dst = last2 | TAG_TWO_B;
2556            *dst.add(1) = last1;
2557            return;
2558        }
2559
2560        if len == 3 {
2561            *dst = last3 | TAG_THREE_B;
2562            *dst.add(1) = last2;
2563            *dst.add(2) = last1;
2564            return;
2565        }
2566
2567        *dst = last4;
2568        *dst.add(1) = last3;
2569        *dst.add(2) = last2;
2570        *dst.add(3) = last1;
2571    }
2572}
2573
2574/// Encodes a raw `u32` value as native endian UTF-16 into the provided `u16` buffer,
2575/// and then returns the subslice of the buffer that contains the encoded character.
2576///
2577/// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range.
2578/// (Creating a `char` in the surrogate range is UB.)
2579///
2580/// # Panics
2581///
2582/// Panics if the buffer is not large enough.
2583/// A buffer of length 2 is large enough to encode any `char`.
2584#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
2585#[doc(hidden)]
2586#[inline]
2587pub const fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {
2588    let len = len_utf16(code);
2589    match (len, &mut *dst) {
2590        (1, [a, ..]) => {
2591            *a = code as u16;
2592        }
2593        (2, [a, b, ..]) => {
2594            code -= 0x1_0000;
2595            *a = (code >> 10) as u16 | 0xD800;
2596            *b = (code & 0x3FF) as u16 | 0xDC00;
2597        }
2598        _ => {
2599            const_panic!(
2600                "encode_utf16: buffer does not have enough bytes to encode code point",
2601                "encode_utf16: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}",
2602                code: u32 = code,
2603                len: usize = len,
2604                dst_len: usize = dst.len(),
2605            )
2606        }
2607    };
2608    // SAFETY: `<&mut [u16]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
2609    unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
2610}
core/char/methods.rs

core/char/
methods.rs