kernel/
str.rs

1// SPDX-License-Identifier: GPL-2.0
2
3//! String representations.
4
5use crate::{
6    alloc::{flags::*, AllocError, KVec},
7    error::{to_result, Result},
8    fmt::{self, Write},
9    prelude::*,
10};
11use core::{
12    marker::PhantomData,
13    ops::{self, Deref, DerefMut, Index},
14};
15
/// A byte string that carries no guarantee of being valid UTF-8.
///
/// This is a transparent wrapper around `[u8]`.
#[repr(transparent)]
pub struct BStr([u8]);

impl BStr {
    /// Returns the number of bytes in this string.
    #[inline]
    pub const fn len(&self) -> usize {
        self.0.len()
    }

    /// Returns `true` if this string holds no bytes at all.
    #[inline]
    pub const fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Reinterprets a `[u8]` as a [`BStr`] without copying.
    #[inline]
    pub const fn from_bytes(bytes: &[u8]) -> &Self {
        let raw = core::ptr::from_ref(bytes) as *const Self;
        // SAFETY: `BStr` is `#[repr(transparent)]` over `[u8]`, so the pointer cast preserves
        // layout and metadata, and `raw` was derived from a valid shared reference.
        unsafe { &*raw }
    }

    /// Strip a prefix from `self`. Delegates to [`slice::strip_prefix`].
    ///
    /// # Examples
    ///
    /// ```
    /// # use kernel::b_str;
    /// assert_eq!(Some(b_str!("bar")), b_str!("foobar").strip_prefix(b_str!("foo")));
    /// assert_eq!(None, b_str!("foobar").strip_prefix(b_str!("bar")));
    /// assert_eq!(Some(b_str!("foobar")), b_str!("foobar").strip_prefix(b_str!("")));
    /// assert_eq!(Some(b_str!("")), b_str!("foobar").strip_prefix(b_str!("foobar")));
    /// ```
    pub fn strip_prefix(&self, pattern: impl AsRef<Self>) -> Option<&BStr> {
        let prefix: &[u8] = &pattern.as_ref().0;
        // `None` when `self` does not start with `prefix`.
        let remainder = self.0.strip_prefix(prefix)?;
        Some(Self::from_bytes(remainder))
    }
}
57
58impl fmt::Display for BStr {
59    /// Formats printable ASCII characters, escaping the rest.
60    ///
61    /// ```
62    /// # use kernel::{prelude::fmt, b_str, str::{BStr, CString}};
63    /// let ascii = b_str!("Hello, BStr!");
64    /// let s = CString::try_from_fmt(fmt!("{ascii}"))?;
65    /// assert_eq!(s.to_bytes(), "Hello, BStr!".as_bytes());
66    ///
67    /// let non_ascii = b_str!("🦀");
68    /// let s = CString::try_from_fmt(fmt!("{non_ascii}"))?;
69    /// assert_eq!(s.to_bytes(), "\\xf0\\x9f\\xa6\\x80".as_bytes());
70    /// # Ok::<(), kernel::error::Error>(())
71    /// ```
72    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
73        for &b in &self.0 {
74            match b {
75                // Common escape codes.
76                b'\t' => f.write_str("\\t")?,
77                b'\n' => f.write_str("\\n")?,
78                b'\r' => f.write_str("\\r")?,
79                // Printable characters.
80                0x20..=0x7e => f.write_char(b as char)?,
81                _ => write!(f, "\\x{b:02x}")?,
82            }
83        }
84        Ok(())
85    }
86}
87
88impl fmt::Debug for BStr {
89    /// Formats printable ASCII characters with a double quote on either end,
90    /// escaping the rest.
91    ///
92    /// ```
93    /// # use kernel::{prelude::fmt, b_str, str::{BStr, CString}};
94    /// // Embedded double quotes are escaped.
95    /// let ascii = b_str!("Hello, \"BStr\"!");
96    /// let s = CString::try_from_fmt(fmt!("{ascii:?}"))?;
97    /// assert_eq!(s.to_bytes(), "\"Hello, \\\"BStr\\\"!\"".as_bytes());
98    ///
99    /// let non_ascii = b_str!("😺");
100    /// let s = CString::try_from_fmt(fmt!("{non_ascii:?}"))?;
101    /// assert_eq!(s.to_bytes(), "\"\\xf0\\x9f\\x98\\xba\"".as_bytes());
102    /// # Ok::<(), kernel::error::Error>(())
103    /// ```
104    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
105        f.write_char('"')?;
106        for &b in &self.0 {
107            match b {
108                // Common escape codes.
109                b'\t' => f.write_str("\\t")?,
110                b'\n' => f.write_str("\\n")?,
111                b'\r' => f.write_str("\\r")?,
112                // String escape characters.
113                b'\"' => f.write_str("\\\"")?,
114                b'\\' => f.write_str("\\\\")?,
115                // Printable characters.
116                0x20..=0x7e => f.write_char(b as char)?,
117                _ => write!(f, "\\x{b:02x}")?,
118            }
119        }
120        f.write_char('"')
121    }
122}
123
124impl Deref for BStr {
125    type Target = [u8];
126
127    #[inline]
128    fn deref(&self) -> &Self::Target {
129        &self.0
130    }
131}
132
133impl PartialEq for BStr {
134    fn eq(&self, other: &Self) -> bool {
135        self.deref().eq(other.deref())
136    }
137}
138
139impl<Idx> Index<Idx> for BStr
140where
141    [u8]: Index<Idx, Output = [u8]>,
142{
143    type Output = Self;
144
145    fn index(&self, index: Idx) -> &Self::Output {
146        BStr::from_bytes(&self.0[index])
147    }
148}
149
150impl AsRef<BStr> for [u8] {
151    fn as_ref(&self) -> &BStr {
152        BStr::from_bytes(self)
153    }
154}
155
156impl AsRef<BStr> for BStr {
157    fn as_ref(&self) -> &BStr {
158        self
159    }
160}
161
/// Creates a new [`BStr`] from a string literal.
///
/// `b_str!` takes the UTF-8 bytes of the supplied string literal, so non-ASCII
/// characters can be included.
///
/// # Examples
///
/// ```
/// # use kernel::b_str;
/// # use kernel::str::BStr;
/// const MY_BSTR: &BStr = b_str!("My awesome BStr!");
/// ```
#[macro_export]
macro_rules! b_str {
    ($str:literal) => {{
        // Both items are `const`, so the conversion happens at compile time.
        const TEXT: &str = $str;
        const BYTES: &$crate::str::BStr = $crate::str::BStr::from_bytes(TEXT.as_bytes());
        BYTES
    }};
}
182
183/// Returns a C pointer to the string.
184// It is a free function rather than a method on an extension trait because:
185//
186// - error[E0379]: functions in trait impls cannot be declared const
187#[inline]
188pub const fn as_char_ptr_in_const_context(c_str: &CStr) -> *const c_char {
189    c_str.0.as_ptr()
190}
191
192/// Possible errors when using conversion functions in [`CStr`].
193#[derive(Debug, Clone, Copy)]
194pub enum CStrConvertError {
195    /// Supplied bytes contain an interior `NUL`.
196    InteriorNul,
197
198    /// Supplied bytes are not terminated by `NUL`.
199    NotNulTerminated,
200}
201
202impl From<CStrConvertError> for Error {
203    #[inline]
204    fn from(_: CStrConvertError) -> Error {
205        EINVAL
206    }
207}
208
209/// A string that is guaranteed to have exactly one `NUL` byte, which is at the
210/// end.
211///
212/// Used for interoperability with kernel APIs that take C strings.
213#[repr(transparent)]
214pub struct CStr([u8]);
215
216impl CStr {
217    /// Returns the length of this string excluding `NUL`.
218    #[inline]
219    pub const fn len(&self) -> usize {
220        self.len_with_nul() - 1
221    }
222
223    /// Returns the length of this string with `NUL`.
224    #[inline]
225    pub const fn len_with_nul(&self) -> usize {
226        if self.0.is_empty() {
227            // SAFETY: This is one of the invariant of `CStr`.
228            // We add a `unreachable_unchecked` here to hint the optimizer that
229            // the value returned from this function is non-zero.
230            unsafe { core::hint::unreachable_unchecked() };
231        }
232        self.0.len()
233    }
234
235    /// Returns `true` if the string only includes `NUL`.
236    #[inline]
237    pub const fn is_empty(&self) -> bool {
238        self.len() == 0
239    }
240
241    /// Wraps a raw C string pointer.
242    ///
243    /// # Safety
244    ///
245    /// `ptr` must be a valid pointer to a `NUL`-terminated C string, and it must
246    /// last at least `'a`. When `CStr` is alive, the memory pointed by `ptr`
247    /// must not be mutated.
248    #[inline]
249    pub unsafe fn from_char_ptr<'a>(ptr: *const c_char) -> &'a Self {
250        // SAFETY: The safety precondition guarantees `ptr` is a valid pointer
251        // to a `NUL`-terminated C string.
252        let len = unsafe { bindings::strlen(ptr) } + 1;
253        // SAFETY: Lifetime guaranteed by the safety precondition.
254        let bytes = unsafe { core::slice::from_raw_parts(ptr.cast(), len) };
255        // SAFETY: As `len` is returned by `strlen`, `bytes` does not contain interior `NUL`.
256        // As we have added 1 to `len`, the last byte is known to be `NUL`.
257        unsafe { Self::from_bytes_with_nul_unchecked(bytes) }
258    }
259
260    /// Creates a [`CStr`] from a `[u8]`.
261    ///
262    /// The provided slice must be `NUL`-terminated, does not contain any
263    /// interior `NUL` bytes.
264    pub const fn from_bytes_with_nul(bytes: &[u8]) -> Result<&Self, CStrConvertError> {
265        if bytes.is_empty() {
266            return Err(CStrConvertError::NotNulTerminated);
267        }
268        if bytes[bytes.len() - 1] != 0 {
269            return Err(CStrConvertError::NotNulTerminated);
270        }
271        let mut i = 0;
272        // `i + 1 < bytes.len()` allows LLVM to optimize away bounds checking,
273        // while it couldn't optimize away bounds checks for `i < bytes.len() - 1`.
274        while i + 1 < bytes.len() {
275            if bytes[i] == 0 {
276                return Err(CStrConvertError::InteriorNul);
277            }
278            i += 1;
279        }
280        // SAFETY: We just checked that all properties hold.
281        Ok(unsafe { Self::from_bytes_with_nul_unchecked(bytes) })
282    }
283
284    /// Creates a [`CStr`] from a `[u8]` without performing any additional
285    /// checks.
286    ///
287    /// # Safety
288    ///
289    /// `bytes` *must* end with a `NUL` byte, and should only have a single
290    /// `NUL` byte (or the string will be truncated).
291    #[inline]
292    pub const unsafe fn from_bytes_with_nul_unchecked(bytes: &[u8]) -> &CStr {
293        // SAFETY: Properties of `bytes` guaranteed by the safety precondition.
294        unsafe { core::mem::transmute(bytes) }
295    }
296
297    /// Creates a mutable [`CStr`] from a `[u8]` without performing any
298    /// additional checks.
299    ///
300    /// # Safety
301    ///
302    /// `bytes` *must* end with a `NUL` byte, and should only have a single
303    /// `NUL` byte (or the string will be truncated).
304    #[inline]
305    pub unsafe fn from_bytes_with_nul_unchecked_mut(bytes: &mut [u8]) -> &mut CStr {
306        // SAFETY: Properties of `bytes` guaranteed by the safety precondition.
307        unsafe { &mut *(core::ptr::from_mut(bytes) as *mut CStr) }
308    }
309
310    /// Returns a C pointer to the string.
311    ///
312    /// Using this function in a const context is deprecated in favor of
313    /// [`as_char_ptr_in_const_context`] in preparation for replacing `CStr` with `core::ffi::CStr`
314    /// which does not have this method.
315    #[inline]
316    pub const fn as_char_ptr(&self) -> *const c_char {
317        as_char_ptr_in_const_context(self)
318    }
319
320    /// Convert the string to a byte slice without the trailing `NUL` byte.
321    #[inline]
322    pub fn to_bytes(&self) -> &[u8] {
323        &self.0[..self.len()]
324    }
325
326    /// Convert the string to a byte slice without the trailing `NUL` byte.
327    ///
328    /// This function is deprecated in favor of [`Self::to_bytes`] in preparation for replacing
329    /// `CStr` with `core::ffi::CStr` which does not have this method.
330    #[inline]
331    pub fn as_bytes(&self) -> &[u8] {
332        self.to_bytes()
333    }
334
335    /// Convert the string to a byte slice containing the trailing `NUL` byte.
336    #[inline]
337    pub const fn to_bytes_with_nul(&self) -> &[u8] {
338        &self.0
339    }
340
341    /// Convert the string to a byte slice containing the trailing `NUL` byte.
342    ///
343    /// This function is deprecated in favor of [`Self::to_bytes_with_nul`] in preparation for
344    /// replacing `CStr` with `core::ffi::CStr` which does not have this method.
345    #[inline]
346    pub const fn as_bytes_with_nul(&self) -> &[u8] {
347        self.to_bytes_with_nul()
348    }
349
350    /// Yields a [`&str`] slice if the [`CStr`] contains valid UTF-8.
351    ///
352    /// If the contents of the [`CStr`] are valid UTF-8 data, this
353    /// function will return the corresponding [`&str`] slice. Otherwise,
354    /// it will return an error with details of where UTF-8 validation failed.
355    ///
356    /// # Examples
357    ///
358    /// ```
359    /// # use kernel::str::CStr;
360    /// let cstr = CStr::from_bytes_with_nul(b"foo\0")?;
361    /// assert_eq!(cstr.to_str(), Ok("foo"));
362    /// # Ok::<(), kernel::error::Error>(())
363    /// ```
364    #[inline]
365    pub fn to_str(&self) -> Result<&str, core::str::Utf8Error> {
366        core::str::from_utf8(self.as_bytes())
367    }
368
369    /// Unsafely convert this [`CStr`] into a [`&str`], without checking for
370    /// valid UTF-8.
371    ///
372    /// # Safety
373    ///
374    /// The contents must be valid UTF-8.
375    ///
376    /// # Examples
377    ///
378    /// ```
379    /// # use kernel::c_str;
380    /// # use kernel::str::CStr;
381    /// let bar = c_str!("ツ");
382    /// // SAFETY: String literals are guaranteed to be valid UTF-8
383    /// // by the Rust compiler.
384    /// assert_eq!(unsafe { bar.as_str_unchecked() }, "ツ");
385    /// ```
386    #[inline]
387    pub unsafe fn as_str_unchecked(&self) -> &str {
388        // SAFETY: TODO.
389        unsafe { core::str::from_utf8_unchecked(self.as_bytes()) }
390    }
391
392    /// Convert this [`CStr`] into a [`CString`] by allocating memory and
393    /// copying over the string data.
394    pub fn to_cstring(&self) -> Result<CString, AllocError> {
395        CString::try_from(self)
396    }
397
398    /// Converts this [`CStr`] to its ASCII lower case equivalent in-place.
399    ///
400    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
401    /// but non-ASCII letters are unchanged.
402    ///
403    /// To return a new lowercased value without modifying the existing one, use
404    /// [`to_ascii_lowercase()`].
405    ///
406    /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase
407    pub fn make_ascii_lowercase(&mut self) {
408        // INVARIANT: This doesn't introduce or remove NUL bytes in the C
409        // string.
410        self.0.make_ascii_lowercase();
411    }
412
413    /// Converts this [`CStr`] to its ASCII upper case equivalent in-place.
414    ///
415    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
416    /// but non-ASCII letters are unchanged.
417    ///
418    /// To return a new uppercased value without modifying the existing one, use
419    /// [`to_ascii_uppercase()`].
420    ///
421    /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase
422    pub fn make_ascii_uppercase(&mut self) {
423        // INVARIANT: This doesn't introduce or remove NUL bytes in the C
424        // string.
425        self.0.make_ascii_uppercase();
426    }
427
428    /// Returns a copy of this [`CString`] where each character is mapped to its
429    /// ASCII lower case equivalent.
430    ///
431    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
432    /// but non-ASCII letters are unchanged.
433    ///
434    /// To lowercase the value in-place, use [`make_ascii_lowercase`].
435    ///
436    /// [`make_ascii_lowercase`]: str::make_ascii_lowercase
437    pub fn to_ascii_lowercase(&self) -> Result<CString, AllocError> {
438        let mut s = self.to_cstring()?;
439
440        s.make_ascii_lowercase();
441
442        Ok(s)
443    }
444
445    /// Returns a copy of this [`CString`] where each character is mapped to its
446    /// ASCII upper case equivalent.
447    ///
448    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
449    /// but non-ASCII letters are unchanged.
450    ///
451    /// To uppercase the value in-place, use [`make_ascii_uppercase`].
452    ///
453    /// [`make_ascii_uppercase`]: str::make_ascii_uppercase
454    pub fn to_ascii_uppercase(&self) -> Result<CString, AllocError> {
455        let mut s = self.to_cstring()?;
456
457        s.make_ascii_uppercase();
458
459        Ok(s)
460    }
461}
462
463impl fmt::Display for CStr {
464    /// Formats printable ASCII characters, escaping the rest.
465    ///
466    /// ```
467    /// # use kernel::c_str;
468    /// # use kernel::prelude::fmt;
469    /// # use kernel::str::CStr;
470    /// # use kernel::str::CString;
471    /// let penguin = c_str!("🐧");
472    /// let s = CString::try_from_fmt(fmt!("{penguin}"))?;
473    /// assert_eq!(s.to_bytes_with_nul(), "\\xf0\\x9f\\x90\\xa7\0".as_bytes());
474    ///
475    /// let ascii = c_str!("so \"cool\"");
476    /// let s = CString::try_from_fmt(fmt!("{ascii}"))?;
477    /// assert_eq!(s.to_bytes_with_nul(), "so \"cool\"\0".as_bytes());
478    /// # Ok::<(), kernel::error::Error>(())
479    /// ```
480    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
481        for &c in self.to_bytes() {
482            if (0x20..0x7f).contains(&c) {
483                // Printable character.
484                f.write_char(c as char)?;
485            } else {
486                write!(f, "\\x{c:02x}")?;
487            }
488        }
489        Ok(())
490    }
491}
492
493impl fmt::Debug for CStr {
494    /// Formats printable ASCII characters with a double quote on either end, escaping the rest.
495    ///
496    /// ```
497    /// # use kernel::c_str;
498    /// # use kernel::prelude::fmt;
499    /// # use kernel::str::CStr;
500    /// # use kernel::str::CString;
501    /// let penguin = c_str!("🐧");
502    /// let s = CString::try_from_fmt(fmt!("{penguin:?}"))?;
503    /// assert_eq!(s.as_bytes_with_nul(), "\"\\xf0\\x9f\\x90\\xa7\"\0".as_bytes());
504    ///
505    /// // Embedded double quotes are escaped.
506    /// let ascii = c_str!("so \"cool\"");
507    /// let s = CString::try_from_fmt(fmt!("{ascii:?}"))?;
508    /// assert_eq!(s.as_bytes_with_nul(), "\"so \\\"cool\\\"\"\0".as_bytes());
509    /// # Ok::<(), kernel::error::Error>(())
510    /// ```
511    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
512        f.write_str("\"")?;
513        for &c in self.as_bytes() {
514            match c {
515                // Printable characters.
516                b'\"' => f.write_str("\\\"")?,
517                0x20..=0x7e => f.write_char(c as char)?,
518                _ => write!(f, "\\x{c:02x}")?,
519            }
520        }
521        f.write_str("\"")
522    }
523}
524
525impl AsRef<BStr> for CStr {
526    #[inline]
527    fn as_ref(&self) -> &BStr {
528        BStr::from_bytes(self.as_bytes())
529    }
530}
531
532impl Deref for CStr {
533    type Target = BStr;
534
535    #[inline]
536    fn deref(&self) -> &Self::Target {
537        self.as_ref()
538    }
539}
540
541impl Index<ops::RangeFrom<usize>> for CStr {
542    type Output = CStr;
543
544    #[inline]
545    fn index(&self, index: ops::RangeFrom<usize>) -> &Self::Output {
546        // Delegate bounds checking to slice.
547        // Assign to _ to mute clippy's unnecessary operation warning.
548        let _ = &self.as_bytes()[index.start..];
549        // SAFETY: We just checked the bounds.
550        unsafe { Self::from_bytes_with_nul_unchecked(&self.0[index.start..]) }
551    }
552}
553
554impl Index<ops::RangeFull> for CStr {
555    type Output = CStr;
556
557    #[inline]
558    fn index(&self, _index: ops::RangeFull) -> &Self::Output {
559        self
560    }
561}
562
mod private {
    use core::ops::{Range, RangeInclusive, RangeToInclusive};

    // Marker trait for the index types that can be forwarded to `BStr`.
    pub trait CStrIndex {}

    impl CStrIndex for usize {}
    impl CStrIndex for Range<usize> {}
    impl CStrIndex for RangeInclusive<usize> {}
    impl CStrIndex for RangeToInclusive<usize> {}
}
574
575impl<Idx> Index<Idx> for CStr
576where
577    Idx: private::CStrIndex,
578    BStr: Index<Idx>,
579{
580    type Output = <BStr as Index<Idx>>::Output;
581
582    #[inline]
583    fn index(&self, index: Idx) -> &Self::Output {
584        &self.as_ref()[index]
585    }
586}
587
/// Creates a new [`CStr`] from a string literal.
///
/// A `NUL` terminator is appended automatically; the string literal itself should not
/// contain any `NUL` bytes.
///
/// # Examples
///
/// ```
/// # use kernel::c_str;
/// # use kernel::str::CStr;
/// const MY_CSTR: &CStr = c_str!("My awesome CStr!");
/// ```
#[macro_export]
macro_rules! c_str {
    ($str:expr) => {{
        // Both items are `const`, so validation happens at compile time.
        const TERMINATED: &str = concat!($str, "\0");
        const VALUE: &$crate::str::CStr =
            match $crate::str::CStr::from_bytes_with_nul(TERMINATED.as_bytes()) {
                Ok(v) => v,
                Err(_) => panic!("string contains interior NUL"),
            };
        VALUE
    }};
}
610
611#[kunit_tests(rust_kernel_str)]
612mod tests {
613    use super::*;
614
615    macro_rules! format {
616        ($($f:tt)*) => ({
617            CString::try_from_fmt(fmt!($($f)*))?.to_str()?
618        })
619    }
620
621    const ALL_ASCII_CHARS: &str =
622        "\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\x09\\x0a\\x0b\\x0c\\x0d\\x0e\\x0f\
623        \\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f \
624        !\"#$%&'()*+,-./0123456789:;<=>?@\
625        ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f\
626        \\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d\\x8e\\x8f\
627        \\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b\\x9c\\x9d\\x9e\\x9f\
628        \\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9\\xaa\\xab\\xac\\xad\\xae\\xaf\
629        \\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\
630        \\xc0\\xc1\\xc2\\xc3\\xc4\\xc5\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\
631        \\xd0\\xd1\\xd2\\xd3\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\
632        \\xe0\\xe1\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef\
633        \\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd\\xfe\\xff";
634
635    #[test]
636    fn test_cstr_to_str() -> Result {
637        let good_bytes = b"\xf0\x9f\xa6\x80\0";
638        let checked_cstr = CStr::from_bytes_with_nul(good_bytes)?;
639        let checked_str = checked_cstr.to_str()?;
640        assert_eq!(checked_str, "🦀");
641        Ok(())
642    }
643
644    #[test]
645    fn test_cstr_to_str_invalid_utf8() -> Result {
646        let bad_bytes = b"\xc3\x28\0";
647        let checked_cstr = CStr::from_bytes_with_nul(bad_bytes)?;
648        assert!(checked_cstr.to_str().is_err());
649        Ok(())
650    }
651
652    #[test]
653    fn test_cstr_as_str_unchecked() -> Result {
654        let good_bytes = b"\xf0\x9f\x90\xA7\0";
655        let checked_cstr = CStr::from_bytes_with_nul(good_bytes)?;
656        // SAFETY: The contents come from a string literal which contains valid UTF-8.
657        let unchecked_str = unsafe { checked_cstr.as_str_unchecked() };
658        assert_eq!(unchecked_str, "🐧");
659        Ok(())
660    }
661
662    #[test]
663    fn test_cstr_display() -> Result {
664        let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0")?;
665        assert_eq!(format!("{hello_world}"), "hello, world!");
666        let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0")?;
667        assert_eq!(format!("{non_printables}"), "\\x01\\x09\\x0a");
668        let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0")?;
669        assert_eq!(format!("{non_ascii}"), "d\\xe9j\\xe0 vu");
670        let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0")?;
671        assert_eq!(format!("{good_bytes}"), "\\xf0\\x9f\\xa6\\x80");
672        Ok(())
673    }
674
675    #[test]
676    fn test_cstr_display_all_bytes() -> Result {
677        let mut bytes: [u8; 256] = [0; 256];
678        // fill `bytes` with [1..=255] + [0]
679        for i in u8::MIN..=u8::MAX {
680            bytes[i as usize] = i.wrapping_add(1);
681        }
682        let cstr = CStr::from_bytes_with_nul(&bytes)?;
683        assert_eq!(format!("{cstr}"), ALL_ASCII_CHARS);
684        Ok(())
685    }
686
687    #[test]
688    fn test_cstr_debug() -> Result {
689        let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0")?;
690        assert_eq!(format!("{hello_world:?}"), "\"hello, world!\"");
691        let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0")?;
692        assert_eq!(format!("{non_printables:?}"), "\"\\x01\\x09\\x0a\"");
693        let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0")?;
694        assert_eq!(format!("{non_ascii:?}"), "\"d\\xe9j\\xe0 vu\"");
695        let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0")?;
696        assert_eq!(format!("{good_bytes:?}"), "\"\\xf0\\x9f\\xa6\\x80\"");
697        Ok(())
698    }
699
700    #[test]
701    fn test_bstr_display() -> Result {
702        let hello_world = BStr::from_bytes(b"hello, world!");
703        assert_eq!(format!("{hello_world}"), "hello, world!");
704        let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_");
705        assert_eq!(format!("{escapes}"), "_\\t_\\n_\\r_\\_'_\"_");
706        let others = BStr::from_bytes(b"\x01");
707        assert_eq!(format!("{others}"), "\\x01");
708        let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu");
709        assert_eq!(format!("{non_ascii}"), "d\\xe9j\\xe0 vu");
710        let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80");
711        assert_eq!(format!("{good_bytes}"), "\\xf0\\x9f\\xa6\\x80");
712        Ok(())
713    }
714
715    #[test]
716    fn test_bstr_debug() -> Result {
717        let hello_world = BStr::from_bytes(b"hello, world!");
718        assert_eq!(format!("{hello_world:?}"), "\"hello, world!\"");
719        let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_");
720        assert_eq!(format!("{escapes:?}"), "\"_\\t_\\n_\\r_\\\\_'_\\\"_\"");
721        let others = BStr::from_bytes(b"\x01");
722        assert_eq!(format!("{others:?}"), "\"\\x01\"");
723        let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu");
724        assert_eq!(format!("{non_ascii:?}"), "\"d\\xe9j\\xe0 vu\"");
725        let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80");
726        assert_eq!(format!("{good_bytes:?}"), "\"\\xf0\\x9f\\xa6\\x80\"");
727        Ok(())
728    }
729}
730
/// Allows formatting of [`fmt::Arguments`] into a raw buffer.
///
/// Writing past the end of the buffer is not an error: the bytes that do not fit are not
/// copied, but the position still advances, so callers can calculate the size required to
/// fit everything.
///
/// # Invariants
///
/// The memory region between `pos` (inclusive) and `end` (exclusive) is valid for writes if `pos`
/// is less than `end`.
pub struct RawFormatter {
    // Addresses are stored as `usize` so that the `saturating_*` functions can be used.
    beg: usize,
    pos: usize,
    end: usize,
}

impl RawFormatter {
    /// Creates a new instance of [`RawFormatter`] with an empty buffer.
    fn new() -> Self {
        // INVARIANT: `pos` equals `end`, so no memory is required to be writable.
        Self {
            beg: 0,
            pos: 0,
            end: 0,
        }
    }

    /// Creates a new instance of [`RawFormatter`] with the given buffer pointers.
    ///
    /// # Safety
    ///
    /// If `pos` is less than `end`, then the region between `pos` (inclusive) and `end`
    /// (exclusive) must be valid for writes for the lifetime of the returned [`RawFormatter`].
    pub(crate) unsafe fn from_ptrs(pos: *mut u8, end: *mut u8) -> Self {
        let start = pos as usize;
        // INVARIANT: The safety requirements guarantee the type invariants.
        Self {
            beg: start,
            pos: start,
            end: end as usize,
        }
    }

    /// Creates a new instance of [`RawFormatter`] with the given buffer.
    ///
    /// # Safety
    ///
    /// The memory region starting at `buf` and extending for `len` bytes must be valid for writes
    /// for the lifetime of the returned [`RawFormatter`].
    pub(crate) unsafe fn from_buffer(buf: *mut u8, len: usize) -> Self {
        let start = buf as usize;
        // INVARIANT: The saturating addition keeps `end` from ever being less than `buf`, and
        // the safety requirements guarantee that the memory region is valid for writes.
        Self {
            beg: start,
            pos: start,
            end: start.saturating_add(len),
        }
    }

    /// Returns the current insert position.
    ///
    /// N.B. It may point to invalid memory.
    pub(crate) fn pos(&self) -> *mut u8 {
        self.pos as *mut u8
    }

    /// Returns the number of bytes written to the formatter.
    pub fn bytes_written(&self) -> usize {
        self.pos - self.beg
    }
}

impl fmt::Write for RawFormatter {
    fn write_str(&mut self, s: &str) -> fmt::Result {
        let start = self.pos;

        // Position after accounting for all of `s`. It may exceed `end`, but it must not wrap
        // around to 0, hence the saturating addition.
        let next = start.saturating_add(s.len());

        // Number of bytes that still fit; this is 0 once `start` has reached or passed `end`.
        let writable = next.min(self.end).saturating_sub(start);

        if writable != 0 {
            // SAFETY: A non-zero `writable` means `start` is below `end`, and
            // `start + writable` does not exceed `end`, so the destination is valid for
            // writes per the type invariants.
            unsafe { core::ptr::copy_nonoverlapping(s.as_ptr(), start as *mut u8, writable) };
        }

        self.pos = next;
        Ok(())
    }
}
828
829/// Allows formatting of [`fmt::Arguments`] into a raw buffer.
830///
831/// Fails if callers attempt to write more than will fit in the buffer.
832pub struct Formatter<'a>(RawFormatter, PhantomData<&'a mut ()>);
833
834impl Formatter<'_> {
835    /// Creates a new instance of [`Formatter`] with the given buffer.
836    ///
837    /// # Safety
838    ///
839    /// The memory region starting at `buf` and extending for `len` bytes must be valid for writes
840    /// for the lifetime of the returned [`Formatter`].
841    pub(crate) unsafe fn from_buffer(buf: *mut u8, len: usize) -> Self {
842        // SAFETY: The safety requirements of this function satisfy those of the callee.
843        Self(unsafe { RawFormatter::from_buffer(buf, len) }, PhantomData)
844    }
845
846    /// Create a new [`Self`] instance.
847    pub fn new(buffer: &mut [u8]) -> Self {
848        // SAFETY: `buffer` is valid for writes for the entire length for
849        // the lifetime of `Self`.
850        unsafe { Formatter::from_buffer(buffer.as_mut_ptr(), buffer.len()) }
851    }
852}
853
854impl Deref for Formatter<'_> {
855    type Target = RawFormatter;
856
857    fn deref(&self) -> &Self::Target {
858        &self.0
859    }
860}
861
862impl fmt::Write for Formatter<'_> {
863    fn write_str(&mut self, s: &str) -> fmt::Result {
864        self.0.write_str(s)?;
865
866        // Fail the request if we go past the end of the buffer.
867        if self.0.pos > self.0.end {
868            Err(fmt::Error)
869        } else {
870            Ok(())
871        }
872    }
873}
874
/// A mutable reference to a byte buffer where a string can be written into.
///
/// After every successful write, a zero byte follows the last written character.
///
/// # Invariants
///
/// * The first byte of `buffer` is always zero.
/// * The length of `buffer` is at least 1.
pub(crate) struct NullTerminatedFormatter<'a> {
    buffer: &'a mut [u8],
}

impl<'a> NullTerminatedFormatter<'a> {
    /// Create a new [`Self`] instance.
    ///
    /// Returns `None` if `buffer` is empty, since there would be no room for the terminator.
    pub(crate) fn new(buffer: &'a mut [u8]) -> Option<NullTerminatedFormatter<'a>> {
        let first = buffer.first_mut()?;
        *first = 0;

        // INVARIANT:
        //  - The first byte was set to zero above.
        //  - `first_mut()` returned `Some`, so `buffer` has length at least 1.
        Some(Self { buffer })
    }
}

impl Write for NullTerminatedFormatter<'_> {
    fn write_str(&mut self, s: &str) -> fmt::Result {
        let payload = s.as_bytes();

        // One byte is reserved for the terminating zero. By the type invariant the buffer
        // length is at least 1, so the subtraction cannot underflow.
        let capacity = self.buffer.len() - 1;
        if payload.len() > capacity {
            return Err(fmt::Error);
        }

        // Take the buffer out of `self` so that it can be split. The zero-start invariant is
        // broken for a short while, until `tail` is stored back below.
        let (head, tail) = core::mem::take(&mut self.buffer).split_at_mut(payload.len());
        head.copy_from_slice(payload);

        // INVARIANT: The capacity check above guarantees `tail` holds at least one byte, and
        // its first byte is set to zero before it becomes the new buffer.
        tail[0] = 0;
        self.buffer = tail;

        Ok(())
    }
}
922
923/// # Safety
924///
925/// - `string` must point to a null terminated string that is valid for read.
926unsafe fn kstrtobool_raw(string: *const u8) -> Result<bool> {
927    let mut result: bool = false;
928
929    // SAFETY:
930    // - By function safety requirement, `string` is a valid null-terminated string.
931    // - `result` is a valid `bool` that we own.
932    to_result(unsafe { bindings::kstrtobool(string, &mut result) })?;
933    Ok(result)
934}
935
936/// Convert common user inputs into boolean values using the kernel's `kstrtobool` function.
937///
938/// This routine returns `Ok(bool)` if the first character is one of 'YyTt1NnFf0', or
939/// \[oO\]\[NnFf\] for "on" and "off". Otherwise it will return `Err(EINVAL)`.
940///
941/// # Examples
942///
943/// ```
944/// # use kernel::{c_str, str::kstrtobool};
945///
946/// // Lowercase
947/// assert_eq!(kstrtobool(c_str!("true")), Ok(true));
948/// assert_eq!(kstrtobool(c_str!("tr")), Ok(true));
949/// assert_eq!(kstrtobool(c_str!("t")), Ok(true));
950/// assert_eq!(kstrtobool(c_str!("twrong")), Ok(true));
951/// assert_eq!(kstrtobool(c_str!("false")), Ok(false));
952/// assert_eq!(kstrtobool(c_str!("f")), Ok(false));
953/// assert_eq!(kstrtobool(c_str!("yes")), Ok(true));
954/// assert_eq!(kstrtobool(c_str!("no")), Ok(false));
955/// assert_eq!(kstrtobool(c_str!("on")), Ok(true));
956/// assert_eq!(kstrtobool(c_str!("off")), Ok(false));
957///
958/// // Camel case
959/// assert_eq!(kstrtobool(c_str!("True")), Ok(true));
960/// assert_eq!(kstrtobool(c_str!("False")), Ok(false));
961/// assert_eq!(kstrtobool(c_str!("Yes")), Ok(true));
962/// assert_eq!(kstrtobool(c_str!("No")), Ok(false));
963/// assert_eq!(kstrtobool(c_str!("On")), Ok(true));
964/// assert_eq!(kstrtobool(c_str!("Off")), Ok(false));
965///
966/// // All caps
967/// assert_eq!(kstrtobool(c_str!("TRUE")), Ok(true));
968/// assert_eq!(kstrtobool(c_str!("FALSE")), Ok(false));
969/// assert_eq!(kstrtobool(c_str!("YES")), Ok(true));
970/// assert_eq!(kstrtobool(c_str!("NO")), Ok(false));
971/// assert_eq!(kstrtobool(c_str!("ON")), Ok(true));
972/// assert_eq!(kstrtobool(c_str!("OFF")), Ok(false));
973///
974/// // Numeric
975/// assert_eq!(kstrtobool(c_str!("1")), Ok(true));
976/// assert_eq!(kstrtobool(c_str!("0")), Ok(false));
977///
978/// // Invalid input
979/// assert_eq!(kstrtobool(c_str!("invalid")), Err(EINVAL));
980/// assert_eq!(kstrtobool(c_str!("2")), Err(EINVAL));
981/// ```
982pub fn kstrtobool(string: &CStr) -> Result<bool> {
983    // SAFETY:
984    // - The pointer returned by `CStr::as_char_ptr` is guaranteed to be
985    //   null terminated.
986    // - `string` is live and thus the string is valid for read.
987    unsafe { kstrtobool_raw(string.as_char_ptr()) }
988}
989
990/// Convert `&[u8]` to `bool` by deferring to [`kernel::str::kstrtobool`].
991///
992/// Only considers at most the first two bytes of `bytes`.
993pub fn kstrtobool_bytes(bytes: &[u8]) -> Result<bool> {
994    // `ktostrbool` only considers the first two bytes of the input.
995    let stack_string = [*bytes.first().unwrap_or(&0), *bytes.get(1).unwrap_or(&0), 0];
996    // SAFETY: `stack_string` is null terminated and it is live on the stack so
997    // it is valid for read.
998    unsafe { kstrtobool_raw(stack_string.as_ptr()) }
999}
1000
/// An owned string that is guaranteed to have exactly one `NUL` byte, which is at the end.
///
/// Used for interoperability with kernel APIs that take C strings.
///
/// # Invariants
///
/// The string is always `NUL`-terminated and contains no other `NUL` bytes.
///
/// # Examples
///
/// ```
/// use kernel::{str::CString, prelude::fmt};
///
/// let s = CString::try_from_fmt(fmt!("{}{}{}", "abc", 10, 20))?;
/// assert_eq!(s.to_bytes_with_nul(), "abc1020\0".as_bytes());
///
/// let tmp = "testing";
/// let s = CString::try_from_fmt(fmt!("{tmp}{}", 123))?;
/// assert_eq!(s.to_bytes_with_nul(), "testing123\0".as_bytes());
///
/// // This fails because it has an embedded `NUL` byte.
/// let s = CString::try_from_fmt(fmt!("a\0b{}", 123));
/// assert_eq!(s.is_ok(), false);
/// # Ok::<(), kernel::error::Error>(())
/// ```
pub struct CString {
    // Backing storage: the string bytes followed by the single terminating `NUL`.
    buf: KVec<u8>,
}
1029
impl CString {
    /// Creates an instance of [`CString`] from the given formatted arguments.
    ///
    /// The arguments are formatted twice: a first pass only measures the required size, and a
    /// second pass writes into a buffer allocated with exactly that capacity.
    ///
    /// # Errors
    ///
    /// Returns an error if formatting fails, if the buffer cannot be allocated, or (`EINVAL`) if
    /// the formatted output contains a `NUL` byte before the terminator.
    pub fn try_from_fmt(args: fmt::Arguments<'_>) -> Result<Self, Error> {
        // Calculate the size needed (formatted string plus `NUL` terminator).
        let mut f = RawFormatter::new();
        f.write_fmt(args)?;
        f.write_str("\0")?;
        let size = f.bytes_written();

        // Allocate a vector with the required number of bytes, and write to it.
        let mut buf = KVec::with_capacity(size, GFP_KERNEL)?;
        // SAFETY: The buffer stored in `buf` is at least of size `size` and is valid for writes.
        let mut f = unsafe { Formatter::from_buffer(buf.as_mut_ptr(), size) };
        f.write_fmt(args)?;
        f.write_str("\0")?;

        // SAFETY: The number of bytes that can be written to `f` is bounded by `size`, which is
        // `buf`'s capacity. The contents of the buffer have been initialised by writes to `f`.
        unsafe { buf.inc_len(f.bytes_written()) };

        // Check that there are no `NUL` bytes before the end.
        // SAFETY: The buffer is valid for read because `f.bytes_written()` is bounded by `size`
        // (which is the minimum buffer size) and is non-zero (we wrote at least the `NUL`
        // terminator) so `f.bytes_written() - 1` doesn't underflow.
        let ptr = unsafe { bindings::memchr(buf.as_ptr().cast(), 0, f.bytes_written() - 1) };
        if !ptr.is_null() {
            return Err(EINVAL);
        }

        // INVARIANT: We wrote the `NUL` terminator and checked above that no other `NUL` bytes
        // exist in the buffer.
        Ok(Self { buf })
    }
}
1064
1065impl Deref for CString {
1066    type Target = CStr;
1067
1068    fn deref(&self) -> &Self::Target {
1069        // SAFETY: The type invariants guarantee that the string is `NUL`-terminated and that no
1070        // other `NUL` bytes exist.
1071        unsafe { CStr::from_bytes_with_nul_unchecked(self.buf.as_slice()) }
1072    }
1073}
1074
1075impl DerefMut for CString {
1076    fn deref_mut(&mut self) -> &mut Self::Target {
1077        // SAFETY: A `CString` is always NUL-terminated and contains no other
1078        // NUL bytes.
1079        unsafe { CStr::from_bytes_with_nul_unchecked_mut(self.buf.as_mut_slice()) }
1080    }
1081}
1082
1083impl<'a> TryFrom<&'a CStr> for CString {
1084    type Error = AllocError;
1085
1086    fn try_from(cstr: &'a CStr) -> Result<CString, AllocError> {
1087        let mut buf = KVec::new();
1088
1089        buf.extend_from_slice(cstr.to_bytes_with_nul(), GFP_KERNEL)?;
1090
1091        // INVARIANT: The `CStr` and `CString` types have the same invariants for
1092        // the string data, and we copied it over without changes.
1093        Ok(CString { buf })
1094    }
1095}
1096
1097impl fmt::Debug for CString {
1098    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1099        fmt::Debug::fmt(&**self, f)
1100    }
1101}