kernel/
uaccess.rs

1// SPDX-License-Identifier: GPL-2.0
2
3//! Slices to user space memory regions.
4//!
5//! C header: [`include/linux/uaccess.h`](srctree/include/linux/uaccess.h)
6
7use crate::{
8    alloc::{Allocator, Flags},
9    bindings,
10    error::Result,
11    ffi::{c_char, c_void},
12    fs::file,
13    prelude::*,
14    transmute::{AsBytes, FromBytes},
15};
16use core::mem::{size_of, MaybeUninit};
17
/// An address in userspace memory.
///
/// This is the Rust counterpart of a C pointer annotated with `__user`.
#[repr(transparent)]
#[derive(Copy, Clone)]
pub struct UserPtr(*mut c_void);

impl UserPtr {
    /// Builds a `UserPtr` from an integer holding the userspace address.
    #[inline]
    pub fn from_addr(addr: usize) -> Self {
        let raw = addr as *mut c_void;
        UserPtr(raw)
    }

    /// Builds a `UserPtr` from a raw pointer holding the userspace address.
    #[inline]
    pub fn from_ptr(addr: *mut c_void) -> Self {
        UserPtr(addr)
    }

    /// Returns this userspace address as a raw const void pointer.
    ///
    /// Using the returned pointer correctly is the caller's responsibility.
    #[inline]
    pub fn as_const_ptr(self) -> *const c_void {
        self.0.cast_const()
    }

    /// Returns this userspace address as a raw mutable void pointer.
    ///
    /// Using the returned pointer correctly is the caller's responsibility.
    #[inline]
    pub fn as_mut_ptr(self) -> *mut c_void {
        let Self(raw) = self;
        raw
    }

    /// Moves this user pointer forward by `add` bytes.
    ///
    /// The addition wraps, so going past the end of the address space never panics, not even
    /// when `CONFIG_RUST_OVERFLOW_CHECKS` is enabled.
    #[inline]
    pub fn wrapping_byte_add(self, add: usize) -> UserPtr {
        let Self(raw) = self;
        Self(raw.wrapping_byte_add(add))
    }
}
63
64/// A pointer to an area in userspace memory, which can be either read-only or read-write.
65///
66/// All methods on this struct are safe: attempting to read or write on bad addresses (either out of
67/// the bound of the slice or unmapped addresses) will return [`EFAULT`]. Concurrent access,
68/// *including data races to/from userspace memory*, is permitted, because fundamentally another
69/// userspace thread/process could always be modifying memory at the same time (in the same way that
70/// userspace Rust's [`std::io`] permits data races with the contents of files on disk). In the
71/// presence of a race, the exact byte values read/written are unspecified but the operation is
72/// well-defined. Kernelspace code should validate its copy of data after completing a read, and not
73/// expect that multiple reads of the same address will return the same value.
74///
75/// These APIs are designed to make it difficult to accidentally write TOCTOU (time-of-check to
76/// time-of-use) bugs. Every time a memory location is read, the reader's position is advanced by
77/// the read length and the next read will start from there. This helps prevent accidentally reading
78/// the same location twice and causing a TOCTOU bug.
79///
80/// Creating a [`UserSliceReader`] and/or [`UserSliceWriter`] consumes the `UserSlice`, helping
81/// ensure that there aren't multiple readers or writers to the same location.
82///
83/// If double-fetching a memory location is necessary for some reason, then that is done by creating
84/// multiple readers to the same memory location, e.g. using [`clone_reader`].
85///
86/// # Examples
87///
88/// Takes a region of userspace memory from the current process, and modify it by adding one to
89/// every byte in the region.
90///
91/// ```no_run
92/// use kernel::ffi::c_void;
93/// use kernel::uaccess::{UserPtr, UserSlice};
94///
95/// fn bytes_add_one(uptr: UserPtr, len: usize) -> Result {
96///     let (read, mut write) = UserSlice::new(uptr, len).reader_writer();
97///
98///     let mut buf = KVec::new();
99///     read.read_all(&mut buf, GFP_KERNEL)?;
100///
101///     for b in &mut buf {
102///         *b = b.wrapping_add(1);
103///     }
104///
105///     write.write_slice(&buf)?;
106///     Ok(())
107/// }
108/// ```
109///
110/// Example illustrating a TOCTOU (time-of-check to time-of-use) bug.
111///
112/// ```no_run
113/// use kernel::ffi::c_void;
114/// use kernel::uaccess::{UserPtr, UserSlice};
115///
116/// /// Returns whether the data in this region is valid.
117/// fn is_valid(uptr: UserPtr, len: usize) -> Result<bool> {
118///     let read = UserSlice::new(uptr, len).reader();
119///
120///     let mut buf = KVec::new();
121///     read.read_all(&mut buf, GFP_KERNEL)?;
122///
123///     todo!()
124/// }
125///
126/// /// Returns the bytes behind this user pointer if they are valid.
127/// fn get_bytes_if_valid(uptr: UserPtr, len: usize) -> Result<KVec<u8>> {
128///     if !is_valid(uptr, len)? {
129///         return Err(EINVAL);
130///     }
131///
132///     let read = UserSlice::new(uptr, len).reader();
133///
134///     let mut buf = KVec::new();
135///     read.read_all(&mut buf, GFP_KERNEL)?;
136///
137///     // THIS IS A BUG! The bytes could have changed since we checked them.
138///     //
139///     // To avoid this kind of bug, don't call `UserSlice::new` multiple
140///     // times with the same address.
141///     Ok(buf)
142/// }
143/// ```
144///
145/// [`std::io`]: https://doc.rust-lang.org/std/io/index.html
146/// [`clone_reader`]: UserSliceReader::clone_reader
147pub struct UserSlice {
148    ptr: UserPtr,
149    length: usize,
150}
151
152impl UserSlice {
153    /// Constructs a user slice from a raw pointer and a length in bytes.
154    ///
155    /// Constructing a [`UserSlice`] performs no checks on the provided address and length, it can
156    /// safely be constructed inside a kernel thread with no current userspace process. Reads and
157    /// writes wrap the kernel APIs `copy_from_user` and `copy_to_user`, which check the memory map
158    /// of the current process and enforce that the address range is within the user range (no
159    /// additional calls to `access_ok` are needed). Validity of the pointer is checked when you
160    /// attempt to read or write, not in the call to `UserSlice::new`.
161    ///
162    /// Callers must be careful to avoid time-of-check-time-of-use (TOCTOU) issues. The simplest way
163    /// is to create a single instance of [`UserSlice`] per user memory block as it reads each byte
164    /// at most once.
165    pub fn new(ptr: UserPtr, length: usize) -> Self {
166        UserSlice { ptr, length }
167    }
168
169    /// Reads the entirety of the user slice, appending it to the end of the provided buffer.
170    ///
171    /// Fails with [`EFAULT`] if the read happens on a bad address.
172    pub fn read_all<A: Allocator>(self, buf: &mut Vec<u8, A>, flags: Flags) -> Result {
173        self.reader().read_all(buf, flags)
174    }
175
176    /// Constructs a [`UserSliceReader`].
177    pub fn reader(self) -> UserSliceReader {
178        UserSliceReader {
179            ptr: self.ptr,
180            length: self.length,
181        }
182    }
183
184    /// Constructs a [`UserSliceWriter`].
185    pub fn writer(self) -> UserSliceWriter {
186        UserSliceWriter {
187            ptr: self.ptr,
188            length: self.length,
189        }
190    }
191
192    /// Constructs both a [`UserSliceReader`] and a [`UserSliceWriter`].
193    ///
194    /// Usually when this is used, you will first read the data, and then overwrite it afterwards.
195    pub fn reader_writer(self) -> (UserSliceReader, UserSliceWriter) {
196        (
197            UserSliceReader {
198                ptr: self.ptr,
199                length: self.length,
200            },
201            UserSliceWriter {
202                ptr: self.ptr,
203                length: self.length,
204            },
205        )
206    }
207}
208
209/// A reader for [`UserSlice`].
210///
211/// Used to incrementally read from the user slice.
212pub struct UserSliceReader {
213    ptr: UserPtr,
214    length: usize,
215}
216
217impl UserSliceReader {
218    /// Skip the provided number of bytes.
219    ///
220    /// Returns an error if skipping more than the length of the buffer.
221    pub fn skip(&mut self, num_skip: usize) -> Result {
222        // Update `self.length` first since that's the fallible part of this operation.
223        self.length = self.length.checked_sub(num_skip).ok_or(EFAULT)?;
224        self.ptr = self.ptr.wrapping_byte_add(num_skip);
225        Ok(())
226    }
227
228    /// Create a reader that can access the same range of data.
229    ///
230    /// Reading from the clone does not advance the current reader.
231    ///
232    /// The caller should take care to not introduce TOCTOU issues, as described in the
233    /// documentation for [`UserSlice`].
234    pub fn clone_reader(&self) -> UserSliceReader {
235        UserSliceReader {
236            ptr: self.ptr,
237            length: self.length,
238        }
239    }
240
241    /// Returns the number of bytes left to be read from this reader.
242    ///
243    /// Note that even reading less than this number of bytes may fail.
244    pub fn len(&self) -> usize {
245        self.length
246    }
247
248    /// Returns `true` if no data is available in the io buffer.
249    pub fn is_empty(&self) -> bool {
250        self.length == 0
251    }
252
253    /// Reads raw data from the user slice into a kernel buffer.
254    ///
255    /// For a version that uses `&mut [u8]`, please see [`UserSliceReader::read_slice`].
256    ///
257    /// Fails with [`EFAULT`] if the read happens on a bad address, or if the read goes out of
258    /// bounds of this [`UserSliceReader`]. This call may modify `out` even if it returns an error.
259    ///
260    /// # Guarantees
261    ///
262    /// After a successful call to this method, all bytes in `out` are initialized.
263    pub fn read_raw(&mut self, out: &mut [MaybeUninit<u8>]) -> Result {
264        let len = out.len();
265        let out_ptr = out.as_mut_ptr().cast::<c_void>();
266        if len > self.length {
267            return Err(EFAULT);
268        }
269        // SAFETY: `out_ptr` points into a mutable slice of length `len`, so we may write
270        // that many bytes to it.
271        let res = unsafe { bindings::copy_from_user(out_ptr, self.ptr.as_const_ptr(), len) };
272        if res != 0 {
273            return Err(EFAULT);
274        }
275        self.ptr = self.ptr.wrapping_byte_add(len);
276        self.length -= len;
277        Ok(())
278    }
279
280    /// Reads raw data from the user slice into a kernel buffer.
281    ///
282    /// Fails with [`EFAULT`] if the read happens on a bad address, or if the read goes out of
283    /// bounds of this [`UserSliceReader`]. This call may modify `out` even if it returns an error.
284    pub fn read_slice(&mut self, out: &mut [u8]) -> Result {
285        // SAFETY: The types are compatible and `read_raw` doesn't write uninitialized bytes to
286        // `out`.
287        let out = unsafe { &mut *(core::ptr::from_mut(out) as *mut [MaybeUninit<u8>]) };
288        self.read_raw(out)
289    }
290
291    /// Reads raw data from the user slice into a kernel buffer partially.
292    ///
293    /// This is the same as [`Self::read_slice`] but considers the given `offset` into `out` and
294    /// truncates the read to the boundaries of `self` and `out`.
295    ///
296    /// On success, returns the number of bytes read.
297    pub fn read_slice_partial(&mut self, out: &mut [u8], offset: usize) -> Result<usize> {
298        let end = offset.saturating_add(self.len()).min(out.len());
299
300        let Some(dst) = out.get_mut(offset..end) else {
301            return Ok(0);
302        };
303
304        self.read_slice(dst)?;
305        Ok(dst.len())
306    }
307
308    /// Reads raw data from the user slice into a kernel buffer partially.
309    ///
310    /// This is the same as [`Self::read_slice_partial`] but updates the given [`file::Offset`] by
311    /// the number of bytes read.
312    ///
313    /// This is equivalent to C's `simple_write_to_buffer()`.
314    ///
315    /// On success, returns the number of bytes read.
316    pub fn read_slice_file(&mut self, out: &mut [u8], offset: &mut file::Offset) -> Result<usize> {
317        if offset.is_negative() {
318            return Err(EINVAL);
319        }
320
321        let Ok(offset_index) = (*offset).try_into() else {
322            return Ok(0);
323        };
324
325        let read = self.read_slice_partial(out, offset_index)?;
326
327        // OVERFLOW: `offset + read <= data.len() <= isize::MAX <= Offset::MAX`
328        *offset += read as i64;
329
330        Ok(read)
331    }
332
333    /// Reads a value of the specified type.
334    ///
335    /// Fails with [`EFAULT`] if the read happens on a bad address, or if the read goes out of
336    /// bounds of this [`UserSliceReader`].
337    pub fn read<T: FromBytes>(&mut self) -> Result<T> {
338        let len = size_of::<T>();
339        if len > self.length {
340            return Err(EFAULT);
341        }
342        let mut out: MaybeUninit<T> = MaybeUninit::uninit();
343        // SAFETY: The local variable `out` is valid for writing `size_of::<T>()` bytes.
344        //
345        // By using the _copy_from_user variant, we skip the check_object_size check that verifies
346        // the kernel pointer. This mirrors the logic on the C side that skips the check when the
347        // length is a compile-time constant.
348        let res = unsafe {
349            bindings::_copy_from_user(
350                out.as_mut_ptr().cast::<c_void>(),
351                self.ptr.as_const_ptr(),
352                len,
353            )
354        };
355        if res != 0 {
356            return Err(EFAULT);
357        }
358        self.ptr = self.ptr.wrapping_byte_add(len);
359        self.length -= len;
360        // SAFETY: The read above has initialized all bytes in `out`, and since `T` implements
361        // `FromBytes`, any bit-pattern is a valid value for this type.
362        Ok(unsafe { out.assume_init() })
363    }
364
365    /// Reads the entirety of the user slice, appending it to the end of the provided buffer.
366    ///
367    /// Fails with [`EFAULT`] if the read happens on a bad address.
368    pub fn read_all<A: Allocator>(mut self, buf: &mut Vec<u8, A>, flags: Flags) -> Result {
369        let len = self.length;
370        buf.reserve(len, flags)?;
371
372        // The call to `reserve` was successful, so the spare capacity is at least `len` bytes long.
373        self.read_raw(&mut buf.spare_capacity_mut()[..len])?;
374
375        // SAFETY: Since the call to `read_raw` was successful, so the next `len` bytes of the
376        // vector have been initialized.
377        unsafe { buf.inc_len(len) };
378        Ok(())
379    }
380
381    /// Read a NUL-terminated string from userspace and return it.
382    ///
383    /// The string is read into `buf` and a NUL-terminator is added if the end of `buf` is reached.
384    /// Since there must be space to add a NUL-terminator, the buffer must not be empty. The
385    /// returned `&CStr` points into `buf`.
386    ///
387    /// Fails with [`EFAULT`] if the read happens on a bad address (some data may have been
388    /// copied).
389    #[doc(alias = "strncpy_from_user")]
390    pub fn strcpy_into_buf<'buf>(self, buf: &'buf mut [u8]) -> Result<&'buf CStr> {
391        if buf.is_empty() {
392            return Err(EINVAL);
393        }
394
395        // SAFETY: The types are compatible and `strncpy_from_user` doesn't write uninitialized
396        // bytes to `buf`.
397        let mut dst = unsafe { &mut *(core::ptr::from_mut(buf) as *mut [MaybeUninit<u8>]) };
398
399        // We never read more than `self.length` bytes.
400        if dst.len() > self.length {
401            dst = &mut dst[..self.length];
402        }
403
404        let mut len = raw_strncpy_from_user(dst, self.ptr)?;
405        if len < dst.len() {
406            // Add one to include the NUL-terminator.
407            len += 1;
408        } else if len < buf.len() {
409            // This implies that `len == dst.len() < buf.len()`.
410            //
411            // This means that we could not fill the entire buffer, but we had to stop reading
412            // because we hit the `self.length` limit of this `UserSliceReader`. Since we did not
413            // fill the buffer, we treat this case as if we tried to read past the `self.length`
414            // limit and received a page fault, which is consistent with other `UserSliceReader`
415            // methods that also return page faults when you exceed `self.length`.
416            return Err(EFAULT);
417        } else {
418            // This implies that `len == buf.len()`.
419            //
420            // This means that we filled the buffer exactly. In this case, we add a NUL-terminator
421            // and return it. Unlike the `len < dst.len()` branch, don't modify `len` because it
422            // already represents the length including the NUL-terminator.
423            //
424            // SAFETY: Due to the check at the beginning, the buffer is not empty.
425            unsafe { *buf.last_mut().unwrap_unchecked() = 0 };
426        }
427
428        // This method consumes `self`, so it can only be called once, thus we do not need to
429        // update `self.length`. This sidesteps concerns such as whether `self.length` should be
430        // incremented by `len` or `len-1` in the `len == buf.len()` case.
431
432        // SAFETY: There are two cases:
433        // * If we hit the `len < dst.len()` case, then `raw_strncpy_from_user` guarantees that
434        //   this slice contains exactly one NUL byte at the end of the string.
435        // * Otherwise, `raw_strncpy_from_user` guarantees that the string contained no NUL bytes,
436        //   and we have since added a NUL byte at the end.
437        Ok(unsafe { CStr::from_bytes_with_nul_unchecked(&buf[..len]) })
438    }
439}
440
441/// A writer for [`UserSlice`].
442///
443/// Used to incrementally write into the user slice.
444pub struct UserSliceWriter {
445    ptr: UserPtr,
446    length: usize,
447}
448
449impl UserSliceWriter {
450    /// Returns the amount of space remaining in this buffer.
451    ///
452    /// Note that even writing less than this number of bytes may fail.
453    pub fn len(&self) -> usize {
454        self.length
455    }
456
457    /// Returns `true` if no more data can be written to this buffer.
458    pub fn is_empty(&self) -> bool {
459        self.length == 0
460    }
461
462    /// Writes raw data to this user pointer from a kernel buffer.
463    ///
464    /// Fails with [`EFAULT`] if the write happens on a bad address, or if the write goes out of
465    /// bounds of this [`UserSliceWriter`]. This call may modify the associated userspace slice even
466    /// if it returns an error.
467    pub fn write_slice(&mut self, data: &[u8]) -> Result {
468        let len = data.len();
469        let data_ptr = data.as_ptr().cast::<c_void>();
470        if len > self.length {
471            return Err(EFAULT);
472        }
473        // SAFETY: `data_ptr` points into an immutable slice of length `len`, so we may read
474        // that many bytes from it.
475        let res = unsafe { bindings::copy_to_user(self.ptr.as_mut_ptr(), data_ptr, len) };
476        if res != 0 {
477            return Err(EFAULT);
478        }
479        self.ptr = self.ptr.wrapping_byte_add(len);
480        self.length -= len;
481        Ok(())
482    }
483
484    /// Writes raw data to this user pointer from a kernel buffer partially.
485    ///
486    /// This is the same as [`Self::write_slice`] but considers the given `offset` into `data` and
487    /// truncates the write to the boundaries of `self` and `data`.
488    ///
489    /// On success, returns the number of bytes written.
490    pub fn write_slice_partial(&mut self, data: &[u8], offset: usize) -> Result<usize> {
491        let end = offset.saturating_add(self.len()).min(data.len());
492
493        let Some(src) = data.get(offset..end) else {
494            return Ok(0);
495        };
496
497        self.write_slice(src)?;
498        Ok(src.len())
499    }
500
501    /// Writes raw data to this user pointer from a kernel buffer partially.
502    ///
503    /// This is the same as [`Self::write_slice_partial`] but updates the given [`file::Offset`] by
504    /// the number of bytes written.
505    ///
506    /// This is equivalent to C's `simple_read_from_buffer()`.
507    ///
508    /// On success, returns the number of bytes written.
509    pub fn write_slice_file(&mut self, data: &[u8], offset: &mut file::Offset) -> Result<usize> {
510        if offset.is_negative() {
511            return Err(EINVAL);
512        }
513
514        let Ok(offset_index) = (*offset).try_into() else {
515            return Ok(0);
516        };
517
518        let written = self.write_slice_partial(data, offset_index)?;
519
520        // OVERFLOW: `offset + written <= data.len() <= isize::MAX <= Offset::MAX`
521        *offset += written as i64;
522
523        Ok(written)
524    }
525
526    /// Writes the provided Rust value to this userspace pointer.
527    ///
528    /// Fails with [`EFAULT`] if the write happens on a bad address, or if the write goes out of
529    /// bounds of this [`UserSliceWriter`]. This call may modify the associated userspace slice even
530    /// if it returns an error.
531    pub fn write<T: AsBytes>(&mut self, value: &T) -> Result {
532        let len = size_of::<T>();
533        if len > self.length {
534            return Err(EFAULT);
535        }
536        // SAFETY: The reference points to a value of type `T`, so it is valid for reading
537        // `size_of::<T>()` bytes.
538        //
539        // By using the _copy_to_user variant, we skip the check_object_size check that verifies the
540        // kernel pointer. This mirrors the logic on the C side that skips the check when the length
541        // is a compile-time constant.
542        let res = unsafe {
543            bindings::_copy_to_user(
544                self.ptr.as_mut_ptr(),
545                core::ptr::from_ref(value).cast::<c_void>(),
546                len,
547            )
548        };
549        if res != 0 {
550            return Err(EFAULT);
551        }
552        self.ptr = self.ptr.wrapping_byte_add(len);
553        self.length -= len;
554        Ok(())
555    }
556}
557
558/// Reads a nul-terminated string into `dst` and returns the length.
559///
560/// This reads from userspace until a NUL byte is encountered, or until `dst.len()` bytes have been
561/// read. Fails with [`EFAULT`] if a read happens on a bad address (some data may have been
562/// copied). When the end of the buffer is encountered, no NUL byte is added, so the string is
563/// *not* guaranteed to be NUL-terminated when `Ok(dst.len())` is returned.
564///
565/// # Guarantees
566///
567/// When this function returns `Ok(len)`, it is guaranteed that the first `len` bytes of `dst` are
568/// initialized and non-zero. Furthermore, if `len < dst.len()`, then `dst[len]` is a NUL byte.
569#[inline]
570fn raw_strncpy_from_user(dst: &mut [MaybeUninit<u8>], src: UserPtr) -> Result<usize> {
571    // CAST: Slice lengths are guaranteed to be `<= isize::MAX`.
572    let len = dst.len() as isize;
573
574    // SAFETY: `dst` is valid for writing `dst.len()` bytes.
575    let res = unsafe {
576        bindings::strncpy_from_user(
577            dst.as_mut_ptr().cast::<c_char>(),
578            src.as_const_ptr().cast::<c_char>(),
579            len,
580        )
581    };
582
583    if res < 0 {
584        return Err(Error::from_errno(res as i32));
585    }
586
587    #[cfg(CONFIG_RUST_OVERFLOW_CHECKS)]
588    assert!(res <= len);
589
590    // GUARANTEES: `strncpy_from_user` was successful, so `dst` has contents in accordance with the
591    // guarantees of this function.
592    Ok(res as usize)
593}