Skip to main content

kernel/
page.rs

1// SPDX-License-Identifier: GPL-2.0
2
3//! Kernel page allocation and management.
4
5use crate::{
6    alloc::{
7        AllocError,
8        Flags, //
9    },
10    bindings,
11    error::{
12        code::*,
13        Result, //
14    },
15    uaccess::UserSliceReader, //
16};
17use core::{
18    marker::PhantomData,
19    mem::ManuallyDrop,
20    ops::Deref,
21    ptr::{
22        self,
23        NonNull, //
24    }, //
25};
26
27/// A bitwise shift for the page size.
28pub const PAGE_SHIFT: usize = bindings::PAGE_SHIFT as usize;
29
30/// The number of bytes in a page.
31pub const PAGE_SIZE: usize = bindings::PAGE_SIZE;
32
33/// A bitmask that gives the page containing a given address.
34pub const PAGE_MASK: usize = !(PAGE_SIZE - 1);
35
36/// Rounds up to the next multiple of [`PAGE_SIZE`].
37///
38/// Returns [`None`] on integer overflow.
39///
40/// # Examples
41///
42/// ```
43/// use kernel::page::{
44///     page_align,
45///     PAGE_SIZE,
46/// };
47///
48/// // Requested address is already aligned.
49/// assert_eq!(page_align(0x0), Some(0x0));
50/// assert_eq!(page_align(PAGE_SIZE), Some(PAGE_SIZE));
51///
52/// // Requested address needs alignment up.
53/// assert_eq!(page_align(0x1), Some(PAGE_SIZE));
54/// assert_eq!(page_align(PAGE_SIZE + 1), Some(2 * PAGE_SIZE));
55///
56/// // Requested address causes overflow (returns `None`).
57/// let overflow_addr = usize::MAX - (PAGE_SIZE / 2);
58/// assert_eq!(page_align(overflow_addr), None);
59/// ```
60#[inline(always)]
61pub const fn page_align(addr: usize) -> Option<usize> {
62    let Some(sum) = addr.checked_add(PAGE_SIZE - 1) else {
63        return None;
64    };
65    Some(sum & PAGE_MASK)
66}
67
68/// Representation of a non-owning reference to a [`Page`].
69///
70/// This type provides a borrowed version of a [`Page`] that is owned by some other entity, e.g. a
71/// [`Vmalloc`] allocation such as [`VBox`].
72///
73/// # Example
74///
75/// ```
76/// # use kernel::{bindings, prelude::*};
77/// use kernel::page::{BorrowedPage, Page, PAGE_SIZE};
78/// # use core::{mem::MaybeUninit, ptr, ptr::NonNull };
79///
80/// fn borrow_page<'a>(vbox: &'a mut VBox<MaybeUninit<[u8; PAGE_SIZE]>>) -> BorrowedPage<'a> {
81///     let ptr = ptr::from_ref(&**vbox);
82///
83///     // SAFETY: `ptr` is a valid pointer to `Vmalloc` memory.
84///     let page = unsafe { bindings::vmalloc_to_page(ptr.cast()) };
85///
86///     // SAFETY: `vmalloc_to_page` returns a valid pointer to a `struct page` for a valid
87///     // pointer to `Vmalloc` memory.
88///     let page = unsafe { NonNull::new_unchecked(page) };
89///
90///     // SAFETY:
91///     // - `self.0` is a valid pointer to a `struct page`.
92///     // - `self.0` is valid for the entire lifetime of `self`.
93///     unsafe { BorrowedPage::from_raw(page) }
94/// }
95///
96/// let mut vbox = VBox::<[u8; PAGE_SIZE]>::new_uninit(GFP_KERNEL)?;
97/// let page = borrow_page(&mut vbox);
98///
99/// // SAFETY: There is no concurrent read or write to this page.
100/// unsafe { page.fill_zero_raw(0, PAGE_SIZE)? };
101/// # Ok::<(), Error>(())
102/// ```
103///
104/// # Invariants
105///
106/// The borrowed underlying pointer to a `struct page` is valid for the entire lifetime `'a`.
107///
108/// [`VBox`]: kernel::alloc::VBox
109/// [`Vmalloc`]: kernel::alloc::allocator::Vmalloc
110pub struct BorrowedPage<'a>(ManuallyDrop<Page>, PhantomData<&'a Page>);
111
112impl<'a> BorrowedPage<'a> {
113    /// Constructs a [`BorrowedPage`] from a raw pointer to a `struct page`.
114    ///
115    /// # Safety
116    ///
117    /// - `ptr` must point to a valid `bindings::page`.
118    /// - `ptr` must remain valid for the entire lifetime `'a`.
119    pub unsafe fn from_raw(ptr: NonNull<bindings::page>) -> Self {
120        let page = Page { page: ptr };
121
122        // INVARIANT: The safety requirements guarantee that `ptr` is valid for the entire lifetime
123        // `'a`.
124        Self(ManuallyDrop::new(page), PhantomData)
125    }
126}
127
128impl<'a> Deref for BorrowedPage<'a> {
129    type Target = Page;
130
131    fn deref(&self) -> &Self::Target {
132        &self.0
133    }
134}
135
136/// Trait to be implemented by types which provide an [`Iterator`] implementation of
137/// [`BorrowedPage`] items, such as [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter).
138pub trait AsPageIter {
139    /// The [`Iterator`] type, e.g. [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter).
140    type Iter<'a>: Iterator<Item = BorrowedPage<'a>>
141    where
142        Self: 'a;
143
144    /// Returns an [`Iterator`] of [`BorrowedPage`] items over all pages owned by `self`.
145    fn page_iter(&mut self) -> Self::Iter<'_>;
146}
147
148/// A pointer to a page that owns the page allocation.
149///
150/// # Invariants
151///
152/// The pointer is valid, and has ownership over the page.
153pub struct Page {
154    page: NonNull<bindings::page>,
155}
156
157// SAFETY: Pages have no logic that relies on them staying on a given thread, so moving them across
158// threads is safe.
159unsafe impl Send for Page {}
160
161// SAFETY: Pages have no logic that relies on them not being accessed concurrently, so accessing
162// them concurrently is safe.
163unsafe impl Sync for Page {}
164
165impl Page {
166    /// Allocates a new page.
167    ///
168    /// # Examples
169    ///
170    /// Allocate memory for a page.
171    ///
172    /// ```
173    /// use kernel::page::Page;
174    ///
175    /// let page = Page::alloc_page(GFP_KERNEL)?;
176    /// # Ok::<(), kernel::alloc::AllocError>(())
177    /// ```
178    ///
179    /// Allocate memory for a page and zero its contents.
180    ///
181    /// ```
182    /// use kernel::page::Page;
183    ///
184    /// let page = Page::alloc_page(GFP_KERNEL | __GFP_ZERO)?;
185    /// # Ok::<(), kernel::alloc::AllocError>(())
186    /// ```
187    #[inline]
188    pub fn alloc_page(flags: Flags) -> Result<Self, AllocError> {
189        // SAFETY: Depending on the value of `gfp_flags`, this call may sleep. Other than that, it
190        // is always safe to call this method.
191        let page = unsafe { bindings::alloc_pages(flags.as_raw(), 0) };
192        let page = NonNull::new(page).ok_or(AllocError)?;
193        // INVARIANT: We just successfully allocated a page, so we now have ownership of the newly
194        // allocated page. We transfer that ownership to the new `Page` object.
195        Ok(Self { page })
196    }
197
198    /// Returns a raw pointer to the page.
199    pub fn as_ptr(&self) -> *mut bindings::page {
200        self.page.as_ptr()
201    }
202
203    /// Get the node id containing this page.
204    #[inline]
205    pub fn nid(&self) -> i32 {
206        // SAFETY: Always safe to call with a valid page.
207        unsafe { bindings::page_to_nid(self.as_ptr()) }
208    }
209
210    /// Runs a piece of code with this page mapped to an address.
211    ///
212    /// The page is unmapped when this call returns.
213    ///
214    /// # Using the raw pointer
215    ///
216    /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for
217    /// `PAGE_SIZE` bytes and for the duration in which the closure is called. The pointer might
218    /// only be mapped on the current thread, and when that is the case, dereferencing it on other
219    /// threads is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't
220    /// cause data races, the memory may be uninitialized, and so on.
221    ///
222    /// If multiple threads map the same page at the same time, then they may reference with
223    /// different addresses. However, even if the addresses are different, the underlying memory is
224    /// still the same for these purposes (e.g., it's still a data race if they both write to the
225    /// same underlying byte at the same time).
226    fn with_page_mapped<T>(&self, f: impl FnOnce(*mut u8) -> T) -> T {
227        // SAFETY: `page` is valid due to the type invariants on `Page`.
228        let mapped_addr = unsafe { bindings::kmap_local_page(self.as_ptr()) };
229
230        let res = f(mapped_addr.cast());
231
232        // This unmaps the page mapped above.
233        //
234        // SAFETY: Since this API takes the user code as a closure, it can only be used in a manner
235        // where the pages are unmapped in reverse order. This is as required by `kunmap_local`.
236        //
237        // In other words, if this call to `kunmap_local` happens when a different page should be
238        // unmapped first, then there must necessarily be a call to `kmap_local_page` other than the
239        // call just above in `with_page_mapped` that made that possible. In this case, it is the
240        // unsafe block that wraps that other call that is incorrect.
241        unsafe { bindings::kunmap_local(mapped_addr) };
242
243        res
244    }
245
246    /// Runs a piece of code with a raw pointer to a slice of this page, with bounds checking.
247    ///
248    /// If `f` is called, then it will be called with a pointer that points at `off` bytes into the
249    /// page, and the pointer will be valid for at least `len` bytes. The pointer is only valid on
250    /// this task, as this method uses a local mapping.
251    ///
252    /// If `off` and `len` refers to a region outside of this page, then this method returns
253    /// [`EINVAL`] and does not call `f`.
254    ///
255    /// # Using the raw pointer
256    ///
257    /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for
258    /// `len` bytes and for the duration in which the closure is called. The pointer might only be
259    /// mapped on the current thread, and when that is the case, dereferencing it on other threads
260    /// is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't cause
261    /// data races, the memory may be uninitialized, and so on.
262    ///
263    /// If multiple threads map the same page at the same time, then they may reference with
264    /// different addresses. However, even if the addresses are different, the underlying memory is
265    /// still the same for these purposes (e.g., it's still a data race if they both write to the
266    /// same underlying byte at the same time).
267    fn with_pointer_into_page<T>(
268        &self,
269        off: usize,
270        len: usize,
271        f: impl FnOnce(*mut u8) -> Result<T>,
272    ) -> Result<T> {
273        let bounds_ok = off <= PAGE_SIZE && len <= PAGE_SIZE && (off + len) <= PAGE_SIZE;
274
275        if bounds_ok {
276            self.with_page_mapped(move |page_addr| {
277                // SAFETY: The `off` integer is at most `PAGE_SIZE`, so this pointer offset will
278                // result in a pointer that is in bounds or one off the end of the page.
279                f(unsafe { page_addr.add(off) })
280            })
281        } else {
282            Err(EINVAL)
283        }
284    }
285
286    /// Maps the page and reads from it into the given buffer.
287    ///
288    /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
289    /// outside of the page, then this call returns [`EINVAL`].
290    ///
291    /// # Safety
292    ///
293    /// * Callers must ensure that `dst` is valid for writing `len` bytes.
294    /// * Callers must ensure that this call does not race with a write to the same page that
295    ///   overlaps with this read.
296    pub unsafe fn read_raw(&self, dst: *mut u8, offset: usize, len: usize) -> Result {
297        self.with_pointer_into_page(offset, len, move |src| {
298            // SAFETY: If `with_pointer_into_page` calls into this closure, then
299            // it has performed a bounds check and guarantees that `src` is
300            // valid for `len` bytes.
301            //
302            // There caller guarantees that there is no data race.
303            unsafe { ptr::copy_nonoverlapping(src, dst, len) };
304            Ok(())
305        })
306    }
307
308    /// Maps the page and writes into it from the given buffer.
309    ///
310    /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
311    /// outside of the page, then this call returns [`EINVAL`].
312    ///
313    /// # Safety
314    ///
315    /// * Callers must ensure that `src` is valid for reading `len` bytes.
316    /// * Callers must ensure that this call does not race with a read or write to the same page
317    ///   that overlaps with this write.
318    pub unsafe fn write_raw(&self, src: *const u8, offset: usize, len: usize) -> Result {
319        self.with_pointer_into_page(offset, len, move |dst| {
320            // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
321            // bounds check and guarantees that `dst` is valid for `len` bytes.
322            //
323            // There caller guarantees that there is no data race.
324            unsafe { ptr::copy_nonoverlapping(src, dst, len) };
325            Ok(())
326        })
327    }
328
329    /// Maps the page and zeroes the given slice.
330    ///
331    /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
332    /// outside of the page, then this call returns [`EINVAL`].
333    ///
334    /// # Safety
335    ///
336    /// Callers must ensure that this call does not race with a read or write to the same page that
337    /// overlaps with this write.
338    pub unsafe fn fill_zero_raw(&self, offset: usize, len: usize) -> Result {
339        self.with_pointer_into_page(offset, len, move |dst| {
340            // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
341            // bounds check and guarantees that `dst` is valid for `len` bytes.
342            //
343            // There caller guarantees that there is no data race.
344            unsafe { ptr::write_bytes(dst, 0u8, len) };
345            Ok(())
346        })
347    }
348
349    /// Copies data from userspace into this page.
350    ///
351    /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
352    /// outside of the page, then this call returns [`EINVAL`].
353    ///
354    /// Like the other `UserSliceReader` methods, data races are allowed on the userspace address.
355    /// However, they are not allowed on the page you are copying into.
356    ///
357    /// # Safety
358    ///
359    /// Callers must ensure that this call does not race with a read or write to the same page that
360    /// overlaps with this write.
361    pub unsafe fn copy_from_user_slice_raw(
362        &self,
363        reader: &mut UserSliceReader,
364        offset: usize,
365        len: usize,
366    ) -> Result {
367        self.with_pointer_into_page(offset, len, move |dst| {
368            // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
369            // bounds check and guarantees that `dst` is valid for `len` bytes. Furthermore, we have
370            // exclusive access to the slice since the caller guarantees that there are no races.
371            reader.read_raw(unsafe { core::slice::from_raw_parts_mut(dst.cast(), len) })
372        })
373    }
374}
375
376impl Drop for Page {
377    #[inline]
378    fn drop(&mut self) {
379        // SAFETY: By the type invariants, we have ownership of the page and can free it.
380        unsafe { bindings::__free_pages(self.page.as_ptr(), 0) };
381    }
382}