Skip to main content

kernel/
page.rs

1// SPDX-License-Identifier: GPL-2.0
2
3//! Kernel page allocation and management.
4
5use crate::{
6    alloc::{
7        AllocError,
8        Flags, //
9    },
10    bindings,
11    error::code::*,
12    error::Result,
13    uaccess::UserSliceReader, //
14};
15use core::{
16    marker::PhantomData,
17    mem::ManuallyDrop,
18    ops::Deref,
19    ptr::{
20        self,
21        NonNull, //
22    }, //
23};
24
/// A bitwise shift for the page size.
///
/// Mirrors the C side's `PAGE_SHIFT`, converted to `usize`.
pub const PAGE_SHIFT: usize = bindings::PAGE_SHIFT as usize;

/// The number of bytes in a page.
pub const PAGE_SIZE: usize = bindings::PAGE_SIZE;

/// A bitmask that gives the page containing a given address.
///
/// Defined as `!(PAGE_SIZE - 1)`, so `addr & PAGE_MASK` clears the in-page offset bits of `addr`.
/// This relies on [`PAGE_SIZE`] being a power of two.
pub const PAGE_MASK: usize = !(PAGE_SIZE - 1);
33
34/// Rounds up to the next multiple of [`PAGE_SIZE`].
35///
36/// Returns [`None`] on integer overflow.
37///
38/// # Examples
39///
40/// ```
41/// use kernel::page::{
42///     page_align,
43///     PAGE_SIZE,
44/// };
45///
46/// // Requested address is already aligned.
47/// assert_eq!(page_align(0x0), Some(0x0));
48/// assert_eq!(page_align(PAGE_SIZE), Some(PAGE_SIZE));
49///
50/// // Requested address needs alignment up.
51/// assert_eq!(page_align(0x1), Some(PAGE_SIZE));
52/// assert_eq!(page_align(PAGE_SIZE + 1), Some(2 * PAGE_SIZE));
53///
54/// // Requested address causes overflow (returns `None`).
55/// let overflow_addr = usize::MAX - (PAGE_SIZE / 2);
56/// assert_eq!(page_align(overflow_addr), None);
57/// ```
58#[inline(always)]
59pub const fn page_align(addr: usize) -> Option<usize> {
60    let Some(sum) = addr.checked_add(PAGE_SIZE - 1) else {
61        return None;
62    };
63    Some(sum & PAGE_MASK)
64}
65
/// Representation of a non-owning reference to a [`Page`].
///
/// This type provides a borrowed version of a [`Page`] that is owned by some other entity, e.g. a
/// [`Vmalloc`] allocation such as [`VBox`].
///
/// # Example
///
/// ```
/// # use kernel::{bindings, prelude::*};
/// use kernel::page::{BorrowedPage, Page, PAGE_SIZE};
/// # use core::{mem::MaybeUninit, ptr, ptr::NonNull };
///
/// fn borrow_page<'a>(vbox: &'a mut VBox<MaybeUninit<[u8; PAGE_SIZE]>>) -> BorrowedPage<'a> {
///     let ptr = ptr::from_ref(&**vbox);
///
///     // SAFETY: `ptr` is a valid pointer to `Vmalloc` memory.
///     let page = unsafe { bindings::vmalloc_to_page(ptr.cast()) };
///
///     // SAFETY: `vmalloc_to_page` returns a valid pointer to a `struct page` for a valid
///     // pointer to `Vmalloc` memory.
///     let page = unsafe { NonNull::new_unchecked(page) };
///
///     // SAFETY:
///     // - `page` is a valid pointer to a `struct page`.
///     // - `page` is valid for the entire lifetime `'a`, since `vbox` is borrowed for `'a`.
///     unsafe { BorrowedPage::from_raw(page) }
/// }
///
/// let mut vbox = VBox::<[u8; PAGE_SIZE]>::new_uninit(GFP_KERNEL)?;
/// let page = borrow_page(&mut vbox);
///
/// // SAFETY: There is no concurrent read or write to this page.
/// unsafe { page.fill_zero_raw(0, PAGE_SIZE)? };
/// # Ok::<(), Error>(())
/// ```
///
/// # Invariants
///
/// The borrowed underlying pointer to a `struct page` is valid for the entire lifetime `'a`.
///
/// [`VBox`]: kernel::alloc::VBox
/// [`Vmalloc`]: kernel::alloc::allocator::Vmalloc
// The `Page` is wrapped in `ManuallyDrop` so that dropping a `BorrowedPage` never runs `Page`'s
// destructor; the `PhantomData` ties the value to the lifetime `'a`.
pub struct BorrowedPage<'a>(ManuallyDrop<Page>, PhantomData<&'a Page>);
109
110impl<'a> BorrowedPage<'a> {
111    /// Constructs a [`BorrowedPage`] from a raw pointer to a `struct page`.
112    ///
113    /// # Safety
114    ///
115    /// - `ptr` must point to a valid `bindings::page`.
116    /// - `ptr` must remain valid for the entire lifetime `'a`.
117    pub unsafe fn from_raw(ptr: NonNull<bindings::page>) -> Self {
118        let page = Page { page: ptr };
119
120        // INVARIANT: The safety requirements guarantee that `ptr` is valid for the entire lifetime
121        // `'a`.
122        Self(ManuallyDrop::new(page), PhantomData)
123    }
124}
125
126impl<'a> Deref for BorrowedPage<'a> {
127    type Target = Page;
128
129    fn deref(&self) -> &Self::Target {
130        &self.0
131    }
132}
133
/// Trait to be implemented by types which provide an [`Iterator`] implementation of
/// [`BorrowedPage`] items, such as [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter).
pub trait AsPageIter {
    /// The [`Iterator`] type, e.g. [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter).
    ///
    /// The iterator yields [`BorrowedPage`] items whose lifetime `'a` cannot outlive `Self`.
    type Iter<'a>: Iterator<Item = BorrowedPage<'a>>
    where
        Self: 'a;

    /// Returns an [`Iterator`] of [`BorrowedPage`] items over all pages owned by `self`.
    ///
    /// Takes `&mut self`, so `self` stays mutably borrowed for as long as the returned iterator
    /// is alive.
    fn page_iter(&mut self) -> Self::Iter<'_>;
}
145
/// A pointer to a page that owns the page allocation.
///
/// Dropping a [`Page`] frees the underlying page (see the [`Drop`] implementation).
///
/// # Invariants
///
/// The pointer is valid, and has ownership over the page.
pub struct Page {
    // Non-null pointer to the underlying `struct page`.
    page: NonNull<bindings::page>,
}
154
// SAFETY: Pages have no logic that relies on them staying on a given thread, so moving them across
// threads is safe.
unsafe impl Send for Page {}

// SAFETY: Pages have no logic that relies on them not being accessed concurrently, so accessing
// them concurrently is safe. The methods that read or write the page contents are `unsafe` and
// require their callers to rule out data races.
unsafe impl Sync for Page {}
162
163impl Page {
164    /// Allocates a new page.
165    ///
166    /// # Examples
167    ///
168    /// Allocate memory for a page.
169    ///
170    /// ```
171    /// use kernel::page::Page;
172    ///
173    /// let page = Page::alloc_page(GFP_KERNEL)?;
174    /// # Ok::<(), kernel::alloc::AllocError>(())
175    /// ```
176    ///
177    /// Allocate memory for a page and zero its contents.
178    ///
179    /// ```
180    /// use kernel::page::Page;
181    ///
182    /// let page = Page::alloc_page(GFP_KERNEL | __GFP_ZERO)?;
183    /// # Ok::<(), kernel::alloc::AllocError>(())
184    /// ```
185    #[inline]
186    pub fn alloc_page(flags: Flags) -> Result<Self, AllocError> {
187        // SAFETY: Depending on the value of `gfp_flags`, this call may sleep. Other than that, it
188        // is always safe to call this method.
189        let page = unsafe { bindings::alloc_pages(flags.as_raw(), 0) };
190        let page = NonNull::new(page).ok_or(AllocError)?;
191        // INVARIANT: We just successfully allocated a page, so we now have ownership of the newly
192        // allocated page. We transfer that ownership to the new `Page` object.
193        Ok(Self { page })
194    }
195
196    /// Returns a raw pointer to the page.
197    pub fn as_ptr(&self) -> *mut bindings::page {
198        self.page.as_ptr()
199    }
200
201    /// Get the node id containing this page.
202    #[inline]
203    pub fn nid(&self) -> i32 {
204        // SAFETY: Always safe to call with a valid page.
205        unsafe { bindings::page_to_nid(self.as_ptr()) }
206    }
207
208    /// Runs a piece of code with this page mapped to an address.
209    ///
210    /// The page is unmapped when this call returns.
211    ///
212    /// # Using the raw pointer
213    ///
214    /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for
215    /// `PAGE_SIZE` bytes and for the duration in which the closure is called. The pointer might
216    /// only be mapped on the current thread, and when that is the case, dereferencing it on other
217    /// threads is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't
218    /// cause data races, the memory may be uninitialized, and so on.
219    ///
220    /// If multiple threads map the same page at the same time, then they may reference with
221    /// different addresses. However, even if the addresses are different, the underlying memory is
222    /// still the same for these purposes (e.g., it's still a data race if they both write to the
223    /// same underlying byte at the same time).
224    fn with_page_mapped<T>(&self, f: impl FnOnce(*mut u8) -> T) -> T {
225        // SAFETY: `page` is valid due to the type invariants on `Page`.
226        let mapped_addr = unsafe { bindings::kmap_local_page(self.as_ptr()) };
227
228        let res = f(mapped_addr.cast());
229
230        // This unmaps the page mapped above.
231        //
232        // SAFETY: Since this API takes the user code as a closure, it can only be used in a manner
233        // where the pages are unmapped in reverse order. This is as required by `kunmap_local`.
234        //
235        // In other words, if this call to `kunmap_local` happens when a different page should be
236        // unmapped first, then there must necessarily be a call to `kmap_local_page` other than the
237        // call just above in `with_page_mapped` that made that possible. In this case, it is the
238        // unsafe block that wraps that other call that is incorrect.
239        unsafe { bindings::kunmap_local(mapped_addr) };
240
241        res
242    }
243
244    /// Runs a piece of code with a raw pointer to a slice of this page, with bounds checking.
245    ///
246    /// If `f` is called, then it will be called with a pointer that points at `off` bytes into the
247    /// page, and the pointer will be valid for at least `len` bytes. The pointer is only valid on
248    /// this task, as this method uses a local mapping.
249    ///
250    /// If `off` and `len` refers to a region outside of this page, then this method returns
251    /// [`EINVAL`] and does not call `f`.
252    ///
253    /// # Using the raw pointer
254    ///
255    /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for
256    /// `len` bytes and for the duration in which the closure is called. The pointer might only be
257    /// mapped on the current thread, and when that is the case, dereferencing it on other threads
258    /// is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't cause
259    /// data races, the memory may be uninitialized, and so on.
260    ///
261    /// If multiple threads map the same page at the same time, then they may reference with
262    /// different addresses. However, even if the addresses are different, the underlying memory is
263    /// still the same for these purposes (e.g., it's still a data race if they both write to the
264    /// same underlying byte at the same time).
265    fn with_pointer_into_page<T>(
266        &self,
267        off: usize,
268        len: usize,
269        f: impl FnOnce(*mut u8) -> Result<T>,
270    ) -> Result<T> {
271        let bounds_ok = off <= PAGE_SIZE && len <= PAGE_SIZE && (off + len) <= PAGE_SIZE;
272
273        if bounds_ok {
274            self.with_page_mapped(move |page_addr| {
275                // SAFETY: The `off` integer is at most `PAGE_SIZE`, so this pointer offset will
276                // result in a pointer that is in bounds or one off the end of the page.
277                f(unsafe { page_addr.add(off) })
278            })
279        } else {
280            Err(EINVAL)
281        }
282    }
283
284    /// Maps the page and reads from it into the given buffer.
285    ///
286    /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
287    /// outside of the page, then this call returns [`EINVAL`].
288    ///
289    /// # Safety
290    ///
291    /// * Callers must ensure that `dst` is valid for writing `len` bytes.
292    /// * Callers must ensure that this call does not race with a write to the same page that
293    ///   overlaps with this read.
294    pub unsafe fn read_raw(&self, dst: *mut u8, offset: usize, len: usize) -> Result {
295        self.with_pointer_into_page(offset, len, move |src| {
296            // SAFETY: If `with_pointer_into_page` calls into this closure, then
297            // it has performed a bounds check and guarantees that `src` is
298            // valid for `len` bytes.
299            //
300            // There caller guarantees that there is no data race.
301            unsafe { ptr::copy_nonoverlapping(src, dst, len) };
302            Ok(())
303        })
304    }
305
306    /// Maps the page and writes into it from the given buffer.
307    ///
308    /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
309    /// outside of the page, then this call returns [`EINVAL`].
310    ///
311    /// # Safety
312    ///
313    /// * Callers must ensure that `src` is valid for reading `len` bytes.
314    /// * Callers must ensure that this call does not race with a read or write to the same page
315    ///   that overlaps with this write.
316    pub unsafe fn write_raw(&self, src: *const u8, offset: usize, len: usize) -> Result {
317        self.with_pointer_into_page(offset, len, move |dst| {
318            // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
319            // bounds check and guarantees that `dst` is valid for `len` bytes.
320            //
321            // There caller guarantees that there is no data race.
322            unsafe { ptr::copy_nonoverlapping(src, dst, len) };
323            Ok(())
324        })
325    }
326
327    /// Maps the page and zeroes the given slice.
328    ///
329    /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
330    /// outside of the page, then this call returns [`EINVAL`].
331    ///
332    /// # Safety
333    ///
334    /// Callers must ensure that this call does not race with a read or write to the same page that
335    /// overlaps with this write.
336    pub unsafe fn fill_zero_raw(&self, offset: usize, len: usize) -> Result {
337        self.with_pointer_into_page(offset, len, move |dst| {
338            // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
339            // bounds check and guarantees that `dst` is valid for `len` bytes.
340            //
341            // There caller guarantees that there is no data race.
342            unsafe { ptr::write_bytes(dst, 0u8, len) };
343            Ok(())
344        })
345    }
346
347    /// Copies data from userspace into this page.
348    ///
349    /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
350    /// outside of the page, then this call returns [`EINVAL`].
351    ///
352    /// Like the other `UserSliceReader` methods, data races are allowed on the userspace address.
353    /// However, they are not allowed on the page you are copying into.
354    ///
355    /// # Safety
356    ///
357    /// Callers must ensure that this call does not race with a read or write to the same page that
358    /// overlaps with this write.
359    pub unsafe fn copy_from_user_slice_raw(
360        &self,
361        reader: &mut UserSliceReader,
362        offset: usize,
363        len: usize,
364    ) -> Result {
365        self.with_pointer_into_page(offset, len, move |dst| {
366            // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
367            // bounds check and guarantees that `dst` is valid for `len` bytes. Furthermore, we have
368            // exclusive access to the slice since the caller guarantees that there are no races.
369            reader.read_raw(unsafe { core::slice::from_raw_parts_mut(dst.cast(), len) })
370        })
371    }
372}
373
374impl Drop for Page {
375    #[inline]
376    fn drop(&mut self) {
377        // SAFETY: By the type invariants, we have ownership of the page and can free it.
378        unsafe { bindings::__free_pages(self.page.as_ptr(), 0) };
379    }
380}