kernel/
page.rs

1// SPDX-License-Identifier: GPL-2.0
2
3//! Kernel page allocation and management.
4
5use crate::{
6    alloc::{AllocError, Flags},
7    bindings,
8    error::code::*,
9    error::Result,
10    uaccess::UserSliceReader,
11};
12use core::{
13    marker::PhantomData,
14    mem::ManuallyDrop,
15    ops::Deref,
16    ptr::{self, NonNull},
17};
18
/// A bitwise shift for the page size.
///
/// Re-exported from `bindings::PAGE_SHIFT`, converted to `usize`.
pub const PAGE_SHIFT: usize = bindings::PAGE_SHIFT as usize;

/// The number of bytes in a page.
///
/// Re-exported from `bindings::PAGE_SIZE`.
pub const PAGE_SIZE: usize = bindings::PAGE_SIZE;

/// A bitmask that gives the page containing a given address.
///
/// Computed as `!(PAGE_SIZE - 1)`; provided [`PAGE_SIZE`] is a power of two, `addr & PAGE_MASK`
/// clears the offset-within-page bits of `addr`.
pub const PAGE_MASK: usize = !(PAGE_SIZE - 1);
27
28/// Rounds up to the next multiple of [`PAGE_SIZE`].
29///
30/// Returns [`None`] on integer overflow.
31///
32/// # Examples
33///
34/// ```
35/// use kernel::page::{
36///     page_align,
37///     PAGE_SIZE,
38/// };
39///
40/// // Requested address is already aligned.
41/// assert_eq!(page_align(0x0), Some(0x0));
42/// assert_eq!(page_align(PAGE_SIZE), Some(PAGE_SIZE));
43///
44/// // Requested address needs alignment up.
45/// assert_eq!(page_align(0x1), Some(PAGE_SIZE));
46/// assert_eq!(page_align(PAGE_SIZE + 1), Some(2 * PAGE_SIZE));
47///
48/// // Requested address causes overflow (returns `None`).
49/// let overflow_addr = usize::MAX - (PAGE_SIZE / 2);
50/// assert_eq!(page_align(overflow_addr), None);
51/// ```
52#[inline(always)]
53pub const fn page_align(addr: usize) -> Option<usize> {
54    let Some(sum) = addr.checked_add(PAGE_SIZE - 1) else {
55        return None;
56    };
57    Some(sum & PAGE_MASK)
58}
59
/// Representation of a non-owning reference to a [`Page`].
///
/// This type provides a borrowed version of a [`Page`] that is owned by some other entity, e.g. a
/// [`Vmalloc`] allocation such as [`VBox`].
///
/// The inner [`Page`] is wrapped in [`ManuallyDrop`], so dropping a [`BorrowedPage`] never runs
/// [`Page`]'s destructor and therefore never frees the page.
///
/// # Example
///
/// ```
/// # use kernel::{bindings, prelude::*};
/// use kernel::page::{BorrowedPage, Page, PAGE_SIZE};
/// # use core::{mem::MaybeUninit, ptr, ptr::NonNull };
///
/// fn borrow_page<'a>(vbox: &'a mut VBox<MaybeUninit<[u8; PAGE_SIZE]>>) -> BorrowedPage<'a> {
///     let ptr = ptr::from_ref(&**vbox);
///
///     // SAFETY: `ptr` is a valid pointer to `Vmalloc` memory.
///     let page = unsafe { bindings::vmalloc_to_page(ptr.cast()) };
///
///     // SAFETY: `vmalloc_to_page` returns a valid pointer to a `struct page` for a valid
///     // pointer to `Vmalloc` memory.
///     let page = unsafe { NonNull::new_unchecked(page) };
///
///     // SAFETY:
///     // - `page` is a valid pointer to a `struct page`.
///     // - `page` is valid for the entire lifetime `'a`, since `vbox` is borrowed for `'a`.
///     unsafe { BorrowedPage::from_raw(page) }
/// }
///
/// let mut vbox = VBox::<[u8; PAGE_SIZE]>::new_uninit(GFP_KERNEL)?;
/// let page = borrow_page(&mut vbox);
///
/// // SAFETY: There is no concurrent read or write to this page.
/// unsafe { page.fill_zero_raw(0, PAGE_SIZE)? };
/// # Ok::<(), Error>(())
/// ```
///
/// # Invariants
///
/// The borrowed underlying pointer to a `struct page` is valid for the entire lifetime `'a`.
///
/// [`VBox`]: kernel::alloc::VBox
/// [`Vmalloc`]: kernel::alloc::allocator::Vmalloc
pub struct BorrowedPage<'a>(ManuallyDrop<Page>, PhantomData<&'a Page>);
103
104impl<'a> BorrowedPage<'a> {
105    /// Constructs a [`BorrowedPage`] from a raw pointer to a `struct page`.
106    ///
107    /// # Safety
108    ///
109    /// - `ptr` must point to a valid `bindings::page`.
110    /// - `ptr` must remain valid for the entire lifetime `'a`.
111    pub unsafe fn from_raw(ptr: NonNull<bindings::page>) -> Self {
112        let page = Page { page: ptr };
113
114        // INVARIANT: The safety requirements guarantee that `ptr` is valid for the entire lifetime
115        // `'a`.
116        Self(ManuallyDrop::new(page), PhantomData)
117    }
118}
119
120impl<'a> Deref for BorrowedPage<'a> {
121    type Target = Page;
122
123    fn deref(&self) -> &Self::Target {
124        &self.0
125    }
126}
127
/// Trait to be implemented by types which provide an [`Iterator`] implementation of
/// [`BorrowedPage`] items, such as [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter).
pub trait AsPageIter {
    /// The [`Iterator`] type, e.g. [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter).
    ///
    /// The lifetime `'a` is shared between the iterator and every [`BorrowedPage`] it yields, and
    /// `Self` must outlive it.
    type Iter<'a>: Iterator<Item = BorrowedPage<'a>>
    where
        Self: 'a;

    /// Returns an [`Iterator`] of [`BorrowedPage`] items over all pages owned by `self`.
    ///
    /// `self` is borrowed mutably, so it stays exclusively borrowed for as long as the returned
    /// iterator or any page it yielded is alive.
    fn page_iter(&mut self) -> Self::Iter<'_>;
}
139
/// A pointer to a page that owns the page allocation.
///
/// The page is released again when the `Page` is dropped (see the [`Drop`] implementation).
///
/// # Invariants
///
/// The pointer is valid, and has ownership over the page.
pub struct Page {
    /// Non-null pointer to the underlying C `struct page`.
    page: NonNull<bindings::page>,
}
148
// SAFETY: Pages have no logic that relies on them staying on a given thread, so moving them across
// threads is safe.
unsafe impl Send for Page {}

// SAFETY: Pages have no logic that relies on them not being accessed concurrently, so accessing
// them concurrently is safe.
//
// Note that the methods of `Page` which touch the page contents through `&self` are `unsafe` and
// make the caller responsible for ruling out data races.
unsafe impl Sync for Page {}
156
157impl Page {
158    /// Allocates a new page.
159    ///
160    /// # Examples
161    ///
162    /// Allocate memory for a page.
163    ///
164    /// ```
165    /// use kernel::page::Page;
166    ///
167    /// let page = Page::alloc_page(GFP_KERNEL)?;
168    /// # Ok::<(), kernel::alloc::AllocError>(())
169    /// ```
170    ///
171    /// Allocate memory for a page and zero its contents.
172    ///
173    /// ```
174    /// use kernel::page::Page;
175    ///
176    /// let page = Page::alloc_page(GFP_KERNEL | __GFP_ZERO)?;
177    /// # Ok::<(), kernel::alloc::AllocError>(())
178    /// ```
179    #[inline]
180    pub fn alloc_page(flags: Flags) -> Result<Self, AllocError> {
181        // SAFETY: Depending on the value of `gfp_flags`, this call may sleep. Other than that, it
182        // is always safe to call this method.
183        let page = unsafe { bindings::alloc_pages(flags.as_raw(), 0) };
184        let page = NonNull::new(page).ok_or(AllocError)?;
185        // INVARIANT: We just successfully allocated a page, so we now have ownership of the newly
186        // allocated page. We transfer that ownership to the new `Page` object.
187        Ok(Self { page })
188    }
189
190    /// Returns a raw pointer to the page.
191    pub fn as_ptr(&self) -> *mut bindings::page {
192        self.page.as_ptr()
193    }
194
195    /// Get the node id containing this page.
196    pub fn nid(&self) -> i32 {
197        // SAFETY: Always safe to call with a valid page.
198        unsafe { bindings::page_to_nid(self.as_ptr()) }
199    }
200
201    /// Runs a piece of code with this page mapped to an address.
202    ///
203    /// The page is unmapped when this call returns.
204    ///
205    /// # Using the raw pointer
206    ///
207    /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for
208    /// `PAGE_SIZE` bytes and for the duration in which the closure is called. The pointer might
209    /// only be mapped on the current thread, and when that is the case, dereferencing it on other
210    /// threads is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't
211    /// cause data races, the memory may be uninitialized, and so on.
212    ///
213    /// If multiple threads map the same page at the same time, then they may reference with
214    /// different addresses. However, even if the addresses are different, the underlying memory is
215    /// still the same for these purposes (e.g., it's still a data race if they both write to the
216    /// same underlying byte at the same time).
217    fn with_page_mapped<T>(&self, f: impl FnOnce(*mut u8) -> T) -> T {
218        // SAFETY: `page` is valid due to the type invariants on `Page`.
219        let mapped_addr = unsafe { bindings::kmap_local_page(self.as_ptr()) };
220
221        let res = f(mapped_addr.cast());
222
223        // This unmaps the page mapped above.
224        //
225        // SAFETY: Since this API takes the user code as a closure, it can only be used in a manner
226        // where the pages are unmapped in reverse order. This is as required by `kunmap_local`.
227        //
228        // In other words, if this call to `kunmap_local` happens when a different page should be
229        // unmapped first, then there must necessarily be a call to `kmap_local_page` other than the
230        // call just above in `with_page_mapped` that made that possible. In this case, it is the
231        // unsafe block that wraps that other call that is incorrect.
232        unsafe { bindings::kunmap_local(mapped_addr) };
233
234        res
235    }
236
237    /// Runs a piece of code with a raw pointer to a slice of this page, with bounds checking.
238    ///
239    /// If `f` is called, then it will be called with a pointer that points at `off` bytes into the
240    /// page, and the pointer will be valid for at least `len` bytes. The pointer is only valid on
241    /// this task, as this method uses a local mapping.
242    ///
243    /// If `off` and `len` refers to a region outside of this page, then this method returns
244    /// [`EINVAL`] and does not call `f`.
245    ///
246    /// # Using the raw pointer
247    ///
248    /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for
249    /// `len` bytes and for the duration in which the closure is called. The pointer might only be
250    /// mapped on the current thread, and when that is the case, dereferencing it on other threads
251    /// is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't cause
252    /// data races, the memory may be uninitialized, and so on.
253    ///
254    /// If multiple threads map the same page at the same time, then they may reference with
255    /// different addresses. However, even if the addresses are different, the underlying memory is
256    /// still the same for these purposes (e.g., it's still a data race if they both write to the
257    /// same underlying byte at the same time).
258    fn with_pointer_into_page<T>(
259        &self,
260        off: usize,
261        len: usize,
262        f: impl FnOnce(*mut u8) -> Result<T>,
263    ) -> Result<T> {
264        let bounds_ok = off <= PAGE_SIZE && len <= PAGE_SIZE && (off + len) <= PAGE_SIZE;
265
266        if bounds_ok {
267            self.with_page_mapped(move |page_addr| {
268                // SAFETY: The `off` integer is at most `PAGE_SIZE`, so this pointer offset will
269                // result in a pointer that is in bounds or one off the end of the page.
270                f(unsafe { page_addr.add(off) })
271            })
272        } else {
273            Err(EINVAL)
274        }
275    }
276
277    /// Maps the page and reads from it into the given buffer.
278    ///
279    /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
280    /// outside of the page, then this call returns [`EINVAL`].
281    ///
282    /// # Safety
283    ///
284    /// * Callers must ensure that `dst` is valid for writing `len` bytes.
285    /// * Callers must ensure that this call does not race with a write to the same page that
286    ///   overlaps with this read.
287    pub unsafe fn read_raw(&self, dst: *mut u8, offset: usize, len: usize) -> Result {
288        self.with_pointer_into_page(offset, len, move |src| {
289            // SAFETY: If `with_pointer_into_page` calls into this closure, then
290            // it has performed a bounds check and guarantees that `src` is
291            // valid for `len` bytes.
292            //
293            // There caller guarantees that there is no data race.
294            unsafe { ptr::copy_nonoverlapping(src, dst, len) };
295            Ok(())
296        })
297    }
298
299    /// Maps the page and writes into it from the given buffer.
300    ///
301    /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
302    /// outside of the page, then this call returns [`EINVAL`].
303    ///
304    /// # Safety
305    ///
306    /// * Callers must ensure that `src` is valid for reading `len` bytes.
307    /// * Callers must ensure that this call does not race with a read or write to the same page
308    ///   that overlaps with this write.
309    pub unsafe fn write_raw(&self, src: *const u8, offset: usize, len: usize) -> Result {
310        self.with_pointer_into_page(offset, len, move |dst| {
311            // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
312            // bounds check and guarantees that `dst` is valid for `len` bytes.
313            //
314            // There caller guarantees that there is no data race.
315            unsafe { ptr::copy_nonoverlapping(src, dst, len) };
316            Ok(())
317        })
318    }
319
320    /// Maps the page and zeroes the given slice.
321    ///
322    /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
323    /// outside of the page, then this call returns [`EINVAL`].
324    ///
325    /// # Safety
326    ///
327    /// Callers must ensure that this call does not race with a read or write to the same page that
328    /// overlaps with this write.
329    pub unsafe fn fill_zero_raw(&self, offset: usize, len: usize) -> Result {
330        self.with_pointer_into_page(offset, len, move |dst| {
331            // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
332            // bounds check and guarantees that `dst` is valid for `len` bytes.
333            //
334            // There caller guarantees that there is no data race.
335            unsafe { ptr::write_bytes(dst, 0u8, len) };
336            Ok(())
337        })
338    }
339
340    /// Copies data from userspace into this page.
341    ///
342    /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
343    /// outside of the page, then this call returns [`EINVAL`].
344    ///
345    /// Like the other `UserSliceReader` methods, data races are allowed on the userspace address.
346    /// However, they are not allowed on the page you are copying into.
347    ///
348    /// # Safety
349    ///
350    /// Callers must ensure that this call does not race with a read or write to the same page that
351    /// overlaps with this write.
352    pub unsafe fn copy_from_user_slice_raw(
353        &self,
354        reader: &mut UserSliceReader,
355        offset: usize,
356        len: usize,
357    ) -> Result {
358        self.with_pointer_into_page(offset, len, move |dst| {
359            // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
360            // bounds check and guarantees that `dst` is valid for `len` bytes. Furthermore, we have
361            // exclusive access to the slice since the caller guarantees that there are no races.
362            reader.read_raw(unsafe { core::slice::from_raw_parts_mut(dst.cast(), len) })
363        })
364    }
365}
366
367impl Drop for Page {
368    #[inline]
369    fn drop(&mut self) {
370        // SAFETY: By the type invariants, we have ownership of the page and can free it.
371        unsafe { bindings::__free_pages(self.page.as_ptr(), 0) };
372    }
373}