kernel/page.rs
1// SPDX-License-Identifier: GPL-2.0
2
3//! Kernel page allocation and management.
4
5use crate::{
6 alloc::{
7 AllocError,
8 Flags, //
9 },
10 bindings,
11 error::{
12 code::*,
13 Result, //
14 },
15 uaccess::UserSliceReader, //
16};
17use core::{
18 marker::PhantomData,
19 mem::ManuallyDrop,
20 ops::Deref,
21 ptr::{
22 self,
23 NonNull, //
24 }, //
25};
26
/// A bitwise shift for the page size.
///
/// The value is taken from `bindings::PAGE_SHIFT` and converted to `usize` so that it can be
/// used directly in Rust shift and index arithmetic.
pub const PAGE_SHIFT: usize = bindings::PAGE_SHIFT as usize;

/// The number of bytes in a page, as provided by `bindings::PAGE_SIZE`.
pub const PAGE_SIZE: usize = bindings::PAGE_SIZE;

/// A bitmask that gives the page containing a given address.
///
/// Computed as `!(PAGE_SIZE - 1)`, so `addr & PAGE_MASK` clears the in-page offset bits of
/// `addr` (this relies on [`PAGE_SIZE`] being a power of two).
pub const PAGE_MASK: usize = !(PAGE_SIZE - 1);
35
36/// Rounds up to the next multiple of [`PAGE_SIZE`].
37///
38/// Returns [`None`] on integer overflow.
39///
40/// # Examples
41///
42/// ```
43/// use kernel::page::{
44/// page_align,
45/// PAGE_SIZE,
46/// };
47///
48/// // Requested address is already aligned.
49/// assert_eq!(page_align(0x0), Some(0x0));
50/// assert_eq!(page_align(PAGE_SIZE), Some(PAGE_SIZE));
51///
52/// // Requested address needs alignment up.
53/// assert_eq!(page_align(0x1), Some(PAGE_SIZE));
54/// assert_eq!(page_align(PAGE_SIZE + 1), Some(2 * PAGE_SIZE));
55///
56/// // Requested address causes overflow (returns `None`).
57/// let overflow_addr = usize::MAX - (PAGE_SIZE / 2);
58/// assert_eq!(page_align(overflow_addr), None);
59/// ```
60#[inline(always)]
61pub const fn page_align(addr: usize) -> Option<usize> {
62 let Some(sum) = addr.checked_add(PAGE_SIZE - 1) else {
63 return None;
64 };
65 Some(sum & PAGE_MASK)
66}
67
/// Representation of a non-owning reference to a [`Page`].
///
/// This type provides a borrowed version of a [`Page`] that is owned by some other entity, e.g. a
/// [`Vmalloc`] allocation such as [`VBox`].
///
/// The [`Page`] is stored inside a [`ManuallyDrop`], so dropping a [`BorrowedPage`] does not run
/// the destructor of [`Page`]; the lifetime `'a` ties the borrow to the entity that owns the page.
///
/// # Example
///
/// ```
/// # use kernel::{bindings, prelude::*};
/// use kernel::page::{BorrowedPage, Page, PAGE_SIZE};
/// # use core::{mem::MaybeUninit, ptr, ptr::NonNull };
///
/// fn borrow_page<'a>(vbox: &'a mut VBox<MaybeUninit<[u8; PAGE_SIZE]>>) -> BorrowedPage<'a> {
///     let ptr = ptr::from_ref(&**vbox);
///
///     // SAFETY: `ptr` is a valid pointer to `Vmalloc` memory.
///     let page = unsafe { bindings::vmalloc_to_page(ptr.cast()) };
///
///     // SAFETY: `vmalloc_to_page` returns a valid pointer to a `struct page` for a valid
///     // pointer to `Vmalloc` memory.
///     let page = unsafe { NonNull::new_unchecked(page) };
///
///     // SAFETY:
///     // - `page` is a valid pointer to a `struct page`.
///     // - `page` is valid for the entire lifetime `'a`, because it belongs to the `Vmalloc`
///     //   allocation that `vbox` keeps borrowed for `'a`.
///     unsafe { BorrowedPage::from_raw(page) }
/// }
///
/// let mut vbox = VBox::<[u8; PAGE_SIZE]>::new_uninit(GFP_KERNEL)?;
/// let page = borrow_page(&mut vbox);
///
/// // SAFETY: There is no concurrent read or write to this page.
/// unsafe { page.fill_zero_raw(0, PAGE_SIZE)? };
/// # Ok::<(), Error>(())
/// ```
///
/// # Invariants
///
/// The borrowed underlying pointer to a `struct page` is valid for the entire lifetime `'a`.
///
/// [`VBox`]: kernel::alloc::VBox
/// [`Vmalloc`]: kernel::alloc::allocator::Vmalloc
pub struct BorrowedPage<'a>(ManuallyDrop<Page>, PhantomData<&'a Page>);
111
112impl<'a> BorrowedPage<'a> {
113 /// Constructs a [`BorrowedPage`] from a raw pointer to a `struct page`.
114 ///
115 /// # Safety
116 ///
117 /// - `ptr` must point to a valid `bindings::page`.
118 /// - `ptr` must remain valid for the entire lifetime `'a`.
119 pub unsafe fn from_raw(ptr: NonNull<bindings::page>) -> Self {
120 let page = Page { page: ptr };
121
122 // INVARIANT: The safety requirements guarantee that `ptr` is valid for the entire lifetime
123 // `'a`.
124 Self(ManuallyDrop::new(page), PhantomData)
125 }
126}
127
128impl<'a> Deref for BorrowedPage<'a> {
129 type Target = Page;
130
131 fn deref(&self) -> &Self::Target {
132 &self.0
133 }
134}
135
/// Trait to be implemented by types which provide an [`Iterator`] implementation of
/// [`BorrowedPage`] items, such as [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter).
pub trait AsPageIter {
    /// The [`Iterator`] type, e.g. [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter).
    ///
    /// Every item it yields is a [`BorrowedPage`] carrying the same lifetime `'a` as the iterator.
    type Iter<'a>: Iterator<Item = BorrowedPage<'a>>
    where
        Self: 'a;

    /// Returns an [`Iterator`] of [`BorrowedPage`] items over all pages owned by `self`.
    ///
    /// This takes `&mut self`, so `self` stays exclusively borrowed for the lifetime of the
    /// returned iterator.
    fn page_iter(&mut self) -> Self::Iter<'_>;
}
147
/// A pointer to a page that owns the page allocation.
///
/// Dropping a [`Page`] frees the underlying page.
///
/// # Invariants
///
/// The pointer is valid, and has ownership over the page.
pub struct Page {
    // Non-null pointer to the underlying `struct page`.
    page: NonNull<bindings::page>,
}
156
// SAFETY: Pages have no logic that relies on them staying on a given thread, so moving them across
// threads is safe.
unsafe impl Send for Page {}

// SAFETY: Pages have no logic that relies on them not being accessed concurrently, so accessing
// them concurrently is safe. The methods that read or write the page contents are `unsafe` and
// make their callers responsible for ruling out data races.
unsafe impl Sync for Page {}
164
165impl Page {
166 /// Allocates a new page.
167 ///
168 /// # Examples
169 ///
170 /// Allocate memory for a page.
171 ///
172 /// ```
173 /// use kernel::page::Page;
174 ///
175 /// let page = Page::alloc_page(GFP_KERNEL)?;
176 /// # Ok::<(), kernel::alloc::AllocError>(())
177 /// ```
178 ///
179 /// Allocate memory for a page and zero its contents.
180 ///
181 /// ```
182 /// use kernel::page::Page;
183 ///
184 /// let page = Page::alloc_page(GFP_KERNEL | __GFP_ZERO)?;
185 /// # Ok::<(), kernel::alloc::AllocError>(())
186 /// ```
187 #[inline]
188 pub fn alloc_page(flags: Flags) -> Result<Self, AllocError> {
189 // SAFETY: Depending on the value of `gfp_flags`, this call may sleep. Other than that, it
190 // is always safe to call this method.
191 let page = unsafe { bindings::alloc_pages(flags.as_raw(), 0) };
192 let page = NonNull::new(page).ok_or(AllocError)?;
193 // INVARIANT: We just successfully allocated a page, so we now have ownership of the newly
194 // allocated page. We transfer that ownership to the new `Page` object.
195 Ok(Self { page })
196 }
197
198 /// Returns a raw pointer to the page.
199 pub fn as_ptr(&self) -> *mut bindings::page {
200 self.page.as_ptr()
201 }
202
203 /// Get the node id containing this page.
204 #[inline]
205 pub fn nid(&self) -> i32 {
206 // SAFETY: Always safe to call with a valid page.
207 unsafe { bindings::page_to_nid(self.as_ptr()) }
208 }
209
210 /// Runs a piece of code with this page mapped to an address.
211 ///
212 /// The page is unmapped when this call returns.
213 ///
214 /// # Using the raw pointer
215 ///
216 /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for
217 /// `PAGE_SIZE` bytes and for the duration in which the closure is called. The pointer might
218 /// only be mapped on the current thread, and when that is the case, dereferencing it on other
219 /// threads is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't
220 /// cause data races, the memory may be uninitialized, and so on.
221 ///
222 /// If multiple threads map the same page at the same time, then they may reference with
223 /// different addresses. However, even if the addresses are different, the underlying memory is
224 /// still the same for these purposes (e.g., it's still a data race if they both write to the
225 /// same underlying byte at the same time).
226 fn with_page_mapped<T>(&self, f: impl FnOnce(*mut u8) -> T) -> T {
227 // SAFETY: `page` is valid due to the type invariants on `Page`.
228 let mapped_addr = unsafe { bindings::kmap_local_page(self.as_ptr()) };
229
230 let res = f(mapped_addr.cast());
231
232 // This unmaps the page mapped above.
233 //
234 // SAFETY: Since this API takes the user code as a closure, it can only be used in a manner
235 // where the pages are unmapped in reverse order. This is as required by `kunmap_local`.
236 //
237 // In other words, if this call to `kunmap_local` happens when a different page should be
238 // unmapped first, then there must necessarily be a call to `kmap_local_page` other than the
239 // call just above in `with_page_mapped` that made that possible. In this case, it is the
240 // unsafe block that wraps that other call that is incorrect.
241 unsafe { bindings::kunmap_local(mapped_addr) };
242
243 res
244 }
245
246 /// Runs a piece of code with a raw pointer to a slice of this page, with bounds checking.
247 ///
248 /// If `f` is called, then it will be called with a pointer that points at `off` bytes into the
249 /// page, and the pointer will be valid for at least `len` bytes. The pointer is only valid on
250 /// this task, as this method uses a local mapping.
251 ///
252 /// If `off` and `len` refers to a region outside of this page, then this method returns
253 /// [`EINVAL`] and does not call `f`.
254 ///
255 /// # Using the raw pointer
256 ///
257 /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for
258 /// `len` bytes and for the duration in which the closure is called. The pointer might only be
259 /// mapped on the current thread, and when that is the case, dereferencing it on other threads
260 /// is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't cause
261 /// data races, the memory may be uninitialized, and so on.
262 ///
263 /// If multiple threads map the same page at the same time, then they may reference with
264 /// different addresses. However, even if the addresses are different, the underlying memory is
265 /// still the same for these purposes (e.g., it's still a data race if they both write to the
266 /// same underlying byte at the same time).
267 fn with_pointer_into_page<T>(
268 &self,
269 off: usize,
270 len: usize,
271 f: impl FnOnce(*mut u8) -> Result<T>,
272 ) -> Result<T> {
273 let bounds_ok = off <= PAGE_SIZE && len <= PAGE_SIZE && (off + len) <= PAGE_SIZE;
274
275 if bounds_ok {
276 self.with_page_mapped(move |page_addr| {
277 // SAFETY: The `off` integer is at most `PAGE_SIZE`, so this pointer offset will
278 // result in a pointer that is in bounds or one off the end of the page.
279 f(unsafe { page_addr.add(off) })
280 })
281 } else {
282 Err(EINVAL)
283 }
284 }
285
286 /// Maps the page and reads from it into the given buffer.
287 ///
288 /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
289 /// outside of the page, then this call returns [`EINVAL`].
290 ///
291 /// # Safety
292 ///
293 /// * Callers must ensure that `dst` is valid for writing `len` bytes.
294 /// * Callers must ensure that this call does not race with a write to the same page that
295 /// overlaps with this read.
296 pub unsafe fn read_raw(&self, dst: *mut u8, offset: usize, len: usize) -> Result {
297 self.with_pointer_into_page(offset, len, move |src| {
298 // SAFETY: If `with_pointer_into_page` calls into this closure, then
299 // it has performed a bounds check and guarantees that `src` is
300 // valid for `len` bytes.
301 //
302 // There caller guarantees that there is no data race.
303 unsafe { ptr::copy_nonoverlapping(src, dst, len) };
304 Ok(())
305 })
306 }
307
308 /// Maps the page and writes into it from the given buffer.
309 ///
310 /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
311 /// outside of the page, then this call returns [`EINVAL`].
312 ///
313 /// # Safety
314 ///
315 /// * Callers must ensure that `src` is valid for reading `len` bytes.
316 /// * Callers must ensure that this call does not race with a read or write to the same page
317 /// that overlaps with this write.
318 pub unsafe fn write_raw(&self, src: *const u8, offset: usize, len: usize) -> Result {
319 self.with_pointer_into_page(offset, len, move |dst| {
320 // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
321 // bounds check and guarantees that `dst` is valid for `len` bytes.
322 //
323 // There caller guarantees that there is no data race.
324 unsafe { ptr::copy_nonoverlapping(src, dst, len) };
325 Ok(())
326 })
327 }
328
329 /// Maps the page and zeroes the given slice.
330 ///
331 /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
332 /// outside of the page, then this call returns [`EINVAL`].
333 ///
334 /// # Safety
335 ///
336 /// Callers must ensure that this call does not race with a read or write to the same page that
337 /// overlaps with this write.
338 pub unsafe fn fill_zero_raw(&self, offset: usize, len: usize) -> Result {
339 self.with_pointer_into_page(offset, len, move |dst| {
340 // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
341 // bounds check and guarantees that `dst` is valid for `len` bytes.
342 //
343 // There caller guarantees that there is no data race.
344 unsafe { ptr::write_bytes(dst, 0u8, len) };
345 Ok(())
346 })
347 }
348
349 /// Copies data from userspace into this page.
350 ///
351 /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
352 /// outside of the page, then this call returns [`EINVAL`].
353 ///
354 /// Like the other `UserSliceReader` methods, data races are allowed on the userspace address.
355 /// However, they are not allowed on the page you are copying into.
356 ///
357 /// # Safety
358 ///
359 /// Callers must ensure that this call does not race with a read or write to the same page that
360 /// overlaps with this write.
361 pub unsafe fn copy_from_user_slice_raw(
362 &self,
363 reader: &mut UserSliceReader,
364 offset: usize,
365 len: usize,
366 ) -> Result {
367 self.with_pointer_into_page(offset, len, move |dst| {
368 // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
369 // bounds check and guarantees that `dst` is valid for `len` bytes. Furthermore, we have
370 // exclusive access to the slice since the caller guarantees that there are no races.
371 reader.read_raw(unsafe { core::slice::from_raw_parts_mut(dst.cast(), len) })
372 })
373 }
374}
375
376impl Drop for Page {
377 #[inline]
378 fn drop(&mut self) {
379 // SAFETY: By the type invariants, we have ownership of the page and can free it.
380 unsafe { bindings::__free_pages(self.page.as_ptr(), 0) };
381 }
382}