summary | refs | log | tree | commit | diff
path: root/rust/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'rust/kernel')
-rw-r--r-- rust/kernel/alloc.rs 73
-rw-r--r-- rust/kernel/alloc/allocator.rs (renamed from rust/kernel/allocator.rs) 38
-rw-r--r-- rust/kernel/alloc/box_ext.rs 56
-rw-r--r-- rust/kernel/alloc/vec_ext.rs 185
-rw-r--r-- rust/kernel/block.rs 5
-rw-r--r-- rust/kernel/block/mq.rs 98
-rw-r--r-- rust/kernel/block/mq/gen_disk.rs 198
-rw-r--r-- rust/kernel/block/mq/operations.rs 245
-rw-r--r-- rust/kernel/block/mq/raw_writer.rs 55
-rw-r--r-- rust/kernel/block/mq/request.rs 253
-rw-r--r-- rust/kernel/block/mq/tag_set.rs 86
-rw-r--r-- rust/kernel/device.rs 105
-rw-r--r-- rust/kernel/error.rs 20
-rw-r--r-- rust/kernel/firmware.rs 117
-rw-r--r-- rust/kernel/init.rs 74
-rw-r--r-- rust/kernel/init/macros.rs 47
-rw-r--r-- rust/kernel/lib.rs 20
-rw-r--r-- rust/kernel/net/phy.rs 6
-rw-r--r-- rust/kernel/prelude.rs 2
-rw-r--r-- rust/kernel/print.rs 5
-rw-r--r-- rust/kernel/std_vendor.rs 7
-rw-r--r-- rust/kernel/str.rs 98
-rw-r--r-- rust/kernel/sync.rs 6
-rw-r--r-- rust/kernel/sync/arc.rs 189
-rw-r--r-- rust/kernel/sync/condvar.rs 3
-rw-r--r-- rust/kernel/sync/lock.rs 2
-rw-r--r-- rust/kernel/sync/lock/mutex.rs 4
-rw-r--r-- rust/kernel/sync/lock/spinlock.rs 4
-rw-r--r-- rust/kernel/task.rs 2
-rw-r--r-- rust/kernel/time.rs 63
-rw-r--r-- rust/kernel/types.rs 6
-rw-r--r-- rust/kernel/workqueue.rs 54
32 files changed, 1936 insertions, 190 deletions
diff --git a/rust/kernel/alloc.rs b/rust/kernel/alloc.rs
new file mode 100644
index 000000000000..531b5e471cb1
--- /dev/null
+++ b/rust/kernel/alloc.rs
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Extensions to the [`alloc`] crate.
+
+#[cfg(not(test))]
+#[cfg(not(testlib))]
+mod allocator;
+pub mod box_ext;
+pub mod vec_ext;
+
+/// Indicates an allocation error.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub struct AllocError;
+
+/// Flags to be used when allocating memory.
+///
+/// They can be combined with the operators `|`, `&`, and `!`.
+///
+/// Values can be used from the [`flags`] module.
+#[derive(Clone, Copy)]
+pub struct Flags(u32);
+
+impl core::ops::BitOr for Flags {
+ type Output = Self;
+ fn bitor(self, rhs: Self) -> Self::Output {
+ Self(self.0 | rhs.0)
+ }
+}
+
+impl core::ops::BitAnd for Flags {
+ type Output = Self;
+ fn bitand(self, rhs: Self) -> Self::Output {
+ Self(self.0 & rhs.0)
+ }
+}
+
+impl core::ops::Not for Flags {
+ type Output = Self;
+ fn not(self) -> Self::Output {
+ Self(!self.0)
+ }
+}
+
+/// Allocation flags.
+///
+/// These are meant to be used in functions that can allocate memory.
+pub mod flags {
+ use super::Flags;
+
+ /// Zeroes out the allocated memory.
+ ///
+ /// This is normally or'd with other flags.
+ pub const __GFP_ZERO: Flags = Flags(bindings::__GFP_ZERO);
+
+ /// Users can not sleep and need the allocation to succeed.
+ ///
+ /// A lower watermark is applied to allow access to "atomic reserves". The current
+ /// implementation doesn't support NMI and few other strict non-preemptive contexts (e.g.
+ /// raw_spin_lock). The same applies to [`GFP_NOWAIT`].
+ pub const GFP_ATOMIC: Flags = Flags(bindings::GFP_ATOMIC);
+
+ /// Typical for kernel-internal allocations. The caller requires ZONE_NORMAL or a lower zone
+ /// for direct access but can direct reclaim.
+ pub const GFP_KERNEL: Flags = Flags(bindings::GFP_KERNEL);
+
+ /// The same as [`GFP_KERNEL`], except the allocation is accounted to kmemcg.
+ pub const GFP_KERNEL_ACCOUNT: Flags = Flags(bindings::GFP_KERNEL_ACCOUNT);
+
+ /// For kernel allocations that should not stall for direct reclaim, start physical IO or
+ /// use any filesystem callback. It is very likely to fail to allocate memory, even for very
+ /// small allocations.
+ pub const GFP_NOWAIT: Flags = Flags(bindings::GFP_NOWAIT);
+}
diff --git a/rust/kernel/allocator.rs b/rust/kernel/alloc/allocator.rs
index 01ad139e19bc..e6ea601f38c6 100644
--- a/rust/kernel/allocator.rs
+++ b/rust/kernel/alloc/allocator.rs
@@ -2,11 +2,10 @@
//! Allocator support.
+use super::{flags::*, Flags};
use core::alloc::{GlobalAlloc, Layout};
use core::ptr;
-use crate::bindings;
-
struct KernelAllocator;
/// Calls `krealloc` with a proper size to alloc a new object aligned to `new_layout`'s alignment.
@@ -15,35 +14,28 @@ struct KernelAllocator;
///
/// - `ptr` can be either null or a pointer which has been allocated by this allocator.
/// - `new_layout` must have a non-zero size.
-unsafe fn krealloc_aligned(ptr: *mut u8, new_layout: Layout, flags: bindings::gfp_t) -> *mut u8 {
+pub(crate) unsafe fn krealloc_aligned(ptr: *mut u8, new_layout: Layout, flags: Flags) -> *mut u8 {
// Customized layouts from `Layout::from_size_align()` can have size < align, so pad first.
let layout = new_layout.pad_to_align();
- let mut size = layout.size();
-
- if layout.align() > bindings::ARCH_SLAB_MINALIGN {
- // The alignment requirement exceeds the slab guarantee, thus try to enlarge the size
- // to use the "power-of-two" size/alignment guarantee (see comments in `kmalloc()` for
- // more information).
- //
- // Note that `layout.size()` (after padding) is guaranteed to be a multiple of
- // `layout.align()`, so `next_power_of_two` gives enough alignment guarantee.
- size = size.next_power_of_two();
- }
+ // Note that `layout.size()` (after padding) is guaranteed to be a multiple of `layout.align()`
+ // which together with the slab guarantees means the `krealloc` will return a properly aligned
+ // object (see comments in `kmalloc()` for more information).
+ let size = layout.size();
// SAFETY:
// - `ptr` is either null or a pointer returned from a previous `k{re}alloc()` by the
// function safety requirement.
- // - `size` is greater than 0 since it's either a `layout.size()` (which cannot be zero
- // according to the function safety requirement) or a result from `next_power_of_two()`.
- unsafe { bindings::krealloc(ptr as *const core::ffi::c_void, size, flags) as *mut u8 }
+ // - `size` is greater than 0 since it's from `layout.size()` (which cannot be zero according
+ // to the function safety requirement)
+ unsafe { bindings::krealloc(ptr as *const core::ffi::c_void, size, flags.0) as *mut u8 }
}
unsafe impl GlobalAlloc for KernelAllocator {
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
// SAFETY: `ptr::null_mut()` is null and `layout` has a non-zero size by the function safety
// requirement.
- unsafe { krealloc_aligned(ptr::null_mut(), layout, bindings::GFP_KERNEL) }
+ unsafe { krealloc_aligned(ptr::null_mut(), layout, GFP_KERNEL) }
}
unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) {
@@ -64,19 +56,13 @@ unsafe impl GlobalAlloc for KernelAllocator {
// requirement.
// - the size of `layout` is not zero because `new_size` is not zero by the function safety
// requirement.
- unsafe { krealloc_aligned(ptr, layout, bindings::GFP_KERNEL) }
+ unsafe { krealloc_aligned(ptr, layout, GFP_KERNEL) }
}
unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
// SAFETY: `ptr::null_mut()` is null and `layout` has a non-zero size by the function safety
// requirement.
- unsafe {
- krealloc_aligned(
- ptr::null_mut(),
- layout,
- bindings::GFP_KERNEL | bindings::__GFP_ZERO,
- )
- }
+ unsafe { krealloc_aligned(ptr::null_mut(), layout, GFP_KERNEL | __GFP_ZERO) }
}
}
diff --git a/rust/kernel/alloc/box_ext.rs b/rust/kernel/alloc/box_ext.rs
new file mode 100644
index 000000000000..829cb1c1cf9e
--- /dev/null
+++ b/rust/kernel/alloc/box_ext.rs
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Extensions to [`Box`] for fallible allocations.
+
+use super::{AllocError, Flags};
+use alloc::boxed::Box;
+use core::mem::MaybeUninit;
+
+/// Extensions to [`Box`].
+pub trait BoxExt<T>: Sized {
+ /// Allocates a new box.
+ ///
+ /// The allocation may fail, in which case an error is returned.
+ fn new(x: T, flags: Flags) -> Result<Self, AllocError>;
+
+ /// Allocates a new uninitialised box.
+ ///
+ /// The allocation may fail, in which case an error is returned.
+ fn new_uninit(flags: Flags) -> Result<Box<MaybeUninit<T>>, AllocError>;
+}
+
+impl<T> BoxExt<T> for Box<T> {
+ fn new(x: T, flags: Flags) -> Result<Self, AllocError> {
+ let b = <Self as BoxExt<_>>::new_uninit(flags)?;
+ Ok(Box::write(b, x))
+ }
+
+ #[cfg(any(test, testlib))]
+ fn new_uninit(_flags: Flags) -> Result<Box<MaybeUninit<T>>, AllocError> {
+ Ok(Box::new_uninit())
+ }
+
+ #[cfg(not(any(test, testlib)))]
+ fn new_uninit(flags: Flags) -> Result<Box<MaybeUninit<T>>, AllocError> {
+ let ptr = if core::mem::size_of::<MaybeUninit<T>>() == 0 {
+ core::ptr::NonNull::<_>::dangling().as_ptr()
+ } else {
+ let layout = core::alloc::Layout::new::<MaybeUninit<T>>();
+
+ // SAFETY: Memory is being allocated (first arg is null). The only other source of
+ // safety issues is sleeping in atomic context, which is addressed by klint. Lastly,
+ // the type is not a ZST (checked above).
+ let ptr =
+ unsafe { super::allocator::krealloc_aligned(core::ptr::null_mut(), layout, flags) };
+ if ptr.is_null() {
+ return Err(AllocError);
+ }
+
+ ptr.cast::<MaybeUninit<T>>()
+ };
+
+ // SAFETY: For non-zero-sized types, we allocate above using the global allocator. For
+ // zero-sized types, we use `NonNull::dangling`.
+ Ok(unsafe { Box::from_raw(ptr) })
+ }
+}
diff --git a/rust/kernel/alloc/vec_ext.rs b/rust/kernel/alloc/vec_ext.rs
new file mode 100644
index 000000000000..1297a4be32e8
--- /dev/null
+++ b/rust/kernel/alloc/vec_ext.rs
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Extensions to [`Vec`] for fallible allocations.
+
+use super::{AllocError, Flags};
+use alloc::vec::Vec;
+
+/// Extensions to [`Vec`].
+pub trait VecExt<T>: Sized {
+ /// Creates a new [`Vec`] instance with at least the given capacity.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let v = Vec::<u32>::with_capacity(20, GFP_KERNEL)?;
+ ///
+ /// assert!(v.capacity() >= 20);
+ /// # Ok::<(), Error>(())
+ /// ```
+ fn with_capacity(capacity: usize, flags: Flags) -> Result<Self, AllocError>;
+
+ /// Appends an element to the back of the [`Vec`] instance.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut v = Vec::new();
+ /// v.push(1, GFP_KERNEL)?;
+ /// assert_eq!(&v, &[1]);
+ ///
+ /// v.push(2, GFP_KERNEL)?;
+ /// assert_eq!(&v, &[1, 2]);
+ /// # Ok::<(), Error>(())
+ /// ```
+ fn push(&mut self, v: T, flags: Flags) -> Result<(), AllocError>;
+
+ /// Pushes clones of the elements of slice into the [`Vec`] instance.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut v = Vec::new();
+ /// v.push(1, GFP_KERNEL)?;
+ ///
+ /// v.extend_from_slice(&[20, 30, 40], GFP_KERNEL)?;
+ /// assert_eq!(&v, &[1, 20, 30, 40]);
+ ///
+ /// v.extend_from_slice(&[50, 60], GFP_KERNEL)?;
+ /// assert_eq!(&v, &[1, 20, 30, 40, 50, 60]);
+ /// # Ok::<(), Error>(())
+ /// ```
+ fn extend_from_slice(&mut self, other: &[T], flags: Flags) -> Result<(), AllocError>
+ where
+ T: Clone;
+
+ /// Ensures that the capacity exceeds the length by at least `additional` elements.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut v = Vec::new();
+ /// v.push(1, GFP_KERNEL)?;
+ ///
+ /// v.reserve(10, GFP_KERNEL)?;
+ /// let cap = v.capacity();
+ /// assert!(cap >= 10);
+ ///
+ /// v.reserve(10, GFP_KERNEL)?;
+ /// let new_cap = v.capacity();
+ /// assert_eq!(new_cap, cap);
+ ///
+ /// # Ok::<(), Error>(())
+ /// ```
+ fn reserve(&mut self, additional: usize, flags: Flags) -> Result<(), AllocError>;
+}
+
+impl<T> VecExt<T> for Vec<T> {
+ fn with_capacity(capacity: usize, flags: Flags) -> Result<Self, AllocError> {
+ let mut v = Vec::new();
+ <Self as VecExt<_>>::reserve(&mut v, capacity, flags)?;
+ Ok(v)
+ }
+
+ fn push(&mut self, v: T, flags: Flags) -> Result<(), AllocError> {
+ <Self as VecExt<_>>::reserve(self, 1, flags)?;
+ let s = self.spare_capacity_mut();
+ s[0].write(v);
+
+ // SAFETY: We just initialised the first spare entry, so it is safe to increase the length
+ // by 1. We also know that the new length is <= capacity because of the previous call to
+ // `reserve` above.
+ unsafe { self.set_len(self.len() + 1) };
+ Ok(())
+ }
+
+ fn extend_from_slice(&mut self, other: &[T], flags: Flags) -> Result<(), AllocError>
+ where
+ T: Clone,
+ {
+ <Self as VecExt<_>>::reserve(self, other.len(), flags)?;
+ for (slot, item) in core::iter::zip(self.spare_capacity_mut(), other) {
+ slot.write(item.clone());
+ }
+
+ // SAFETY: We just initialised the `other.len()` spare entries, so it is safe to increase
+ // the length by the same amount. We also know that the new length is <= capacity because
+ // of the previous call to `reserve` above.
+ unsafe { self.set_len(self.len() + other.len()) };
+ Ok(())
+ }
+
+ #[cfg(any(test, testlib))]
+ fn reserve(&mut self, additional: usize, _flags: Flags) -> Result<(), AllocError> {
+ Vec::reserve(self, additional);
+ Ok(())
+ }
+
+ #[cfg(not(any(test, testlib)))]
+ fn reserve(&mut self, additional: usize, flags: Flags) -> Result<(), AllocError> {
+ let len = self.len();
+ let cap = self.capacity();
+
+ if cap - len >= additional {
+ return Ok(());
+ }
+
+ if core::mem::size_of::<T>() == 0 {
+ // The capacity is already `usize::MAX` for ZSTs, we can't go higher.
+ return Err(AllocError);
+ }
+
+ // We know cap is <= `isize::MAX` because `Layout::array` fails if the resulting byte size
+ // is greater than `isize::MAX`. So the multiplication by two won't overflow.
+ let new_cap = core::cmp::max(cap * 2, len.checked_add(additional).ok_or(AllocError)?);
+ let layout = core::alloc::Layout::array::<T>(new_cap).map_err(|_| AllocError)?;
+
+ let (old_ptr, len, cap) = destructure(self);
+
+ // We need to make sure that `ptr` is either NULL or comes from a previous call to
+ // `krealloc_aligned`. A `Vec<T>`'s `ptr` value is not guaranteed to be NULL and might be
+ // dangling after being created with `Vec::new`. Instead, we can rely on `Vec<T>`'s capacity
+ // to be zero if no memory has been allocated yet.
+ let ptr = if cap == 0 {
+ core::ptr::null_mut()
+ } else {
+ old_ptr
+ };
+
+ // SAFETY: `ptr` is valid because it's either NULL or comes from a previous call to
+ // `krealloc_aligned`. We also verified that the type is not a ZST.
+ let new_ptr = unsafe { super::allocator::krealloc_aligned(ptr.cast(), layout, flags) };
+ if new_ptr.is_null() {
+ // SAFETY: We are just rebuilding the existing `Vec` with no changes.
+ unsafe { rebuild(self, old_ptr, len, cap) };
+ Err(AllocError)
+ } else {
+ // SAFETY: `ptr` has been reallocated with the layout for `new_cap` elements. New cap
+ // is greater than `cap`, so it continues to be >= `len`.
+ unsafe { rebuild(self, new_ptr.cast::<T>(), len, new_cap) };
+ Ok(())
+ }
+ }
+}
+
+#[cfg(not(any(test, testlib)))]
+fn destructure<T>(v: &mut Vec<T>) -> (*mut T, usize, usize) {
+ let mut tmp = Vec::new();
+ core::mem::swap(&mut tmp, v);
+ let mut tmp = core::mem::ManuallyDrop::new(tmp);
+ let len = tmp.len();
+ let cap = tmp.capacity();
+ (tmp.as_mut_ptr(), len, cap)
+}
+
+/// Rebuilds a `Vec` from a pointer, length, and capacity.
+///
+/// # Safety
+///
+/// The same as [`Vec::from_raw_parts`].
+#[cfg(not(any(test, testlib)))]
+unsafe fn rebuild<T>(v: &mut Vec<T>, ptr: *mut T, len: usize, cap: usize) {
+ // SAFETY: The safety requirements from this function satisfy those of `from_raw_parts`.
+ let mut tmp = unsafe { Vec::from_raw_parts(ptr, len, cap) };
+ core::mem::swap(&mut tmp, v);
+}
diff --git a/rust/kernel/block.rs b/rust/kernel/block.rs
new file mode 100644
index 000000000000..150f710efe5b
--- /dev/null
+++ b/rust/kernel/block.rs
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Types for working with the block layer.
+
+pub mod mq;
diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs
new file mode 100644
index 000000000000..fb0f393c1cea
--- /dev/null
+++ b/rust/kernel/block/mq.rs
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! This module provides types for implementing block drivers that interface the
+//! blk-mq subsystem.
+//!
+//! To implement a block device driver, a Rust module must do the following:
+//!
+//! - Implement [`Operations`] for a type `T`.
+//! - Create a [`TagSet<T>`].
+//! - Create a [`GenDisk<T>`], via the [`GenDiskBuilder`].
+//! - Add the disk to the system by calling [`GenDiskBuilder::build`] passing in
+//! the `TagSet` reference.
+//!
+//! The types available in this module that have direct C counterparts are:
+//!
+//! - The [`TagSet`] type that abstracts the C type `struct blk_mq_tag_set`.
+//! - The [`GenDisk`] type that abstracts the C type `struct gendisk`.
+//! - The [`Request`] type that abstracts the C type `struct request`.
+//!
+//! The kernel will interface with the block device driver by calling the method
+//! implementations of the `Operations` trait.
+//!
+//! IO requests are passed to the driver as [`kernel::types::ARef<Request>`]
+//! instances. The `Request` type is a wrapper around the C `struct request`.
+//! The driver must mark end of processing by calling one of the
+//! `Request::end` methods. Failure to do so can lead to deadlock or timeout
+//! errors. Please note that the C function `blk_mq_start_request` is implicitly
+//! called when the request is queued with the driver.
+//!
+//! The `TagSet` is responsible for creating and maintaining a mapping between
+//! `Request`s and integer ids as well as carrying a pointer to the vtable
+//! generated by `Operations`. This mapping is useful for associating
+//! completions from hardware with the correct `Request` instance. The `TagSet`
+//! determines the maximum queue depth by setting the number of `Request`
+//! instances available to the driver, and it determines the number of queues to
+//! instantiate for the driver. If possible, a driver should allocate one queue
+//! per core, to keep queue data local to a core.
+//!
+//! One `TagSet` instance can be shared between multiple `GenDisk` instances.
+//! This can be useful when implementing drivers where one piece of hardware
+//! with one set of IO resources is represented to the user as multiple disks.
+//!
+//! One significant difference between block device drivers implemented with
+//! these Rust abstractions and drivers implemented in C, is that the Rust
+//! drivers have to own a reference count on the `Request` type when the IO is
+//! in flight. This is to ensure that the C `struct request` instances backing
+//! the Rust `Request` instances are live while the Rust driver holds a
+//! reference to the `Request`. In addition, the conversion of an integer tag to
+//! a `Request` via the `TagSet` would not be sound without this bookkeeping.
+//!
+//! [`GenDisk`]: gen_disk::GenDisk
+//! [`GenDisk<T>`]: gen_disk::GenDisk
+//! [`GenDiskBuilder`]: gen_disk::GenDiskBuilder
+//! [`GenDiskBuilder::build`]: gen_disk::GenDiskBuilder::build
+//!
+//! # Example
+//!
+//! ```rust
+//! use kernel::{
+//! alloc::flags,
+//! block::mq::*,
+//! new_mutex,
+//! prelude::*,
+//! sync::{Arc, Mutex},
+//! types::{ARef, ForeignOwnable},
+//! };
+//!
+//! struct MyBlkDevice;
+//!
+//! #[vtable]
+//! impl Operations for MyBlkDevice {
+//!
+//! fn queue_rq(rq: ARef<Request<Self>>, _is_last: bool) -> Result {
+//! Request::end_ok(rq);
+//! Ok(())
+//! }
+//!
+//! fn commit_rqs() {}
+//! }
+//!
+//! let tagset: Arc<TagSet<MyBlkDevice>> =
+//! Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?;
+//! let mut disk = gen_disk::GenDiskBuilder::new()
+//! .capacity_sectors(4096)
+//! .build(format_args!("myblk"), tagset)?;
+//!
+//! # Ok::<(), kernel::error::Error>(())
+//! ```
+
+pub mod gen_disk;
+mod operations;
+mod raw_writer;
+mod request;
+mod tag_set;
+
+pub use operations::Operations;
+pub use request::Request;
+pub use tag_set::TagSet;
diff --git a/rust/kernel/block/mq/gen_disk.rs b/rust/kernel/block/mq/gen_disk.rs
new file mode 100644
index 000000000000..f548a6199847
--- /dev/null
+++ b/rust/kernel/block/mq/gen_disk.rs
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Generic disk abstraction.
+//!
+//! C header: [`include/linux/blkdev.h`](srctree/include/linux/blkdev.h)
+//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h)
+
+use crate::block::mq::{raw_writer::RawWriter, Operations, TagSet};
+use crate::error;
+use crate::{bindings, error::from_err_ptr, error::Result, sync::Arc};
+use core::fmt::{self, Write};
+
+/// A builder for [`GenDisk`].
+///
+/// Use this struct to configure and add new [`GenDisk`] to the VFS.
+pub struct GenDiskBuilder {
+ rotational: bool,
+ logical_block_size: u32,
+ physical_block_size: u32,
+ capacity_sectors: u64,
+}
+
+impl Default for GenDiskBuilder {
+ fn default() -> Self {
+ Self {
+ rotational: false,
+ logical_block_size: bindings::PAGE_SIZE as u32,
+ physical_block_size: bindings::PAGE_SIZE as u32,
+ capacity_sectors: 0,
+ }
+ }
+}
+
+impl GenDiskBuilder {
+ /// Create a new instance.
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Set the rotational media attribute for the device to be built.
+ pub fn rotational(mut self, rotational: bool) -> Self {
+ self.rotational = rotational;
+ self
+ }
+
+ /// Validate block size by verifying that it is between 512 and `PAGE_SIZE`,
+ /// and that it is a power of two.
+ fn validate_block_size(size: u32) -> Result<()> {
+ if !(512..=bindings::PAGE_SIZE as u32).contains(&size) || !size.is_power_of_two() {
+ Err(error::code::EINVAL)
+ } else {
+ Ok(())
+ }
+ }
+
+ /// Set the logical block size of the device to be built.
+ ///
+ /// This method will check that block size is a power of two and between 512
+ /// and `PAGE_SIZE`. If not, an error is returned and the block size is not set.
+ ///
+ /// This is the smallest unit the storage device can address. It is
+ /// typically 4096 bytes.
+ pub fn logical_block_size(mut self, block_size: u32) -> Result<Self> {
+ Self::validate_block_size(block_size)?;
+ self.logical_block_size = block_size;
+ Ok(self)
+ }
+
+ /// Set the physical block size of the device to be built.
+ ///
+ /// This method will check that block size is a power of two and between 512
+ /// and `PAGE_SIZE`. If not, an error is returned and the block size is not set.
+ ///
+ /// This is the smallest unit a physical storage device can write
+ /// atomically. It is usually the same as the logical block size but may be
+ /// bigger. One example is SATA drives with 4096 byte physical block size
+ /// that expose a 512 byte logical block size to the operating system.
+ pub fn physical_block_size(mut self, block_size: u32) -> Result<Self> {
+ Self::validate_block_size(block_size)?;
+ self.physical_block_size = block_size;
+ Ok(self)
+ }
+
+ /// Set the capacity of the device to be built, in sectors (512 bytes).
+ pub fn capacity_sectors(mut self, capacity: u64) -> Self {
+ self.capacity_sectors = capacity;
+ self
+ }
+
+ /// Build a new `GenDisk` and add it to the VFS.
+ pub fn build<T: Operations>(
+ self,
+ name: fmt::Arguments<'_>,
+ tagset: Arc<TagSet<T>>,
+ ) -> Result<GenDisk<T>> {
+ let lock_class_key = crate::sync::LockClassKey::new();
+
+ // SAFETY: `bindings::queue_limits` contains only fields that are valid when zeroed.
+ let mut lim: bindings::queue_limits = unsafe { core::mem::zeroed() };
+
+ lim.logical_block_size = self.logical_block_size;
+ lim.physical_block_size = self.physical_block_size;
+ if self.rotational {
+ lim.features = bindings::BLK_FEAT_ROTATIONAL;
+ }
+
+ // SAFETY: `tagset.raw_tag_set()` points to a valid and initialized tag set
+ let gendisk = from_err_ptr(unsafe {
+ bindings::__blk_mq_alloc_disk(
+ tagset.raw_tag_set(),
+ &mut lim,
+ core::ptr::null_mut(),
+ lock_class_key.as_ptr(),
+ )
+ })?;
+
+ const TABLE: bindings::block_device_operations = bindings::block_device_operations {
+ submit_bio: None,
+ open: None,
+ release: None,
+ ioctl: None,
+ compat_ioctl: None,
+ check_events: None,
+ unlock_native_capacity: None,
+ getgeo: None,
+ set_read_only: None,
+ swap_slot_free_notify: None,
+ report_zones: None,
+ devnode: None,
+ alternative_gpt_sector: None,
+ get_unique_id: None,
+ // TODO: Set to THIS_MODULE. Waiting for const_refs_to_static feature to
+ // be merged (unstable in rustc 1.78 which is staged for linux 6.10)
+ // https://github.com/rust-lang/rust/issues/119618
+ owner: core::ptr::null_mut(),
+ pr_ops: core::ptr::null_mut(),
+ free_disk: None,
+ poll_bio: None,
+ };
+
+ // SAFETY: `gendisk` is a valid pointer as we initialized it above
+ unsafe { (*gendisk).fops = &TABLE };
+
+ let mut raw_writer = RawWriter::from_array(
+ // SAFETY: `gendisk` points to a valid and initialized instance. We
+ // have exclusive access, since the disk is not added to the VFS
+ // yet.
+ unsafe { &mut (*gendisk).disk_name },
+ )?;
+ raw_writer.write_fmt(name)?;
+ raw_writer.write_char('\0')?;
+
+ // SAFETY: `gendisk` points to a valid and initialized instance of
+ // `struct gendisk`. `set_capacity` takes a lock to synchronize this
+ // operation, so we will not race.
+ unsafe { bindings::set_capacity(gendisk, self.capacity_sectors) };
+
+ crate::error::to_result(
+ // SAFETY: `gendisk` points to a valid and initialized instance of
+ // `struct gendisk`.
+ unsafe {
+ bindings::device_add_disk(core::ptr::null_mut(), gendisk, core::ptr::null_mut())
+ },
+ )?;
+
+ // INVARIANT: `gendisk` was initialized above.
+ // INVARIANT: `gendisk` was added to the VFS via `device_add_disk` above.
+ Ok(GenDisk {
+ _tagset: tagset,
+ gendisk,
+ })
+ }
+}
+
+/// A generic block device.
+///
+/// # Invariants
+///
+/// - `gendisk` must always point to an initialized and valid `struct gendisk`.
+/// - `gendisk` was added to the VFS through a call to
+/// `bindings::device_add_disk`.
+pub struct GenDisk<T: Operations> {
+ _tagset: Arc<TagSet<T>>,
+ gendisk: *mut bindings::gendisk,
+}
+
+// SAFETY: `GenDisk` is an owned pointer to a `struct gendisk` and an `Arc` to a
+// `TagSet`. It is safe to send this to other threads as long as T is Send.
+unsafe impl<T: Operations + Send> Send for GenDisk<T> {}
+
+impl<T: Operations> Drop for GenDisk<T> {
+ fn drop(&mut self) {
+ // SAFETY: By type invariant, `self.gendisk` points to a valid and
+ // initialized instance of `struct gendisk`, and it was previously added
+ // to the VFS.
+ unsafe { bindings::del_gendisk(self.gendisk) };
+ }
+}
diff --git a/rust/kernel/block/mq/operations.rs b/rust/kernel/block/mq/operations.rs
new file mode 100644
index 000000000000..9ba7fdfeb4b2
--- /dev/null
+++ b/rust/kernel/block/mq/operations.rs