// SPDX-License-Identifier: GPL-2.0-only
/*
* kexec_handover.c - kexec handover metadata processing
* Copyright (C) 2023 Alexander Graf <graf@amazon.com>
* Copyright (C) 2025 Microsoft Corporation, Mike Rapoport <rppt@kernel.org>
* Copyright (C) 2025 Google LLC, Changyuan Lyu <changyuanl@google.com>
* Copyright (C) 2025 Pasha Tatashin <pasha.tatashin@soleen.com>
*/
#define pr_fmt(fmt) "KHO: " fmt
#include <linux/cleanup.h>
#include <linux/cma.h>
#include <linux/kmemleak.h>
#include <linux/count_zeros.h>
#include <linux/kexec.h>
#include <linux/kexec_handover.h>
#include <linux/kho/abi/kexec_handover.h>
#include <linux/libfdt.h>
#include <linux/list.h>
#include <linux/memblock.h>
#include <linux/page-isolation.h>
#include <linux/unaligned.h>
#include <linux/vmalloc.h>
#include <asm/early_ioremap.h>
/*
* KHO is tightly coupled with mm init and needs access to some of mm
* internal APIs.
*/
#include "../../mm/internal.h"
#include "../kexec_internal.h"
#include "kexec_handover_internal.h"
/* The magic token for preserved pages */
#define KHO_PAGE_MAGIC 0x4b484f50U /* ASCII for 'KHOP' */
/*
* KHO uses page->private, which is an unsigned long, to store page metadata.
* Use it to store both the magic and the order.
*
* The anonymous struct overlays page_private, so the two 32-bit fields can be
* packed into, or unpacked from, a single unsigned long value.
*/
union kho_page_info {
/* Raw value, in the form stored in page->private */
unsigned long page_private;
struct {
/* Order of the page */
unsigned int order;
/* NOTE(review): presumably KHO_PAGE_MAGIC for preserved pages - confirm at the users */
unsigned int magic;
};
};
/* The overlay only works if the union is exactly as large as page->private */
static_assert(sizeof(union kho_page_info) == sizeof(((struct page *)0)->private));
/*
* Global KHO on/off flag. Defaults to the build-time choice
* CONFIG_KEXEC_HANDOVER_ENABLE_DEFAULT, can be overridden with the "kho" early
* parameter (see kho_parse_enable()) and is marked read-only after init.
*/
static bool kho_enable __ro_after_init = IS_ENABLED(CONFIG_KEXEC_HANDOVER_ENABLE_DEFAULT);
/**
* kho_is_enabled - report whether KHO is enabled.
*
* Return: the current value of the kho_enable flag.
*/
bool kho_is_enabled(void)
{
return kho_enable;
}
/* Exported to GPL modules */
EXPORT_SYMBOL_GPL(kho_is_enabled);
/*
* Handler for the "kho" early parameter: parse @p as a boolean and store the
* result in kho_enable.
*
* Return: whatever kstrtobool() returns for @p.
*/
static int __init kho_parse_enable(char *p)
{
return kstrtobool(p, &kho_enable);
}
early_param("kho", kho_parse_enable);
/*
* Keep track of memory that is to be preserved across KHO.
*
* The serializing side uses two levels of xarrays to manage chunks of per-order
* PAGE_SIZE byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order
* of an 8TB system would fit inside a single 4096 byte bitmap. For order 0
* allocations each bitmap will cover 128M of address space. Thus, for 16G of
* memory at most 512K of bitmap memory will be needed for order 0.
*
* This approach is fully incremental: as the serialization progresses, folios
* can continue to be aggregated into the tracker. The final step, immediately
* prior to kexec, serializes the xarray information into a linked list for the
* successor kernel to parse.
*/
/* Number of bits held by one PAGE_SIZE byte bitmap chunk */
#define PRESERVE_BITS (PAGE_SIZE * 8)
/*
* One bitmap chunk. For a given order, the block starting at pfn is tracked by
* bit (pfn >> order) % PRESERVE_BITS of the chunk stored at index
* (pfn >> order) / PRESERVE_BITS (see __kho_unpreserve_order()).
*/
struct kho_mem_phys_bits {
DECLARE_BITMAP(preserve, PRESERVE_BITS);
};
/* Each chunk must occupy exactly one page */
static_assert(sizeof(struct kho_mem_phys_bits) == PAGE_SIZE);
/*
* Tracking state for a single order: a sparse collection of bitmap chunks,
* looked up by (pfn >> order) / PRESERVE_BITS.
*/
struct kho_mem_phys {
/*
* Points to kho_mem_phys_bits, a sparse bitmap array. Each bit is sized
* to order.
*/
struct xarray phys_bits;
};
/* Top level of the preserved-memory tracker: one kho_mem_phys per order */
struct kho_mem_track {
/* Points to kho_mem_phys, each order gets its own bitmap tree */
/* Indexed by the order itself */
struct xarray orders;
};
struct khoser_mem_chunk;
/*
* State of the outgoing (serializing) side of KHO.
* NOTE(review): field roles below are read off names and types; confirm
* against the users further down the file.
*/
struct kho_out {
/* Presumably the FDT blob handed over to the next kernel - confirm */
void *fdt;
/* Set once finalization has happened; starts out false */
bool finalized;
struct mutex lock; /* protects KHO FDT finalization */
/* Tracker of the memory to be preserved */
struct kho_mem_track track;
/* debugfs state for the outgoing side */
struct kho_debugfs dbg;
};
/*
 * The single instance of the outgoing KHO state. The mutex and the per-order
 * xarray are initialized statically; every member not named here starts out
 * zeroed.
 */
static struct kho_out kho_out = {
	.finalized = false,
	.track.orders = XARRAY_INIT(kho_out.track.orders, 0),
	.lock = __MUTEX_INITIALIZER(kho_out.lock),
};
static void *xa_load_or_alloc(struct xarray *xa, unsigned long index)
{
void *res = xa_load(xa, index);
if (res)
return res;
void *elm __free(free_page) = (void *)get_zeroed_page(GFP_KERNEL);
if (!elm)
return ERR_PTR(-ENOMEM);
if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), PAGE_SIZE)))
return ERR_PTR(-EINVAL);
res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
if (xa_is_err(res))
return ERR_PTR(xa_err(res));
else if (res)
return res;
return no_free_ptr(elm);
}
static void __kho_unpreserve_order(struct kho_mem_track *track, unsigned long pfn,
unsigned int order)
{
struct kho_mem_phys_bits *bits;
struct kho_mem_phys *physxa;
const unsigned long pfn_high = pfn >> order;
physxa = xa_load(&track->orders, order);
if (WARN_ON_ONCE(!physxa))
return;
bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
if (WARN_ON_ONCE(!bits))
return;
clear_bit(pfn_high % PRESERVE_BITS, bits-&g
|