// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2016-20 Intel Corporation. */

#include <linux/file.h>
#include <linux/freezer.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/miscdevice.h>
#include <linux/node.h>
#include <linux/pagemap.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <asm/sgx.h>
#include "driver.h"
#include "encl.h"
#include "encls.h"
struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
static int sgx_nr_epc_sections;
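
/* The EPC page reclaimer kthread (ksgxd) and the wait queue it sleeps on. */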
static struct task_struct *ksgxd_tsk;
static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq);
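
/*
 * Maps EPC physical address ranges to the sgx_epc_section that covers them,
 * so that a physical address (e.g. reported by a machine check) can be
 * resolved back to its EPC page.
 */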
static DEFINE_XARRAY(sgx_epc_address_space);

/*
 * These variables are part of the state of the reclaimer, and must be accessed
 * with sgx_reclaimer_lock acquired.
 */
static LIST_HEAD(sgx_active_page_list);
static DEFINE_SPINLOCK(sgx_reclaimer_lock);

static atomic_long_t sgx_nr_free_pages = ATOMIC_LONG_INIT(0);

/* Nodes with one or more EPC sections. */
static nodemask_t sgx_numa_mask;

/*
 * Array with one list_head for each possible NUMA node. Each list contains
 * all the sgx_epc_sections that are on that node.
 */
static struct sgx_numa_node *sgx_numa_nodes;
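
/* EPC pages that still need to be sanitized with EREMOVE after boot or kexec. */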
static LIST_HEAD(sgx_dirty_page_list);

/*
 * Reset post-kexec EPC pages to the uninitialized state. The pages are removed
 * from the input list and made available to the SGX page allocator. SECS pages
 * that precede their child pages in the input list are left intact (EREMOVE
 * fails on them until their children are gone) and stay on the list for a
 * later pass.
 */
static void __sgx_sanitize_pages(struct list_head *dirty_page_list)
{
struct sgx_epc_page *page;
LIST_HEAD(dirty);
int ret;

	/* dirty_page_list is thread-local, no need for a lock: */
while (!list_empty(dirty_page_list)) {
if (kthread_should_stop())
return;
page = list_first_entry(dirty_page_list, struct sgx_epc_page, list);
/*
* Checking page->poison without holding the node->lock
* is racy, but losing the race (i.e. poison is set just
* after the check) just means __eremove() will be uselessly
* called for a page that sgx_free_epc_page() will put onto
* the node->sgx_poison_page_list later.
*/
if (page->poison) {
struct sgx_epc_section *section = &sgx_epc_sections[page->section];
struct sgx_numa_node *node = section->node;
spin_lock(&node->lock);
list_move(&page->list, &node->sgx_poison_page_list);
spin_unlock(&node->lock);
continue;
}
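
		/*
		 * EREMOVE returns the page to the uninitialized state. It
		 * fails with SGX_CHILD_PRESENT for a SECS page whose child
		 * pages have not been removed yet.
		 */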
ret = __eremove(sgx_get_epc_virt_addr(page));
if (!ret) {
/*
* page is now sanitized. Make it available via the SGX
* page allocator:
*/
list_del(&page->list);
sgx_free_epc_page(page);
} else {
/* The page is not yet clean - move to the dirty list. */
list_move_tail(&page->list, &dirty);
}
cond_resched();
}
list_splice(&dirty, dirty_page_list);
}
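
/*
 * Check whether the enclave page backed by @epc_page has been accessed since
 * the last check: walk every mm that maps the enclave (under SRCU) and
 * test-and-clear the accessed bit of its PTE. Return true when no address
 * space has touched the page, i.e. the page has aged and is a reclaim
 * candidate.
 */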
static bool sgx_reclaimer_age(struct sgx_epc_page *epc_page)
{
struct sgx_encl_page *page = epc_page->owner;
struct sgx_encl *encl = page->encl;
struct sgx_encl_mm *encl_mm;
bool ret = true;
int idx;

	idx = srcu_read_lock(&encl->srcu);
list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
if (!mmget_not_zero(encl_mm->mm))
continue;
mmap_read_lock(encl_mm->mm);
ret = !sgx_encl_test_and_clear_young(encl_mm->mm, page);
mmap_read_unlock(encl_mm->mm);
mmput_async(encl_mm->mm);
if (!ret)
break;
}
srcu_read_unlock(&encl->srcu, idx);
if (!ret)
return false;
return true;
}
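
/*
 * Make the enclave page unmappable before write-back: zap its PTEs from every
 * mm that maps the enclave, and then mark the EPC page blocked with EBLOCK so
 * that no new mappings to it can be created.
 */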
static void sgx_reclaimer_block(struct sgx_epc_page *epc_page)
{
struct sgx_encl_page *page = epc_page->owner;
unsigned long addr = page->desc & PAGE_MASK;
struct sgx_encl *encl = page->encl;
unsigned long mm_list_version;
struct sgx_encl_mm *encl_mm;
struct vm_area_struct *vma;
int idx, ret;
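
	/*
	 * Zap the PTEs of this page in every address space that maps the
	 * enclave. If a new mm is added concurrently, encl->mm_list_version
	 * changes and the walk is restarted so that no mapping is missed.
	 */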
do {
mm_list_version = encl->mm_list_version;
/* Pairs with smp_rmb() in sgx_encl_mm_add(). */
smp_rmb();
idx = srcu_read_lock(&encl->srcu);
list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
if (!mmget_not_zero(encl_mm->mm))
continue;
mmap_read_lock(encl_mm->mm);
ret = sgx_encl_find(encl_mm->mm, addr, &vma);
if (!ret && encl == vma->vm_private_data)
zap_vma_ptes(vma, addr, PAGE_SIZE);
mmap_read_unlock(encl_mm->mm);
mmput_async(encl_mm->mm);
}
srcu_read_unlock(&encl->srcu, idx);
} while (unlikely(encl->mm_list_version != mm_list_version));
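
	/*
	 * With all PTEs removed, mark the page blocked with EBLOCK so that no
	 * new TLB mappings to it can be created before it is written back.
	 */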
mutex_lock(&encl->lock);
ret = __eblock(sgx_get_epc_virt_addr(epc_page));
if (encls_failed(ret))
ENCLS_WARN(ret, "EBLOCK");
mutex_unlock(&encl->lock);
}
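
/*
 * Write the page back with EWB: the encrypted contents of @epc_page go to the
 * shmem backing page, its PCMD metadata is written to the backing PCMD page,
 * and an anti-replay version counter is stored in @va_slot. The pageinfo
 * linear address and SECS fields are not needed for EWB and are left zero.
 */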
static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot,
struct sgx_backing *backing)
{
struct sgx_pageinfo pginfo;
int ret;

	pginfo.addr = 0;
pginfo.secs = 0;
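
	/*
	 * Map the shmem backing page for the page contents and the page that
	 * holds its PCMD entry; the PCMD for this page lives at pcmd_offset
	 * within that page.
	 */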
pginfo.contents = (unsigned long)kmap_atomic(backing->contents);
pginfo.metadata = (unsigned long)kmap_atomic(backing->pcmd) +
backing->pcmd_offset;
ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot);
kunmap_atomic((void *)(unsigned