summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit_fsnotify.c2
-rw-r--r--kernel/debug/debug_core.c11
-rw-r--r--kernel/dma/swiotlb.c310
-rw-r--r--kernel/events/core.c8
-rw-r--r--kernel/events/uprobes.c2
-rw-r--r--kernel/fork.c30
-rw-r--r--kernel/groups.c7
-rw-r--r--kernel/kexec_internal.h2
-rw-r--r--kernel/locking/rtmutex.c4
-rw-r--r--kernel/locking/rwsem.c2
-rw-r--r--kernel/locking/semaphore.c2
-rw-r--r--kernel/ptrace.c2
-rw-r--r--kernel/sched/fair.c2
-rw-r--r--kernel/sched/membarrier.c2
-rw-r--r--kernel/signal.c4
-rw-r--r--kernel/sysctl.c8
-rw-r--r--kernel/trace/Kconfig2
-rw-r--r--kernel/trace/Makefile1
-rw-r--r--kernel/trace/blktrace.c20
-rw-r--r--kernel/trace/error_report-traces.c11
-rw-r--r--kernel/trace/ring_buffer.c21
-rw-r--r--kernel/trace/trace.c6
-rw-r--r--kernel/trace/trace.h1
-rw-r--r--kernel/trace/trace_events_synth.c4
24 files changed, 308 insertions, 156 deletions
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index 5b3f01da172b..60739d5e3373 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -84,7 +84,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa
dentry = kern_path_locked(pathname, &path);
if (IS_ERR(dentry))
- return (void *)dentry; /* returning an error */
+ return ERR_CAST(dentry); /* returning an error */
inode = path.dentry->d_inode;
inode_unlock(inode);
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index b636d517c02c..4708aec492df 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -455,6 +455,17 @@ setundefined:
return 0;
}
+void kgdb_free_init_mem(void)
+{
+ int i;
+
+ /* Clear init memory breakpoints. */
+ for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
+ if (init_section_contains((void *)kgdb_break[i].bpt_addr, 0))
+ kgdb_break[i].state = BP_UNDEFINED;
+ }
+}
+
#ifdef CONFIG_KGDB_KDB
void kdb_dump_stack_on_cpu(int cpu)
{
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 7c42df6e6100..c10e855a03bc 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -50,9 +50,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/swiotlb.h>
-#define OFFSET(val,align) ((unsigned long) \
- ( (val) & ( (align) - 1)))
-
#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
/*
@@ -103,6 +100,11 @@ static unsigned int max_segment;
static phys_addr_t *io_tlb_orig_addr;
/*
+ * The mapped buffer's size should be validated during a sync operation.
+ */
+static size_t *io_tlb_orig_size;
+
+/*
* Protect the above data structures in the map and unmap calls
*/
static DEFINE_SPINLOCK(io_tlb_lock);
@@ -171,7 +173,7 @@ void __init swiotlb_adjust_size(unsigned long new_size)
* adjust/expand SWIOTLB size for their use.
*/
if (!io_tlb_nslabs) {
- size = ALIGN(new_size, 1 << IO_TLB_SHIFT);
+ size = ALIGN(new_size, IO_TLB_SIZE);
io_tlb_nslabs = size >> IO_TLB_SHIFT;
io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
@@ -192,6 +194,16 @@ void swiotlb_print_info(void)
bytes >> 20);
}
+static inline unsigned long io_tlb_offset(unsigned long val)
+{
+ return val & (IO_TLB_SEGSIZE - 1);
+}
+
+static inline unsigned long nr_slots(u64 val)
+{
+ return DIV_ROUND_UP(val, IO_TLB_SIZE);
+}
+
/*
* Early SWIOTLB allocation may be too early to allow an architecture to
* perform the desired operations. This function allows the architecture to
@@ -240,9 +252,16 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
__func__, alloc_size, PAGE_SIZE);
+ alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(size_t));
+ io_tlb_orig_size = memblock_alloc(alloc_size, PAGE_SIZE);
+ if (!io_tlb_orig_size)
+ panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
+ __func__, alloc_size, PAGE_SIZE);
+
for (i = 0; i < io_tlb_nslabs; i++) {
- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+ io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i);
io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+ io_tlb_orig_size[i] = 0;
}
io_tlb_index = 0;
no_iotlb_memory = false;
@@ -363,7 +382,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
* between io_tlb_start and io_tlb_end.
*/
io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
- get_order(io_tlb_nslabs * sizeof(int)));
+ get_order(io_tlb_nslabs * sizeof(int)));
if (!io_tlb_list)
goto cleanup3;
@@ -374,9 +393,18 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
if (!io_tlb_orig_addr)
goto cleanup4;
+ io_tlb_orig_size = (size_t *)
+ __get_free_pages(GFP_KERNEL,
+ get_order(io_tlb_nslabs *
+ sizeof(size_t)));
+ if (!io_tlb_orig_size)
+ goto cleanup5;
+
+
for (i = 0; i < io_tlb_nslabs; i++) {
- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+ io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i);
io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+ io_tlb_orig_size[i] = 0;
}
io_tlb_index = 0;
no_iotlb_memory = false;
@@ -389,6 +417,10 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
return 0;
+cleanup5:
+ free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
+ sizeof(phys_addr_t)));
+
cleanup4:
free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
sizeof(int)));
@@ -404,6 +436,8 @@ void __init swiotlb_exit(void)
return;
if (late_alloc) {
+ free_pages((unsigned long)io_tlb_orig_size,
+ get_order(io_tlb_nslabs * sizeof(size_t)));
free_pages((unsigned long)io_tlb_orig_addr,
get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
@@ -413,6 +447,8 @@ void __init swiotlb_exit(void)
} else {
memblock_free_late(__pa(io_tlb_orig_addr),
PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
+ memblock_free_late(__pa(io_tlb_orig_size),
+ PAGE_ALIGN(io_tlb_nslabs * sizeof(size_t)));
memblock_free_late(__pa(io_tlb_list),
PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
memblock_free_late(io_tlb_start,
@@ -461,79 +497,71 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
}
}
-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
- size_t mapping_size, size_t alloc_size,
- enum dma_data_direction dir, unsigned long attrs)
-{
- dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, io_tlb_start);
- unsigned long flags;
- phys_addr_t tlb_addr;
- unsigned int nslots, stride, index, wrap;
- int i;
- unsigned long mask;
- unsigned long offset_slots;
- unsigned long max_slots;
- unsigned long tmp_io_tlb_used;
-
- if (no_iotlb_memory)
- panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
-
- if (mem_encrypt_active())
- pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
+#define slot_addr(start, idx) ((start) + ((idx) << IO_TLB_SHIFT))
- if (mapping_size > alloc_size) {
- dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
- mapping_size, alloc_size);
- return (phys_addr_t)DMA_MAPPING_ERROR;
- }
+/*
+ * Return the offset into a iotlb slot required to keep the device happy.
+ */
+static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
+{
+ return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
+}
- mask = dma_get_seg_boundary(hwdev);
+/*
+ * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
+ */
+static inline unsigned long get_max_slots(unsigned long boundary_mask)
+{
+ if (boundary_mask == ~0UL)
+ return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
+ return nr_slots(boundary_mask + 1);
+}
- tbl_dma_addr &= mask;
+static unsigned int wrap_index(unsigned int index)
+{
+ if (index >= io_tlb_nslabs)
+ return 0;
+ return index;
+}
- offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+/*
+ * Find a suitable number of IO TLB entries size that will fit this request and
+ * allocate a buffer from that IO TLB pool.
+ */
+static int find_slots(struct device *dev, phys_addr_t orig_addr,
+ size_t alloc_size)
+{
+ unsigned long boundary_mask = dma_get_seg_boundary(dev);
+ dma_addr_t tbl_dma_addr =
+ phys_to_dma_unencrypted(dev, io_tlb_start) & boundary_mask;
+ unsigned long max_slots = get_max_slots(boundary_mask);
+ unsigned int iotlb_align_mask =
+ dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
+ unsigned int nslots = nr_slots(alloc_size), stride;
+ unsigned int index, wrap, count = 0, i;
+ unsigned long flags;
- /*
- * Carefully handle integer overflow which can occur when mask == ~0UL.
- */
- max_slots = mask + 1
- ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
- : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
+ BUG_ON(!nslots);
/*
- * For mappings greater than or equal to a page, we limit the stride
- * (and hence alignment) to a page size.
+ * For mappings with an alignment requirement don't bother looping to
+ * unaligned slots once we found an aligned one. For allocations of
+ * PAGE_SIZE or larger only look for page aligned allocations.
*/
- nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+ stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
if (alloc_size >= PAGE_SIZE)
- stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
- else
- stride = 1;
-
- BUG_ON(!nslots);
+ stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
- /*
- * Find suitable number of IO TLB entries size that will fit this
- * request and allocate a buffer from that IO TLB pool.
- */
spin_lock_irqsave(&io_tlb_lock, flags);
-
if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
goto not_found;
- index = ALIGN(io_tlb_index, stride);
- if (index >= io_tlb_nslabs)
- index = 0;
- wrap = index;
-
+ index = wrap = wrap_index(ALIGN(io_tlb_index, stride));
do {
- while (iommu_is_span_boundary(index, nslots, offset_slots,
- max_slots)) {
- index += stride;
- if (index >= io_tlb_nslabs)
- index = 0;
- if (index == wrap)
- goto not_found;
+ if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) !=
+ (orig_addr & iotlb_align_mask)) {
+ index = wrap_index(index + 1);
+ continue;
}
/*
@@ -541,55 +569,96 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
* contiguous buffers, we allocate the buffers from that slot
* and mark the entries as '0' indicating unavailable.
*/
- if (io_tlb_list[index] >= nslots) {
- int count = 0;
-
- for (i = index; i < (int) (index + nslots); i++)
- io_tlb_list[i] = 0;
- for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
- io_tlb_list[i] = ++count;
- tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
-
- /*
- * Update the indices to avoid searching in the next
- * round.
- */
- io_tlb_index = ((index + nslots) < io_tlb_nslabs
- ? (index + nslots) : 0);
-
- goto found;
+ if (!iommu_is_span_boundary(index, nslots,
+ nr_slots(tbl_dma_addr),
+ max_slots)) {
+ if (io_tlb_list[index] >= nslots)
+ goto found;
}
- index += stride;
- if (index >= io_tlb_nslabs)
- index = 0;
+ index = wrap_index(index + stride);
} while (index != wrap);
not_found:
- tmp_io_tlb_used = io_tlb_used;
-
spin_unlock_irqrestore(&io_tlb_lock, flags);
- if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
- dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
- alloc_size, io_tlb_nslabs, tmp_io_tlb_used);
- return (phys_addr_t)DMA_MAPPING_ERROR;
+ return -1;
+
found:
+ for (i = index; i < index + nslots; i++)
+ io_tlb_list[i] = 0;
+ for (i = index - 1;
+ io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
+ io_tlb_list[i]; i--)
+ io_tlb_list[i] = ++count;
+
+ /*
+ * Update the indices to avoid searching in the next round.
+ */
+ if (index + nslots < io_tlb_nslabs)
+ io_tlb_index = index + nslots;
+ else
+ io_tlb_index = 0;
io_tlb_used += nslots;
+
spin_unlock_irqrestore(&io_tlb_lock, flags);
+ return index;
+}
+
+phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
+ size_t mapping_size, size_t alloc_size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+ unsigned int index, i;
+ phys_addr_t tlb_addr;
+
+ if (no_iotlb_memory)
+ panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
+
+ if (mem_encrypt_active())
+ pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
+
+ if (mapping_size > alloc_size) {
+ dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
+ mapping_size, alloc_size);
+ return (phys_addr_t)DMA_MAPPING_ERROR;
+ }
+
+ index = find_slots(dev, orig_addr, alloc_size + offset);
+ if (index == -1) {
+ if (!(attrs & DMA_ATTR_NO_WARN))
+ dev_warn_ratelimited(dev,
+ "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
+ alloc_size, io_tlb_nslabs, io_tlb_used);
+ return (phys_addr_t)DMA_MAPPING_ERROR;
+ }
/*
* Save away the mapping from the original address to the DMA address.
* This is needed when we sync the memory. Then we sync the buffer if
* needed.
*/
- for (i = 0; i < nslots; i++)
- io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
+ for (i = 0; i < nr_slots(alloc_size + offset); i++) {
+ io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i);
+ io_tlb_orig_size[index+i] = alloc_size - (i << IO_TLB_SHIFT);
+ }
+ tlb_addr = slot_addr(io_tlb_start, index) + offset;
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
-
return tlb_addr;
}
+static void validate_sync_size_and_truncate(struct device *hwdev, size_t orig_size, size_t *size)
+{
+ if (*size > orig_size) {
+ /* Warn and truncate mapping_size */
+ dev_WARN_ONCE(hwdev, 1,
+ "Attempt for buffer overflow. Original size: %zu. Mapping size: %zu.\n",
+ orig_size, *size);
+ *size = orig_size;
+ }
+}
+
/*
* tlb_addr is the physical address of the bounce buffer to unmap.
*/
@@ -598,10 +667,13 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
enum dma_data_direction dir, unsigned long attrs)
{
unsigned long flags;
- int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
- int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr);
+ int i, count, nslots = nr_slots(alloc_size + offset);
+ int index = (tlb_addr - offset - io_tlb_start) >> IO_TLB_SHIFT;
phys_addr_t orig_addr = io_tlb_orig_addr[index];
+ validate_sync_size_and_truncate(hwdev, io_tlb_orig_size[index], &mapping_size);
+
/*
* First, sync the memory before unmapping the entry
*/
@@ -617,26 +689,30 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
* with slots below and above the pool being returned.
*/
spin_lock_irqsave(&io_tlb_lock, flags);
- {
- count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
- io_tlb_list[index + nslots] : 0);
- /*
- * Step 1: return the slots to the free list, merging the
- * slots with superceeding slots
- */
- for (i = index + nslots - 1; i >= index; i--) {
- io_tlb_list[i] = ++count;
- io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
- }
- /*
- * Step 2: merge the returned slots with the preceding slots,
- * if available (non zero)
- */
- for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
- io_tlb_list[i] = ++count;
+ if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE))
+ count = io_tlb_list[index + nslots];
+ else
+ count = 0;
- io_tlb_used -= nslots;
+ /*
+ * Step 1: return the slots to the free list, merging the slots with
+ * superceeding slots
+ */
+ for (i = index + nslots - 1; i >= index; i--) {
+ io_tlb_list[i] = ++count;
+ io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+ io_tlb_orig_size[i] = 0;
}
+
+ /*
+ * Step 2: merge the returned slots with the preceding slots, if
+ * available (non zero)
+ */
+ for (i = index - 1;
+ io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && io_tlb_list[i];
+ i--)
+ io_tlb_list[i] = ++count;
+ io_tlb_used -= nslots;
spin_unlock_irqrestore(&io_tlb_lock, flags);
}
@@ -645,11 +721,13 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
enum dma_sync_target target)
{
int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ size_t orig_size = io_tlb_orig_size[index];
phys_addr_t orig_addr = io_tlb_orig_addr[index];
if (orig_addr == INVALID_PHYS_ADDR)
return;
- orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
+
+ validate_sync_size_and_truncate(hwdev, orig_size, &size);
switch (target) {
case SYNC_FOR_CPU:
@@ -707,7 +785,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
size_t swiotlb_max_mapping_size(struct device *dev)
{
- return ((size_t)1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE;
+ return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE;
}
bool is_swiotlb_active(void)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 129dee540a8b..0aeca5f3c0ac 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -269,7 +269,7 @@ static void event_function_call(struct perf_event *event, event_f func, void *da
if (!event->parent) {
/*
* If this is a !child event, we must hold ctx::mutex to
- * stabilize the the event->ctx relation. See
+ * stabilize the event->ctx relation. See
* perf_event_ctx_lock().
*/
lockdep_assert_held(&ctx->mutex);
@@ -1303,7 +1303,7 @@ static void put_ctx(struct perf_event_context *ctx)
* life-time rules separate them. That is an exiting task cannot fork, and a
* spawning task cannot (yet) exit.
*
- * But remember that that these are parent<->child context relations, and
+ * But remember that these are parent<->child context relations, and
* migration does not affect children, therefore these two orderings should not
* interact.
*
@@ -1442,7 +1442,7 @@ static u64 primary_event_id(struct perf_event *event)
/*
* Get the perf_event_context for a task and lock it.
*
- * This has to cope with with the fact that until it is locked,
+ * This has to cope with the fact that until it is locked,
* the context could get moved to another task.
*/
static struct perf_event_context *
@@ -2486,7 +2486,7 @@ static void perf_set_shadow_time(struct perf_event *event,
* But this is a bit hairy.
*
* So instead, we have an explicit cgroup call to remain
- * within the time time source all along. We believe it
+ * within the time source all along. We believe it
* is cleaner and simpler to understand.
*/
if (is_cgroup_event(event))
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 3ea7f8f92f1d..6addc9780319 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1733,7 +1733,7 @@ void uprobe_free_utask(struct task_struct *t)
}
/*
- * Allocate a uprobe_task object for the task if if necessary.
+ * Allocate a uprobe_task object for the task if necessary.
* Called when the thread hits a breakpoint.
*
* Returns:
diff --git a/kernel/fork.c b/kernel/fork.c
index d66cd1014211..d3171e8e88e5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1940,6 +1940,8 @@ static __latent_entropy struct task_struct *copy_process(
p = dup_task_struct(current, node);
if (!p)
goto fork_out;
+ if (args->io_thread)
+ p->flags |= PF_IO_WORKER;
/*
* This _must_ happen before we call free_task(), i.e. before we jump
@@ -2411,6 +2413,34 @@ struct mm_struct *copy_init_mm(void)
}
/*
+ * This is like kernel_clone(), but shaved down and tailored to just
+ * creating io_uring workers. It returns a created task, or an error pointer.
+ * The returned task is inactive, and the caller must fire it up through
+ * wake_up_new_task(p). All signals are blocked in the created task.
+ */
+struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
+{
+ unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
+ CLONE_IO;
+ struct kernel_clone_args args = {
+ .flags = ((lower_32_bits(flags) | CLONE_VM |
+ CLONE_UNTRACED) & ~CSIGNAL),
+ .exit_signal = (lower_32_bits(flags) & CSIGNAL),
+ .stack = (unsigned long)fn,
+ .stack_size = (unsigned long)arg,
+ .io_thread = 1,
+ };
+ struct task_struct *tsk;
+
+ tsk = copy_process(NULL, 0, node, &args);
+ if (!IS_ERR(tsk)) {
+ sigfillset(&tsk->blocked);
+ sigdelsetmask(&tsk->blocked, sigmask(SIGKILL));
+ }
+ return tsk;
+}
+
+/*
* Ok, this is the main fork-routine.
*
* It copies the process, and if successful kick-starts
diff --git a/kernel/groups.c b/kernel/groups.c
index fe7e6385530e..787b381c7c00 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -15,12 +15,7 @@
struct group_info *groups_alloc(int gidsetsize)
{
struct group_info *gi;
- unsigned int len;
-
- len = sizeof(struct group_info) + sizeof(kgid_t) * gidsetsize;
- gi = kmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_NOWARN|__GFP_NORETRY);
- if (!gi)
- gi = __vmalloc(len, GFP_KERNEL_ACCOUNT);
+ gi = kvmalloc(struct_size(gi, gid, gidsetsize), GFP_KERNEL_ACCOUNT);
if (!gi)
return NULL;
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 39d30ccf8d87..48aaf2ac0d0d 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -13,8 +13,6 @@ void kimage_terminate(struct kimage *image);
int kimage_is_destination_range(struct kimage *image,
unsigned long start, unsigned long end);
-int machine_kexec_post_load(struct kimage *image);
-
extern struct mutex kexec_mutex;
#ifdef CONFIG_KEXEC_FILE
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 03b21135313c..48fff6437901 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1420,7 +1420,7 @@ rt_mutex_fasttrylock(struct rt_mutex *lock,
}
/*
- * Performs the wakeup of the the top-waiter and re-enables preemption.
+ * Performs the wakeup of the top-waiter and re-enables preemption.
*/
void rt_mutex_postunlock(struct wake_q_head *wake_q)
{
@@ -1819,7 +1819,7 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
* been started.
* @waiter: the pre-initialized rt_mutex_waiter
*
- * Wait for the the lock acquisition started on our behalf by
+ * Wait for the lock acquisition started on our behalf by
* rt_mutex_start_proxy_lock(). Upon failure, the caller must call
* rt_mutex_cleanup_proxy_lock().
*
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index ba67600c7b2c..abba5df50006 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1048,7 +1048,7 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
/*
* If there were already threads queued before us and:
- * 1) there are no no active locks, wake the front
+ * 1) there are no active locks, wake the front
* queued process(es) as the handoff bit might be set.
* 2) there are no active writers and some readers, the lock
* must be read owned; so we try to wake any read lock
diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
index d9dd94defc0a..9aa855a96c4a 100644
--- a/kernel/locking/semaphore.c
+++ b/kernel/locking/semaphore.c
@@ -119,7 +119,7 @@ EXPORT_SYMBOL(down_killable);
* @sem: the semaphore to be acquired
*
* Try to acquire the semaphore atomically. Returns 0 if the semaphore has
- * been acquired successfully or 1 if it it cannot be acquired.
+ * been acquired successfully or 1 if it cannot be acquired.
*
* NOTE: This return value is inverted from both spin_trylock and
* mutex_trylock! Be careful about this when converting code.
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 61db50f7ca86..821cf1723814 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -375,7 +375,7 @@ static int ptrace_attach(struct task_struct *task, long request,
audit_ptrace(task);
retval = -EPERM;
- if (unlikely(task->flags & PF_KTHREAD))
+ if (unlikely(task->flags & (PF_KTHREAD | PF_IO_WORKER)))
goto out;
if (same_thread_group(task, current))
goto out;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8a8bd7b13634..794c2cb945f8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5126,7 +5126,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
/*
* When a group wakes up we want to make sure that its quota is not already
* expired/exceeded, otherwise it may be allowed to steal additional ticks of
- * runtime as update_curr() throttling can not not trigger until it's on-rq.
+ * runtime as update_curr() throttling can not trigger until it's on-rq.
*/
static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
{
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index 08ae45ad9261..acdae625c636 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -454,7 +454,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm)
/*
* For each cpu runqueue, if the task's mm match @mm, ensure that all
- * @mm's membarrier state set bits are also set in in the runqueue's
+ * @mm's membarrier state set bits are also set in the runqueue's
* membarrier state. This ensures that a runqueue scheduling
* between threads which are users of @mm has its membarrier state
* updated.
diff --git a/kernel/signal.c b/kernel/signal.c
index 5ad8566534e7..ba4d1ef39a9e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -91,7 +91,7 @@ static bool sig_task_ignored(struct task_struct *t, int sig, bool force)
return true;
/* Only allow kernel generated signals to this kthread */
- if (unlikely((t->flags & PF_KTHREAD) &&
+ if (unlikely((t->flags & (PF_KTHREAD | PF_IO_WORKER)) &&
(handler == SIG_KTHREAD_KERNEL) && !force))
return true;
@@ -1096,7 +1096,7 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc
/*
* Skip useless siginfo allocation for SIGKILL and kernel threads.
*/
- if ((sig == SIGKILL) || (t->flags & PF_KTHREAD))
+ if ((sig == SIGKILL) || (t->flags & (PF_KTHREAD | PF_IO_WORKER)))
goto out_set;
/*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c9fbdd848138..62fbd09b5dc1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2962,7 +2962,7 @@ static struct ctl_table vm_table[] = {
.data = &block_dump,
.maxlen = sizeof(block_dump),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
{
@@ -2970,7 +2970,7 @@ static struct ctl_table vm_table[] = {
.data = &sysctl_vfs_cache_pressure,
.maxlen = sizeof(sysctl_vfs_cache_pressure),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
@@ -2980,7 +2980,7 @@ static struct ctl_table vm_table[] = {
.data = &sysctl_legacy_va_layout,
.maxlen = sizeof(sysctl_legacy_va_layout),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
#endif
@@ -2990,7 +2990,7 @@ static struct ctl_table vm_table[] = {
.data = &node_reclaim_mode,
.maxlen = sizeof(node_reclaim_mode),