summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/dma/Kconfig25
-rw-r--r--kernel/dma/Makefile1
-rw-r--r--kernel/dma/coherent.c25
-rw-r--r--kernel/dma/contiguous.c142
-rw-r--r--kernel/dma/debug.c19
-rw-r--r--kernel/dma/debug.h122
-rw-r--r--kernel/dma/direct.c268
-rw-r--r--kernel/dma/direct.h119
-rw-r--r--kernel/dma/dummy.c3
-rw-r--r--kernel/dma/mapping.c159
-rw-r--r--kernel/dma/ops_helpers.c85
-rw-r--r--kernel/dma/pool.c5
-rw-r--r--kernel/dma/swiotlb.c6
-rw-r--r--kernel/dma/virt.c4
14 files changed, 773 insertions, 210 deletions
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 847a9d1fa634..c99de4a21458 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -9,6 +9,7 @@ config HAS_DMA
default y
config DMA_OPS
+ depends on HAS_DMA
bool
#
@@ -43,6 +44,12 @@ config ARCH_HAS_DMA_SET_MASK
config ARCH_HAS_DMA_WRITE_COMBINE
bool
+#
+# Select if the architecture provides the arch_dma_mark_clean hook
+#
+config ARCH_HAS_DMA_MARK_CLEAN
+ bool
+
config DMA_DECLARE_COHERENT
bool
@@ -68,9 +75,6 @@ config ARCH_HAS_DMA_PREP_COHERENT
config ARCH_HAS_FORCE_DMA_UNENCRYPTED
bool
-config DMA_NONCOHERENT_CACHE_SYNC
- bool
-
config DMA_VIRT_OPS
bool
depends on HAS_DMA
@@ -114,10 +118,21 @@ config DMA_CMA
You can disable CMA by specifying "cma=0" on the kernel's command
line.
- For more information see <include/linux/dma-contiguous.h>.
+ For more information see <kernel/dma/contiguous.c>.
If unsure, say "n".
if DMA_CMA
+
+config DMA_PERNUMA_CMA
+ bool "Enable separate DMA Contiguous Memory Area for each NUMA Node"
+ default NUMA && ARM64
+ help
+ Enable this option to get pernuma CMA areas so that devices like
+ ARM64 SMMU can get local memory by DMA coherent APIs.
+
+ You can set the size of pernuma CMA by specifying "cma_pernuma=size"
+ on the kernel's command line.
+
comment "Default contiguous memory area size:"
config CMA_SIZE_MBYTES
@@ -162,7 +177,7 @@ endchoice
config CMA_ALIGNMENT
int "Maximum PAGE_SIZE order of alignment for contiguous buffers"
- range 4 12
+ range 2 12
default 8
help
DMA mapping framework by default aligns all buffers to the smallest
diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile
index 32c7c1942bbd..dc755ab68aab 100644
--- a/kernel/dma/Makefile
+++ b/kernel/dma/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_HAS_DMA) += mapping.o direct.o
+obj-$(CONFIG_DMA_OPS) += ops_helpers.o
obj-$(CONFIG_DMA_OPS) += dummy.o
obj-$(CONFIG_DMA_CMA) += contiguous.o
obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
index 2a0c4985f38e..5b5b6c7ec7f2 100644
--- a/kernel/dma/coherent.c
+++ b/kernel/dma/coherent.c
@@ -7,7 +7,8 @@
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
+#include <linux/dma-map-ops.h>
struct dma_coherent_mem {
void *virt_base;
@@ -32,9 +33,8 @@ static inline dma_addr_t dma_get_device_base(struct device *dev,
struct dma_coherent_mem * mem)
{
if (mem->use_dev_dma_pfn_offset)
- return (mem->pfn_base - dev->dma_pfn_offset) << PAGE_SHIFT;
- else
- return mem->device_base;
+ return phys_to_dma(dev, PFN_PHYS(mem->pfn_base));
+ return mem->device_base;
}
static int dma_init_coherent_memory(phys_addr_t phys_addr,
@@ -107,6 +107,23 @@ static int dma_assign_coherent_memory(struct device *dev,
return 0;
}
+/*
+ * Declare a region of memory to be handed out by dma_alloc_coherent() when it
+ * is asked for coherent memory for this device. This shall only be used
+ * from platform code, usually based on the device tree description.
+ *
+ * phys_addr is the CPU physical address to which the memory is currently
+ * assigned (this will be ioremapped so the CPU can access the region).
+ *
+ * device_addr is the DMA address the device needs to be programmed with to
+ * actually address this memory (this will be handed out as the dma_addr_t in
+ * dma_alloc_coherent()).
+ *
+ * size is the size of the area (must be a multiple of PAGE_SIZE).
+ *
+ * As a simplification for the platforms, only *one* such region of memory may
+ * be declared per device.
+ */
int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
dma_addr_t device_addr, size_t size)
{
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 0369fd5fda8f..16b95ff12e4d 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -5,6 +5,34 @@
* Written by:
* Marek Szyprowski <m.szyprowski@samsung.com>
* Michal Nazarewicz <mina86@mina86.com>
+ *
+ * Contiguous Memory Allocator
+ *
+ * The Contiguous Memory Allocator (CMA) makes it possible to
+ * allocate big contiguous chunks of memory after the system has
+ * booted.
+ *
+ * Why is it needed?
+ *
+ * Various devices on embedded systems have no scatter-gather and/or
+ * IO map support and require contiguous blocks of memory to
+ * operate. They include devices such as cameras, hardware video
+ * coders, etc.
+ *
+ * Such devices often require big memory buffers (a full HD frame
+ * is, for instance, more than 2 mega pixels large, i.e. more than 6
+ * MB of memory), which makes mechanisms such as kmalloc() or
+ * alloc_page() ineffective.
+ *
+ * At the same time, a solution where a big memory region is
+ * reserved for a device is suboptimal since often more memory is
+ * reserved than strictly required and, moreover, the memory is
+ * inaccessible to page system even if device drivers don't use it.
+ *
+ * CMA tries to solve this issue by operating on memory regions
+ * where only movable pages can be allocated from. This way, kernel
+ * can use the memory for pagecache and when device driver requests
+ * it, allocated pages can be migrated.
*/
#define pr_fmt(fmt) "cma: " fmt
@@ -16,12 +44,11 @@
#endif
#include <asm/page.h>
-#include <asm/dma-contiguous.h>
#include <linux/memblock.h>
#include <linux/err.h>
#include <linux/sizes.h>
-#include <linux/dma-contiguous.h>
+#include <linux/dma-map-ops.h>
#include <linux/cma.h>
#ifdef CONFIG_CMA_SIZE_MBYTES
@@ -69,6 +96,19 @@ static int __init early_cma(char *p)
}
early_param("cma", early_cma);
+#ifdef CONFIG_DMA_PERNUMA_CMA
+
+static struct cma *dma_contiguous_pernuma_area[MAX_NUMNODES];
+static phys_addr_t pernuma_size_bytes __initdata;
+
+static int __init early_cma_pernuma(char *p)
+{
+ pernuma_size_bytes = memparse(p, &p);
+ return 0;
+}
+early_param("cma_pernuma", early_cma_pernuma);
+#endif
+
#ifdef CONFIG_CMA_SIZE_PERCENTAGE
static phys_addr_t __init __maybe_unused cma_early_percent_memory(void)
@@ -87,6 +127,34 @@ static inline __maybe_unused phys_addr_t cma_early_percent_memory(void)
#endif
+#ifdef CONFIG_DMA_PERNUMA_CMA
+void __init dma_pernuma_cma_reserve(void)
+{
+	int nid;
+
+	if (!pernuma_size_bytes)
+		return;		/* no size was configured via "cma_pernuma=" */
+
+	for_each_online_node(nid) {
+		int ret;
+		char name[CMA_MAX_NAME];
+		struct cma **cma = &dma_contiguous_pernuma_area[nid];
+
+		snprintf(name, sizeof(name), "pernuma%d", nid);
+		ret = cma_declare_contiguous_nid(0, pernuma_size_bytes, 0, 0,
+						 0, false, name, cma, nid);
+		if (ret) {
+			pr_warn("%s: reservation failed: err %d, node %d\n",
+				__func__, ret, nid);
+			continue;	/* keep trying the remaining nodes */
+		}
+
+		pr_debug("%s: reserved %llu MiB on node %d\n", __func__,
+			(unsigned long long)pernuma_size_bytes / SZ_1M, nid);
+	}
+}
+#endif
+
/**
* dma_contiguous_reserve() - reserve area(s) for contiguous memory handling
* @limit: End address of the reserved memory (optional, 0 for any).
@@ -134,6 +202,11 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
}
}
+void __weak
+dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
+{
+}
+
/**
* dma_contiguous_reserve_area() - reserve custom contiguous area
* @size: Size of the reserved area (in bytes),
@@ -219,23 +292,44 @@ static struct page *cma_alloc_aligned(struct cma *cma, size_t size, gfp_t gfp)
* @size: Requested allocation size.
* @gfp: Allocation flags.
*
- * This function allocates contiguous memory buffer for specified device. It
- * tries to use device specific contiguous memory area if available, or the
- * default global one.
+ * This function allocates from the device specific contiguous memory area
+ * if available; otherwise it tries the per-numa cma and, if that allocation
+ * fails, falls back to the default global one.
*
- * Note that it byapss one-page size of allocations from the global area as
- * the addresses within one page are always contiguous, so there is no need
- * to waste CMA pages for that kind; it also helps reduce fragmentations.
+ * Note that it bypasses one-page size of allocations from the per-numa and
+ * global area as the addresses within one page are always contiguous, so
+ * there is no need to waste CMA pages for that kind; it also helps reduce
+ * fragmentations.
*/
struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp)
{
+#ifdef CONFIG_DMA_PERNUMA_CMA
+ int nid = dev_to_node(dev);
+#endif
+
/* CMA can be used only in the context which permits sleeping */
if (!gfpflags_allow_blocking(gfp))
return NULL;
if (dev->cma_area)
return cma_alloc_aligned(dev->cma_area, size, gfp);
- if (size <= PAGE_SIZE || !dma_contiguous_default_area)
+ if (size <= PAGE_SIZE)
return NULL;
+
+#ifdef CONFIG_DMA_PERNUMA_CMA
+ if (nid != NUMA_NO_NODE && !(gfp & (GFP_DMA | GFP_DMA32))) {
+ struct cma *cma = dma_contiguous_pernuma_area[nid];
+ struct page *page;
+
+ if (cma) {
+ page = cma_alloc_aligned(cma, size, gfp);
+ if (page)
+ return page;
+ }
+ }
+#endif
+ if (!dma_contiguous_default_area)
+ return NULL;
+
return cma_alloc_aligned(dma_contiguous_default_area, size, gfp);
}
@@ -252,9 +346,27 @@ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp)
*/
void dma_free_contiguous(struct device *dev, struct page *page, size_t size)
{
- if (!cma_release(dev_get_cma_area(dev), page,
- PAGE_ALIGN(size) >> PAGE_SHIFT))
- __free_pages(page, get_order(size));
+ unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ /* if dev has its own cma, free page from there */
+ if (dev->cma_area) {
+ if (cma_release(dev->cma_area, page, count))
+ return;
+ } else {
+ /*
+ * otherwise, page is from either per-numa cma or default cma
+ */
+#ifdef CONFIG_DMA_PERNUMA_CMA
+ if (cma_release(dma_contiguous_pernuma_area[page_to_nid(page)],
+ page, count))
+ return;
+#endif
+ if (cma_release(dma_contiguous_default_area, page, count))
+ return;
+ }
+
+ /* not in any cma, free from buddy */
+ __free_pages(page, get_order(size));
}
/*
@@ -270,14 +382,14 @@ void dma_free_contiguous(struct device *dev, struct page *page, size_t size)
static int rmem_cma_device_init(struct reserved_mem *rmem, struct device *dev)
{
- dev_set_cma_area(dev, rmem->priv);
+ dev->cma_area = rmem->priv;
return 0;
}
static void rmem_cma_device_release(struct reserved_mem *rmem,
struct device *dev)
{
- dev_set_cma_area(dev, NULL);
+ dev->cma_area = NULL;
}
static const struct reserved_mem_ops rmem_cma_ops = {
@@ -318,7 +430,7 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem)
dma_contiguous_early_fixup(rmem->base, rmem->size);
if (default_cma)
- dma_contiguous_set_default(cma);
+ dma_contiguous_default_area = cma;
rmem->ops = &rmem_cma_ops;
rmem->priv = cma;
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index 8e9f7b301c6d..14de1271463f 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -9,10 +9,9 @@
#include <linux/sched/task_stack.h>
#include <linux/scatterlist.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
#include <linux/sched/task.h>
#include <linux/stacktrace.h>
-#include <linux/dma-debug.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/debugfs.h>
@@ -24,8 +23,8 @@
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/slab.h>
-
#include <asm/sections.h>
+#include "debug.h"
#define HASH_SIZE 16384ULL
#define HASH_FN_SHIFT 13
@@ -1219,7 +1218,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
entry->dev = dev;
entry->type = dma_debug_single;
entry->pfn = page_to_pfn(page);
- entry->offset = offset,
+ entry->offset = offset;
entry->dev_addr = dma_addr;
entry->size = size;
entry->direction = direction;
@@ -1235,7 +1234,6 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
add_dma_entry(entry);
}
-EXPORT_SYMBOL(debug_dma_map_page);
void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
@@ -1290,7 +1288,6 @@ void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
return;
check_unmap(&ref);
}
-EXPORT_SYMBOL(debug_dma_unmap_page);
void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
int nents, int mapped_ents, int direction)
@@ -1310,7 +1307,7 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
entry->type = dma_debug_sg;
entry->dev = dev;
entry->pfn = page_to_pfn(sg_page(s));
- entry->offset = s->offset,
+ entry->offset = s->offset;
entry->size = sg_dma_len(s);
entry->dev_addr = sg_dma_address(s);
entry->direction = direction;
@@ -1328,7 +1325,6 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
add_dma_entry(entry);
}
}
-EXPORT_SYMBOL(debug_dma_map_sg);
static int get_nr_mapped_entries(struct device *dev,
struct dma_debug_entry *ref)
@@ -1380,7 +1376,6 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
check_unmap(&ref);
}
}
-EXPORT_SYMBOL(debug_dma_unmap_sg);
void debug_dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t dma_addr, void *virt)
@@ -1466,7 +1461,6 @@ void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size,
add_dma_entry(entry);
}
-EXPORT_SYMBOL(debug_dma_map_resource);
void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr,
size_t size, int direction)
@@ -1484,7 +1478,6 @@ void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr,
check_unmap(&ref);
}
-EXPORT_SYMBOL(debug_dma_unmap_resource);
void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
size_t size, int direction)
@@ -1503,7 +1496,6 @@ void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
check_sync(dev, &ref, true);
}
-EXPORT_SYMBOL(debug_dma_sync_single_for_cpu);
void debug_dma_sync_single_for_device(struct device *dev,
dma_addr_t dma_handle, size_t size,
@@ -1523,7 +1515,6 @@ void debug_dma_sync_single_for_device(struct device *dev,
check_sync(dev, &ref, false);
}
-EXPORT_SYMBOL(debug_dma_sync_single_for_device);
void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
int nelems, int direction)
@@ -1556,7 +1547,6 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
check_sync(dev, &ref, true);
}
}
-EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu);
void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
int nelems, int direction)
@@ -1588,7 +1578,6 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
check_sync(dev, &ref, false);
}
}
-EXPORT_SYMBOL(debug_dma_sync_sg_for_device);
static int __init dma_debug_driver_setup(char *str)
{
diff --git a/kernel/dma/debug.h b/kernel/dma/debug.h
new file mode 100644
index 000000000000..83643b3010b2
--- /dev/null
+++ b/kernel/dma/debug.h
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2008 Advanced Micro Devices, Inc.
+ *
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ */
+
+#ifndef _KERNEL_DMA_DEBUG_H
+#define _KERNEL_DMA_DEBUG_H
+
+#ifdef CONFIG_DMA_API_DEBUG
+extern void debug_dma_map_page(struct device *dev, struct page *page,
+ size_t offset, size_t size,
+ int direction, dma_addr_t dma_addr);
+
+extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
+ size_t size, int direction);
+
+extern void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
+ int nents, int mapped_ents, int direction);
+
+extern void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
+ int nelems, int dir);
+
+extern void debug_dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t dma_addr, void *virt);
+
+extern void debug_dma_free_coherent(struct device *dev, size_t size,
+ void *virt, dma_addr_t addr);
+
+extern void debug_dma_map_resource(struct device *dev, phys_addr_t addr,
+ size_t size, int direction,
+ dma_addr_t dma_addr);
+
+extern void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr,
+ size_t size, int direction);
+
+extern void debug_dma_sync_single_for_cpu(struct device *dev,
+ dma_addr_t dma_handle, size_t size,
+ int direction);
+
+extern void debug_dma_sync_single_for_device(struct device *dev,
+ dma_addr_t dma_handle,
+ size_t size, int direction);
+
+extern void debug_dma_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sg,
+ int nelems, int direction);
+
+extern void debug_dma_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sg,
+ int nelems, int direction);
+#else /* CONFIG_DMA_API_DEBUG */
+static inline void debug_dma_map_page(struct device *dev, struct page *page,
+ size_t offset, size_t size,
+ int direction, dma_addr_t dma_addr)
+{
+}
+
+static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
+ size_t size, int direction)
+{
+}
+
+static inline void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
+ int nents, int mapped_ents, int direction)
+{
+}
+
+static inline void debug_dma_unmap_sg(struct device *dev,
+ struct scatterlist *sglist,
+ int nelems, int dir)
+{
+}
+
+static inline void debug_dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t dma_addr, void *virt)
+{
+}
+
+static inline void debug_dma_free_coherent(struct device *dev, size_t size,
+ void *virt, dma_addr_t addr)
+{
+}
+
+static inline void debug_dma_map_resource(struct device *dev, phys_addr_t addr,
+ size_t size, int direction,
+ dma_addr_t dma_addr)
+{
+}
+
+static inline void debug_dma_unmap_resource(struct device *dev,
+ dma_addr_t dma_addr, size_t size,
+ int direction)
+{
+}
+
+static inline void debug_dma_sync_single_for_cpu(struct device *dev,
+ dma_addr_t dma_handle,
+ size_t size, int direction)
+{
+}
+
+static inline void debug_dma_sync_single_for_device(struct device *dev,
+ dma_addr_t dma_handle,
+ size_t size, int direction)
+{
+}
+
+static inline void debug_dma_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sg,
+ int nelems, int direction)
+{
+}
+
+static inline void debug_dma_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sg,
+ int nelems, int direction)
+{
+}
+#endif /* CONFIG_DMA_API_DEBUG */
+#endif /* _KERNEL_DMA_DEBUG_H */
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index db6ef07aec3b..b92d08e65999 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -1,18 +1,19 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (C) 2018 Christoph Hellwig.
+ * Copyright (C) 2018-2020 Christoph Hellwig.
*
* DMA operations that map physical memory directly without using an IOMMU.
*/
#include <linux/memblock.h> /* for max_pfn */
#include <linux/export.h>
#include <linux/mm.h>
-#include <linux/dma-direct.h>
+#include <linux/dma-map-ops.h>
#include <linux/scatterlist.h>
-#include <linux/dma-contiguous.h>
#include <linux/pfn.h>
#include <linux/vmalloc.h>
#include <linux/set_memory.h>
+#include <linux/slab.h>
+#include "direct.h"
/*
* Most architectures use ZONE_DMA for the first 16 Megabytes, but some use it
@@ -25,7 +26,7 @@ static inline dma_addr_t phys_to_dma_direct(struct device *dev,
phys_addr_t phys)
{
if (force_dma_unencrypted(dev))
- return __phys_to_dma(dev, phys);
+ return phys_to_dma_unencrypted(dev, phys);
return phys_to_dma(dev, phys);
}
@@ -48,11 +49,6 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
{
u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit);
- if (force_dma_unencrypted(dev))
- *phys_limit = __dma_to_phys(dev, dma_limit);
- else
- *phys_limit = dma_to_phys(dev, dma_limit);
-
/*
* Optimistically try the zone that the physical address mask falls
* into first. If that returns memory that isn't actually addressable
@@ -61,6 +57,7 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
* Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding
* zones.
*/
+ *phys_limit = dma_to_phys(dev, dma_limit);
if (*phys_limit <= DMA_BIT_MASK(zone_dma_bits))
return GFP_DMA;
if (*phys_limit <= DMA_BIT_MASK(32))
@@ -70,45 +67,16 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
{
- return phys_to_dma_direct(dev, phys) + size - 1 <=
- min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
-}
-
-/*
- * Decrypting memory is allowed to block, so if this device requires
- * unencrypted memory it must come from atomic pools.
- */
-static inline bool dma_should_alloc_from_pool(struct device *dev, gfp_t gfp,
- unsigned long attrs)
-{
- if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL))
- return false;
- if (gfpflags_allow_blocking(gfp))
- return false;
- if (force_dma_unencrypted(dev))
- return true;
- if (!IS_ENABLED(CONFIG_DMA_DIRECT_REMAP))
- return false;
- if (dma_alloc_need_uncached(dev, attrs))
- return true;
- return false;
-}
+ dma_addr_t dma_addr = phys_to_dma_direct(dev, phys);
-static inline bool dma_should_free_from_pool(struct device *dev,
- unsigned long attrs)
-{
- if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL))
- return true;
- if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
- !force_dma_unencrypted(dev))
+ if (dma_addr == DMA_MAPPING_ERROR)
return false;
- if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP))
- return true;
- return false;
+ return dma_addr + size - 1 <=
+ min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
}
static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
- gfp_t gfp, unsigned long attrs)
+ gfp_t gfp)
{
int node = dev_to_node(dev);
struct page *page = NULL;
@@ -116,11 +84,6 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
WARN_ON_ONCE(!PAGE_ALIGNED(size));
- if (attrs & DMA_ATTR_NO_WARN)
- gfp |= __GFP_NOWARN;
-
- /* we always manually zero the memory once we are done: */
- gfp &= ~__GFP_ZERO;
gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
&phys_limit);
page = dma_alloc_contiguous(dev, size, gfp);
@@ -151,7 +114,23 @@ again:
return page;
}
-void *dma_direct_alloc_pages(struct device *dev, size_t size,
+static void *dma_direct_alloc_from_pool(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp)
+{
+ struct page *page;
+ u64 phys_mask;
+ void *ret;
+
+ gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
+ &phys_mask);
+ page = dma_alloc_from_pool(dev, size, &ret, gfp, dma_coherent_ok);
+ if (!page)
+ return NULL;
+ *dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
+ return ret;
+}
+
+void *dma_direct_alloc(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
struct page *page;
@@ -159,35 +138,44 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
int err;
size = PAGE_ALIGN(size);
-
- if (dma_should_alloc_from_pool(dev, gfp, attrs)) {
- u64 phys_mask;
-
- gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
- &phys_mask);
- page = dma_alloc_from_pool(dev, size, &ret, gfp,
- dma_coherent_ok);
- if (!page)
- return NULL;
- goto done;
- }
-
- page = __dma_direct_alloc_pages(dev, size, gfp, attrs);
- if (!page)
- return NULL;
+ if (attrs & DMA_ATTR_NO_WARN)
+ gfp |= __GFP_NOWARN;
if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
!force_dma_unencrypted(dev)) {
+ page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO);
+ if (!page)
+ return NULL;
/* remove any dirty cache lines on the kernel alias */
if (!PageHighMem(page))
arch_dma_prep_coherent(page, size);
+ *dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
/* return the page pointer as the opaque cookie */
- ret = page;
- goto done;
+ return page;
}
+ if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
+ !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
+ !dev_is_dma_coherent(dev))
+ return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
+
+ /*
+ * Remapping or decrypting memory may block. If either is required and
+ * we can't block, allocate the memory from the atomic pools.
+ */
+ if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
+ !gfpflags_allow_blocking(gfp) &&
+ (force_dma_unencrypted(dev) ||
+ (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev))))
+ return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);
+
+ /* we always manually zero the memory once we are done */
+ page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO);
+ if (!page)
+ return NULL;
+
if ((IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
- dma_alloc_need_uncached(dev, attrs)) ||
+ !dev_is_dma_coherent(dev)) ||
(IS_ENABLED(CONFIG_DMA_REMAP) && PageHighMem(page))) {
/* remove any dirty cache lines on the kernel alias */
arch_dma_prep_coherent(page, size);
@@ -230,17 +218,14 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
memset(ret, 0, size);
if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
- dma_alloc_need_uncached(dev, attrs)) {
+ !dev_is_dma_coherent(dev)) {
arch_dma_prep_coherent(page, size);
ret = arch_dma_set_uncached(ret, size);
if (IS_ERR(ret))
goto out_encrypt_pages;
}
done:
- if (force_dma_unencrypted(dev))
- *dma_handle = __phys_to_dma(dev, page_to_phys(page));
- else
- *dma_handle = phys_to_dma(dev, page_to_phys(page));
+ *dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
return ret;
out_encrypt_pages:
@@ -256,16 +241,11 @@ out_free_pages:
return NULL;
}
-void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
- dma_addr_t dma_addr, unsigned long attrs)
+void dma_direct_free(struct device *dev, size_t size,
+ void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
{
unsigned int page_order = get_order(size);
- /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */
- if (dma_should_free_from_pool(dev, attrs) &&
- dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size)))
- return;
-
if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
!force_dma_unencrypted(dev)) {
/* cpu_addr is a struct page cookie, not a kernel address */
@@ -273,6 +253,18 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
return;
}
+ if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
+ !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
+ !dev_is_dma_coherent(dev)) {
+ arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
+ return;
+ }
+
+ /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */
+ if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
+ dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size)))
+ return;
+
if (force_dma_unencrypted(dev))
set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
@@ -284,25 +276,60 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
dma_free_contiguous(dev, dma_direct_to_page(dev, dma_addr), size);
}
-void *dma_direct_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
+struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp)
{
- if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
- !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
- dma_alloc_need_uncached(dev, attrs))
- return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
- return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
+ struct page *page;
+ void *ret;
+
+ if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
+ force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp))
+ return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);
+
+ page = __dma_direct_alloc_pages(dev, size, gfp);
+ if (!page)
+ return NULL;
+ if (PageHighMem(page)) {
+ /*
+ * Depending on the cma= arguments and per-arch setup
+ * dma_alloc_contiguous could return highmem pages.
+ * Without remapping there is no way to return them here,
+ * so log an error and fail.
+ */
+ dev_info(dev, "Rejecting highmem page from CMA.\n");
+ goto out_free_pages;
+ }
+
+ ret = page_address(page);
+ if (force_dma_unencrypted(dev)) {
+ if (set_memory_decrypted((unsigned long)ret,
+ 1 << get_order(size)))
+ goto out_free_pages;
+ }
+ memset(ret, 0, size);
+ *dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
+ return page;
+out_free_pages:
+ dma_free_contiguous(dev, page, size);
+ return NULL;
}
-void dma_direct_free(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
+void dma_direct_free_pages(struct device *dev, size_t size,
+ struct page *page, dma_addr_t dma_addr,
+ enum dma_data_direction dir)
{
- if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
- !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
- dma_alloc_need_uncached(dev, attrs))
- arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
- else
- dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
+ unsigned int page_order = get_order(size);
+ void *vaddr = page_address(page);
+
+ /* If vaddr is not from an atomic pool, dma_free_from_pool() fails */
+ if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
+ dma_free_from_pool(dev, vaddr, size))
+ return;
+
+ if (force_dma_unencrypted(dev))
+ set_memory_encrypted((unsigned long)vaddr, 1 << page_order);
+
+ dma_free_contiguous(dev, page, size);
}
#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
@@ -345,6 +372,9 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
if (unlikely(is_swiotlb_buffer(paddr)))
swiotlb_tbl_sync_single(dev, paddr, sg->length, dir,
SYNC_FOR_CPU);
+
+ if (dir == DMA_FROM_DEVICE)
+ arch_dma_mark_clean(paddr, sg->length);
}
if (!dev_is_dma_coherent(dev))
@@ -453,13 +483,13 @@ int dma_direct_supported(struct device *dev, u64 mask)
return 1;
/*
- * This check needs to be against the actual bit mask value, so
- * use __phys_to_dma() here so that the SME encryption mask isn't
+ * This check needs to be against the actual bit mask value, so use
+ * phys_to_dma_unencrypted() here so that the SME encryption mask isn't
* part of the check.
*/
if (IS_ENABLED(CONFIG_ZONE_DMA))
min_mask = min_t(u64, min_mask, DMA_BIT_MASK(zone_dma_bits));
- return mask >= __phys_to_dma(dev, min_mask);
+ return mask >= phys_to_dma_unencrypted(dev, min_mask);
}
size_t dma_direct_max_mapping_size(struct device *dev)
@@ -476,3 +506,45 @@ bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr)
return !dev_is_dma_coherent(dev) ||
is_swiotlb_buffer(dma_to_phys(dev, dma_addr));