summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-06-14 06:08:46 -1000
committerLinus Torvalds <torvalds@linux-foundation.org>2019-06-14 06:08:46 -1000
commitfd6b99fa41ddee9699cff1d0e04e90afa452b24d (patch)
tree6816c8b91ffd00556cbe61f6602af31a458377b1
parentc78ad1be4b4d65eb214421b90a788abf3c85c3ea (diff)
parent50f44ee7248ad2f7984ef081974a6ecd09724b3e (diff)
downloadlinux-fd6b99fa41ddee9699cff1d0e04e90afa452b24d.tar.gz
linux-fd6b99fa41ddee9699cff1d0e04e90afa452b24d.tar.bz2
linux-fd6b99fa41ddee9699cff1d0e04e90afa452b24d.zip
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton: "16 fixes" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: mm/devm_memremap_pages: fix final page put race PCI/P2PDMA: track pgmap references per resource, not globally lib/genalloc: introduce chunk owners PCI/P2PDMA: fix the gen_pool_add_virt() failure path mm/devm_memremap_pages: introduce devm_memunmap_pages drivers/base/devres: introduce devm_release_action() mm/vmscan.c: fix trying to reclaim unevictable LRU page coredump: fix race condition between collapse_huge_page() and core dumping mm/mlock.c: change count_mm_mlocked_page_nr return type mm: mmu_gather: remove __tlb_reset_range() for force flush fs/ocfs2: fix race in ocfs2_dentry_attach_lock() mm/vmscan.c: fix recent_rotated history mm/mlock.c: mlockall error for flag MCL_ONFAULT scripts/decode_stacktrace.sh: prefix addr2line with $CROSS_COMPILE mm/list_lru.c: fix memory leak in __memcg_init_list_lru_node mm: memcontrol: don't batch updates of local VM stats and events
-rw-r--r--drivers/base/devres.c24
-rw-r--r--drivers/dax/device.c13
-rw-r--r--drivers/nvdimm/pmem.c17
-rw-r--r--drivers/pci/p2pdma.c115
-rw-r--r--fs/ocfs2/dcache.c12
-rw-r--r--include/linux/device.h1
-rw-r--r--include/linux/genalloc.h55
-rw-r--r--include/linux/memcontrol.h26
-rw-r--r--include/linux/memremap.h8
-rw-r--r--include/linux/sched/mm.h4
-rw-r--r--kernel/memremap.c23
-rw-r--r--lib/genalloc.c51
-rw-r--r--mm/hmm.c14
-rw-r--r--mm/khugepaged.c3
-rw-r--r--mm/list_lru.c2
-rw-r--r--mm/memcontrol.c41
-rw-r--r--mm/mlock.c7
-rw-r--r--mm/mmu_gather.c24
-rw-r--r--mm/vmscan.c6
-rwxr-xr-xscripts/decode_stacktrace.sh2
-rw-r--r--tools/testing/nvdimm/test/iomap.c2
21 files changed, 310 insertions, 140 deletions
diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index e038e2b3b7ea..0bbb328bd17f 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -755,10 +755,32 @@ void devm_remove_action(struct device *dev, void (*action)(void *), void *data)
WARN_ON(devres_destroy(dev, devm_action_release, devm_action_match,
&devres));
-
}
EXPORT_SYMBOL_GPL(devm_remove_action);
+/**
+ * devm_release_action() - release previously added custom action
+ * @dev: Device that owns the action
+ * @action: Function implementing the action
+ * @data: Pointer to data passed to @action implementation
+ *
+ * Releases and removes instance of @action previously added by
+ * devm_add_action(). Both action and data should match one of the
+ * existing entries.
+ */
+void devm_release_action(struct device *dev, void (*action)(void *), void *data)
+{
+ struct action_devres devres = {
+ .data = data,
+ .action = action,
+ };
+
+ WARN_ON(devres_release(dev, devm_action_release, devm_action_match,
+ &devres));
+
+}
+EXPORT_SYMBOL_GPL(devm_release_action);
+
/*
* Managed kmalloc/kfree
*/
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 996d68ff992a..8465d12fecba 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -27,9 +27,8 @@ static void dev_dax_percpu_release(struct percpu_ref *ref)
complete(&dev_dax->cmp);
}
-static void dev_dax_percpu_exit(void *data)
+static void dev_dax_percpu_exit(struct percpu_ref *ref)
{
- struct percpu_ref *ref = data;
struct dev_dax *dev_dax = ref_to_dev_dax(ref);
dev_dbg(&dev_dax->dev, "%s\n", __func__);
@@ -466,18 +465,12 @@ int dev_dax_probe(struct device *dev)
if (rc)
return rc;
- rc = devm_add_action_or_reset(dev, dev_dax_percpu_exit, &dev_dax->ref);
- if (rc)
- return rc;
-
dev_dax->pgmap.ref = &dev_dax->ref;
dev_dax->pgmap.kill = dev_dax_percpu_kill;
+ dev_dax->pgmap.cleanup = dev_dax_percpu_exit;
addr = devm_memremap_pages(dev, &dev_dax->pgmap);
- if (IS_ERR(addr)) {
- devm_remove_action(dev, dev_dax_percpu_exit, &dev_dax->ref);
- percpu_ref_exit(&dev_dax->ref);
+ if (IS_ERR(addr))
return PTR_ERR(addr);
- }
inode = dax_inode(dax_dev);
cdev = inode->i_cdev;
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 28cb44c61d4a..24d7fe7c74ed 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -303,11 +303,19 @@ static const struct attribute_group *pmem_attribute_groups[] = {
NULL,
};
-static void pmem_release_queue(void *q)
+static void __pmem_release_queue(struct percpu_ref *ref)
{
+ struct request_queue *q;
+
+ q = container_of(ref, typeof(*q), q_usage_counter);
blk_cleanup_queue(q);
}
+static void pmem_release_queue(void *ref)
+{
+ __pmem_release_queue(ref);
+}
+
static void pmem_freeze_queue(struct percpu_ref *ref)
{
struct request_queue *q;
@@ -399,12 +407,10 @@ static int pmem_attach_disk(struct device *dev,
if (!q)
return -ENOMEM;
- if (devm_add_action_or_reset(dev, pmem_release_queue, q))
- return -ENOMEM;
-
pmem->pfn_flags = PFN_DEV;
pmem->pgmap.ref = &q->q_usage_counter;
pmem->pgmap.kill = pmem_freeze_queue;
+ pmem->pgmap.cleanup = __pmem_release_queue;
if (is_nd_pfn(dev)) {
if (setup_pagemap_fsdax(dev, &pmem->pgmap))
return -ENOMEM;
@@ -425,6 +431,9 @@ static int pmem_attach_disk(struct device *dev,
pmem->pfn_flags |= PFN_MAP;
memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
} else {
+ if (devm_add_action_or_reset(dev, pmem_release_queue,
+ &q->q_usage_counter))
+ return -ENOMEM;
addr = devm_memremap(dev, pmem->phys_addr,
pmem->size, ARCH_MEMREMAP_PMEM);
memcpy(&bb_res, &nsio->res, sizeof(bb_res));
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 742928d0053e..a98126ad9c3a 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -20,12 +20,16 @@
#include <linux/seq_buf.h>
struct pci_p2pdma {
- struct percpu_ref devmap_ref;
- struct completion devmap_ref_done;
struct gen_pool *pool;
bool p2pmem_published;
};
+struct p2pdma_pagemap {
+ struct dev_pagemap pgmap;
+ struct percpu_ref ref;
+ struct completion ref_done;
+};
+
static ssize_t size_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -74,41 +78,45 @@ static const struct attribute_group p2pmem_group = {
.name = "p2pmem",
};
+static struct p2pdma_pagemap *to_p2p_pgmap(struct percpu_ref *ref)
+{
+ return container_of(ref, struct p2pdma_pagemap, ref);
+}
+
static void pci_p2pdma_percpu_release(struct percpu_ref *ref)
{
- struct pci_p2pdma *p2p =
- container_of(ref, struct pci_p2pdma, devmap_ref);
+ struct p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(ref);
- complete_all(&p2p->devmap_ref_done);
+ complete(&p2p_pgmap->ref_done);
}
static void pci_p2pdma_percpu_kill(struct percpu_ref *ref)
{
- /*
- * pci_p2pdma_add_resource() may be called multiple times
- * by a driver and may register the percpu_kill devm action multiple
- * times. We only want the first action to actually kill the
- * percpu_ref.
- */
- if (percpu_ref_is_dying(ref))
- return;
-
percpu_ref_kill(ref);
}
+static void pci_p2pdma_percpu_cleanup(struct percpu_ref *ref)
+{
+ struct p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(ref);
+
+ wait_for_completion(&p2p_pgmap->ref_done);
+ percpu_ref_exit(&p2p_pgmap->ref);
+}
+
static void pci_p2pdma_release(void *data)
{
struct pci_dev *pdev = data;
+ struct pci_p2pdma *p2pdma = pdev->p2pdma;
- if (!pdev->p2pdma)
+ if (!p2pdma)
return;
- wait_for_completion(&pdev->p2pdma->devmap_ref_done);
- percpu_ref_exit(&pdev->p2pdma->devmap_ref);
+ /* Flush and disable pci_alloc_p2p_mem() */
+ pdev->p2pdma = NULL;
+ synchronize_rcu();
- gen_pool_destroy(pdev->p2pdma->pool);
+ gen_pool_destroy(p2pdma->pool);
sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group);
- pdev->p2pdma = NULL;
}
static int pci_p2pdma_setup(struct pci_dev *pdev)
@@ -124,12 +132,6 @@ static int pci_p2pdma_setup(struct pci_dev *pdev)
if (!p2p->pool)
goto out;
- init_completion(&p2p->devmap_ref_done);
- error = percpu_ref_init(&p2p->devmap_ref,
- pci_p2pdma_percpu_release, 0, GFP_KERNEL);
- if (error)
- goto out_pool_destroy;
-
error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev);
if (error)
goto out_pool_destroy;
@@ -163,6 +165,7 @@ out:
int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
u64 offset)
{
+ struct p2pdma_pagemap *p2p_pgmap;
struct dev_pagemap *pgmap;
void *addr;
int error;
@@ -185,18 +188,27 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
return error;
}
- pgmap = devm_kzalloc(&pdev->dev, sizeof(*pgmap), GFP_KERNEL);
- if (!pgmap)
+ p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL);
+ if (!p2p_pgmap)
return -ENOMEM;
+ init_completion(&p2p_pgmap->ref_done);
+ error = percpu_ref_init(&p2p_pgmap->ref,
+ pci_p2pdma_percpu_release, 0, GFP_KERNEL);
+ if (error)
+ goto pgmap_free;
+
+ pgmap = &p2p_pgmap->pgmap;
+
pgmap->res.start = pci_resource_start(pdev, bar) + offset;
pgmap->res.end = pgmap->res.start + size - 1;
pgmap->res.flags = pci_resource_flags(pdev, bar);
- pgmap->ref = &pdev->p2pdma->devmap_ref;
+ pgmap->ref = &p2p_pgmap->ref;
pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) -
pci_resource_start(pdev, bar);
pgmap->kill = pci_p2pdma_percpu_kill;
+ pgmap->cleanup = pci_p2pdma_percpu_cleanup;
addr = devm_memremap_pages(&pdev->dev, pgmap);
if (IS_ERR(addr)) {
@@ -204,19 +216,22 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
goto pgmap_free;
}
- error = gen_pool_add_virt(pdev->p2pdma->pool, (unsigned long)addr,
+ error = gen_pool_add_owner(pdev->p2pdma->pool, (unsigned long)addr,
pci_bus_address(pdev, bar) + offset,
- resource_size(&pgmap->res), dev_to_node(&pdev->dev));
+ resource_size(&pgmap->res), dev_to_node(&pdev->dev),
+ &p2p_pgmap->ref);
if (error)
- goto pgmap_free;
+ goto pages_free;
pci_info(pdev, "added peer-to-peer DMA memory %pR\n",
&pgmap->res);
return 0;
+pages_free:
+ devm_memunmap_pages(&pdev->dev, pgmap);
pgmap_free:
- devm_kfree(&pdev->dev, pgmap);
+ devm_kfree(&pdev->dev, p2p_pgmap);
return error;
}
EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource);
@@ -585,19 +600,30 @@ EXPORT_SYMBOL_GPL(pci_p2pmem_find_many);
*/
void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size)
{
- void *ret;
+ void *ret = NULL;
+ struct percpu_ref *ref;
+ /*
+ * Pairs with synchronize_rcu() in pci_p2pdma_release() to
+ * ensure pdev->p2pdma is non-NULL for the duration of the
+ * read-lock.
+ */
+ rcu_read_lock();
if (unlikely(!pdev->p2pdma))
- return NULL;
-
- if (unlikely(!percpu_ref_tryget_live(&pdev->p2pdma->devmap_ref)))
- return NULL;
-
- ret = (void *)gen_pool_alloc(pdev->p2pdma->pool, size);
+ goto out;
- if (unlikely(!ret))
- percpu_ref_put(&pdev->p2pdma->devmap_ref);
+ ret = (void *)gen_pool_alloc_owner(pdev->p2pdma->pool, size,
+ (void **) &ref);
+ if (!ret)
+ goto out;
+ if (unlikely(!percpu_ref_tryget_live(ref))) {
+ gen_pool_free(pdev->p2pdma->pool, (unsigned long) ret, size);
+ ret = NULL;
+ goto out;
+ }
+out:
+ rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(pci_alloc_p2pmem);
@@ -610,8 +636,11 @@ EXPORT_SYMBOL_GPL(pci_alloc_p2pmem);
*/
void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size)
{
- gen_pool_free(pdev->p2pdma->pool, (uintptr_t)addr, size);
- percpu_ref_put(&pdev->p2pdma->devmap_ref);
+ struct percpu_ref *ref;
+
+ gen_pool_free_owner(pdev->p2pdma->pool, (uintptr_t)addr, size,
+ (void **) &ref);
+ percpu_ref_put(ref);
}
EXPORT_SYMBOL_GPL(pci_free_p2pmem);
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 2d016937fdda..42a61eecdacd 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -296,6 +296,18 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
out_attach:
spin_lock(&dentry_attach_lock);
+ if (unlikely(dentry->d_fsdata && !alias)) {
+ /* d_fsdata is set by a racing thread which is doing
+ * the same thing as this thread is doing. Leave the racing
+ * thread going ahead and we return here.
+ */
+ spin_unlock(&dentry_attach_lock);
+ iput(dl->dl_inode);
+ ocfs2_lock_res_free(&dl->dl_lockres);
+ kfree(dl);
+ return 0;
+ }
+
dentry->d_fsdata = dl;
dl->dl_count++;
spin_unlock(&dentry_attach_lock);
diff --git a/include/linux/device.h b/include/linux/device.h
index e85264fb6616..848fc71c6ba6 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -713,6 +713,7 @@ void __iomem *devm_of_iomap(struct device *dev,
/* allows to add/remove a custom action to devres stack */
int devm_add_action(struct device *dev, void (*action)(void *), void *data);
void devm_remove_action(struct device *dev, void (*action)(void *), void *data);
+void devm_release_action(struct device *dev, void (*action)(void *), void *data);
static inline int devm_add_action_or_reset(struct device *dev,
void (*action)(void *), void *data)
diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index dd0a452373e7..a337313e064f 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -75,6 +75,7 @@ struct gen_pool_chunk {
struct list_head next_chunk; /* next chunk in pool */
atomic_long_t avail;
phys_addr_t phys_addr; /* physical starting address of memory chunk */
+ void *owner; /* private data to retrieve at alloc time */
unsigned long start_addr; /* start address of memory chunk */
unsigned long end_addr; /* end address of memory chunk (inclusive) */
unsigned long bits[0]; /* bitmap for allocating memory chunk */
@@ -96,8 +97,15 @@ struct genpool_data_fixed {
extern struct gen_pool *gen_pool_create(int, int);
extern phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long);
-extern int gen_pool_add_virt(struct gen_pool *, unsigned long, phys_addr_t,
- size_t, int);
+extern int gen_pool_add_owner(struct gen_pool *, unsigned long, phys_addr_t,
+ size_t, int, void *);
+
+static inline int gen_pool_add_virt(struct gen_pool *pool, unsigned long addr,
+ phys_addr_t phys, size_t size, int nid)
+{
+ return gen_pool_add_owner(pool, addr, phys, size, nid, NULL);
+}
+
/**
* gen_pool_add - add a new chunk of special memory to the pool
* @pool: pool to add new memory chunk to
@@ -116,12 +124,47 @@ static inline int gen_pool_add(struct gen_pool *pool, unsigned long addr,
return gen_pool_add_virt(pool, addr, -1, size, nid);
}
extern void gen_pool_destroy(struct gen_pool *);
-extern unsigned long gen_pool_alloc(struct gen_pool *, size_t);
-extern unsigned long gen_pool_alloc_algo(struct gen_pool *, size_t,
- genpool_algo_t algo, void *data);
+unsigned long gen_pool_alloc_algo_owner(struct gen_pool *pool, size_t size,
+ genpool_algo_t algo, void *data, void **owner);
+
+static inline unsigned long gen_pool_alloc_owner(struct gen_pool *pool,
+ size_t size, void **owner)
+{
+ return gen_pool_alloc_algo_owner(pool, size, pool->algo, pool->data,
+ owner);
+}
+
+static inline unsigned long gen_pool_alloc_algo(struct gen_pool *pool,
+ size_t size, genpool_algo_t algo, void *data)
+{
+ return gen_pool_alloc_algo_owner(pool, size, algo, data, NULL);
+}
+
+/**
+ * gen_pool_alloc - allocate special memory from the pool
+ * @pool: pool to allocate from
+ * @size: number of bytes to allocate from the pool
+ *
+ * Allocate the requested number of bytes from the specified pool.
+ * Uses the pool allocation function (with first-fit algorithm by default).
+ * Can not be used in NMI handler on architectures without
+ * NMI-safe cmpxchg implementation.
+ */
+static inline unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
+{
+ return gen_pool_alloc_algo(pool, size, pool->algo, pool->data);
+}
+
extern void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size,
dma_addr_t *dma);
-extern void gen_pool_free(struct gen_pool *, unsigned long, size_t);
+extern void gen_pool_free_owner(struct gen_pool *pool, unsigned long addr,
+ size_t size, void **owner);
+static inline void gen_pool_free(struct gen_pool *pool, unsigned long addr,
+ size_t size)
+{
+ gen_pool_free_owner(pool, addr, size, NULL);
+}
+
extern void gen_pool_for_each_chunk(struct gen_pool *,
void (*)(struct gen_pool *, struct gen_pool_chunk *, void *), void *);
extern size_t gen_pool_avail(struct gen_pool *);
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index edf9e8f32d70..1dcb763bb610 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -117,9 +117,12 @@ struct memcg_shrinker_map {
struct mem_cgroup_per_node {
struct lruvec lruvec;
+ /* Legacy local VM stats */
+ struct lruvec_stat __percpu *lruvec_stat_local;
+
+ /* Subtree VM stats (batched updates) */
struct lruvec_stat __percpu *lruvec_stat_cpu;
atomic_long_t lruvec_stat[NR_VM_NODE_STAT_ITEMS];
- atomic_long_t lruvec_stat_local[NR_VM_NODE_STAT_ITEMS];
unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
@@ -265,17 +268,18 @@ struct mem_cgroup {
atomic_t moving_account;
struct task_struct *move_lock_task;
- /* memory.stat */
+ /* Legacy local VM stats and events */
+ struct memcg_vmstats_percpu __percpu *vmstats_local;
+
+ /* Subtree VM stats and events (batched updates) */
struct memcg_vmstats_percpu __percpu *vmstats_percpu;
MEMCG_PADDING(_pad2_);
atomic_long_t vmstats[MEMCG_NR_STAT];
- atomic_long_t vmstats_local[MEMCG_NR_STAT];
-
atomic_long_t vmevents[NR_VM_EVENT_ITEMS];
- atomic_long_t vmevents_local[NR_VM_EVENT_ITEMS];
+ /* memory.events */
atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
unsigned long socket_pressure;
@@ -567,7 +571,11 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
static inline unsigned long memcg_page_state_local(struct mem_cgroup *memcg,
int idx)
{
- long x = atomic_long_read(&memcg->vmstats_local[idx]);
+ long x = 0;
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ x += per_cpu(memcg->vmstats_local->stat[idx], cpu);
#ifdef CONFIG_SMP
if (x < 0)
x = 0;
@@ -641,13 +649,15 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
enum node_stat_item idx)
{
struct mem_cgroup_per_node *pn;
- long x;
+ long x = 0;
+ int cpu;
if (mem_cgroup_disabled())
return node_page_state(lruvec_pgdat(lruvec), idx);
pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
- x = atomic_long_read(&pn->lruvec_stat_local[idx]);
+ for_each_possible_cpu(cpu)
+ x += per_cpu(pn->lruvec_stat_local->count[idx], cpu);
#ifdef CONFIG_SMP
if (x < 0)
x = 0;
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index f0628660d541..1732dea030b2 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -81,6 +81,7 @@ typedef void (*dev_page_free_t)(struct page *page, void *data);
* @res: physical address range covered by @ref
* @ref: reference count that pins the devm_memremap_pages() mapping
* @kill: callback to transition @ref to the dead state
+ * @cleanup: callback to wait for @ref to be idle and reap it
* @dev: host device of the mapping for debug
* @data: private data pointer for page_free()
* @type: memory type: see MEMORY_* in memory_hotplug.h
@@ -92,6 +93,7 @@ struct dev_pagemap {
struct resource res;
struct percpu_ref *ref;
void (*kill)(struct percpu_ref *ref);
+ void (*cleanup)(struct percpu_ref *ref);
struct device *dev;
void *data;
enum memory_type type;
@@ -100,6 +102,7 @@ struct dev_pagemap {
#ifdef CONFIG_ZONE_DEVICE
void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
+void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap);
struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
struct dev_pagemap *pgmap);
@@ -118,6 +121,11 @@ static inline void *devm_memremap_pages(struct device *dev,
return ERR_PTR(-ENXIO);
}
+static inline void devm_memunmap_pages(struct device *dev,
+ struct dev_pagemap *pgmap)
+{
+}
+
static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
struct dev_pagemap *pgmap)
{
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index a3fda9f024c3..4a7944078cc3 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -54,6 +54,10 @@ static inline void mmdrop(struct mm_struct *mm)
* followed by taking the mmap_sem for writing before modifying the
* vmas or anything the coredump pretends not to change from under it.
*
+ * It also has to be called when mmgrab() is used in the context of
+ * the process, but then the mm_count refcount is transferred outside
+ * the context of the process to run down_write() on that pinned mm.
+ *
* NOTE: find_extend_vma() called from GUP context is the only place
* that can modify the "mm" (notably the vm_start/end) under mmap_sem
* for reading and outside the context of the process, so it is also
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 1490e63f69a9..6e1970719dc2 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -95,6 +95,7 @@ static void devm_memremap_pages_release(void *data)
pgmap->kill(pgmap->ref);
for_each_device_pfn(pfn, pgmap)
put_page(pfn_to_page(pfn));
+ pgmap->cleanup(pgmap->ref);
/* pages are dead and unused, undo the arch mapping */
align_start = res->start & ~(SECTION_SIZE - 1);
@@ -133,8 +134,8 @@ static void devm_memremap_pages_release(void *data)
* 2/ The altmap field may optionally be initialized, in which case altmap_valid
* must be set to true
*
- * 3/ pgmap->ref must be 'live' on entry and will be killed at
- * devm_memremap_pages_release() time, or if this routine fails.
+ * 3/ pgmap->ref must be 'live' on entry and will be killed and reaped
+ * at devm_memremap_pages_release() time, or if this routine fails.
*
* 4/ res is expected to be a host memory range that could feasibly be
* treated as a "System RAM" range, i.e. not a device mmio range, but
@@ -156,8 +157,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
pgprot_t pgprot = PAGE_KERNEL;
int error, nid, is_ram;
- if (!pgmap->ref || !pgmap->kill)
+ if (!pgmap->ref || !pgmap->kill || !pgmap->cleanup) {
+ WARN(1, "Missing reference count teardown definition\n");
return ERR_PTR(-EINVAL);
+ }
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
@@ -168,14 +171,16 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
if (conflict_pgmap) {
dev_WARN(dev, "Conflicting mapping in same section\n");
put_dev_pagemap(conflict_pgmap);
- return ERR_PTR(-ENOMEM);
+ error = -ENOMEM;
+ goto err_array;
}
conflict_pgmap = get_dev_pagemap(PHYS_PFN(align_end), NULL);
if (conflict_pgmap) {
dev_WARN(dev, "Conflicting mapping in same section\n");
put_dev_pagemap(conflict_pgmap);
- return ERR_PTR(-ENOMEM);
+ error = -ENOMEM;
+ goto err_array;
}
is_ram = region_intersects(align_start, align_size,
@@ -267,10 +272,18 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
pgmap_array_delete(res);
err_array:
pgmap->kill(pgmap->ref);
+ pgmap->cleanup(pgmap->ref);
+
return ERR_PTR(error);
}
EXPORT_SYMBOL_GPL(devm_memremap_pages);
+void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap)
+{
+ devm_release_action(dev, devm_memremap_pages_release, pgmap);
+}
+EXPORT_SYMBOL_GPL(devm_memunmap_pages);
+
unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
{
/* number of pfns from base where pfn_to_page() is valid */
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 7e85d1e37a6e..770c769d7cb7 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -168,20 +168,21 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid)
EXPORT_SYMBOL(gen_pool_create);
/**
- * gen_pool_add_virt - add a new chunk of special memory to the pool
+ * gen_pool_add_owner- add a new chunk of special memory to the pool
* @pool: pool to add new memory chunk to
* @virt: virtual starting address of memory chunk to add to pool
* @phys: physical starting address of memory chunk to add to pool
* @size: size in bytes of the memory chunk to add to pool
* @nid: node id of the node the chunk structure and bitmap should be
* allocated on, or -1
+ * @owner: private data the publisher would like to recall at alloc time
*
* Add a new chunk of special memory to the specified pool.
*
* Returns 0 on success or a -ve errno on failure.
*/
-int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phys,
- size_t size, int nid)
+int gen_pool_add_owner(struct gen_pool *pool, unsigned long virt, phys_addr_t phys,
+ size_t size, int nid, void *owner)
{
struct gen_pool_chunk *chunk;
int nbits = size >> pool->min_alloc_order;
@@ -195,6 +196,7 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy
chunk->phys_addr = phys;
chunk->start_addr = virt;
chunk->end_addr = virt + size - 1;
+ chunk->owner = owner;
atomic_long_set(&chunk->avail, size);
spin_lock(&pool->lock);
@@ -203,7 +205,7 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy
return 0;
}
-EXPORT_SYMBOL(gen_pool_add_virt);
+EXPORT_SYMBOL(gen_pool_add_owner);
/**
* gen_pool_virt_to_phys - return the physical address of memory
@@ -260,35 +262,20 @@ void gen_pool_destroy(struct gen_pool *pool)
EXPORT_SYMBOL(gen_pool_destroy);
/**
- * gen_pool_alloc - allocate special memory from the pool
- * @pool: pool to allocate from
- * @size: number of bytes to allocate from the pool
- *
- * Allocate the requested number of bytes from the specified pool.
- * Uses the pool allocation function (with first-fit algorithm by default).
- * Can not be used in NMI handler on architectures without
- * NMI-safe cmpxchg implementation.
- */
-unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
-{
- return gen_pool_alloc_algo(pool, size, pool->algo, pool->data);
-}
-EXPORT_SYMBOL(gen_pool_alloc);
-
-/**
- * gen_pool_alloc_algo - allocate special memory from the pool
+ * gen_pool_alloc_algo_owner - allocate special memory from the pool
* @pool: pool to allocate from
* @size: number of bytes to allocate from the pool
* @algo: algorithm passed from caller
* @data: data passed to algorithm
+ * @owner: optionally retrieve the chunk owner
*
* Allocate the requested number of bytes from the specified pool.
* Uses the pool allocation function (with first-fit algorithm by default).
* Can not be used in NMI handler on architectures without
* NMI-safe cmpxchg implementation.
*/
-unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size,
- genpool_algo_t algo, void *data)
+unsigned long gen_pool_alloc_algo_owner(struct gen_pool *pool, size_t size,
+ genpool_algo_t algo, void *data, void **owner)
{
struct gen_pool_chunk *chunk;
unsigned long addr = 0;
@@ -299,6 +286,9 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size,
BUG_ON(in_nmi());
#endif
+ if (owner)
+ *owner = NULL;
+
if (size == 0)
return 0;
@@ -326,12 +316,14 @@ retry:
addr = chunk->start_addr + ((unsigned long)start_bit << order);
size = nbits << order;
atomic_long_sub(size, &chunk->avail);
+ if (owner)
+ *owner = chunk->owner;
break;
}
rcu_read_unlock();
return addr;
}
-EXPORT_SYMBOL(gen_pool_alloc_algo);
+EXPORT_SYMBOL(gen_pool_alloc_algo_owner);
/**
* gen_pool_dma_alloc - allocate special memory from the pool for DMA usage
@@ -367,12 +359,14 @@ EXPORT_SYMBOL(gen_pool_dma_alloc);
* @pool: pool to free to
* @addr: starting address of memory to free back to pool
* @size: size in bytes of memory to free
+ * @owner: private data stashed at gen_pool_add() time
*
* Free previously allocated special memory back to the specified
* pool. Can not be used in NMI handler on architectures without
* NMI-safe cmpxchg implementation.
*/
-void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
+void gen_pool_free_owner(struct gen_pool *pool, unsigned long addr, size_t size,
+ void **owner)
{
struct gen_pool_chunk *chunk;
int order = pool->min_alloc_order;
@@ -382,6 +376,9 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
BUG_ON(in_nmi());
#endif
+ if (owner)
+ *owner = NULL;
+
nbits = (size + (1UL << order) - 1) >> order;
rcu_read_lock();
list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
@@ -392,6 +389,8 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
BUG_ON(remain);
size = nbits << order;
atomic_long_add(size, &chunk->avail);
+ if (owner)
+ *owner = chunk->owner;
rcu_read_unlock();
return;
}
@@ -399,7 +398,7 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
rcu_read_unlock();
BUG();
}
-EXPORT_SYMBOL(gen_pool_free);
+EXPORT_SYMBOL(gen_pool_free_owner);
/**
* gen_pool_for_each_chunk - call func for every chunk of generic memory pool
diff --git a/mm/hmm.c b/mm/hmm.c
index c5d840e34b28..f702a3895d05 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -1354,9 +1354,8 @@ static void hmm_devmem_ref_release(struct percpu_ref *ref)
complete(&devmem->completion);
}
-static void hmm_devmem_ref_exit(void *data)
+static void hmm_devmem_ref_exit(struct percpu_ref *ref)
{
- struct percpu_ref *ref = data;
struct hmm_devmem *devmem;
devmem = container_of(ref, struct hmm_devmem, ref);
@@ -1433,10 +1432,6 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,