diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-06-14 06:08:46 -1000 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-06-14 06:08:46 -1000 |
| commit | fd6b99fa41ddee9699cff1d0e04e90afa452b24d (patch) | |
| tree | 6816c8b91ffd00556cbe61f6602af31a458377b1 | |
| parent | c78ad1be4b4d65eb214421b90a788abf3c85c3ea (diff) | |
| parent | 50f44ee7248ad2f7984ef081974a6ecd09724b3e (diff) | |
| download | linux-fd6b99fa41ddee9699cff1d0e04e90afa452b24d.tar.gz linux-fd6b99fa41ddee9699cff1d0e04e90afa452b24d.tar.bz2 linux-fd6b99fa41ddee9699cff1d0e04e90afa452b24d.zip | |
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
"16 fixes"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
mm/devm_memremap_pages: fix final page put race
PCI/P2PDMA: track pgmap references per resource, not globally
lib/genalloc: introduce chunk owners
PCI/P2PDMA: fix the gen_pool_add_virt() failure path
mm/devm_memremap_pages: introduce devm_memunmap_pages
drivers/base/devres: introduce devm_release_action()
mm/vmscan.c: fix trying to reclaim unevictable LRU page
coredump: fix race condition between collapse_huge_page() and core dumping
mm/mlock.c: change count_mm_mlocked_page_nr return type
mm: mmu_gather: remove __tlb_reset_range() for force flush
fs/ocfs2: fix race in ocfs2_dentry_attach_lock()
mm/vmscan.c: fix recent_rotated history
mm/mlock.c: mlockall error for flag MCL_ONFAULT
scripts/decode_stacktrace.sh: prefix addr2line with $CROSS_COMPILE
mm/list_lru.c: fix memory leak in __memcg_init_list_lru_node
mm: memcontrol: don't batch updates of local VM stats and events
| -rw-r--r-- | drivers/base/devres.c | 24 | ||||
| -rw-r--r-- | drivers/dax/device.c | 13 | ||||
| -rw-r--r-- | drivers/nvdimm/pmem.c | 17 | ||||
| -rw-r--r-- | drivers/pci/p2pdma.c | 115 | ||||
| -rw-r--r-- | fs/ocfs2/dcache.c | 12 | ||||
| -rw-r--r-- | include/linux/device.h | 1 | ||||
| -rw-r--r-- | include/linux/genalloc.h | 55 | ||||
| -rw-r--r-- | include/linux/memcontrol.h | 26 | ||||
| -rw-r--r-- | include/linux/memremap.h | 8 | ||||
| -rw-r--r-- | include/linux/sched/mm.h | 4 | ||||
| -rw-r--r-- | kernel/memremap.c | 23 | ||||
| -rw-r--r-- | lib/genalloc.c | 51 | ||||
| -rw-r--r-- | mm/hmm.c | 14 | ||||
| -rw-r--r-- | mm/khugepaged.c | 3 | ||||
| -rw-r--r-- | mm/list_lru.c | 2 | ||||
| -rw-r--r-- | mm/memcontrol.c | 41 | ||||
| -rw-r--r-- | mm/mlock.c | 7 | ||||
| -rw-r--r-- | mm/mmu_gather.c | 24 | ||||
| -rw-r--r-- | mm/vmscan.c | 6 | ||||
| -rwxr-xr-x | scripts/decode_stacktrace.sh | 2 | ||||
| -rw-r--r-- | tools/testing/nvdimm/test/iomap.c | 2 |
21 files changed, 310 insertions, 140 deletions
diff --git a/drivers/base/devres.c b/drivers/base/devres.c index e038e2b3b7ea..0bbb328bd17f 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -755,10 +755,32 @@ void devm_remove_action(struct device *dev, void (*action)(void *), void *data) WARN_ON(devres_destroy(dev, devm_action_release, devm_action_match, &devres)); - } EXPORT_SYMBOL_GPL(devm_remove_action); +/** + * devm_release_action() - release previously added custom action + * @dev: Device that owns the action + * @action: Function implementing the action + * @data: Pointer to data passed to @action implementation + * + * Releases and removes instance of @action previously added by + * devm_add_action(). Both action and data should match one of the + * existing entries. + */ +void devm_release_action(struct device *dev, void (*action)(void *), void *data) +{ + struct action_devres devres = { + .data = data, + .action = action, + }; + + WARN_ON(devres_release(dev, devm_action_release, devm_action_match, + &devres)); + +} +EXPORT_SYMBOL_GPL(devm_release_action); + /* * Managed kmalloc/kfree */ diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 996d68ff992a..8465d12fecba 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -27,9 +27,8 @@ static void dev_dax_percpu_release(struct percpu_ref *ref) complete(&dev_dax->cmp); } -static void dev_dax_percpu_exit(void *data) +static void dev_dax_percpu_exit(struct percpu_ref *ref) { - struct percpu_ref *ref = data; struct dev_dax *dev_dax = ref_to_dev_dax(ref); dev_dbg(&dev_dax->dev, "%s\n", __func__); @@ -466,18 +465,12 @@ int dev_dax_probe(struct device *dev) if (rc) return rc; - rc = devm_add_action_or_reset(dev, dev_dax_percpu_exit, &dev_dax->ref); - if (rc) - return rc; - dev_dax->pgmap.ref = &dev_dax->ref; dev_dax->pgmap.kill = dev_dax_percpu_kill; + dev_dax->pgmap.cleanup = dev_dax_percpu_exit; addr = devm_memremap_pages(dev, &dev_dax->pgmap); - if (IS_ERR(addr)) { - devm_remove_action(dev, dev_dax_percpu_exit, &dev_dax->ref); - percpu_ref_exit(&dev_dax->ref); + if (IS_ERR(addr)) return PTR_ERR(addr); - } inode = dax_inode(dax_dev); cdev = inode->i_cdev; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 28cb44c61d4a..24d7fe7c74ed 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -303,11 +303,19 @@ static const struct attribute_group *pmem_attribute_groups[] = { NULL, }; -static void pmem_release_queue(void *q) +static void __pmem_release_queue(struct percpu_ref *ref) { + struct request_queue *q; + + q = container_of(ref, typeof(*q), q_usage_counter); blk_cleanup_queue(q); } +static void pmem_release_queue(void *ref) +{ + __pmem_release_queue(ref); +} + static void pmem_freeze_queue(struct percpu_ref *ref) { struct request_queue *q; @@ -399,12 +407,10 @@ static int pmem_attach_disk(struct device *dev, if (!q) return -ENOMEM; - if (devm_add_action_or_reset(dev, pmem_release_queue, q)) - return -ENOMEM; - pmem->pfn_flags = PFN_DEV; pmem->pgmap.ref = &q->q_usage_counter; pmem->pgmap.kill = pmem_freeze_queue; + pmem->pgmap.cleanup = __pmem_release_queue; if (is_nd_pfn(dev)) { if (setup_pagemap_fsdax(dev, &pmem->pgmap)) return -ENOMEM; @@ -425,6 +431,9 @@ static int pmem_attach_disk(struct device *dev, pmem->pfn_flags |= PFN_MAP; memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); } else { + if (devm_add_action_or_reset(dev, pmem_release_queue, + &q->q_usage_counter)) + return -ENOMEM; addr = devm_memremap(dev, pmem->phys_addr, pmem->size, ARCH_MEMREMAP_PMEM); memcpy(&bb_res, &nsio->res, sizeof(bb_res)); diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index 742928d0053e..a98126ad9c3a 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -20,12 +20,16 @@ #include <linux/seq_buf.h> struct pci_p2pdma { - struct percpu_ref devmap_ref; - struct completion devmap_ref_done; struct gen_pool *pool; bool p2pmem_published; }; +struct p2pdma_pagemap { + struct dev_pagemap pgmap; + struct percpu_ref ref; + struct completion ref_done; +}; + static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -74,41 +78,45 @@ static const struct attribute_group p2pmem_group = { .name = "p2pmem", }; +static struct p2pdma_pagemap *to_p2p_pgmap(struct percpu_ref *ref) +{ + return container_of(ref, struct p2pdma_pagemap, ref); +} + static void pci_p2pdma_percpu_release(struct percpu_ref *ref) { - struct pci_p2pdma *p2p = - container_of(ref, struct pci_p2pdma, devmap_ref); + struct p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(ref); - complete_all(&p2p->devmap_ref_done); + complete(&p2p_pgmap->ref_done); } static void pci_p2pdma_percpu_kill(struct percpu_ref *ref) { - /* - * pci_p2pdma_add_resource() may be called multiple times - * by a driver and may register the percpu_kill devm action multiple - * times. We only want the first action to actually kill the - * percpu_ref. - */ - if (percpu_ref_is_dying(ref)) - return; - percpu_ref_kill(ref); } +static void pci_p2pdma_percpu_cleanup(struct percpu_ref *ref) +{ + struct p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(ref); + + wait_for_completion(&p2p_pgmap->ref_done); + percpu_ref_exit(&p2p_pgmap->ref); +} + static void pci_p2pdma_release(void *data) { struct pci_dev *pdev = data; + struct pci_p2pdma *p2pdma = pdev->p2pdma; - if (!pdev->p2pdma) + if (!p2pdma) return; - wait_for_completion(&pdev->p2pdma->devmap_ref_done); - percpu_ref_exit(&pdev->p2pdma->devmap_ref); + /* Flush and disable pci_alloc_p2p_mem() */ + pdev->p2pdma = NULL; + synchronize_rcu(); - gen_pool_destroy(pdev->p2pdma->pool); + gen_pool_destroy(p2pdma->pool); sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group); - pdev->p2pdma = NULL; } static int pci_p2pdma_setup(struct pci_dev *pdev) @@ -124,12 +132,6 @@ static int pci_p2pdma_setup(struct pci_dev *pdev) if (!p2p->pool) goto out; - init_completion(&p2p->devmap_ref_done); - error = percpu_ref_init(&p2p->devmap_ref, - pci_p2pdma_percpu_release, 0, GFP_KERNEL); - if (error) - goto out_pool_destroy; - error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev); if (error) goto out_pool_destroy; @@ -163,6 +165,7 @@ out: int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, u64 offset) { + struct p2pdma_pagemap *p2p_pgmap; struct dev_pagemap *pgmap; void *addr; int error; @@ -185,18 +188,27 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, return error; } - pgmap = devm_kzalloc(&pdev->dev, sizeof(*pgmap), GFP_KERNEL); - if (!pgmap) + p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL); + if (!p2p_pgmap) return -ENOMEM; + init_completion(&p2p_pgmap->ref_done); + error = percpu_ref_init(&p2p_pgmap->ref, + pci_p2pdma_percpu_release, 0, GFP_KERNEL); + if (error) + goto pgmap_free; + + pgmap = &p2p_pgmap->pgmap; + pgmap->res.start = pci_resource_start(pdev, bar) + offset; pgmap->res.end = pgmap->res.start + size - 1; pgmap->res.flags = pci_resource_flags(pdev, bar); - pgmap->ref = &pdev->p2pdma->devmap_ref; + pgmap->ref = &p2p_pgmap->ref; pgmap->type = MEMORY_DEVICE_PCI_P2PDMA; pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) - pci_resource_start(pdev, bar); pgmap->kill = pci_p2pdma_percpu_kill; + pgmap->cleanup = pci_p2pdma_percpu_cleanup; addr = devm_memremap_pages(&pdev->dev, pgmap); if (IS_ERR(addr)) { @@ -204,19 +216,22 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, goto pgmap_free; } - error = gen_pool_add_virt(pdev->p2pdma->pool, (unsigned long)addr, + error = gen_pool_add_owner(pdev->p2pdma->pool, (unsigned long)addr, pci_bus_address(pdev, bar) + offset, - resource_size(&pgmap->res), dev_to_node(&pdev->dev)); + resource_size(&pgmap->res), dev_to_node(&pdev->dev), + &p2p_pgmap->ref); if (error) - goto pgmap_free; + goto pages_free; pci_info(pdev, "added peer-to-peer DMA memory %pR\n", &pgmap->res); return 0; +pages_free: + devm_memunmap_pages(&pdev->dev, pgmap); pgmap_free: - devm_kfree(&pdev->dev, pgmap); + devm_kfree(&pdev->dev, p2p_pgmap); return error; } EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource); @@ -585,19 +600,30 @@ EXPORT_SYMBOL_GPL(pci_p2pmem_find_many); */ void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size) { - void *ret; + void *ret = NULL; + struct percpu_ref *ref; + /* + * Pairs with synchronize_rcu() in pci_p2pdma_release() to + * ensure pdev->p2pdma is non-NULL for the duration of the + * read-lock. + */ + rcu_read_lock(); if (unlikely(!pdev->p2pdma)) - return NULL; - - if (unlikely(!percpu_ref_tryget_live(&pdev->p2pdma->devmap_ref))) - return NULL; - - ret = (void *)gen_pool_alloc(pdev->p2pdma->pool, size); + goto out; - if (unlikely(!ret)) - percpu_ref_put(&pdev->p2pdma->devmap_ref); + ret = (void *)gen_pool_alloc_owner(pdev->p2pdma->pool, size, + (void **) &ref); + if (!ret) + goto out; + if (unlikely(!percpu_ref_tryget_live(ref))) { + gen_pool_free(pdev->p2pdma->pool, (unsigned long) ret, size); + ret = NULL; + goto out; + } +out: + rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(pci_alloc_p2pmem); @@ -610,8 +636,11 @@ EXPORT_SYMBOL_GPL(pci_alloc_p2pmem); */ void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size) { - gen_pool_free(pdev->p2pdma->pool, (uintptr_t)addr, size); - percpu_ref_put(&pdev->p2pdma->devmap_ref); + struct percpu_ref *ref; + + gen_pool_free_owner(pdev->p2pdma->pool, (uintptr_t)addr, size, + (void **) &ref); + percpu_ref_put(ref); } EXPORT_SYMBOL_GPL(pci_free_p2pmem); diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 2d016937fdda..42a61eecdacd 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -296,6 +296,18 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, out_attach: spin_lock(&dentry_attach_lock); + if (unlikely(dentry->d_fsdata && !alias)) { + /* d_fsdata is set by a racing thread which is doing + * the same thing as this thread is doing. Leave the racing + * thread going ahead and we return here. + */ + spin_unlock(&dentry_attach_lock); + iput(dl->dl_inode); + ocfs2_lock_res_free(&dl->dl_lockres); + kfree(dl); + return 0; + } + dentry->d_fsdata = dl; dl->dl_count++; spin_unlock(&dentry_attach_lock); diff --git a/include/linux/device.h b/include/linux/device.h index e85264fb6616..848fc71c6ba6 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -713,6 +713,7 @@ void __iomem *devm_of_iomap(struct device *dev, /* allows to add/remove a custom action to devres stack */ int devm_add_action(struct device *dev, void (*action)(void *), void *data); void devm_remove_action(struct device *dev, void (*action)(void *), void *data); +void devm_release_action(struct device *dev, void (*action)(void *), void *data); static inline int devm_add_action_or_reset(struct device *dev, void (*action)(void *), void *data) diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index dd0a452373e7..a337313e064f 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -75,6 +75,7 @@ struct gen_pool_chunk { struct list_head next_chunk; /* next chunk in pool */ atomic_long_t avail; phys_addr_t phys_addr; /* physical starting address of memory chunk */ + void *owner; /* private data to retrieve at alloc time */ unsigned long start_addr; /* start address of memory chunk */ unsigned long end_addr; /* end address of memory chunk (inclusive) */ unsigned long bits[0]; /* bitmap for allocating memory chunk */ @@ -96,8 +97,15 @@ struct genpool_data_fixed { extern struct gen_pool *gen_pool_create(int, int); extern phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long); -extern int gen_pool_add_virt(struct gen_pool *, unsigned long, phys_addr_t, - size_t, int); +extern int gen_pool_add_owner(struct gen_pool *, unsigned long, phys_addr_t, + size_t, int, void *); + +static inline int gen_pool_add_virt(struct gen_pool *pool, unsigned long addr, + phys_addr_t phys, size_t size, int nid) +{ + return gen_pool_add_owner(pool, addr, phys, size, nid, NULL); +} + /** * gen_pool_add - add a new chunk of special memory to the pool * @pool: pool to add new memory chunk to @@ -116,12 +124,47 @@ static inline int gen_pool_add(struct gen_pool *pool, unsigned long addr, return gen_pool_add_virt(pool, addr, -1, size, nid); } extern void gen_pool_destroy(struct gen_pool *); -extern unsigned long gen_pool_alloc(struct gen_pool *, size_t); -extern unsigned long gen_pool_alloc_algo(struct gen_pool *, size_t, - genpool_algo_t algo, void *data); +unsigned long gen_pool_alloc_algo_owner(struct gen_pool *pool, size_t size, + genpool_algo_t algo, void *data, void **owner); + +static inline unsigned long gen_pool_alloc_owner(struct gen_pool *pool, + size_t size, void **owner) +{ + return gen_pool_alloc_algo_owner(pool, size, pool->algo, pool->data, + owner); +} + +static inline unsigned long gen_pool_alloc_algo(struct gen_pool *pool, + size_t size, genpool_algo_t algo, void *data) +{ + return gen_pool_alloc_algo_owner(pool, size, algo, data, NULL); +} + +/** + * gen_pool_alloc - allocate special memory from the pool + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * + * Allocate the requested number of bytes from the specified pool. + * Uses the pool allocation function (with first-fit algorithm by default). + * Can not be used in NMI handler on architectures without + * NMI-safe cmpxchg implementation. + */ +static inline unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) +{ + return gen_pool_alloc_algo(pool, size, pool->algo, pool->data); +} + extern void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma); -extern void gen_pool_free(struct gen_pool *, unsigned long, size_t); +extern void gen_pool_free_owner(struct gen_pool *pool, unsigned long addr, + size_t size, void **owner); +static inline void gen_pool_free(struct gen_pool *pool, unsigned long addr, + size_t size) +{ + gen_pool_free_owner(pool, addr, size, NULL); +} + extern void gen_pool_for_each_chunk(struct gen_pool *, void (*)(struct gen_pool *, struct gen_pool_chunk *, void *), void *); extern size_t gen_pool_avail(struct gen_pool *); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index edf9e8f32d70..1dcb763bb610 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -117,9 +117,12 @@ struct memcg_shrinker_map { struct mem_cgroup_per_node { struct lruvec lruvec; + /* Legacy local VM stats */ + struct lruvec_stat __percpu *lruvec_stat_local; + + /* Subtree VM stats (batched updates) */ struct lruvec_stat __percpu *lruvec_stat_cpu; atomic_long_t lruvec_stat[NR_VM_NODE_STAT_ITEMS]; - atomic_long_t lruvec_stat_local[NR_VM_NODE_STAT_ITEMS]; unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; @@ -265,17 +268,18 @@ struct mem_cgroup { atomic_t moving_account; struct task_struct *move_lock_task; - /* memory.stat */ + /* Legacy local VM stats and events */ + struct memcg_vmstats_percpu __percpu *vmstats_local; + + /* Subtree VM stats and events (batched updates) */ struct memcg_vmstats_percpu __percpu *vmstats_percpu; MEMCG_PADDING(_pad2_); atomic_long_t vmstats[MEMCG_NR_STAT]; - atomic_long_t vmstats_local[MEMCG_NR_STAT]; - atomic_long_t vmevents[NR_VM_EVENT_ITEMS]; - atomic_long_t vmevents_local[NR_VM_EVENT_ITEMS]; + /* memory.events */ atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS]; unsigned long socket_pressure; @@ -567,7 +571,11 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) static inline unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx) { - long x = atomic_long_read(&memcg->vmstats_local[idx]); + long x = 0; + int cpu; + + for_each_possible_cpu(cpu) + x += per_cpu(memcg->vmstats_local->stat[idx], cpu); #ifdef CONFIG_SMP if (x < 0) x = 0; @@ -641,13 +649,15 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, enum node_stat_item idx) { struct mem_cgroup_per_node *pn; - long x; + long x = 0; + int cpu; if (mem_cgroup_disabled()) return node_page_state(lruvec_pgdat(lruvec), idx); pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - x = atomic_long_read(&pn->lruvec_stat_local[idx]); + for_each_possible_cpu(cpu) + x += per_cpu(pn->lruvec_stat_local->count[idx], cpu); #ifdef CONFIG_SMP if (x < 0) x = 0; diff --git a/include/linux/memremap.h b/include/linux/memremap.h index f0628660d541..1732dea030b2 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -81,6 +81,7 @@ typedef void (*dev_page_free_t)(struct page *page, void *data); * @res: physical address range covered by @ref * @ref: reference count that pins the devm_memremap_pages() mapping * @kill: callback to transition @ref to the dead state + * @cleanup: callback to wait for @ref to be idle and reap it * @dev: host device of the mapping for debug * @data: private data pointer for page_free() * @type: memory type: see MEMORY_* in memory_hotplug.h @@ -92,6 +93,7 @@ struct dev_pagemap { struct resource res; struct percpu_ref *ref; void (*kill)(struct percpu_ref *ref); + void (*cleanup)(struct percpu_ref *ref); struct device *dev; void *data; enum memory_type type; @@ -100,6 +102,7 @@ struct dev_pagemap { #ifdef CONFIG_ZONE_DEVICE void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); +void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap); struct dev_pagemap *get_dev_pagemap(unsigned long pfn, struct dev_pagemap *pgmap); @@ -118,6 +121,11 @@ static inline void *devm_memremap_pages(struct device *dev, return ERR_PTR(-ENXIO); } +static inline void devm_memunmap_pages(struct device *dev, + struct dev_pagemap *pgmap) +{ +} + static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn, struct dev_pagemap *pgmap) { diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index a3fda9f024c3..4a7944078cc3 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -54,6 +54,10 @@ static inline void mmdrop(struct mm_struct *mm) * followed by taking the mmap_sem for writing before modifying the * vmas or anything the coredump pretends not to change from under it. * + * It also has to be called when mmgrab() is used in the context of + * the process, but then the mm_count refcount is transferred outside + * the context of the process to run down_write() on that pinned mm. + * * NOTE: find_extend_vma() called from GUP context is the only place * that can modify the "mm" (notably the vm_start/end) under mmap_sem * for reading and outside the context of the process, so it is also diff --git a/kernel/memremap.c b/kernel/memremap.c index 1490e63f69a9..6e1970719dc2 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -95,6 +95,7 @@ static void devm_memremap_pages_release(void *data) pgmap->kill(pgmap->ref); for_each_device_pfn(pfn, pgmap) put_page(pfn_to_page(pfn)); + pgmap->cleanup(pgmap->ref); /* pages are dead and unused, undo the arch mapping */ align_start = res->start & ~(SECTION_SIZE - 1); @@ -133,8 +134,8 @@ static void devm_memremap_pages_release(void *data) * 2/ The altmap field may optionally be initialized, in which case altmap_valid * must be set to true * - * 3/ pgmap->ref must be 'live' on entry and will be killed at - * devm_memremap_pages_release() time, or if this routine fails. + * 3/ pgmap->ref must be 'live' on entry and will be killed and reaped + * at devm_memremap_pages_release() time, or if this routine fails. * * 4/ res is expected to be a host memory range that could feasibly be * treated as a "System RAM" range, i.e. not a device mmio range, but @@ -156,8 +157,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) pgprot_t pgprot = PAGE_KERNEL; int error, nid, is_ram; - if (!pgmap->ref || !pgmap->kill) + if (!pgmap->ref || !pgmap->kill || !pgmap->cleanup) { + WARN(1, "Missing reference count teardown definition\n"); return ERR_PTR(-EINVAL); + } align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) @@ -168,14 +171,16 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) if (conflict_pgmap) { dev_WARN(dev, "Conflicting mapping in same section\n"); put_dev_pagemap(conflict_pgmap); - return ERR_PTR(-ENOMEM); + error = -ENOMEM; + goto err_array; } conflict_pgmap = get_dev_pagemap(PHYS_PFN(align_end), NULL); if (conflict_pgmap) { dev_WARN(dev, "Conflicting mapping in same section\n"); put_dev_pagemap(conflict_pgmap); - return ERR_PTR(-ENOMEM); + error = -ENOMEM; + goto err_array; } is_ram = region_intersects(align_start, align_size, @@ -267,10 +272,18 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) pgmap_array_delete(res); err_array: pgmap->kill(pgmap->ref); + pgmap->cleanup(pgmap->ref); + return ERR_PTR(error); } EXPORT_SYMBOL_GPL(devm_memremap_pages); +void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap) +{ + devm_release_action(dev, devm_memremap_pages_release, pgmap); +} +EXPORT_SYMBOL_GPL(devm_memunmap_pages); + unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) { /* number of pfns from base where pfn_to_page() is valid */ diff --git a/lib/genalloc.c b/lib/genalloc.c index 7e85d1e37a6e..770c769d7cb7 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -168,20 +168,21 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid) EXPORT_SYMBOL(gen_pool_create); /** - * gen_pool_add_virt - add a new chunk of special memory to the pool + * gen_pool_add_owner- add a new chunk of special memory to the pool * @pool: pool to add new memory chunk to * @virt: virtual starting address of memory chunk to add to pool * @phys: physical starting address of memory chunk to add to pool * @size: size in bytes of the memory chunk to add to pool * @nid: node id of the node the chunk structure and bitmap should be * allocated on, or -1 + * @owner: private data the publisher would like to recall at alloc time * * Add a new chunk of special memory to the specified pool. * * Returns 0 on success or a -ve errno on failure. */ -int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phys, - size_t size, int nid) +int gen_pool_add_owner(struct gen_pool *pool, unsigned long virt, phys_addr_t phys, + size_t size, int nid, void *owner) { struct gen_pool_chunk *chunk; int nbits = size >> pool->min_alloc_order; @@ -195,6 +196,7 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy chunk->phys_addr = phys; chunk->start_addr = virt; chunk->end_addr = virt + size - 1; + chunk->owner = owner; atomic_long_set(&chunk->avail, size); spin_lock(&pool->lock); @@ -203,7 +205,7 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy return 0; } -EXPORT_SYMBOL(gen_pool_add_virt); +EXPORT_SYMBOL(gen_pool_add_owner); /** * gen_pool_virt_to_phys - return the physical address of memory @@ -260,35 +262,20 @@ void gen_pool_destroy(struct gen_pool *pool) EXPORT_SYMBOL(gen_pool_destroy); /** - * gen_pool_alloc - allocate special memory from the pool - * @pool: pool to allocate from - * @size: number of bytes to allocate from the pool - * - * Allocate the requested number of bytes from the specified pool. - * Uses the pool allocation function (with first-fit algorithm by default). - * Can not be used in NMI handler on architectures without - * NMI-safe cmpxchg implementation. - */ -unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) -{ - return gen_pool_alloc_algo(pool, size, pool->algo, pool->data); -} -EXPORT_SYMBOL(gen_pool_alloc); - -/** - * gen_pool_alloc_algo - allocate special memory from the pool + * gen_pool_alloc_algo_owner - allocate special memory from the pool * @pool: pool to allocate from * @size: number of bytes to allocate from the pool * @algo: algorithm passed from caller * @data: data passed to algorithm + * @owner: optionally retrieve the chunk owner * * Allocate the requested number of bytes from the specified pool. * Uses the pool allocation function (with first-fit algorithm by default). * Can not be used in NMI handler on architectures without * NMI-safe cmpxchg implementation. */ -unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size, - genpool_algo_t algo, void *data) +unsigned long gen_pool_alloc_algo_owner(struct gen_pool *pool, size_t size, + genpool_algo_t algo, void *data, void **owner) { struct gen_pool_chunk *chunk; unsigned long addr = 0; @@ -299,6 +286,9 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size, BUG_ON(in_nmi()); #endif + if (owner) + *owner = NULL; + if (size == 0) return 0; @@ -326,12 +316,14 @@ retry: addr = chunk->start_addr + ((unsigned long)start_bit << order); size = nbits << order; atomic_long_sub(size, &chunk->avail); + if (owner) + *owner = chunk->owner; break; } rcu_read_unlock(); return addr; } -EXPORT_SYMBOL(gen_pool_alloc_algo); +EXPORT_SYMBOL(gen_pool_alloc_algo_owner); /** * gen_pool_dma_alloc - allocate special memory from the pool for DMA usage @@ -367,12 +359,14 @@ EXPORT_SYMBOL(gen_pool_dma_alloc); * @pool: pool to free to * @addr: starting address of memory to free back to pool * @size: size in bytes of memory to free + * @owner: private data stashed at gen_pool_add() time * * Free previously allocated special memory back to the specified * pool. Can not be used in NMI handler on architectures without * NMI-safe cmpxchg implementation. */ -void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +void gen_pool_free_owner(struct gen_pool *pool, unsigned long addr, size_t size, + void **owner) { struct gen_pool_chunk *chunk; int order = pool->min_alloc_order; @@ -382,6 +376,9 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) BUG_ON(in_nmi()); #endif + if (owner) + *owner = NULL; + nbits = (size + (1UL << order) - 1) >> order; rcu_read_lock(); list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { @@ -392,6 +389,8 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) BUG_ON(remain); size = nbits << order; atomic_long_add(size, &chunk->avail); + if (owner) + *owner = chunk->owner; rcu_read_unlock(); return; } @@ -399,7 +398,7 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) rcu_read_unlock(); BUG(); } -EXPORT_SYMBOL(gen_pool_free); +EXPORT_SYMBOL(gen_pool_free_owner); /** * gen_pool_for_each_chunk - call func for every chunk of generic memory pool @@ -1354,9 +1354,8 @@ static void hmm_devmem_ref_release(struct percpu_ref *ref) complete(&devmem->completion); } -static void hmm_devmem_ref_exit(void *data) +static void hmm_devmem_ref_exit(struct percpu_ref *ref) { - struct percpu_ref *ref = data; struct hmm_devmem *devmem; devmem = container_of(ref, struct hmm_devmem, ref); @@ -1433,10 +1432,6 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, |
