diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-19 09:45:58 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-19 09:45:58 -0700 |
| commit | 249be8511b269495bc95cb8bdfdd5840b2ba73c0 (patch) | |
| tree | 6920bde053faa0284b52b2a9c9695f5516520377 /mm/sparse.c | |
| parent | 3bfe1fc46794631366faa3ef075e1b0ff7ba120a (diff) | |
| parent | eec4844fae7c033a0c1fc1eb3b8517aeb8b6cc49 (diff) | |
| download | linux-249be8511b269495bc95cb8bdfdd5840b2ba73c0.tar.gz linux-249be8511b269495bc95cb8bdfdd5840b2ba73c0.tar.bz2 linux-249be8511b269495bc95cb8bdfdd5840b2ba73c0.zip | |
Merge branch 'akpm' (patches from Andrew)
Merge yet more updates from Andrew Morton:
"The rest of MM and a kernel-wide procfs cleanup.
Summary of the more significant patches:
- Patch series "mm/memory_hotplug: Factor out memory block
devicehandling", v3. David Hildenbrand.
Some spring-cleaning of the memory hotplug code, notably in
drivers/base/memory.c
- "mm: thp: fix false negative of shmem vma's THP eligibility". Yang
Shi.
Fix /proc/pid/smaps output for THP pages used in shmem.
- "resource: fix locking in find_next_iomem_res()" + 1. Nadav Amit.
Bugfix and speedup for kernel/resource.c
- Patch series "mm: Further memory block device cleanups", David
Hildenbrand.
More spring-cleaning of the memory hotplug code.
- Patch series "mm: Sub-section memory hotplug support". Dan
Williams.
Generalise the memory hotplug code so that pmem can use it more
completely. Then remove the hacks from the libnvdimm code which
were there to work around the memory-hotplug code's constraints.
- "proc/sysctl: add shared variables for range check", Matteo Croce.
We have about 250 instances of
int zero;
...
.extra1 = &zero,
in the tree. This is a tree-wide sweep to make all those private
"zero"s and "one"s use global variables.
Alas, it isn't practical to make those two global integers const"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (38 commits)
proc/sysctl: add shared variables for range check
mm: migrate: remove unused mode argument
mm/sparsemem: cleanup 'section number' data types
libnvdimm/pfn: stop padding pmem namespaces to section alignment
libnvdimm/pfn: fix fsdax-mode namespace info-block zero-fields
mm/devm_memremap_pages: enable sub-section remap
mm: document ZONE_DEVICE memory-model implications
mm/sparsemem: support sub-section hotplug
mm/sparsemem: prepare for sub-section ranges
mm: kill is_dev_zone() helper
mm/hotplug: kill is_dev_zone() usage in __remove_pages()
mm/sparsemem: convert kmalloc_section_memmap() to populate_section_memmap()
mm/hotplug: prepare shrink_{zone, pgdat}_span for sub-section removal
mm/sparsemem: add helpers track active portions of a section at boot
mm/sparsemem: introduce a SECTION_IS_EARLY flag
mm/sparsemem: introduce struct mem_section_usage
drivers/base/memory.c: get rid of find_memory_block_hinted()
mm/memory_hotplug: move and simplify walk_memory_blocks()
mm/memory_hotplug: rename walk_memory_range() and pass start+size instead of pfns
mm: make register_mem_sect_under_node() static
...
Diffstat (limited to 'mm/sparse.c')
| -rw-r--r-- | mm/sparse.c | 355 |
1 files changed, 224 insertions, 131 deletions
diff --git a/mm/sparse.c b/mm/sparse.c index fd13166949b5..72f010d9bff5 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -83,8 +83,15 @@ static int __meminit sparse_index_init(unsigned long section_nr, int nid) unsigned long root = SECTION_NR_TO_ROOT(section_nr); struct mem_section *section; + /* + * An existing section is possible in the sub-section hotplug + * case. First hot-add instantiates, follow-on hot-add reuses + * the existing section. + * + * The mem_hotplug_lock resolves the apparent race below. + */ if (mem_section[root]) - return -EEXIST; + return 0; section = sparse_index_alloc(nid); if (!section) @@ -102,7 +109,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid) #endif #ifdef CONFIG_SPARSEMEM_EXTREME -int __section_nr(struct mem_section* ms) +unsigned long __section_nr(struct mem_section *ms) { unsigned long root_nr; struct mem_section *root = NULL; @@ -121,9 +128,9 @@ int __section_nr(struct mem_section* ms) return (root_nr * SECTIONS_PER_ROOT) + (ms - root); } #else -int __section_nr(struct mem_section* ms) +unsigned long __section_nr(struct mem_section *ms) { - return (int)(ms - mem_section[0]); + return (unsigned long)(ms - mem_section[0]); } #endif @@ -178,10 +185,10 @@ void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn, * Keeping track of this gives us an easy way to break out of * those loops early. */ -int __highest_present_section_nr; +unsigned long __highest_present_section_nr; static void section_mark_present(struct mem_section *ms) { - int section_nr = __section_nr(ms); + unsigned long section_nr = __section_nr(ms); if (section_nr > __highest_present_section_nr) __highest_present_section_nr = section_nr; @@ -189,7 +196,7 @@ static void section_mark_present(struct mem_section *ms) ms->section_mem_map |= SECTION_MARKED_PRESENT; } -static inline int next_present_section_nr(int section_nr) +static inline unsigned long next_present_section_nr(unsigned long section_nr) { do { section_nr++; @@ -210,6 +217,41 @@ static inline unsigned long first_present_section_nr(void) return next_present_section_nr(-1); } +void subsection_mask_set(unsigned long *map, unsigned long pfn, + unsigned long nr_pages) +{ + int idx = subsection_map_index(pfn); + int end = subsection_map_index(pfn + nr_pages - 1); + + bitmap_set(map, idx, end - idx + 1); +} + +void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages) +{ + int end_sec = pfn_to_section_nr(pfn + nr_pages - 1); + unsigned long nr, start_sec = pfn_to_section_nr(pfn); + + if (!nr_pages) + return; + + for (nr = start_sec; nr <= end_sec; nr++) { + struct mem_section *ms; + unsigned long pfns; + + pfns = min(nr_pages, PAGES_PER_SECTION + - (pfn & ~PAGE_SECTION_MASK)); + ms = __nr_to_section(nr); + subsection_mask_set(ms->usage->subsection_map, pfn, pfns); + + pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr, + pfns, subsection_map_index(pfn), + subsection_map_index(pfn + pfns - 1)); + + pfn += pfns; + nr_pages -= pfns; + } +} + /* Record a memory area against a node. */ void __init memory_present(int nid, unsigned long start, unsigned long end) { @@ -288,33 +330,31 @@ struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pn static void __meminit sparse_init_one_section(struct mem_section *ms, unsigned long pnum, struct page *mem_map, - unsigned long *pageblock_bitmap) + struct mem_section_usage *usage, unsigned long flags) { ms->section_mem_map &= ~SECTION_MAP_MASK; - ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) | - SECTION_HAS_MEM_MAP; - ms->pageblock_flags = pageblock_bitmap; + ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) + | SECTION_HAS_MEM_MAP | flags; + ms->usage = usage; } -unsigned long usemap_size(void) +static unsigned long usemap_size(void) { return BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) * sizeof(unsigned long); } -#ifdef CONFIG_MEMORY_HOTPLUG -static unsigned long *__kmalloc_section_usemap(void) +size_t mem_section_usage_size(void) { - return kmalloc(usemap_size(), GFP_KERNEL); + return sizeof(struct mem_section_usage) + usemap_size(); } -#endif /* CONFIG_MEMORY_HOTPLUG */ #ifdef CONFIG_MEMORY_HOTREMOVE -static unsigned long * __init +static struct mem_section_usage * __init sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, unsigned long size) { + struct mem_section_usage *usage; unsigned long goal, limit; - unsigned long *p; int nid; /* * A page may contain usemaps for other sections preventing the @@ -330,15 +370,16 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, limit = goal + (1UL << PA_SECTION_SHIFT); nid = early_pfn_to_nid(goal >> PAGE_SHIFT); again: - p = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, goal, limit, nid); - if (!p && limit) { + usage = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, goal, limit, nid); + if (!usage && limit) { limit = 0; goto again; } - return p; + return usage; } -static void __init check_usemap_section_nr(int nid, unsigned long *usemap) +static void __init check_usemap_section_nr(int nid, + struct mem_section_usage *usage) { unsigned long usemap_snr, pgdat_snr; static unsigned long old_usemap_snr; @@ -352,7 +393,7 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) old_pgdat_snr = NR_MEM_SECTIONS; } - usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT); + usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT); pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); if (usemap_snr == pgdat_snr) return; @@ -380,14 +421,15 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) usemap_snr, pgdat_snr, nid); } #else -static unsigned long * __init +static struct mem_section_usage * __init sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, unsigned long size) { return memblock_alloc_node(size, SMP_CACHE_BYTES, pgdat->node_id); } -static void __init check_usemap_section_nr(int nid, unsigned long *usemap) +static void __init check_usemap_section_nr(int nid, + struct mem_section_usage *usage) { } #endif /* CONFIG_MEMORY_HOTREMOVE */ @@ -404,8 +446,8 @@ static unsigned long __init section_map_size(void) return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION); } -struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid, - struct vmem_altmap *altmap) +struct page __init *__populate_section_memmap(unsigned long pfn, + unsigned long nr_pages, int nid, struct vmem_altmap *altmap) { unsigned long size = section_map_size(); struct page *map = sparse_buffer_alloc(size); @@ -474,32 +516,35 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin, unsigned long pnum_end, unsigned long map_count) { - unsigned long pnum, usemap_longs, *usemap; + struct mem_section_usage *usage; + unsigned long pnum; struct page *map; - usemap_longs = BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS); - usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid), - usemap_size() * - map_count); - if (!usemap) { + usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid), + mem_section_usage_size() * map_count); + if (!usage) { pr_err("%s: node[%d] usemap allocation failed", __func__, nid); goto failed; } sparse_buffer_init(map_count * section_map_size(), nid); for_each_present_section_nr(pnum_begin, pnum) { + unsigned long pfn = section_nr_to_pfn(pnum); + if (pnum >= pnum_end) break; - map = sparse_mem_map_populate(pnum, nid, NULL); + map = __populate_section_memmap(pfn, PAGES_PER_SECTION, + nid, NULL); if (!map) { pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.", __func__, nid); pnum_begin = pnum; goto failed; } - check_usemap_section_nr(nid, usemap); - sparse_init_one_section(__nr_to_section(pnum), pnum, map, usemap); - usemap += usemap_longs; + check_usemap_section_nr(nid, usage); + sparse_init_one_section(__nr_to_section(pnum), pnum, map, usage, + SECTION_IS_EARLY); + usage = (void *) usage + mem_section_usage_size(); } sparse_buffer_fini(); return; @@ -590,21 +635,20 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn) #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP -static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, - struct vmem_altmap *altmap) +static struct page *populate_section_memmap(unsigned long pfn, + unsigned long nr_pages, int nid, struct vmem_altmap *altmap) { - /* This will make the necessary allocations eventually. */ - return sparse_mem_map_populate(pnum, nid, altmap); + return __populate_section_memmap(pfn, nr_pages, nid, altmap); } -static void __kfree_section_memmap(struct page *memmap, + +static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages, struct vmem_altmap *altmap) { - unsigned long start = (unsigned long)memmap; - unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION); + unsigned long start = (unsigned long) pfn_to_page(pfn); + unsigned long end = start + nr_pages * sizeof(struct page); vmemmap_free(start, end, altmap); } -#ifdef CONFIG_MEMORY_HOTREMOVE static void free_map_bootmem(struct page *memmap) { unsigned long start = (unsigned long)memmap; @@ -612,9 +656,9 @@ static void free_map_bootmem(struct page *memmap) vmemmap_free(start, end, NULL); } -#endif /* CONFIG_MEMORY_HOTREMOVE */ #else -static struct page *__kmalloc_section_memmap(void) +struct page *populate_section_memmap(unsigned long pfn, + unsigned long nr_pages, int nid, struct vmem_altmap *altmap) { struct page *page, *ret; unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION; @@ -635,15 +679,11 @@ got_map_ptr: return ret; } -static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, +static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages, struct vmem_altmap *altmap) { - return __kmalloc_section_memmap(); -} + struct page *memmap = pfn_to_page(pfn); -static void __kfree_section_memmap(struct page *memmap, - struct vmem_altmap *altmap) -{ if (is_vmalloc_addr(memmap)) vfree(memmap); else @@ -651,7 +691,6 @@ static void __kfree_section_memmap(struct page *memmap, get_order(sizeof(struct page) * PAGES_PER_SECTION)); } -#ifdef CONFIG_MEMORY_HOTREMOVE static void free_map_bootmem(struct page *memmap) { unsigned long maps_section_nr, removing_section_nr, i; @@ -681,13 +720,122 @@ static void free_map_bootmem(struct page *memmap) put_page_bootmem(page); } } -#endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_SPARSEMEM_VMEMMAP */ +static void section_deactivate(unsigned long pfn, unsigned long nr_pages, + struct vmem_altmap *altmap) +{ + DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 }; + DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 }; + struct mem_section *ms = __pfn_to_section(pfn); + bool section_is_early = early_section(ms); + struct page *memmap = NULL; + unsigned long *subsection_map = ms->usage + ? &ms->usage->subsection_map[0] : NULL; + + subsection_mask_set(map, pfn, nr_pages); + if (subsection_map) + bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION); + + if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION), + "section already deactivated (%#lx + %ld)\n", + pfn, nr_pages)) + return; + + /* + * There are 3 cases to handle across two configurations + * (SPARSEMEM_VMEMMAP={y,n}): + * + * 1/ deactivation of a partial hot-added section (only possible + * in the SPARSEMEM_VMEMMAP=y case). + * a/ section was present at memory init + * b/ section was hot-added post memory init + * 2/ deactivation of a complete hot-added section + * 3/ deactivation of a complete section from memory init + * + * For 1/, when subsection_map does not empty we will not be + * freeing the usage map, but still need to free the vmemmap + * range. + * + * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified + */ + bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION); + if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION)) { + unsigned long section_nr = pfn_to_section_nr(pfn); + + if (!section_is_early) { + kfree(ms->usage); + ms->usage = NULL; + } + memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); + ms->section_mem_map = sparse_encode_mem_map(NULL, section_nr); + } + + if (section_is_early && memmap) + free_map_bootmem(memmap); + else + depopulate_section_memmap(pfn, nr_pages, altmap); +} + +static struct page * __meminit section_activate(int nid, unsigned long pfn, + unsigned long nr_pages, struct vmem_altmap *altmap) +{ + DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 }; + struct mem_section *ms = __pfn_to_section(pfn); + struct mem_section_usage *usage = NULL; + unsigned long *subsection_map; + struct page *memmap; + int rc = 0; + + subsection_mask_set(map, pfn, nr_pages); + + if (!ms->usage) { + usage = kzalloc(mem_section_usage_size(), GFP_KERNEL); + if (!usage) + return ERR_PTR(-ENOMEM); + ms->usage = usage; + } + subsection_map = &ms->usage->subsection_map[0]; + + if (bitmap_empty(map, SUBSECTIONS_PER_SECTION)) + rc = -EINVAL; + else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION)) + rc = -EEXIST; + else + bitmap_or(subsection_map, map, subsection_map, + SUBSECTIONS_PER_SECTION); + + if (rc) { + if (usage) + ms->usage = NULL; + kfree(usage); + return ERR_PTR(rc); + } + + /* + * The early init code does not consider partially populated + * initial sections, it simply assumes that memory will never be + * referenced. If we hot-add memory into such a section then we + * do not need to populate the memmap and can simply reuse what + * is already there. + */ + if (nr_pages < PAGES_PER_SECTION && early_section(ms)) + return pfn_to_page(pfn); + + memmap = populate_section_memmap(pfn, nr_pages, nid, altmap); + if (!memmap) { + section_deactivate(pfn, nr_pages, altmap); + return ERR_PTR(-ENOMEM); + } + + return memmap; +} + /** - * sparse_add_one_section - add a memory section + * sparse_add_section - add a memory section, or populate an existing one * @nid: The node to add section on * @start_pfn: start pfn of the memory range + * @nr_pages: number of pfns to add in the section * @altmap: device page map * * This is only intended for hotplug. @@ -697,56 +845,40 @@ static void free_map_bootmem(struct page *memmap) * * -EEXIST - Section has been present. * * -ENOMEM - Out of memory. */ -int __meminit sparse_add_one_section(int nid, unsigned long start_pfn, - struct vmem_altmap *altmap) +int __meminit sparse_add_section(int nid, unsigned long start_pfn, + unsigned long nr_pages, struct vmem_altmap *altmap) { unsigned long section_nr = pfn_to_section_nr(start_pfn); struct mem_section *ms; struct page *memmap; - unsigned long *usemap; int ret; - /* - * no locking for this, because it does its own - * plus, it does a kmalloc - */ ret = sparse_index_init(section_nr, nid); - if (ret < 0 && ret != -EEXIST) + if (ret < 0) return ret; - ret = 0; - memmap = kmalloc_section_memmap(section_nr, nid, altmap); - if (!memmap) - return -ENOMEM; - usemap = __kmalloc_section_usemap(); - if (!usemap) { - __kfree_section_memmap(memmap, altmap); - return -ENOMEM; - } - ms = __pfn_to_section(start_pfn); - if (ms->section_mem_map & SECTION_MARKED_PRESENT) { - ret = -EEXIST; - goto out; - } + memmap = section_activate(nid, start_pfn, nr_pages, altmap); + if (IS_ERR(memmap)) + return PTR_ERR(memmap); /* * Poison uninitialized struct pages in order to catch invalid flags * combinations. */ - page_init_poison(memmap, sizeof(struct page) * PAGES_PER_SECTION); + page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages); + ms = __pfn_to_section(start_pfn); + set_section_nid(section_nr, nid); section_mark_present(ms); - sparse_init_one_section(ms, section_nr, memmap, usemap); -out: - if (ret < 0) { - kfree(usemap); - __kfree_section_memmap(memmap, altmap); - } - return ret; + /* Align memmap to section boundary in the subsection case */ + if (section_nr_to_pfn(section_nr) != start_pfn) + memmap = pfn_to_kaddr(section_nr_to_pfn(section_nr)); + sparse_init_one_section(ms, section_nr, memmap, ms->usage, 0); + + return 0; } -#ifdef CONFIG_MEMORY_HOTREMOVE #ifdef CONFIG_MEMORY_FAILURE static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) { @@ -777,51 +909,12 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) } #endif -static void free_section_usemap(struct page *memmap, unsigned long *usemap, +void sparse_remove_section(struct mem_section *ms, unsigned long pfn, + unsigned long nr_pages, unsigned long map_offset, struct vmem_altmap *altmap) { - struct page *usemap_page; - - if (!usemap) - return; - - usemap_page = virt_to_page(usemap); - /* - * Check to see if allocation came from hot-plug-add - */ - if (PageSlab(usemap_page) || PageCompound(usemap_page)) { - kfree(usemap); - if (memmap) - __kfree_section_memmap(memmap, altmap); - return; - } - - /* - * The usemap came from bootmem. This is packed with other usemaps - * on the section which has pgdat at boot time. Just keep it as is now. - */ - - if (memmap) - free_map_bootmem(memmap); -} - -void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset, struct vmem_altmap *altmap) -{ - struct page *memmap = NULL; - unsigned long *usemap = NULL; - - if (ms->section_mem_map) { - usemap = ms->pageblock_flags; - memmap = sparse_decode_mem_map(ms->section_mem_map, - __section_nr(ms)); - ms->section_mem_map = 0; - ms->pageblock_flags = NULL; - } - - clear_hwpoisoned_pages(memmap + map_offset, - PAGES_PER_SECTION - map_offset); - free_section_usemap(memmap, usemap, altmap); + clear_hwpoisoned_pages(pfn_to_page(pfn) + map_offset, + nr_pages - map_offset); + section_deactivate(pfn, nr_pages, altmap); } -#endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_MEMORY_HOTPLUG */ |
