diff options
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/Makefile | 4 | ||||
| -rw-r--r-- | mm/balloon_compaction.c | 2 | ||||
| -rw-r--r-- | mm/bootmem.c | 9 | ||||
| -rw-r--r-- | mm/cma.c | 82 | ||||
| -rw-r--r-- | mm/compaction.c | 160 | ||||
| -rw-r--r-- | mm/debug.c | 5 | ||||
| -rw-r--r-- | mm/frontswap.c | 6 | ||||
| -rw-r--r-- | mm/huge_memory.c | 16 | ||||
| -rw-r--r-- | mm/hugetlb.c | 4 | ||||
| -rw-r--r-- | mm/hugetlb_cgroup.c | 103 | ||||
| -rw-r--r-- | mm/internal.h | 32 | ||||
| -rw-r--r-- | mm/iov_iter.c | 1062 | ||||
| -rw-r--r-- | mm/memcontrol.c | 1803 | ||||
| -rw-r--r-- | mm/memory-failure.c | 6 | ||||
| -rw-r--r-- | mm/memory.c | 57 | ||||
| -rw-r--r-- | mm/memory_hotplug.c | 35 | ||||
| -rw-r--r-- | mm/mmap.c | 20 | ||||
| -rw-r--r-- | mm/nobootmem.c | 8 | ||||
| -rw-r--r-- | mm/oom_kill.c | 4 | ||||
| -rw-r--r-- | mm/page-writeback.c | 43 | ||||
| -rw-r--r-- | mm/page_alloc.c | 205 | ||||
| -rw-r--r-- | mm/page_cgroup.c | 529 | ||||
| -rw-r--r-- | mm/page_counter.c | 192 | ||||
| -rw-r--r-- | mm/page_isolation.c | 45 | ||||
| -rw-r--r-- | mm/rmap.c | 94 | ||||
| -rw-r--r-- | mm/slab.c | 25 | ||||
| -rw-r--r-- | mm/slab.h | 8 | ||||
| -rw-r--r-- | mm/slab_common.c | 54 | ||||
| -rw-r--r-- | mm/slub.c | 21 | ||||
| -rw-r--r-- | mm/swap_cgroup.c | 208 | ||||
| -rw-r--r-- | mm/swap_state.c | 1 | ||||
| -rw-r--r-- | mm/swapfile.c | 2 | ||||
| -rw-r--r-- | mm/truncate.c | 6 | ||||
| -rw-r--r-- | mm/vmalloc.c | 3 | ||||
| -rw-r--r-- | mm/vmpressure.c | 8 | ||||
| -rw-r--r-- | mm/vmscan.c | 18 |
36 files changed, 2033 insertions, 2847 deletions
diff --git a/mm/Makefile b/mm/Makefile index 8405eb0023a9..b3c6ce932c64 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -55,7 +55,9 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o -obj-$(CONFIG_MEMCG) += memcontrol.o page_cgroup.o vmpressure.o +obj-$(CONFIG_PAGE_COUNTER) += page_counter.o +obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o +obj-$(CONFIG_MEMCG_SWAP) += swap_cgroup.o obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index b3cbe19f71b5..fcad8322ef36 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -68,11 +68,13 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info) * to be released by the balloon driver. */ if (trylock_page(page)) { +#ifdef CONFIG_BALLOON_COMPACTION if (!PagePrivate(page)) { /* raced with isolation */ unlock_page(page); continue; } +#endif spin_lock_irqsave(&b_dev_info->pages_lock, flags); balloon_page_delete(page); __count_vm_event(BALLOON_DEFLATE); diff --git a/mm/bootmem.c b/mm/bootmem.c index 8a000cebb0d7..477be696511d 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -243,13 +243,10 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) static int reset_managed_pages_done __initdata; -static inline void __init reset_node_managed_pages(pg_data_t *pgdat) +void reset_node_managed_pages(pg_data_t *pgdat) { struct zone *z; - if (reset_managed_pages_done) - return; - for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) z->managed_pages = 0; } @@ -258,8 +255,12 @@ void __init reset_all_zones_managed_pages(void) { struct pglist_data *pgdat; + if (reset_managed_pages_done) + return; + for_each_online_pgdat(pgdat) reset_node_managed_pages(pgdat); + reset_managed_pages_done = 1; } @@ -124,6 +124,7 @@ static int __init cma_activate_area(struct cma *cma) err: kfree(cma->bitmap); + cma->count = 0; return -EINVAL; } @@ -214,12 +215,23 @@ int __init cma_declare_contiguous(phys_addr_t base, bool fixed, struct cma **res_cma) { phys_addr_t memblock_end = memblock_end_of_DRAM(); - phys_addr_t highmem_start = __pa(high_memory); + phys_addr_t highmem_start; int ret = 0; - pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n", - __func__, (unsigned long)size, (unsigned long)base, - (unsigned long)limit, (unsigned long)alignment); +#ifdef CONFIG_X86 + /* + * high_memory isn't direct mapped memory so retrieving its physical + * address isn't appropriate. But it would be useful to check the + * physical address of the highmem boundary so it's justfiable to get + * the physical address from it. On x86 there is a validation check for + * this case, so the following workaround is needed to avoid it. + */ + highmem_start = __pa_nodebug(high_memory); +#else + highmem_start = __pa(high_memory); +#endif + pr_debug("%s(size %pa, base %pa, limit %pa alignment %pa)\n", + __func__, &size, &base, &limit, &alignment); if (cma_area_count == ARRAY_SIZE(cma_areas)) { pr_err("Not enough slots for CMA reserved regions!\n"); @@ -244,52 +256,72 @@ int __init cma_declare_contiguous(phys_addr_t base, size = ALIGN(size, alignment); limit &= ~(alignment - 1); + if (!base) + fixed = false; + /* size should be aligned with order_per_bit */ if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit)) return -EINVAL; /* - * adjust limit to avoid crossing low/high memory boundary for - * automatically allocated regions + * If allocating at a fixed base the request region must not cross the + * low/high memory boundary. */ - if (((limit == 0 || limit > memblock_end) && - (memblock_end - size < highmem_start && - memblock_end > highmem_start)) || - (!fixed && limit > highmem_start && limit - size < highmem_start)) { - limit = highmem_start; - } - - if (fixed && base < highmem_start && base+size > highmem_start) { + if (fixed && base < highmem_start && base + size > highmem_start) { ret = -EINVAL; - pr_err("Region at %08lx defined on low/high memory boundary (%08lx)\n", - (unsigned long)base, (unsigned long)highmem_start); + pr_err("Region at %pa defined on low/high memory boundary (%pa)\n", + &base, &highmem_start); goto err; } + /* + * If the limit is unspecified or above the memblock end, its effective + * value will be the memblock end. Set it explicitly to simplify further + * checks. + */ + if (limit == 0 || limit > memblock_end) + limit = memblock_end; + /* Reserve memory */ - if (base && fixed) { + if (fixed) { if (memblock_is_region_reserved(base, size) || memblock_reserve(base, size) < 0) { ret = -EBUSY; goto err; } } else { - phys_addr_t addr = memblock_alloc_range(size, alignment, base, - limit); + phys_addr_t addr = 0; + + /* + * All pages in the reserved area must come from the same zone. + * If the requested region crosses the low/high memory boundary, + * try allocating from high memory first and fall back to low + * memory in case of failure. + */ + if (base < highmem_start && limit > highmem_start) { + addr = memblock_alloc_range(size, alignment, + highmem_start, limit); + limit = highmem_start; + } + if (!addr) { - ret = -ENOMEM; - goto err; - } else { - base = addr; + addr = memblock_alloc_range(size, alignment, base, + limit); + if (!addr) { + ret = -ENOMEM; + goto err; + } } + + base = addr; } ret = cma_init_reserved_mem(base, size, order_per_bit, res_cma); if (ret) goto err; - pr_info("Reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M, - (unsigned long)base); + pr_info("Reserved %ld MiB at %pa\n", (unsigned long)size / SZ_1M, + &base); return 0; err: diff --git a/mm/compaction.c b/mm/compaction.c index edba18aed173..546e571e9d60 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -41,15 +41,17 @@ static inline void count_compact_events(enum vm_event_item item, long delta) static unsigned long release_freepages(struct list_head *freelist) { struct page *page, *next; - unsigned long count = 0; + unsigned long high_pfn = 0; list_for_each_entry_safe(page, next, freelist, lru) { + unsigned long pfn = page_to_pfn(page); list_del(&page->lru); __free_page(page); - count++; + if (pfn > high_pfn) + high_pfn = pfn; } - return count; + return high_pfn; } static void map_pages(struct list_head *list) @@ -195,16 +197,12 @@ static void update_pageblock_skip(struct compact_control *cc, /* Update where async and sync compaction should restart */ if (migrate_scanner) { - if (cc->finished_update_migrate) - return; if (pfn > zone->compact_cached_migrate_pfn[0]) zone->compact_cached_migrate_pfn[0] = pfn; if (cc->mode != MIGRATE_ASYNC && pfn > zone->compact_cached_migrate_pfn[1]) zone->compact_cached_migrate_pfn[1] = pfn; } else { - if (cc->finished_update_free) - return; if (pfn < zone->compact_cached_free_pfn) zone->compact_cached_free_pfn = pfn; } @@ -479,6 +477,16 @@ isolate_freepages_range(struct compact_control *cc, block_end_pfn = min(block_end_pfn, end_pfn); + /* + * pfn could pass the block_end_pfn if isolated freepage + * is more than pageblock order. In this case, we adjust + * scanning range to right one. + */ + if (pfn >= block_end_pfn) { + block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); + block_end_pfn = min(block_end_pfn, end_pfn); + } + if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone)) break; @@ -705,7 +713,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, del_page_from_lru_list(page, lruvec, page_lru(page)); isolate_success: - cc->finished_update_migrate = true; list_add(&page->lru, migratelist); cc->nr_migratepages++; nr_isolated++; @@ -784,6 +791,9 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn, cc->nr_migratepages = 0; break; } + + if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) + break; } acct_isolated(cc->zone, cc); @@ -876,15 +886,6 @@ static void isolate_freepages(struct compact_control *cc) block_start_pfn - pageblock_nr_pages; /* - * Set a flag that we successfully isolated in this pageblock. - * In the next loop iteration, zone->compact_cached_free_pfn - * will not be updated and thus it will effectively contain the - * highest pageblock we isolated pages from. - */ - if (isolated) - cc->finished_update_free = true; - - /* * isolate_freepages_block() might have aborted due to async * compaction being contended */ @@ -1026,8 +1027,12 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, } acct_isolated(zone, cc); - /* Record where migration scanner will be restarted */ - cc->migrate_pfn = low_pfn; + /* + * Record where migration scanner will be restarted. If we end up in + * the same pageblock as the free scanner, make the scanners fully + * meet so that compact_finished() terminates compaction. + */ + cc->migrate_pfn = (end_pfn <= cc->free_pfn) ? low_pfn : cc->free_pfn; return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE; } @@ -1069,9 +1074,9 @@ static int compact_finished(struct zone *zone, struct compact_control *cc, /* Compaction run is not finished if the watermark is not met */ watermark = low_wmark_pages(zone); - watermark += (1 << cc->order); - if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0)) + if (!zone_watermark_ok(zone, cc->order, watermark, cc->classzone_idx, + cc->alloc_flags)) return COMPACT_CONTINUE; /* Direct compactor: Is a suitable page free? */ @@ -1097,7 +1102,8 @@ static int compact_finished(struct zone *zone, struct compact_control *cc, * COMPACT_PARTIAL - If the allocation would succeed without compaction * COMPACT_CONTINUE - If compaction should run now */ -unsigned long compaction_suitable(struct zone *zone, int order) +unsigned long compaction_suitable(struct zone *zone, int order, + int alloc_flags, int classzone_idx) { int fragindex; unsigned long watermark; @@ -1109,21 +1115,30 @@ unsigned long compaction_suitable(struct zone *zone, int order) if (order == -1) return COMPACT_CONTINUE; + watermark = low_wmark_pages(zone); + /* + * If watermarks for high-order allocation are already met, there + * should be no need for compaction at all. + */ + if (zone_watermark_ok(zone, order, watermark, classzone_idx, + alloc_flags)) + return COMPACT_PARTIAL; + /* * Watermarks for order-0 must be met for compaction. Note the 2UL. * This is because during migration, copies of pages need to be * allocated and for a short time, the footprint is higher */ - watermark = low_wmark_pages(zone) + (2UL << order); - if (!zone_watermark_ok(zone, 0, watermark, 0, 0)) + watermark += (2UL << order); + if (!zone_watermark_ok(zone, 0, watermark, classzone_idx, alloc_flags)) return COMPACT_SKIPPED; /* * fragmentation index determines if allocation failures are due to * low memory or external fragmentation * - * index of -1000 implies allocations might succeed depending on - * watermarks + * index of -1000 would imply allocations might succeed depending on + * watermarks, but we already failed the high-order watermark check * index towards 0 implies failure is due to lack of memory * index towards 1000 implies failure is due to fragmentation * @@ -1133,10 +1148,6 @@ unsigned long compaction_suitable(struct zone *zone, int order) if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold) return COMPACT_SKIPPED; - if (fragindex == -1000 && zone_watermark_ok(zone, order, watermark, - 0, 0)) - return COMPACT_PARTIAL; - return COMPACT_CONTINUE; } @@ -1147,8 +1158,10 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) unsigned long end_pfn = zone_end_pfn(zone); const int migratetype = gfpflags_to_migratetype(cc->gfp_mask); const bool sync = cc->mode != MIGRATE_ASYNC; + unsigned long last_migrated_pfn = 0; - ret = compaction_suitable(zone, cc->order); + ret = compaction_suitable(zone, cc->order, cc->alloc_flags, + cc->classzone_idx); switch (ret) { case COMPACT_PARTIAL: case COMPACT_SKIPPED: @@ -1191,6 +1204,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) while ((ret = compact_finished(zone, cc, migratetype)) == COMPACT_CONTINUE) { int err; + unsigned long isolate_start_pfn = cc->migrate_pfn; switch (isolate_migratepages(zone, cc)) { case ISOLATE_ABORT: @@ -1199,7 +1213,12 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) cc->nr_migratepages = 0; goto out; case ISOLATE_NONE: - continue; + /* + * We haven't isolated and migrated anything, but + * there might still be unflushed migrations from + * previous cc->order aligned block. + */ + goto check_drain; case ISOLATE_SUCCESS: ; } @@ -1224,12 +1243,61 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) goto out; } } + + /* + * Record where we could have freed pages by migration and not + * yet flushed them to buddy allocator. We use the pfn that + * isolate_migratepages() started from in this loop iteration + * - this is the lowest page that could have been isolated and + * then freed by migration. + */ + if (!last_migrated_pfn) + last_migrated_pfn = isolate_start_pfn; + +check_drain: + /* + * Has the migration scanner moved away from the previous + * cc->order aligned block where we migrated from? If yes, + * flush the pages that were freed, so that they can merge and + * compact_finished() can detect immediately if allocation + * would succeed. + */ + if (cc->order > 0 && last_migrated_pfn) { + int cpu; + unsigned long current_block_start = + cc->migrate_pfn & ~((1UL << cc->order) - 1); + + if (last_migrated_pfn < current_block_start) { + cpu = get_cpu(); + lru_add_drain_cpu(cpu); + drain_local_pages(zone); + put_cpu(); + /* No more flushing until we migrate again */ + last_migrated_pfn = 0; + } + } + } out: - /* Release free pages and check accounting */ - cc->nr_freepages -= release_freepages(&cc->freepages); - VM_BUG_ON(cc->nr_freepages != 0); + /* + * Release free pages and update where the free scanner should restart, + * so we don't leave any returned pages behind in the next attempt. + */ + if (cc->nr_freepages > 0) { + unsigned long free_pfn = release_freepages(&cc->freepages); + + cc->nr_freepages = 0; + VM_BUG_ON(free_pfn == 0); + /* The cached pfn is always the first in a pageblock */ + free_pfn &= ~(pageblock_nr_pages-1); + /* + * Only go back, not forward. The cached pfn might have been + * already reset to zone end in compact_finished() + */ + if (free_pfn > zone->compact_cached_free_pfn) + zone->compact_cached_free_pfn = free_pfn; + } trace_mm_compaction_end(ret); @@ -1237,7 +1305,8 @@ out: } static unsigned long compact_zone_order(struct zone *zone, int order, - gfp_t gfp_mask, enum migrate_mode mode, int *contended) + gfp_t gfp_mask, enum migrate_mode mode, int *contended, + int alloc_flags, int classzone_idx) { unsigned long ret; struct compact_control cc = { @@ -1247,6 +1316,8 @@ static unsigned long compact_zone_order(struct zone *zone, int order, .gfp_mask = gfp_mask, .zone = zone, .mode = mode, + .alloc_flags = alloc_flags, + .classzone_idx = classzone_idx, }; INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); @@ -1271,14 +1342,13 @@ int sysctl_extfrag_threshold = 500; * @mode: The migration mode for async, sync light, or sync migration * @contended: Return value that determines if compaction was aborted due to * need_resched() or lock contention - * @candidate_zone: Return the zone where we think allocation should succeed * * This is the main entry point for direct page compaction. */ unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *nodemask, enum migrate_mode mode, int *contended, - struct zone **candidate_zone) + int alloc_flags, int classzone_idx) { enum zone_type high_zoneidx = gfp_zone(gfp_mask); int may_enter_fs = gfp_mask & __GFP_FS; @@ -1286,7 +1356,6 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, struct zoneref *z; struct zone *zone; int rc = COMPACT_DEFERRED; - int alloc_flags = 0; int all_zones_contended = COMPACT_CONTENDED_LOCK; /* init for &= op */ *contended = COMPACT_CONTENDED_NONE; @@ -1295,10 +1364,6 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, if (!order || !may_enter_fs || !may_perform_io) return COMPACT_SKIPPED; -#ifdef CONFIG_CMA - if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) - alloc_flags |= ALLOC_CMA; -#endif /* Compact each zone in the list */ for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx, nodemask) { @@ -1309,7 +1374,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, continue; status = compact_zone_order(zone, order, gfp_mask, mode, - &zone_contended); + &zone_contended, alloc_flags, classzone_idx); rc = max(status, rc); /* * It takes at least one zone that wasn't lock contended @@ -1318,9 +1383,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, all_zones_contended &= zone_contended; /* If a normal allocation would succeed, stop compacting */ - if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, - alloc_flags)) { - *candidate_zone = zone; + if (zone_watermark_ok(zone, order, low_wmark_pages(zone), + classzone_idx, alloc_flags)) { /* * We think the allocation will succeed in this zone, * but it is not certain, hence the false. The caller @@ -1342,7 +1406,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, goto break_loop; } - if (mode != MIGRATE_ASYNC) { + if (mode != MIGRATE_ASYNC && status == COMPACT_COMPLETE) { /* * We think that allocation won't succeed in this zone * so we defer compaction there. If it ends up diff --git a/mm/debug.c b/mm/debug.c index 5ce45c9a29b5..0e58f3211f89 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -95,7 +95,10 @@ void dump_page_badflags(struct page *page, const char *reason, dump_flags(page->flags & badflags, pageflag_names, ARRAY_SIZE(pageflag_names)); } - mem_cgroup_print_bad_page(page); +#ifdef CONFIG_MEMCG + if (page->mem_cgroup) + pr_alert("page->mem_cgroup:%p\n", page->mem_cgroup); +#endif } void dump_page(struct page *page, const char *reason) diff --git a/mm/frontswap.c b/mm/frontswap.c index c30eec536f03..8d82809eb085 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -182,7 +182,7 @@ void __frontswap_init(unsigned type, unsigned long *map) if (frontswap_ops) frontswap_ops->init(type); else { - BUG_ON(type > MAX_SWAPFILES); + BUG_ON(type >= MAX_SWAPFILES); set_bit(type, need_init); } } @@ -244,8 +244,10 @@ int __frontswap_store(struct page *page) the (older) page from frontswap */ inc_frontswap_failed_stores(); - if (dup) + if (dup) { __frontswap_clear(sis, offset); + frontswap_ops->invalidate_page(type, offset); + } } if (frontswap_writethrough_enabled) /* report failure so swap also writes to swap device */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 6a37f1b2ed1e..46f96c23cc27 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -200,7 +200,7 @@ retry: preempt_disable(); if (cmpxchg(&huge_zero_page, NULL, zero_page)) { preempt_enable(); - __free_page(zero_page); + __free_pages(zero_page, compound_order(zero_page)); goto retry; } @@ -232,7 +232,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink, if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { struct page *zero_page = xchg(&huge_zero_page, NULL); BUG_ON(zero_page == NULL); - __free_page(zero_page); + __free_pages(zero_page, compound_order(zero_page)); return HPAGE_PMD_NR; } @@ -784,7 +784,6 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, if (!pmd_none(*pmd)) return false; entry = mk_pmd(zero_page, vma->vm_page_prot); - entry = pmd_wrprotect(entry); entry = pmd_mkhuge(entry); pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, haddr, pmd, entry); @@ -803,7 +802,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, return VM_FAULT_FALLBACK; if (unlikely(anon_vma_prepare(vma))) return VM_FAULT_OOM; - if (unlikely(khugepaged_enter(vma))) + if (unlikely(khugepaged_enter(vma, vma->vm_flags))) return VM_FAULT_OOM; if (!(flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(mm) && transparent_hugepage_use_zero_page()) { @@ -1971,7 +1970,7 @@ int hugepage_madvise(struct vm_area_struct *vma, * register it here without waiting a page fault that * may not happen any time soon. */ - if (unlikely(khugepaged_enter_vma_merge(vma))) + if (unlikely(khugepaged_enter_vma_merge(vma, *vm_flags))) return -ENOMEM; break; case MADV_NOHUGEPAGE: @@ -2072,7 +2071,8 @@ int __khugepaged_enter(struct mm_struct *mm) return 0; } -int khugepaged_enter_vma_merge(struct vm_area_struct *vma) +int khugepaged_enter_vma_merge(struct vm_area_struct *vma, + unsigned long vm_flags) { unsigned long hstart, hend; if (!vma->anon_vma) @@ -2084,11 +2084,11 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma) if (vma->vm_ops) /* khugepaged not yet working on file or special mappings */ return 0; - VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma); + VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma); hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; hend = vma->vm_end & HPAGE_PMD_MASK; if (hstart < hend) - return khugepaged_enter(vma); + return khugepaged_enter(vma, vm_flags); return 0; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 9fd722769927..30cd96879152 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2638,8 +2638,9 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, tlb_start_vma(tlb, vma); mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); + address = start; again: - for (address = start; address < end; address += sz) { + for (; address < end; address += sz) { ptep = huge_pte_offset(mm, address); if (!ptep) continue; @@ -2686,6 +2687,7 @@ again: page_remove_rmap(page); force_flush = !__tlb_remove_page(tlb, page); if (force_flush) { + address += sz; spin_unlock(ptl); break; } diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index a67c26e0f360..037e1c00a5b7 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -14,6 +14,7 @@ */ #include <linux/cgroup.h> +#include <linux/page_counter.h> #include <linux/slab.h> #include <linux/hugetlb.h> #include <linux/hugetlb_cgroup.h> @@ -23,7 +24,7 @@ struct hugetlb_cgroup { /* * the counter to account for hugepages from hugetlb. */ - struct res_counter hugepage[HUGE_MAX_HSTATE]; + struct page_counter hugepage[HUGE_MAX_HSTATE]; }; #define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val)) @@ -60,7 +61,7 @@ static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg) int idx; for (idx = 0; idx < hugetlb_max_hstate; idx++) { - if ((res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE)) > 0) + if (page_counter_read(&h_cg->hugepage[idx])) return true; } return false; @@ -79,12 +80,12 @@ hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) if (parent_h_cgroup) { for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) - res_counter_init(&h_cgroup->hugepage[idx], - &parent_h_cgroup->hugepage[idx]); + page_counter_init(&h_cgroup->hugepage[idx], + &parent_h_cgroup->hugepage[idx]); } else { root_h_cgroup = h_cgroup; for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) - res_counter_init(&h_cgroup->hugepage[idx], NULL); + page_counter_init(&h_cgroup->hugepage[idx], NULL); } return &h_cgroup->css; } @@ -108,9 +109,8 @@ static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css) static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg, struct page *page) { - int csize; - struct res_counter *counter; - struct res_counter *fail_res; + unsigned int nr_pages; + struct page_counter *counter; struct hugetlb_cgroup *page_hcg; struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg); @@ -123,15 +123,15 @@ static void hugetlb_cgroup_move_parent(int idx, struct |
