author     Linus Torvalds <torvalds@linux-foundation.org>  2018-06-07 18:39:37 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-06-07 18:39:37 -0700
commit     68abbe729567cef128b2c2141f2ed2567f3b8372 (patch)
tree       aa75c39cc815eee4d7cc8db2988fe10879fccd3e /mm
parent     ba1b7309fc2e909a5828c36a7cd187e5d7df6f53 (diff)
parent     016e92da037e0b43dd5e5848c19b0b9749506963 (diff)
download   linux-68abbe729567cef128b2c2141f2ed2567f3b8372.tar.gz
           linux-68abbe729567cef128b2c2141f2ed2567f3b8372.tar.bz2
           linux-68abbe729567cef128b2c2141f2ed2567f3b8372.zip
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:

 - a few misc things
 - ocfs2 updates
 - v9fs updates
 - MM
 - procfs updates
 - lib/ updates
 - autofs updates

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (118 commits)
  autofs: small cleanup in autofs_getpath()
  autofs: clean up includes
  autofs: comment on selinux changes needed for module autoload
  autofs: update MAINTAINERS entry for autofs
  autofs: use autofs instead of autofs4 in documentation
  autofs: rename autofs documentation files
  autofs: create autofs Kconfig and Makefile
  autofs: delete fs/autofs4 source files
  autofs: update fs/autofs4/Makefile
  autofs: update fs/autofs4/Kconfig
  autofs: copy autofs4 to autofs
  autofs4: use autofs instead of autofs4 everywhere
  autofs4: merge auto_fs.h and auto_fs4.h
  fs/binfmt_misc.c: do not allow offset overflow
  checkpatch: improve patch recognition
  lib/ucs2_string.c: add MODULE_LICENSE()
  lib/mpi: headers cleanup
  lib/percpu_ida.c: use _irqsave() instead of local_irq_save() + spin_lock
  lib/idr.c: remove simple_ida_lock
  lib/bitmap.c: micro-optimization for __bitmap_complement()
  ...
Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig             3
-rw-r--r--  mm/Makefile            1
-rw-r--r--  mm/backing-dev.c       4
-rw-r--r--  mm/filemap.c           8
-rw-r--r--  mm/gup.c              42
-rw-r--r--  mm/huge_memory.c       7
-rw-r--r--  mm/hugetlb.c          44
-rw-r--r--  mm/hugetlb_cgroup.c    6
-rw-r--r--  mm/init-mm.c           1
-rw-r--r--  mm/ksm.c              11
-rw-r--r--  mm/memblock.c         27
-rw-r--r--  mm/memcontrol.c      342
-rw-r--r--  mm/memfd.c           345
-rw-r--r--  mm/memory.c           36
-rw-r--r--  mm/memory_hotplug.c   23
-rw-r--r--  mm/mmap.c              4
-rw-r--r--  mm/nommu.c             2
-rw-r--r--  mm/oom_kill.c          2
-rw-r--r--  mm/page_alloc.c       74
-rw-r--r--  mm/page_counter.c    100
-rw-r--r--  mm/shmem.c           382
-rw-r--r--  mm/slab.c              4
-rw-r--r--  mm/slob.c              4
-rw-r--r--  mm/slub.c            112
-rw-r--r--  mm/sparse.c            6
-rw-r--r--  mm/swap_slots.c       10
-rw-r--r--  mm/swap_state.c        3
-rw-r--r--  mm/userfaultfd.c      22
-rw-r--r--  mm/util.c              6
-rw-r--r--  mm/vmalloc.c          41
-rw-r--r--  mm/vmpressure.c       35
-rw-r--r--  mm/vmscan.c           38
32 files changed, 1008 insertions(+), 737 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index 3e0b6e87f65d..00bffa7a5112 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -754,3 +754,6 @@ config GUP_BENCHMARK
performance of get_user_pages_fast().
See tools/testing/selftests/vm/gup_benchmark.c
+
+config ARCH_HAS_PTE_SPECIAL
+ bool
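This promptless symbol replaces the old __HAVE_ARCH_PTE_SPECIAL preprocessor convention: an architecture that implements pte_special() now does "select ARCH_HAS_PTE_SPECIAL" in its own Kconfig, and generic code tests the generated CONFIG_ macro. A minimal sketch of the consumer side, mirroring the gup.c hunk further below:

/* Generated by Kconfig when an architecture selects ARCH_HAS_PTE_SPECIAL;
 * arch headers no longer define a bare __HAVE_ARCH_PTE_SPECIAL macro. */
#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
/* fast GUP paths that rely on pte_special() go here */
#endif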
diff --git a/mm/Makefile b/mm/Makefile
index b4e54a9ae9c5..8716bdabe1e6 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -105,3 +105,4 @@ obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
obj-$(CONFIG_HMM) += hmm.o
+obj-$(CONFIG_MEMFD_CREATE) += memfd.o
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 8fe3ebd6ac00..347cc834c04a 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -557,7 +557,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
memcg = mem_cgroup_from_css(memcg_css);
blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys);
blkcg = css_to_blkcg(blkcg_css);
- memcg_cgwb_list = mem_cgroup_cgwb_list(memcg);
+ memcg_cgwb_list = &memcg->cgwb_list;
blkcg_cgwb_list = &blkcg->cgwb_list;
/* look up again under lock and discard on blkcg mismatch */
@@ -736,7 +736,7 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
*/
void wb_memcg_offline(struct mem_cgroup *memcg)
{
- struct list_head *memcg_cgwb_list = mem_cgroup_cgwb_list(memcg);
+ struct list_head *memcg_cgwb_list = &memcg->cgwb_list;
struct bdi_writeback *wb, *next;
spin_lock_irq(&cgwb_lock);
diff --git a/mm/filemap.c b/mm/filemap.c
index 0604cb02e6f3..52517f28e6f4 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2489,7 +2489,7 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
*
* We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
*/
-int filemap_fault(struct vm_fault *vmf)
+vm_fault_t filemap_fault(struct vm_fault *vmf)
{
int error;
struct file *file = vmf->vma->vm_file;
@@ -2499,7 +2499,7 @@ int filemap_fault(struct vm_fault *vmf)
pgoff_t offset = vmf->pgoff;
pgoff_t max_off;
struct page *page;
- int ret = 0;
+ vm_fault_t ret = 0;
max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
if (unlikely(offset >= max_off))
@@ -2693,11 +2693,11 @@ next:
}
EXPORT_SYMBOL(filemap_map_pages);
-int filemap_page_mkwrite(struct vm_fault *vmf)
+vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
struct inode *inode = file_inode(vmf->vma->vm_file);
- int ret = VM_FAULT_LOCKED;
+ vm_fault_t ret = VM_FAULT_LOCKED;
sb_start_pagefault(inode->i_sb);
file_update_time(vmf->vma->vm_file);
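filemap_fault() and filemap_page_mkwrite() switch from int to the dedicated vm_fault_t return type. A minimal user-space sketch of the intent, with illustrative names and values (the real typedef and VM_FAULT_* flags live in the kernel headers):

/* A dedicated type documents that fault handlers return VM_FAULT_*
 * bit codes, never errno values; once the kernel marks the typedef
 * __bitwise, sparse can reject accidental mixing of the two. */
typedef unsigned int vm_fault_sketch_t;

#define VM_FAULT_LOCKED_SKETCH 0x0200u  /* illustrative value */

static vm_fault_sketch_t page_mkwrite_sketch(void)
{
        return VM_FAULT_LOCKED_SKETCH;  /* a fault code, not -ENOMEM */
}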
diff --git a/mm/gup.c b/mm/gup.c
index 541904a7c60f..1020c7f8f5ee 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -212,53 +212,69 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
unsigned long address, pud_t *pudp,
unsigned int flags, unsigned int *page_mask)
{
- pmd_t *pmd;
+ pmd_t *pmd, pmdval;
spinlock_t *ptl;
struct page *page;
struct mm_struct *mm = vma->vm_mm;
pmd = pmd_offset(pudp, address);
- if (pmd_none(*pmd))
+ /*
+ * The READ_ONCE() will stabilize the pmdval in a register or
+ * on the stack so that it will stop changing under the code.
+ */
+ pmdval = READ_ONCE(*pmd);
+ if (pmd_none(pmdval))
return no_page_table(vma, flags);
- if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
+ if (pmd_huge(pmdval) && vma->vm_flags & VM_HUGETLB) {
page = follow_huge_pmd(mm, address, pmd, flags);
if (page)
return page;
return no_page_table(vma, flags);
}
- if (is_hugepd(__hugepd(pmd_val(*pmd)))) {
+ if (is_hugepd(__hugepd(pmd_val(pmdval)))) {
page = follow_huge_pd(vma, address,
- __hugepd(pmd_val(*pmd)), flags,
+ __hugepd(pmd_val(pmdval)), flags,
PMD_SHIFT);
if (page)
return page;
return no_page_table(vma, flags);
}
retry:
- if (!pmd_present(*pmd)) {
+ if (!pmd_present(pmdval)) {
if (likely(!(flags & FOLL_MIGRATION)))
return no_page_table(vma, flags);
VM_BUG_ON(thp_migration_supported() &&
- !is_pmd_migration_entry(*pmd));
- if (is_pmd_migration_entry(*pmd))
+ !is_pmd_migration_entry(pmdval));
+ if (is_pmd_migration_entry(pmdval))
pmd_migration_entry_wait(mm, pmd);
+ pmdval = READ_ONCE(*pmd);
+ /*
+ * MADV_DONTNEED may convert the pmd to null because
+ * mmap_sem is held in read mode
+ */
+ if (pmd_none(pmdval))
+ return no_page_table(vma, flags);
goto retry;
}
- if (pmd_devmap(*pmd)) {
+ if (pmd_devmap(pmdval)) {
ptl = pmd_lock(mm, pmd);
page = follow_devmap_pmd(vma, address, pmd, flags);
spin_unlock(ptl);
if (page)
return page;
}
- if (likely(!pmd_trans_huge(*pmd)))
+ if (likely(!pmd_trans_huge(pmdval)))
return follow_page_pte(vma, address, pmd, flags);
- if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
+ if ((flags & FOLL_NUMA) && pmd_protnone(pmdval))
return no_page_table(vma, flags);
retry_locked:
ptl = pmd_lock(mm, pmd);
+ if (unlikely(pmd_none(*pmd))) {
+ spin_unlock(ptl);
+ return no_page_table(vma, flags);
+ }
if (unlikely(!pmd_present(*pmd))) {
spin_unlock(ptl);
if (likely(!(flags & FOLL_MIGRATION)))
@@ -1354,7 +1370,7 @@ static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
}
}
-#ifdef __HAVE_ARCH_PTE_SPECIAL
+#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
@@ -1430,7 +1446,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
{
return 0;
}
-#endif /* __HAVE_ARCH_PTE_SPECIAL */
+#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */
#if defined(__HAVE_ARCH_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
static int __gup_device_huge(unsigned long pfn, unsigned long addr,
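The rewrite above snapshots *pmd once into pmdval so that every subsequent predicate tests the same value, even while a racing thread (e.g. via MADV_DONTNEED) rewrites the entry. A compilable user-space sketch of the pattern; READ_ONCE() here is a simplified stand-in for the kernel macro:

#include <stdint.h>

/* Simplified stand-in: the volatile cast forces exactly one load, so
 * later checks all see one consistent snapshot of the entry. */
#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))

int pmd_checks_sketch(uint64_t *pmd)
{
        uint64_t pmdval = READ_ONCE(*pmd);      /* snapshot once */

        if (pmdval == 0)                        /* pmd_none() analogue */
                return -1;
        /* Every later test uses pmdval, never *pmd, so the checks can
         * never disagree about what the entry contained. */
        return (int)(pmdval & 1);               /* "present bit" analogue */
}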
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ac5591d8622c..ba8fdc0b6e7f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -483,11 +483,8 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
static inline struct list_head *page_deferred_list(struct page *page)
{
- /*
- * ->lru in the tail pages is occupied by compound_head.
- * Let's use ->mapping + ->index in the second tail page as list_head.
- */
- return (struct list_head *)&page[2].mapping;
+ /* ->lru in the tail pages is occupied by compound_head. */
+ return &page[2].deferred_list;
}
void prep_transhuge_page(struct page *page)
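The cast goes away because struct page now carries a named deferred_list member overlaying the mapping/index words of the second tail page. A self-contained illustration of the union trick (layout heavily simplified; not the real struct page):

struct list_head_sketch {
        struct list_head_sketch *next, *prev;
};

struct page_sketch {
        union {
                struct {                        /* old, untyped view */
                        void *mapping;
                        unsigned long index;
                };
                struct list_head_sketch deferred_list; /* new, typed view */
        };
};
/* &page[2].deferred_list replaces (struct list_head *)&page[2].mapping */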
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 129088710510..696befffe6f7 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3159,7 +3159,7 @@ static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
* hugepage VMA. do_page_fault() is supposed to trap this, so BUG if we get
* this far.
*/
-static int hugetlb_vm_op_fault(struct vm_fault *vmf)
+static vm_fault_t hugetlb_vm_op_fault(struct vm_fault *vmf)
{
BUG();
return 0;
@@ -3686,6 +3686,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page *page;
pte_t new_pte;
spinlock_t *ptl;
+ unsigned long haddr = address & huge_page_mask(h);
/*
* Currently, we are forced to kill the process in the event the
@@ -3716,7 +3717,7 @@ retry:
u32 hash;
struct vm_fault vmf = {
.vma = vma,
- .address = address,
+ .address = haddr,
.flags = flags,
/*
* Hard to debug if it ends up being
@@ -3733,14 +3734,14 @@ retry:
* fault to make calling code simpler.
*/
hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping,
- idx, address);
+ idx, haddr);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
ret = handle_userfault(&vmf, VM_UFFD_MISSING);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
goto out;
}
- page = alloc_huge_page(vma, address, 0);
+ page = alloc_huge_page(vma, haddr, 0);
if (IS_ERR(page)) {
ret = PTR_ERR(page);
if (ret == -ENOMEM)
@@ -3789,12 +3790,12 @@ retry:
* the spinlock.
*/
if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
- if (vma_needs_reservation(h, vma, address) < 0) {
+ if (vma_needs_reservation(h, vma, haddr) < 0) {
ret = VM_FAULT_OOM;
goto backout_unlocked;
}
/* Just decrements count, does not deallocate */
- vma_end_reservation(h, vma, address);
+ vma_end_reservation(h, vma, haddr);
}
ptl = huge_pte_lock(h, mm, ptep);
@@ -3808,17 +3809,17 @@ retry:
if (anon_rmap) {
ClearPagePrivate(page);
- hugepage_add_new_anon_rmap(page, vma, address);
+ hugepage_add_new_anon_rmap(page, vma, haddr);
} else
page_dup_rmap(page, true);
new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
&& (vma->vm_flags & VM_SHARED)));
- set_huge_pte_at(mm, address, ptep, new_pte);
+ set_huge_pte_at(mm, haddr, ptep, new_pte);
hugetlb_count_add(pages_per_huge_page(h), mm);
if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
/* Optimization, do the COW without a second fault */
- ret = hugetlb_cow(mm, vma, address, ptep, page, ptl);
+ ret = hugetlb_cow(mm, vma, haddr, ptep, page, ptl);
}
spin_unlock(ptl);
@@ -3830,7 +3831,7 @@ backout:
spin_unlock(ptl);
backout_unlocked:
unlock_page(page);
- restore_reserve_on_error(h, vma, address, page);
+ restore_reserve_on_error(h, vma, haddr, page);
put_page(page);
goto out;
}
@@ -3883,10 +3884,9 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
struct hstate *h = hstate_vma(vma);
struct address_space *mapping;
int need_wait_lock = 0;
+ unsigned long haddr = address & huge_page_mask(h);
- address &= huge_page_mask(h);
-
- ptep = huge_pte_offset(mm, address, huge_page_size(h));
+ ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
if (ptep) {
entry = huge_ptep_get(ptep);
if (unlikely(is_hugetlb_entry_migration(entry))) {
@@ -3896,20 +3896,20 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return VM_FAULT_HWPOISON_LARGE |
VM_FAULT_SET_HINDEX(hstate_index(h));
} else {
- ptep = huge_pte_alloc(mm, address, huge_page_size(h));
+ ptep = huge_pte_alloc(mm, haddr, huge_page_size(h));
if (!ptep)
return VM_FAULT_OOM;
}
mapping = vma->vm_file->f_mapping;
- idx = vma_hugecache_offset(h, vma, address);
+ idx = vma_hugecache_offset(h, vma, haddr);
/*
* Serialize hugepage allocation and instantiation, so that we don't
* get spurious allocation failures if two CPUs race to instantiate
* the same page in the page cache.
*/
- hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, idx, address);
+ hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, idx, haddr);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
entry = huge_ptep_get(ptep);
@@ -3939,16 +3939,16 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* consumed.
*/
if ((flags & FAULT_FLAG_WRITE) && !huge_pte_write(entry)) {
- if (vma_needs_reservation(h, vma, address) < 0) {
+ if (vma_needs_reservation(h, vma, haddr) < 0) {
ret = VM_FAULT_OOM;
goto out_mutex;
}
/* Just decrements count, does not deallocate */
- vma_end_reservation(h, vma, address);
+ vma_end_reservation(h, vma, haddr);
if (!(vma->vm_flags & VM_MAYSHARE))
pagecache_page = hugetlbfs_pagecache_page(h,
- vma, address);
+ vma, haddr);
}
ptl = huge_pte_lock(h, mm, ptep);
@@ -3973,16 +3973,16 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (flags & FAULT_FLAG_WRITE) {
if (!huge_pte_write(entry)) {
- ret = hugetlb_cow(mm, vma, address, ptep,
+ ret = hugetlb_cow(mm, vma, haddr, ptep,
pagecache_page, ptl);
goto out_put_page;
}
entry = huge_pte_mkdirty(entry);
}
entry = pte_mkyoung(entry);
- if (huge_ptep_set_access_flags(vma, address, ptep, entry,
+ if (huge_ptep_set_access_flags(vma, haddr, ptep, entry,
flags & FAULT_FLAG_WRITE))
- update_mmu_cache(vma, address, ptep);
+ update_mmu_cache(vma, haddr, ptep);
out_put_page:
if (page != pagecache_page)
unlock_page(page);
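Rather than destructively masking address at the top of hugetlb_fault(), the series computes the huge-page-aligned base once as haddr and threads it through. huge_page_mask(h) is simply the alignment mask, as this small sketch shows:

#include <stdint.h>

/* huge_page_mask(h) is ~(huge_page_size(h) - 1); ANDing rounds the
 * faulting address down to the base of its huge page. */
static inline uint64_t huge_addr_sketch(uint64_t address, uint64_t huge_page_size)
{
        return address & ~(huge_page_size - 1);
}
/* e.g. huge_addr_sketch(0x201234, 0x200000) == 0x200000 for 2 MiB pages */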
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index eec1150125b9..68c2f2f3c05b 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -84,7 +84,7 @@ static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
limit = round_down(PAGE_COUNTER_MAX,
1 << huge_page_order(&hstates[idx]));
- ret = page_counter_limit(counter, limit);
+ ret = page_counter_set_max(counter, limit);
VM_BUG_ON(ret);
}
}
@@ -273,7 +273,7 @@ static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
case RES_USAGE:
return (u64)page_counter_read(counter) * PAGE_SIZE;
case RES_LIMIT:
- return (u64)counter->limit * PAGE_SIZE;
+ return (u64)counter->max * PAGE_SIZE;
case RES_MAX_USAGE:
return (u64)counter->watermark * PAGE_SIZE;
case RES_FAILCNT:
@@ -306,7 +306,7 @@ static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
switch (MEMFILE_ATTR(of_cft(of)->private)) {
case RES_LIMIT:
mutex_lock(&hugetlb_limit_mutex);
- ret = page_counter_limit(&h_cg->hugepage[idx], nr_pages);
+ ret = page_counter_set_max(&h_cg->hugepage[idx], nr_pages);
mutex_unlock(&hugetlb_limit_mutex);
break;
default:
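These hunks are part of the tree-wide rename of page_counter_limit()/->limit to page_counter_set_max()/->max; behaviour is unchanged. A simplified, non-atomic sketch of what the setter enforces (the real function uses xchg() plus a recheck so it stays safe against concurrent charges):

#include <errno.h>

struct page_counter_sketch {
        long usage;
        long max;       /* formerly ->limit */
};

static int page_counter_set_max_sketch(struct page_counter_sketch *c, long max)
{
        if (c->usage > max)
                return -EBUSY;  /* cannot set a max below current usage */
        c->max = max;
        return 0;
}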
diff --git a/mm/init-mm.c b/mm/init-mm.c
index f94d5d15ebc0..f0179c9c04c2 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -22,6 +22,7 @@ struct mm_struct init_mm = {
.mm_count = ATOMIC_INIT(1),
.mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem),
.page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
+ .arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
.user_ns = &init_user_ns,
INIT_MM_CONTEXT(init_mm)
diff --git a/mm/ksm.c b/mm/ksm.c
index 7d6558f3bac9..e2d2886fb1df 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -840,6 +840,17 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
return err;
}
+static inline struct stable_node *page_stable_node(struct page *page)
+{
+ return PageKsm(page) ? page_rmapping(page) : NULL;
+}
+
+static inline void set_page_stable_node(struct page *page,
+ struct stable_node *stable_node)
+{
+ page->mapping = (void *)((unsigned long)stable_node | PAGE_MAPPING_KSM);
+}
+
#ifdef CONFIG_SYSFS
/*
* Only called through the sysfs control interface:
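These helpers, moved into ksm.c from a shared header, pack a stable_node pointer and a type flag into the single page->mapping word. A self-contained sketch of the low-bit tagging involved (tag value illustrative; the kernel's flag is PAGE_MAPPING_KSM):

#include <stdint.h>

#define MAPPING_KSM_SKETCH 0x3UL        /* illustrative tag bits */

/* Word-aligned pointers leave their low bits zero, so a type tag can
 * be OR'ed in on store and masked back out on load. */
static inline void *tag_stable_node(void *node)
{
        return (void *)((uintptr_t)node | MAPPING_KSM_SKETCH);
}

static inline void *untag_stable_node(void *mapping)
{
        return (void *)((uintptr_t)mapping & ~MAPPING_KSM_SKETCH);
}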
diff --git a/mm/memblock.c b/mm/memblock.c
index 5108356ad8aa..93ad42bc8a73 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -68,7 +68,7 @@ ulong __init_memblock choose_memblock_flags(void)
/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */
static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
{
- return *size = min(*size, (phys_addr_t)ULLONG_MAX - base);
+ return *size = min(*size, PHYS_ADDR_MAX - base);
}
/*
@@ -697,6 +697,11 @@ static int __init_memblock memblock_remove_range(struct memblock_type *type,
int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
{
+ phys_addr_t end = base + size - 1;
+
+ memblock_dbg("memblock_remove: [%pa-%pa] %pS\n",
+ &base, &end, (void *)_RET_IP_);
+
return memblock_remove_range(&memblock.memory, base, size);
}
@@ -925,7 +930,7 @@ void __init_memblock __next_mem_range(u64 *idx, int nid, ulong flags,
r = &type_b->regions[idx_b];
r_start = idx_b ? r[-1].base + r[-1].size : 0;
r_end = idx_b < type_b->cnt ?
- r->base : (phys_addr_t)ULLONG_MAX;
+ r->base : PHYS_ADDR_MAX;
/*
* if idx_b advanced past idx_a,
@@ -1041,7 +1046,7 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, ulong flags,
r = &type_b->regions[idx_b];
r_start = idx_b ? r[-1].base + r[-1].size : 0;
r_end = idx_b < type_b->cnt ?
- r->base : (phys_addr_t)ULLONG_MAX;
+ r->base : PHYS_ADDR_MAX;
/*
* if idx_b advanced past idx_a,
* break out to advance idx_a
@@ -1516,13 +1521,13 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void)
static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit)
{
- phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
+ phys_addr_t max_addr = PHYS_ADDR_MAX;
struct memblock_region *r;
/*
* translate the memory @limit size into the max address within one of
* the memory memblock regions, if the @limit exceeds the total size
- * of those regions, max_addr will keep original value ULLONG_MAX
+ * of those regions, max_addr will keep original value PHYS_ADDR_MAX
*/
for_each_memblock(memory, r) {
if (limit <= r->size) {
@@ -1537,7 +1542,7 @@ static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit)
void __init memblock_enforce_memory_limit(phys_addr_t limit)
{
- phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
+ phys_addr_t max_addr = PHYS_ADDR_MAX;
if (!limit)
return;
@@ -1545,14 +1550,14 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
max_addr = __find_max_addr(limit);
/* @limit exceeds the total size of the memory, do nothing */
- if (max_addr == (phys_addr_t)ULLONG_MAX)
+ if (max_addr == PHYS_ADDR_MAX)
return;
/* truncate both memory and reserved regions */
memblock_remove_range(&memblock.memory, max_addr,
- (phys_addr_t)ULLONG_MAX);
+ PHYS_ADDR_MAX);
memblock_remove_range(&memblock.reserved, max_addr,
- (phys_addr_t)ULLONG_MAX);
+ PHYS_ADDR_MAX);
}
void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
@@ -1580,7 +1585,7 @@ void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
/* truncate the reserved regions */
memblock_remove_range(&memblock.reserved, 0, base);
memblock_remove_range(&memblock.reserved,
- base + size, (phys_addr_t)ULLONG_MAX);
+ base + size, PHYS_ADDR_MAX);
}
void __init memblock_mem_limit_remove_map(phys_addr_t limit)
@@ -1593,7 +1598,7 @@ void __init memblock_mem_limit_remove_map(phys_addr_t limit)
max_addr = __find_max_addr(limit);
/* @limit exceeds the total size of the memory, do nothing */
- if (max_addr == (phys_addr_t)ULLONG_MAX)
+ if (max_addr == PHYS_ADDR_MAX)
return;
memblock_cap_memory_range(0, max_addr);
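PHYS_ADDR_MAX, introduced by this series, replaces the open-coded (phys_addr_t)ULLONG_MAX casts with the all-ones value of phys_addr_t itself, which reads correctly on both 32-bit and 64-bit configurations. Roughly (stand-in typedef; the real definition sits in the kernel's types header):

#include <stdint.h>

typedef uint64_t phys_addr_t_sketch;    /* 32 bits wide without CONFIG_PHYS_ADDR_T_64BIT */

/* All-ones in the native width of phys_addr_t, no 64-bit cast needed. */
#define PHYS_ADDR_MAX_SKETCH (~(phys_addr_t_sketch)0)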
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1695f38630f1..c1e64d60ed02 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1034,13 +1034,13 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
unsigned long limit;
count = page_counter_read(&memcg->memory);
- limit = READ_ONCE(memcg->memory.limit);
+ limit = READ_ONCE(memcg->memory.max);
if (count < limit)
margin = limit - count;
if (do_memsw_account()) {
count = page_counter_read(&memcg->memsw);
- limit = READ_ONCE(memcg->memsw.limit);
+ limit = READ_ONCE(memcg->memsw.max);
if (count <= limit)
margin = min(margin, limit - count);
else
@@ -1148,13 +1148,13 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
pr_info("memory: usage %llukB, limit %llukB, failcnt %lu\n",
K((u64)page_counter_read(&memcg->memory)),
- K((u64)memcg->memory.limit), memcg->memory.failcnt);
+ K((u64)memcg->memory.max), memcg->memory.failcnt);
pr_info("memory+swap: usage %llukB, limit %llukB, failcnt %lu\n",
K((u64)page_counter_read(&memcg->memsw)),
- K((u64)memcg->memsw.limit), memcg->memsw.failcnt);
+ K((u64)memcg->memsw.max), memcg->memsw.failcnt);
pr_info("kmem: usage %llukB, limit %llukB, failcnt %lu\n",
K((u64)page_counter_read(&memcg->kmem)),
- K((u64)memcg->kmem.limit), memcg->kmem.failcnt);
+ K((u64)memcg->kmem.max), memcg->kmem.failcnt);
for_each_mem_cgroup_tree(iter, memcg) {
pr_info("Memory cgroup stats for ");
@@ -1179,21 +1179,21 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
/*
* Return the memory (and swap, if configured) limit for a memcg.
*/
-unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
+unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg)
{
- unsigned long limit;
+ unsigned long max;
- limit = memcg->memory.limit;
+ max = memcg->memory.max;
if (mem_cgroup_swappiness(memcg)) {
- unsigned long memsw_limit;
- unsigned long swap_limit;
+ unsigned long memsw_max;
+ unsigned long swap_max;
- memsw_limit = memcg->memsw.limit;
- swap_limit = memcg->swap.limit;
- swap_limit = min(swap_limit, (unsigned long)total_swap_pages);
- limit = min(limit + swap_limit, memsw_limit);
+ memsw_max = memcg->memsw.max;
+ swap_max = memcg->swap.max;
+ swap_max = min(swap_max, (unsigned long)total_swap_pages);
+ max = min(max + swap_max, memsw_max);
}
- return limit;
+ return max;
}
static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
@@ -2444,12 +2444,13 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
}
#endif
-static DEFINE_MUTEX(memcg_limit_mutex);
+static DEFINE_MUTEX(memcg_max_mutex);
-static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
- unsigned long limit, bool memsw)
+static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
+ unsigned long max, bool memsw)
{
bool enlarge = false;
+ bool drained = false;
int ret;
bool limits_invariant;
struct page_counter *counter = memsw ? &memcg->memsw : &memcg->memory;
@@ -2460,26 +2461,32 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
break;
}
- mutex_lock(&memcg_limit_mutex);
+ mutex_lock(&memcg_max_mutex);
/*
* Make sure that the new limit (memsw or memory limit) doesn't
- * break our basic invariant rule memory.limit <= memsw.limit.
+ * break our basic invariant rule memory.max <= memsw.max.
*/
- limits_invariant = memsw ? limit >= memcg->memory.limit :
- limit <= memcg->memsw.limit;
+ limits_invariant = memsw ? max >= memcg->memory.max :
+ max <= memcg->memsw.max;
if (!limits_invariant) {
- mutex_unlock(&memcg_limit_mutex);
+ mutex_unlock(&memcg_max_mutex);
ret = -EINVAL;
break;
}
- if (limit > counter->limit)
+ if (max > counter->max)
enlarge = true;
- ret = page_counter_limit(counter, limit);
- mutex_unlock(&memcg_limit_mutex);
+ ret = page_counter_set_max(counter, max);
+ mutex_unlock(&memcg_max_mutex);
if (!ret)
break;
+ if (!drained) {
+ drain_all_stock(memcg);
+ drained = true;
+ continue;
+ }
+
if (!try_to_free_mem_cgroup_pages(memcg, 1,
GFP_KERNEL, !memsw)) {
ret = -EBUSY;
@@ -2603,6 +2610,9 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
/* we call try-to-free pages for make this cgroup empty */
lru_add_drain_all();
+
+ drain_all_stock(memcg);
+
/* try to free all pages in this cgroup */
while (nr_retries && page_counter_read(&memcg->memory)) {
int progress;
@@ -2757,7 +2767,7 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
return (u64)mem_cgroup_usage(memcg, true) * PAGE_SIZE;
return (u64)page_counter_read(counter) * PAGE_SIZE;
case RES_LIMIT:
- return (u64)counter->limit * PAGE_SIZE;
+ return (u64)counter->max * PAGE_SIZE;
case RES_MAX_USAGE:
return (u64)counter->watermark * PAGE_SIZE;
case RES_FAILCNT:
@@ -2871,24 +2881,24 @@ static void memcg_free_kmem(struct mem_cgroup *memcg)
}
#endif /* !CONFIG_SLOB */
-static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
- unsigned long limit)
+static int memcg_update_kmem_max(struct mem_cgroup *memcg,
+ unsigned long max)
{
int ret;
- mutex_lock(&memcg_limit_mutex);
- ret = page_counter_limit(&memcg->kmem, limit);
- mutex_unlock(&memcg_limit_mutex);
+ mutex_lock(&memcg_max_mutex);
+ ret = page_counter_set_max(&memcg->kmem, max);
+ mutex_unlock(&memcg_max_mutex);
return ret;
}
-static int memcg_update_tcp_limit(struct mem_cgroup *memcg, unsigned long limit)
+static int memcg_update_tcp_max(struct mem_cgroup *memcg, unsigned long max)
{
int ret;
- mutex_lock(&memcg_limit_mutex);
+ mutex_lock(&memcg_max_mutex);
- ret = page_counter_limit(&memcg->tcpmem, limit);
+ ret = page_counter_set_max(&memcg->tcpmem, max);
if (ret)
goto out;
@@ -2913,7 +2923,7 @@ static int memcg_update_tcp_limit(struct mem_cgroup *memcg, unsigned long limit)
memcg->tcpmem_active = true;
}
out:
- mutex_unlock(&memcg_limit_mutex);
+ mutex_unlock(&memcg_max_mutex);
return ret;
}
@@ -2941,16 +2951,16 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
}
switch (MEMFILE_TYPE(of_cft(of)->private)) {
case _MEM:
- ret = mem_cgroup_resize_limit(memcg, nr_pages, false);
+ ret = mem_cgroup_resize_max(memcg, nr_pages, false);
break;
case _MEMSWAP:
- ret = mem_cgroup_resize_limit(memcg, nr_pages, true);
+ ret = mem_cgroup_resize_max(memcg, nr_pages, true);
break;
case _KMEM:
- ret = memcg_update_kmem_limit(memcg, nr_pages);
+ ret = memcg_update_kmem_max(memcg, nr_pages);
break;
case _TCP:
- ret = memcg_update_tcp_limit(memcg, nr_pages);
+ ret = memcg_update_tcp_max(memcg, nr_pages);
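One behavioural change threaded through this memcontrol.c diff: mem_cgroup_resize_max() now calls drain_all_stock() once before falling back to reclaim when the new max is below current usage. A compilable sketch of that retry shape, with stand-in helpers passed as callbacks:

#include <stdbool.h>

/* Try the cheap remedy (drain per-cpu charge caches) exactly once
 * before resorting to the expensive one (reclaim). All helpers are
 * stand-ins, not kernel functions. */
static int resize_max_sketch(long max,
                             bool (*try_set_max)(long),
                             void (*drain_stock)(void),
                             bool (*reclaim_some)(void))
{
        bool drained = false;

        for (;;) {
                if (try_set_max(max))
                        return 0;               /* new max accepted */
                if (!drained) {
                        drain_stock();          /* one-time cheap pass */
                        drained = true;
                        continue;
                }
                if (!reclaim_some())
                        return -1;              /* -EBUSY in the kernel */
        }
}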