Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig                  23
-rw-r--r--  mm/Makefile                  3
-rw-r--r--  mm/balloon_compaction.c    302
-rw-r--r--  mm/bootmem.c                79
-rw-r--r--  mm/compaction.c            145
-rw-r--r--  mm/dmapool.c                55
-rw-r--r--  mm/highmem.c                29
-rw-r--r--  mm/huge_memory.c           533
-rw-r--r--  mm/hugetlb.c                42
-rw-r--r--  mm/hugetlb_cgroup.c         23
-rw-r--r--  mm/internal.h                5
-rw-r--r--  mm/ksm.c                    21
-rw-r--r--  mm/memcontrol.c            222
-rw-r--r--  mm/memory-failure.c         36
-rw-r--r--  mm/memory.c                 34
-rw-r--r--  mm/memory_hotplug.c        421
-rw-r--r--  mm/mempolicy.c              57
-rw-r--r--  mm/migrate.c               101
-rw-r--r--  mm/mmap.c                  559
-rw-r--r--  mm/mprotect.c                2
-rw-r--r--  mm/mremap.c                  2
-rw-r--r--  mm/nobootmem.c              22
-rw-r--r--  mm/nommu.c                  15
-rw-r--r--  mm/oom_kill.c              138
-rw-r--r--  mm/page-writeback.c         11
-rw-r--r--  mm/page_alloc.c            304
-rw-r--r--  mm/page_cgroup.c             5
-rw-r--r--  mm/page_isolation.c         27
-rw-r--r--  mm/pagewalk.c                2
-rw-r--r--  mm/percpu.c                  5
-rw-r--r--  mm/rmap.c                   68
-rw-r--r--  mm/shmem.c                 118
-rw-r--r--  mm/slub.c                    4
-rw-r--r--  mm/sparse.c                 35
-rw-r--r--  mm/swapfile.c               31
-rw-r--r--  mm/util.c                    2
-rw-r--r--  mm/vmalloc.c                 4
-rw-r--r--  mm/vmscan.c                111
-rw-r--r--  mm/vmstat.c                 12
39 files changed, 2534 insertions(+), 1074 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index a3f8dddaaab3..71259e052ce8 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -143,6 +143,14 @@ config NO_BOOTMEM
config MEMORY_ISOLATION
boolean
+config MOVABLE_NODE
+ boolean "Enable to assign a node which has only movable memory"
+ depends on HAVE_MEMBLOCK
+ depends on NO_BOOTMEM
+ depends on X86_64
+ depends on NUMA
+ depends on BROKEN
+
# eventually, we can have this option just 'select SPARSEMEM'
config MEMORY_HOTPLUG
bool "Allow for memory hot-add"
@@ -188,6 +196,21 @@ config SPLIT_PTLOCK_CPUS
default "4"
#
+# support for memory balloon compaction
+config BALLOON_COMPACTION
+ bool "Allow for balloon memory compaction/migration"
+ def_bool y
+ depends on COMPACTION && VIRTIO_BALLOON
+ help
+ Memory fragmentation introduced by ballooning might significantly
+ reduce the number of 2MB contiguous memory blocks that can be used
+ within a guest, thus imposing performance penalties associated with
+ the reduced number of transparent huge pages that could be used by
+ the guest workload. Allowing compaction and migration of memory
+ pages enlisted as part of memory balloon devices avoids the
+ aforementioned scenario and helps improve memory defragmentation.
+
+#
# support for memory compaction
config COMPACTION
bool "Allow for memory compaction"
diff --git a/mm/Makefile b/mm/Makefile
index 6b025f80af34..3a4628751f89 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -16,7 +16,8 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
readahead.o swap.o truncate.o vmscan.o shmem.o \
util.o mmzone.o vmstat.o backing-dev.o \
mm_init.o mmu_context.o percpu.o slab_common.o \
- compaction.o interval_tree.o $(mmu-y)
+ compaction.o balloon_compaction.o \
+ interval_tree.o $(mmu-y)
obj-y += init-mm.o
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
new file mode 100644
index 000000000000..07dbc8ec46cf
--- /dev/null
+++ b/mm/balloon_compaction.c
@@ -0,0 +1,302 @@
+/*
+ * mm/balloon_compaction.c
+ *
+ * Common interface for making balloon pages movable by compaction.
+ *
+ * Copyright (C) 2012, Red Hat, Inc. Rafael Aquini <aquini@redhat.com>
+ */
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/balloon_compaction.h>
+
+/*
+ * balloon_devinfo_alloc - allocates a balloon device information descriptor.
+ * @balloon_dev_descriptor: pointer to reference the balloon device which
+ * this struct balloon_dev_info will be servicing.
+ *
+ * Driver must call it to properly allocate and initialize an instance of
+ * struct balloon_dev_info which will be used to reference a balloon device
+ * as well as to keep track of the balloon device page list.
+ */
+struct balloon_dev_info *balloon_devinfo_alloc(void *balloon_dev_descriptor)
+{
+ struct balloon_dev_info *b_dev_info;
+ b_dev_info = kmalloc(sizeof(*b_dev_info), GFP_KERNEL);
+ if (!b_dev_info)
+ return ERR_PTR(-ENOMEM);
+
+ b_dev_info->balloon_device = balloon_dev_descriptor;
+ b_dev_info->mapping = NULL;
+ b_dev_info->isolated_pages = 0;
+ spin_lock_init(&b_dev_info->pages_lock);
+ INIT_LIST_HEAD(&b_dev_info->pages);
+
+ return b_dev_info;
+}
+EXPORT_SYMBOL_GPL(balloon_devinfo_alloc);
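For illustration, a minimal sketch of how a balloon driver might consume this allocator at probe time; my_balloon_probe and my_device are hypothetical names, not part of this commit:

	static struct balloon_dev_info *my_b_dev_info;

	static int my_balloon_probe(void *my_device)
	{
		my_b_dev_info = balloon_devinfo_alloc(my_device);
		if (IS_ERR(my_b_dev_info))
			return PTR_ERR(my_b_dev_info);
		return 0;
	}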
+
+/*
+ * balloon_page_enqueue - allocates a new page and inserts it into the balloon
+ * page list.
+ * @b_dev_info: balloon device descriptor where we will insert a new page
+ *
+ * Driver must call it to properly allocate a new enlisted balloon page
+ * before definitively removing it from the guest system.
+ * This function returns the page address for the recently enqueued page or
+ * NULL in case we fail to allocate a new page this turn.
+ */
+struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info)
+{
+ unsigned long flags;
+ struct page *page = alloc_page(balloon_mapping_gfp_mask() |
+ __GFP_NOMEMALLOC | __GFP_NORETRY);
+ if (!page)
+ return NULL;
+
+ /*
+ * Block others from accessing the 'page' when we get around to
+ * establishing additional references. We should be the only one
+ * holding a reference to the 'page' at this point.
+ */
+ BUG_ON(!trylock_page(page));
+ spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+ balloon_page_insert(page, b_dev_info->mapping, &b_dev_info->pages);
+ spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
+ unlock_page(page);
+ return page;
+}
+EXPORT_SYMBOL_GPL(balloon_page_enqueue);
+
+/*
+ * balloon_page_dequeue - removes a page from balloon's page list and returns
+ * its address to allow the driver to release the page.
+ * @b_dev_info: balloon device descriptor where we will grab a page from.
+ *
+ * Driver must call it to properly de-allocate a previously enlisted balloon
+ * page before definitively releasing it back to the guest system.
+ * This function returns the page address for the recently dequeued page or
+ * NULL in case we find balloon's page list temporarily empty due to
+ * compaction-isolated pages.
+ */
+struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
+{
+ struct page *page, *tmp;
+ unsigned long flags;
+ bool dequeued_page;
+
+ dequeued_page = false;
+ list_for_each_entry_safe(page, tmp, &b_dev_info->pages, lru) {
+ /*
+ * Block others from accessing the 'page' while we get around to
+ * establishing additional references and preparing the 'page'
+ * to be released by the balloon driver.
+ */
+ if (trylock_page(page)) {
+ spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+ /*
+ * Raise the page refcount here to prevent any wrong
+ * attempt to isolate this page, in case of colliding
+ * with balloon_page_isolate() just after we release
+ * the page lock.
+ *
+ * balloon_page_free() will take care of dropping
+ * this extra refcount later.
+ */
+ get_page(page);
+ balloon_page_delete(page);
+ spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
+ unlock_page(page);
+ dequeued_page = true;
+ break;
+ }
+ }
+
+ if (!dequeued_page) {
+ /*
+ * If we are unable to dequeue a balloon page because the page
+ * list is empty and there are no isolated pages, then something
+ * went off track and some balloon pages are lost.
+ * BUG() here, otherwise the balloon driver may get stuck in
+ * an infinite loop while attempting to release all its pages.
+ */
+ spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+ if (unlikely(list_empty(&b_dev_info->pages) &&
+ !b_dev_info->isolated_pages))
+ BUG();
+ spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
+ page = NULL;
+ }
+ return page;
+}
+EXPORT_SYMBOL_GPL(balloon_page_dequeue);
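As a usage sketch (hypothetical driver code; the my_tell_host_*() notification helpers are assumptions, since host signalling is driver-specific and not defined by this commit), inflation and deflation would pair these helpers roughly as follows. balloon_page_free() is the helper referenced in the comments above that drops the extra refcount:

	/* Inflate: move one guest page into the balloon. */
	static int my_balloon_inflate_one(struct balloon_dev_info *b_dev_info)
	{
		struct page *page = balloon_page_enqueue(b_dev_info);

		if (!page)
			return -ENOMEM;			/* retry later */
		my_tell_host_page_added(page);		/* hypothetical */
		return 0;
	}

	/*
	 * Deflate: hand one page back; NULL means the list is drained or
	 * its pages are temporarily isolated by compaction.
	 */
	static int my_balloon_deflate_one(struct balloon_dev_info *b_dev_info)
	{
		struct page *page = balloon_page_dequeue(b_dev_info);

		if (!page)
			return -EAGAIN;			/* try again later */
		my_tell_host_page_removed(page);	/* hypothetical */
		balloon_page_free(page);	/* also drops the extra refcount */
		return 0;
	}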
+
+#ifdef CONFIG_BALLOON_COMPACTION
+/*
+ * balloon_mapping_alloc - allocates a special ->mapping for ballooned pages.
+ * @b_dev_info: holds the balloon device information descriptor.
+ * @a_ops: balloon_mapping address_space_operations descriptor.
+ *
+ * Driver must call it to properly allocate and initialize an instance of
+ * struct address_space which will be used as the special page->mapping for
+ * balloon device enlisted page instances.
+ */
+struct address_space *balloon_mapping_alloc(struct balloon_dev_info *b_dev_info,
+ const struct address_space_operations *a_ops)
+{
+ struct address_space *mapping;
+
+ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+ if (!mapping)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * Give a clean 'zeroed' status to all elements of this special
+ * balloon page->mapping struct address_space instance.
+ */
+ address_space_init_once(mapping);
+
+ /*
+ * Set mapping->flags appropriately, to allow balloon pages
+ * ->mapping identification.
+ */
+ mapping_set_balloon(mapping);
+ mapping_set_gfp_mask(mapping, balloon_mapping_gfp_mask());
+
+ /* balloon's page->mapping->a_ops callback descriptor */
+ mapping->a_ops = a_ops;
+
+ /*
+ * Establish a pointer reference back to the balloon device descriptor
+ * this particular page->mapping will be servicing.
+ * This is used by compaction / migration procedures to identify and
+ * access the balloon device pageset while isolating / migrating pages.
+ *
+ * As some balloon drivers can register multiple balloon devices
+ * for a single guest, this also helps compaction / migration to
+ * properly deal with multiple balloon pagesets, when required.
+ */
+ mapping->private_data = b_dev_info;
+ b_dev_info->mapping = mapping;
+
+ return mapping;
+}
+EXPORT_SYMBOL_GPL(balloon_mapping_alloc);
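Continuing the hypothetical sketch, a driver would wire up the special mapping right after allocating the descriptor, supplying its own migratepage callback (sketched at the end of this file); my_balloon_aops and my_balloon_migratepage are made-up names:

	static const struct address_space_operations my_balloon_aops = {
		.migratepage = my_balloon_migratepage,	/* sketched below */
	};

	static int my_balloon_setup_mapping(struct balloon_dev_info *b_dev_info)
	{
		struct address_space *mapping;

		mapping = balloon_mapping_alloc(b_dev_info, &my_balloon_aops);
		if (IS_ERR(mapping))
			return PTR_ERR(mapping);
		return 0;
	}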
+
+static inline void __isolate_balloon_page(struct page *page)
+{
+ struct balloon_dev_info *b_dev_info = page->mapping->private_data;
+ unsigned long flags;
+ spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+ list_del(&page->lru);
+ b_dev_info->isolated_pages++;
+ spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
+}
+
+static inline void __putback_balloon_page(struct page *page)
+{
+ struct balloon_dev_info *b_dev_info = page->mapping->private_data;
+ unsigned long flags;
+ spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+ list_add(&page->lru, &b_dev_info->pages);
+ b_dev_info->isolated_pages--;
+ spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
+}
+
+static inline int __migrate_balloon_page(struct address_space *mapping,
+ struct page *newpage, struct page *page, enum migrate_mode mode)
+{
+ return page->mapping->a_ops->migratepage(mapping, newpage, page, mode);
+}
+
+/* __isolate_lru_page() counterpart for a ballooned page */
+bool balloon_page_isolate(struct page *page)
+{
+ /*
+ * Avoid burning cycles with pages that are yet under __free_pages(),
+ * or just got freed under us.
+ *
+ * In case we 'win' a race for a balloon page being freed under us and
+ * raise its refcount, preventing __free_pages() from doing its job,
+ * the put_page() at the end of this block will take care of releasing
+ * this page, thus avoiding a nasty leakage.
+ */
+ if (likely(get_page_unless_zero(page))) {
+ /*
+ * As balloon pages are not isolated from LRU lists, concurrent
+ * compaction threads can race against page migration functions
+ * as well as race against the balloon driver releasing a page.
+ *
+ * In order to avoid having an already isolated balloon page
+ * being (wrongly) re-isolated while it is under migration,
+ * or to avoid attempting to isolate pages being released by
+ * the balloon driver, let's be sure we have the page lock
+ * before proceeding with the balloon page isolation steps.
+ */
+ if (likely(trylock_page(page))) {
+ /*
+ * A ballooned page, by default, has just one refcount.
+ * Prevent concurrent compaction threads from isolating
+ * an already isolated balloon page by refcount check.
+ */
+ if (__is_movable_balloon_page(page) &&
+ page_count(page) == 2) {
+ __isolate_balloon_page(page);
+ unlock_page(page);
+ return true;
+ }
+ unlock_page(page);
+ }
+ put_page(page);
+ }
+ return false;
+}
+
+/* putback_lru_page() counterpart for a ballooned page */
+void balloon_page_putback(struct page *page)
+{
+ /*
+ * 'lock_page()' stabilizes the page and prevents races against
+ * concurrent isolation threads attempting to re-isolate it.
+ */
+ lock_page(page);
+
+ if (__is_movable_balloon_page(page)) {
+ __putback_balloon_page(page);
+ /* drop the extra ref count taken for page isolation */
+ put_page(page);
+ } else {
+ WARN_ON(1);
+ dump_page(page);
+ }
+ unlock_page(page);
+}
+
+/* move_to_new_page() counterpart for a ballooned page */
+int balloon_page_migrate(struct page *newpage,
+ struct page *page, enum migrate_mode mode)
+{
+ struct address_space *mapping;
+ int rc = -EAGAIN;
+
+ /*
+ * Block others from accessing the 'newpage' when we get around to
+ * establishing additional references. We should be the only one
+ * holding a reference to the 'newpage' at this point.
+ */
+ BUG_ON(!trylock_page(newpage));
+
+ if (WARN_ON(!__is_movable_balloon_page(page))) {
+ dump_page(page);
+ unlock_page(newpage);
+ return rc;
+ }
+
+ mapping = page->mapping;
+ if (mapping)
+ rc = __migrate_balloon_page(mapping, newpage, page, mode);
+
+ unlock_page(newpage);
+ return rc;
+}
+#endif /* CONFIG_BALLOON_COMPACTION */
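To close the hypothetical driver sketch: the migratepage callback dispatched by __migrate_balloon_page() above would, under the descriptor's pages_lock, enlist 'newpage' in place of the isolated 'page' and notify the host of both events. This mirrors the __isolate/__putback bookkeeping in this file; the notification helpers and the 0-on-success return convention are assumptions:

	static int my_balloon_migratepage(struct address_space *mapping,
			struct page *newpage, struct page *page,
			enum migrate_mode mode)
	{
		struct balloon_dev_info *b_dev_info = mapping->private_data;
		unsigned long flags;

		/* Enlist the new page in place of the isolated old one. */
		spin_lock_irqsave(&b_dev_info->pages_lock, flags);
		balloon_page_insert(newpage, mapping, &b_dev_info->pages);
		b_dev_info->isolated_pages--;
		spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);

		my_tell_host_page_added(newpage);	/* hypothetical */

		/* Drop the old page from the balloon's page accounting. */
		balloon_page_delete(page);
		my_tell_host_page_removed(page);	/* hypothetical */

		return 0;			/* assumed success code */
	}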
diff --git a/mm/bootmem.c b/mm/bootmem.c
index f468185b3b28..1324cd74faec 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -147,21 +147,21 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
/*
* free_bootmem_late - free bootmem pages directly to page allocator
- * @addr: starting address of the range
+ * @physaddr: starting physical address of the range
* @size: size of the range in bytes
*
* This is only useful when the bootmem allocator has already been torn
* down, but we are still initializing the system. Pages are given directly
* to the page allocator, no bootmem metadata is updated because it is gone.
*/
-void __init free_bootmem_late(unsigned long addr, unsigned long size)
+void __init free_bootmem_late(unsigned long physaddr, unsigned long size)
{
unsigned long cursor, end;
- kmemleak_free_part(__va(addr), size);
+ kmemleak_free_part(__va(physaddr), size);
- cursor = PFN_UP(addr);
- end = PFN_DOWN(addr + size);
+ cursor = PFN_UP(physaddr);
+ end = PFN_DOWN(physaddr + size);
for (; cursor < end; cursor++) {
__free_pages_bootmem(pfn_to_page(cursor), 0);
@@ -229,6 +229,22 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
return count;
}
+static void reset_node_lowmem_managed_pages(pg_data_t *pgdat)
+{
+ struct zone *z;
+
+ /*
+ * In free_area_init_core(), highmem zone's managed_pages is set to
+ * present_pages, and bootmem allocator doesn't allocate from highmem
+ * zones. So there's no need to recalculate managed_pages because all
+ * highmem pages will be managed by the buddy system. Here highmem
+ * zone also includes highmem movable zone.
+ */
+ for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
+ if (!is_highmem(z))
+ z->managed_pages = 0;
+}
+
/**
* free_all_bootmem_node - release a node's free pages to the buddy allocator
* @pgdat: node to be released
@@ -238,6 +254,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
{
register_page_bootmem_info_node(pgdat);
+ reset_node_lowmem_managed_pages(pgdat);
return free_all_bootmem_core(pgdat->bdata);
}
@@ -250,6 +267,10 @@ unsigned long __init free_all_bootmem(void)
{
unsigned long total_pages = 0;
bootmem_data_t *bdata;
+ struct pglist_data *pgdat;
+
+ for_each_online_pgdat(pgdat)
+ reset_node_lowmem_managed_pages(pgdat);
list_for_each_entry(bdata, &bdata_list, list)
total_pages += free_all_bootmem_core(bdata);
@@ -377,21 +398,21 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
/**
* free_bootmem - mark a page range as usable
- * @addr: starting address of the range
+ * @physaddr: starting physical address of the range
* @size: size of the range in bytes
*
* Partial pages will be considered reserved and left as they are.
*
* The range must be contiguous but may span node boundaries.
*/
-void __init free_bootmem(unsigned long addr, unsigned long size)
+void __init free_bootmem(unsigned long physaddr, unsigned long size)
{
unsigned long start, end;
- kmemleak_free_part(__va(addr), size);
+ kmemleak_free_part(__va(physaddr), size);
- start = PFN_UP(addr);
- end = PFN_DOWN(addr + size);
+ start = PFN_UP(physaddr);
+ end = PFN_DOWN(physaddr + size);
mark_bootmem(start, end, 0, 0);
}
@@ -439,12 +460,6 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size,
return mark_bootmem(start, end, 1, flags);
}
-int __weak __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
- int flags)
-{
- return reserve_bootmem(phys, len, flags);
-}
-
static unsigned long __init align_idx(struct bootmem_data *bdata,
unsigned long idx, unsigned long step)
{
@@ -575,27 +590,6 @@ find_block:
return NULL;
}
-static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata,
- unsigned long size, unsigned long align,
- unsigned long goal, unsigned long limit)
-{
- if (WARN_ON_ONCE(slab_is_available()))
- return kzalloc(size, GFP_NOWAIT);
-
-#ifdef CONFIG_HAVE_ARCH_BOOTMEM
- {
- bootmem_data_t *p_bdata;
-
- p_bdata = bootmem_arch_preferred_node(bdata, size, align,
- goal, limit);
- if (p_bdata)
- return alloc_bootmem_bdata(p_bdata, size, align,
- goal, limit);
- }
-#endif
- return NULL;
-}
-
static void * __init alloc_bootmem_core(unsigned long size,
unsigned long align,
unsigned long goal,
@@ -604,9 +598,8 @@ static void * __init alloc_bootmem_core(unsigned long size,
bootmem_data_t *bdata;
void *region;
- region = alloc_arch_preferred_bootmem(NULL, size, align, goal, limit);
- if (region)
- return region;
+ if (WARN_ON_ONCE(slab_is_available()))
+ return kzalloc(size, GFP_NOWAIT);
list_for_each_entry(bdata, &bdata_list, list) {
if (goal && bdata->node_low_pfn <= PFN_DOWN(goal))
@@ -704,11 +697,9 @@ void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
{
void *ptr;
+ if (WARN_ON_ONCE(slab_is_available()))
+ return kzalloc(size, GFP_NOWAIT);
again:
- ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size,
- align, goal, limit);
- if (ptr)
- return ptr;
/* do not panic in alloc_bootmem_bdata() */
if (limit && goal + size > limit)
diff --git a/mm/compaction.c b/mm/compaction.c
index aee7443a4d5a..5ad7f4f4d6f7 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -14,6 +14,7 @@
#include <linux/backing-dev.h>
#include <linux/sysctl.h>
#include <linux/sysfs.h>
+#include <linux/balloon_compaction.h>
#include "internal.h"
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
@@ -214,60 +215,6 @@ static bool suitable_migration_target(struct page *page)
return false;
}
-static void compact_capture_page(struct compact_control *cc)
-{
- unsigned long flags;
- int mtype, mtype_low, mtype_high;
-
- if (!cc->page || *cc->page)
- return;
-
- /*
- * For MIGRATE_MOVABLE allocations we capture a suitable page ASAP
- * regardless of the migratetype of the freelist is is captured from.
- * This is fine because the order for a high-order MIGRATE_MOVABLE
- * allocation is typically at least a pageblock size and overall
- * fragmentation is not impaired. Other allocation types must
- * capture pages from their own migratelist because otherwise they
- * could pollute other pageblocks like MIGRATE_MOVABLE with
- * difficult to move pages and making fragmentation worse overall.
- */
- if (cc->migratetype == MIGRATE_MOVABLE) {
- mtype_low = 0;
- mtype_high = MIGRATE_PCPTYPES;
- } else {
- mtype_low = cc->migratetype;
- mtype_high = cc->migratetype + 1;
- }
-
- /* Speculatively examine the free lists without zone lock */
- for (mtype = mtype_low; mtype < mtype_high; mtype++) {
- int order;
- for (order = cc->order; order < MAX_ORDER; order++) {
- struct page *page;
- struct free_area *area;
- area = &(cc->zone->free_area[order]);
- if (list_empty(&area->free_list[mtype]))
- continue;
-
- /* Take the lock and attempt capture of the page */
- if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc))
- return;
- if (!list_empty(&area->free_list[mtype])) {
- page = list_entry(area->free_list[mtype].next,
- struct page, lru);
- if (capture_free_page(page, cc->order, mtype)) {
- spin_unlock_irqrestore(&cc->zone->lock,
- flags);
- *cc->page = page;
- return;
- }
- }
- spin_unlock_irqrestore(&cc->zone->lock, flags);
- }
- }
-}
-
/*
* Isolate free pages onto a private freelist. Caller must hold zone->lock.
* If @strict is true, will abort returning 0 on any invalid PFNs or non-free
@@ -569,9 +516,24 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
goto next_pageblock;
}
- /* Check may be lockless but that's ok as we recheck later */
- if (!PageLRU(page))
+ /*
+ * Check may be lockless but that's ok as we recheck later.
+ * It's possible to migrate LRU pages and balloon pages;
+ * skip any other type of page.
+ */
+ if (!PageLRU(page)) {
+ if (unlikely(balloon_page_movable(page))) {
+ if (locked && balloon_page_isolate(page)) {
+ /* Successfully isolated */
+ cc->finished_update_migrate = true;
+ list_add(&page->lru, migratelist);
+ cc->nr_migratepages++;
+ nr_isolated++;
+ goto check_compact_cluster;
+ }
+ }
continue;
+ }
/*
* PageLRU is set. lru_lock normally excludes isolation
@@ -625,6 +587,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
cc->nr_migratepages++;
nr_isolated++;
+check_compact_cluster:
/* Avoid isolating too much */
if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) {
++low_pfn;
@@ -721,7 +684,15 @@ static void isolate_freepages(struct zone *zone,
/* Found a block suitable for isolating free pages from */
isolated = 0;
- end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
+
+ /*
+ * As pfn may not start aligned, pfn + pageblock_nr_pages
+ * may cross a MAX_ORDER_NR_PAGES boundary and miss
+ * a pfn_valid check. Ensure isolate_freepages_block()
+ * only scans within a pageblock.
+ */
+ end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+ end_pfn = min(end_pfn, zone_end_pfn);
isolated = isolate_freepages_block(cc, pfn, end_pfn,
freelist, false);
nr_freepages += isolated;
@@ -936,6 +907,60 @@ unsigned long compaction_suitable(struct zone *zone, int order)
return COMPACT_CONTINUE;
}
+static void compact_capture_page(struct compact_control *cc)
+{
+ unsigned long flags;
+ int mtype, mtype_low, mtype_high;
+
+ if (!cc->page || *cc->page)
+ return;
+
+ /*
+ * For MIGRATE_MOVABLE allocations we capture a suitable page ASAP
+ * regardless of the migratetype of the freelist it is captured from.
+ * This is fine because the order for a high-order MIGRATE_MOVABLE
+ * allocation is typically at least a pageblock size and overall
+ * fragmentation is not impaired. Other allocation types must
+ * capture pages from their own migratelist because otherwise they
+ * could pollute other pageblocks like MIGRATE_MOVABLE with
+ * difficult-to-move pages, making fragmentation worse overall.
+ */
+ if (cc->migratetype == MIGRATE_MOVABLE) {
+ mtype_low = 0;
+ mtype_high = MIGRATE_PCPTYPES;
+ } else {
+ mtype_low = cc->migratetype;
+ mtype_high = cc->migratetype + 1;
+ }
+
+ /* Speculatively examine the free lists without zone lock */
+ for (mtype = mtype_low; mtype < mtype_high; mtype++) {
+ int order;
+ for (order = cc->order; order < MAX_ORDER; order++) {
+ struct page *page;
+ struct free_area *area;
+ area = &(cc->zone->free_area[order]);
+ if (list_empty(&area->free_list[mtype]))
+ continue;
+
+ /* Take the lock and attempt capture of the page */
+ if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc))
+ return;
+ if (!list_empty(&area->free_list[mtype])) {
+ page = list_entry(area->free_list[mtype].next,
+ struct page, lru);
+ if (capture_free_page(page, cc->order, mtype)) {
+ spin_unlock_irqrestore(&cc->zone->lock,
+ flags);
+ *cc->page = page;
+ return;
+ }
+ }
+ spin_unlock_irqrestore(&cc->zone->lock, flags);
+ }
+ }
+}
+
static int compact_zone(struct zone *zone, struct compact_control *cc)
{
int ret;
@@ -986,7 +1011,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
switch (isolate_migratepages(zone, cc)) {
case ISOLATE_ABORT:
ret = COMPACT_PARTIAL;
- putback_lru_pages(&cc->migratepages);
+ putback_movable_pages(&cc->migratepages);
cc->nr_migratepages = 0;
goto out;
case ISOLATE_NONE:
@@ -1006,9 +1031,9 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
trace_mm_compaction_migratepages(nr_migrate - nr_remaining,
nr_remaining);
- /* Release LRU pages not migrated */
+ /* Release isolated pages not migrated */
if (err) {
- putback_lru_pages(&cc->migratepages);
+ putback_movable_pages(&cc->migratepages);
cc->nr_migratepages = 0;
if (err == -ENOMEM) {
ret = COMPACT_PARTIAL;
diff --git a/mm/dmapool.c b/mm/dmapool.c
index c5ab33bca0a8..c69781e97cf9 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -50,7 +50,6 @@ struct dma_pool { /* the pool */
size_t allocation;
size_t boundary;
char name[32];
- wait_queue_head_t waitq;
struct list_head pools;
};
@@ -62,8 +61,6 @@ struct dma_page { /* cacheable header for 'allocation' bytes