diff options
| author | Ingo Molnar <mingo@kernel.org> | 2014-05-07 13:15:46 +0200 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2014-05-07 13:15:46 +0200 |
| commit | 2fe5de9ce7d57498abc14b375cad2fcf8c3ee6cc (patch) | |
| tree | 9478e8cf470c1d5bdb2d89b57a7e35919ab95e72 /mm | |
| parent | 08f8aeb55d7727d644dbbbbfb798fe937d47751d (diff) | |
| parent | 2b4cfe64dee0d84506b951d81bf55d9891744d25 (diff) | |
| download | linux-2fe5de9ce7d57498abc14b375cad2fcf8c3ee6cc.tar.gz linux-2fe5de9ce7d57498abc14b375cad2fcf8c3ee6cc.tar.bz2 linux-2fe5de9ce7d57498abc14b375cad2fcf8c3ee6cc.zip | |
Merge branch 'sched/urgent' into sched/core, to avoid conflicts
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/Kconfig | 4 | ||||
| -rw-r--r-- | mm/Makefile | 6 | ||||
| -rw-r--r-- | mm/backing-dev.c | 16 | ||||
| -rw-r--r-- | mm/compaction.c | 96 | ||||
| -rw-r--r-- | mm/early_ioremap.c | 245 | ||||
| -rw-r--r-- | mm/filemap.c | 921 | ||||
| -rw-r--r-- | mm/huge_memory.c | 113 | ||||
| -rw-r--r-- | mm/hugetlb.c | 309 | ||||
| -rw-r--r-- | mm/hugetlb_cgroup.c | 11 | ||||
| -rw-r--r-- | mm/internal.h | 16 | ||||
| -rw-r--r-- | mm/iov_iter.c | 224 | ||||
| -rw-r--r-- | mm/kmemleak.c | 140 | ||||
| -rw-r--r-- | mm/list_lru.c | 16 | ||||
| -rw-r--r-- | mm/memblock.c | 33 | ||||
| -rw-r--r-- | mm/memcontrol.c | 543 | ||||
| -rw-r--r-- | mm/memory-failure.c | 8 | ||||
| -rw-r--r-- | mm/memory.c | 599 | ||||
| -rw-r--r-- | mm/mempolicy.c | 58 | ||||
| -rw-r--r-- | mm/mempool.c | 4 | ||||
| -rw-r--r-- | mm/mincore.c | 20 | ||||
| -rw-r--r-- | mm/mlock.c | 2 | ||||
| -rw-r--r-- | mm/mmap.c | 59 | ||||
| -rw-r--r-- | mm/mprotect.c | 56 | ||||
| -rw-r--r-- | mm/nobootmem.c | 2 | ||||
| -rw-r--r-- | mm/nommu.c | 51 | ||||
| -rw-r--r-- | mm/page-writeback.c | 4 | ||||
| -rw-r--r-- | mm/page_alloc.c | 126 | ||||
| -rw-r--r-- | mm/page_cgroup.c | 12 | ||||
| -rw-r--r-- | mm/process_vm_access.c | 252 | ||||
| -rw-r--r-- | mm/readahead.c | 31 | ||||
| -rw-r--r-- | mm/rmap.c | 14 | ||||
| -rw-r--r-- | mm/shmem.c | 210 | ||||
| -rw-r--r-- | mm/slab.c | 195 | ||||
| -rw-r--r-- | mm/slab.h | 21 | ||||
| -rw-r--r-- | mm/slab_common.c | 250 | ||||
| -rw-r--r-- | mm/slob.c | 10 | ||||
| -rw-r--r-- | mm/slub.c | 116 | ||||
| -rw-r--r-- | mm/sparse.c | 6 | ||||
| -rw-r--r-- | mm/swap.c | 53 | ||||
| -rw-r--r-- | mm/truncate.c | 148 | ||||
| -rw-r--r-- | mm/util.c | 53 | ||||
| -rw-r--r-- | mm/vmacache.c | 112 | ||||
| -rw-r--r-- | mm/vmalloc.c | 10 | ||||
| -rw-r--r-- | mm/vmscan.c | 152 | ||||
| -rw-r--r-- | mm/vmstat.c | 12 | ||||
| -rw-r--r-- | mm/workingset.c | 414 | ||||
| -rw-r--r-- | mm/zsmalloc.c | 17 | ||||
| -rw-r--r-- | mm/zswap.c | 86 |
48 files changed, 3660 insertions, 2196 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 2888024e0b0a..ebe5880c29d6 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -216,6 +216,7 @@ config PAGEFLAGS_EXTENDED # config SPLIT_PTLOCK_CPUS int + default "999999" if !MMU default "999999" if ARM && !CPU_CACHE_VIPT default "999999" if PARISC && !PA20 default "4" @@ -577,3 +578,6 @@ config PGTABLE_MAPPING You can check speed with zsmalloc benchmark: https://github.com/spartacus06/zsmapbench + +config GENERIC_EARLY_IOREMAP + bool diff --git a/mm/Makefile b/mm/Makefile index 310c90a09264..b484452dac57 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -16,8 +16,9 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ util.o mmzone.o vmstat.o backing-dev.o \ mm_init.o mmu_context.o percpu.o slab_common.o \ - compaction.o balloon_compaction.o \ - interval_tree.o list_lru.o $(mmu-y) + compaction.o balloon_compaction.o vmacache.o \ + interval_tree.o list_lru.o workingset.o \ + iov_iter.o $(mmu-y) obj-y += init-mm.o @@ -61,3 +62,4 @@ obj-$(CONFIG_CLEANCACHE) += cleancache.o obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o obj-$(CONFIG_ZBUD) += zbud.o obj-$(CONFIG_ZSMALLOC) += zsmalloc.o +obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o diff --git a/mm/backing-dev.c b/mm/backing-dev.c index ce682f7a4f29..09d9591b7708 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -288,13 +288,19 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi) * Note, we wouldn't bother setting up the timer, but this function is on the * fast-path (used by '__mark_inode_dirty()'), so we save few context switches * by delaying the wake-up. + * + * We have to be careful not to postpone flush work if it is scheduled for + * earlier. Thus we use queue_delayed_work(). */ void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi) { unsigned long timeout; timeout = msecs_to_jiffies(dirty_writeback_interval * 10); - mod_delayed_work(bdi_wq, &bdi->wb.dwork, timeout); + spin_lock_bh(&bdi->wb_lock); + if (test_bit(BDI_registered, &bdi->state)) + queue_delayed_work(bdi_wq, &bdi->wb.dwork, timeout); + spin_unlock_bh(&bdi->wb_lock); } /* @@ -307,9 +313,6 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi) spin_unlock_bh(&bdi_lock); synchronize_rcu_expedited(); - - /* bdi_list is now unused, clear it to mark @bdi dying */ - INIT_LIST_HEAD(&bdi->bdi_list); } int bdi_register(struct backing_dev_info *bdi, struct device *parent, @@ -360,6 +363,11 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi) */ bdi_remove_from_list(bdi); + /* Make sure nobody queues further work */ + spin_lock_bh(&bdi->wb_lock); + clear_bit(BDI_registered, &bdi->state); + spin_unlock_bh(&bdi->wb_lock); + /* * Drain work list and shutdown the delayed_work. At this point, * @bdi->bdi_list is empty telling bdi_Writeback_workfn() that @bdi diff --git a/mm/compaction.c b/mm/compaction.c index 918577595ea8..37f976287068 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -217,21 +217,12 @@ static inline bool compact_trylock_irqsave(spinlock_t *lock, /* Returns true if the page is within a block suitable for migration to */ static bool suitable_migration_target(struct page *page) { - int migratetype = get_pageblock_migratetype(page); - - /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ - if (migratetype == MIGRATE_RESERVE) - return false; - - if (is_migrate_isolate(migratetype)) - return false; - - /* If the page is a large free page, then allow migration */ + /* If the page is a large free page, then disallow migration */ if (PageBuddy(page) && page_order(page) >= pageblock_order) - return true; + return false; /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ - if (migrate_async_suitable(migratetype)) + if (migrate_async_suitable(get_pageblock_migratetype(page))) return true; /* Otherwise skip the block */ @@ -253,6 +244,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, struct page *cursor, *valid_page = NULL; unsigned long flags; bool locked = false; + bool checked_pageblock = false; cursor = pfn_to_page(blockpfn); @@ -284,8 +276,16 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, break; /* Recheck this is a suitable migration target under lock */ - if (!strict && !suitable_migration_target(page)) - break; + if (!strict && !checked_pageblock) { + /* + * We need to check suitability of pageblock only once + * and this isolate_freepages_block() is called with + * pageblock range, so just check once is sufficient. + */ + checked_pageblock = true; + if (!suitable_migration_target(page)) + break; + } /* Recheck this is a buddy page under lock */ if (!PageBuddy(page)) @@ -460,12 +460,13 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, unsigned long last_pageblock_nr = 0, pageblock_nr; unsigned long nr_scanned = 0, nr_isolated = 0; struct list_head *migratelist = &cc->migratepages; - isolate_mode_t mode = 0; struct lruvec *lruvec; unsigned long flags; bool locked = false; struct page *page = NULL, *valid_page = NULL; bool skipped_async_unsuitable = false; + const isolate_mode_t mode = (!cc->sync ? ISOLATE_ASYNC_MIGRATE : 0) | + (unevictable ? ISOLATE_UNEVICTABLE : 0); /* * Ensure that there are not too many pages isolated from the LRU @@ -487,7 +488,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, cond_resched(); for (; low_pfn < end_pfn; low_pfn++) { /* give a chance to irqs before checking need_resched() */ - if (locked && !((low_pfn+1) % SWAP_CLUSTER_MAX)) { + if (locked && !(low_pfn % SWAP_CLUSTER_MAX)) { if (should_release_lock(&zone->lru_lock)) { spin_unlock_irqrestore(&zone->lru_lock, flags); locked = false; @@ -526,8 +527,25 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, /* If isolation recently failed, do not retry */ pageblock_nr = low_pfn >> pageblock_order; - if (!isolation_suitable(cc, page)) - goto next_pageblock; + if (last_pageblock_nr != pageblock_nr) { + int mt; + + last_pageblock_nr = pageblock_nr; + if (!isolation_suitable(cc, page)) + goto next_pageblock; + + /* + * For async migration, also only scan in MOVABLE + * blocks. Async migration is optimistic to see if + * the minimum amount of work satisfies the allocation + */ + mt = get_pageblock_migratetype(page); + if (!cc->sync && !migrate_async_suitable(mt)) { + cc->finished_update_migrate = true; + skipped_async_unsuitable = true; + goto next_pageblock; + } + } /* * Skip if free. page_order cannot be used without zone->lock @@ -537,18 +555,6 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, continue; /* - * For async migration, also only scan in MOVABLE blocks. Async - * migration is optimistic to see if the minimum amount of work - * satisfies the allocation - */ - if (!cc->sync && last_pageblock_nr != pageblock_nr && - !migrate_async_suitable(get_pageblock_migratetype(page))) { - cc->finished_update_migrate = true; - skipped_async_unsuitable = true; - goto next_pageblock; - } - - /* * Check may be lockless but that's ok as we recheck later. * It's possible to migrate LRU pages and balloon pages * Skip any other type of page @@ -557,11 +563,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, if (unlikely(balloon_page_movable(page))) { if (locked && balloon_page_isolate(page)) { /* Successfully isolated */ - cc->finished_update_migrate = true; - list_add(&page->lru, migratelist); - cc->nr_migratepages++; - nr_isolated++; - goto check_compact_cluster; + goto isolate_success; } } continue; @@ -584,6 +586,15 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, continue; } + /* + * Migration will fail if an anonymous page is pinned in memory, + * so avoid taking lru_lock and isolating it unnecessarily in an + * admittedly racy check. + */ + if (!page_mapping(page) && + page_count(page) > page_mapcount(page)) + continue; + /* Check if it is ok to still hold the lock */ locked = compact_checklock_irqsave(&zone->lru_lock, &flags, locked, cc); @@ -598,12 +609,6 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, continue; } - if (!cc->sync) - mode |= ISOLATE_ASYNC_MIGRATE; - - if (unevictable) - mode |= ISOLATE_UNEVICTABLE; - lruvec = mem_cgroup_page_lruvec(page, zone); /* Try isolate the page */ @@ -613,13 +618,14 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, VM_BUG_ON_PAGE(PageTransCompound(page), page); /* Successfully isolated */ - cc->finished_update_migrate = true; del_page_from_lru_list(page, lruvec, page_lru(page)); + +isolate_success: + cc->finished_update_migrate = true; list_add(&page->lru, migratelist); cc->nr_migratepages++; nr_isolated++; -check_compact_cluster: /* Avoid isolating too much */ if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) { ++low_pfn; @@ -630,7 +636,6 @@ check_compact_cluster: next_pageblock: low_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages) - 1; - last_pageblock_nr = pageblock_nr; } acct_isolated(zone, locked, cc); @@ -1186,6 +1191,7 @@ static void compact_node(int nid) struct compact_control cc = { .order = -1, .sync = true, + .ignore_skip_hint = true, }; __compact_pgdat(NODE_DATA(nid), &cc); @@ -1225,7 +1231,7 @@ int sysctl_extfrag_handler(struct ctl_table *table, int write, } #if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA) -ssize_t sysfs_compact_node(struct device *dev, +static ssize_t sysfs_compact_node(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c new file mode 100644 index 000000000000..e10ccd299d66 --- /dev/null +++ b/mm/early_ioremap.c @@ -0,0 +1,245 @@ +/* + * Provide common bits of early_ioremap() support for architectures needing + * temporary mappings during boot before ioremap() is available. + * + * This is mostly a direct copy of the x86 early_ioremap implementation. + * + * (C) Copyright 1995 1996, 2014 Linus Torvalds + * + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <asm/fixmap.h> + +#ifdef CONFIG_MMU +static int early_ioremap_debug __initdata; + +static int __init early_ioremap_debug_setup(char *str) +{ + early_ioremap_debug = 1; + + return 0; +} +early_param("early_ioremap_debug", early_ioremap_debug_setup); + +static int after_paging_init __initdata; + +void __init __weak early_ioremap_shutdown(void) +{ +} + +void __init early_ioremap_reset(void) +{ + early_ioremap_shutdown(); + after_paging_init = 1; +} + +/* + * Generally, ioremap() is available after paging_init() has been called. + * Architectures wanting to allow early_ioremap after paging_init() can + * define __late_set_fixmap and __late_clear_fixmap to do the right thing. + */ +#ifndef __late_set_fixmap +static inline void __init __late_set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t prot) +{ + BUG(); +} +#endif + +#ifndef __late_clear_fixmap +static inline void __init __late_clear_fixmap(enum fixed_addresses idx) +{ + BUG(); +} +#endif + +static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; +static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; +static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; + +void __init early_ioremap_setup(void) +{ + int i; + + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) + if (WARN_ON(prev_map[i])) + break; + + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) + slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); +} + +static int __init check_early_ioremap_leak(void) +{ + int count = 0; + int i; + + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) + if (prev_map[i]) + count++; + + if (WARN(count, KERN_WARNING + "Debug warning: early ioremap leak of %d areas detected.\n" + "please boot with early_ioremap_debug and report the dmesg.\n", + count)) + return 1; + return 0; +} +late_initcall(check_early_ioremap_leak); + +static void __init __iomem * +__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) +{ + unsigned long offset; + resource_size_t last_addr; + unsigned int nrpages; + enum fixed_addresses idx; + int i, slot; + + WARN_ON(system_state != SYSTEM_BOOTING); + + slot = -1; + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { + if (!prev_map[i]) { + slot = i; + break; + } + } + + if (WARN(slot < 0, "%s(%08llx, %08lx) not found slot\n", + __func__, (u64)phys_addr, size)) + return NULL; + + /* Don't allow wraparound or zero size */ + last_addr = phys_addr + size - 1; + if (WARN_ON(!size || last_addr < phys_addr)) + return NULL; + + prev_size[slot] = size; + /* + * Mappings have to be page-aligned + */ + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr + 1) - phys_addr; + + /* + * Mappings have to fit in the FIX_BTMAP area. + */ + nrpages = size >> PAGE_SHIFT; + if (WARN_ON(nrpages > NR_FIX_BTMAPS)) + return NULL; + + /* + * Ok, go for it.. + */ + idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; + while (nrpages > 0) { + if (after_paging_init) + __late_set_fixmap(idx, phys_addr, prot); + else + __early_set_fixmap(idx, phys_addr, prot); + phys_addr += PAGE_SIZE; + --idx; + --nrpages; + } + WARN(early_ioremap_debug, "%s(%08llx, %08lx) [%d] => %08lx + %08lx\n", + __func__, (u64)phys_addr, size, slot, offset, slot_virt[slot]); + + prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]); + return prev_map[slot]; +} + +void __init early_iounmap(void __iomem *addr, unsigned long size) +{ + unsigned long virt_addr; + unsigned long offset; + unsigned int nrpages; + enum fixed_addresses idx; + int i, slot; + + slot = -1; + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { + if (prev_map[i] == addr) { + slot = i; + break; + } + } + + if (WARN(slot < 0, "early_iounmap(%p, %08lx) not found slot\n", + addr, size)) + return; + + if (WARN(prev_size[slot] != size, + "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n", + addr, size, slot, prev_size[slot])) + return; + + WARN(early_ioremap_debug, "early_iounmap(%p, %08lx) [%d]\n", + addr, size, slot); + + virt_addr = (unsigned long)addr; + if (WARN_ON(virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))) + return; + + offset = virt_addr & ~PAGE_MASK; + nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT; + + idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; + while (nrpages > 0) { + if (after_paging_init) + __late_clear_fixmap(idx); + else + __early_set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR); + --idx; + --nrpages; + } + prev_map[slot] = NULL; +} + +/* Remap an IO device */ +void __init __iomem * +early_ioremap(resource_size_t phys_addr, unsigned long size) +{ + return __early_ioremap(phys_addr, size, FIXMAP_PAGE_IO); +} + +/* Remap memory */ +void __init * +early_memremap(resource_size_t phys_addr, unsigned long size) +{ + return (__force void *)__early_ioremap(phys_addr, size, + FIXMAP_PAGE_NORMAL); +} +#else /* CONFIG_MMU */ + +void __init __iomem * +early_ioremap(resource_size_t phys_addr, unsigned long size) +{ + return (__force void __iomem *)phys_addr; +} + +/* Remap memory */ +void __init * +early_memremap(resource_size_t phys_addr, unsigned long size) +{ + return (void *)phys_addr; +} + +void __init early_iounmap(void __iomem *addr, unsigned long size) +{ +} + +#endif /* CONFIG_MMU */ + + +void __init early_memunmap(void *addr, unsigned long size) +{ + early_iounmap((__force void __iomem *)addr, size); +} diff --git a/mm/filemap.c b/mm/filemap.c index 7a13f6ac5421..5020b280a771 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -33,6 +33,7 @@ #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ #include <linux/memcontrol.h> #include <linux/cleancache.h> +#include <linux/rmap.h> #include "internal.h" #define CREATE_TRACE_POINTS @@ -76,7 +77,7 @@ * ->mmap_sem * ->lock_page (access_process_vm) * - * ->i_mutex (generic_file_buffered_write) + * ->i_mutex (generic_perform_write) * ->mmap_sem (fault_in_pages_readable->do_page_fault) * * bdi->wb.list_lock @@ -107,12 +108,75 @@ * ->tasklist_lock (memory_failure, collect_procs_ao) */ +static void page_cache_tree_delete(struct address_space *mapping, + struct page *page, void *shadow) +{ + struct radix_tree_node *node; + unsigned long index; + unsigned int offset; + unsigned int tag; + void **slot; + + VM_BUG_ON(!PageLocked(page)); + + __radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot); + + if (shadow) { + mapping->nrshadows++; + /* + * Make sure the nrshadows update is committed before + * the nrpages update so that final truncate racing + * with reclaim does not see both counters 0 at the + * same time and miss a shadow entry. + */ + smp_wmb(); + } + mapping->nrpages--; + + if (!node) { + /* Clear direct pointer tags in root node */ + mapping->page_tree.gfp_mask &= __GFP_BITS_MASK; + radix_tree_replace_slot(slot, shadow); + return; + } + + /* Clear tree tags for the removed page */ + index = page->index; + offset = index & RADIX_TREE_MAP_MASK; + for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { + if (test_bit(offset, node->tags[tag])) + radix_tree_tag_clear(&mapping->page_tree, index, tag); + } + + /* Delete page, swap shadow entry */ + radix_tree_replace_slot(slot, shadow); + workingset_node_pages_dec(node); + if (shadow) + workingset_node_shadows_inc(node); + else + if (__radix_tree_delete_node(&mapping->page_tree, node)) + return; + + /* + * Track node that only contains shadow entries. + * + * Avoid acquiring the list_lru lock if already tracked. The + * list_empty() test is safe as node->private_list is + * protected by mapping->tree_lock. + */ + if (!workingset_node_pages(node) && + list_empty(&node->private_list)) { + node->private_data = mapping; + list_lru_add(&workingset_shadow_nodes, &node->private_list); + } +} + /* * Delete a page from the page cache and free it. Caller has to make * sure the page is locked and that nobody else uses it - or that usage * is safe. The caller must hold the mapping's tree_lock. */ -void __delete_from_page_cache(struct page *page) +void __delete_from_page_cache(struct page *page, void *shadow) { struct address_space *mapping = page->mapping; @@ -127,10 +191,11 @@ void __delete_from_page_cache(struct page *page) else cleancache_invalidate_page(mapping, page); - radix_tree_delete(&mapping->page_tree, page->index); + page_cache_tree_delete(mapping, page, shadow); + page->mapping = NULL; /* Leave page->index set: truncation lookup relies upon it */ - mapping->nrpages--; + __dec_zone_page_state(page, NR_FILE_PAGES); if (PageSwapBacked(page)) __dec_zone_page_state(page, NR_SHMEM); @@ -166,7 +231,7 @@ void delete_from_page_cache(struct page *page) freepage = mapping->a_ops->freepage; spin_lock_irq(&mapping->tree_lock); - __delete_from_page_cache(page); + __delete_from_page_cache(page, NULL); spin_unlock_irq(&mapping->tree_lock); mem_cgroup_uncharge_cache_page(page); @@ -426,7 +491,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) new->index = offset; spin_lock_irq(&mapping->tree_lock); - __delete_from_page_cache(old); + __delete_from_page_cache(old, NULL); error = radix_tree_insert(&mapping->page_tree, offset, new); BUG_ON(error); mapping->nrpages++; @@ -446,25 +511,59 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) } EXPORT_SYMBOL_GPL(replace_page_cache_page); -/** - * add_to_page_cache_locked - add a locked page to the pagecache - * @page: page to add - * @mapping: the page's address_space - * @offset: page index - * @gfp_mask: page allocation mode - * - * This function is used to add a page to the pagecache. It must be locked. - * This function does not add the page to the LRU. The caller must do that. - */ -int add_to_page_cache_locked(struct page *page, struct address_space *mapping, - pgoff_t offset, gfp_t gfp_mask) +static int page_cache_tree_insert(struct address_space *mapping, + struct page *page, void **shadowp) +{ + struct radix_tree_node *node; + void **slot; + int error; + + error = __radix_tree_create(&mapping->page_tree, page->index, + &node, &slot); + if (error) + return error; + if (*slot) { + void *p; + + p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock); + if (!radix_tree_exceptional_entry(p)) + return -EEXIST; + if (shadowp) + *shadowp = p; + mapping->nrshadows--; + if (node) + workingset_node_shadows_dec(node); + } + radix_tree_replace_slot(slot, page); + mapping->nrpages++; + if (node) { + workingset_node_pages_inc(node); + /* + * Don't track node that contains actual pages. + * + * Avoid acquiring the list_lru lock if already + * untracked. The list_empty() test is safe as + * node->private_list is protected by + * mapping->tree_lock. + */ + if (!list_empty(&node->private_list)) + list_lru_del(&workingset_shadow_nodes, + &node->private_list); + } + return 0; +} |
