/*
* Memory Migration functionality - linux/mm/migration.c
*
* Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
*
* Page migration was first developed in the context of the memory hotplug
* project. The main authors of the migration code are:
*
* IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
* Hirokazu Takahashi <taka@valinux.co.jp>
* Dave Hansen <haveblue@us.ibm.com>
* Christoph Lameter
*/
#include <linux/migrate.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
#include <linux/nsproxy.h>
#include <linux/pagevec.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/writeback.h>
#include <linux/mempolicy.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/memcontrol.h>
#include <linux/syscalls.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
#include <linux/gfp.h>
#include <asm/tlbflush.h>
#define CREATE_TRACE_POINTS
#include <trace/events/migrate.h>
#include "internal.h"
/*
 * migrate_prep() needs to be called before we start compiling a list of pages
 * to be migrated using isolate_lru_page(). If scheduling work on other CPUs is
 * undesirable, use migrate_prep_local() instead.
 *
 * Always returns 0.
 */
int migrate_prep(void)
{
/*
 * Clear the LRU lists so pages can be isolated.
 * Note that pages may be moved off the LRU after we have
 * drained them. Those pages will fail to migrate like other
 * pages that may be busy.
 */
lru_add_drain_all();
return 0;
}
/*
 * Do the necessary work of migrate_prep() but not if it involves other CPUs:
 * this variant calls lru_add_drain() where migrate_prep() calls
 * lru_add_drain_all().
 *
 * Always returns 0.
 */
int migrate_prep_local(void)
{
lru_add_drain();
return 0;
}
/*
 * Give every isolated page on list @l back to the LRU.
 *
 * Each page is unlinked from @l, the zone counter indexed by
 * NR_ISOLATED_ANON + page_is_file_cache(page) is decremented for it, and
 * the page is then passed to putback_lru_page().
 *
 * The pages are added back to the LRU under page lock to avoid leaking
 * evictable pages back onto the unevictable list.
 */
void putback_lru_pages(struct list_head *l)
{
	struct page *cur, *next;

	/* _safe variant: the current entry is unlinked inside the loop. */
	list_for_each_entry_safe(cur, next, l, lru) {
		int isolated_item = NR_ISOLATED_ANON + page_is_file_cache(cur);

		list_del(&cur->lru);
		dec_zone_page_state(cur, isolated_item);
		putback_lru_page(cur);
	}
}
/*
 * Restore a potential migration pte to a working pte entry.
 *
 * Looks up the pte for @addr in @vma's mm. If it holds a migration entry
 * that refers to @old, the entry is replaced by a pte mapping @new: a page
 * reference is taken on @new, the pte is installed, and @new is added to
 * the reverse map. Any other pte content is left untouched.
 *
 * Used as the rmap_walk() callback by remove_migration_ptes().
 *
 * Always returns SWAP_AGAIN, whether or not a pte was replaced.
 */
static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
				unsigned long addr, void *old)
{
	struct mm_struct *mm = vma->vm_mm;
	spinlock_t *ptl;
	pte_t *ptep;
	pte_t cur;
	pte_t newpte;
	swp_entry_t entry;

	if (unlikely(PageHuge(new))) {
		ptep = huge_pte_offset(mm, addr);
		if (!ptep)
			goto out;
		ptl = &mm->page_table_lock;
	} else {
		pgd_t *pgd;
		pud_t *pud;
		pmd_t *pmd;

		/* Walk down the page tables; give up at any missing level. */
		pgd = pgd_offset(mm, addr);
		if (!pgd_present(*pgd))
			goto out;

		pud = pud_offset(pgd, addr);
		if (!pud_present(*pud))
			goto out;

		pmd = pmd_offset(pud, addr);
		if (pmd_trans_huge(*pmd) || !pmd_present(*pmd))
			goto out;

		ptep = pte_offset_map(pmd, addr);

		/*
		 * Peek to check is_swap_pte() before taking ptlock?  No, we
		 * can race mremap's move_ptes(), which skips anon_vma lock.
		 */
		ptl = pte_lockptr(mm, pmd);
	}

	spin_lock(ptl);
	cur = *ptep;
	if (!is_swap_pte(cur))
		goto unlock;

	/* Only a migration entry that points at @old is ours to replace. */
	entry = pte_to_swp_entry(cur);
	if (!(is_migration_entry(entry) &&
	      migration_entry_to_page(entry) == old))
		goto unlock;

	/* The new mapping holds its own reference on @new. */
	get_page(new);

	/* Build the replacement pte, keeping write access if it was there. */
	newpte = pte_mkold(mk_pte(new, vma->vm_page_prot));
	if (is_write_migration_entry(entry))
		newpte = pte_mkwrite(newpte);
#ifdef CONFIG_HUGETLB_PAGE
	if (PageHuge(new))
		newpte = pte_mkhuge(newpte);
#endif
	flush_cache_page(vma, addr, pte_pfn(newpte));
	set_pte_at(mm, addr, ptep, newpte);

	/* Account the new mapping in the reverse map. */
	if (!PageHuge(new)) {
		if (PageAnon(new))
			page_add_anon_rmap(new, vma, addr);
		else
			page_add_file_rmap(new);
	} else if (PageAnon(new)) {
		hugepage_add_anon_rmap(new, vma, addr);
	} else {
		page_dup_rmap(new);
	}

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(vma, addr, ptep);
unlock:
	pte_unmap_unlock(ptep, ptl);
out:
	return SWAP_AGAIN;
}
/*
 * Get rid of all migration entries and replace them by
 * references to the indicated page.
 *
 * Runs rmap_walk() on @new with remove_migration_pte() as the callback and
 * @old as its argument; the callback only replaces migration entries that
 * refer to @old.
 */
static void remove_migration_ptes(struct page *old, struct page *new)
{
rmap_walk(new, remove_migration_pte, old);
}
/*
 * Something used the pte of a page under migration. We need to
 * get to the page and wait until migration is finished.
 * When we return from this function the fault will be retried.
 *
 * If the pte at @address is no longer a migration entry, or a reference on
 * the page cannot be obtained, the function returns without waiting.
 */
void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
			  unsigned long address)
{
	spinlock_t *ptl;
	pte_t *ptep;
	pte_t pteval;
	swp_entry_t entry;
	struct page *page;

	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
	pteval = *ptep;
	if (!is_swap_pte(pteval))
		goto unlock;

	entry = pte_to_swp_entry(pteval);
	if (!is_migration_entry(entry))
		goto unlock;

	page = migration_entry_to_page(entry);

	/*
	 * Once migration has started replacing the page in the radix tree,
	 * its page_count *must* be zero, and we do not want to call
	 * wait_on_page_locked() on a page we hold no reference to. So take
	 * the reference with get_page_unless_zero(); if that fails we just
	 * return and the page fault will occur again.
	 */
	if (get_page_unless_zero(page)) {
		/* Drop the pte lock before sleeping on the page lock. */
		pte_unmap_unlock(ptep, ptl);
		wait_on_page_locked(page);
		put_page(page);
		return;
	}
unlock:
	pte_unmap_unlock(ptep, ptl);
}
#ifdef CONFIG_BLOCK
/*
 * Take a reference on and lock every buffer on the b_this_page ring that
 * starts at @head.
 *
 * For any mode other than MIGRATE_ASYNC each buffer is locked with
 * lock_buffer(), which may block, and the function always returns true.
 *
 * For MIGRATE_ASYNC we cannot block, so trylock_buffer() is used. On the
 * first buffer that cannot be locked, every lock and reference taken so far
 * is released again and false is returned.
 *
 * Returns true if all buffers are successfully locked.
 */
static bool buffer_migrate_lock_buffers(struct buffer_head *head,
					enum migrate_mode mode)
{
	struct buffer_head *bh = head;
	struct buffer_head *undo;

	do {
		get_bh(bh);
		if (mode != MIGRATE_ASYNC) {
			/* Simple case, sync compaction: waiting is allowed. */
			lock_buffer(bh);
		} else if (!trylock_buffer(bh)) {
			/* Async case: cannot stall on a contended buffer. */
			goto rollback;
		}
		bh = bh->b_this_page;
	} while (bh != head);

	return true;

rollback:
	/*
	 * bh is the buffer we failed to lock: drop the reference just taken
	 * on it, then unlock and release everything before it on the ring.
	 */
	put_bh(bh);
	for (undo = head; undo != bh; undo = undo->b_this_page) {
		unlock_buffer(undo);
		put_bh(undo);
	}
	return false;
}
#else
/*
 * Stub used when CONFIG_BLOCK is not defined: touches neither @head nor
 * @mode and unconditionally returns true.
 */
static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
enum migrate_mode mode)
{
return true;
}
#endif /* CONFIG_BLOCK */
/*
* Replace the page in the mapping.
*
* The number of remaining references must be:
* 1 for anonymous pages without a mapping
* 2 for pages with a mapping
* 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
*/
static int migrate_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page,
struct buffer_head *head, enum migrate_mode mode)
{
int expected_count = 0;
void **pslot;
if (!mapping) {
/* Anonymous page without mapping */
if (page_count(page) != 1)
return -EAGAIN;
return 0;
}
spin_lock_irq(&mapping->tree_lock);
pslot = radix_tree_lookup_slot(&mapping->page_tree,
page_index(page));
expected_count = 2 + page_has_private(page);
if (page_count(page) != expected_count ||
radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
spin_unlock_irq(&mapping->tree_lock);
return -EAGAIN;
}
if (!page_freeze_refs(page, expected_count)) {
spin_unlock_irq(&mapping->tree_lock);
return -EAGAIN;
}
/*
* In the async migration case of moving a page with buffers, lock the
* buffers using trylock before the mapping is moved. If the mapping
* was moved, we later failed to lock the buffers and could not move
* the mapping back due to an elevated page count, we would have to
* block waiting on other references to be dropped.
*/
if (mode == MIGRATE_ASYNC && head &&
!buffer_migrate_lock_buffers(head, mode)) {
page_unfreeze_refs(page, expected_count);
spin_unlock_irq(&mapping->tree_lock);
retur
|