| field | value | |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-06-29 17:29:11 -0700 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-06-29 17:29:11 -0700 |
| commit | 65090f30ab791810a3dc840317e57df05018559c | |
| tree | f417526656da37109777e89613e140ffc59228bc | /include/linux |
| parent | 349a2d52ffe59b7a0c5876fa7ee9f3eaf188b830 | |
| parent | 0ed950d1f28142ccd9a9453c60df87853530d778 | |
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton:
"191 patches.
Subsystems affected by this patch series: kthread, ia64, scripts,
ntfs, squashfs, ocfs2, kernel/watchdog, and mm (gup, pagealloc, slab,
slub, kmemleak, dax, debug, pagecache, gup, swap, memcg, pagemap,
mprotect, bootmem, dma, tracing, vmalloc, kasan, initialization,
pagealloc, and memory-failure)"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (191 commits)
mm,hwpoison: make get_hwpoison_page() call get_any_page()
mm,hwpoison: send SIGBUS with error virutal address
mm/page_alloc: split pcp->high across all online CPUs for cpuless nodes
mm/page_alloc: allow high-order pages to be stored on the per-cpu lists
mm: replace CONFIG_FLAT_NODE_MEM_MAP with CONFIG_FLATMEM
mm: replace CONFIG_NEED_MULTIPLE_NODES with CONFIG_NUMA
docs: remove description of DISCONTIGMEM
arch, mm: remove stale mentions of DISCONIGMEM
mm: remove CONFIG_DISCONTIGMEM
m68k: remove support for DISCONTIGMEM
arc: remove support for DISCONTIGMEM
arc: update comment about HIGHMEM implementation
alpha: remove DISCONTIGMEM and NUMA
mm/page_alloc: move free_the_page
mm/page_alloc: fix counting of managed_pages
mm/page_alloc: improve memmap_pages dbg msg
mm: drop SECTION_SHIFT in code comments
mm/page_alloc: introduce vm.percpu_pagelist_high_fraction
mm/page_alloc: limit the number of pages on PCP lists when reclaim is active
mm/page_alloc: scale the number of pages that are batch freed
...
Diffstat (limited to 'include/linux')
29 files changed, 320 insertions(+), 164 deletions(-)
```diff
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index fff9367a6348..1d7edad9914f 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -154,6 +154,8 @@ struct bdi_writeback {
 	struct cgroup_subsys_state *blkcg_css; /* and blkcg */
 	struct list_head memcg_node;	/* anchored at memcg->cgwb_list */
 	struct list_head blkcg_node;	/* anchored at blkcg->cgwb_list */
+	struct list_head b_attached;	/* attached inodes, protected by list_lock */
+	struct list_head offline_node;	/* anchored at offline_cgwbs */
 
 	union {
 		struct work_struct release_work;
@@ -239,8 +241,9 @@ static inline void wb_get(struct bdi_writeback *wb)
 /**
  * wb_put - decrement a wb's refcount
  * @wb: bdi_writeback to put
+ * @nr: number of references to put
  */
-static inline void wb_put(struct bdi_writeback *wb)
+static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr)
 {
 	if (WARN_ON_ONCE(!wb->bdi)) {
 		/*
@@ -251,7 +254,16 @@ static inline void wb_put(struct bdi_writeback *wb)
 	}
 
 	if (wb != &wb->bdi->wb)
-		percpu_ref_put(&wb->refcnt);
+		percpu_ref_put_many(&wb->refcnt, nr);
+}
+
+/**
+ * wb_put - decrement a wb's refcount
+ * @wb: bdi_writeback to put
+ */
+static inline void wb_put(struct bdi_writeback *wb)
+{
+	wb_put_many(wb, 1);
 }
 
 /**
@@ -280,6 +292,10 @@ static inline void wb_put(struct bdi_writeback *wb)
 {
 }
 
+static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr)
+{
+}
+
 static inline bool wb_dying(struct bdi_writeback *wb)
 {
 	return false;
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 4a62b3980642..47e13582d9fc 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -54,7 +54,7 @@ enum cpuhp_state {
 	CPUHP_MM_MEMCQ_DEAD,
 	CPUHP_PERCPU_CNT_DEAD,
 	CPUHP_RADIX_DEAD,
-	CPUHP_PAGE_ALLOC_DEAD,
+	CPUHP_PAGE_ALLOC,
 	CPUHP_NET_DEV_DEAD,
 	CPUHP_PCI_XGENE_DEAD,
 	CPUHP_IOMMU_IOVA_DEAD,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c3c88fdb9b2a..fad6663cd1b0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3417,18 +3417,14 @@ extern int simple_rename(struct user_namespace *, struct inode *,
 extern void simple_recursive_removal(struct dentry *,
 			      void (*callback)(struct dentry *));
 extern int noop_fsync(struct file *, loff_t, loff_t, int);
-extern int noop_set_page_dirty(struct page *page);
 extern void noop_invalidatepage(struct page *page, unsigned int offset,
 		unsigned int length);
 extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
 extern int simple_empty(struct dentry *);
-extern int simple_readpage(struct file *file, struct page *page);
 extern int simple_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata);
-extern int simple_write_end(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned copied,
-			struct page *page, void *fsdata);
+extern const struct address_space_operations ram_aops;
 extern int always_delete_dentry(const struct dentry *);
 extern struct inode *alloc_anon_inode(struct super_block *);
 extern int simple_nosetlease(struct file *, long, struct file_lock **, void **);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index e6102dfa4faa..55b2ec1f965a 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -506,8 +506,8 @@ static inline int gfp_zonelist(gfp_t flags)
  * There are two zonelists per node, one for all zones with memory and
  * one containing just zones from the node the zonelist belongs to.
  *
- * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets
- * optimized to &contig_page_data at compile-time.
+ * For the case of non-NUMA systems the NODE_DATA() gets optimized to
+ * &contig_page_data at compile-time.
  */
 static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
 {
@@ -548,6 +548,15 @@ alloc_pages_bulk_array(gfp_t gfp, unsigned long nr_pages, struct page **page_array)
 	return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, NULL, page_array);
 }
 
+static inline unsigned long
+alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct page **page_array)
+{
+	if (nid == NUMA_NO_NODE)
+		nid = numa_mem_id();
+
+	return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array);
+}
+
 /*
  * Allocate pages, preferring the node given as nid. The node must be valid and
  * online. For more general interface, see alloc_pages_node().
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index c87d0cb0de6d..479c1da3e221 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -159,7 +159,6 @@ ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 		const struct iomap_ops *ops);
 int iomap_readpage(struct page *page, const struct iomap_ops *ops);
 void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
-int iomap_set_page_dirty(struct page *page);
 int iomap_is_partially_uptodate(struct page *page, unsigned long from,
 		unsigned long count);
 int iomap_releasepage(struct page *page, gfp_t gfp_mask);
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index a1c7ce5f3e4f..5310e217bd74 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -18,7 +18,6 @@ struct task_struct;
 
 /* kasan_data struct is used in KUnit tests for KASAN expected failures */
 struct kunit_kasan_expectation {
-	bool report_expected;
 	bool report_found;
 };
 
@@ -42,9 +41,9 @@ struct kunit_kasan_expectation {
 #endif
 
 extern unsigned char kasan_early_shadow_page[PAGE_SIZE];
-extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS];
-extern pmd_t kasan_early_shadow_pmd[PTRS_PER_PMD];
-extern pud_t kasan_early_shadow_pud[PTRS_PER_PUD];
+extern pte_t kasan_early_shadow_pte[MAX_PTRS_PER_PTE + PTE_HWTABLE_PTRS];
+extern pmd_t kasan_early_shadow_pmd[MAX_PTRS_PER_PMD];
+extern pud_t kasan_early_shadow_pud[MAX_PTRS_PER_PUD];
 extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D];
 
 int kasan_populate_early_shadow(const void *shadow_start,
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 15d8bad3d2f2..bf950621febf 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -357,6 +357,8 @@ int sscanf(const char *, const char *, ...);
 extern __scanf(2, 0)
 int vsscanf(const char *, const char *, va_list);
 
+extern int no_hash_pointers_enable(char *str);
+
 extern int get_option(char **str, int *pint);
 extern char *get_options(const char *str, int nints, int *ints);
 extern unsigned long long memparse(const char *ptr, char **retptr);
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index d9133d6db308..346b0f269161 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -18,7 +18,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
  * @threadfn: the function to run in the thread
  * @data: data pointer for @threadfn()
  * @namefmt: printf-style format string for the thread name
- * @arg...: arguments for @namefmt.
+ * @arg: arguments for @namefmt.
  *
  * This macro will create a kthread on the current node, leaving it in
  * the stopped state.  This is just a helper for kthread_create_on_node();
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 5984fff3f175..552309342c38 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -50,7 +50,7 @@ struct memblock_region {
 	phys_addr_t base;
 	phys_addr_t size;
 	enum memblock_flags flags;
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 	int nid;
 #endif
 };
@@ -347,7 +347,7 @@ int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask);
 int memblock_set_node(phys_addr_t base, phys_addr_t size,
 		      struct memblock_type *type, int nid);
 
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 static inline void memblock_set_region_node(struct memblock_region *r, int nid)
 {
 	r->nid = nid;
@@ -366,7 +366,7 @@ static inline int memblock_get_region_node(const struct memblock_region *r)
 {
 	return 0;
 }
-#endif /* CONFIG_NEED_MULTIPLE_NODES */
+#endif /* CONFIG_NUMA */
 
 /* Flags for memblock allocation APIs */
 #define MEMBLOCK_ALLOC_ANYWHERE	(~(phys_addr_t)0)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index c193be760709..6d66037be646 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -192,7 +192,7 @@ enum memcg_kmem_state {
 struct memcg_padding {
 	char x[0];
 } ____cacheline_internodealigned_in_smp;
-#define MEMCG_PADDING(name)      struct memcg_padding name;
+#define MEMCG_PADDING(name)      struct memcg_padding name
 #else
 #define MEMCG_PADDING(name)
 #endif
@@ -349,8 +349,7 @@ struct mem_cgroup {
 	struct deferred_split deferred_split_queue;
 #endif
 
-	struct mem_cgroup_per_node *nodeinfo[0];
-	/* WARNING: nodeinfo must be the last member here */
+	struct mem_cgroup_per_node *nodeinfo[];
 };
 
 /*
@@ -743,35 +742,18 @@ out:
 /**
  * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
  * @page: the page
- * @pgdat: pgdat of the page
 *
 * This function relies on page->mem_cgroup being stable.
 */
-static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
-						struct pglist_data *pgdat)
+static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
 {
+	pg_data_t *pgdat = page_pgdat(page);
 	struct mem_cgroup *memcg = page_memcg(page);
 
 	VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page);
 	return mem_cgroup_lruvec(memcg, pgdat);
 }
 
-static inline bool lruvec_holds_page_lru_lock(struct page *page,
-					      struct lruvec *lruvec)
-{
-	pg_data_t *pgdat = page_pgdat(page);
-	const struct mem_cgroup *memcg;
-	struct mem_cgroup_per_node *mz;
-
-	if (mem_cgroup_disabled())
-		return lruvec == &pgdat->__lruvec;
-
-	mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-	memcg = page_memcg(page) ? : root_mem_cgroup;
-
-	return lruvec->pgdat == pgdat && mz->memcg == memcg;
-}
-
 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 
 struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
@@ -1221,18 +1203,11 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
 	return &pgdat->__lruvec;
 }
 
-static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
-						    struct pglist_data *pgdat)
-{
-	return &pgdat->__lruvec;
-}
-
-static inline bool lruvec_holds_page_lru_lock(struct page *page,
-					      struct lruvec *lruvec)
+static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
 {
 	pg_data_t *pgdat = page_pgdat(page);
 
-	return lruvec == &pgdat->__lruvec;
+	return &pgdat->__lruvec;
 }
 
 static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
@@ -1255,6 +1230,12 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 	return NULL;
 }
 
+static inline
+struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css)
+{
+	return NULL;
+}
+
 static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 {
 }
@@ -1516,12 +1497,19 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
 	spin_unlock_irqrestore(&lruvec->lru_lock, flags);
 }
 
+/* Test requires a stable page->memcg binding, see page_memcg() */
+static inline bool page_matches_lruvec(struct page *page, struct lruvec *lruvec)
+{
+	return lruvec_pgdat(lruvec) == page_pgdat(page) &&
+	       lruvec_memcg(lruvec) == page_memcg(page);
+}
+
 /* Don't lock again iff page's lruvec locked */
 static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
 		struct lruvec *locked_lruvec)
 {
 	if (locked_lruvec) {
-		if (lruvec_holds_page_lru_lock(page, locked_lruvec))
+		if (page_matches_lruvec(page, locked_lruvec))
 			return locked_lruvec;
 
 		unlock_page_lruvec_irq(locked_lruvec);
@@ -1535,7 +1523,7 @@ static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page,
 		struct lruvec *locked_lruvec, unsigned long *flags)
 {
 	if (locked_lruvec) {
-		if (lruvec_holds_page_lru_lock(page, locked_lruvec))
+		if (page_matches_lruvec(page, locked_lruvec))
 			return locked_lruvec;
 
 		unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 01ecf9e3603c..6d0f827ca4eb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -46,7 +46,7 @@ extern int sysctl_page_lock_unfairness;
 
 void init_mm_internals(void);
 
-#ifndef CONFIG_NEED_MULTIPLE_NODES	/* Don't use mapnrs, do it properly */
+#ifndef CONFIG_NUMA		/* Don't use mapnrs, do it properly */
 extern unsigned long max_mapnr;
 
 static inline void set_max_mapnr(unsigned long limit)
@@ -234,7 +234,11 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
 int __add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 		pgoff_t index, gfp_t gfp, void **shadowp);
 
+#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
+#else
+#define nth_page(page,n) ((page) + (n))
+#endif
 
 /* to align the pointer to the (next) page boundary */
 #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
@@ -1341,7 +1345,7 @@ static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
 	if (!is_cow_mapping(vma->vm_flags))
 		return false;
 
-	if (!atomic_read(&vma->vm_mm->has_pinned))
+	if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags))
 		return false;
 
 	return page_maybe_dma_pinned(page);
@@ -1850,12 +1854,8 @@ extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
 extern void do_invalidatepage(struct page *page, unsigned int offset,
 			      unsigned int length);
 
-void __set_page_dirty(struct page *, struct address_space *, int warn);
-int __set_page_dirty_nobuffers(struct page *page);
-int __set_page_dirty_no_writeback(struct page *page);
 int redirty_page_for_writepage(struct writeback_control *wbc,
 				struct page *page);
-void account_page_dirtied(struct page *page, struct address_space *mapping);
 void account_page_cleaned(struct page *page, struct address_space *mapping,
 			  struct bdi_writeback *wb);
 int set_page_dirty(struct page *page);
@@ -2420,7 +2420,7 @@ static inline unsigned long free_initmem_default(int poison)
 	extern char __init_begin[], __init_end[];
 
 	return free_reserved_area(&__init_begin, &__init_end,
-				  poison, "unused kernel");
+				  poison, "unused kernel image (initmem)");
 }
 
 static inline unsigned long get_num_physpages(void)
@@ -2460,7 +2460,7 @@ extern void get_pfn_range_for_nid(unsigned int nid,
 			unsigned long *start_pfn, unsigned long *end_pfn);
 extern unsigned long find_min_pfn_with_active_regions(void);
 
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
 static inline int early_pfn_to_nid(unsigned long pfn)
 {
 	return 0;
@@ -2474,7 +2474,6 @@ extern void set_dma_reserve(unsigned long new_dma_reserve);
 extern void memmap_init_range(unsigned long, int, unsigned long,
 		unsigned long, unsigned long, enum meminit_context,
 		struct vmem_altmap *, int migratetype);
-extern void memmap_init_zone(struct zone *zone);
 extern void setup_per_zone_wmarks(void);
 extern int __meminit init_per_zone_wmark_min(void);
 extern void mem_init(void);
@@ -2681,17 +2680,45 @@ extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
 extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
					     struct vm_area_struct **pprev);
 
-/* Look up the first VMA which intersects the interval start_addr..end_addr-1,
-   NULL if none.  Assume start_addr < end_addr. */
-static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
+/**
+ * find_vma_intersection() - Look up the first VMA which intersects the interval
+ * @mm: The process address space.
+ * @start_addr: The inclusive start user address.
+ * @end_addr: The exclusive end user address.
+ *
+ * Returns: The first VMA within the provided range, %NULL otherwise.  Assumes
+ * start_addr < end_addr.
+ */
+static inline
+struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
+					     unsigned long start_addr,
+					     unsigned long end_addr)
 {
-	struct vm_area_struct * vma = find_vma(mm,start_addr);
+	struct vm_area_struct *vma = find_vma(mm, start_addr);
 
 	if (vma && end_addr <= vma->vm_start)
 		vma = NULL;
 	return vma;
 }
 
+/**
+ * vma_lookup() - Find a VMA at a specific address
+ * @mm: The process address space.
+ * @addr: The user address.
+ *
+ * Return: The vm_area_struct at the given address, %NULL otherwise.
+ */
+static inline
+struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
+{
+	struct vm_area_struct *vma = find_vma(mm, addr);
+
+	if (vma && addr < vma->vm_start)
+		vma = NULL;
+
+	return vma;
+}
+
 static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
 {
 	unsigned long vm_start = vma->vm_start;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 8f0fb62e8975..b66d0225414e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -435,16 +435,6 @@ struct mm_struct {
 		 */
 		atomic_t mm_count;
 
-		/**
-		 * @has_pinned: Whether this mm has pinned any pages.  This can
-		 * be either replaced in the future by @pinned_vm when it
-		 * becomes stable, or grow into a counter on its own. We're
-		 * aggresive on this bit now - even if the pinned pages were
-		 * unpinned later on, we'll still keep this bit set for the
-		 * lifecycle of this mm just for simplicity.
-		 */
-		atomic_t has_pinned;
-
 #ifdef CONFIG_MMU
 		atomic_long_t pgtables_bytes;	/* PTE page table pages */
 #endif
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 629cefc4ecba..ebb09a964272 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -31,6 +31,8 @@
 /*
  * The historical set of flags that all mmap implementations implicitly
  * support when a ->mmap_validate() op is not provided in file_operations.
+ *
+ * MAP_EXECUTABLE is completely ignored throughout the kernel.
  */
 #define LEGACY_MAP_MASK (MAP_SHARED \
 		| MAP_PRIVATE \
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 5d0767cb424a..1935d4c72d10 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -9,8 +9,7 @@ struct page;
 struct vm_area_struct;
 struct mm_struct;
 
-extern void dump_page(struct page *page, const char *reason);
-extern void __dump_page(struct page *page, const char *reason);
+void dump_page(struct page *page, const char *reason);
 void dump_vma(const struct vm_area_struct *vma);
 void dump_mm(const struct mm_struct *mm);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0d53eba1c383..265a32e1ff74 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -20,6 +20,7 @@
 #include <linux/atomic.h>
 #include <linux/mm_types.h>
 #include <linux/page-flags.h>
+#include <linux/local_lock.h>
 #include <asm/page.h>
 
 /* Free memory management - zoned buddy allocator.  */
@@ -134,10 +135,10 @@ enum numa_stat_item {
 	NUMA_INTERLEAVE_HIT,	/* interleaver preferred this zone */
 	NUMA_LOCAL,		/* allocation from local node */
 	NUMA_OTHER,		/* allocation from other node */
-	NR_VM_NUMA_STAT_ITEMS
+	NR_VM_NUMA_EVENT_ITEMS
 };
 #else
-#define NR_VM_NUMA_STAT_ITEMS 0
+#define NR_VM_NUMA_EVENT_ITEMS 0
 #endif
 
 enum zone_stat_item {
@@ -332,29 +333,55 @@ enum zone_watermarks {
 	NR_WMARK
 };
 
+/*
+ * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER plus one additional
+ * for pageblock size for THP if configured.
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define NR_PCP_THP 1
+#else
+#define NR_PCP_THP 0
+#endif
+#define NR_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1 + NR_PCP_THP))
+
+/*
+ * Shift to encode migratetype and order in the same integer, with order
+ * in the least significant bits.
+ */
+#define NR_PCP_ORDER_WIDTH 8
+#define NR_PCP_ORDER_MASK ((1<<NR_PCP_ORDER_WIDTH) - 1)
+
 #define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
 #define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
 #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
 #define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost)
 
+/* Fields and list protected by pagesets local_lock in page_alloc.c */
 struct per_cpu_pages {
 	int count;		/* number of pages in the list */
 	int high;		/* high watermark, emptying needed */
 	int batch;		/* chunk size for buddy add/remove */
+	short free_factor;	/* batch scaling factor during free */
+#ifdef CONFIG_NUMA
+	short expire;		/* When 0, remote pagesets are drained */
+#endif
 
 	/* Lists of pages, one per migrate type stored on the pcp-lists */
-	struct list_head lists[MIGRATE_PCPTYPES];
+	struct list_head lists[NR_PCP_LISTS];
 };
 
-struct per_cpu_pageset {
-	struct per_cpu_pages pcp;
-#ifdef CONFIG_NUMA
-	s8 expire;
-	u16 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS];
-#endif
+struct per_cpu_zonestat {
 #ifdef CONFIG_SMP
-	s8 stat_threshold;
 	s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
+	s8 stat_threshold;
+#endif
+#ifdef CONFIG_NUMA
+	/*
+	 * Low priority inaccurate counters that are only folded
+	 * on demand. Use a large type to avoid the overhead of
+	 * folding during refresh_cpu_vm_stats.
+	 */
+	unsigned long vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
 #endif
 };
 
@@ -484,7 +511,8 @@ struct zone {
 	int node;
 #endif
 	struct pglist_data	*zone_pgdat;
-	struct per_cpu_pageset __percpu *pageset;
+	struct per_cpu_pages	__percpu *per_cpu_pageset;
+	struct per_cpu_zonestat	__percpu *per_cpu_zonestats;
 	/*
 	 * the high and batch values are copied to individual pagesets for
	 * faster access
@@ -619,7 +647,7 @@ struct zone {
	ZONE_PADDING(_pad3_)
	/* Zone statistics */
	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];
-	atomic_long_t		vm_numa_stat[NR_VM_NUMA_STAT_ITEMS];
+	atomic_long_t		vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
 } ____cacheline_internodealigned_in_smp;
 
 enum pgdat_flags {
@@ -637,6 +665,7 @@ enum zone_flags {
	ZONE_BOOSTED_WATERMARK,		/* zone recently boosted watermarks.
					 * Cleared when kswapd is woken.
					 */
+	ZONE_RECLAIM_ACTIVE,		/* kswapd may be scanning the zone. */
 };
 
 static inline unsigned long zone_managed_pages(struct zone *zone)
@@ -738,10 +767,12 @@ struct zonelist {
	struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
 };
 
-#ifndef CONFIG_DISCONTIGMEM
-/* The array of struct pages - for discontigmem use pgdat->lmem_map */
+/*
+ * The array of struct pages for flatmem.
+ * It must be declared for SPARSEMEM as well because there are configurations
+ * that rely on that.
+ */
 extern struct page *mem_map;
-#endif
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 struct deferred_split {
@@ -775,7 +806,7 @@ typedef struct pglist_data {
	struct zonelist node_zonelists[MAX_ZONELISTS];
 
	int nr_zones; /* number of populated zones in this node */
-#ifdef CONFIG_FLAT_NODE_MEM_MAP	/* means !SPARSEMEM */
+#ifdef CONFIG_FLATMEM	/* means !SPARSEMEM */
	struct page *node_mem_map;
 #ifdef CONFIG_PAGE_EXTENSION
	struct page_ext *node_page_ext;
@@ -865,7 +896,7 @@ typedef struct pglist_data {
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
 #define node_spanned_pages(nid)	(NODE_DATA(nid)->node_spanned_pages)
 
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
+#ifdef CONFIG_FLATMEM
 #define pgdat_page_nr(pgdat, pagenr)	((pgdat)->node_mem_map + (pagenr))
 #else
 #define pgdat_page_nr(pgdat, pagenr)	pfn_to_page((pgdat)->node_start_pfn + (pagenr))
@@ -982,22 +1013,11 @@ static inline void zone_set_nid(struct zone *zone, int nid) {}
 
 extern int movable_zone;
 
-#ifdef CONFIG_HIGHMEM
-static inline int zone_movable_is_highmem(void)
-{
-#ifdef CONFIG_NEED_MULTIPLE_NODES
-	return movable_zone == ZONE_HIGHMEM;
-#else
-	return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
-#endif
-}
-#endif
-
 static inline int is_highmem_idx(enum zone_type idx)
 {
 #ifdef CONFIG_HIGHMEM
	return (idx == ZONE_HIGHMEM ||
-		(idx == ZONE_MOVABLE && zone_movable_is_highmem()));
+		(idx == ZONE_MOVABLE && movable_zone == ZONE_HIGHMEM));
 #else
	return 0;
 #endif
@@ -1029,7 +1049,7 @@ int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void *,
 extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
 int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void *,
		size_t *, loff_t *);
-int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
+int percpu_pagelist_high_fraction_sysctl_handler(struct ctl_table *, int,
		void *, size_t *, loff_t *);
 int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
		void *, size_t *, loff_t *);
@@ -1037,21 +1057,21 @@ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
		void *, size_t *, loff_t *);
 int numa_zonelist_order_handler(struct ctl_table *, int,
		void *, size_t *, loff_t *);
-extern int percpu_pagelist_fraction;
+extern int percpu_pagelist_high_fraction;
 extern char numa_zonelist_order[];
 #define NUMA_ZONELIST_ORDER_LEN	16
 
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
 
 extern struct pglist_data contig_page_data;
 #define NODE_DATA(nid)		(&contig_page_data)
 #define NODE_MEM_MAP(nid)	mem_map
 
-#else /* CONFIG_NEED_MULTIPLE_NODES */
+#else /* CONFIG_NUMA */
 
 #include <asm/mmzone.h>
 
-#endif /* !CONFIG_NEED_MULTIPLE_NODES */
+#endif /* !CONFIG_NUMA */
 
 extern struct pglist_data *first_online_pgdat(void);
 extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat);
@@ -1200,8 +1220,6 @@ static inline struct zoneref *first_zone
```
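The backing-dev-defs.h hunk above rebases `wb_put()` on a new `wb_put_many()`, so a caller holding several references can drop them all with a single `percpu_ref_put_many()` instead of a loop of single puts. A minimal sketch of that batching pattern, with a plain counter standing in for the percpu refcount (all names here are hypothetical illustrations, not kernel API):

```c
/* Model of the wb_put()/wb_put_many() split: drop N references in one
 * operation rather than N single puts. A plain counter stands in for
 * percpu_ref; every name in this sketch is hypothetical. */
#include <stdio.h>

struct ref { unsigned long count; };

static void ref_put_many(struct ref *r, unsigned long nr)
{
	r->count -= nr;              /* percpu_ref_put_many() analogue */
	if (r->count == 0)
		printf("released\n"); /* a release callback would run here */
}

static void ref_put(struct ref *r)
{
	ref_put_many(r, 1);          /* wb_put() now delegates to wb_put_many() */
}

int main(void)
{
	struct ref r = { .count = 4 };

	ref_put(&r);                 /* single reference */
	ref_put_many(&r, 3);         /* batch, e.g. many attached inodes at once */
	return 0;
}
```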
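The mm.h hunk adds `vma_lookup()` beside the existing find-style helpers: `find_vma()` returns the first VMA ending above the address even when the address falls in a gap between mappings, while `vma_lookup()` returns a VMA only if it actually contains the address. A minimal userspace sketch of the two semantics over a sorted array of ranges (the array and helper names are hypothetical stand-ins for the kernel's VMA tree):

```c
/* Illustration only: models the find_vma()/vma_lookup() contracts from
 * the diff above over a plain sorted array. The kernel operates on
 * struct vm_area_struct; all names here are hypothetical. */
#include <stddef.h>
#include <stdio.h>

struct range { unsigned long start, end; };	/* [start, end) */

/* find_vma() analogue: first range with end > addr (may lie above addr) */
static const struct range *range_find(const struct range *r, size_t n,
				      unsigned long addr)
{
	for (size_t i = 0; i < n; i++)
		if (addr < r[i].end)
			return &r[i];
	return NULL;
}

/* vma_lookup() analogue: succeed only if addr is inside the range */
static const struct range *range_lookup(const struct range *r, size_t n,
					unsigned long addr)
{
	const struct range *v = range_find(r, n, addr);

	if (v && addr < v->start)
		v = NULL;
	return v;
}

int main(void)
{
	const struct range map[] = { { 0x1000, 0x2000 }, { 0x4000, 0x5000 } };

	/* 0x3000 sits in the gap: the find-style call returns the next
	 * range, the lookup-style call returns NULL, matching the new
	 * vma_lookup() contract. */
	printf("find:   %p\n", (void *)range_find(map, 2, 0x3000));
	printf("lookup: %p\n", (void *)range_lookup(map, 2, 0x3000));
	return 0;
}
```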
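The mmzone.h hunk introduces `NR_PCP_ORDER_WIDTH`/`NR_PCP_ORDER_MASK`, whose comment says migratetype and order are encoded in the same integer with order in the least significant bits. A hedged sketch of that packing under those two constants (the `encode_pcp()`/`pcp_*()` helpers are hypothetical; the kernel's own encoding lives in mm/page_alloc.c and may differ in detail):

```c
/* Sketch of the order/migratetype packing implied by NR_PCP_ORDER_WIDTH
 * and NR_PCP_ORDER_MASK in the mmzone.h hunk above. The helper names
 * are illustrative, not the kernel's own functions. */
#include <assert.h>
#include <stdio.h>

#define NR_PCP_ORDER_WIDTH 8
#define NR_PCP_ORDER_MASK  ((1 << NR_PCP_ORDER_WIDTH) - 1)

/* order occupies the low bits, migratetype the bits above it */
static unsigned int encode_pcp(unsigned int migratetype, unsigned int order)
{
	return (migratetype << NR_PCP_ORDER_WIDTH) | order;
}

static unsigned int pcp_order(unsigned int val)
{
	return val & NR_PCP_ORDER_MASK;
}

static unsigned int pcp_migratetype(unsigned int val)
{
	return val >> NR_PCP_ORDER_WIDTH;
}

int main(void)
{
	/* hypothetical values: migratetype 2, order-3 (8-page) block */
	unsigned int v = encode_pcp(2, 3);

	assert(pcp_order(v) == 3);
	assert(pcp_migratetype(v) == 2);
	printf("encoded=%#x order=%u migratetype=%u\n",
	       v, pcp_order(v), pcp_migratetype(v));
	return 0;
}
```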
