diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-08 17:52:23 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-08 17:52:23 -0700 |
| commit | f6f7a6369203fa3e07efb7f35cfd81efe9f25b07 (patch) | |
| tree | 97bec9ddd999040822acf314647eaf4208213589 /include | |
| parent | 839fe9156fbe89c3157aa6146d22090f8cffddd8 (diff) | |
| parent | df69f52d990bd85159727bd26e819d3a6e49c666 (diff) | |
| download | linux-f6f7a6369203fa3e07efb7f35cfd81efe9f25b07.tar.gz linux-f6f7a6369203fa3e07efb7f35cfd81efe9f25b07.tar.bz2 linux-f6f7a6369203fa3e07efb7f35cfd81efe9f25b07.zip | |
Merge branch 'akpm' (patches from Andrew)
Merge second patch-bomb from Andrew Morton:
"Almost all of the rest of MM. There was an unusually large amount of
MM material this time"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (141 commits)
zpool: remove no-op module init/exit
mm: zbud: constify the zbud_ops
mm: zpool: constify the zpool_ops
mm: swap: zswap: maybe_preload & refactoring
zram: unify error reporting
zsmalloc: remove null check from destroy_handle_cache()
zsmalloc: do not take class lock in zs_shrinker_count()
zsmalloc: use class->pages_per_zspage
zsmalloc: consider ZS_ALMOST_FULL as migrate source
zsmalloc: partial page ordering within a fullness_list
zsmalloc: use shrinker to trigger auto-compaction
zsmalloc: account the number of compacted pages
zsmalloc/zram: introduce zs_pool_stats api
zsmalloc: cosmetic compaction code adjustments
zsmalloc: introduce zs_can_compact() function
zsmalloc: always keep per-class stats
zsmalloc: drop unused variable `nr_to_migrate'
mm/memblock.c: fix comment in __next_mem_range()
mm/page_alloc.c: fix type information of memoryless node
memory-hotplug: fix comments in zone_spanned_pages_in_node() and zone_spanned_pages_in_node()
...
Diffstat (limited to 'include')
| -rw-r--r-- | include/asm-generic/early_ioremap.h | 6 | ||||
| -rw-r--r-- | include/linux/dax.h | 39 | ||||
| -rw-r--r-- | include/linux/dmapool.h | 6 | ||||
| -rw-r--r-- | include/linux/fs.h | 14 | ||||
| -rw-r--r-- | include/linux/gfp.h | 31 | ||||
| -rw-r--r-- | include/linux/huge_mm.h | 20 | ||||
| -rw-r--r-- | include/linux/hugetlb.h | 17 | ||||
| -rw-r--r-- | include/linux/memblock.h | 4 | ||||
| -rw-r--r-- | include/linux/memcontrol.h | 388 | ||||
| -rw-r--r-- | include/linux/mm.h | 32 | ||||
| -rw-r--r-- | include/linux/mm_types.h | 2 | ||||
| -rw-r--r-- | include/linux/oom.h | 38 | ||||
| -rw-r--r-- | include/linux/page-isolation.h | 5 | ||||
| -rw-r--r-- | include/linux/pci.h | 2 | ||||
| -rw-r--r-- | include/linux/swap.h | 19 | ||||
| -rw-r--r-- | include/linux/swapops.h | 37 | ||||
| -rw-r--r-- | include/linux/zbud.h | 2 | ||||
| -rw-r--r-- | include/linux/zpool.h | 4 | ||||
| -rw-r--r-- | include/linux/zsmalloc.h | 6 | ||||
| -rw-r--r-- | include/net/sock.h | 33 |
20 files changed, 549 insertions, 156 deletions
diff --git a/include/asm-generic/early_ioremap.h b/include/asm-generic/early_ioremap.h index 316bd043319e..734ad4db388c 100644 --- a/include/asm-generic/early_ioremap.h +++ b/include/asm-generic/early_ioremap.h @@ -35,6 +35,12 @@ extern void early_ioremap_setup(void); */ extern void early_ioremap_reset(void); +/* + * Early copy from unmapped memory to kernel mapped memory. + */ +extern void copy_from_early_mem(void *dest, phys_addr_t src, + unsigned long size); + #else static inline void early_ioremap_init(void) { } static inline void early_ioremap_setup(void) { } diff --git a/include/linux/dax.h b/include/linux/dax.h new file mode 100644 index 000000000000..b415e521528d --- /dev/null +++ b/include/linux/dax.h @@ -0,0 +1,39 @@ +#ifndef _LINUX_DAX_H +#define _LINUX_DAX_H + +#include <linux/fs.h> +#include <linux/mm.h> +#include <asm/pgtable.h> + +ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, + get_block_t, dio_iodone_t, int flags); +int dax_clear_blocks(struct inode *, sector_t block, long size); +int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); +int dax_truncate_page(struct inode *, loff_t from, get_block_t); +int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, + dax_iodone_t); +int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, + dax_iodone_t); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *, + unsigned int flags, get_block_t, dax_iodone_t); +int __dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *, + unsigned int flags, get_block_t, dax_iodone_t); +#else +static inline int dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd, unsigned int flags, get_block_t gb, + dax_iodone_t di) +{ + return VM_FAULT_FALLBACK; +} +#define __dax_pmd_fault dax_pmd_fault +#endif +int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *); +#define dax_mkwrite(vma, vmf, gb, iod) dax_fault(vma, vmf, gb, iod) +#define __dax_mkwrite(vma, vmf, gb, iod) __dax_fault(vma, vmf, gb, iod) + +static inline bool vma_is_dax(struct vm_area_struct *vma) +{ + return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); +} +#endif diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h index e1043f79122f..53ba737505df 100644 --- a/include/linux/dmapool.h +++ b/include/linux/dmapool.h @@ -24,6 +24,12 @@ void dma_pool_destroy(struct dma_pool *pool); void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle); +static inline void *dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags, + dma_addr_t *handle) +{ + return dma_pool_alloc(pool, mem_flags | __GFP_ZERO, handle); +} + void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t addr); /* diff --git a/include/linux/fs.h b/include/linux/fs.h index b2f9b9c25e41..72d8a844c692 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -52,7 +52,6 @@ struct swap_info_struct; struct seq_file; struct workqueue_struct; struct iov_iter; -struct vm_fault; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -2678,19 +2677,6 @@ extern loff_t fixed_size_llseek(struct file *file, loff_t offset, extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); -ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, - get_block_t, dio_iodone_t, int flags); -int dax_clear_blocks(struct inode *, sector_t block, long size); -int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); -int dax_truncate_page(struct inode *, loff_t from, get_block_t); -int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, - dax_iodone_t); -int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, - dax_iodone_t); -int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *); -#define dax_mkwrite(vma, vmf, gb, iod) dax_fault(vma, vmf, gb, iod) -#define __dax_mkwrite(vma, vmf, gb, iod) __dax_fault(vma, vmf, gb, iod) - #ifdef CONFIG_BLOCK typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, loff_t file_offset); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index ad35f300b9a4..f92cbd2f4450 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -63,7 +63,10 @@ struct vm_area_struct; * but it is definitely preferable to use the flag rather than opencode endless * loop around allocator. * - * __GFP_NORETRY: The VM implementation must not retry indefinitely. + * __GFP_NORETRY: The VM implementation must not retry indefinitely and will + * return NULL when direct reclaim and memory compaction have failed to allow + * the allocation to succeed. The OOM killer is not called with the current + * implementation. * * __GFP_MOVABLE: Flag that this page will be movable by the page migration * mechanism or reclaimed @@ -300,22 +303,31 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order, return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL); } -static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, - unsigned int order) +/* + * Allocate pages, preferring the node given as nid. The node must be valid and + * online. For more general interface, see alloc_pages_node(). + */ +static inline struct page * +__alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { - /* Unknown node is current node */ - if (nid < 0) - nid = numa_node_id(); + VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); + VM_WARN_ON(!node_online(nid)); return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask)); } -static inline struct page *alloc_pages_exact_node(int nid, gfp_t gfp_mask, +/* + * Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE, + * prefer the current CPU's closest node. Otherwise node must be valid and + * online. + */ +static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { - VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES || !node_online(nid)); + if (nid == NUMA_NO_NODE) + nid = numa_mem_id(); - return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask)); + return __alloc_pages_node(nid, gfp_mask, order); } #ifdef CONFIG_NUMA @@ -354,7 +366,6 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask); void *alloc_pages_exact(size_t size, gfp_t gfp_mask); void free_pages_exact(void *virt, size_t size); -/* This is different from alloc_pages_exact_node !!! */ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); #define __get_free_page(gfp_mask) \ diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f10b20f05159..ecb080d6ff42 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -33,6 +33,8 @@ extern int move_huge_pmd(struct vm_area_struct *vma, extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, int prot_numa); +int vmf_insert_pfn_pmd(struct vm_area_struct *, unsigned long addr, pmd_t *, + unsigned long pfn, bool write); enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_FLAG, @@ -122,7 +124,7 @@ extern void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address, #endif extern int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice); -extern void __vma_adjust_trans_huge(struct vm_area_struct *vma, +extern void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, long adjust_next); @@ -138,15 +140,6 @@ static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma, else return 0; } -static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, - unsigned long start, - unsigned long end, - long adjust_next) -{ - if (!vma->anon_vma || vma->vm_ops) - return; - __vma_adjust_trans_huge(vma, start, end, adjust_next); -} static inline int hpage_nr_pages(struct page *page) { if (unlikely(PageTransHuge(page))) @@ -164,6 +157,13 @@ static inline bool is_huge_zero_page(struct page *page) return ACCESS_ONCE(huge_zero_page) == page; } +static inline bool is_huge_zero_pmd(pmd_t pmd) +{ + return is_huge_zero_page(pmd_page(pmd)); +} + +struct page *get_huge_zero_page(void); + #else /* CONFIG_TRANSPARENT_HUGEPAGE */ #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; }) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d891f949466a..5e35379f58a5 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -35,6 +35,9 @@ struct resv_map { struct kref refs; spinlock_t lock; struct list_head regions; + long adds_in_progress; + struct list_head region_cache; + long region_cache_count; }; extern struct resv_map *resv_map_alloc(void); void resv_map_release(struct kref *ref); @@ -80,11 +83,18 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, int hugetlb_reserve_pages(struct inode *inode, long from, long to, struct vm_area_struct *vma, vm_flags_t vm_flags); -void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); +long hugetlb_unreserve_pages(struct inode *inode, long start, long end, + long freed); int dequeue_hwpoisoned_huge_page(struct page *page); bool isolate_huge_page(struct page *page, struct list_head *list); void putback_active_hugepage(struct page *page); void free_huge_page(struct page *page); +void hugetlb_fix_reserve_counts(struct inode *inode, bool restore_reserve); +extern struct mutex *hugetlb_fault_mutex_table; +u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm, + struct vm_area_struct *vma, + struct address_space *mapping, + pgoff_t idx, unsigned long address); #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); @@ -320,9 +330,13 @@ struct huge_bootmem_page { #endif }; +struct page *alloc_huge_page(struct vm_area_struct *vma, + unsigned long addr, int avoid_reserve); struct page *alloc_huge_page_node(struct hstate *h, int nid); struct page *alloc_huge_page_noerr(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); +int huge_add_to_page_cache(struct page *page, struct address_space *mapping, + pgoff_t idx); /* arch callback */ int __init alloc_bootmem_huge_page(struct hstate *h); @@ -471,6 +485,7 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h, #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; +#define alloc_huge_page(v, a, r) NULL #define alloc_huge_page_node(h, nid) NULL #define alloc_huge_page_noerr(v, a, r) NULL #define alloc_bootmem_huge_page(h) NULL diff --git a/include/linux/memblock.h b/include/linux/memblock.h index cc4b01972060..c518eb589260 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -77,6 +77,8 @@ int memblock_remove(phys_addr_t base, phys_addr_t size); int memblock_free(phys_addr_t base, phys_addr_t size); int memblock_reserve(phys_addr_t base, phys_addr_t size); void memblock_trim_memory(phys_addr_t align); +bool memblock_overlaps_region(struct memblock_type *type, + phys_addr_t base, phys_addr_t size); int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size); int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size); int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); @@ -323,7 +325,7 @@ void memblock_enforce_memory_limit(phys_addr_t memory_limit); int memblock_is_memory(phys_addr_t addr); int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); int memblock_is_reserved(phys_addr_t addr); -int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); +bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); extern void __memblock_dump_all(void); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 73b02b0a8f60..d92b80b63c5c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -23,6 +23,11 @@ #include <linux/vm_event_item.h> #include <linux/hardirq.h> #include <linux/jump_label.h> +#include <linux/page_counter.h> +#include <linux/vmpressure.h> +#include <linux/eventfd.h> +#include <linux/mmzone.h> +#include <linux/writeback.h> struct mem_cgroup; struct page; @@ -67,12 +72,221 @@ enum mem_cgroup_events_index { MEMCG_NR_EVENTS, }; +/* + * Per memcg event counter is incremented at every pagein/pageout. With THP, + * it will be incremated by the number of pages. This counter is used for + * for trigger some periodic events. This is straightforward and better + * than using jiffies etc. to handle periodic memcg event. + */ +enum mem_cgroup_events_target { + MEM_CGROUP_TARGET_THRESH, + MEM_CGROUP_TARGET_SOFTLIMIT, + MEM_CGROUP_TARGET_NUMAINFO, + MEM_CGROUP_NTARGETS, +}; + +/* + * Bits in struct cg_proto.flags + */ +enum cg_proto_flags { + /* Currently active and new sockets should be assigned to cgroups */ + MEMCG_SOCK_ACTIVE, + /* It was ever activated; we must disarm static keys on destruction */ + MEMCG_SOCK_ACTIVATED, +}; + +struct cg_proto { + struct page_counter memory_allocated; /* Current allocated memory. */ + struct percpu_counter sockets_allocated; /* Current number of sockets. */ + int memory_pressure; + long sysctl_mem[3]; + unsigned long flags; + /* + * memcg field is used to find which memcg we belong directly + * Each memcg struct can hold more than one cg_proto, so container_of + * won't really cut. + * + * The elegant solution would be having an inverse function to + * proto_cgroup in struct proto, but that means polluting the structure + * for everybody, instead of just for memcg users. + */ + struct mem_cgroup *memcg; +}; + #ifdef CONFIG_MEMCG +struct mem_cgroup_stat_cpu { + long count[MEM_CGROUP_STAT_NSTATS]; + unsigned long events[MEMCG_NR_EVENTS]; + unsigned long nr_page_events; + unsigned long targets[MEM_CGROUP_NTARGETS]; +}; + +struct mem_cgroup_reclaim_iter { + struct mem_cgroup *position; + /* scan generation, increased every round-trip */ + unsigned int generation; +}; + +/* + * per-zone information in memory controller. + */ +struct mem_cgroup_per_zone { + struct lruvec lruvec; + unsigned long lru_size[NR_LRU_LISTS]; + + struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1]; + + struct rb_node tree_node; /* RB tree node */ + unsigned long usage_in_excess;/* Set to the value by which */ + /* the soft limit is exceeded*/ + bool on_tree; + struct mem_cgroup *memcg; /* Back pointer, we cannot */ + /* use container_of */ +}; + +struct mem_cgroup_per_node { + struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES]; +}; + +struct mem_cgroup_threshold { + struct eventfd_ctx *eventfd; + unsigned long threshold; +}; + +/* For threshold */ +struct mem_cgroup_threshold_ary { + /* An array index points to threshold just below or equal to usage. */ + int current_threshold; + /* Size of entries[] */ + unsigned int size; + /* Array of thresholds */ + struct mem_cgroup_threshold entries[0]; +}; + +struct mem_cgroup_thresholds { + /* Primary thresholds array */ + struct mem_cgroup_threshold_ary *primary; + /* + * Spare threshold array. + * This is needed to make mem_cgroup_unregister_event() "never fail". + * It must be able to store at least primary->size - 1 entries. + */ + struct mem_cgroup_threshold_ary *spare; +}; + +/* + * The memory controller data structure. The memory controller controls both + * page cache and RSS per cgroup. We would eventually like to provide + * statistics based on the statistics developed by Rik Van Riel for clock-pro, + * to help the administrator determine what knobs to tune. + */ +struct mem_cgroup { + struct cgroup_subsys_state css; + + /* Accounted resources */ + struct page_counter memory; + struct page_counter memsw; + struct page_counter kmem; + + /* Normal memory consumption range */ + unsigned long low; + unsigned long high; + + unsigned long soft_limit; + + /* vmpressure notifications */ + struct vmpressure vmpressure; + + /* css_online() has been completed */ + int initialized; + + /* + * Should the accounting and control be hierarchical, per subtree? + */ + bool use_hierarchy; + + /* protected by memcg_oom_lock */ + bool oom_lock; + int under_oom; + + int swappiness; + /* OOM-Killer disable */ + int oom_kill_disable; + + /* protect arrays of thresholds */ + struct mutex thresholds_lock; + + /* thresholds for memory usage. RCU-protected */ + struct mem_cgroup_thresholds thresholds; + + /* thresholds for mem+swap usage. RCU-protected */ + struct mem_cgroup_thresholds memsw_thresholds; + + /* For oom notifier event fd */ + struct list_head oom_notify; + + /* + * Should we move charges of a task when a task is moved into this + * mem_cgroup ? And what type of charges should we move ? + */ + unsigned long move_charge_at_immigrate; + /* + * set > 0 if pages under this cgroup are moving to other cgroup. + */ + atomic_t moving_account; + /* taken only while moving_account > 0 */ + spinlock_t move_lock; + struct task_struct *move_lock_task; + unsigned long move_lock_flags; + /* + * percpu counter. + */ + struct mem_cgroup_stat_cpu __percpu *stat; + spinlock_t pcp_counter_lock; + +#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) + struct cg_proto tcp_mem; +#endif +#if defined(CONFIG_MEMCG_KMEM) + /* Index in the kmem_cache->memcg_params.memcg_caches array */ + int kmemcg_id; + bool kmem_acct_activated; + bool kmem_acct_active; +#endif + + int last_scanned_node; +#if MAX_NUMNODES > 1 + nodemask_t scan_nodes; + atomic_t numainfo_events; + atomic_t numainfo_updating; +#endif + +#ifdef CONFIG_CGROUP_WRITEBACK + struct list_head cgwb_list; + struct wb_domain cgwb_domain; +#endif + + /* List of events which userspace want to receive */ + struct list_head event_list; + spinlock_t event_list_lock; + + struct mem_cgroup_per_node *nodeinfo[0]; + /* WARNING: nodeinfo must be the last member here */ +}; extern struct cgroup_subsys_state *mem_cgroup_root_css; -void mem_cgroup_events(struct mem_cgroup *memcg, +/** + * mem_cgroup_events - count memory events against a cgroup + * @memcg: the memory cgroup + * @idx: the event index + * @nr: the number of events to account for + */ +static inline void mem_cgroup_events(struct mem_cgroup *memcg, enum mem_cgroup_events_index idx, - unsigned int nr); + unsigned int nr) +{ + this_cpu_add(memcg->stat->events[idx], nr); +} bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg); @@ -90,15 +304,31 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage, struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); -bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, - struct mem_cgroup *root); bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg); -extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); -extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); +struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); +struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); + +struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); +static inline +struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){ + return css ? container_of(css, struct mem_cgroup, css) : NULL; +} + +struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, + struct mem_cgroup *, + struct mem_cgroup_reclaim_cookie *); +void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); -extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); -extern struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css); +static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, + struct mem_cgroup *root) +{ + if (root == memcg) + return true; + if (!root->use_hierarchy) + return false; + return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup); +} static inline bool mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *memcg) @@ -114,24 +344,67 @@ static inline bool mm_match_cgroup(struct mm_struct *mm, return match; } -extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg); -extern struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page); +struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page); -struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, - struct mem_cgroup *, - struct mem_cgroup_reclaim_cookie *); -void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); +static inline bool mem_cgroup_disabled(void) +{ + if (memory_cgrp_subsys.disabled) + return true; + return false; +} /* * For memory reclaim. */ -int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec); -bool mem_cgroup_lruvec_online(struct lruvec *lruvec); int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); -unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list); -void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int); -extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, - struct task_struct *p); + +void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, + int nr_pages); + +static inline bool mem_cgroup_lruvec_online(struct lruvec *lruvec) +{ + struct mem_cgroup_per_zone *mz; + struct mem_cgroup *memcg; + + if (mem_cgroup_disabled()) + return true; + + mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec); + memcg = mz->memcg; + + return !!(memcg->css.flags & CSS_ONLINE); +} + +static inline +unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) +{ + struct mem_cgroup_per_zone *mz; + + mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec); + return mz->lru_size[lru]; +} + +static inline int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) +{ + unsigned long inactive_ratio; + unsigned long inactive; + unsigned long active; + unsigned long gb; + + inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_ANON); + active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_ANON); + + gb = (inactive + active) >> (30 - PAGE_SHIFT); + if (gb) + inactive_ratio = int_sqrt(10 * gb); + else + inactive_ratio = 1; + + return inactive * inactive_ratio < active; +} + +void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, + struct task_struct *p); static inline void mem_cgroup_oom_enable(void) { @@ -156,18 +429,26 @@ bool mem_cgroup_oom_synchronize(bool wait); extern int do_swap_account; #endif -static inline bool mem_cgroup_disabled(void) -{ - if (memory_cgrp_subsys.disabled) - return true; - return false; -} - struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page); -void mem_cgroup_update_page_stat(struct mem_cgroup *memcg, - enum mem_cgroup_stat_index idx, int val); void mem_cgroup_end_page_stat(struct mem_cgroup *memcg); +/** + * mem_cgroup_update_page_stat - update page state statistics + * @memcg: memcg to account against + * @idx: page state item to account + * @val: number of pages (positive or negative) + * + * See mem_cgroup_begin_page_stat() for locking requirements. + */ +static inline void mem_cgroup_update_page_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx, int val) +{ + VM_BUG_ON(!rcu_read_lock_held()); + + if (memcg) + this_cpu_add(memcg->stat->count[idx], val); +} + static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) { @@ -184,13 +465,31 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, gfp_t gfp_mask, unsigned long *total_scanned); -void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx); static inline void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) { + struct mem_cgroup *memcg; + if (mem_cgroup_disabled()) return; - __mem_cgroup_count_vm_event(mm, idx); + + rcu_read_lock(); + memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); + if (unlikely(!memcg)) + goto out; + + switch (idx) { + case PGFAULT: + this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT]); + break; + case PGMAJFAULT: + this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT]); + break; + default: + BUG(); + } +out: + rcu_read_unlock(); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE void mem_cgroup_split_huge_fixup(struct page *head); @@ -199,8 +498,6 @@ void mem_cgroup_split_huge_fixup(struct page *head); #else /* CONFIG_MEMCG */ struct mem_cgroup; -#define mem_cgroup_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL)) - static inline void mem_cgroup_events(struct mem_cgroup *memcg, enum mem_cgroup_events_index idx, unsigned int nr) @@ -275,12 +572,6 @@ static inline bool task_in_mem_cgroup(struct task_struct *task, return true; } -static inline struct cgroup_subsys_state - *mem_cgroup_css(struct mem_cgroup *memcg) -{ - return NULL; -} - static inline struct mem_cgroup * mem_cgroup_iter(struct mem_cgroup *root, struct mem_cgroup *prev, @@ -428,8 +719,8 @@ static inline void sock_release_memcg(struct sock *sk) extern struct static_key memcg_kmem_enabled_key; extern int memcg_nr_cache_ids; -extern void memcg_get_cache_ids(void); -extern void memcg_put_cache_ids(void); +void memcg_get_cache_ids(void); +void memcg_put_cache_ids(void); /* * Helper macro to loop through all memcg-specific caches. Callers must still @@ -444,7 +735,10 @@ static inline bool memcg_kmem_enabled(void) return static_key_false(&memcg_kmem_enabled_key); } -bool memcg_kmem_is_active(struct mem_cgroup *memcg); +static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg) +{ + return memcg->kmem_acct_active; +} /* * In general, we'll do everything in our power to not incur in any overhead @@ -463,7 +757,15 @@ void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order); void __memcg_kmem_uncharge_pages(struct page *page, int order); -int memcg_cache_id(struct mem_cgroup *memcg); +/* + * helper for acessing a memcg's index. It will be used as an index in the + * child cache array in kmem_cache, and also to derive its name. This function + * will return -1 when this is not a kmem-limited memcg. + */ +static inline int memcg_cache_id(struct mem_cgroup *memcg) +{ + return memcg ? memcg->kmemcg_id : -1; +} struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep); void __memcg_kmem_put_cache(struct kmem_cache *cachep); diff --git a/include/linux/mm.h b/include/linux/mm.h index 1171a292e06e..f25a957bf0ab 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -249,6 +249,8 @@ struct vm_operations_struct { void (*close)(struct vm_area_struct * area); int (*mremap)(struct vm_area_struct * area); int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); + int (*pmd_fault)(struct vm_area_struct *, unsigned long address, + pmd_t *, unsigned int flags); void (*map_pages)(struct vm_area_struct *vma, struct vm_fault *vmf); /* notification that a previously read-only page is about to become @@ -307,18 +309,6 @@ struct inode; #define page_private(page) ((page)->private) #define set_page_private(page, v) ((page)->private = (v)) -/* It's valid only if the page is free path or free_list */ -static inline void set_freepage_migratetype(struct page *page, int migratetype) -{ - page->index = migratetype; -} - -/* It's valid only if the page is free path or free_list */ -static inline int get_freepage_migratetype(struct page *page) -{ - return page->index; -} - /* * FIXME: take this include out, include page-flags.h in * files which need it (119 of them) @@ -359,18 +349,6 @@ static inline int get_page_unless_zero(struct page *page) return atomic_inc_not_zero(&page->_count); } -/* - * Try to drop a ref unless the page has a refcount of one, return false if - * that is the case. - * This is to make sure that the refcount won't bec |
