summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/kvm/mmu.c2
-rw-r--r--block/blk-mq-sysfs.c6
-rw-r--r--block/blk-mq.c18
-rw-r--r--block/blk-sysfs.c11
-rw-r--r--drivers/target/target_core_tpg.c3
-rw-r--r--fs/aio.c4
-rw-r--r--fs/btrfs/disk-io.c8
-rw-r--r--fs/btrfs/extent-tree.c2
-rw-r--r--fs/ext2/super.c6
-rw-r--r--fs/ext3/super.c6
-rw-r--r--fs/ext4/super.c14
-rw-r--r--fs/file_table.c2
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/super.c3
-rw-r--r--include/linux/blk-mq.h1
-rw-r--r--include/linux/flex_proportions.h5
-rw-r--r--include/linux/percpu-refcount.h122
-rw-r--r--include/linux/percpu.h13
-rw-r--r--include/linux/percpu_counter.h10
-rw-r--r--include/linux/proportions.h5
-rw-r--r--include/net/dst_ops.h2
-rw-r--r--include/net/inet_frag.h2
-rw-r--r--kernel/cgroup.c7
-rw-r--r--lib/flex_proportions.c8
-rw-r--r--lib/percpu-refcount.c305
-rw-r--r--lib/percpu_counter.c20
-rw-r--r--lib/proportions.c10
-rw-r--r--mm/backing-dev.c4
-rw-r--r--mm/mmap.c2
-rw-r--r--mm/nommu.c2
-rw-r--r--mm/page-writeback.c2
-rw-r--r--mm/percpu-km.c16
-rw-r--r--mm/percpu-vm.c162
-rw-r--r--mm/percpu.c526
-rw-r--r--mm/shmem.c2
-rw-r--r--net/dccp/proto.c2
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_memcontrol.c2
-rw-r--r--net/sctp/protocol.c2
39 files changed, 879 insertions, 444 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3201e93ebd07..ac1c4de3a484 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4549,7 +4549,7 @@ int kvm_mmu_module_init(void)
if (!mmu_page_header_cache)
goto nomem;
- if (percpu_counter_init(&kvm_total_used_mmu_pages, 0))
+ if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL))
goto nomem;
register_shrinker(&mmu_shrinker);
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index ed5217867555..371d8800b48a 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -402,6 +402,12 @@ static void blk_mq_sysfs_init(struct request_queue *q)
}
}
+/* see blk_register_queue() */
+void blk_mq_finish_init(struct request_queue *q)
+{
+ percpu_ref_switch_to_percpu(&q->mq_usage_counter);
+}
+
int blk_mq_register_disk(struct gendisk *disk)
{
struct device *dev = disk_to_dev(disk);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index df8e1e09dd17..38f4a165640d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -119,16 +119,7 @@ void blk_mq_freeze_queue(struct request_queue *q)
spin_unlock_irq(q->queue_lock);
if (freeze) {
- /*
- * XXX: Temporary kludge to work around SCSI blk-mq stall.
- * SCSI synchronously creates and destroys many queues
- * back-to-back during probe leading to lengthy stalls.
- * This will be fixed by keeping ->mq_usage_counter in
- * atomic mode until genhd registration, but, for now,
- * let's work around using expedited synchronization.
- */
- __percpu_ref_kill_expedited(&q->mq_usage_counter);
-
+ percpu_ref_kill(&q->mq_usage_counter);
blk_mq_run_queues(q, false);
}
wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
@@ -1804,7 +1795,12 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
if (!q)
goto err_hctxs;
- if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release))
+ /*
+ * Init percpu_ref in atomic mode so that it's faster to shutdown.
+ * See blk_register_queue() for details.
+ */
+ if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release,
+ PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
goto err_map;
setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 17f5c84ce7bf..521ae9089c50 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -551,12 +551,19 @@ int blk_register_queue(struct gendisk *disk)
return -ENXIO;
/*
- * Initialization must be complete by now. Finish the initial
- * bypass from queue allocation.
+ * SCSI probing may synchronously create and destroy a lot of
+ * request_queues for non-existent devices. Shutting down a fully
+ * functional queue takes measureable wallclock time as RCU grace
+ * periods are involved. To avoid excessive latency in these
+ * cases, a request_queue starts out in a degraded mode which is
+ * faster to shut down and is made fully functional here as
+ * request_queues for non-existent devices never get registered.
*/
if (!blk_queue_init_done(q)) {
queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q);
blk_queue_bypass_end(q);
+ if (q->mq_ops)
+ blk_mq_finish_init(q);
}
ret = blk_trace_init_sysfs(dev);
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index fddfae61222f..be783f717f19 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -819,7 +819,8 @@ int core_tpg_add_lun(
{
int ret;
- ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release);
+ ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release, 0,
+ GFP_KERNEL);
if (ret < 0)
return ret;
diff --git a/fs/aio.c b/fs/aio.c
index 733750096b71..84a751005f5b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -661,10 +661,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
INIT_LIST_HEAD(&ctx->active_reqs);
- if (percpu_ref_init(&ctx->users, free_ioctx_users))
+ if (percpu_ref_init(&ctx->users, free_ioctx_users, 0, GFP_KERNEL))
goto err;
- if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs))
+ if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs, 0, GFP_KERNEL))
goto err;
ctx->cpu = alloc_percpu(struct kioctx_cpu);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a1d36e62179c..d0d78dc07792 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1183,7 +1183,7 @@ static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void)
if (!writers)
return ERR_PTR(-ENOMEM);
- ret = percpu_counter_init(&writers->counter, 0);
+ ret = percpu_counter_init(&writers->counter, 0, GFP_KERNEL);
if (ret < 0) {
kfree(writers);
return ERR_PTR(ret);
@@ -2188,7 +2188,7 @@ int open_ctree(struct super_block *sb,
goto fail_srcu;
}
- ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0);
+ ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
if (ret) {
err = ret;
goto fail_bdi;
@@ -2196,13 +2196,13 @@ int open_ctree(struct super_block *sb,
fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE *
(1 + ilog2(nr_cpu_ids));
- ret = percpu_counter_init(&fs_info->delalloc_bytes, 0);
+ ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL);
if (ret) {
err = ret;
goto fail_dirty_metadata_bytes;
}
- ret = percpu_counter_init(&fs_info->bio_counter, 0);
+ ret = percpu_counter_init(&fs_info->bio_counter, 0, GFP_KERNEL);
if (ret) {
err = ret;
goto fail_delalloc_bytes;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3efe1c3877bf..caaf015d6e4b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3494,7 +3494,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
if (!found)
return -ENOMEM;
- ret = percpu_counter_init(&found->total_bytes_pinned, 0);
+ ret = percpu_counter_init(&found->total_bytes_pinned, 0, GFP_KERNEL);
if (ret) {
kfree(found);
return ret;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index b88edc05c230..170dc41e8bf4 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1067,14 +1067,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
ext2_rsv_window_add(sb, &sbi->s_rsv_window_head);
err = percpu_counter_init(&sbi->s_freeblocks_counter,
- ext2_count_free_blocks(sb));
+ ext2_count_free_blocks(sb), GFP_KERNEL);
if (!err) {
err = percpu_counter_init(&sbi->s_freeinodes_counter,
- ext2_count_free_inodes(sb));
+ ext2_count_free_inodes(sb), GFP_KERNEL);
}
if (!err) {
err = percpu_counter_init(&sbi->s_dirs_counter,
- ext2_count_dirs(sb));
+ ext2_count_dirs(sb), GFP_KERNEL);
}
if (err) {
ext2_msg(sb, KERN_ERR, "error: insufficient memory");
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 622e88249024..bb0fdacad058 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2039,14 +2039,14 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
goto failed_mount2;
}
err = percpu_counter_init(&sbi->s_freeblocks_counter,
- ext3_count_free_blocks(sb));
+ ext3_count_free_blocks(sb), GFP_KERNEL);
if (!err) {
err = percpu_counter_init(&sbi->s_freeinodes_counter,
- ext3_count_free_inodes(sb));
+ ext3_count_free_inodes(sb), GFP_KERNEL);
}
if (!err) {
err = percpu_counter_init(&sbi->s_dirs_counter,
- ext3_count_dirs(sb));
+ ext3_count_dirs(sb), GFP_KERNEL);
}
if (err) {
ext3_msg(sb, KERN_ERR, "error: insufficient memory");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0b28b36e7915..05c159218bc2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3892,7 +3892,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
/* Register extent status tree shrinker */
ext4_es_register_shrinker(sbi);
- if ((err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0)) != 0) {
+ err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0, GFP_KERNEL);
+ if (err) {
ext4_msg(sb, KERN_ERR, "insufficient memory");
goto failed_mount3;
}
@@ -4106,17 +4107,20 @@ no_journal:
block = ext4_count_free_clusters(sb);
ext4_free_blocks_count_set(sbi->s_es,
EXT4_C2B(sbi, block));
- err = percpu_counter_init(&sbi->s_freeclusters_counter, block);
+ err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
+ GFP_KERNEL);
if (!err) {
unsigned long freei = ext4_count_free_inodes(sb);
sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
- err = percpu_counter_init(&sbi->s_freeinodes_counter, freei);
+ err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
+ GFP_KERNEL);
}
if (!err)
err = percpu_counter_init(&sbi->s_dirs_counter,
- ext4_count_dirs(sb));
+ ext4_count_dirs(sb), GFP_KERNEL);
if (!err)
- err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
+ err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
+ GFP_KERNEL);
if (err) {
ext4_msg(sb, KERN_ERR, "insufficient memory");
goto failed_mount6;
diff --git a/fs/file_table.c b/fs/file_table.c
index 385bfd31512a..0bab12b20460 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -331,5 +331,5 @@ void __init files_init(unsigned long mempages)
n = (mempages * (PAGE_SIZE / 1024)) / 10;
files_stat.max_files = max_t(unsigned long, n, NR_FILE);
- percpu_counter_init(&nr_files, 0);
+ percpu_counter_init(&nr_files, 0, GFP_KERNEL);
}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index f2d0eee9d1f1..8b663b2d9562 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2725,7 +2725,7 @@ static int __init dquot_init(void)
panic("Cannot create dquot hash table");
for (i = 0; i < _DQST_DQSTAT_LAST; i++) {
- ret = percpu_counter_init(&dqstats.counter[i], 0);
+ ret = percpu_counter_init(&dqstats.counter[i], 0, GFP_KERNEL);
if (ret)
panic("Cannot create dquot stat counters");
}
diff --git a/fs/super.c b/fs/super.c
index b9a214d2fe98..1b836107acee 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -175,7 +175,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
goto fail;
for (i = 0; i < SB_FREEZE_LEVELS; i++) {
- if (percpu_counter_init(&s->s_writers.counter[i], 0) < 0)
+ if (percpu_counter_init(&s->s_writers.counter[i], 0,
+ GFP_KERNEL) < 0)
goto fail;
lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
&type->s_writers_key[i], 0);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a1e31f274fcd..c13a0c09faea 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -140,6 +140,7 @@ enum {
};
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
+void blk_mq_finish_init(struct request_queue *q);
int blk_mq_register_disk(struct gendisk *);
void blk_mq_unregister_disk(struct gendisk *);
diff --git a/include/linux/flex_proportions.h b/include/linux/flex_proportions.h
index 4ebc49fae391..0d348e011a6e 100644
--- a/include/linux/flex_proportions.h
+++ b/include/linux/flex_proportions.h
@@ -10,6 +10,7 @@
#include <linux/percpu_counter.h>
#include <linux/spinlock.h>
#include <linux/seqlock.h>
+#include <linux/gfp.h>
/*
* When maximum proportion of some event type is specified, this is the
@@ -32,7 +33,7 @@ struct fprop_global {
seqcount_t sequence;
};
-int fprop_global_init(struct fprop_global *p);
+int fprop_global_init(struct fprop_global *p, gfp_t gfp);
void fprop_global_destroy(struct fprop_global *p);
bool fprop_new_period(struct fprop_global *p, int periods);
@@ -79,7 +80,7 @@ struct fprop_local_percpu {
raw_spinlock_t lock; /* Protect period and numerator */
};
-int fprop_local_init_percpu(struct fprop_local_percpu *pl);
+int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp);
void fprop_local_destroy_percpu(struct fprop_local_percpu *pl);
void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl);
void __fprop_inc_percpu_max(struct fprop_global *p, struct fprop_local_percpu *pl,
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 68a64f11ce02..d5c89e0dd0e6 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -13,7 +13,7 @@
*
* The refcount will have a range of 0 to ((1U << 31) - 1), i.e. one bit less
* than an atomic_t - this is because of the way shutdown works, see
- * percpu_ref_kill()/PCPU_COUNT_BIAS.
+ * percpu_ref_kill()/PERCPU_COUNT_BIAS.
*
* Before you call percpu_ref_kill(), percpu_ref_put() does not check for the
* refcount hitting 0 - it can't, if it was in percpu mode. percpu_ref_kill()
@@ -49,29 +49,60 @@
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
+#include <linux/gfp.h>
struct percpu_ref;
typedef void (percpu_ref_func_t)(struct percpu_ref *);
+/* flags set in the lower bits of percpu_ref->percpu_count_ptr */
+enum {
+ __PERCPU_REF_ATOMIC = 1LU << 0, /* operating in atomic mode */
+ __PERCPU_REF_DEAD = 1LU << 1, /* (being) killed */
+ __PERCPU_REF_ATOMIC_DEAD = __PERCPU_REF_ATOMIC | __PERCPU_REF_DEAD,
+
+ __PERCPU_REF_FLAG_BITS = 2,
+};
+
+/* @flags for percpu_ref_init() */
+enum {
+ /*
+ * Start w/ ref == 1 in atomic mode. Can be switched to percpu
+ * operation using percpu_ref_switch_to_percpu(). If initialized
+ * with this flag, the ref will stay in atomic mode until
+ * percpu_ref_switch_to_percpu() is invoked on it.
+ */
+ PERCPU_REF_INIT_ATOMIC = 1 << 0,
+
+ /*
+ * Start dead w/ ref == 0 in atomic mode. Must be revived with
+ * percpu_ref_reinit() before used. Implies INIT_ATOMIC.
+ */
+ PERCPU_REF_INIT_DEAD = 1 << 1,
+};
+
struct percpu_ref {
- atomic_t count;
+ atomic_long_t count;
/*
* The low bit of the pointer indicates whether the ref is in percpu
* mode; if set, then get/put will manipulate the atomic_t.
*/
- unsigned long pcpu_count_ptr;
+ unsigned long percpu_count_ptr;
percpu_ref_func_t *release;
- percpu_ref_func_t *confirm_kill;
+ percpu_ref_func_t *confirm_switch;
+ bool force_atomic:1;
struct rcu_head rcu;
};
int __must_check percpu_ref_init(struct percpu_ref *ref,
- percpu_ref_func_t *release);
-void percpu_ref_reinit(struct percpu_ref *ref);
+ percpu_ref_func_t *release, unsigned int flags,
+ gfp_t gfp);
void percpu_ref_exit(struct percpu_ref *ref);
+void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
+ percpu_ref_func_t *confirm_switch);
+void percpu_ref_switch_to_percpu(struct percpu_ref *ref);
void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
percpu_ref_func_t *confirm_kill);
-void __percpu_ref_kill_expedited(struct percpu_ref *ref);
+void percpu_ref_reinit(struct percpu_ref *ref);
/**
* percpu_ref_kill - drop the initial ref
@@ -88,26 +119,24 @@ static inline void percpu_ref_kill(struct percpu_ref *ref)
return percpu_ref_kill_and_confirm(ref, NULL);
}
-#define PCPU_REF_DEAD 1
-
/*
* Internal helper. Don't use outside percpu-refcount proper. The
* function doesn't return the pointer and let the caller test it for NULL
* because doing so forces the compiler to generate two conditional
- * branches as it can't assume that @ref->pcpu_count is not NULL.
+ * branches as it can't assume that @ref->percpu_count is not NULL.
*/
-static inline bool __pcpu_ref_alive(struct percpu_ref *ref,
- unsigned __percpu **pcpu_countp)
+static inline bool __ref_is_percpu(struct percpu_ref *ref,
+ unsigned long __percpu **percpu_countp)
{
- unsigned long pcpu_ptr = ACCESS_ONCE(ref->pcpu_count_ptr);
+ unsigned long percpu_ptr = ACCESS_ONCE(ref->percpu_count_ptr);
/* paired with smp_store_release() in percpu_ref_reinit() */
smp_read_barrier_depends();
- if (unlikely(pcpu_ptr & PCPU_REF_DEAD))
+ if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC))
return false;
- *pcpu_countp = (unsigned __percpu *)pcpu_ptr;
+ *percpu_countp = (unsigned long __percpu *)percpu_ptr;
return true;
}
@@ -115,18 +144,20 @@ static inline bool __pcpu_ref_alive(struct percpu_ref *ref,
* percpu_ref_get - increment a percpu refcount
* @ref: percpu_ref to get
*
- * Analagous to atomic_inc().
- */
+ * Analagous to atomic_long_inc().
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
static inline void percpu_ref_get(struct percpu_ref *ref)
{
- unsigned __percpu *pcpu_count;
+ unsigned long __percpu *percpu_count;
rcu_read_lock_sched();
- if (__pcpu_ref_alive(ref, &pcpu_count))
- this_cpu_inc(*pcpu_count);
+ if (__ref_is_percpu(ref, &percpu_count))
+ this_cpu_inc(*percpu_count);
else
- atomic_inc(&ref->count);
+ atomic_long_inc(&ref->count);
rcu_read_unlock_sched();
}
@@ -138,20 +169,20 @@ static inline void percpu_ref_get(struct percpu_ref *ref)
* Increment a percpu refcount unless its count already reached zero.
* Returns %true on success; %false on failure.
*
- * The caller is responsible for ensuring that @ref stays accessible.
+ * This function is safe to call as long as @ref is between init and exit.
*/
static inline bool percpu_ref_tryget(struct percpu_ref *ref)
{
- unsigned __percpu *pcpu_count;
- int ret = false;
+ unsigned long __percpu *percpu_count;
+ int ret;
rcu_read_lock_sched();
- if (__pcpu_ref_alive(ref, &pcpu_count)) {
- this_cpu_inc(*pcpu_count);
+ if (__ref_is_percpu(ref, &percpu_count)) {
+ this_cpu_inc(*percpu_count);
ret = true;
} else {
- ret = atomic_inc_not_zero(&ref->count);
+ ret = atomic_long_inc_not_zero(&ref->count);
}
rcu_read_unlock_sched();
@@ -166,23 +197,26 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref)
* Increment a percpu refcount unless it has already been killed. Returns
* %true on success; %false on failure.
*
- * Completion of percpu_ref_kill() in itself doesn't guarantee that tryget
- * will fail. For such guarantee, percpu_ref_kill_and_confirm() should be
- * used. After the confirm_kill callback is invoked, it's guaranteed that
- * no new reference will be given out by percpu_ref_tryget().
+ * Completion of percpu_ref_kill() in itself doesn't guarantee that this
+ * function will fail. For such guarantee, percpu_ref_kill_and_confirm()
+ * should be used. After the confirm_kill callback is invoked, it's
+ * guaranteed that no new reference will be given out by
+ * percpu_ref_tryget_live().
*
- * The caller is responsible for ensuring that @ref stays accessible.
+ * This function is safe to call as long as @ref is between init and exit.
*/
static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
{
- unsigned __percpu *pcpu_count;
+ unsigned long __percpu *percpu_count;
int ret = false;
rcu_read_lock_sched();
- if (__pcpu_ref_alive(ref, &pcpu_count)) {
- this_cpu_inc(*pcpu_count);
+ if (__ref_is_percpu(ref, &percpu_count)) {
+ this_cpu_inc(*percpu_count);
ret = true;
+ } else if (!(ACCESS_ONCE(ref->percpu_count_ptr) & __PERCPU_REF_DEAD)) {
+ ret = atomic_long_inc_not_zero(&ref->count);
}
rcu_read_unlock_sched();
@@ -196,16 +230,18 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
*
* Decrement the refcount, and if 0, call the release function (which was passed
* to percpu_ref_init())
+ *
+ * This function is safe to call as long as @ref is between init and exit.
*/
static inline void percpu_ref_put(struct percpu_ref *ref)
{
- unsigned __percpu *pcpu_count;
+ unsigned long __percpu *percpu_count;
rcu_read_lock_sched();
- if (__pcpu_ref_alive(ref, &pcpu_count))
- this_cpu_dec(*pcpu_count);
- else if (unlikely(atomic_dec_and_test(&ref->count)))
+ if (__ref_is_percpu(ref, &percpu_count))
+ this_cpu_dec(*percpu_count);
+ else if (unlikely(atomic_long_dec_and_test(&ref->count)))
ref->release(ref);
rcu_read_unlock_sched();
@@ -216,14 +252,16 @@ static inline void percpu_ref_put(struct percpu_ref *ref)
* @ref: percpu_ref to test
*
* Returns %true if @ref reached zero.
+ *
+ * This function is safe to call as long as @ref is between init and exit.
*/
static inline bool percpu_ref_is_zero(struct percpu_ref *ref)
{
- unsigned __percpu *pcpu_count;
+ unsigned long __percpu *percpu_count;
- if (__pcpu_ref_alive(ref, &pcpu_count))
+ if (__ref_is_percpu(ref, &percpu_count))
return false;
- return !atomic_read(&ref->count);
+ return !atomic_long_read(&ref->count);
}
#endif
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 6f61b61b7996..a3aa63e47637 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -48,9 +48,9 @@
* intelligent way to determine this would be nice.
*/
#if BITS_PER_LONG > 32
-#define PERCPU_DYNAMIC_RESERVE (20 << 10)
+#define PERCPU_DYNAMIC_RESERVE (28 << 10)
#else
-#define PERCPU_DYNAMIC_RESERVE (12 << 10)
+#define PERCPU_DYNAMIC_RESERVE (20 << 10)
#endif
extern void *pcpu_base_addr;
@@ -122,11 +122,16 @@ extern void __init setup_per_cpu_areas(void);
#endif
extern void __init percpu_init_late(void);
+extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp);
extern void __percpu *__alloc_percpu(size_t size, size_t align);
extern void free_percpu(void __percpu *__pdata);
extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
-#define alloc_percpu(type) \
- (typeof(type) __percpu *)__alloc_percpu(sizeof(type), __alignof__(type))
+#define alloc_percpu_gfp(type, gfp) \
+ (typeof(type) __percpu *)__alloc_percpu_gfp(sizeof(type), \
+ __alignof__(type), gfp)
+#define alloc_percpu(type) \
+ (typeof(type) __percpu *)__alloc_percpu(sizeof(type), \
+ __alignof__(type))
#endif /* __LINUX_PERCPU_H */
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index d5dd4657c8d6..50e50095c8d1 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -12,6 +12,7 @@
#include <linux/threads.h>
#include <linux/percpu.h>
#include <linux/types.h>
+#include <linux/gfp.h>
#ifdef CONFIG_SMP
@@ -26,14 +27,14 @@ struct percpu_counter {
extern int percpu_counter_batch;
-int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
+int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
struct lock_class_key *key);
-#define percpu_counter_init(fbc, value) \
+#define percpu_counter_init(fbc, value, gfp) \
({ \
static struct lock_class_key __key; \
\
- __percpu_counter_init(fbc, value, &__key); \
+ __percpu_counter_init(fbc, value, gfp, &__key); \
})
void percpu_counter_destroy(struct percpu_counter *fbc);
@@ -89,7 +90,8 @@ struct percpu_counter {
s64 count;
};
-static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount)
+static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount,
+ gfp_t gfp)
{
fbc->count = amount;
return 0;
diff --git a/include/linux/proportions.h b/include/linux/proportions.h
index 26a8a4ed9b07..00e8e8fa7358 100644
--- a/include/linux/proportions.h
+++ b/include/linux/proportions.h
@@ -12,6 +12,7 @@
#include <linux/percpu_counter.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
+#include <linux/gfp.h>
struct prop_global {
/*
@@ -40,7 +41,7 @@ struct prop_descriptor {
struct mutex mutex; /* serialize the prop_global switch */
};
-int prop_descriptor_init(struct prop_descriptor *pd, int shift);
+int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp);
void prop_change_shift(struct prop_descriptor *pd, int new_shift);
/*
@@ -61,7 +62,7 @@ struct prop_local_percpu {
raw_spinlock_t lock; /* protect the snapshot state */
};
-int prop_local_init_percpu(struct prop_local_percpu *pl);
+int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp);
void prop_local_destroy_percpu(struct prop_local_percpu *pl);
void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl);
void prop_fraction_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl,
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 2f26dfb8450e..1f99a1de0e4f 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -63,7 +63,7 @@ static inline void dst_entries_add(struct dst_ops *dst, int val)
static inline int dst_entries_init(struct dst_ops *dst)
{
- return percpu_counter_init(&dst->pcpuc_entries, 0);
+ return percpu_counter_init(&dst->pcpuc_entries, 0, GFP_KERNEL);
}
static inline void dst_entries_destroy(struct dst_ops *dst)
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 65a8855e99fe..8d1765577acc 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -151,7 +151,7 @@ static inline void add_frag_mem_limit(struct inet_frag_queue *q, int i)
static inline void init_frag_mem_limit(struct netns_frags *nf)
{
- percpu_counter_init(&nf->mem, 0);
+ percpu_counter_init(&nf->mem, 0, GFP_KERNEL);
}
static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index cab7dc4284dc..136eceadeed1 100644
--- a/kernel/cgroup.c
+++ b/