summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Kconfig.preempt15
-rw-r--r--kernel/audit.c19
-rw-r--r--kernel/auditsc.c2
-rw-r--r--kernel/cgroup.c4
-rw-r--r--kernel/cpuset.c4
-rw-r--r--kernel/exit.c98
-rw-r--r--kernel/kprobes.c52
-rw-r--r--kernel/lockdep.c8
-rw-r--r--kernel/marker.c9
-rw-r--r--kernel/module.c24
-rw-r--r--kernel/power/Kconfig2
-rw-r--r--kernel/power/process.c29
-rw-r--r--kernel/power/snapshot.c41
-rw-r--r--kernel/printk.c2
-rw-r--r--kernel/rcupreempt.c233
-rw-r--r--kernel/res_counter.c1
-rw-r--r--kernel/sched.c382
-rw-r--r--kernel/sched_fair.c228
-rw-r--r--kernel/sched_rt.c10
-rw-r--r--kernel/signal.c16
-rw-r--r--kernel/softirq.c1
-rw-r--r--kernel/softlockup.c13
-rw-r--r--kernel/sysctl.c18
-rw-r--r--kernel/time/ntp.c23
-rw-r--r--kernel/time/tick-sched.c5
-rw-r--r--kernel/time/timekeeping.c6
26 files changed, 670 insertions, 575 deletions
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 0669b70fa6a3..9fdba03dc1fc 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -52,8 +52,23 @@ config PREEMPT
endchoice
+config PREEMPT_RCU
+ bool "Preemptible RCU"
+ depends on PREEMPT
+ default n
+ help
+ This option reduces the latency of the kernel by making certain
+ RCU sections preemptible. Normally RCU code is non-preemptible, if
+ this option is selected then read-only RCU sections become
+ preemptible. This helps latency, but may expose bugs due to
+ now-naive assumptions about each RCU read-side critical section
+ remaining on a given CPU through its execution.
+
+ Say N if you are unsure.
+
config RCU_TRACE
bool "Enable tracing for RCU - currently stats in debugfs"
+ depends on PREEMPT_RCU
select DEBUG_FS
default y
help
diff --git a/kernel/audit.c b/kernel/audit.c
index 2eeea9a14240..10c4930c2bbf 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -170,7 +170,9 @@ void audit_panic(const char *message)
printk(KERN_ERR "audit: %s\n", message);
break;
case AUDIT_FAIL_PANIC:
- panic("audit: %s\n", message);
+ /* test audit_pid since printk is always losey, why bother? */
+ if (audit_pid)
+ panic("audit: %s\n", message);
break;
}
}
@@ -352,6 +354,7 @@ static int kauditd_thread(void *dummy)
if (err < 0) {
BUG_ON(err != -ECONNREFUSED); /* Shoudn't happen */
printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid);
+ audit_log_lost("auditd dissapeared\n");
audit_pid = 0;
}
} else {
@@ -1350,17 +1353,19 @@ void audit_log_end(struct audit_buffer *ab)
if (!audit_rate_check()) {
audit_log_lost("rate limit exceeded");
} else {
+ struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
if (audit_pid) {
- struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
nlh->nlmsg_len = ab->skb->len - NLMSG_SPACE(0);
skb_queue_tail(&audit_skb_queue, ab->skb);
ab->skb = NULL;
wake_up_interruptible(&kauditd_wait);
- } else if (printk_ratelimit()) {
- struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
- printk(KERN_NOTICE "type=%d %s\n", nlh->nlmsg_type, ab->skb->data + NLMSG_SPACE(0));
- } else {
- audit_log_lost("printk limit exceeded\n");
+ } else if (nlh->nlmsg_type != AUDIT_EOE) {
+ if (printk_ratelimit()) {
+ printk(KERN_NOTICE "type=%d %s\n",
+ nlh->nlmsg_type,
+ ab->skb->data + NLMSG_SPACE(0));
+ } else
+ audit_log_lost("printk limit exceeded\n");
}
}
audit_buffer_free(ab);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 2087d6de67ea..782262e4107d 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1070,7 +1070,7 @@ static int audit_log_single_execve_arg(struct audit_context *context,
* so we can be sure nothing was lost.
*/
if ((i == 0) && (too_long))
- audit_log_format(*ab, "a%d_len=%ld ", arg_num,
+ audit_log_format(*ab, "a%d_len=%zu ", arg_num,
has_cntl ? 2*len : len);
/*
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d8abe996e009..e9c2fb01e89b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2232,7 +2232,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
mutex_lock(&cgroup_mutex);
- cgrp->flags = 0;
INIT_LIST_HEAD(&cgrp->sibling);
INIT_LIST_HEAD(&cgrp->children);
INIT_LIST_HEAD(&cgrp->css_sets);
@@ -2242,6 +2241,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
cgrp->root = parent->root;
cgrp->top_cgroup = parent->top_cgroup;
+ if (notify_on_release(parent))
+ set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+
for_each_subsys(root, ss) {
struct cgroup_subsys_state *css = ss->create(ss, cgrp);
if (IS_ERR(css)) {
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3e296ed81d4d..a1b61f414228 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -322,8 +322,8 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
* Call without callback_mutex or task_lock() held. May be
* called with or without cgroup_mutex held. Thanks in part to
* 'the_top_cpuset_hack', the task's cpuset pointer will never
- * be NULL. This routine also might acquire callback_mutex and
- * current->mm->mmap_sem during call.
+ * be NULL. This routine also might acquire callback_mutex during
+ * call.
*
* Reading current->cpuset->mems_generation doesn't need task_lock
* to guard the current->cpuset derefence, because it is guarded
diff --git a/kernel/exit.c b/kernel/exit.c
index 506a957b665a..53872bf993fa 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -214,20 +214,19 @@ struct pid *session_of_pgrp(struct pid *pgrp)
static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task)
{
struct task_struct *p;
- int ret = 1;
do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
- if (p == ignored_task
- || p->exit_state
- || is_global_init(p->real_parent))
+ if ((p == ignored_task) ||
+ (p->exit_state && thread_group_empty(p)) ||
+ is_global_init(p->real_parent))
continue;
+
if (task_pgrp(p->real_parent) != pgrp &&
- task_session(p->real_parent) == task_session(p)) {
- ret = 0;
- break;
- }
+ task_session(p->real_parent) == task_session(p))
+ return 0;
} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
- return ret; /* (sighing) "Often!" */
+
+ return 1;
}
int is_current_pgrp_orphaned(void)
@@ -255,6 +254,37 @@ static int has_stopped_jobs(struct pid *pgrp)
return retval;
}
+/*
+ * Check to see if any process groups have become orphaned as
+ * a result of our exiting, and if they have any stopped jobs,
+ * send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
+ */
+static void
+kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
+{
+ struct pid *pgrp = task_pgrp(tsk);
+ struct task_struct *ignored_task = tsk;
+
+ if (!parent)
+ /* exit: our father is in a different pgrp than
+ * we are and we were the only connection outside.
+ */
+ parent = tsk->real_parent;
+ else
+ /* reparent: our child is in a different pgrp than
+ * we are, and it was the only connection outside.
+ */
+ ignored_task = NULL;
+
+ if (task_pgrp(parent) != pgrp &&
+ task_session(parent) == task_session(tsk) &&
+ will_become_orphaned_pgrp(pgrp, ignored_task) &&
+ has_stopped_jobs(pgrp)) {
+ __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
+ __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
+ }
+}
+
/**
* reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd
*
@@ -635,22 +665,7 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
p->exit_signal != -1 && thread_group_empty(p))
do_notify_parent(p, p->exit_signal);
- /*
- * process group orphan check
- * Case ii: Our child is in a different pgrp
- * than we are, and it was the only connection
- * outside, so the child pgrp is now orphaned.
- */
- if ((task_pgrp(p) != task_pgrp(father)) &&
- (task_session(p) == task_session(father))) {
- struct pid *pgrp = task_pgrp(p);
-
- if (will_become_orphaned_pgrp(pgrp, NULL) &&
- has_stopped_jobs(pgrp)) {
- __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
- __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
- }
- }
+ kill_orphaned_pgrp(p, father);
}
/*
@@ -735,11 +750,9 @@ static void forget_original_parent(struct task_struct *father)
* Send signals to all our closest relatives so that they know
* to properly mourn us..
*/
-static void exit_notify(struct task_struct *tsk)
+static void exit_notify(struct task_struct *tsk, int group_dead)
{
int state;
- struct task_struct *t;
- struct pid *pgrp;
/*
* This does two things:
@@ -753,25 +766,8 @@ static void exit_notify(struct task_struct *tsk)
exit_task_namespaces(tsk);
write_lock_irq(&tasklist_lock);
- /*
- * Check to see if any process groups have become orphaned
- * as a result of our exiting, and if they have any stopped
- * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
- *
- * Case i: Our father is in a different pgrp than we are
- * and we were the only connection outside, so our pgrp
- * is about to become orphaned.
- */
- t = tsk->real_parent;
-
- pgrp = task_pgrp(tsk);
- if ((task_pgrp(t) != pgrp) &&
- (task_session(t) == task_session(tsk)) &&
- will_become_orphaned_pgrp(pgrp, tsk) &&
- has_stopped_jobs(pgrp)) {
- __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
- __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
- }
+ if (group_dead)
+ kill_orphaned_pgrp(tsk->group_leader, NULL);
/* Let father know we died
*
@@ -788,8 +784,8 @@ static void exit_notify(struct task_struct *tsk)
* the same after a fork.
*/
if (tsk->exit_signal != SIGCHLD && tsk->exit_signal != -1 &&
- ( tsk->parent_exec_id != t->self_exec_id ||
- tsk->self_exec_id != tsk->parent_exec_id)
+ (tsk->parent_exec_id != tsk->real_parent->self_exec_id ||
+ tsk->self_exec_id != tsk->parent_exec_id)
&& !capable(CAP_KILL))
tsk->exit_signal = SIGCHLD;
@@ -986,7 +982,7 @@ NORET_TYPE void do_exit(long code)
module_put(tsk->binfmt->module);
proc_exit_connector(tsk);
- exit_notify(tsk);
+ exit_notify(tsk, group_dead);
#ifdef CONFIG_NUMA
mpol_free(tsk->mempolicy);
tsk->mempolicy = NULL;
@@ -1382,7 +1378,7 @@ unlock_sig:
if (!retval && infop)
retval = put_user(0, &infop->si_errno);
if (!retval && infop)
- retval = put_user(why, &infop->si_code);
+ retval = put_user((short)why, &infop->si_code);
if (!retval && infop)
retval = put_user(exit_code, &infop->si_status);
if (!retval && infop)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 7a86e6432338..fcfb580c3afc 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -498,27 +498,36 @@ static int __kprobes in_kprobes_functions(unsigned long addr)
return 0;
}
+/*
+ * If we have a symbol_name argument, look it up and add the offset field
+ * to it. This way, we can specify a relative address to a symbol.
+ */
+static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
+{
+ kprobe_opcode_t *addr = p->addr;
+ if (p->symbol_name) {
+ if (addr)
+ return NULL;
+ kprobe_lookup_name(p->symbol_name, addr);
+ }
+
+ if (!addr)
+ return NULL;
+ return (kprobe_opcode_t *)(((char *)addr) + p->offset);
+}
+
static int __kprobes __register_kprobe(struct kprobe *p,
unsigned long called_from)
{
int ret = 0;
struct kprobe *old_p;
struct module *probed_mod;
+ kprobe_opcode_t *addr;
- /*
- * If we have a symbol_name argument look it up,
- * and add it to the address. That way the addr
- * field can either be global or relative to a symbol.
- */
- if (p->symbol_name) {
- if (p->addr)
- return -EINVAL;
- kprobe_lookup_name(p->symbol_name, p->addr);
- }
-
- if (!p->addr)
+ addr = kprobe_addr(p);
+ if (!addr)
return -EINVAL;
- p->addr = (kprobe_opcode_t *)(((char *)p->addr)+ p->offset);
+ p->addr = addr;
if (!kernel_text_address((unsigned long) p->addr) ||
in_kprobes_functions((unsigned long) p->addr))
@@ -678,8 +687,7 @@ void __kprobes unregister_jprobe(struct jprobe *jp)
unregister_kprobe(&jp->kp);
}
-#ifdef ARCH_SUPPORTS_KRETPROBES
-
+#ifdef CONFIG_KRETPROBES
/*
* This kprobe pre_handler is registered with every kretprobe. When probe
* hits it will set up the return probe.
@@ -722,12 +730,12 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
int ret = 0;
struct kretprobe_instance *inst;
int i;
- void *addr = rp->kp.addr;
+ void *addr;
if (kretprobe_blacklist_size) {
- if (addr == NULL)
- kprobe_lookup_name(rp->kp.symbol_name, addr);
- addr += rp->kp.offset;
+ addr = kprobe_addr(&rp->kp);
+ if (!addr)
+ return -EINVAL;
for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
if (kretprobe_blacklist[i].addr == addr)
@@ -769,8 +777,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
return ret;
}
-#else /* ARCH_SUPPORTS_KRETPROBES */
-
+#else /* CONFIG_KRETPROBES */
int __kprobes register_kretprobe(struct kretprobe *rp)
{
return -ENOSYS;
@@ -781,8 +788,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
{
return 0;
}
-
-#endif /* ARCH_SUPPORTS_KRETPROBES */
+#endif /* CONFIG_KRETPROBES */
void __kprobes unregister_kretprobe(struct kretprobe *rp)
{
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 3574379f4d62..81a4e4a3f087 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -779,6 +779,10 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
* parallel walking of the hash-list safe:
*/
list_add_tail_rcu(&class->hash_entry, hash_head);
+ /*
+ * Add it to the global list of classes:
+ */
+ list_add_tail_rcu(&class->lock_entry, &all_lock_classes);
if (verbose(class)) {
graph_unlock();
@@ -2282,10 +2286,6 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
return 0;
break;
case LOCK_USED:
- /*
- * Add it to the global list of classes:
- */
- list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes);
debug_atomic_dec(&nr_unused_locks);
break;
default:
diff --git a/kernel/marker.c b/kernel/marker.c
index 50effc01d9a2..48a4ea5afffd 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -698,14 +698,12 @@ int marker_probe_unregister(const char *name,
{
struct marker_entry *entry;
struct marker_probe_closure *old;
- int ret = 0;
+ int ret = -ENOENT;
mutex_lock(&markers_mutex);
entry = get_marker(name);
- if (!entry) {
- ret = -ENOENT;
+ if (!entry)
goto end;
- }
if (entry->rcu_pending)
rcu_barrier();
old = marker_entry_remove_probe(entry, probe, probe_private);
@@ -713,12 +711,15 @@ int marker_probe_unregister(const char *name,
marker_update_probes(); /* may update entry */
mutex_lock(&markers_mutex);
entry = get_marker(name);
+ if (!entry)
+ goto end;
entry->oldptr = old;
entry->rcu_pending = 1;
/* write rcu_pending before calling the RCU callback */
smp_wmb();
call_rcu(&entry->rcu, free_old_closure);
remove_marker(name); /* Ignore busy error message */
+ ret = 0;
end:
mutex_unlock(&markers_mutex);
return ret;
diff --git a/kernel/module.c b/kernel/module.c
index 901cd6ac2f11..5d437bffd8dc 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1933,8 +1933,15 @@ static struct module *load_module(void __user *umod,
/* Set up license info based on the info section */
set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
+ /*
+ * ndiswrapper is under GPL by itself, but loads proprietary modules.
+ * Don't use add_taint_module(), as it would prevent ndiswrapper from
+ * using GPL-only symbols it needs.
+ */
if (strcmp(mod->name, "ndiswrapper") == 0)
- add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
+ add_taint(TAINT_PROPRIETARY_MODULE);
+
+ /* driverloader was caught wrongly pretending to be under GPL */
if (strcmp(mod->name, "driverloader") == 0)
add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
@@ -2171,10 +2178,20 @@ sys_init_module(void __user *umod,
wake_up(&module_wq);
return ret;
}
+ if (ret > 0) {
+ printk(KERN_WARNING "%s: '%s'->init suspiciously returned %d, "
+ "it should follow 0/-E convention\n"
+ KERN_WARNING "%s: loading module anyway...\n",
+ __func__, mod->name, ret,
+ __func__);
+ dump_stack();
+ }
- /* Now it's a first class citizen! */
- mutex_lock(&module_mutex);
+ /* Now it's a first class citizen! Wake up anyone waiting for it. */
mod->state = MODULE_STATE_LIVE;
+ wake_up(&module_wq);
+
+ mutex_lock(&module_mutex);
/* Drop initial reference. */
module_put(mod);
unwind_remove_table(mod->unwind_info, 1);
@@ -2183,7 +2200,6 @@ sys_init_module(void __user *umod,
mod->init_size = 0;
mod->init_text_size = 0;
mutex_unlock(&module_mutex);
- wake_up(&module_wq);
return 0;
}
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 79833170bb9c..6233f3b4ae66 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -190,7 +190,7 @@ config APM_EMULATION
notification of APM "events" (e.g. battery status change).
In order to use APM, you will need supporting software. For location
- and more information, read <file:Documentation/pm.txt> and the
+ and more information, read <file:Documentation/power/pm.txt> and the
Battery Powered Linux mini-HOWTO, available from
<http://www.tldp.org/docs.html#howto>.
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 7c2118f9597f..f1d0b345c9ba 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -75,22 +75,15 @@ void refrigerator(void)
__set_current_state(save);
}
-static void fake_signal_wake_up(struct task_struct *p, int resume)
+static void fake_signal_wake_up(struct task_struct *p)
{
unsigned long flags;
spin_lock_irqsave(&p->sighand->siglock, flags);
- signal_wake_up(p, resume);
+ signal_wake_up(p, 0);
spin_unlock_irqrestore(&p->sighand->siglock, flags);
}
-static void send_fake_signal(struct task_struct *p)
-{
- if (task_is_stopped(p))
- force_sig_specific(SIGSTOP, p);
- fake_signal_wake_up(p, task_is_stopped(p));
-}
-
static int has_mm(struct task_struct *p)
{
return (p->mm && !(p->flags & PF_BORROWED_MM));
@@ -121,7 +114,7 @@ static int freeze_task(struct task_struct *p, int with_mm_only)
if (freezing(p)) {
if (has_mm(p)) {
if (!signal_pending(p))
- fake_signal_wake_up(p, 0);
+ fake_signal_wake_up(p);
} else {
if (with_mm_only)
ret = 0;
@@ -135,7 +128,7 @@ static int freeze_task(struct task_struct *p, int with_mm_only)
} else {
if (has_mm(p)) {
set_freeze_flag(p);
- send_fake_signal(p);
+ fake_signal_wake_up(p);
} else {
if (with_mm_only) {
ret = 0;
@@ -182,15 +175,17 @@ static int try_to_freeze_tasks(int freeze_user_space)
if (frozen(p) || !freezeable(p))
continue;
- if (task_is_traced(p) && frozen(p->parent)) {
- cancel_freezing(p);
- continue;
- }
-
if (!freeze_task(p, freeze_user_space))
continue;
- if (!freezer_should_skip(p))
+ /*
+ * Now that we've done set_freeze_flag, don't
+ * perturb a task in TASK_STOPPED or TASK_TRACED.
+ * It is "frozen enough". If the task does wake
+ * up, it will immediately call try_to_freeze.
+ */
+ if (!task_is_stopped_or_traced(p) &&
+ !freezer_should_skip(p))
todo++;
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 72a020cabb4c..5f91a07c4eac 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -447,7 +447,7 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
* of @bm->cur_zone_bm are updated.
*/
-static void memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
+static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
void **addr, unsigned int *bit_nr)
{
struct zone_bitmap *zone_bm;
@@ -461,7 +461,8 @@ static void memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
zone_bm = zone_bm->next;
- BUG_ON(!zone_bm);
+ if (!zone_bm)
+ return -EFAULT;
}
bm->cur.zone_bm = zone_bm;
}
@@ -479,23 +480,40 @@ static void memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
pfn -= bb->start_pfn;
*bit_nr = pfn % BM_BITS_PER_CHUNK;
*addr = bb->data + pfn / BM_BITS_PER_CHUNK;
+ return 0;
}
static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
{
void *addr;
unsigned int bit;
+ int error;
- memory_bm_find_bit(bm, pfn, &addr, &bit);
+ error = memory_bm_find_bit(bm, pfn, &addr, &bit);
+ BUG_ON(error);
set_bit(bit, addr);
}
+static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
+{
+ void *addr;
+ unsigned int bit;
+ int error;
+
+ error = memory_bm_find_bit(bm, pfn, &addr, &bit);
+ if (!error)
+ set_bit(bit, addr);
+ return error;
+}
+
static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
{
void *addr;
unsigned int bit;
+ int error;
- memory_bm_find_bit(bm, pfn, &addr, &bit);
+ error = memory_bm_find_bit(bm, pfn, &addr, &bit);
+ BUG_ON(error);
clear_bit(bit, addr);
}
@@ -503,8 +521,10 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
{
void *addr;
unsigned int bit;
+ int error;
- memory_bm_find_bit(bm, pfn, &addr, &bit);
+ error = memory_bm_find_bit(bm, pfn, &addr, &bit);
+ BUG_ON(error);
return test_bit(bit, addr);
}
@@ -709,8 +729,15 @@ static void mark_nosave_pages(struct memory_bitmap *bm)
region->end_pfn << PAGE_SHIFT);
for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
- if (pfn_valid(pfn))
- memory_bm_set_bit(bm, pfn);
+ if (pfn_valid(pfn)) {
+ /*
+ * It is safe to ignore the result of
+ * mem_bm_set_bit_check() here, since we won't
+ * touch the PFNs for which the error is
+ * returned anyway.
+ */
+ mem_bm_set_bit_check(bm, pfn);
+ }
}
}
diff --git a/kernel/printk.c b/kernel/printk.c
index bee36100f110..9adc2a473e6e 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -666,7 +666,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
}
/* Emit the output into the temporary buffer */
printed_len += vscnprintf(printk_buf + printed_len,
- sizeof(printk_buf), fmt, args);
+ sizeof(printk_buf) - printed_len, fmt, args);
/*
* Copy the output into log_buf. If the caller didn't provide
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 987cfb7ade89..e9517014b57c 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -23,6 +23,10 @@
* to Suparna Bhattacharya for pushing me completely away
* from atomic instructions on the read side.
*
+ * - Added handling of Dynamic Ticks
+ * Copyright 2007 - Paul E. Mckenney <paulmck@us.ibm.com>
+ * - Steven Rostedt <srostedt@redhat.com>
+ *
* Papers: http://www.rdrop.com/users/paulmck/RCU
*
* Design Document: http://lwn.net/Articles/253651/
@@ -409,6 +413,212 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp)
}
}
+#ifdef CONFIG_NO_HZ
+
+DEFINE_PER_CPU(long, dynticks_progress_counter) = 1;
+static DEFINE_PER_CPU(long, rcu_dyntick_snapshot);
+static DEFINE_PER_CPU(int, rcu_update_flag);
+
+/**
+ * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
+ *
+ * If the CPU was idle with dynamic ticks active, this updates the
+ * dynticks_progress_counter to let the RCU handling know that the
+ * CPU is active.
+ */
+void rcu_irq_enter(void)
+{
+ int cpu = smp_processor_id();
+
+ if (per_cpu(rcu_update_flag, cpu))
+ per_cpu(rcu_update_flag, cpu)++;
+
+ /*
+ * Only update if we are coming from a stopped ticks mode
+ * (dynticks_progress_counter is even).
+ */
+ if (!in_interrupt() &&
+ (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) {
+ /*
+ * The following might seem like we could have a race
+ * with NMI/SMIs. But this really isn't a problem.
+ * Here we do a read/modify/write, and the race happens
+ * when an NMI/SMI comes in after the read and before
+ * the write. But NMI/SMIs will increment this counter
+ * twice before returning, so the zero bit will not
+ * be corrupted by the NMI/SMI which is the most important
+ * part.
+ *
+ * The only thing is that we would bring back the counter
+ * to a postion that it was in during the NMI/SMI.
+ * But the zero bit would be set, so the rest of the
+ * counter would again be ignored.
+ *
+ * On return from the IRQ, the counter may have the zero
+ * bit be 0 and the counter the same as the return from
+ * the NMI/SMI. If the state machine was so unlucky to
+ * see that, it still doesn't matter, since all
+ * RCU read-side critical sections on this CPU would
+ * have already completed.
+ */
+ per_cpu(dynticks_progress_counter, cpu)++;
+ /*
+ * The following memory barrier ensures that any
+ * rcu_read_lock() primitives in the irq handler
+ * are seen by other CPUs to follow the above
+ * increment to dynticks_progress_counter. This is
+ * required in order for other CPUs to correctly
+ * determine when it is safe to advance the RCU
+ * grace-period state machine.
+ */
+ smp_mb(); /* see above block comment. */
+ /*
+ * Since we can't determine the dynamic tick mode from
+ * the dynticks_progress_counter after this routine,
+ * we use a second flag to acknowledge that we came
+ * from an idle state with ticks stopped.
+ */
+ per_cpu(rcu_update_flag, cpu)++;
+ /*
+ * If we take an NMI/SMI now, they will also increment
+ * the rcu_update_flag, and will not update the
+ * dynticks_progress_counter on exit. That is for
+ * this IRQ to do.
+ */
+ }
+}
+
+/**
+ * rcu_irq_exit - Called from exiting Hard irq context.
+ *
+ * If the CPU was idle with dynamic ticks active, update the
+ * dynticks_progress_counter to put let the RCU handling be
+ * aware that the CPU is going back to idle with no ticks.
+ */
+void rcu_irq_exit(void)
+{
+ int cpu = smp_processor_id();
+
+ /*
+ * rcu_update_flag is set if we interrupted the CPU
+ * when it was idle with ticks stopped.
+ * Once this occurs, we keep track of interrupt nesting
+ * because a NMI/SMI could also come in, and we still
+ * only want the IRQ that started the increment of the
+ * dynticks_progress_counter to be the one that modifies
+ * it on exit.
+ */
+ if (per_cpu(rcu_update_flag, cpu)) {
+ if (--per_cpu(rcu_update_flag, cpu))
+ return;
+
+ /* This must match the interrupt nesting */
+ WARN_ON(in_interrupt());
+
+ /*
+ * If an NMI/SMI happens now we are still
+ * protected by the dynticks_progress_counter being odd.
+ */
+
+ /*
+ * The following memory barrier ensures that any
+ * rcu_read_unlock() primitives in the irq handler
+ * are seen by other CPUs to preceed the following
+ * increment to dynticks_progress_counter. This
+ * is required in order for other CPUs to determine
+ * when it is safe to advance the RCU grace-period
+ * state machine.
+ */
+ smp_mb(); /* see above block comment. */
+ per_cpu(dynticks_progress_counter, cpu)++;
+ WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1);
+ }
+}
+
+static void dyntick_save_progress_counter(int cpu)
+{
+ per_cpu(rcu_dyntick_snapshot, cpu) =
+ per_cpu(dynticks_progress_counter, cpu);
+}
+
+static inline int
+rcu_try_flip_waitack_needed(int cpu)
+{
+ long curr;
+ long snap;
+
+ curr = per_cpu(dynticks_progress_counter, cpu);
+ snap = per_cpu(rcu_dyntick_snapshot, cpu);
+ smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
+
+ /*
+ * If the CPU remained in dynticks mode for the entire time
+ * and didn't take any interrupts, NMIs, SMIs, or whatever,
+ * then it cannot be in the middle of an rcu_read_lock(), so
+ * the next rcu_read_lock() it executes must use the new value
+ * of the counter. So we can safely pretend that this CPU
+ * already acknowledged the counter.
+ */
+
+ if ((curr == snap) && ((curr & 0x1) == 0))
+ return 0;
+
+ /*
+ * If the CPU passed through or entered a dynticks idle p