diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/Kconfig.preempt | 15 | ||||
| -rw-r--r-- | kernel/audit.c | 19 | ||||
| -rw-r--r-- | kernel/auditsc.c | 2 | ||||
| -rw-r--r-- | kernel/cgroup.c | 4 | ||||
| -rw-r--r-- | kernel/cpuset.c | 4 | ||||
| -rw-r--r-- | kernel/exit.c | 98 | ||||
| -rw-r--r-- | kernel/kprobes.c | 52 | ||||
| -rw-r--r-- | kernel/lockdep.c | 8 | ||||
| -rw-r--r-- | kernel/marker.c | 9 | ||||
| -rw-r--r-- | kernel/module.c | 24 | ||||
| -rw-r--r-- | kernel/power/Kconfig | 2 | ||||
| -rw-r--r-- | kernel/power/process.c | 29 | ||||
| -rw-r--r-- | kernel/power/snapshot.c | 41 | ||||
| -rw-r--r-- | kernel/printk.c | 2 | ||||
| -rw-r--r-- | kernel/rcupreempt.c | 233 | ||||
| -rw-r--r-- | kernel/res_counter.c | 1 | ||||
| -rw-r--r-- | kernel/sched.c | 382 | ||||
| -rw-r--r-- | kernel/sched_fair.c | 228 | ||||
| -rw-r--r-- | kernel/sched_rt.c | 10 | ||||
| -rw-r--r-- | kernel/signal.c | 16 | ||||
| -rw-r--r-- | kernel/softirq.c | 1 | ||||
| -rw-r--r-- | kernel/softlockup.c | 13 | ||||
| -rw-r--r-- | kernel/sysctl.c | 18 | ||||
| -rw-r--r-- | kernel/time/ntp.c | 23 | ||||
| -rw-r--r-- | kernel/time/tick-sched.c | 5 | ||||
| -rw-r--r-- | kernel/time/timekeeping.c | 6 |
26 files changed, 670 insertions, 575 deletions
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index 0669b70fa6a3..9fdba03dc1fc 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -52,8 +52,23 @@ config PREEMPT endchoice +config PREEMPT_RCU + bool "Preemptible RCU" + depends on PREEMPT + default n + help + This option reduces the latency of the kernel by making certain + RCU sections preemptible. Normally RCU code is non-preemptible, if + this option is selected then read-only RCU sections become + preemptible. This helps latency, but may expose bugs due to + now-naive assumptions about each RCU read-side critical section + remaining on a given CPU through its execution. + + Say N if you are unsure. + config RCU_TRACE bool "Enable tracing for RCU - currently stats in debugfs" + depends on PREEMPT_RCU select DEBUG_FS default y help diff --git a/kernel/audit.c b/kernel/audit.c index 2eeea9a14240..10c4930c2bbf 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -170,7 +170,9 @@ void audit_panic(const char *message) printk(KERN_ERR "audit: %s\n", message); break; case AUDIT_FAIL_PANIC: - panic("audit: %s\n", message); + /* test audit_pid since printk is always losey, why bother? */ + if (audit_pid) + panic("audit: %s\n", message); break; } } @@ -352,6 +354,7 @@ static int kauditd_thread(void *dummy) if (err < 0) { BUG_ON(err != -ECONNREFUSED); /* Shoudn't happen */ printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid); + audit_log_lost("auditd dissapeared\n"); audit_pid = 0; } } else { @@ -1350,17 +1353,19 @@ void audit_log_end(struct audit_buffer *ab) if (!audit_rate_check()) { audit_log_lost("rate limit exceeded"); } else { + struct nlmsghdr *nlh = nlmsg_hdr(ab->skb); if (audit_pid) { - struct nlmsghdr *nlh = nlmsg_hdr(ab->skb); nlh->nlmsg_len = ab->skb->len - NLMSG_SPACE(0); skb_queue_tail(&audit_skb_queue, ab->skb); ab->skb = NULL; wake_up_interruptible(&kauditd_wait); - } else if (printk_ratelimit()) { - struct nlmsghdr *nlh = nlmsg_hdr(ab->skb); - printk(KERN_NOTICE "type=%d %s\n", nlh->nlmsg_type, ab->skb->data + NLMSG_SPACE(0)); - } else { - audit_log_lost("printk limit exceeded\n"); + } else if (nlh->nlmsg_type != AUDIT_EOE) { + if (printk_ratelimit()) { + printk(KERN_NOTICE "type=%d %s\n", + nlh->nlmsg_type, + ab->skb->data + NLMSG_SPACE(0)); + } else + audit_log_lost("printk limit exceeded\n"); } } audit_buffer_free(ab); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 2087d6de67ea..782262e4107d 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1070,7 +1070,7 @@ static int audit_log_single_execve_arg(struct audit_context *context, * so we can be sure nothing was lost. */ if ((i == 0) && (too_long)) - audit_log_format(*ab, "a%d_len=%ld ", arg_num, + audit_log_format(*ab, "a%d_len=%zu ", arg_num, has_cntl ? 2*len : len); /* diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d8abe996e009..e9c2fb01e89b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2232,7 +2232,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, mutex_lock(&cgroup_mutex); - cgrp->flags = 0; INIT_LIST_HEAD(&cgrp->sibling); INIT_LIST_HEAD(&cgrp->children); INIT_LIST_HEAD(&cgrp->css_sets); @@ -2242,6 +2241,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, cgrp->root = parent->root; cgrp->top_cgroup = parent->top_cgroup; + if (notify_on_release(parent)) + set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); + for_each_subsys(root, ss) { struct cgroup_subsys_state *css = ss->create(ss, cgrp); if (IS_ERR(css)) { diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 3e296ed81d4d..a1b61f414228 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -322,8 +322,8 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) * Call without callback_mutex or task_lock() held. May be * called with or without cgroup_mutex held. Thanks in part to * 'the_top_cpuset_hack', the task's cpuset pointer will never - * be NULL. This routine also might acquire callback_mutex and - * current->mm->mmap_sem during call. + * be NULL. This routine also might acquire callback_mutex during + * call. * * Reading current->cpuset->mems_generation doesn't need task_lock * to guard the current->cpuset derefence, because it is guarded diff --git a/kernel/exit.c b/kernel/exit.c index 506a957b665a..53872bf993fa 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -214,20 +214,19 @@ struct pid *session_of_pgrp(struct pid *pgrp) static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task) { struct task_struct *p; - int ret = 1; do_each_pid_task(pgrp, PIDTYPE_PGID, p) { - if (p == ignored_task - || p->exit_state - || is_global_init(p->real_parent)) + if ((p == ignored_task) || + (p->exit_state && thread_group_empty(p)) || + is_global_init(p->real_parent)) continue; + if (task_pgrp(p->real_parent) != pgrp && - task_session(p->real_parent) == task_session(p)) { - ret = 0; - break; - } + task_session(p->real_parent) == task_session(p)) + return 0; } while_each_pid_task(pgrp, PIDTYPE_PGID, p); - return ret; /* (sighing) "Often!" */ + + return 1; } int is_current_pgrp_orphaned(void) @@ -255,6 +254,37 @@ static int has_stopped_jobs(struct pid *pgrp) return retval; } +/* + * Check to see if any process groups have become orphaned as + * a result of our exiting, and if they have any stopped jobs, + * send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) + */ +static void +kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent) +{ + struct pid *pgrp = task_pgrp(tsk); + struct task_struct *ignored_task = tsk; + + if (!parent) + /* exit: our father is in a different pgrp than + * we are and we were the only connection outside. + */ + parent = tsk->real_parent; + else + /* reparent: our child is in a different pgrp than + * we are, and it was the only connection outside. + */ + ignored_task = NULL; + + if (task_pgrp(parent) != pgrp && + task_session(parent) == task_session(tsk) && + will_become_orphaned_pgrp(pgrp, ignored_task) && + has_stopped_jobs(pgrp)) { + __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); + __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp); + } +} + /** * reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd * @@ -635,22 +665,7 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) p->exit_signal != -1 && thread_group_empty(p)) do_notify_parent(p, p->exit_signal); - /* - * process group orphan check - * Case ii: Our child is in a different pgrp - * than we are, and it was the only connection - * outside, so the child pgrp is now orphaned. - */ - if ((task_pgrp(p) != task_pgrp(father)) && - (task_session(p) == task_session(father))) { - struct pid *pgrp = task_pgrp(p); - - if (will_become_orphaned_pgrp(pgrp, NULL) && - has_stopped_jobs(pgrp)) { - __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); - __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp); - } - } + kill_orphaned_pgrp(p, father); } /* @@ -735,11 +750,9 @@ static void forget_original_parent(struct task_struct *father) * Send signals to all our closest relatives so that they know * to properly mourn us.. */ -static void exit_notify(struct task_struct *tsk) +static void exit_notify(struct task_struct *tsk, int group_dead) { int state; - struct task_struct *t; - struct pid *pgrp; /* * This does two things: @@ -753,25 +766,8 @@ static void exit_notify(struct task_struct *tsk) exit_task_namespaces(tsk); write_lock_irq(&tasklist_lock); - /* - * Check to see if any process groups have become orphaned - * as a result of our exiting, and if they have any stopped - * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) - * - * Case i: Our father is in a different pgrp than we are - * and we were the only connection outside, so our pgrp - * is about to become orphaned. - */ - t = tsk->real_parent; - - pgrp = task_pgrp(tsk); - if ((task_pgrp(t) != pgrp) && - (task_session(t) == task_session(tsk)) && - will_become_orphaned_pgrp(pgrp, tsk) && - has_stopped_jobs(pgrp)) { - __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); - __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp); - } + if (group_dead) + kill_orphaned_pgrp(tsk->group_leader, NULL); /* Let father know we died * @@ -788,8 +784,8 @@ static void exit_notify(struct task_struct *tsk) * the same after a fork. */ if (tsk->exit_signal != SIGCHLD && tsk->exit_signal != -1 && - ( tsk->parent_exec_id != t->self_exec_id || - tsk->self_exec_id != tsk->parent_exec_id) + (tsk->parent_exec_id != tsk->real_parent->self_exec_id || + tsk->self_exec_id != tsk->parent_exec_id) && !capable(CAP_KILL)) tsk->exit_signal = SIGCHLD; @@ -986,7 +982,7 @@ NORET_TYPE void do_exit(long code) module_put(tsk->binfmt->module); proc_exit_connector(tsk); - exit_notify(tsk); + exit_notify(tsk, group_dead); #ifdef CONFIG_NUMA mpol_free(tsk->mempolicy); tsk->mempolicy = NULL; @@ -1382,7 +1378,7 @@ unlock_sig: if (!retval && infop) retval = put_user(0, &infop->si_errno); if (!retval && infop) - retval = put_user(why, &infop->si_code); + retval = put_user((short)why, &infop->si_code); if (!retval && infop) retval = put_user(exit_code, &infop->si_status); if (!retval && infop) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 7a86e6432338..fcfb580c3afc 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -498,27 +498,36 @@ static int __kprobes in_kprobes_functions(unsigned long addr) return 0; } +/* + * If we have a symbol_name argument, look it up and add the offset field + * to it. This way, we can specify a relative address to a symbol. + */ +static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) +{ + kprobe_opcode_t *addr = p->addr; + if (p->symbol_name) { + if (addr) + return NULL; + kprobe_lookup_name(p->symbol_name, addr); + } + + if (!addr) + return NULL; + return (kprobe_opcode_t *)(((char *)addr) + p->offset); +} + static int __kprobes __register_kprobe(struct kprobe *p, unsigned long called_from) { int ret = 0; struct kprobe *old_p; struct module *probed_mod; + kprobe_opcode_t *addr; - /* - * If we have a symbol_name argument look it up, - * and add it to the address. That way the addr - * field can either be global or relative to a symbol. - */ - if (p->symbol_name) { - if (p->addr) - return -EINVAL; - kprobe_lookup_name(p->symbol_name, p->addr); - } - - if (!p->addr) + addr = kprobe_addr(p); + if (!addr) return -EINVAL; - p->addr = (kprobe_opcode_t *)(((char *)p->addr)+ p->offset); + p->addr = addr; if (!kernel_text_address((unsigned long) p->addr) || in_kprobes_functions((unsigned long) p->addr)) @@ -678,8 +687,7 @@ void __kprobes unregister_jprobe(struct jprobe *jp) unregister_kprobe(&jp->kp); } -#ifdef ARCH_SUPPORTS_KRETPROBES - +#ifdef CONFIG_KRETPROBES /* * This kprobe pre_handler is registered with every kretprobe. When probe * hits it will set up the return probe. @@ -722,12 +730,12 @@ int __kprobes register_kretprobe(struct kretprobe *rp) int ret = 0; struct kretprobe_instance *inst; int i; - void *addr = rp->kp.addr; + void *addr; if (kretprobe_blacklist_size) { - if (addr == NULL) - kprobe_lookup_name(rp->kp.symbol_name, addr); - addr += rp->kp.offset; + addr = kprobe_addr(&rp->kp); + if (!addr) + return -EINVAL; for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { if (kretprobe_blacklist[i].addr == addr) @@ -769,8 +777,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp) return ret; } -#else /* ARCH_SUPPORTS_KRETPROBES */ - +#else /* CONFIG_KRETPROBES */ int __kprobes register_kretprobe(struct kretprobe *rp) { return -ENOSYS; @@ -781,8 +788,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, { return 0; } - -#endif /* ARCH_SUPPORTS_KRETPROBES */ +#endif /* CONFIG_KRETPROBES */ void __kprobes unregister_kretprobe(struct kretprobe *rp) { diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 3574379f4d62..81a4e4a3f087 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -779,6 +779,10 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) * parallel walking of the hash-list safe: */ list_add_tail_rcu(&class->hash_entry, hash_head); + /* + * Add it to the global list of classes: + */ + list_add_tail_rcu(&class->lock_entry, &all_lock_classes); if (verbose(class)) { graph_unlock(); @@ -2282,10 +2286,6 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, return 0; break; case LOCK_USED: - /* - * Add it to the global list of classes: - */ - list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes); debug_atomic_dec(&nr_unused_locks); break; default: diff --git a/kernel/marker.c b/kernel/marker.c index 50effc01d9a2..48a4ea5afffd 100644 --- a/kernel/marker.c +++ b/kernel/marker.c @@ -698,14 +698,12 @@ int marker_probe_unregister(const char *name, { struct marker_entry *entry; struct marker_probe_closure *old; - int ret = 0; + int ret = -ENOENT; mutex_lock(&markers_mutex); entry = get_marker(name); - if (!entry) { - ret = -ENOENT; + if (!entry) goto end; - } if (entry->rcu_pending) rcu_barrier(); old = marker_entry_remove_probe(entry, probe, probe_private); @@ -713,12 +711,15 @@ int marker_probe_unregister(const char *name, marker_update_probes(); /* may update entry */ mutex_lock(&markers_mutex); entry = get_marker(name); + if (!entry) + goto end; entry->oldptr = old; entry->rcu_pending = 1; /* write rcu_pending before calling the RCU callback */ smp_wmb(); call_rcu(&entry->rcu, free_old_closure); remove_marker(name); /* Ignore busy error message */ + ret = 0; end: mutex_unlock(&markers_mutex); return ret; diff --git a/kernel/module.c b/kernel/module.c index 901cd6ac2f11..5d437bffd8dc 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1933,8 +1933,15 @@ static struct module *load_module(void __user *umod, /* Set up license info based on the info section */ set_license(mod, get_modinfo(sechdrs, infoindex, "license")); + /* + * ndiswrapper is under GPL by itself, but loads proprietary modules. + * Don't use add_taint_module(), as it would prevent ndiswrapper from + * using GPL-only symbols it needs. + */ if (strcmp(mod->name, "ndiswrapper") == 0) - add_taint_module(mod, TAINT_PROPRIETARY_MODULE); + add_taint(TAINT_PROPRIETARY_MODULE); + + /* driverloader was caught wrongly pretending to be under GPL */ if (strcmp(mod->name, "driverloader") == 0) add_taint_module(mod, TAINT_PROPRIETARY_MODULE); @@ -2171,10 +2178,20 @@ sys_init_module(void __user *umod, wake_up(&module_wq); return ret; } + if (ret > 0) { + printk(KERN_WARNING "%s: '%s'->init suspiciously returned %d, " + "it should follow 0/-E convention\n" + KERN_WARNING "%s: loading module anyway...\n", + __func__, mod->name, ret, + __func__); + dump_stack(); + } - /* Now it's a first class citizen! */ - mutex_lock(&module_mutex); + /* Now it's a first class citizen! Wake up anyone waiting for it. */ mod->state = MODULE_STATE_LIVE; + wake_up(&module_wq); + + mutex_lock(&module_mutex); /* Drop initial reference. */ module_put(mod); unwind_remove_table(mod->unwind_info, 1); @@ -2183,7 +2200,6 @@ sys_init_module(void __user *umod, mod->init_size = 0; mod->init_text_size = 0; mutex_unlock(&module_mutex); - wake_up(&module_wq); return 0; } diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 79833170bb9c..6233f3b4ae66 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -190,7 +190,7 @@ config APM_EMULATION notification of APM "events" (e.g. battery status change). In order to use APM, you will need supporting software. For location - and more information, read <file:Documentation/pm.txt> and the + and more information, read <file:Documentation/power/pm.txt> and the Battery Powered Linux mini-HOWTO, available from <http://www.tldp.org/docs.html#howto>. diff --git a/kernel/power/process.c b/kernel/power/process.c index 7c2118f9597f..f1d0b345c9ba 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -75,22 +75,15 @@ void refrigerator(void) __set_current_state(save); } -static void fake_signal_wake_up(struct task_struct *p, int resume) +static void fake_signal_wake_up(struct task_struct *p) { unsigned long flags; spin_lock_irqsave(&p->sighand->siglock, flags); - signal_wake_up(p, resume); + signal_wake_up(p, 0); spin_unlock_irqrestore(&p->sighand->siglock, flags); } -static void send_fake_signal(struct task_struct *p) -{ - if (task_is_stopped(p)) - force_sig_specific(SIGSTOP, p); - fake_signal_wake_up(p, task_is_stopped(p)); -} - static int has_mm(struct task_struct *p) { return (p->mm && !(p->flags & PF_BORROWED_MM)); @@ -121,7 +114,7 @@ static int freeze_task(struct task_struct *p, int with_mm_only) if (freezing(p)) { if (has_mm(p)) { if (!signal_pending(p)) - fake_signal_wake_up(p, 0); + fake_signal_wake_up(p); } else { if (with_mm_only) ret = 0; @@ -135,7 +128,7 @@ static int freeze_task(struct task_struct *p, int with_mm_only) } else { if (has_mm(p)) { set_freeze_flag(p); - send_fake_signal(p); + fake_signal_wake_up(p); } else { if (with_mm_only) { ret = 0; @@ -182,15 +175,17 @@ static int try_to_freeze_tasks(int freeze_user_space) if (frozen(p) || !freezeable(p)) continue; - if (task_is_traced(p) && frozen(p->parent)) { - cancel_freezing(p); - continue; - } - if (!freeze_task(p, freeze_user_space)) continue; - if (!freezer_should_skip(p)) + /* + * Now that we've done set_freeze_flag, don't + * perturb a task in TASK_STOPPED or TASK_TRACED. + * It is "frozen enough". If the task does wake + * up, it will immediately call try_to_freeze. + */ + if (!task_is_stopped_or_traced(p) && + !freezer_should_skip(p)) todo++; } while_each_thread(g, p); read_unlock(&tasklist_lock); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 72a020cabb4c..5f91a07c4eac 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -447,7 +447,7 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) * of @bm->cur_zone_bm are updated. */ -static void memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, +static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, void **addr, unsigned int *bit_nr) { struct zone_bitmap *zone_bm; @@ -461,7 +461,8 @@ static void memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) { zone_bm = zone_bm->next; - BUG_ON(!zone_bm); + if (!zone_bm) + return -EFAULT; } bm->cur.zone_bm = zone_bm; } @@ -479,23 +480,40 @@ static void memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, pfn -= bb->start_pfn; *bit_nr = pfn % BM_BITS_PER_CHUNK; *addr = bb->data + pfn / BM_BITS_PER_CHUNK; + return 0; } static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) { void *addr; unsigned int bit; + int error; - memory_bm_find_bit(bm, pfn, &addr, &bit); + error = memory_bm_find_bit(bm, pfn, &addr, &bit); + BUG_ON(error); set_bit(bit, addr); } +static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) +{ + void *addr; + unsigned int bit; + int error; + + error = memory_bm_find_bit(bm, pfn, &addr, &bit); + if (!error) + set_bit(bit, addr); + return error; +} + static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) { void *addr; unsigned int bit; + int error; - memory_bm_find_bit(bm, pfn, &addr, &bit); + error = memory_bm_find_bit(bm, pfn, &addr, &bit); + BUG_ON(error); clear_bit(bit, addr); } @@ -503,8 +521,10 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) { void *addr; unsigned int bit; + int error; - memory_bm_find_bit(bm, pfn, &addr, &bit); + error = memory_bm_find_bit(bm, pfn, &addr, &bit); + BUG_ON(error); return test_bit(bit, addr); } @@ -709,8 +729,15 @@ static void mark_nosave_pages(struct memory_bitmap *bm) region->end_pfn << PAGE_SHIFT); for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) - if (pfn_valid(pfn)) - memory_bm_set_bit(bm, pfn); + if (pfn_valid(pfn)) { + /* + * It is safe to ignore the result of + * mem_bm_set_bit_check() here, since we won't + * touch the PFNs for which the error is + * returned anyway. + */ + mem_bm_set_bit_check(bm, pfn); + } } } diff --git a/kernel/printk.c b/kernel/printk.c index bee36100f110..9adc2a473e6e 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -666,7 +666,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) } /* Emit the output into the temporary buffer */ printed_len += vscnprintf(printk_buf + printed_len, - sizeof(printk_buf), fmt, args); + sizeof(printk_buf) - printed_len, fmt, args); /* * Copy the output into log_buf. If the caller didn't provide diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 987cfb7ade89..e9517014b57c 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -23,6 +23,10 @@ * to Suparna Bhattacharya for pushing me completely away * from atomic instructions on the read side. * + * - Added handling of Dynamic Ticks + * Copyright 2007 - Paul E. Mckenney <paulmck@us.ibm.com> + * - Steven Rostedt <srostedt@redhat.com> + * * Papers: http://www.rdrop.com/users/paulmck/RCU * * Design Document: http://lwn.net/Articles/253651/ @@ -409,6 +413,212 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp) } } +#ifdef CONFIG_NO_HZ + +DEFINE_PER_CPU(long, dynticks_progress_counter) = 1; +static DEFINE_PER_CPU(long, rcu_dyntick_snapshot); +static DEFINE_PER_CPU(int, rcu_update_flag); + +/** + * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI. + * + * If the CPU was idle with dynamic ticks active, this updates the + * dynticks_progress_counter to let the RCU handling know that the + * CPU is active. + */ +void rcu_irq_enter(void) +{ + int cpu = smp_processor_id(); + + if (per_cpu(rcu_update_flag, cpu)) + per_cpu(rcu_update_flag, cpu)++; + + /* + * Only update if we are coming from a stopped ticks mode + * (dynticks_progress_counter is even). + */ + if (!in_interrupt() && + (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) { + /* + * The following might seem like we could have a race + * with NMI/SMIs. But this really isn't a problem. + * Here we do a read/modify/write, and the race happens + * when an NMI/SMI comes in after the read and before + * the write. But NMI/SMIs will increment this counter + * twice before returning, so the zero bit will not + * be corrupted by the NMI/SMI which is the most important + * part. + * + * The only thing is that we would bring back the counter + * to a postion that it was in during the NMI/SMI. + * But the zero bit would be set, so the rest of the + * counter would again be ignored. + * + * On return from the IRQ, the counter may have the zero + * bit be 0 and the counter the same as the return from + * the NMI/SMI. If the state machine was so unlucky to + * see that, it still doesn't matter, since all + * RCU read-side critical sections on this CPU would + * have already completed. + */ + per_cpu(dynticks_progress_counter, cpu)++; + /* + * The following memory barrier ensures that any + * rcu_read_lock() primitives in the irq handler + * are seen by other CPUs to follow the above + * increment to dynticks_progress_counter. This is + * required in order for other CPUs to correctly + * determine when it is safe to advance the RCU + * grace-period state machine. + */ + smp_mb(); /* see above block comment. */ + /* + * Since we can't determine the dynamic tick mode from + * the dynticks_progress_counter after this routine, + * we use a second flag to acknowledge that we came + * from an idle state with ticks stopped. + */ + per_cpu(rcu_update_flag, cpu)++; + /* + * If we take an NMI/SMI now, they will also increment + * the rcu_update_flag, and will not update the + * dynticks_progress_counter on exit. That is for + * this IRQ to do. + */ + } +} + +/** + * rcu_irq_exit - Called from exiting Hard irq context. + * + * If the CPU was idle with dynamic ticks active, update the + * dynticks_progress_counter to put let the RCU handling be + * aware that the CPU is going back to idle with no ticks. + */ +void rcu_irq_exit(void) +{ + int cpu = smp_processor_id(); + + /* + * rcu_update_flag is set if we interrupted the CPU + * when it was idle with ticks stopped. + * Once this occurs, we keep track of interrupt nesting + * because a NMI/SMI could also come in, and we still + * only want the IRQ that started the increment of the + * dynticks_progress_counter to be the one that modifies + * it on exit. + */ + if (per_cpu(rcu_update_flag, cpu)) { + if (--per_cpu(rcu_update_flag, cpu)) + return; + + /* This must match the interrupt nesting */ + WARN_ON(in_interrupt()); + + /* + * If an NMI/SMI happens now we are still + * protected by the dynticks_progress_counter being odd. + */ + + /* + * The following memory barrier ensures that any + * rcu_read_unlock() primitives in the irq handler + * are seen by other CPUs to preceed the following + * increment to dynticks_progress_counter. This + * is required in order for other CPUs to determine + * when it is safe to advance the RCU grace-period + * state machine. + */ + smp_mb(); /* see above block comment. */ + per_cpu(dynticks_progress_counter, cpu)++; + WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1); + } +} + +static void dyntick_save_progress_counter(int cpu) +{ + per_cpu(rcu_dyntick_snapshot, cpu) = + per_cpu(dynticks_progress_counter, cpu); +} + +static inline int +rcu_try_flip_waitack_needed(int cpu) +{ + long curr; + long snap; + + curr = per_cpu(dynticks_progress_counter, cpu); + snap = per_cpu(rcu_dyntick_snapshot, cpu); + smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ + + /* + * If the CPU remained in dynticks mode for the entire time + * and didn't take any interrupts, NMIs, SMIs, or whatever, + * then it cannot be in the middle of an rcu_read_lock(), so + * the next rcu_read_lock() it executes must use the new value + * of the counter. So we can safely pretend that this CPU + * already acknowledged the counter. + */ + + if ((curr == snap) && ((curr & 0x1) == 0)) + return 0; + + /* + * If the CPU passed through or entered a dynticks idle p |
