X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=kernel%2Frcupdate.c;h=2058f88c7bbb3d9c9d0c7e195f3d67f25d8f64ed;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=f436993bd590a4b0bb145d4402d52cb2aaeb7c76;hpb=cee37fe97739d85991964371c1f3a745c00dd236;p=linux-2.6.git diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index f436993bd..2058f88c7 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -46,31 +47,64 @@ #include #include #include +#include /* Definition for rcupdate control block. */ -struct rcu_ctrlblk rcu_ctrlblk = - { .cur = -300, .completed = -300 }; -struct rcu_ctrlblk rcu_bh_ctrlblk = - { .cur = -300, .completed = -300 }; - -/* Bookkeeping of the progress of the grace period */ -struct rcu_state { - spinlock_t lock; /* Guard this struct and writes to rcu_ctrlblk */ - cpumask_t cpumask; /* CPUs that need to switch in order */ - /* for current batch to proceed. */ +static struct rcu_ctrlblk rcu_ctrlblk = { + .cur = -300, + .completed = -300, + .lock = SPIN_LOCK_UNLOCKED, + .cpumask = CPU_MASK_NONE, +}; +static struct rcu_ctrlblk rcu_bh_ctrlblk = { + .cur = -300, + .completed = -300, + .lock = SPIN_LOCK_UNLOCKED, + .cpumask = CPU_MASK_NONE, }; - -static struct rcu_state rcu_state ____cacheline_maxaligned_in_smp = - {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE }; -static struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp = - {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE }; DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L }; DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L }; /* Fake initialization required by compiler */ static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; -static int maxbatch = 10; +static int blimit = 10; +static int qhimark = 10000; +static int qlowmark = 100; +#ifdef CONFIG_SMP +static int rsinterval = 1000; +#endif + +static atomic_t rcu_barrier_cpu_count; +static DEFINE_MUTEX(rcu_barrier_mutex); +static struct completion rcu_barrier_completion; + +#ifdef CONFIG_SMP +static void force_quiescent_state(struct rcu_data *rdp, + struct rcu_ctrlblk *rcp) +{ + int cpu; + cpumask_t cpumask; + set_need_resched(); + if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) { + rdp->last_rs_qlen = rdp->qlen; + /* + * Don't send IPI to itself. With irqs disabled, + * rdp->cpu is the current cpu. + */ + cpumask = rcp->cpumask; + cpu_clear(rdp->cpu, cpumask); + for_each_cpu_mask(cpu, cpumask) + smp_send_reschedule(cpu); + } +} +#else +static inline void force_quiescent_state(struct rcu_data *rdp, + struct rcu_ctrlblk *rcp) +{ + set_need_resched(); +} +#endif /** * call_rcu - Queue an RCU callback for invocation after a grace period. @@ -95,6 +129,10 @@ void fastcall call_rcu(struct rcu_head *head, rdp = &__get_cpu_var(rcu_data); *rdp->nxttail = head; rdp->nxttail = &head->next; + if (unlikely(++rdp->qlen > qhimark)) { + rdp->blimit = INT_MAX; + force_quiescent_state(rdp, &rcu_ctrlblk); + } local_irq_restore(flags); } @@ -126,9 +164,60 @@ void fastcall call_rcu_bh(struct rcu_head *head, rdp = &__get_cpu_var(rcu_bh_data); *rdp->nxttail = head; rdp->nxttail = &head->next; + + if (unlikely(++rdp->qlen > qhimark)) { + rdp->blimit = INT_MAX; + force_quiescent_state(rdp, &rcu_bh_ctrlblk); + } + local_irq_restore(flags); } +/* + * Return the number of RCU batches processed thus far. Useful + * for debug and statistics. + */ +long rcu_batches_completed(void) +{ + return rcu_ctrlblk.completed; +} + +static void rcu_barrier_callback(struct rcu_head *notused) +{ + if (atomic_dec_and_test(&rcu_barrier_cpu_count)) + complete(&rcu_barrier_completion); +} + +/* + * Called with preemption disabled, and from cross-cpu IRQ context. + */ +static void rcu_barrier_func(void *notused) +{ + int cpu = smp_processor_id(); + struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + struct rcu_head *head; + + head = &rdp->barrier; + atomic_inc(&rcu_barrier_cpu_count); + call_rcu(head, rcu_barrier_callback); +} + +/** + * rcu_barrier - Wait until all the in-flight RCUs are complete. + */ +void rcu_barrier(void) +{ + BUG_ON(in_interrupt()); + /* Take cpucontrol mutex to protect against CPU hotplug */ + mutex_lock(&rcu_barrier_mutex); + init_completion(&rcu_barrier_completion); + atomic_set(&rcu_barrier_cpu_count, 0); + on_each_cpu(rcu_barrier_func, NULL, 0, 1); + wait_for_completion(&rcu_barrier_completion); + mutex_unlock(&rcu_barrier_mutex); +} +EXPORT_SYMBOL_GPL(rcu_barrier); + /* * Invoke the completed RCU callbacks. They are expected to be in * a per-cpu list. @@ -143,9 +232,12 @@ static void rcu_do_batch(struct rcu_data *rdp) next = rdp->donelist = list->next; list->func(list); list = next; - if (++count >= maxbatch) + rdp->qlen--; + if (++count >= rdp->blimit) break; } + if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark) + rdp->blimit = blimit; if (!rdp->donelist) rdp->donetail = &rdp->donelist; else @@ -159,13 +251,13 @@ static void rcu_do_batch(struct rcu_data *rdp) * This is done by rcu_start_batch. The start is not broadcasted to * all cpus, they must pick this up by comparing rcp->cur with * rdp->quiescbatch. All cpus are recorded in the - * rcu_state.cpumask bitmap. + * rcu_ctrlblk.cpumask bitmap. * - All cpus must go through a quiescent state. * Since the start of the grace period is not broadcasted, at least two * calls to rcu_check_quiescent_state are required: * The first call just notices that a new grace period is running. The * following calls check if there was a quiescent state since the beginning - * of the grace period. If so, it updates rcu_state.cpumask. If + * of the grace period. If so, it updates rcu_ctrlblk.cpumask. If * the bitmap is empty, then the grace period is completed. * rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace * period (if necessary). @@ -173,25 +265,29 @@ static void rcu_do_batch(struct rcu_data *rdp) /* * Register a new batch of callbacks, and start it up if there is currently no * active batch and the batch to be registered has not already occurred. - * Caller must hold rcu_state.lock. + * Caller must hold rcu_ctrlblk.lock. */ -static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp, - int next_pending) +static void rcu_start_batch(struct rcu_ctrlblk *rcp) { - if (next_pending) - rcp->next_pending = 1; - if (rcp->next_pending && rcp->completed == rcp->cur) { - /* Can't change, since spin lock held. */ - cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask); - rcp->next_pending = 0; - /* next_pending == 0 must be visible in __rcu_process_callbacks() - * before it can see new value of cur. + /* + * next_pending == 0 must be visible in + * __rcu_process_callbacks() before it can see new value of cur. */ smp_wmb(); rcp->cur++; + + /* + * Accessing nohz_cpu_mask before incrementing rcp->cur needs a + * Barrier Otherwise it can cause tickless idle CPUs to be + * included in rcp->cpumask, which will extend graceperiods + * unnecessarily. + */ + smp_mb(); + cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask); + } } @@ -200,13 +296,13 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp, * Clear it from the cpu mask and complete the grace period if it was the last * cpu. Start another grace period if someone has further entries pending */ -static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp) +static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp) { - cpu_clear(cpu, rsp->cpumask); - if (cpus_empty(rsp->cpumask)) { + cpu_clear(cpu, rcp->cpumask); + if (cpus_empty(rcp->cpumask)) { /* batch completed ! */ rcp->completed = rcp->cur; - rcu_start_batch(rcp, rsp, 0); + rcu_start_batch(rcp); } } @@ -216,7 +312,7 @@ static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp) * quiescent cycle, then indicate that it has done so. */ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp, - struct rcu_state *rsp, struct rcu_data *rdp) + struct rcu_data *rdp) { if (rdp->quiescbatch != rcp->cur) { /* start new grace period: */ @@ -241,15 +337,15 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp, return; rdp->qs_pending = 0; - spin_lock(&rsp->lock); + spin_lock(&rcp->lock); /* * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync * during cpu startup. Ignore the quiescent state. */ if (likely(rdp->quiescbatch == rcp->cur)) - cpu_quiet(rdp->cpu, rcp, rsp); + cpu_quiet(rdp->cpu, rcp); - spin_unlock(&rsp->lock); + spin_unlock(&rcp->lock); } @@ -270,28 +366,29 @@ static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list, } static void __rcu_offline_cpu(struct rcu_data *this_rdp, - struct rcu_ctrlblk *rcp, struct rcu_state *rsp, struct rcu_data *rdp) + struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { /* if the cpu going offline owns the grace period * we can block indefinitely waiting for it, so flush * it here */ - spin_lock_bh(&rsp->lock); + spin_lock_bh(&rcp->lock); if (rcp->cur != rcp->completed) - cpu_quiet(rdp->cpu, rcp, rsp); - spin_unlock_bh(&rsp->lock); + cpu_quiet(rdp->cpu, rcp); + spin_unlock_bh(&rcp->lock); rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail); rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail); - + rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail); } + static void rcu_offline_cpu(int cpu) { struct rcu_data *this_rdp = &get_cpu_var(rcu_data); struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data); - __rcu_offline_cpu(this_rdp, &rcu_ctrlblk, &rcu_state, + __rcu_offline_cpu(this_rdp, &rcu_ctrlblk, &per_cpu(rcu_data, cpu)); - __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk, &rcu_bh_state, + __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu)); put_cpu_var(rcu_data); put_cpu_var(rcu_bh_data); @@ -310,7 +407,7 @@ static void rcu_offline_cpu(int cpu) * This does the RCU processing work from tasklet context. */ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, - struct rcu_state *rsp, struct rcu_data *rdp) + struct rcu_data *rdp) { if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) { *rdp->donetail = rdp->curlist; @@ -319,8 +416,8 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, rdp->curtail = &rdp->curlist; } - local_irq_disable(); if (rdp->nxtlist && !rdp->curlist) { + local_irq_disable(); rdp->curlist = rdp->nxtlist; rdp->curtail = rdp->nxttail; rdp->nxtlist = NULL; @@ -340,24 +437,71 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, if (!rcp->next_pending) { /* and start it/schedule start if it's a new batch */ - spin_lock(&rsp->lock); - rcu_start_batch(rcp, rsp, 1); - spin_unlock(&rsp->lock); + spin_lock(&rcp->lock); + rcp->next_pending = 1; + rcu_start_batch(rcp); + spin_unlock(&rcp->lock); } - } else { - local_irq_enable(); } - rcu_check_quiescent_state(rcp, rsp, rdp); + + rcu_check_quiescent_state(rcp, rdp); if (rdp->donelist) rcu_do_batch(rdp); } static void rcu_process_callbacks(unsigned long unused) { - __rcu_process_callbacks(&rcu_ctrlblk, &rcu_state, - &__get_cpu_var(rcu_data)); - __rcu_process_callbacks(&rcu_bh_ctrlblk, &rcu_bh_state, - &__get_cpu_var(rcu_bh_data)); + __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data)); + __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data)); +} + +static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) +{ + /* This cpu has pending rcu entries and the grace period + * for them has completed. + */ + if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) + return 1; + + /* This cpu has no pending entries, but there are new entries */ + if (!rdp->curlist && rdp->nxtlist) + return 1; + + /* This cpu has finished callbacks to invoke */ + if (rdp->donelist) + return 1; + + /* The rcu core waits for a quiescent state from the cpu */ + if (rdp->quiescbatch != rcp->cur || rdp->qs_pending) + return 1; + + /* nothing to do */ + return 0; +} + +/* + * Check to see if there is any immediate RCU-related work to be done + * by the current CPU, returning 1 if so. This function is part of the + * RCU implementation; it is -not- an exported member of the RCU API. + */ +int rcu_pending(int cpu) +{ + return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) || + __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu)); +} + +/* + * Check to see if any future RCU-related work will need to be done + * by the current CPU, even if none need be done immediately, returning + * 1 if so. This function is part of the RCU implementation; it is -not- + * an exported member of the RCU API. + */ +int rcu_needs_cpu(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu); + + return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu)); } void rcu_check_callbacks(int cpu, int user) @@ -382,6 +526,7 @@ static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, rdp->quiescbatch = rcp->completed; rdp->qs_pending = 0; rdp->cpu = cpu; + rdp->blimit = blimit; } static void __devinit rcu_online_cpu(int cpu) @@ -394,7 +539,7 @@ static void __devinit rcu_online_cpu(int cpu) tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL); } -static int __devinit rcu_cpu_notify(struct notifier_block *self, +static int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; @@ -411,7 +556,7 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __devinitdata rcu_nb = { +static struct notifier_block rcu_nb = { .notifier_call = rcu_cpu_notify, }; @@ -475,8 +620,14 @@ void synchronize_kernel(void) synchronize_rcu(); } -module_param(maxbatch, int, 0); -EXPORT_SYMBOL(call_rcu); /* WARNING: GPL-only in April 2006. */ -EXPORT_SYMBOL(call_rcu_bh); /* WARNING: GPL-only in April 2006. */ +module_param(blimit, int, 0); +module_param(qhimark, int, 0); +module_param(qlowmark, int, 0); +#ifdef CONFIG_SMP +module_param(rsinterval, int, 0); +#endif +EXPORT_SYMBOL_GPL(rcu_batches_completed); +EXPORT_SYMBOL_GPL_FUTURE(call_rcu); /* WARNING: GPL-only in April 2006. */ +EXPORT_SYMBOL_GPL_FUTURE(call_rcu_bh); /* WARNING: GPL-only in April 2006. */ EXPORT_SYMBOL_GPL(synchronize_rcu); -EXPORT_SYMBOL(synchronize_kernel); /* WARNING: GPL-only in April 2006. */ +EXPORT_SYMBOL_GPL_FUTURE(synchronize_kernel); /* WARNING: GPL-only in April 2006. */