From 16d650e5a47cd0aa0430ab252330f0e66f3420c6 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar
Date: Wed, 4 Jan 2012 17:22:07 -0800
Subject: [PATCH] datapath: Fix OVS compat workqueue support.

Signed-off-by: Pravin B Shelar
Acked-by: Jesse Gross
---
 datapath/datapath.c                             |   9 +-
 datapath/linux/Modules.mk                       |   3 +-
 .../linux/compat/include/linux/workqueue.h      |  74 ++++--
 datapath/linux/compat/workqueue.c               | 213 ++++++++++++++++++
 4 files changed, 276 insertions(+), 23 deletions(-)
 create mode 100644 datapath/linux/compat/workqueue.c

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 281e86bf3..754cb32ff 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -2054,10 +2054,14 @@ static int __init dp_init(void)
 	if (err)
 		goto error;
 
-	err = ovs_tnl_init();
+	err = ovs_workqueues_init();
 	if (err)
 		goto error_genl_exec;
 
+	err = ovs_tnl_init();
+	if (err)
+		goto error_wq;
+
 	err = ovs_flow_init();
 	if (err)
 		goto error_tnl_exit;
@@ -2084,6 +2088,8 @@ error_flow_exit:
 	ovs_flow_exit();
 error_tnl_exit:
 	ovs_tnl_exit();
+error_wq:
+	ovs_workqueues_exit();
 error_genl_exec:
 	genl_exec_exit();
 error:
@@ -2098,6 +2104,7 @@ static void dp_cleanup(void)
 	ovs_vport_exit();
 	ovs_flow_exit();
 	ovs_tnl_exit();
+	ovs_workqueues_exit();
 	genl_exec_exit();
 }
 
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index f6cb88edd..40c3927ac 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -9,7 +9,8 @@ openvswitch_sources += \
 	linux/compat/netdevice.c \
 	linux/compat/reciprocal_div.c \
 	linux/compat/skbuff-openvswitch.c \
-	linux/compat/time.c
+	linux/compat/time.c \
+	linux/compat/workqueue.c
 openvswitch_headers += \
 	linux/compat/include/linux/compiler.h \
 	linux/compat/include/linux/compiler-gcc.h \
diff --git a/datapath/linux/compat/include/linux/workqueue.h b/datapath/linux/compat/include/linux/workqueue.h
index 01c6345e9..919afe350 100644
--- a/datapath/linux/compat/include/linux/workqueue.h
+++ b/datapath/linux/compat/include/linux/workqueue.h
@@ -1,41 +1,73 @@
 #ifndef __LINUX_WORKQUEUE_WRAPPER_H
 #define __LINUX_WORKQUEUE_WRAPPER_H 1
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
 #include_next <linux/workqueue.h>
+static inline int __init ovs_workqueues_init(void) { return 0; }
+static inline void ovs_workqueues_exit(void) {}
+
+#else
+#include <linux/timer.h>
+
+int __init ovs_workqueues_init(void);
+void ovs_workqueues_exit(void);
 
-#include <linux/version.h>
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
 /* Older kernels have an implementation of work queues with some very bad
  * characteristics when trying to cancel work (potential deadlocks, use after
- * free, etc.  Here we directly use timers instead for delayed work.  It's not
- * optimal but it is better than the alternative.  Note that work queues
- * normally run in process context but this will cause them to operate in
- * softirq context.
+ * free, etc.).  Therefore we implement a simple OVS-specific work queue using
+ * a single worker thread.  The work-queue API is kept similar for compatibility.
  */
+struct work_struct;
 
-#include <linux/timer.h>
+typedef void (*work_func_t)(struct work_struct *work);
 
-#undef DECLARE_DELAYED_WORK
-#define DECLARE_DELAYED_WORK(n, f) \
-	struct timer_list n = TIMER_INITIALIZER((void (*)(unsigned long))f, 0, 0)
+#define work_data_bits(work) ((unsigned long *)(&(work)->data))
 
-#define schedule_delayed_work rpl_schedule_delayed_work
-static inline int schedule_delayed_work(struct timer_list *timer, unsigned long delay)
-{
-	if (timer_pending(timer))
-		return 0;
+struct work_struct {
+#define WORK_STRUCT_PENDING 0	/* T if work item pending execution */
+	atomic_long_t data;
+	struct list_head entry;
+	work_func_t func;
+};
+
+#define WORK_DATA_INIT() ATOMIC_LONG_INIT(0)
+
+#define work_clear_pending(work) \
+	clear_bit(WORK_STRUCT_PENDING, work_data_bits(work))
+
+struct delayed_work {
+	struct work_struct work;
+	struct timer_list timer;
+};
 
-	mod_timer(timer, jiffies + delay);
-	return 1;
+#define __WORK_INITIALIZER(n, f) {				\
+	.data = WORK_DATA_INIT(),				\
+	.entry = { &(n).entry, &(n).entry },			\
+	.func = (f),						\
 }
 
-#define cancel_delayed_work_sync rpl_cancel_delayed_work_sync
-static inline int cancel_delayed_work_sync(struct timer_list *timer)
-{
-	return del_timer_sync(timer);
+#define __DELAYED_WORK_INITIALIZER(n, f) {			\
+	.work = __WORK_INITIALIZER((n).work, (f)),		\
+	.timer = TIMER_INITIALIZER(NULL, 0, 0),			\
 }
 
+#define DECLARE_DELAYED_WORK(n, f)				\
+	struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f)
+
+#define schedule_delayed_work rpl_schedule_delayed_work
+int schedule_delayed_work(struct delayed_work *dwork, unsigned long delay);
+
+#define cancel_delayed_work_sync rpl_cancel_delayed_work_sync
+int cancel_delayed_work_sync(struct delayed_work *dwork);
+
+#define INIT_WORK(_work, _func)					\
+	do {							\
+		(_work)->data = (atomic_long_t) WORK_DATA_INIT(); \
+		INIT_LIST_HEAD(&(_work)->entry);		\
+		(_work)->func = (_func);			\
+	} while (0)
+
 #endif /* kernel version < 2.6.23 */
 
 #endif
diff --git a/datapath/linux/compat/workqueue.c b/datapath/linux/compat/workqueue.c
new file mode 100644
index 000000000..883665bf4
--- /dev/null
+++ b/datapath/linux/compat/workqueue.c
@@ -0,0 +1,213 @@
+/*
+ * Derived from the kernel/workqueue.c
+ *
+ * This is the generic async execution mechanism.  Work items are
+ * executed in process context.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/signal.h>
+#include <linux/completion.h>
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+#include <linux/hardirq.h>
+#include <linux/mempolicy.h>
+#include <linux/kallsyms.h>
+#include <linux/debug_locks.h>
+#include <linux/lockdep.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
+
+static spinlock_t wq_lock;
+static struct list_head workq;
+static wait_queue_head_t more_work;
+static struct task_struct *workq_thread;
+static struct work_struct *current_work;
+
+static void queue_work(struct work_struct *work)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&wq_lock, flags);
+	list_add_tail(&work->entry, &workq);
+	wake_up(&more_work);
+	spin_unlock_irqrestore(&wq_lock, flags);
+}
+
+static void _delayed_work_timer_fn(unsigned long __data)
+{
+	struct delayed_work *dwork = (struct delayed_work *)__data;
+
+	queue_work(&dwork->work);
+}
+
+static void __queue_delayed_work(struct delayed_work *dwork,
+				 unsigned long delay)
+{
+	struct timer_list *timer = &dwork->timer;
+	struct work_struct *work = &dwork->work;
+
+	BUG_ON(timer_pending(timer));
+	BUG_ON(!list_empty(&work->entry));
+
+	timer->expires = jiffies + delay;
+	timer->data = (unsigned long)dwork;
+	timer->function = _delayed_work_timer_fn;
+
+	add_timer(timer);
+}
+
+int schedule_delayed_work(struct delayed_work *dwork, unsigned long delay)
+{
+	if (test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(&dwork->work)))
+		return 0;
+
+	if (delay == 0)
+		queue_work(&dwork->work);
+	else
+		__queue_delayed_work(dwork, delay);
+
+	return 1;
+}
+
+struct wq_barrier {
+	struct work_struct work;
+	struct completion done;
+};
+
+static void wq_barrier_func(struct work_struct *work)
+{
+	struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
+
+	complete(&barr->done);
+}
+
+static void workqueue_barrier(struct work_struct *work)
+{
+	bool need_barrier;
+	struct wq_barrier barr;
+
+	spin_lock_irq(&wq_lock);
+	if (current_work != work)
+		need_barrier = false;
+	else {
+		INIT_WORK(&barr.work, wq_barrier_func);
+		init_completion(&barr.done);
+		list_add(&barr.work.entry, &workq);
+		wake_up(&more_work);
+		need_barrier = true;
+	}
+	spin_unlock_irq(&wq_lock);
+
+	if (need_barrier)
+		wait_for_completion(&barr.done);
+}
+
+static int try_to_grab_pending(struct work_struct *work)
+{
+	int ret;
+
+	BUG_ON(in_interrupt());
+
+	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work)))
+		return 0;
+
+	spin_lock_irq(&wq_lock);
+	if (!list_empty(&work->entry)) {
+		list_del_init(&work->entry);
+		ret = 0;
+	} else
+		/* Already executed, retry. */
+		ret = -1;
+	spin_unlock_irq(&wq_lock);
+
+	return ret;
+}
+
+static int __cancel_work_timer(struct work_struct *work,
+			       struct timer_list *timer)
+{
+	int ret;
+
+	for (;;) {
+		ret = (timer && likely(del_timer(timer)));
+		if (ret) /* Was active timer, return true. */
+			break;
+
+		/* Inactive timer case */
+		ret = try_to_grab_pending(work);
+		if (!ret)
+			break;
+	}
+	workqueue_barrier(work);
+	work_clear_pending(work);
+	return ret;
+}
+
+int cancel_delayed_work_sync(struct delayed_work *dwork)
+{
+	return __cancel_work_timer(&dwork->work, &dwork->timer);
+}
+
+static void run_workqueue(void)
+{
+	spin_lock_irq(&wq_lock);
+	while (!list_empty(&workq)) {
+		struct work_struct *work = list_entry(workq.next,
+				struct work_struct, entry);
+		work_func_t f = work->func;
+
+		list_del_init(workq.next);
+		current_work = work;
+		spin_unlock_irq(&wq_lock);
+
+		work_clear_pending(work);
+		f(work);
+
+		BUG_ON(in_interrupt());
+		spin_lock_irq(&wq_lock);
+		current_work = NULL;
+	}
+	spin_unlock_irq(&wq_lock);
+}
+
+static int worker_thread(void *dummy)
+{
+	for (;;) {
+		wait_event_interruptible(more_work,
+				(kthread_should_stop() || !list_empty(&workq)));
+
+		if (kthread_should_stop())
+			break;
+
+		run_workqueue();
+	}
+
+	return 0;
+}
+
+int __init ovs_workqueues_init(void)
+{
+	spin_lock_init(&wq_lock);
+	INIT_LIST_HEAD(&workq);
+	init_waitqueue_head(&more_work);
+
+	workq_thread = kthread_create(worker_thread, NULL, "ovs_workq");
+	if (IS_ERR(workq_thread))
+		return PTR_ERR(workq_thread);
+
+	wake_up_process(workq_thread);
+	return 0;
+}
+
+void ovs_workqueues_exit(void)
+{
+	BUG_ON(!list_empty(&workq));
+	kthread_stop(workq_thread);
+}
+#endif
-- 
2.43.0
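
For readers working against this compat layer, the sketch below shows how a consumer would drive the delayed-work API it emulates. This is a minimal illustration, not part of the patch, and the example_* names are hypothetical. On kernels >= 2.6.23 the calls resolve to the native workqueue implementation; on older kernels they land on the single-thread queue added above.

#include <linux/module.h>
#include <linux/workqueue.h>	/* OVS builds pick up the compat wrapper */

static void example_cb(struct work_struct *work);
static DECLARE_DELAYED_WORK(example_dwork, example_cb);

static void example_cb(struct work_struct *work)
{
	/* On pre-2.6.23 kernels this runs in the "ovs_workq" kthread.
	 * The pending bit is cleared before the callback starts, so
	 * re-arming from here works. */
	schedule_delayed_work(&example_dwork, HZ);
}

static int __init example_init(void)
{
	schedule_delayed_work(&example_dwork, HZ);	/* fire in ~1 second */
	return 0;
}

static void __exit example_exit(void)
{
	/* Blocks until any in-flight callback has finished. */
	cancel_delayed_work_sync(&example_dwork);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");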
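
The return values follow the old kernel contract, which the implementation above preserves. A quick summary for a hypothetical delayed work item dw, read off the code in this patch:

	schedule_delayed_work(&dw, HZ);	/* 1: pending bit set, timer armed */
	schedule_delayed_work(&dw, HZ);	/* 0: already pending, no-op */
	cancel_delayed_work_sync(&dw);	/* 1 only if it deactivated an armed
					 * timer; 0 if dw was idle, was dequeued
					 * before running, or had already begun
					 * executing (in which case it waits for
					 * the callback to finish) */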
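
One caveat follows directly from the single-worker design: on pre-2.6.23 kernels, cancel_delayed_work_sync() must not be called from inside a work callback. The hypothetical bad_cb below (not in the patch) shows the failure mode: workqueue_barrier() sees current_work == work, queues a wq_barrier, and then waits for a completion that only the already-blocked worker thread could signal.

static void bad_cb(struct work_struct *work)
{
	struct delayed_work *dwork =
		container_of(work, struct delayed_work, work);

	/* Deadlocks on the compat path: waits for a barrier work item
	 * that can only run on this same, now-blocked, worker thread. */
	cancel_delayed_work_sync(dwork);
}

Synchronously cancelling a work item from its own callback is likewise disallowed with native workqueues, so callers that are portable across both paths are unaffected.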