From: Alex Wang Date: Fri, 4 Apr 2014 01:31:13 +0000 (-0700) Subject: ofproto-dpif-monitor: Fix deadlock. X-Git-Tag: sliver-openvswitch-2.2.90-1~3^2~3 X-Git-Url: http://git.onelab.eu/?p=sliver-openvswitch.git;a=commitdiff_plain;h=6d308b28c023e864925b1a6775b3b91d215e89bf ofproto-dpif-monitor: Fix deadlock. Commit 6b59b543 (ovs-thread: Use fair (but nonrecursive) rwlocks on glibc.) changed the rwlocks to nonrecursive, writer-biased lock. It also made the following deadlock possible. Assume BFD is used on both end of a link. Consider the following events: 1. Handler at one end received the BFD control packet with POLL flag set while holding the read lock of 'xlate_rwlock'. Since a BFD control packet with FINAL flag set should be sent back immediately, it calls the ofproto_dpif_monitor_port_send_soon(), in which, it tries to grab the 'monitor_mutex'. 2. The main thread needs to configure the ofproto-dpif-xlate module. It tries to grab the write lock of 'xlate_rwlock' and is blocked by event 1. 3. The monitor thread, after acquired the 'monitor_mutex', wants to acquire the read lock of 'xlate_rwlock'. Since the rwlock is now writer-biased, the attempt of acquiring read lock in event 3 will be blocked by event 2. This will subsequently cause the block of event 1, since monitor thread is holding the 'monitor_mutex'. So the deadlock happens. This commit resolves the above issue by removing the requirement of acquiring 'monitor_mutex' in ofproto_dpif_monitor_port_send_soon(). Signed-off-by: Alex Wang Acked-by: Ben Pfaff --- diff --git a/lib/guarded-list.h b/lib/guarded-list.h index 625865d42..df17cbc74 100644 --- a/lib/guarded-list.h +++ b/lib/guarded-list.h @@ -28,6 +28,11 @@ struct guarded_list { size_t n; }; +#define GUARDED_LIST_INITIALIZER(LIST) { \ + .mutex = OVS_MUTEX_INITIALIZER, \ + .list = LIST_INITIALIZER(&((LIST)->list)), \ + .n = 0 } + void guarded_list_init(struct guarded_list *); void guarded_list_destroy(struct guarded_list *); diff --git a/ofproto/ofproto-dpif-monitor.c b/ofproto/ofproto-dpif-monitor.c index 7c84e4b3a..764c7b199 100644 --- a/ofproto/ofproto-dpif-monitor.c +++ b/ofproto/ofproto-dpif-monitor.c @@ -21,6 +21,7 @@ #include "bfd.h" #include "cfm.h" +#include "guarded-list.h" #include "hash.h" #include "heap.h" #include "hmap.h" @@ -52,12 +53,24 @@ struct mport { uint8_t hw_addr[OFP_ETH_ALEN]; /* Hardware address. */ }; +/* Entry of the 'send_soon' list. Contains the pointer to the + * 'ofport_dpif'. Note, the pointed object is not protected, so + * users should always use the mport_find() to convert it to 'mport'. */ +struct send_soon_entry { + struct list list_node; /* In send_soon. */ + const struct ofport_dpif *ofport; +}; + /* hmap that contains "struct mport"s. */ static struct hmap monitor_hmap = HMAP_INITIALIZER(&monitor_hmap); /* heap for ordering mport based on bfd/cfm wakeup time. */ static struct heap monitor_heap; +/* guarded-list for storing the mports that need to send bfd/cfm control + * packet soon. */ +static struct guarded_list send_soon = GUARDED_LIST_INITIALIZER(&send_soon); + /* The monitor thread id. */ static pthread_t monitor_tid; /* True if the monitor thread is running. */ @@ -67,7 +80,9 @@ static struct latch monitor_exit_latch; static struct ovs_mutex monitor_mutex = OVS_MUTEX_INITIALIZER; static void *monitor_main(void *); +static void monitor_check_send_soon(struct ofpbuf *); static void monitor_run(void); +static void monitor_mport_run(struct mport *, struct ofpbuf *); static void mport_register(const struct ofport_dpif *, struct bfd *, struct cfm *, uint8_t[ETH_ADDR_LEN]) @@ -172,9 +187,8 @@ monitor_main(void * args OVS_UNUSED) * reconfigured monitoring ports are run in a timely manner. */ #define MONITOR_INTERVAL_MSEC 100 -/* Checks the sending of control packets on mports that have timed out. - * Sends the control packets if needed. Executes bfd and cfm periodic - * functions (run, wait) on those mports. */ +/* Checks the 'send_soon' list and the heap for mports that have timed + * out bfd/cfm sessions. */ static void monitor_run(void) { @@ -184,39 +198,26 @@ monitor_run(void) ofpbuf_use_stub(&packet, stub, sizeof stub); ovs_mutex_lock(&monitor_mutex); + + /* The monitor_check_send_soon() needs to be run twice. The first + * time is for preventing the same 'mport' from being processed twice + * (i.e. once from heap, the other from the 'send_soon' array). + * The second run is to cover the case when the control packet is sent + * via patch port and the other end needs to send back immediately. */ + monitor_check_send_soon(&packet); + prio_now = MSEC_TO_PRIO(time_msec()); /* Peeks the top of heap and checks if we should run this mport. */ while (!heap_is_empty(&monitor_heap) && heap_max(&monitor_heap)->priority >= prio_now) { - long long int next_wake_time; struct mport *mport; mport = CONTAINER_OF(heap_max(&monitor_heap), struct mport, heap_node); - if (mport->cfm && cfm_should_send_ccm(mport->cfm)) { - ofpbuf_clear(&packet); - cfm_compose_ccm(mport->cfm, &packet, mport->hw_addr); - ofproto_dpif_send_packet(mport->ofport, &packet); - } - if (mport->bfd && bfd_should_send_packet(mport->bfd)) { - ofpbuf_clear(&packet); - bfd_put_packet(mport->bfd, &packet, mport->hw_addr); - ofproto_dpif_send_packet(mport->ofport, &packet); - } - if (mport->cfm) { - cfm_run(mport->cfm); - cfm_wait(mport->cfm); - } - if (mport->bfd) { - bfd_run(mport->bfd); - bfd_wait(mport->bfd); - } - /* Computes the next wakeup time for this mport. */ - next_wake_time = MIN(bfd_wake_time(mport->bfd), - cfm_wake_time(mport->cfm)); - heap_change(&monitor_heap, &mport->heap_node, - MSEC_TO_PRIO(next_wake_time)); + monitor_mport_run(mport, &packet); } + monitor_check_send_soon(&packet); + /* Waits on the earliest next wakeup time. */ if (!heap_is_empty(&monitor_heap)) { long long int next_timeout, next_mport_wakeup; @@ -228,6 +229,61 @@ monitor_run(void) ovs_mutex_unlock(&monitor_mutex); ofpbuf_uninit(&packet); } + +/* Checks the 'send_soon' list for any mport that needs to send cfm/bfd + * control packet immediately, and calls monitor_mport_run(). */ +static void +monitor_check_send_soon(struct ofpbuf *packet) + OVS_REQUIRES(monitor_mutex) +{ + while (!guarded_list_is_empty(&send_soon)) { + struct send_soon_entry *entry; + struct mport *mport; + + entry = CONTAINER_OF(guarded_list_pop_front(&send_soon), + struct send_soon_entry, list_node); + mport = mport_find(entry->ofport); + if (mport) { + monitor_mport_run(mport, packet); + } + free(entry); + } +} + +/* Checks the sending of control packet on 'mport'. Sends the control + * packet if needed. Executes bfd and cfm periodic functions (run, wait) + * on 'mport'. And changes the location of 'mport' in heap based on next + * timeout. */ +static void +monitor_mport_run(struct mport *mport, struct ofpbuf *packet) + OVS_REQUIRES(monitor_mutex) +{ + long long int next_wake_time; + + if (mport->cfm && cfm_should_send_ccm(mport->cfm)) { + ofpbuf_clear(packet); + cfm_compose_ccm(mport->cfm, packet, mport->hw_addr); + ofproto_dpif_send_packet(mport->ofport, packet); + } + if (mport->bfd && bfd_should_send_packet(mport->bfd)) { + ofpbuf_clear(packet); + bfd_put_packet(mport->bfd, packet, mport->hw_addr); + ofproto_dpif_send_packet(mport->ofport, packet); + } + if (mport->cfm) { + cfm_run(mport->cfm); + cfm_wait(mport->cfm); + } + if (mport->bfd) { + bfd_run(mport->bfd); + bfd_wait(mport->bfd); + } + /* Computes the next wakeup time for this mport. */ + next_wake_time = MIN(bfd_wake_time(mport->bfd), + cfm_wake_time(mport->cfm)); + heap_change(&monitor_heap, &mport->heap_node, + MSEC_TO_PRIO(next_wake_time)); +} /* Creates the mport in monitor module if either bfd or cfm @@ -262,25 +318,16 @@ ofproto_dpif_monitor_port_update(const struct ofport_dpif *ofport, } } -/* Moves the mport on top of the heap. This is necessary when - * for example, bfd POLL is received and the mport should - * immediately send FINAL back. */ -void -ofproto_dpif_monitor_port_send_soon_safe(const struct ofport_dpif *ofport) -{ - ovs_mutex_lock(&monitor_mutex); - ofproto_dpif_monitor_port_send_soon(ofport); - ovs_mutex_unlock(&monitor_mutex); -} - +/* Registers the 'ofport' in the 'send_soon' list. We cannot directly + * insert the corresponding mport to the 'send_soon' list, since the + * 'send_soon' list is not updated when the mport is removed. + * + * Reader of the 'send_soon' list is responsible for freeing the entry. */ void ofproto_dpif_monitor_port_send_soon(const struct ofport_dpif *ofport) - OVS_REQUIRES(monitor_mutex) { - struct mport *mport; + struct send_soon_entry *entry = xzalloc(sizeof *entry); + entry->ofport = ofport; - mport = mport_find(ofport); - if (mport) { - heap_change(&monitor_heap, &mport->heap_node, LLONG_MAX); - } + guarded_list_push_back(&send_soon, &entry->list_node, SIZE_MAX); } diff --git a/ofproto/ofproto-dpif-monitor.h b/ofproto/ofproto-dpif-monitor.h index 12aa2bb6c..2c466c2f9 100644 --- a/ofproto/ofproto-dpif-monitor.h +++ b/ofproto/ofproto-dpif-monitor.h @@ -25,7 +25,6 @@ struct cfm; struct ofport_dpif; void ofproto_dpif_monitor_port_send_soon(const struct ofport_dpif *); -void ofproto_dpif_monitor_port_send_soon_safe(const struct ofport_dpif *); void ofproto_dpif_monitor_port_update(const struct ofport_dpif *, struct bfd *, struct cfm *, diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 57e84a72d..da538b7f8 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -1778,11 +1778,7 @@ process_special(struct xlate_ctx *ctx, const struct flow *flow, bfd_process_packet(xport->bfd, flow, packet); /* If POLL received, immediately sends FINAL back. */ if (bfd_should_send_packet(xport->bfd)) { - if (xport->peer) { - ofproto_dpif_monitor_port_send_soon(xport->ofport); - } else { - ofproto_dpif_monitor_port_send_soon_safe(xport->ofport); - } + ofproto_dpif_monitor_port_send_soon(xport->ofport); } } return SLOW_BFD;