From: Ben Pfaff <blp@nicira.com> Date: Tue, 9 Sep 2008 19:53:47 +0000 (-0700) Subject: Send of0 packets from workqueue, to avoid recursive locking of ofN device. X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=71f13ed0b;p=sliver-openvswitch.git Send of0 packets from workqueue, to avoid recursive locking of ofN device. lockdep reported the following warning, showing that dev_queue_xmit() for of0 and for another ethernet device (eth0?) were taking locks of the same class. It was not a problem in this particular case, because of0 != eth0, but if a flow were to send a packet back to of0 then we would have a deadlock. Solution: use a workqueue to send packets to avoid recursive locking. We will still waste a lot of CPU time if we get a packet that loops back to of0. Solution for that still needed. ============================================= [ INFO: possible recursive locking detected ] 2.6.26.5 #1 --------------------------------------------- memcheck/1258 is trying to acquire lock: (_xmit_ETHER){-+..}, at: [<c025b126>] __qdisc_run+0xa0/0x18a but task is already holding lock: (_xmit_ETHER){-+..}, at: [<c024f3cd>] dev_queue_xmit+0x23c/0x334 other info that might help us debug this: 9 locks held by memcheck/1258: #0: (genl_mutex){--..}, at: [<c0262037>] genl_rcv+0x12/0x2b #1: (dp_mutex){--..}, at: [<c88e9a13>] dp_genl_openflow+0x58/0x91 [openflow_mo d] #2: (rcu_read_lock){..--}, at: [<c024e749>] netif_receive_skb+0x9e/0x328 #3: (rcu_read_lock){..--}, at: [<c0267f77>] ip_local_deliver_finish+0x2a/0x1d7 #4: (slock-AF_INET/1){-+..}, at: [<c027f1b8>] tcp_v4_rcv+0x29f/0x5ef #5: (rcu_read_lock){..--}, at: [<c024f308>] dev_queue_xmit+0x177/0x334 #6: (_xmit_ETHER){-+..}, at: [<c024f3cd>] dev_queue_xmit+0x23c/0x334 #7: (rcu_read_lock){..--}, at: [<c88ea214>] dp_dev_xmit+0x0/0x6c [openflow_mod ] #8: (rcu_read_lock){..--}, at: [<c024f308>] dev_queue_xmit+0x177/0x334 stack backtrace: Pid: 1258, comm: memcheck Not tainted 2.6.26.5 #1 [<c02c11cc>] ? printk+0xf/0x13 [<c01368c5>] __lock_acquire+0x8c2/0xbe4 [<c0134d35>] ? add_lock_to_list+0x64/0x8a [<c0107a78>] ? native_sched_clock+0x82/0x94 [<c0136c3e>] lock_acquire+0x57/0x73 [<c025b126>] ? __qdisc_run+0xa0/0x18a [<c02c3919>] _spin_lock+0x1c/0x49 [<c025b126>] ? __qdisc_run+0xa0/0x18a [<c025b126>] __qdisc_run+0xa0/0x18a [<c024f382>] dev_queue_xmit+0x1f1/0x334 [<c88e9cde>] xmit_skb+0x5b/0x65 [openflow_mod] [<c88e9e60>] dp_output_port+0x178/0x1ae [openflow_mod] [<c88eaf91>] do_output+0x2a/0x4c [openflow_mod] [<c88eb11f>] execute_actions+0x16c/0x198 [openflow_mod] [<c88eb622>] run_flow_through_tables+0xe9/0xf6 [openflow_mod] [<c88eb63e>] fwd_port_input+0xf/0x3d [openflow_mod] [<c88ea260>] dp_dev_xmit+0x4c/0x6c [openflow_mod] [<c88ea214>] ? dp_dev_xmit+0x0/0x6c [openflow_mod] [<c024f042>] dev_hard_start_xmit+0x20f/0x276 [<c024f3e2>] dev_queue_xmit+0x251/0x334 [<c026bfcb>] ip_finish_output+0x1ea/0x222 [<c026c081>] ip_output+0x7e/0x83 [<c026b37a>] ip_local_out+0x18/0x1b [<c026baa2>] ip_queue_xmit+0x288/0x2c9 [<c0136a0c>] ? __lock_acquire+0xa09/0xbe4 [<c0111264>] ? kernel_map_pages+0xfc/0x113 [<c027ddad>] ? tcp_v4_send_check+0x80/0xba [<c027a281>] tcp_transmit_skb+0x695/0x6cd [<c016060a>] ? __kmalloc_track_caller+0xee/0x12a [<c024aacc>] ? __alloc_skb+0x51/0xff [<c027a43c>] tcp_send_ack+0xdf/0xe7 [<c02781e4>] tcp_rcv_state_process+0x389/0xc33 [<c027eebd>] tcp_v4_do_rcv+0x3bd/0x419 [<c027f2c7>] tcp_v4_rcv+0x3ae/0x5ef [<c026805f>] ip_local_deliver_finish+0x112/0x1d7 [<c0268185>] ip_local_deliver+0x61/0x6a [<c0267d14>] ip_rcv_finish+0x2a4/0x2c3 [<c0267f23>] ip_rcv+0x1f0/0x21a [<c024e990>] netif_receive_skb+0x2e5/0x328 [<c024ea52>] process_backlog+0x7f/0xca [<c024d867>] net_rx_action+0x72/0x127 [<c0120ea1>] __do_softirq+0x7b/0xf2 [<c0105a5d>] do_softirq+0x66/0xb3 [<c024ec69>] netif_rx_ni+0x29/0x2e [<c88ea2cd>] dp_dev_recv+0x4d/0x6c [openflow_mod] [<c88e9e06>] dp_output_port+0x11e/0x1ae [openflow_mod] [<c88eaf91>] do_output+0x2a/0x4c [openflow_mod] [<c88eb11f>] execute_actions+0x16c/0x198 [openflow_mod] [<c88eb312>] recv_flow+0x1c7/0x2a5 [openflow_mod] [<c88eb14b>] ? recv_flow+0x0/0x2a5 [openflow_mod] [<c88eab37>] fwd_control_input+0x53/0x60 [openflow_mod] [<c88e9a27>] dp_genl_openflow+0x6c/0x91 [openflow_mod] [<c02621ce>] genl_rcv_msg+0x17e/0x198 [<c0262050>] ? genl_rcv_msg+0x0/0x198 [<c02614c6>] netlink_rcv_skb+0x30/0x76 [<c0262043>] genl_rcv+0x1e/0x2b [<c0261053>] netlink_unicast+0x1a9/0x20f [<c02612dc>] netlink_sendmsg+0x223/0x230 [<c0245384>] sock_sendmsg+0xca/0xe1 [<c012c501>] ? autoremove_wake_function+0x0/0x33 [<c015f6ce>] ? cache_free_debugcheck+0x2a3/0x2be [<c0107a78>] ? native_sched_clock+0x82/0x94 [<c0134382>] ? lock_release_holdtime+0x1a/0x115 [<c01f52b7>] ? copy_from_user+0x34/0x11b [<c024b6a5>] ? verify_iovec+0x40/0x6f [<c02454da>] sys_sendmsg+0x13f/0x192 [<c0168d51>] ? pipe_write+0x434/0x43f [<c0107a78>] ? native_sched_clock+0x82/0x94 [<c0107a78>] ? native_sched_clock+0x82/0x94 [<c0134382>] ? lock_release_holdtime+0x1a/0x115 [<c0107a78>] ? native_sched_clock+0x82/0x94 [<c0134382>] ? lock_release_holdtime+0x1a/0x115 [<c0246189>] sys_socketcall+0x14e/0x169 [<c0102d43>] ? restore_nocheck+0x12/0x15 [<c0102ce2>] syscall_call+0x7/0xb ======================= --- diff --git a/datapath/dp_dev.c b/datapath/dp_dev.c index 4601852d8..7817d0d6d 100644 --- a/datapath/dp_dev.c +++ b/datapath/dp_dev.c @@ -3,6 +3,7 @@ #include <linux/etherdevice.h> #include <linux/rcupdate.h> #include <linux/skbuff.h> +#include <linux/workqueue.h> #include "datapath.h" #include "forward.h" @@ -10,8 +11,11 @@ struct dp_dev { struct net_device_stats stats; struct datapath *dp; + struct sk_buff_head xmit_queue; + struct work_struct xmit_work; }; + static struct dp_dev *dp_dev_priv(struct net_device *netdev) { return netdev_priv(netdev); @@ -60,14 +64,37 @@ static int dp_dev_xmit(struct sk_buff *skb, struct net_device *netdev) dp_dev->stats.tx_packets++; dp_dev->stats.tx_bytes += skb->len; - skb_reset_mac_header(skb); - rcu_read_lock(); - fwd_port_input(dp->chain, skb, OFPP_LOCAL); - rcu_read_unlock(); + if (skb_queue_len(&dp_dev->xmit_queue) >= dp->netdev->tx_queue_len) { + /* Queue overflow. Stop transmitter. */ + netif_stop_queue(dp->netdev); + + /* We won't see all dropped packets individually, so overrun + * error is appropriate. */ + dp_dev->stats.tx_fifo_errors++; + } + skb_queue_tail(&dp_dev->xmit_queue, skb); + dp->netdev->trans_start = jiffies; + + schedule_work(&dp_dev->xmit_work); return 0; } +static void dp_dev_do_xmit(struct work_struct *work) +{ + struct dp_dev *dp_dev = container_of(work, struct dp_dev, xmit_work); + struct datapath *dp = dp_dev->dp; + struct sk_buff *skb; + + while ((skb = skb_dequeue(&dp_dev->xmit_queue)) != NULL) { + skb_reset_mac_header(skb); + rcu_read_lock(); + fwd_port_input(dp->chain, skb, OFPP_LOCAL); + rcu_read_unlock(); + } + netif_wake_queue(dp->netdev); +} + static int dp_dev_open(struct net_device *netdev) { netif_start_queue(netdev); @@ -89,7 +116,7 @@ do_setup(struct net_device *netdev) netdev->hard_start_xmit = dp_dev_xmit; netdev->open = dp_dev_open; netdev->stop = dp_dev_stop; - netdev->tx_queue_len = 0; + netdev->tx_queue_len = 100; netdev->set_mac_address = dp_dev_mac_addr; netdev->flags = IFF_BROADCAST | IFF_MULTICAST; @@ -121,14 +148,19 @@ int dp_dev_setup(struct datapath *dp) dp_dev = dp_dev_priv(netdev); dp_dev->dp = dp; + skb_queue_head_init(&dp_dev->xmit_queue); + INIT_WORK(&dp_dev->xmit_work, dp_dev_do_xmit); dp->netdev = netdev; return 0; } void dp_dev_destroy(struct datapath *dp) { + struct dp_dev *dp_dev = dp_dev_priv(dp->netdev); + netif_tx_disable(dp->netdev); synchronize_net(); + skb_queue_purge(&dp_dev->xmit_queue); unregister_netdev(dp->netdev); free_netdev(dp->netdev); }