From 1954e6bbcb7dabbbcee3dfc6f0363e32fde255b0 Mon Sep 17 00:00:00 2001 From: Alex Wang Date: Fri, 7 Mar 2014 10:57:36 -0800 Subject: [PATCH] dpif: Change dpif API to allow multiple handler threads read upcall. This commit changes the API in 'dpif-provider.h' to allow multiple handler threads call dpif_recv() simultaneously. Signed-off-by: Alex Wang Acked-by: Ben Pfaff --- lib/dpif-linux.c | 21 ++++++++--- lib/dpif-netdev.c | 14 +++++-- lib/dpif-provider.h | 59 +++++++++++++++++++++++------ lib/dpif.c | 70 ++++++++++++++++++++++++++++------- lib/dpif.h | 36 +++++++++++++----- ofproto/ofproto-dpif-upcall.c | 16 ++++++-- ofproto/ofproto-dpif-xlate.c | 2 +- ofproto/ofproto-dpif.c | 2 +- 8 files changed, 170 insertions(+), 50 deletions(-) diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index 6f21fc47d..779f764e6 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -171,7 +171,7 @@ static unsigned int ovs_vport_mcgroup; static int dpif_linux_init(void); static int open_dpif(const struct dpif_linux_dp *, struct dpif **); static uint32_t dpif_linux_port_get_pid(const struct dpif *, - odp_port_t port_no); + odp_port_t port_no, uint32_t hash); static int dpif_linux_refresh_channels(struct dpif *); static void dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *, @@ -307,7 +307,7 @@ destroy_channels(struct dpif_linux *dpif) dpif->n_events = dpif->event_offset = 0; /* Don't close dpif->epoll_fd since that would cause other threads that - * call dpif_recv_wait(dpif) to wait on an arbitrary fd or a closed fd. */ + * call dpif_recv_wait() to wait on an arbitrary fd or a closed fd. 
*/ } static int @@ -678,7 +678,8 @@ dpif_linux_port_query_by_name(const struct dpif *dpif, const char *devname, } static uint32_t -dpif_linux_port_get_pid(const struct dpif *dpif_, odp_port_t port_no) +dpif_linux_port_get_pid(const struct dpif *dpif_, odp_port_t port_no, + uint32_t hash OVS_UNUSED) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); uint32_t port_idx = odp_to_u32(port_no); @@ -1459,6 +1460,13 @@ dpif_linux_recv_set(struct dpif *dpif_, bool enable) return error; } +static int +dpif_linux_handlers_set(struct dpif *dpif_ OVS_UNUSED, + uint32_t n_handlers OVS_UNUSED) +{ + return 0; +} + static int dpif_linux_queue_to_priority(const struct dpif *dpif OVS_UNUSED, uint32_t queue_id, uint32_t *priority) @@ -1605,8 +1613,8 @@ dpif_linux_recv__(struct dpif *dpif_, struct dpif_upcall *upcall, } static int -dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall, - struct ofpbuf *buf) +dpif_linux_recv(struct dpif *dpif_, uint32_t handler_id OVS_UNUSED, + struct dpif_upcall *upcall, struct ofpbuf *buf) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); int error; @@ -1619,7 +1627,7 @@ dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall, } static void -dpif_linux_recv_wait(struct dpif *dpif_) +dpif_linux_recv_wait(struct dpif *dpif_, uint32_t handler_id OVS_UNUSED) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); @@ -1682,6 +1690,7 @@ const struct dpif_class dpif_linux_class = { dpif_linux_execute, dpif_linux_operate, dpif_linux_recv_set, + dpif_linux_handlers_set, dpif_linux_queue_to_priority, dpif_linux_recv, dpif_linux_recv_wait, diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 21f0d9a58..1c23739eb 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1478,6 +1478,13 @@ dpif_netdev_recv_set(struct dpif *dpif OVS_UNUSED, bool enable OVS_UNUSED) return 0; } +static int +dpif_netdev_handlers_set(struct dpif *dpif OVS_UNUSED, + uint32_t n_handlers OVS_UNUSED) +{ + return 0; +} + static int dpif_netdev_queue_to_priority(const 
struct dpif *dpif OVS_UNUSED, uint32_t queue_id, uint32_t *priority) @@ -1502,8 +1509,8 @@ find_nonempty_queue(struct dp_netdev *dp) } static int -dpif_netdev_recv(struct dpif *dpif, struct dpif_upcall *upcall, - struct ofpbuf *buf) +dpif_netdev_recv(struct dpif *dpif, uint32_t n_handlers OVS_UNUSED, + struct dpif_upcall *upcall, struct ofpbuf *buf) { struct dp_netdev *dp = get_dp_netdev(dpif); struct dp_netdev_queue *q; @@ -1529,7 +1536,7 @@ dpif_netdev_recv(struct dpif *dpif, struct dpif_upcall *upcall, } static void -dpif_netdev_recv_wait(struct dpif *dpif) +dpif_netdev_recv_wait(struct dpif *dpif, uint32_t handler_id OVS_UNUSED) { struct dp_netdev *dp = get_dp_netdev(dpif); uint64_t seq; @@ -1923,6 +1930,7 @@ const struct dpif_class dpif_netdev_class = { dpif_netdev_execute, NULL, /* operate */ dpif_netdev_recv_set, + dpif_netdev_handlers_set, dpif_netdev_queue_to_priority, dpif_netdev_recv, dpif_netdev_recv_wait, diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index dd4f74ef7..615f2c680 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -146,7 +146,16 @@ struct dpif_class { /* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE * actions as the OVS_USERSPACE_ATTR_PID attribute's value, for use in - * flows whose packets arrived on port 'port_no'. + * flows whose packets arrived on port 'port_no'. In the case where the + * provider allocates multiple Netlink PIDs to a single port, it may use + * 'hash' to spread load among them. The caller need not use a particular + * hash function; a 5-tuple hash is suitable. + * + * (The datapath implementation might use some different hash function for + * distributing packets received via flow misses among PIDs. This means + * that packets received via flow misses might be reordered relative to + * packets received via userspace actions. This is not ordinarily a + * problem.) * * A 'port_no' of UINT32_MAX should be treated as a special case. 
The * implementation should return a reserved PID, not allocated to any port, @@ -158,7 +167,8 @@ struct dpif_class { * * A dpif provider that doesn't have meaningful Netlink PIDs can use NULL * for this function. This is equivalent to always returning 0. */ - uint32_t (*port_get_pid)(const struct dpif *dpif, odp_port_t port_no); + uint32_t (*port_get_pid)(const struct dpif *dpif, odp_port_t port_no, + uint32_t hash); /* Attempts to begin dumping the ports in a dpif. On success, returns 0 * and initializes '*statep' with any data needed for iteration. On @@ -355,14 +365,38 @@ struct dpif_class { * updating flows as necessary if it does this. */ int (*recv_set)(struct dpif *dpif, bool enable); + /* Refreshes the poll loops and Netlink sockets associated to each port, + * when the number of upcall handlers (upcall receiving thread) is changed + * to 'n_handlers' and receiving packets for 'dpif' is enabled by + * recv_set(). + * + * Since multiple upcall handlers can read upcalls simultaneously from + * 'dpif', each port can have multiple Netlink sockets, one per upcall + * handler. So, handlers_set() is responsible for the following tasks: + * + * When receiving upcall is enabled, extends or creates the + * configuration to support: + * + * - 'n_handlers' Netlink sockets for each port. + * + * - 'n_handlers' poll loops, one for each upcall handler. + * + * - registering the Netlink sockets for the same upcall handler to + * the corresponding poll loop. + * */ + int (*handlers_set)(struct dpif *dpif, uint32_t n_handlers); + /* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a * priority value used for setting packet priority. */ int (*queue_to_priority)(const struct dpif *dpif, uint32_t queue_id, uint32_t *priority); - /* Polls for an upcall from 'dpif'. If successful, stores the upcall into - * '*upcall', using 'buf' for storage. Should only be called if 'recv_set' - * has been used to enable receiving packets from 'dpif'. 
+ /* Polls for an upcall from 'dpif' for an upcall handler. Since there + * can be multiple poll loops (see ->handlers_set()), 'handler_id' is + * needed as index to identify the corresponding poll loop. If + * successful, stores the upcall into '*upcall', using 'buf' for + * storage. Should only be called if 'recv_set' has been used to enable + * receiving packets from 'dpif'. * * The implementation should point 'upcall->key' and 'upcall->userdata' * (if any) into data in the caller-provided 'buf'. The implementation may @@ -378,12 +412,15 @@ struct dpif_class { * * This function must not block. If no upcall is pending when it is * called, it should return EAGAIN without blocking. */ - int (*recv)(struct dpif *dpif, struct dpif_upcall *upcall, - struct ofpbuf *buf); - - /* Arranges for the poll loop to wake up when 'dpif' has a message queued - * to be received with the recv member function. */ - void (*recv_wait)(struct dpif *dpif); + int (*recv)(struct dpif *dpif, uint32_t handler_id, + struct dpif_upcall *upcall, struct ofpbuf *buf); + + /* Arranges for the poll loop for an upcall handler to wake up when 'dpif' + * has a message queued to be received with the recv member function. + * Since there can be multiple poll loops (see ->handlers_set()), + * 'handler_id' is needed as index to identify the corresponding poll loop. + * */ + void (*recv_wait)(struct dpif *dpif, uint32_t handler_id); /* Throws away any queued upcalls that 'dpif' currently has ready to * return. */ diff --git a/lib/dpif.c b/lib/dpif.c index 08fffe430..b33d13e7f 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -632,9 +632,18 @@ dpif_port_query_by_name(const struct dpif *dpif, const char *devname, return error; } -/* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE actions - * as the OVS_USERSPACE_ATTR_PID attribute's value, for use in flows whose - * packets arrived on port 'port_no'.
+/* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE + * actions as the OVS_USERSPACE_ATTR_PID attribute's value, for use in + * flows whose packets arrived on port 'port_no'. In the case where the + * provider allocates multiple Netlink PIDs to a single port, it may use + * 'hash' to spread load among them. The caller need not use a particular + * hash function; a 5-tuple hash is suitable. + * + * (The datapath implementation might use some different hash function for + * distributing packets received via flow misses among PIDs. This means + * that packets received via flow misses might be reordered relative to + * packets received via userspace actions. This is not ordinarily a + * problem.) * * A 'port_no' of ODPP_NONE is a special case: it returns a reserved PID, not * allocated to any port, that the client may use for special purposes. @@ -645,10 +654,10 @@ dpif_port_query_by_name(const struct dpif *dpif, const char *devname, * update all of the flows that it installed that contain * OVS_ACTION_ATTR_USERSPACE actions. */ uint32_t -dpif_port_get_pid(const struct dpif *dpif, odp_port_t port_no) +dpif_port_get_pid(const struct dpif *dpif, odp_port_t port_no, uint32_t hash) { return (dpif->dpif_class->port_get_pid - ? (dpif->dpif_class->port_get_pid)(dpif, port_no) + ? (dpif->dpif_class->port_get_pid)(dpif, port_no, hash) : 0); } @@ -1294,9 +1303,39 @@ dpif_recv_set(struct dpif *dpif, bool enable) return error; } -/* Polls for an upcall from 'dpif'. If successful, stores the upcall into - * '*upcall', using 'buf' for storage. Should only be called if - * dpif_recv_set() has been used to enable receiving packets on 'dpif'. +/* Refreshes the poll loops and Netlink sockets associated to each port, + * when the number of upcall handlers (upcall receiving thread) is changed + * to 'n_handlers' and receiving packets for 'dpif' is enabled by + * recv_set(). 
+ * + * Since multiple upcall handlers can read upcalls simultaneously from + * 'dpif', each port can have multiple Netlink sockets, one per upcall + * handler. So, handlers_set() is responsible for the following tasks: + * + * When receiving upcall is enabled, extends or creates the + * configuration to support: + * + * - 'n_handlers' Netlink sockets for each port. + * + * - 'n_handlers' poll loops, one for each upcall handler. + * + * - registering the Netlink sockets for the same upcall handler to + * the corresponding poll loop. + * + * Returns 0 if successful, otherwise a positive errno value. */ +int +dpif_handlers_set(struct dpif *dpif, uint32_t n_handlers) +{ + int error = dpif->dpif_class->handlers_set(dpif, n_handlers); + log_operation(dpif, "handlers_set", error); + return error; +} + +/* Polls for an upcall from 'dpif' for an upcall handler. Since + * there can be multiple poll loops, 'handler_id' is needed as index to + * identify the corresponding poll loop. If successful, stores the upcall + * into '*upcall', using 'buf' for storage. Should only be called if + * 'recv_set' has been used to enable receiving packets from 'dpif'. * * 'upcall->key' and 'upcall->userdata' point into data in the caller-provided * 'buf', so their memory cannot be freed separately from 'buf'. @@ -1311,9 +1350,10 @@ dpif_recv_set(struct dpif *dpif, bool enable) * Returns 0 if successful, otherwise a positive errno value. Returns EAGAIN * if no upcall is immediately available.
*/ int -dpif_recv(struct dpif *dpif, struct dpif_upcall *upcall, struct ofpbuf *buf) +dpif_recv(struct dpif *dpif, uint32_t handler_id, struct dpif_upcall *upcall, + struct ofpbuf *buf) { - int error = dpif->dpif_class->recv(dpif, upcall, buf); + int error = dpif->dpif_class->recv(dpif, handler_id, upcall, buf); if (!error && !VLOG_DROP_DBG(&dpmsg_rl)) { struct ds flow; char *packet; @@ -1347,12 +1387,14 @@ dpif_recv_purge(struct dpif *dpif) } } -/* Arranges for the poll loop to wake up when 'dpif' has a message queued to be - * received with dpif_recv(). */ +/* Arranges for the poll loop for an upcall handler to wake up when + * 'dpif' has a message queued to be received with the recv member + * function. Since there can be multiple poll loops, 'handler_id' is + * needed as index to identify the corresponding poll loop. */ void -dpif_recv_wait(struct dpif *dpif) +dpif_recv_wait(struct dpif *dpif, uint32_t handler_id) { - dpif->dpif_class->recv_wait(dpif); + dpif->dpif_class->recv_wait(dpif, handler_id); } /* Obtains the NetFlow engine type and engine ID for 'dpif' into '*engine_type' diff --git a/lib/dpif.h b/lib/dpif.h index 9cd8f6afa..e7aca8e41 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -61,7 +61,8 @@ * "internal" (for a simulated port used to connect to the TCP/IP stack), * and "gre" (for a GRE tunnel). * - * - A Netlink PID (see "Upcall Queuing and Ordering" below). + * - A Netlink PID for each upcall reading thread (see "Upcall Queuing and + * Ordering" below). * * The dpif interface has functions for adding and deleting ports. When a * datapath implements these (e.g.
as the Linux and netdev datapaths do), then @@ -205,10 +206,10 @@ * connection consists of two flows with 1-ms latency to set up each one. * * To receive upcalls, a client has to enable them with dpif_recv_set(). A - * datapath should generally support multiple clients at once (e.g. so that one - * may run "ovs-dpctl show" or "ovs-dpctl dump-flows" while "ovs-vswitchd" is - * also running) but need not support multiple clients enabling upcalls at - * once. + * datapath should generally support being opened multiple times (e.g. so that + * one may run "ovs-dpctl show" or "ovs-dpctl dump-flows" while "ovs-vswitchd" + * is also running) but need not support more than one of these clients + * enabling upcalls at once. * * * Upcall Queuing and Ordering @@ -261,7 +262,7 @@ * PID in "action" upcalls is that dpif_port_get_pid() returns a constant value * and all upcalls are appended to a single queue. * - * The ideal behavior is: + * The preferred behavior is: * * - Each port has a PID that identifies the queue used for "miss" upcalls * on that port. (Thus, if each port has its own queue for "miss" @@ -275,6 +276,18 @@ * * - Upcalls that specify the "special" Netlink PID are queued separately. * + * Multiple threads may want to read upcalls simultaneously from a single + * datapath. To support multiple threads well, one extends the above preferred + * behavior: + * + * - Each port has multiple PIDs. The datapath distributes "miss" upcalls + * across the PIDs, ensuring that a given flow is mapped in a stable way + * to a single PID. + * + * - For "action" upcalls, the thread can specify its own Netlink PID or + * other threads' Netlink PID of the same port for offloading purpose + * (e.g. in a "round robin" manner). 
+ * * * Packet Format * ============= @@ -453,7 +466,8 @@ int dpif_port_query_by_name(const struct dpif *, const char *devname, struct dpif_port *); int dpif_port_get_name(struct dpif *, odp_port_t port_no, char *name, size_t name_size); -uint32_t dpif_port_get_pid(const struct dpif *, odp_port_t port_no); +uint32_t dpif_port_get_pid(const struct dpif *, odp_port_t port_no, + uint32_t hash); struct dpif_port_dump { const struct dpif *dpif; @@ -625,9 +639,11 @@ struct dpif_upcall { }; int dpif_recv_set(struct dpif *, bool enable); -int dpif_recv(struct dpif *, struct dpif_upcall *, struct ofpbuf *); +int dpif_handlers_set(struct dpif *, uint32_t n_handlers); +int dpif_recv(struct dpif *, uint32_t handler_id, struct dpif_upcall *, + struct ofpbuf *); void dpif_recv_purge(struct dpif *); -void dpif_recv_wait(struct dpif *); +void dpif_recv_wait(struct dpif *, uint32_t handler_id); /* Miscellaneous. */ diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index db81dbecc..56e6d24dc 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -298,8 +298,9 @@ void udpif_set_threads(struct udpif *udpif, size_t n_handlers, size_t n_revalidators) { - ovsrcu_quiesce_start(); + int error; + ovsrcu_quiesce_start(); /* Stop the old threads (if any). */ if (udpif->handlers && (udpif->n_handlers != n_handlers @@ -372,6 +373,13 @@ udpif_set_threads(struct udpif *udpif, size_t n_handlers, udpif->n_handlers = 0; } + error = dpif_handlers_set(udpif->dpif, 1); + if (error) { + VLOG_ERR("failed to configure handlers in dpif %s: %s", + dpif_name(udpif->dpif), ovs_strerror(error)); + return; + } + /* Start new threads (if necessary). 
*/ if (!udpif->handlers && n_handlers) { size_t i; @@ -544,7 +552,7 @@ udpif_dispatcher(void *arg) set_subprogram_name("dispatcher"); while (!latch_is_set(&udpif->exit_latch)) { recv_upcalls(udpif); - dpif_recv_wait(udpif->dpif); + dpif_recv_wait(udpif->dpif, 0); latch_wait(&udpif->exit_latch); poll_block(); } @@ -825,7 +833,7 @@ recv_upcalls(struct udpif *udpif) upcall = xmalloc(sizeof *upcall); ofpbuf_use_stub(&upcall->upcall_buf, upcall->upcall_stub, sizeof upcall->upcall_stub); - error = dpif_recv(udpif->dpif, &upcall->dpif_upcall, + error = dpif_recv(udpif->dpif, 0, &upcall->dpif_upcall, &upcall->upcall_buf); if (error) { /* upcall_destroy() can only be called on successfully received @@ -913,7 +921,7 @@ compose_slow_path(struct udpif *udpif, struct xlate_out *xout, port = xout->slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP) ? ODPP_NONE : odp_in_port; - pid = dpif_port_get_pid(udpif->dpif, port); + pid = dpif_port_get_pid(udpif->dpif, port, 0); odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, buf); } diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index d0b1f47e0..04bdf9576 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -1491,7 +1491,7 @@ compose_sample_action(const struct xbridge *xbridge, actions_offset = nl_msg_start_nested(odp_actions, OVS_SAMPLE_ATTR_ACTIONS); odp_port = ofp_port_to_odp_port(xbridge, flow->in_port.ofp_port); - pid = dpif_port_get_pid(xbridge->dpif, odp_port); + pid = dpif_port_get_pid(xbridge->dpif, odp_port, 0); cookie_offset = odp_put_userspace_action(pid, cookie, cookie_size, odp_actions); nl_msg_end_nested(odp_actions, actions_offset); diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 1122f5fa4..aa16896b8 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -930,7 +930,7 @@ check_variable_length_userdata(struct dpif_backer *backer) ofpbuf_init(&actions, 64); start = nl_msg_start_nested(&actions, OVS_ACTION_ATTR_USERSPACE); 
nl_msg_put_u32(&actions, OVS_USERSPACE_ATTR_PID, - dpif_port_get_pid(backer->dpif, ODPP_NONE)); + dpif_port_get_pid(backer->dpif, ODPP_NONE, 0)); nl_msg_put_unspec_zero(&actions, OVS_USERSPACE_ATTR_USERDATA, 4); nl_msg_end_nested(&actions, start); -- 2.43.0