static int dpif_linux_init(void);
static int open_dpif(const struct dpif_linux_dp *, struct dpif **);
static uint32_t dpif_linux_port_get_pid(const struct dpif *,
- odp_port_t port_no);
+ odp_port_t port_no, uint32_t hash);
static int dpif_linux_refresh_channels(struct dpif *);
static void dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *,
dpif->n_events = dpif->event_offset = 0;
/* Don't close dpif->epoll_fd since that would cause other threads that
- * call dpif_recv_wait(dpif) to wait on an arbitrary fd or a closed fd. */
+ * call dpif_recv_wait() to wait on an arbitrary fd or a closed fd. */
}
static int
}
static uint32_t
-dpif_linux_port_get_pid(const struct dpif *dpif_, odp_port_t port_no)
+dpif_linux_port_get_pid(const struct dpif *dpif_, odp_port_t port_no,
+ uint32_t hash OVS_UNUSED)
{
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
uint32_t port_idx = odp_to_u32(port_no);
return error;
}
+/* Implements the new ->handlers_set() dpif callback for the Linux
+ * datapath.  Stub: ignores 'n_handlers' (both parameters are marked
+ * OVS_UNUSED) and always reports success.  Presumably a placeholder
+ * until per-handler Netlink channels are implemented here -- confirm
+ * against the datapath's channel setup code. */
+static int
+dpif_linux_handlers_set(struct dpif *dpif_ OVS_UNUSED,
+                        uint32_t n_handlers OVS_UNUSED)
+{
+    return 0;
+}
+
static int
dpif_linux_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
uint32_t queue_id, uint32_t *priority)
}
static int
-dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall,
- struct ofpbuf *buf)
+dpif_linux_recv(struct dpif *dpif_, uint32_t handler_id OVS_UNUSED,
+ struct dpif_upcall *upcall, struct ofpbuf *buf)
{
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
int error;
}
static void
-dpif_linux_recv_wait(struct dpif *dpif_)
+dpif_linux_recv_wait(struct dpif *dpif_, uint32_t handler_id OVS_UNUSED)
{
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
dpif_linux_execute,
dpif_linux_operate,
dpif_linux_recv_set,
+ dpif_linux_handlers_set,
dpif_linux_queue_to_priority,
dpif_linux_recv,
dpif_linux_recv_wait,
return 0;
}
+/* Implements the new ->handlers_set() dpif callback for the userspace
+ * (netdev) datapath.  Stub: ignores 'n_handlers' (both parameters are
+ * marked OVS_UNUSED) and always reports success -- the netdev datapath
+ * keeps a single upcall queue, so there are no per-handler sockets to
+ * reconfigure. */
+static int
+dpif_netdev_handlers_set(struct dpif *dpif OVS_UNUSED,
+                         uint32_t n_handlers OVS_UNUSED)
+{
+    return 0;
+}
+
static int
dpif_netdev_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
uint32_t queue_id, uint32_t *priority)
}
static int
-dpif_netdev_recv(struct dpif *dpif, struct dpif_upcall *upcall,
- struct ofpbuf *buf)
+dpif_netdev_recv(struct dpif *dpif, uint32_t n_handlers OVS_UNUSED,
+ struct dpif_upcall *upcall, struct ofpbuf *buf)
{
struct dp_netdev *dp = get_dp_netdev(dpif);
struct dp_netdev_queue *q;
}
static void
-dpif_netdev_recv_wait(struct dpif *dpif)
+dpif_netdev_recv_wait(struct dpif *dpif, uint32_t handler_id OVS_UNUSED)
{
struct dp_netdev *dp = get_dp_netdev(dpif);
uint64_t seq;
dpif_netdev_execute,
NULL, /* operate */
dpif_netdev_recv_set,
+ dpif_netdev_handlers_set,
dpif_netdev_queue_to_priority,
dpif_netdev_recv,
dpif_netdev_recv_wait,
/* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE
* actions as the OVS_USERSPACE_ATTR_PID attribute's value, for use in
- * flows whose packets arrived on port 'port_no'.
+ * flows whose packets arrived on port 'port_no'. In the case where the
+ * provider allocates multiple Netlink PIDs to a single port, it may use
+ * 'hash' to spread load among them. The caller need not use a particular
+ * hash function; a 5-tuple hash is suitable.
+ *
+ * (The datapath implementation might use some different hash function for
+ * distributing packets received via flow misses among PIDs. This means
+ * that packets received via flow misses might be reordered relative to
+ * packets received via userspace actions. This is not ordinarily a
+ * problem.)
*
* A 'port_no' of UINT32_MAX should be treated as a special case. The
* implementation should return a reserved PID, not allocated to any port,
*
* A dpif provider that doesn't have meaningful Netlink PIDs can use NULL
* for this function. This is equivalent to always returning 0. */
- uint32_t (*port_get_pid)(const struct dpif *dpif, odp_port_t port_no);
+ uint32_t (*port_get_pid)(const struct dpif *dpif, odp_port_t port_no,
+ uint32_t hash);
/* Attempts to begin dumping the ports in a dpif. On success, returns 0
* and initializes '*statep' with any data needed for iteration. On
* updating flows as necessary if it does this. */
int (*recv_set)(struct dpif *dpif, bool enable);
+ /* Refreshes the poll loops and Netlink sockets associated to each port,
+ * when the number of upcall handlers (upcall receiving thread) is changed
+ * to 'n_handlers' and receiving packets for 'dpif' is enabled by
+ * recv_set().
+ *
+ * Since multiple upcall handlers can read upcalls simultaneously from
+ * 'dpif', each port can have multiple Netlink sockets, one per upcall
+ * handler. So, handlers_set() is responsible for the following tasks:
+ *
+ * When receiving upcall is enabled, extends or creates the
+ * configuration to support:
+ *
+ * - 'n_handlers' Netlink sockets for each port.
+ *
+ * - 'n_handlers' poll loops, one for each upcall handler.
+ *
+ * - registering the Netlink sockets for the same upcall handler to
+ * the corresponding poll loop.
+ * */
+ int (*handlers_set)(struct dpif *dpif, uint32_t n_handlers);
+
/* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a
* priority value used for setting packet priority. */
int (*queue_to_priority)(const struct dpif *dpif, uint32_t queue_id,
uint32_t *priority);
- /* Polls for an upcall from 'dpif'. If successful, stores the upcall into
- * '*upcall', using 'buf' for storage. Should only be called if 'recv_set'
- * has been used to enable receiving packets from 'dpif'.
+ /* Polls for an upcall from 'dpif' for an upcall handler. Since there
+ * can be multiple poll loops (see ->handlers_set()), 'handler_id' is
+ * needed as index to identify the corresponding poll loop. If
+ * successful, stores the upcall into '*upcall', using 'buf' for
+ * storage. Should only be called if 'recv_set' has been used to enable
+ * receiving packets from 'dpif'.
*
* The implementation should point 'upcall->key' and 'upcall->userdata'
* (if any) into data in the caller-provided 'buf'. The implementation may
*
* This function must not block. If no upcall is pending when it is
* called, it should return EAGAIN without blocking. */
- int (*recv)(struct dpif *dpif, struct dpif_upcall *upcall,
- struct ofpbuf *buf);
-
- /* Arranges for the poll loop to wake up when 'dpif' has a message queued
- * to be received with the recv member function. */
- void (*recv_wait)(struct dpif *dpif);
+ int (*recv)(struct dpif *dpif, uint32_t handler_id,
+ struct dpif_upcall *upcall, struct ofpbuf *buf);
+
+ /* Arranges for the poll loop for an upcall handler to wake up when 'dpif'
+ * has a message queued to be received with the recv member functions.
+ * Since there can be multiple poll loops (see ->handlers_set()),
+ * 'handler_id' is needed as index to identify the corresponding poll loop.
+ * */
+ void (*recv_wait)(struct dpif *dpif, uint32_t handler_id);
/* Throws away any queued upcalls that 'dpif' currently has ready to
* return. */
return error;
}
-/* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE actions
- * as the OVS_USERSPACE_ATTR_PID attribute's value, for use in flows whose
- * packets arrived on port 'port_no'.
+/* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE
+ * actions as the OVS_USERSPACE_ATTR_PID attribute's value, for use in
+ * flows whose packets arrived on port 'port_no'. In the case where the
+ * provider allocates multiple Netlink PIDs to a single port, it may use
+ * 'hash' to spread load among them. The caller need not use a particular
+ * hash function; a 5-tuple hash is suitable.
+ *
+ * (The datapath implementation might use some different hash function for
+ * distributing packets received via flow misses among PIDs. This means
+ * that packets received via flow misses might be reordered relative to
+ * packets received via userspace actions. This is not ordinarily a
+ * problem.)
*
* A 'port_no' of ODPP_NONE is a special case: it returns a reserved PID, not
* allocated to any port, that the client may use for special purposes.
* update all of the flows that it installed that contain
* OVS_ACTION_ATTR_USERSPACE actions. */
uint32_t
-dpif_port_get_pid(const struct dpif *dpif, odp_port_t port_no)
+dpif_port_get_pid(const struct dpif *dpif, odp_port_t port_no, uint32_t hash)
{
return (dpif->dpif_class->port_get_pid
- ? (dpif->dpif_class->port_get_pid)(dpif, port_no)
+ ? (dpif->dpif_class->port_get_pid)(dpif, port_no, hash)
: 0);
}
return error;
}
-/* Polls for an upcall from 'dpif'. If successful, stores the upcall into
- * '*upcall', using 'buf' for storage. Should only be called if
- * dpif_recv_set() has been used to enable receiving packets on 'dpif'.
+/* Refreshes the poll loops and Netlink sockets associated to each port,
+ * when the number of upcall handlers (upcall receiving thread) is changed
+ * to 'n_handlers' and receiving packets for 'dpif' is enabled by
+ * recv_set().
+ *
+ * Since multiple upcall handlers can read upcalls simultaneously from
+ * 'dpif', each port can have multiple Netlink sockets, one per upcall
+ * handler. So, handlers_set() is responsible for the following tasks:
+ *
+ * When receiving upcall is enabled, extends or creates the
+ * configuration to support:
+ *
+ * - 'n_handlers' Netlink sockets for each port.
+ *
+ * - 'n_handlers' poll loops, one for each upcall handler.
+ *
+ * - registering the Netlink sockets for the same upcall handler to
+ * the corresponding poll loop.
+ *
+ * Returns 0 if successful, otherwise a positive errno value. */
+int
+dpif_handlers_set(struct dpif *dpif, uint32_t n_handlers)
+{
+    /* NOTE(review): unlike port_get_pid above, ->handlers_set is invoked
+     * without a NULL check, so every dpif provider must register an
+     * implementation (both providers in this patch do). */
+    int error = dpif->dpif_class->handlers_set(dpif, n_handlers);
+    log_operation(dpif, "handlers_set", error);
+    return error;
+}
+
+/* Polls for an upcall from 'dpif' for an upcall handler.  Since there
+ * can be multiple poll loops, 'handler_id' is needed as index to
+ * identify the corresponding poll loop. If successful, stores the upcall
+ * into '*upcall', using 'buf' for storage. Should only be called if
+ * 'recv_set' has been used to enable receiving packets from 'dpif'.
*
* 'upcall->key' and 'upcall->userdata' point into data in the caller-provided
* 'buf', so their memory cannot be freed separately from 'buf'.
* Returns 0 if successful, otherwise a positive errno value. Returns EAGAIN
* if no upcall is immediately available. */
int
-dpif_recv(struct dpif *dpif, struct dpif_upcall *upcall, struct ofpbuf *buf)
+dpif_recv(struct dpif *dpif, uint32_t handler_id, struct dpif_upcall *upcall,
+ struct ofpbuf *buf)
{
- int error = dpif->dpif_class->recv(dpif, upcall, buf);
+ int error = dpif->dpif_class->recv(dpif, handler_id, upcall, buf);
if (!error && !VLOG_DROP_DBG(&dpmsg_rl)) {
struct ds flow;
char *packet;
}
}
-/* Arranges for the poll loop to wake up when 'dpif' has a message queued to be
- * received with dpif_recv(). */
+/* Arranges for the poll loop for an upcall handler to wake up when 'dpif'
+ * has a message queued to be received with the recv member
+ * function. Since there can be multiple poll loops, 'handler_id' is
+ * needed as index to identify the corresponding poll loop. */
void
-dpif_recv_wait(struct dpif *dpif)
+dpif_recv_wait(struct dpif *dpif, uint32_t handler_id)
{
- dpif->dpif_class->recv_wait(dpif);
+ dpif->dpif_class->recv_wait(dpif, handler_id);
}
/* Obtains the NetFlow engine type and engine ID for 'dpif' into '*engine_type'
/*
- * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
+ * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* "internal" (for a simulated port used to connect to the TCP/IP stack),
* and "gre" (for a GRE tunnel).
*
- * - A Netlink PID (see "Upcall Queuing and Ordering" below).
+ * - A Netlink PID for each upcall reading thread (see "Upcall Queuing and
+ * Ordering" below).
*
* The dpif interface has functions for adding and deleting ports. When a
* datapath implements these (e.g. as the Linux and netdev datapaths do), then
* connection consists of two flows with 1-ms latency to set up each one.
*
* To receive upcalls, a client has to enable them with dpif_recv_set(). A
- * datapath should generally support multiple clients at once (e.g. so that one
- * may run "ovs-dpctl show" or "ovs-dpctl dump-flows" while "ovs-vswitchd" is
- * also running) but need not support multiple clients enabling upcalls at
- * once.
+ * datapath should generally support being opened multiple times (e.g. so that
+ * one may run "ovs-dpctl show" or "ovs-dpctl dump-flows" while "ovs-vswitchd"
+ * is also running) but need not support more than one of these clients
+ * enabling upcalls at once.
*
*
* Upcall Queuing and Ordering
* PID in "action" upcalls is that dpif_port_get_pid() returns a constant value
* and all upcalls are appended to a single queue.
*
- * The ideal behavior is:
+ * The preferred behavior is:
*
* - Each port has a PID that identifies the queue used for "miss" upcalls
* on that port. (Thus, if each port has its own queue for "miss"
*
* - Upcalls that specify the "special" Netlink PID are queued separately.
*
+ * Multiple threads may want to read upcalls simultaneously from a single
+ * datapath. To support multiple threads well, one extends the above preferred
+ * behavior:
+ *
+ * - Each port has multiple PIDs. The datapath distributes "miss" upcalls
+ * across the PIDs, ensuring that a given flow is mapped in a stable way
+ * to a single PID.
+ *
+ * - For "action" upcalls, the thread can specify its own Netlink PID or
+ * other threads' Netlink PID of the same port for offloading purpose
+ * (e.g. in a "round robin" manner).
+ *
*
* Packet Format
* =============
struct dpif_port *);
int dpif_port_get_name(struct dpif *, odp_port_t port_no,
char *name, size_t name_size);
-uint32_t dpif_port_get_pid(const struct dpif *, odp_port_t port_no);
+uint32_t dpif_port_get_pid(const struct dpif *, odp_port_t port_no,
+ uint32_t hash);
struct dpif_port_dump {
const struct dpif *dpif;
};
int dpif_recv_set(struct dpif *, bool enable);
-int dpif_recv(struct dpif *, struct dpif_upcall *, struct ofpbuf *);
+int dpif_handlers_set(struct dpif *, uint32_t n_handlers);
+int dpif_recv(struct dpif *, uint32_t handler_id, struct dpif_upcall *,
+ struct ofpbuf *);
void dpif_recv_purge(struct dpif *);
-void dpif_recv_wait(struct dpif *);
+void dpif_recv_wait(struct dpif *, uint32_t handler_id);
\f
/* Miscellaneous. */
udpif_set_threads(struct udpif *udpif, size_t n_handlers,
size_t n_revalidators)
{
- ovsrcu_quiesce_start();
+ int error;
+ ovsrcu_quiesce_start();
/* Stop the old threads (if any). */
if (udpif->handlers &&
(udpif->n_handlers != n_handlers
udpif->n_handlers = 0;
}
+ error = dpif_handlers_set(udpif->dpif, 1);
+ if (error) {
+ VLOG_ERR("failed to configure handlers in dpif %s: %s",
+ dpif_name(udpif->dpif), ovs_strerror(error));
+ return;
+ }
+
/* Start new threads (if necessary). */
if (!udpif->handlers && n_handlers) {
size_t i;
set_subprogram_name("dispatcher");
while (!latch_is_set(&udpif->exit_latch)) {
recv_upcalls(udpif);
- dpif_recv_wait(udpif->dpif);
+ dpif_recv_wait(udpif->dpif, 0);
latch_wait(&udpif->exit_latch);
poll_block();
}
upcall = xmalloc(sizeof *upcall);
ofpbuf_use_stub(&upcall->upcall_buf, upcall->upcall_stub,
sizeof upcall->upcall_stub);
- error = dpif_recv(udpif->dpif, &upcall->dpif_upcall,
+ error = dpif_recv(udpif->dpif, 0, &upcall->dpif_upcall,
&upcall->upcall_buf);
if (error) {
/* upcall_destroy() can only be called on successfully received
port = xout->slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP)
? ODPP_NONE
: odp_in_port;
- pid = dpif_port_get_pid(udpif->dpif, port);
+ pid = dpif_port_get_pid(udpif->dpif, port, 0);
odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, buf);
}
actions_offset = nl_msg_start_nested(odp_actions, OVS_SAMPLE_ATTR_ACTIONS);
odp_port = ofp_port_to_odp_port(xbridge, flow->in_port.ofp_port);
- pid = dpif_port_get_pid(xbridge->dpif, odp_port);
+ pid = dpif_port_get_pid(xbridge->dpif, odp_port, 0);
cookie_offset = odp_put_userspace_action(pid, cookie, cookie_size, odp_actions);
nl_msg_end_nested(odp_actions, actions_offset);
ofpbuf_init(&actions, 64);
start = nl_msg_start_nested(&actions, OVS_ACTION_ATTR_USERSPACE);
nl_msg_put_u32(&actions, OVS_USERSPACE_ATTR_PID,
- dpif_port_get_pid(backer->dpif, ODPP_NONE));
+ dpif_port_get_pid(backer->dpif, ODPP_NONE, 0));
nl_msg_put_unspec_zero(&actions, OVS_USERSPACE_ATTR_USERDATA, 4);
nl_msg_end_nested(&actions, start);