/*
- * Copyright (c) 2008, 2009 Nicira Networks.
+ * Copyright (c) 2008, 2009, 2010 Nicira Networks.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
*/
#include <config.h>
-#include "dpif.h"
+#include "dpif-provider.h"
#include <assert.h>
#include <ctype.h>
#include <errno.h>
-#include <fcntl.h>
#include <inttypes.h>
-#include <net/if.h>
-#include <linux/rtnetlink.h>
-#include <linux/ethtool.h>
-#include <linux/sockios.h>
-#include <netinet/in.h>
#include <stdlib.h>
#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/stat.h>
-#include <sys/sysmacros.h>
-#include <unistd.h>
#include "coverage.h"
#include "dynamic-string.h"
#include "flow.h"
+#include "netdev.h"
#include "netlink.h"
#include "odp-util.h"
#include "ofp-print.h"
+#include "ofp-util.h"
#include "ofpbuf.h"
#include "packets.h"
#include "poll-loop.h"
+#include "shash.h"
+#include "svec.h"
#include "util.h"
#include "valgrind.h"
-
#include "vlog.h"
-#define THIS_MODULE VLM_dpif
+
+VLOG_DEFINE_THIS_MODULE(dpif);
+
+COVERAGE_DEFINE(dpif_destroy);
+COVERAGE_DEFINE(dpif_port_add);
+COVERAGE_DEFINE(dpif_port_del);
+COVERAGE_DEFINE(dpif_flow_flush);
+COVERAGE_DEFINE(dpif_flow_get);
+COVERAGE_DEFINE(dpif_flow_put);
+COVERAGE_DEFINE(dpif_flow_del);
+COVERAGE_DEFINE(dpif_flow_query_list);
+COVERAGE_DEFINE(dpif_flow_query_list_n);
+COVERAGE_DEFINE(dpif_execute);
+COVERAGE_DEFINE(dpif_purge);
+
+static const struct dpif_class *base_dpif_classes[] = {
+#ifdef HAVE_NETLINK
+ &dpif_linux_class,
+#endif
+ &dpif_netdev_class,
+};
+
+struct registered_dpif_class {
+ const struct dpif_class *dpif_class;
+ int refcount;
+};
+static struct shash dpif_classes = SHASH_INITIALIZER(&dpif_classes);
/* Rate limit for individual messages going to or from the datapath, output at
* DBG level. This is very high because, if these are enabled, it is because
static struct vlog_rate_limit dpmsg_rl = VLOG_RATE_LIMIT_INIT(600, 600);
/* Not really much point in logging many dpif errors. */
-static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);
-
-static int get_minor_from_name(const char *name, unsigned int *minor);
-static int name_to_minor(const char *name, unsigned int *minor);
-static int lookup_minor(const char *name, unsigned int *minor);
-static int open_by_minor(unsigned int minor, struct dpif *);
-static int make_openvswitch_device(unsigned int minor, char **fnp);
+static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
+
+static void log_operation(const struct dpif *, const char *operation,
+ int error);
+static void log_flow_operation(const struct dpif *, const char *operation,
+ int error, struct odp_flow *flow);
+static void log_flow_put(struct dpif *, int error,
+ const struct odp_flow_put *);
+static bool should_log_flow_message(int error);
static void check_rw_odp_flow(struct odp_flow *);
-int
-dpif_open(const char *name, struct dpif *dpif)
+static void
+dp_initialize(void)
{
- int listen_mask;
- int error;
+ static int status = -1;
- dpif->fd = -1;
+ if (status < 0) {
+ int i;
- error = name_to_minor(name, &dpif->minor);
- if (error) {
- return error;
+ status = 0;
+ for (i = 0; i < ARRAY_SIZE(base_dpif_classes); i++) {
+ dp_register_provider(base_dpif_classes[i]);
+ }
}
+}
- error = open_by_minor(dpif->minor, dpif);
- if (error) {
- return error;
+/* Lets every registered datapath provider perform its periodic work.
+ *
+ * Any program that opens dpifs must call this function, together with
+ * netdev_run(), from its main poll loop. */
+void
+dp_run(void)
+{
+    struct shash_node *entry;
+
+    SHASH_FOR_EACH(entry, &dpif_classes) {
+        const struct registered_dpif_class *registered = entry->data;
+        const struct dpif_class *class = registered->dpif_class;
+
+        /* Providers that supply no 'run' callback are skipped. */
+        if (class->run) {
+            class->run();
+        }
}
+}
- /* We can open the device, but that doesn't mean that it's been created.
- * If it hasn't been, then any command other than ODP_DP_CREATE will
- * return ENODEV. Try something innocuous. */
- listen_mask = 0; /* Make Valgrind happy. */
- if (ioctl(dpif->fd, ODP_GET_LISTEN_MASK, &listen_mask)) {
- error = errno;
- if (error != ENODEV) {
- VLOG_WARN("dp%u: probe returned unexpected error: %s",
- dpif->minor, strerror(error));
+/* Arranges for poll_block() to wake up when dp_run() needs to be called.
+ *
+ * Any program that opens dpifs must call this function, together with
+ * netdev_wait(), from its main poll loop. */
+void
+dp_wait(void)
+{
+    struct shash_node *entry;
+
+    SHASH_FOR_EACH(entry, &dpif_classes) {
+        const struct registered_dpif_class *registered = entry->data;
+        const struct dpif_class *class = registered->dpif_class;
+
+        /* Providers that supply no 'wait' callback are skipped. */
+        if (class->wait) {
+            class->wait();
}
- dpif_close(dpif);
- return error;
}
+}
+
+/* Adds 'new_class' to the set of registered datapath providers, keyed by its
+ * 'type' string.  Once registered, datapaths of that type can be opened using
+ * dpif_open().
+ *
+ * Returns 0 on success, or EEXIST (after logging a warning) if a provider
+ * with the same type is already registered. */
+int
+dp_register_provider(const struct dpif_class *new_class)
+{
+    const char *type = new_class->type;
+    struct registered_dpif_class *entry;
+
+    if (shash_find(&dpif_classes, type)) {
+        VLOG_WARN("attempted to register duplicate datapath provider: %s",
+                  type);
+        return EEXIST;
+    }
+
+    /* A freshly registered provider has no open dpifs yet. */
+    entry = xmalloc(sizeof *entry);
+    entry->refcount = 0;
+    entry->dpif_class = new_class;
+    shash_add(&dpif_classes, type, entry);
+
return 0;
}
-void
-dpif_close(struct dpif *dpif)
+/* Removes the datapath provider registered under 'type'.
+ *
+ * Returns 0 on success.  Fails with EAFNOSUPPORT if no provider of that type
+ * is registered, and with EBUSY if the provider's reference count is nonzero,
+ * that is, if it is still in use by some dpif.  After a successful call, new
+ * datapaths of that type cannot be opened using dpif_open(). */
+int
+dp_unregister_provider(const char *type)
{
- if (dpif) {
- close(dpif->fd);
- dpif->fd = -1;
+    struct shash_node *entry = shash_find(&dpif_classes, type);
+    struct registered_dpif_class *registered;
+
+    if (!entry) {
+        VLOG_WARN("attempted to unregister a datapath provider that is not "
+                  "registered: %s", type);
+        return EAFNOSUPPORT;
}
+
+    registered = entry->data;
+    if (registered->refcount != 0) {
+        VLOG_WARN("attempted to unregister in use datapath provider: %s",
+                  type);
+        return EBUSY;
+    }
+
+    /* Drop the table entry first, then release the record it pointed to. */
+    shash_delete(&dpif_classes, entry);
+    free(registered);
+
+    return 0;
}
-static int
-do_ioctl(const struct dpif *dpif, int cmd, const char *cmd_name,
- const void *arg)
+/* Replaces the contents of 'types' with the type name of every datapath
+ * provider that is currently registered.  The caller must already have
+ * initialized 'types'. */
+void
+dp_enumerate_types(struct svec *types)
{
- int error = ioctl(dpif->fd, cmd, arg) ? errno : 0;
- if (cmd_name) {
- if (error) {
- VLOG_WARN_RL(&error_rl, "dp%u: ioctl(%s) failed (%s)",
- dpif->minor, cmd_name, strerror(error));
- } else {
- VLOG_DBG_RL(&dpmsg_rl, "dp%u: ioctl(%s): success",
- dpif->minor, cmd_name);
- }
+    struct shash_node *entry;
+
+    /* Make sure the built-in providers are registered before listing. */
+    dp_initialize();
+    svec_clear(types);
+
+    SHASH_FOR_EACH(entry, &dpif_classes) {
+        const struct registered_dpif_class *registered = entry->data;
+        const struct dpif_class *class = registered->dpif_class;
+
+        svec_add(types, class->type);
}
- return error;
}
+/* Clears 'names' and enumerates the names of all known created datapaths with
+ * the given 'type'. The caller must first initialize the svec. Returns 0 if
+ * successful, otherwise a positive errno value.
+ *
+ * Some kinds of datapaths might not be practically enumerable. This is not
+ * considered an error. */
int
-dpif_create(const char *name, struct dpif *dpif)
+dp_enumerate_names(const char *type, struct svec *names)
{
- unsigned int minor;
+ const struct registered_dpif_class *registered_class;
+ const struct dpif_class *dpif_class;
int error;
- if (!get_minor_from_name(name, &minor)) {
- /* Minor was specified in 'name', go ahead and create it. */
- error = open_by_minor(minor, dpif);
- if (error) {
- return error;
- }
+ dp_initialize();
+ svec_clear(names);
- if (!strncmp(name, "nl:", 3)) {
- char devname[128];
- sprintf(devname, "of%u", minor);
- error = ioctl(dpif->fd, ODP_DP_CREATE, devname) < 0 ? errno : 0;
- } else {
- error = ioctl(dpif->fd, ODP_DP_CREATE, name) < 0 ? errno : 0;
- }
- if (error) {
- dpif_close(dpif);
- }
- return error;
- } else {
- for (minor = 0; minor < ODP_MAX; minor++) {
- error = open_by_minor(minor, dpif);
- if (error) {
- return error;
- }
-
- error = ioctl(dpif->fd, ODP_DP_CREATE, name) < 0 ? errno : 0;
- if (!error) {
- return 0;
- }
- dpif_close(dpif);
- if (error != EBUSY) {
- return error;
- }
- }
- return ENOBUFS;
+ registered_class = shash_find_data(&dpif_classes, type);
+ if (!registered_class) {
+ VLOG_WARN("could not enumerate unknown type: %s", type);
+ return EAFNOSUPPORT;
+ }
+
+ dpif_class = registered_class->dpif_class;
+ error = dpif_class->enumerate ? dpif_class->enumerate(names) : 0;
+
+ if (error) {
+ VLOG_WARN("failed to enumerate %s datapaths: %s", dpif_class->type,
+ strerror(error));
}
+
+ return error;
}
-int
-dpif_delete(struct dpif *dpif)
+/* Splits 'datapath_name_', which has the form "type@name" or just "name",
+ * into its component pieces.
+ *
+ * Stores the name in '*name' and the type in '*type'.  If 'datapath_name_'
+ * contains no '@', '*type' is set to NULL.  The caller must free both '*name'
+ * and (if nonnull) '*type'. */
+void
+dp_parse_name(const char *datapath_name_, char **name, char **type)
{
- COVERAGE_INC(dpif_destroy);
- return do_ioctl(dpif, ODP_DP_DESTROY, "ODP_DP_DESTROY", NULL);
+    char *copy = xstrdup(datapath_name_);
+    char *at = strchr(copy, '@');
+
+    if (!at) {
+        /* No '@': the whole string is the name and there is no type. */
+        *type = NULL;
+        *name = copy;
+        return;
+    }
+
+    /* Cut 'copy' at the first '@' so that it holds only the type.  The name
+     * gets its own copy so that the two can be freed independently. */
+    *at = '\0';
+    *name = xstrdup(at + 1);
+    *type = copy;
}
-int
-dpif_get_dp_stats(const struct dpif *dpif, struct odp_stats *stats)
+/* Common implementation of dpif_open() and dpif_create().
+ *
+ * Looks up the provider registered for 'type' (a null or empty 'type' means
+ * "system") and calls its 'open' callback with 'name' and 'create'.  On
+ * success, increments the provider's reference count, stores the new dpif in
+ * '*dpifp', and returns 0.  On failure, stores NULL in '*dpifp' and returns a
+ * nonzero error value, which is EAFNOSUPPORT if 'type' is not registered. */
+static int
+do_open(const char *name, const char *type, bool create, struct dpif **dpifp)
{
- memset(stats, 0, sizeof *stats);
- return do_ioctl(dpif, ODP_DP_STATS, "ODP_DP_STATS", stats);
+    struct registered_dpif_class *entry;
+    struct dpif *dpif = NULL;
+    int error;
+
+    dp_initialize();
+
+    if (!type || !type[0]) {
+        type = "system";
+    }
+
+    entry = shash_find_data(&dpif_classes, type);
+    if (entry) {
+        const struct dpif_class *class = entry->dpif_class;
+
+        error = class->open(class, name, create, &dpif);
+        if (!error) {
+            assert(dpif->dpif_class == entry->dpif_class);
+            entry->refcount++;
+        }
+    } else {
+        VLOG_WARN("could not create datapath %s of unknown type %s", name,
+                  type);
+        error = EAFNOSUPPORT;
+    }
+
+    /* Never hand back a dpif pointer together with an error. */
+    if (error) {
+        dpif = NULL;
+    }
+    *dpifp = dpif;
+    return error;
}
+/* Tries to open an existing datapath named 'name' and type 'type'. Will fail
+ * if no datapath with 'name' and 'type' exists. 'type' may be either NULL or
+ * the empty string to specify the default system type. Returns 0 if
+ * successful, otherwise a positive errno value. On success stores a pointer
+ * to the datapath in '*dpifp', otherwise a null pointer. */
int
-dpif_get_drop_frags(const struct dpif *dpif, bool *drop_frags)
+dpif_open(const char *name, const char *type, struct dpif **dpifp)
{
- int tmp;
- int error = do_ioctl(dpif, ODP_GET_DROP_FRAGS, "ODP_GET_DROP_FRAGS", &tmp);
- *drop_frags = error ? tmp & 1 : false;
- return error;
+ return do_open(name, type, false, dpifp);
}
+/* Tries to create and open a new datapath with the given 'name' and 'type'.
+ * 'type' may be either NULL or the empty string to specify the default system
+ * type. Will fail if a datapath with 'name' and 'type' already exists.
+ * Returns 0 if successful, otherwise a positive errno value. On success
+ * stores a pointer to the datapath in '*dpifp', otherwise a null pointer. */
int
-dpif_set_drop_frags(struct dpif *dpif, bool drop_frags)
+dpif_create(const char *name, const char *type, struct dpif **dpifp)
{
- int tmp = drop_frags;
- return do_ioctl(dpif, ODP_SET_DROP_FRAGS, "ODP_SET_DROP_FRAGS", &tmp);
+ return do_open(name, type, true, dpifp);
}
+/* Opens the datapath with the given 'name' and 'type', creating it first if
+ * it does not already exist.  'type' may be either NULL or the empty string
+ * to specify the default system type.
+ *
+ * Creation is attempted first; if that fails with EEXIST or EBUSY, the
+ * datapath is opened instead.  Returns 0 if successful, otherwise a positive
+ * errno value (after logging a warning).  On success stores a pointer to the
+ * datapath in '*dpifp', otherwise a null pointer. */
int
-dpif_get_listen_mask(const struct dpif *dpif, int *listen_mask)
+dpif_create_and_open(const char *name, const char *type, struct dpif **dpifp)
{
- int error = do_ioctl(dpif, ODP_GET_LISTEN_MASK, "ODP_GET_LISTEN_MASK",
- listen_mask);
- if (error) {
- *listen_mask = 0;
+    int error = dpif_create(name, type, dpifp);
+
+    switch (error) {
+    case 0:
+        /* Created successfully; nothing more to do. */
+        break;
+
+    case EEXIST:
+    case EBUSY:
+        /* The datapath is already there, so fall back to opening it. */
+        error = dpif_open(name, type, dpifp);
+        if (error) {
+            VLOG_WARN("datapath %s already exists but cannot be opened: %s",
+                      name, strerror(error));
+        }
+        break;
+
+    default:
+        VLOG_WARN("failed to create datapath %s: %s", name, strerror(error));
+        break;
}
return error;
}
+/* Closes and frees the connection to 'dpif' and drops the reference that it
+ * held on its provider.  A null 'dpif' is a no-op.
+ *
+ * This does not destroy the datapath itself; call dpif_delete() first,
+ * instead, if that is desirable. */
+void
+dpif_close(struct dpif *dpif)
+{
+    struct registered_dpif_class *entry;
+
+    if (!dpif) {
+        return;
+    }
+
+    /* An open dpif implies that its provider is registered and referenced. */
+    entry = shash_find_data(&dpif_classes, dpif->dpif_class->type);
+    assert(entry);
+    assert(entry->refcount);
+
+    entry->refcount--;
+    dpif_uninit(dpif, true);
+}
+
+/* Returns the full name of datapath 'dpif', that is, its name prefixed with
+ * its type.  Intended for use in log messages. */
+const char *
+dpif_name(const struct dpif *dpif)
+{
+    const char *full_name = dpif->full_name;
+
+    return full_name;
+}
+
+/* Returns the base name of datapath 'dpif', that is, its name without the
+ * type prefix.  Intended for use in device names. */
+const char *
+dpif_base_name(const struct dpif *dpif)
+{
+    const char *base_name = dpif->base_name;
+
+    return base_name;
+}
+
+/* Enumerates all names that may be used to open 'dpif' into 'all_names'.  The
+ * Linux datapath, for example, supports opening a datapath both by number,
+ * e.g. "dp0", and by the name of the datapath's local port.  For some
+ * datapaths, this might be an infinite set (e.g. in a file name, slashes may
+ * be duplicated any number of times), in which case only the names most likely
+ * to be used will be enumerated.
+ *
+ * The caller must already have initialized 'all_names'.  Any existing names in
+ * 'all_names' will not be disturbed.
+ *
+ * Returns 0 if successful, otherwise the nonzero error value returned by the
+ * provider's 'get_all_names' callback (a rate-limited warning is logged in
+ * that case).  If the provider has no such callback, only
+ * dpif_base_name(dpif) is added and 0 is returned. */
int
-dpif_set_listen_mask(struct dpif *dpif, int listen_mask)
+dpif_get_all_names(const struct dpif *dpif, struct svec *all_names)
{
- return do_ioctl(dpif, ODP_SET_LISTEN_MASK, "ODP_SET_LISTEN_MASK",
- &listen_mask);
+    if (dpif->dpif_class->get_all_names) {
+        int error = dpif->dpif_class->get_all_names(dpif, all_names);
+        if (error) {
+            VLOG_WARN_RL(&error_rl,
+                         "failed to retrieve names for datapath %s: %s",
+                         dpif_name(dpif), strerror(error));
+        }
+        return error;
+    } else {
+        /* No provider-specific enumeration: the base name is the only name
+         * known here. */
+        svec_add(all_names, dpif_base_name(dpif));
+        return 0;
+    }
}
+
+/* Destroys the datapath that 'dpif' is connected to, first removing all of its
+ * ports. After calling this function, it does not make sense to pass 'dpif'
+ * to any functions other than dpif_name() or dpif_close(). */
int
-dpif_purge(struct dpif *dpif)
+dpif_delete(struct dpif *dpif)
{
- struct odp_stats stats;
- unsigned int i;
int error;
- COVERAGE_INC(dpif_purge);
+ COVERAGE_INC(dpif_destroy);
- error = dpif_get_dp_stats(dpif, &stats);
+ error = dpif->dpif_class->destroy(dpif);
+ log_operation(dpif, "delete", error);
+ return error;
+}
+
+/* Retrieves statistics for 'dpif' into 'stats'. Returns 0 if successful,
+ * otherwise a positive errno value. */
+int
+dpif_get_dp_stats(const struct dpif *dpif, struct odp_stats *stats)
+{
+ int error = dpif->dpif_class->get_stats(dpif, stats);
if (error) {
- return error;
+ memset(stats, 0, sizeof *stats);
}
+ log_operation(dpif, "get_stats", error);
+ return error;
+}
- for (i = 0; i < stats.max_miss_queue + stats.max_action_queue; i++) {
- struct ofpbuf *buf;
- error = dpif_recv(dpif, &buf);
- if (error) {
- return error == EAGAIN ? 0 : error;
- }
- ofpbuf_delete(buf);
+/* Retrieves the current IP fragment handling policy for 'dpif' into
+ * '*drop_frags': true indicates that fragments are dropped, false indicates
+ * that fragments are treated in the same way as other IP packets (except that
+ * the L4 header cannot be read). Returns 0 if successful, otherwise a
+ * positive errno value. */
+int
+dpif_get_drop_frags(const struct dpif *dpif, bool *drop_frags)
+{
+ int error = dpif->dpif_class->get_drop_frags(dpif, drop_frags);
+ if (error) {
+ *drop_frags = false;
}
- return 0;
+ log_operation(dpif, "get_drop_frags", error);
+ return error;
}
+/* Changes 'dpif''s treatment of IP fragments to 'drop_frags', whose meaning is
+ * the same as for the get_drop_frags member function. Returns 0 if
+ * successful, otherwise a positive errno value. */
int
-dpif_port_add(struct dpif *dpif, const char *devname, uint16_t port_no,
- uint16_t flags)
+dpif_set_drop_frags(struct dpif *dpif, bool drop_frags)
{
- struct odp_port port;
+ int error = dpif->dpif_class->set_drop_frags(dpif, drop_frags);
+ log_operation(dpif, "set_drop_frags", error);
+ return error;
+}
+
+/* Attempts to add 'netdev' as a port on 'dpif'. If successful, returns 0 and
+ * sets '*port_nop' to the new port's port number (if 'port_nop' is non-null).
+ * On failure, returns a positive errno value and sets '*port_nop' to
+ * UINT16_MAX (if 'port_nop' is non-null). */
+int
+dpif_port_add(struct dpif *dpif, struct netdev *netdev, uint16_t *port_nop)
+{
+ const char *netdev_name = netdev_get_name(netdev);
+ uint16_t port_no;
+ int error;
COVERAGE_INC(dpif_port_add);
- memset(&port, 0, sizeof port);
- strncpy(port.devname, devname, sizeof port.devname);
- port.port = port_no;
- port.flags = flags;
- if (!ioctl(dpif->fd, ODP_PORT_ADD, &port)) {
- VLOG_DBG_RL(&dpmsg_rl, "dp%u: added %s as port %"PRIu16,
- dpif->minor, devname, port_no);
- return 0;
+
+ error = dpif->dpif_class->port_add(dpif, netdev, &port_no);
+ if (!error) {
+ VLOG_DBG_RL(&dpmsg_rl, "%s: added %s as port %"PRIu16,
+ dpif_name(dpif), netdev_name, port_no);
} else {
- VLOG_WARN_RL(&error_rl, "dp%u: failed to add %s as port "
- "%"PRIu16": %s", dpif->minor, devname, port_no,
- strerror(errno));
- return errno;
+ VLOG_WARN_RL(&error_rl, "%s: failed to add %s as port: %s",
+ dpif_name(dpif), netdev_name, strerror(error));
+ port_no = UINT16_MAX;
+ }
+ if (port_nop) {
+ *port_nop = port_no;
}
+ return error;
}
+/* Attempts to remove 'dpif''s port number 'port_no'. Returns 0 if successful,
+ * otherwise a positive errno value. */
int
dpif_port_del(struct dpif *dpif, uint16_t port_no)
{
- int tmp = port_no;
+ int error;
+
COVERAGE_INC(dpif_port_del);
- return do_ioctl(dpif, ODP_PORT_DEL, "ODP_PORT_DEL", &tmp);
+
+ error = dpif->dpif_class->port_del(dpif, port_no);
+ if (!error) {
+ VLOG_DBG_RL(&dpmsg_rl, "%s: port_del(%"PRIu16")",
+ dpif_name(dpif), port_no);
+ } else {
+ log_operation(dpif, "port_del", error);
+ }
+ return error;
}
+/* Looks up port number 'port_no' in 'dpif'. On success, returns 0 and
+ * initializes '*port' appropriately; on failure, returns a positive errno
+ * value. */
int
dpif_port_query_by_number(const struct dpif *dpif, uint16_t port_no,
struct odp_port *port)
{
- memset(port, 0, sizeof *port);
- port->port = port_no;
- if (!ioctl(dpif->fd, ODP_PORT_QUERY, port)) {
- VLOG_DBG_RL(&dpmsg_rl, "dp%u: port %"PRIu16" is device %s",
- dpif->minor, port_no, port->devname);
- return 0;
+ int error = dpif->dpif_class->port_query_by_number(dpif, port_no, port);
+ if (!error) {
+ VLOG_DBG_RL(&dpmsg_rl, "%s: port %"PRIu16" is device %s",
+ dpif_name(dpif), port_no, port->devname);
} else {
- VLOG_WARN_RL(&error_rl, "dp%u: failed to query port %"PRIu16": %s",
- dpif->minor, port_no, strerror(errno));
- return errno;
+ memset(port, 0, sizeof *port);
+ VLOG_WARN_RL(&error_rl, "%s: failed to query port %"PRIu16": %s",
+ dpif_name(dpif), port_no, strerror(error));
}
+ return error;
}
+/* Looks up port named 'devname' in 'dpif'. On success, returns 0 and
+ * initializes '*port' appropriately; on failure, returns a positive errno
+ * value. */
int
dpif_port_query_by_name(const struct dpif *dpif, const char *devname,
struct odp_port *port)
{
- memset(port, 0, sizeof *port);
- strncpy(port->devname, devname, sizeof port->devname);
- if (!ioctl(dpif->fd, ODP_PORT_QUERY, port)) {
- VLOG_DBG_RL(&dpmsg_rl, "dp%u: device %s is on port %"PRIu16,
- dpif->minor, devname, port->port);
- return 0;
+ int error = dpif->dpif_class->port_query_by_name(dpif, devname, port);
+ if (!error) {
+ VLOG_DBG_RL(&dpmsg_rl, "%s: device %s is on port %"PRIu16,
+ dpif_name(dpif), devname, port->port);
} else {
- VLOG_WARN_RL(&error_rl, "dp%u: failed to query port %s: %s",
- dpif->minor, devname, strerror(errno));
- return errno;
+ memset(port, 0, sizeof *port);
+
+ /* Log level is DBG here because all the current callers are interested
+ * in whether 'dpif' actually has a port 'devname', so that it's not an
+ * issue worth logging if it doesn't. */
+ VLOG_DBG_RL(&error_rl, "%s: failed to query port %s: %s",
+ dpif_name(dpif), devname, strerror(error));
}
+ return error;
}
+/* Looks up port number 'port_no' in 'dpif'. On success, returns 0 and copies
+ * the port's name into the 'name_size' bytes in 'name', ensuring that the
+ * result is null-terminated. On failure, returns a positive errno value and
+ * makes 'name' the empty string. */
int
dpif_port_get_name(struct dpif *dpif, uint16_t port_no,
char *name, size_t name_size)
return error;
}
+/* Obtains a list of all the ports in 'dpif'.
+ *
+ * If successful, returns 0 and sets '*portsp' to point to an array of
+ * appropriately initialized port structures and '*n_portsp' to the number of
+ * ports in the array. The caller is responsible for freeing '*portsp' by
+ * calling free().
+ *
+ * On failure, returns a positive errno value and sets '*portsp' to NULL and
+ * '*n_portsp' to 0. */
int
dpif_port_list(const struct dpif *dpif,
- struct odp_port **ports, size_t *n_ports)
+ struct odp_port **portsp, size_t *n_portsp)
{
- struct odp_portvec pv;
- struct odp_stats stats;
+ struct odp_port *ports;
+ size_t n_ports = 0;
int error;
- do {
+ for (;;) {
+ struct odp_stats stats;
+ int retval;
+
error = dpif_get_dp_stats(dpif, &stats);
if (error) {
- goto error;
+ goto exit;
}
- *ports = xcalloc(1, stats.n_ports * sizeof **ports);
- pv.ports = *ports;
- pv.n_ports = stats.n_ports;
- error = do_ioctl(dpif, ODP_PORT_LIST, "ODP_PORT_LIST", &pv);
- if (error) {
- free(*ports);
- goto error;
+ ports = xcalloc(stats.n_ports, sizeof *ports);
+ retval = dpif->dpif_class->port_list(dpif, ports, stats.n_ports);
+ if (retval < 0) {
+ /* Hard error. */
+ error = -retval;
+ free(ports);
+ goto exit;
+ } else if (retval <= stats.n_ports) {
+ /* Success. */
+ error = 0;
+ n_ports = retval;
+ goto exit;
+ } else {
+ /* Soft error: port count increased behind our back. Try again. */
+ free(ports);
}
- } while (pv.n_ports != stats.n_ports);
- *n_ports = pv.n_ports;
- return 0;
+ }
-error:
- *ports = NULL;
- *n_ports = 0;
+exit:
+ if (error) {
+ *portsp = NULL;
+ *n_portsp = 0;
+ } else {
+ *portsp = ports;
+ *n_portsp = n_ports;
+ }
+ log_operation(dpif, "port_list", error);
return error;
}
+/* Polls for changes in the set of ports in 'dpif'. If the set of ports in
+ * 'dpif' has changed, this function does one of the following:
+ *
+ * - Stores the name of the device that was added to or deleted from 'dpif' in
+ * '*devnamep' and returns 0. The caller is responsible for freeing
+ * '*devnamep' (with free()) when it no longer needs it.
+ *
+ * - Returns ENOBUFS and sets '*devnamep' to NULL.
+ *
+ * This function may also return 'false positives', where it returns 0 and
+ * '*devnamep' names a device that was not actually added or deleted or it
+ * returns ENOBUFS without any change.
+ *
+ * Returns EAGAIN if the set of ports in 'dpif' has not changed. May also
+ * return other positive errno values to indicate that something has gone
+ * wrong. */
int
-dpif_port_group_set(struct dpif *dpif, uint16_t group,
- const uint16_t ports[], size_t n_ports)
+dpif_port_poll(const struct dpif *dpif, char **devnamep)
{
- struct odp_port_group pg;
-
- COVERAGE_INC(dpif_port_group_set);
- assert(n_ports <= UINT16_MAX);
- pg.group = group;
- pg.ports = (uint16_t *) ports;
- pg.n_ports = n_ports;
- return do_ioctl(dpif, ODP_PORT_GROUP_SET, "ODP_PORT_GROUP_SET", &pg);
+ int error = dpif->dpif_class->port_poll(dpif, devnamep);
+ if (error) {
+ *devnamep = NULL;
+ }
+ return error;
}
-/* Careful: '*n_out' can be greater than 'n_ports' on return, if 'n_ports' is
- * less than the number of ports in 'group'. */
-int
-dpif_port_group_get(const struct dpif *dpif, uint16_t group,
- uint16_t ports[], size_t n_ports, size_t *n_out)
+/* Arranges for the poll loop to wake up when port_poll(dpif) will return a
+ * value other than EAGAIN. */
+void
+dpif_port_poll_wait(const struct dpif *dpif)
{
- struct odp_port_group pg;
- int error;
-
- assert(n_ports <= UINT16_MAX);
- pg.group = group;
- pg.ports = ports;
- pg.n_ports = n_ports;
- error = do_ioctl(dpif, ODP_PORT_GROUP_GET, "ODP_PORT_GROUP_GET", &pg);
- *n_out = error ? 0 : pg.n_ports;
- return error;
+ dpif->dpif_class->port_poll_wait(dpif);
}
+/* Deletes all flows from 'dpif'. Returns 0 if successful, otherwise a
+ * positive errno value. */
int
dpif_flow_flush(struct dpif *dpif)
{
+ int error;
+
COVERAGE_INC(dpif_flow_flush);
- return do_ioctl(dpif, ODP_FLOW_FLUSH, "ODP_FLOW_FLUSH", NULL);
-}
-static enum vlog_level
-flow_message_log_level(int error)
-{
- return error ? VLL_WARN : VLL_DBG;
+ error = dpif->dpif_class->flow_flush(dpif);
+ log_operation(dpif, "flow_flush", error);
+ return error;
}
-static bool
-should_log_flow_message(int error)
+/* Queries 'dpif' for a flow entry matching 'flow->key'.
+ *
+ * If a flow matching 'flow->key' exists in 'dpif', stores statistics for the
+ * flow into 'flow->stats'. If 'flow->actions_len' is zero, then
+ * 'flow->actions' is ignored. If 'flow->actions_len' is nonzero, then
+ * 'flow->actions' should point to an array of the specified number of bytes.
+ * At most that many bytes of the flow's actions will be copied into that
+ * array. 'flow->actions_len' will be updated to the number of bytes of
+ * actions actually present in the flow, which may be greater than the amount
+ * stored if the flow has more actions than space available in the array.
+ *
+ * If no flow matching 'flow->key' exists in 'dpif', returns ENOENT. On other
+ * failure, returns a positive errno value. */
+int
+dpif_flow_get(const struct dpif *dpif, struct odp_flow *flow)
{
- return !vlog_should_drop(THIS_MODULE, flow_message_log_level(error),
- error ? &error_rl : &dpmsg_rl);
-}
+ int error;
-static void
-log_flow_message(const struct dpif *dpif, int error,
- const char *operation,
- const flow_t *flow, const struct odp_flow_stats *stats,
- const union odp_action *actions, size_t n_actions)
-{
- struct ds ds = DS_EMPTY_INITIALIZER;
- ds_put_format(&ds, "dp%u: ", dpif->minor);
- if (error) {
- ds_put_cstr(&ds, "failed to ");
+ COVERAGE_INC(dpif_flow_get);
+
+ check_rw_odp_flow(flow);
+ error = dpif->dpif_class->flow_get(dpif, flow, 1);
+ if (!error) {
+ error = flow->stats.error;
}
- ds_put_format(&ds, "%s ", operation);
if (error) {
- ds_put_format(&ds, "(%s) ", strerror(error));
+ /* Make the results predictable on error. */
+ memset(&flow->stats, 0, sizeof flow->stats);
+ flow->actions_len = 0;
}
- flow_format(&ds, flow);
- if (stats) {
- ds_put_cstr(&ds, ", ");
- format_odp_flow_stats(&ds, stats);
- }
- if (actions || n_actions) {
- ds_put_cstr(&ds, ", actions:");
- format_odp_actions(&ds, actions, n_actions);
+ if (should_log_flow_message(error)) {
+ log_flow_operation(dpif, "flow_get", error, flow);
}
- vlog(THIS_MODULE, flow_message_log_level(error), "%s", ds_cstr(&ds));
- ds_destroy(&ds);
+ return error;
}
-static int
-do_flow_ioctl(const struct dpif *dpif, int cmd, struct odp_flow *flow,
- const char *operation, bool show_stats)
+/* For each flow 'flow' in the 'n' flows in 'flows':
+ *
+ * - If a flow matching 'flow->key' exists in 'dpif':
+ *
+ * Stores 0 into 'flow->stats.error' and stores statistics for the flow
+ * into 'flow->stats'.
+ *
+ * If 'flow->actions_len' is zero, then 'flow->actions' is ignored. If
+ * 'flow->actions_len' is nonzero, then 'flow->actions' should point to an
+ * array of the specified number of bytes. At most that amount of flow's
+ * actions will be copied into that array. 'flow->actions_len' will be
+ * updated to the number of bytes of actions actually present in the flow,
+ * which may be greater than the amount stored if the flow's actions are
+ * longer than the available space.
+ *
+ * - Flow-specific errors are indicated by a positive errno value in
+ * 'flow->stats.error'. In particular, ENOENT indicates that no flow
+ * matching 'flow->key' exists in 'dpif'. When an error value is stored, the
+ * contents of 'flow->key' are preserved but other members of 'flow' should
+ * be treated as indeterminate.
+ *
+ * Returns 0 if all 'n' flows in 'flows' were updated (whether they were
+ * individually successful or not is indicated by 'flow->stats.error',
+ * however). Returns a positive errno value if an error that prevented this
+ * update occurred, in which case the caller must not depend on any elements in
+ * 'flows' being updated or not updated.
+ */
+int
+dpif_flow_get_multiple(const struct dpif *dpif,
+ struct odp_flow flows[], size_t n)
{
- int error = do_ioctl(dpif, cmd, NULL, flow);
- if (error && show_stats) {
- flow->n_actions = 0;
- }
- if (should_log_flow_message(error)) {
- log_flow_message(dpif, error, operation, &flow->key,
- show_stats && !error ? &flow->stats : NULL,
- flow->actions, flow->n_actions);
+ int error;
+ size_t i;
+
+ COVERAGE_ADD(dpif_flow_get, n);
+
+ for (i = 0; i < n; i++) {
+ check_rw_odp_flow(&flows[i]);
}
+
+ error = dpif->dpif_class->flow_get(dpif, flows, n);
+ log_operation(dpif, "flow_get_multiple", error);
return error;
}
+/* Adds or modifies a flow in 'dpif' as specified in 'put':
+ *
+ * - If the flow specified in 'put->flow' does not exist in 'dpif', then
+ * behavior depends on whether ODPPF_CREATE is specified in 'put->flags': if
+ * it is, the flow will be added, otherwise the operation will fail with
+ * ENOENT.
+ *
+ * - Otherwise, the flow specified in 'put->flow' does exist in 'dpif'.
+ * Behavior in this case depends on whether ODPPF_MODIFY is specified in
+ * 'put->flags': if it is, the flow's actions will be updated, otherwise the
+ * operation will fail with EEXIST. If the flow's actions are updated, then
+ * its statistics will be zeroed if ODPPF_ZERO_STATS is set in 'put->flags',
+ * left as-is otherwise.
+ *
+ * Returns 0 if successful, otherwise a positive errno value.
+ */
int
dpif_flow_put(struct dpif *dpif, struct odp_flow_put *put)
{
- int error = do_ioctl(dpif, ODP_FLOW_PUT, NULL, put);
+ int error;
+
COVERAGE_INC(dpif_flow_put);
+
+ error = dpif->dpif_class->flow_put(dpif, put);
if (should_log_flow_message(error)) {
- struct ds operation = DS_EMPTY_INITIALIZER;
- ds_put_cstr(&operation, "put");
- if (put->flags & ODPPF_CREATE) {
- ds_put_cstr(&operation, "[create]");
- }
- if (put->flags & ODPPF_MODIFY) {
- ds_put_cstr(&operation, "[modify]");
- }
- if (put->flags & ODPPF_ZERO_STATS) {
- ds_put_cstr(&operation, "[zero]");
- }
-#define ODPPF_ALL (ODPPF_CREATE | ODPPF_MODIFY | ODPPF_ZERO_STATS)
- if (put->flags & ~ODPPF_ALL) {
- ds_put_format(&operation, "[%x]", put->flags & ~ODPPF_ALL);
- }
- log_flow_message(dpif, error, ds_cstr(&operation), &put->flow.key,
- !error ? &put->flow.stats : NULL,
- put->flow.actions, put->flow.n_actions);
- ds_destroy(&operation);
+ log_flow_put(dpif, error, put);
}
return error;
}
+/* Deletes a flow matching 'flow->key' from 'dpif' or returns ENOENT if 'dpif'
+ * does not contain such a flow.
+ *
+ * If successful, updates 'flow->stats', 'flow->actions_len', and
+ * 'flow->actions' as described for dpif_flow_get(). */
int
dpif_flow_del(struct dpif *dpif, struct odp_flow *flow)
{
+ int error;
+
COVERAGE_INC(dpif_flow_del);
+
check_rw_odp_flow(flow);
memset(&flow->stats, 0, sizeof flow->stats);
- return do_flow_ioctl(dpif, ODP_FLOW_DEL, flow, "delete flow", true);
+
+ error = dpif->dpif_class->flow_del(dpif, flow);
+ if (should_log_flow_message(error)) {
+ log_flow_operation(dpif, "delete flow", error, flow);
+ }
+ return error;
}
-int
-dpif_flow_get(const struct dpif *dpif, struct odp_flow *flow)
+/* Initializes 'dump' to begin dumping the flows in a dpif.
+ *
+ * This function provides no status indication. An error status for the entire
+ * dump operation is provided when it is completed by calling
+ * dpif_flow_dump_done().
+ */
+void
+dpif_flow_dump_start(struct dpif_flow_dump *dump, const struct dpif *dpif)
{
- COVERAGE_INC(dpif_flow_query);
- check_rw_odp_flow(flow);
- memset(&flow->stats, 0, sizeof flow->stats);
- return do_flow_ioctl(dpif, ODP_FLOW_GET, flow, "get flow", true);
+ dump->dpif = dpif;
+ dump->error = dpif->dpif_class->flow_dump_start(dpif, &dump->state);
+ log_operation(dpif, "flow_dump_start", dump->error);
}
-int
-dpif_flow_get_multiple(const struct dpif *dpif,
- struct odp_flow flows[], size_t n)
+/* Attempts to retrieve another flow from 'dump', which must have been
+ * initialized with dpif_flow_dump_start(). On success, stores a new odp_flow
+ * into 'flow' and returns true. Failure might indicate an actual error or
+ * merely the end of the flow table. An error status for the entire dump
+ * operation is provided when it is completed by calling dpif_flow_dump_done().
+ *
+ * Dumping flow actions is optional. To avoid dumping actions initialize
+ * 'flow->actions' to NULL and 'flow->actions_len' to 0. Otherwise, point
+ * 'flow->actions' to an array of struct nlattr and initialize
+ * 'flow->actions_len' with the number of bytes of Netlink attributes.
+ * dpif_flow_dump_next() will fill in as many actions as will fit into the
+ * provided array and update 'flow->actions_len' with the number of bytes
+ * required (regardless of whether they fit in the provided space). */
+bool
+dpif_flow_dump_next(struct dpif_flow_dump *dump, struct odp_flow *flow)
{
- struct odp_flowvec fv;
- size_t i;
+ const struct dpif *dpif = dump->dpif;
- COVERAGE_ADD(dpif_flow_query_multiple, n);
- fv.flows = flows;
- fv.n_flows = n;
- for (i = 0; i < n; i++) {
- check_rw_odp_flow(&flows[i]);
- }
- return do_ioctl(dpif, ODP_FLOW_GET_MULTIPLE, "ODP_FLOW_GET_MULTIPLE",
- &fv);
-}
+ check_rw_odp_flow(flow);
-int
-dpif_flow_list(const struct dpif *dpif, struct odp_flow flows[], size_t n,
- size_t *n_out)
-{
- struct odp_flowvec fv;
- uint32_t i;
- int error;
+ if (dump->error) {
+ return false;
+ }
- COVERAGE_INC(dpif_flow_query_list);
- fv.flows = flows;
- fv.n_flows = n;
- if (RUNNING_ON_VALGRIND) {
- memset(flows, 0, n * sizeof *flows);
+ dump->error = dpif->dpif_class->flow_dump_next(dpif, dump->state, flow);
+ if (dump->error == EOF) {
+ VLOG_DBG_RL(&dpmsg_rl, "%s: dumped all flows", dpif_name(dpif));
} else {
- for (i = 0; i < n; i++) {
- flows[i].actions = NULL;
- flows[i].n_actions = 0;
+ if (should_log_flow_message(dump->error)) {
+ log_flow_operation(dpif, "flow_dump_next", dump->error, flow);
}
}
- error = do_ioctl(dpif, ODP_FLOW_LIST, NULL, &fv);
- if (error) {
- *n_out = 0;
- VLOG_WARN_RL(&error_rl, "dp%u: flow list failed (%s)",
- dpif->minor, strerror(error));
- } else {
- COVERAGE_ADD(dpif_flow_query_list_n, fv.n_flows);
- *n_out = fv.n_flows;
- VLOG_DBG_RL(&dpmsg_rl, "dp%u: listed %zu flows", dpif->minor, *n_out);
+
+ if (dump->error) {
+ dpif->dpif_class->flow_dump_done(dpif, dump->state);
+ return false;
}
- return error;
+ return true;
}
+/* Completes flow table dump operation 'dump', which must have been initialized
+ * with dpif_flow_dump_start(). Returns 0 if the dump operation was
+ * error-free, otherwise a positive errno value describing the problem. */
int
-dpif_flow_list_all(const struct dpif *dpif,
- struct odp_flow **flowsp, size_t *np)
+dpif_flow_dump_done(struct dpif_flow_dump *dump)
{
- struct odp_stats stats;
- struct odp_flow *flows;
- size_t n_flows;
- int error;
-
- *flowsp = NULL;
- *np = 0;
-
- error = dpif_get_dp_stats(dpif, &stats);
- if (error) {
- return error;
+ const struct dpif *dpif = dump->dpif;
+ if (!dump->error) {
+ dump->error = dpif->dpif_class->flow_dump_done(dpif, dump->state);
+ log_operation(dpif, "flow_dump_done", dump->error);
}
-
- flows = xmalloc(sizeof *flows * stats.n_flows);
- error = dpif_flow_list(dpif, flows, stats.n_flows, &n_flows);
- if (error) {
- free(flows);
- return error;
- }
-
- if (stats.n_flows != n_flows) {
- VLOG_WARN_RL(&error_rl, "dp%u: datapath stats reported %"PRIu32" "
- "flows but flow listing reported %zu",
- dpif->minor, stats.n_flows, n_flows);
- }
- *flowsp = flows;
- *np = n_flows;
- return 0;
+ return dump->error == EOF ? 0 : dump->error;
}
+/* Causes 'dpif' to perform the 'actions_len' bytes of actions in 'actions' on
+ * the Ethernet frame specified in 'buf'.
+ *
+ * Returns 0 if successful, otherwise a positive errno value. */
int
-dpif_execute(struct dpif *dpif, uint16_t in_port,
- const union odp_action actions[], size_t n_actions,
+dpif_execute(struct dpif *dpif,
+ const struct nlattr *actions, size_t actions_len,
const struct ofpbuf *buf)
{
int error;
COVERAGE_INC(dpif_execute);
- if (n_actions > 0) {
- struct odp_execute execute;
- memset(&execute, 0, sizeof execute);
- execute.in_port = in_port;
- execute.actions = (union odp_action *) actions;
- execute.n_actions = n_actions;
- execute.data = buf->data;
- execute.length = buf->size;
- error = do_ioctl(dpif, ODP_EXECUTE, NULL, &execute);
+ if (actions_len > 0) {
+ error = dpif->dpif_class->execute(dpif, actions, actions_len, buf);
} else {
error = 0;
}
if (!(error ? VLOG_DROP_WARN(&error_rl) : VLOG_DROP_DBG(&dpmsg_rl))) {
struct ds ds = DS_EMPTY_INITIALIZER;
char *packet = ofp_packet_to_string(buf->data, buf->size, buf->size);
- ds_put_format(&ds, "dp%u: execute ", dpif->minor);
- format_odp_actions(&ds, actions, n_actions);
+ ds_put_format(&ds, "%s: execute ", dpif_name(dpif));
+ format_odp_actions(&ds, actions, actions_len);
if (error) {
ds_put_format(&ds, " failed (%s)", strerror(error));
}
return error;
}
+/* Retrieves 'dpif''s "listen mask" into '*listen_mask'. Each ODPL_* bit set
+ * in '*listen_mask' indicates that dpif_recv() will receive messages of that
+ * type. Returns 0 if successful, otherwise a positive errno value. */
int
-dpif_recv(struct dpif *dpif, struct ofpbuf **bufp)
+dpif_recv_get_mask(const struct dpif *dpif, int *listen_mask)
{
- struct ofpbuf *buf;
- int retval;
- int error;
-
- buf = ofpbuf_new(65536);
- retval = read(dpif->fd, ofpbuf_tail(buf), ofpbuf_tailroom(buf));
- if (retval < 0) {
- error = errno;
- if (error != EAGAIN) {
- VLOG_WARN_RL(&error_rl, "dp%u: read failed: %s",
- dpif->minor, strerror(error));
- }
- } else if (retval >= sizeof(struct odp_msg)) {
- struct odp_msg *msg = buf->data;
- if (msg->length <= retval) {
- buf->size += retval;
- if (VLOG_IS_DBG_ENABLED()) {
- void *payload = msg + 1;
- size_t length = buf->size - sizeof *msg;
- char *s = ofp_packet_to_string(payload, length, length);
- VLOG_DBG_RL(&dpmsg_rl, "dp%u: received %s message of length "
- "%zu on port %"PRIu16": %s", dpif->minor,
- (msg->type == _ODPL_MISS_NR ? "miss"
- : msg->type == _ODPL_ACTION_NR ? "action"
- : "<unknown>"),
- msg->length - sizeof(struct odp_msg),
- msg->port, s);
- free(s);
- }
- *bufp = buf;
- COVERAGE_INC(dpif_recv);
- return 0;
- } else {
- VLOG_WARN_RL(&error_rl, "dp%u: discarding message truncated "
- "from %zu bytes to %d",
- dpif->minor, msg->length, retval);
- error = ERANGE;
- }
- } else if (!retval) {
- VLOG_WARN_RL(&error_rl, "dp%u: unexpected end of file", dpif->minor);
- error = EPROTO;
- } else {
- VLOG_WARN_RL(&error_rl,
- "dp%u: discarding too-short message (%d bytes)",
- dpif->minor, retval);
- error = ERANGE;
+ int error = dpif->dpif_class->recv_get_mask(dpif, listen_mask);
+ if (error) {
+ *listen_mask = 0;
}
-
- *bufp = NULL;
- ofpbuf_delete(buf);
+ log_operation(dpif, "recv_get_mask", error);
return error;
}
-void
-dpif_recv_wait(struct dpif *dpif)
+/* Sets 'dpif''s "listen mask" to 'listen_mask'. Each ODPL_* bit set in
+ * 'listen_mask' requests that dpif_recv() receive messages of that type.
+ * Returns 0 if successful, otherwise a positive errno value. */
+int
+dpif_recv_set_mask(struct dpif *dpif, int listen_mask)
{
- poll_fd_wait(dpif->fd, POLLIN);
+ int error = dpif->dpif_class->recv_set_mask(dpif, listen_mask);
+ log_operation(dpif, "recv_set_mask", error);
+ return error;
}
-\f
-struct dpifmon {
- struct dpif dpif;
- struct nl_sock *sock;
- int local_ifindex;
-};
+/* Retrieve the sFlow sampling probability. '*probability' is expressed as the
+ * number of packets out of UINT_MAX to sample, i.e. probability/UINT_MAX is
+ * the probability of sampling a given packet.
+ *
+ * Returns 0 if successful, otherwise a positive errno value. EOPNOTSUPP
+ * indicates that 'dpif' does not support sFlow sampling. */
int
-dpifmon_create(const char *datapath_name, struct dpifmon **monp)
+dpif_get_sflow_probability(const struct dpif *dpif, uint32_t *probability)
{
- struct dpifmon *mon;
- char local_name[IFNAMSIZ];
- int error;
-
- mon = *monp = xmalloc(sizeof *mon);
-
- error = dpif_open(datapath_name, &mon->dpif);
- if (error) {
- goto error;
- }
- error = dpif_port_get_name(&mon->dpif, ODPP_LOCAL,
- local_name, sizeof local_name);
+ int error = (dpif->dpif_class->get_sflow_probability
+ ? dpif->dpif_class->get_sflow_probability(dpif, probability)
+ : EOPNOTSUPP);
if (error) {
- goto error_close_dpif;
- }
-
- mon->local_ifindex = if_nametoindex(local_name);
- if (!mon->local_ifindex) {
- error = errno;
- VLOG_WARN("could not get ifindex of %s device: %s",
- local_name, strerror(errno));
- goto error_close_dpif;
+ *probability = 0;
}
+ log_operation(dpif, "get_sflow_probability", error);
+ return error;
+}
- error = nl_sock_create(NETLINK_ROUTE, RTNLGRP_LINK, 0, 0, &mon->sock);
- if (error) {
- VLOG_WARN("could not create rtnetlink socket: %s", strerror(error));
- goto error_close_dpif;
- }
-
- return 0;
-
-error_close_dpif:
- dpif_close(&mon->dpif);
-error:
- free(mon);
- *monp = NULL;
+/* Set the sFlow sampling probability. 'probability' is expressed as the
+ * number of packets out of UINT_MAX to sample, i.e. probability/UINT_MAX is
+ * the probability of sampling a given packet.
+ *
+ * Returns 0 if successful, otherwise a positive errno value. EOPNOTSUPP
+ * indicates that 'dpif' does not support sFlow sampling. */
+int
+dpif_set_sflow_probability(struct dpif *dpif, uint32_t probability)
+{
+ int error = (dpif->dpif_class->set_sflow_probability
+ ? dpif->dpif_class->set_sflow_probability(dpif, probability)
+ : EOPNOTSUPP);
+ log_operation(dpif, "set_sflow_probability", error);
return error;
}
-void
-dpifmon_destroy(struct dpifmon *mon)
+/* Attempts to receive a message from 'dpif'. If successful, stores the
+ * message into '*packetp'. The message, if one is received, will begin with
+ * 'struct odp_msg' as a header, and will have at least DPIF_RECV_MSG_PADDING
+ * bytes of headroom. Only messages of the types selected with
+ * dpif_recv_set_mask() will ordinarily be received (but if a message type is
+ * enabled and then later disabled, some stragglers might pop up).
+ *
+ * Returns 0 if successful, otherwise a positive errno value. Returns EAGAIN
+ * if no message is immediately available. */
+int
+dpif_recv(struct dpif *dpif, struct ofpbuf **packetp)
{
- if (mon) {
- dpif_close(&mon->dpif);
- nl_sock_destroy(mon->sock);
+ int error = dpif->dpif_class->recv(dpif, packetp);
+ if (!error) {
+ struct ofpbuf *buf = *packetp;
+
+ assert(ofpbuf_headroom(buf) >= DPIF_RECV_MSG_PADDING);
+ if (VLOG_IS_DBG_ENABLED()) {
+ struct odp_msg *msg = buf->data;
+ void *payload = msg + 1;
+ size_t payload_len = buf->size - sizeof *msg;
+ char *s = ofp_packet_to_string(payload, payload_len, payload_len);
+ VLOG_DBG_RL(&dpmsg_rl, "%s: received %s message of length "
+ "%zu on port %"PRIu16": %s", dpif_name(dpif),
+ (msg->type == _ODPL_MISS_NR ? "miss"
+ : msg->type == _ODPL_ACTION_NR ? "action"
+ : msg->type == _ODPL_SFLOW_NR ? "sFlow"
+ : "<unknown>"),
+ payload_len, msg->port, s);
+ free(s);
+ }
+ } else {
+ *packetp = NULL;
}
+ return error;
}
+/* Discards all messages that would otherwise be received by dpif_recv() on
+ * 'dpif'. Returns 0 if successful, otherwise a positive errno value. */
int
-dpifmon_poll(struct dpifmon *mon, char **devnamep)
+dpif_recv_purge(struct dpif *dpif)
{
- static struct vlog_rate_limit slow_rl = VLOG_RATE_LIMIT_INIT(1, 5);
- static const struct nl_policy rtnlgrp_link_policy[] = {
- [IFLA_IFNAME] = { .type = NL_A_STRING },
- [IFLA_MASTER] = { .type = NL_A_U32, .optional = true },
- };
- struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)];
- struct ofpbuf *buf;
+ struct odp_stats stats;
+ unsigned int i;
int error;
- *devnamep = NULL;
-again:
- error = nl_sock_recv(mon->sock, &buf, false);
- switch (error) {
- case 0:
- if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg),
- rtnlgrp_link_policy,
- attrs, ARRAY_SIZE(rtnlgrp_link_policy))) {
- VLOG_WARN_RL(&slow_rl, "received bad rtnl message");
- error = ENOBUFS;
- } else {
- const char *devname = nl_attr_get_string(attrs[IFLA_IFNAME]);
- bool for_us;
-
- if (attrs[IFLA_MASTER]) {
- uint32_t master_ifindex = nl_attr_get_u32(attrs[IFLA_MASTER]);
- for_us = master_ifindex == mon->local_ifindex;
- } else {
- /* It's for us if that device is one of our ports. This is
- * open-coded instead of using dpif_port_query_by_name() to
- * avoid logging a warning on failure. */
- struct odp_port port;
- memset(&port, 0, sizeof port);
- strncpy(port.devname, devname, sizeof port.devname);
- for_us = !ioctl(mon->dpif.fd, ODP_PORT_QUERY, &port);
- }
-
- if (!for_us) {
- /* Not for us, try again. */
- ofpbuf_delete(buf);
- COVERAGE_INC(dpifmon_poll_false_wakeup);
- goto again;
- }
- COVERAGE_INC(dpifmon_poll_changed);
- *devnamep = xstrdup(devname);
- }
- ofpbuf_delete(buf);
- break;
-
- case EAGAIN:
- /* Nothing to do. */
- break;
+ COVERAGE_INC(dpif_purge);
- case ENOBUFS:
- VLOG_WARN_RL(&slow_rl, "dpifmon socket overflowed");
- break;
+ error = dpif_get_dp_stats(dpif, &stats);
+ if (error) {
+ return error;
+ }
- default:
- VLOG_WARN_RL(&slow_rl, "error on dpifmon socket: %s", strerror(error));
- break;
+ for (i = 0; i < stats.max_miss_queue + stats.max_action_queue + stats.max_sflow_queue; i++) {
+ struct ofpbuf *buf;
+ error = dpif_recv(dpif, &buf);
+ if (error) {
+ return error == EAGAIN ? 0 : error;
+ }
+ ofpbuf_delete(buf);
}
- return error;
+ return 0;
}
+/* Arranges for the poll loop to wake up when 'dpif' has a message queued to be
+ * received with dpif_recv(). */
void
-dpifmon_run(struct dpifmon *mon UNUSED)
+dpif_recv_wait(struct dpif *dpif)
{
- /* Nothing to do in this implementation. */
+ dpif->dpif_class->recv_wait(dpif);
}
+/* Obtains the NetFlow engine type and engine ID for 'dpif' into '*engine_type'
+ * and '*engine_id', respectively. */
void
-dpifmon_wait(struct dpifmon *mon)
+dpif_get_netflow_ids(const struct dpif *dpif,
+ uint8_t *engine_type, uint8_t *engine_id)
{
- nl_sock_wait(mon->sock, POLLIN);
+ *engine_type = dpif->netflow_engine_type;
+ *engine_id = dpif->netflow_engine_id;
}
-\f
-static int get_openvswitch_major(void);
-static int get_major(const char *target, int default_major);
-static int
-lookup_minor(const char *name, unsigned int *minor)
+/* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a priority
+ * value for use in the ODPAT_SET_PRIORITY action. On success, returns 0 and
+ * stores the priority into '*priority'. On failure, returns a positive errno
+ * value and stores 0 into '*priority'. */
+int
+dpif_queue_to_priority(const struct dpif *dpif, uint32_t queue_id,
+ uint32_t *priority)
{
- struct ethtool_drvinfo drvinfo;
- struct ifreq ifr;
- int error;
- int sock;
-
- *minor = -1;
- sock = socket(AF_INET, SOCK_DGRAM, 0);
- if (sock < 0) {
- VLOG_WARN("socket(AF_INET) failed: %s", strerror(errno));
- error = errno;
- goto error;
- }
-
- memset(&ifr, 0, sizeof ifr);
- strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
- ifr.ifr_data = (caddr_t) &drvinfo;
-
- memset(&drvinfo, 0, sizeof drvinfo);
- drvinfo.cmd = ETHTOOL_GDRVINFO;
- if (ioctl(sock, SIOCETHTOOL, &ifr)) {
- VLOG_WARN("ioctl(SIOCETHTOOL) failed: %s", strerror(errno));
- error = errno;
- goto error_close_sock;
- }
-
- if (strcmp(drvinfo.driver, "openvswitch")) {
- VLOG_WARN("%s is not an openvswitch device", name);
- error = EOPNOTSUPP;
- goto error_close_sock;
- }
-
- if (!isdigit(drvinfo.bus_info[0])) {
- VLOG_WARN("%s ethtool info does not contain an openvswitch minor",
- name);
- error = EPROTOTYPE;
- goto error_close_sock;
+ int error = (dpif->dpif_class->queue_to_priority
+ ? dpif->dpif_class->queue_to_priority(dpif, queue_id,
+ priority)
+ : EOPNOTSUPP);
+ if (error) {
+ *priority = 0;
}
-
- *minor = atoi(drvinfo.bus_info);
- close(sock);
- return 0;
-
-error_close_sock:
- close(sock);
-error:
+ log_operation(dpif, "queue_to_priority", error);
return error;
}
+\f
+void
+dpif_init(struct dpif *dpif, const struct dpif_class *dpif_class,
+ const char *name,
+ uint8_t netflow_engine_type, uint8_t netflow_engine_id)
+{
+ dpif->dpif_class = dpif_class;
+ dpif->base_name = xstrdup(name);
+ dpif->full_name = xasprintf("%s@%s", dpif_class->type, name);
+ dpif->netflow_engine_type = netflow_engine_type;
+ dpif->netflow_engine_id = netflow_engine_id;
+}
-static int
-make_openvswitch_device(unsigned int minor, char **fnp)
+/* Undoes the results of initialization.
+ *
+ * Normally this function only needs to be called from dpif_close().
+ * However, it may be called by providers due to an error on opening
+ * that occurs after initialization. In this case dpif_close() would
+ * never be called. */
+void
+dpif_uninit(struct dpif *dpif, bool close)
{
- dev_t dev = makedev(get_openvswitch_major(), minor);
- const char dirname[] = "/dev/net";
- struct stat s;
- char fn[128];
-
- *fnp = NULL;
- sprintf(fn, "%s/dp%d", dirname, minor);
- if (!stat(fn, &s)) {
- if (!S_ISCHR(s.st_mode)) {
- VLOG_WARN_RL(&error_rl, "%s is not a character device, fixing",
- fn);
- } else if (s.st_rdev != dev) {
- VLOG_WARN_RL(&error_rl,
- "%s is device %u:%u instead of %u:%u, fixing",
- fn, major(s.st_rdev), minor(s.st_rdev),
- major(dev), minor(dev));
- } else {
- goto success;
- }
- if (unlink(fn)) {
- VLOG_WARN_RL(&error_rl, "%s: unlink failed (%s)",
- fn, strerror(errno));
- return errno;
- }
- } else if (errno == ENOENT) {
- if (stat(dirname, &s)) {
- if (errno == ENOENT) {
- if (mkdir(dirname, 0755)) {
- VLOG_WARN_RL(&error_rl, "%s: mkdir failed (%s)",
- dirname, strerror(errno));
- return errno;
- }
- } else {
- VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)",
- dirname, strerror(errno));
- return errno;
- }
- }
- } else {
- VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)", fn, strerror(errno));
- return errno;
- }
+ char *base_name = dpif->base_name;
+ char *full_name = dpif->full_name;
- /* The device needs to be created. */
- if (mknod(fn, S_IFCHR | 0700, dev)) {
- VLOG_WARN_RL(&error_rl,
- "%s: creating character device %u:%u failed (%s)",
- fn, major(dev), minor(dev), strerror(errno));
- return errno;
+ if (close) {
+ dpif->dpif_class->close(dpif);
}
-success:
- *fnp = xstrdup(fn);
- return 0;
+ free(base_name);
+ free(full_name);
}
-
-
-static int
-get_openvswitch_major(void)
+\f
+static void
+log_operation(const struct dpif *dpif, const char *operation, int error)
{
- static unsigned int openvswitch_major;
- if (!openvswitch_major) {
- enum { DEFAULT_MAJOR = 248 };
- openvswitch_major = get_major("openvswitch", DEFAULT_MAJOR);
+ if (!error) {
+ VLOG_DBG_RL(&dpmsg_rl, "%s: %s success", dpif_name(dpif), operation);
+ } else if (is_errno(error)) {
+ VLOG_WARN_RL(&error_rl, "%s: %s failed (%s)",
+ dpif_name(dpif), operation, strerror(error));
+ } else {
+ VLOG_WARN_RL(&error_rl, "%s: %s failed (%d/%d)",
+ dpif_name(dpif), operation,
+ get_ofp_err_type(error), get_ofp_err_code(error));
}
- return openvswitch_major;
}
-static int
-get_major(const char *target, int default_major)
+static enum vlog_level
+flow_message_log_level(int error)
{
- const char fn[] = "/proc/devices";
- char line[128];
- FILE *file;
- int ln;
-
- file = fopen(fn, "r");
- if (!file) {
- VLOG_ERR("opening %s failed (%s)", fn, strerror(errno));
- goto error;
- }
-
- for (ln = 1; fgets(line, sizeof line, file); ln++) {
- char name[64];
- int major;
-
- if (!strncmp(line, "Character", 9) || line[0] == '\0') {
- /* Nothing to do. */
- } else if (!strncmp(line, "Block", 5)) {
- /* We only want character devices, so skip the rest of the file. */
- break;
- } else if (sscanf(line, "%d %63s", &major, name)) {
- if (!strcmp(name, target)) {
- fclose(file);
- return major;
- }
- } else {
- static bool warned;
- if (!warned) {
- VLOG_WARN("%s:%d: syntax error", fn, ln);
- }
- warned = true;
- }
- }
+ return error ? VLL_WARN : VLL_DBG;
+}
- VLOG_ERR("%s: %s major not found (is the module loaded?), using "
- "default major %d", fn, target, default_major);
-error:
- VLOG_INFO("using default major %d for %s", default_major, target);
- return default_major;
+static bool
+should_log_flow_message(int error)
+{
+ return !vlog_should_drop(THIS_MODULE, flow_message_log_level(error),
+ error ? &error_rl : &dpmsg_rl);
}
-static int
-name_to_minor(const char *name, unsigned int *minor)
+static void
+log_flow_message(const struct dpif *dpif, int error, const char *operation,
+ const struct odp_flow_key *flow,
+ const struct odp_flow_stats *stats,
+ const struct nlattr *actions, size_t actions_len)
{
- if (!get_minor_from_name(name, minor)) {
- return 0;
+ struct ds ds = DS_EMPTY_INITIALIZER;
+ ds_put_format(&ds, "%s: ", dpif_name(dpif));
+ if (error) {
+ ds_put_cstr(&ds, "failed to ");
}
- return lookup_minor(name, minor);
+ ds_put_format(&ds, "%s ", operation);
+ if (error) {
+ ds_put_format(&ds, "(%s) ", strerror(error));
+ }
+ format_odp_flow_key(&ds, flow);
+ if (stats) {
+ ds_put_cstr(&ds, ", ");
+ format_odp_flow_stats(&ds, stats);
+ }
+ if (actions || actions_len) {
+ ds_put_cstr(&ds, ", actions:");
+ format_odp_actions(&ds, actions, actions_len);
+ }
+ vlog(THIS_MODULE, flow_message_log_level(error), "%s", ds_cstr(&ds));
+ ds_destroy(&ds);
}
-static int
-get_minor_from_name(const char *name, unsigned int *minor)
+static void
+log_flow_operation(const struct dpif *dpif, const char *operation, int error,
+ struct odp_flow *flow)
{
- if (!strncmp(name, "dp", 2) && isdigit(name[2])) {
- *minor = atoi(name + 2);
- return 0;
- } else if (!strncmp(name, "nl:", 3) && isdigit(name[3])) {
- /* This is for compatibility only and will be dropped. */
- *minor = atoi(name + 3);
- return 0;
- } else {
- return EINVAL;
+ if (error) {
+ flow->actions_len = 0;
}
+ log_flow_message(dpif, error, operation, &flow->key,
+ !error ? &flow->stats : NULL,
+ flow->actions, flow->actions_len);
}
-static int
-open_by_minor(unsigned int minor, struct dpif *dpif)
+static void
+log_flow_put(struct dpif *dpif, int error, const struct odp_flow_put *put)
{
- int error;
- char *fn;
- int fd;
+ enum { ODPPF_ALL = ODPPF_CREATE | ODPPF_MODIFY | ODPPF_ZERO_STATS };
+ struct ds s;
- dpif->minor = -1;
- dpif->fd = -1;
- error = make_openvswitch_device(minor, &fn);
- if (error) {
- return error;
+ ds_init(&s);
+ ds_put_cstr(&s, "put");
+ if (put->flags & ODPPF_CREATE) {
+ ds_put_cstr(&s, "[create]");
}
-
- fd = open(fn, O_RDONLY | O_NONBLOCK);
- if (fd < 0) {
- error = errno;
- VLOG_WARN("%s: open failed (%s)", fn, strerror(error));
- free(fn);
- return error;
+ if (put->flags & ODPPF_MODIFY) {
+ ds_put_cstr(&s, "[modify]");
}
-
- free(fn);
- dpif->minor = minor;
- dpif->fd = fd;
- return 0;
+ if (put->flags & ODPPF_ZERO_STATS) {
+ ds_put_cstr(&s, "[zero]");
+ }
+ if (put->flags & ~ODPPF_ALL) {
+ ds_put_format(&s, "[%x]", put->flags & ~ODPPF_ALL);
+ }
+ log_flow_message(dpif, error, ds_cstr(&s), &put->flow.key,
+ !error ? &put->flow.stats : NULL,
+ put->flow.actions, put->flow.actions_len);
+ ds_destroy(&s);
}
-\f
+
/* There is a tendency to construct odp_flow objects on the stack and to
- * forget to properly initialize their "actions" and "n_actions" members.
+ * forget to properly initialize their "actions" and "actions_len" members.
* When this happens, we get memory corruption because the kernel
* writes through the random pointer that is in the "actions" member.
*
* easy-to-identify error later if it is dereferenced, etc.
*
* - Triggering a warning on uninitialized memory from Valgrind if
- * "actions" or "n_actions" was not initialized.
+ * "actions" or "actions_len" was not initialized.
*/
static void
check_rw_odp_flow(struct odp_flow *flow)
{
- if (flow->n_actions) {
+ if (flow->actions_len) {
memset(&flow->actions[0], 0xcc, sizeof flow->actions[0]);
}
}