/*
- * Copyright (c) 2008, 2009 Nicira Networks.
+ * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
*/
#include <config.h>
-#include "dpif.h"
+#include "dpif-provider.h"
#include <assert.h>
#include <ctype.h>
#include <errno.h>
-#include <fcntl.h>
#include <inttypes.h>
-#include <net/if.h>
-#include <linux/rtnetlink.h>
-#include <linux/ethtool.h>
-#include <linux/sockios.h>
-#include <netinet/in.h>
#include <stdlib.h>
#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/stat.h>
-#include <sys/sysmacros.h>
-#include <unistd.h>
#include "coverage.h"
#include "dynamic-string.h"
#include "flow.h"
+#include "netdev.h"
#include "netlink.h"
#include "odp-util.h"
#include "ofp-print.h"
+#include "ofp-util.h"
#include "ofpbuf.h"
#include "packets.h"
#include "poll-loop.h"
+#include "shash.h"
+#include "svec.h"
+#include "timeval.h"
#include "util.h"
#include "valgrind.h"
-
#include "vlog.h"
-#define THIS_MODULE VLM_dpif
-/* A datapath interface. */
-struct dpif {
- char *name;
- unsigned int minor;
- int fd;
+VLOG_DEFINE_THIS_MODULE(dpif);
+
+COVERAGE_DEFINE(dpif_destroy);
+COVERAGE_DEFINE(dpif_port_add);
+COVERAGE_DEFINE(dpif_port_del);
+COVERAGE_DEFINE(dpif_flow_flush);
+COVERAGE_DEFINE(dpif_flow_get);
+COVERAGE_DEFINE(dpif_flow_put);
+COVERAGE_DEFINE(dpif_flow_del);
+COVERAGE_DEFINE(dpif_flow_query_list);
+COVERAGE_DEFINE(dpif_flow_query_list_n);
+COVERAGE_DEFINE(dpif_execute);
+COVERAGE_DEFINE(dpif_purge);
+
+static const struct dpif_class *base_dpif_classes[] = {
+#ifdef HAVE_NETLINK
+ &dpif_linux_class, /* Present only when HAVE_NETLINK is defined. */
+#endif
+ &dpif_netdev_class,
+}; /* dp_initialize() registers every entry of this array. */
+
+struct registered_dpif_class {
+ const struct dpif_class *dpif_class; /* Provider class, held by pointer. */
+ int refcount; /* Raised by each successful open, lowered by each close. */
};
+static struct shash dpif_classes = SHASH_INITIALIZER(&dpif_classes);
/* Rate limit for individual messages going to or from the datapath, output at
* DBG level. This is very high because, if these are enabled, it is because
* we really need to see them. */
static struct vlog_rate_limit dpmsg_rl = VLOG_RATE_LIMIT_INIT(600, 600);
/* Not really much point in logging many dpif errors. */
-static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);
+static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
-static int get_minor_from_name(const char *name, unsigned int *minor);
-static int name_to_minor(const char *name, unsigned int *minor);
-static int lookup_minor(const char *name, unsigned int *minor);
-static int open_by_minor(unsigned int minor, struct dpif **dpifp);
-static int make_openvswitch_device(unsigned int minor, char **fnp);
-static void check_rw_odp_flow(struct odp_flow *);
+static void log_flow_message(const struct dpif *dpif, int error,
+ const char *operation,
+ const struct nlattr *key, size_t key_len,
+ const struct dpif_flow_stats *stats,
+ const struct nlattr *actions, size_t actions_len);
+static void log_operation(const struct dpif *, const char *operation,
+ int error);
+static bool should_log_flow_message(int error);
-int
-dpif_open(const char *name, struct dpif **dpifp)
+static void
+dp_initialize(void)
{
- struct dpif *dpif;
- unsigned int minor;
- int listen_mask;
- int error;
+ static int status = -1; /* Negative until the built-in classes are registered. */
- *dpifp = NULL;
+ if (status < 0) {
+ int i;
- error = name_to_minor(name, &minor);
- if (error) {
- return error;
+ status = 0; /* Cleared before the loop, so this body runs only once. */
+ for (i = 0; i < ARRAY_SIZE(base_dpif_classes); i++) {
+ dp_register_provider(base_dpif_classes[i]); /* Return value is ignored. */
+ }
 }
+}
- error = open_by_minor(minor, &dpif);
- if (error) {
- return error;
+/* Performs periodic work needed by all the various kinds of dpifs, by calling
+ * the 'run' function of every registered dpif class that provides one.
+ *
+ * If your program opens any dpifs, it must call both this function and
+ * netdev_run() within its main poll loop. */
+void
+dp_run(void)
+{
+ struct shash_node *node;
+ SHASH_FOR_EACH(node, &dpif_classes) {
+ const struct registered_dpif_class *registered_class = node->data;
+ if (registered_class->dpif_class->run) {
+ registered_class->dpif_class->run();
+ }
 }
+}
- /* We can open the device, but that doesn't mean that it's been created.
- * If it hasn't been, then any command other than ODP_DP_CREATE will
- * return ENODEV. Try something innocuous. */
- listen_mask = 0; /* Make Valgrind happy. */
- if (ioctl(dpif->fd, ODP_GET_LISTEN_MASK, &listen_mask)) {
- error = errno;
- if (error != ENODEV) {
- VLOG_WARN("%s: probe returned unexpected error: %s",
- dpif_name(dpif), strerror(error));
+/* Arranges for poll_block() to wake up when dp_run() needs to be called, by
+ * calling the 'wait' function of every registered dpif class that provides
+ * one.
+ *
+ * If your program opens any dpifs, it must call both this function and
+ * netdev_wait() within its main poll loop. */
+void
+dp_wait(void)
+{
+ struct shash_node *node;
+ SHASH_FOR_EACH(node, &dpif_classes) {
+ const struct registered_dpif_class *registered_class = node->data;
+ if (registered_class->dpif_class->wait) {
+ registered_class->dpif_class->wait();
 }
- dpif_close(dpif);
- return error;
 }
- *dpifp = dpif;
+}
+
+/* Registers a new datapath provider. After successful registration, new
+ * datapaths of that type can be opened using dpif_open().
+ *
+ * Returns 0 if successful. Returns EEXIST, after logging a warning, if a
+ * provider whose type is 'new_class->type' is already registered.
+ *
+ * 'new_class' is stored by pointer, not copied, and the new registration
+ * starts with a reference count of zero. */
+int
+dp_register_provider(const struct dpif_class *new_class)
+{
+ struct registered_dpif_class *registered_class;
+
+ if (shash_find(&dpif_classes, new_class->type)) {
+ VLOG_WARN("attempted to register duplicate datapath provider: %s",
+ new_class->type);
+ return EEXIST;
+ }
+
+ registered_class = xmalloc(sizeof *registered_class);
+ registered_class->dpif_class = new_class;
+ registered_class->refcount = 0;
+
+ shash_add(&dpif_classes, new_class->type, registered_class);
+
 return 0;
}
-void
-dpif_close(struct dpif *dpif)
+/* Unregisters a datapath provider. 'type' must have been previously
+ * registered and not currently be in use by any dpifs. After unregistration
+ * new datapaths of that type cannot be opened using dpif_open().
+ *
+ * Returns 0 if successful. Otherwise logs a warning and returns EAFNOSUPPORT
+ * if 'type' is not registered, or EBUSY if its reference count is nonzero. */
+int
+dp_unregister_provider(const char *type)
{
- if (dpif) {
- free(dpif->name);
- close(dpif->fd);
- free(dpif);
+ struct shash_node *node;
+ struct registered_dpif_class *registered_class;
+
+ node = shash_find(&dpif_classes, type);
+ if (!node) {
+ VLOG_WARN("attempted to unregister a datapath provider that is not "
+ "registered: %s", type);
+ return EAFNOSUPPORT;
+ }
+
+ registered_class = node->data;
+ if (registered_class->refcount) {
+ VLOG_WARN("attempted to unregister in use datapath provider: %s", type);
+ return EBUSY;
 }
+
+ shash_delete(&dpif_classes, node);
+ free(registered_class);
+
+ return 0;
}
-static int
-do_ioctl(const struct dpif *dpif, int cmd, const char *cmd_name,
- const void *arg)
+/* Clears 'types' and enumerates the types of all currently registered datapath
+ * providers into it. The caller must first initialize the svec.
+ *
+ * Calls dp_initialize() first, so the built-in providers are registered
+ * before the enumeration takes place. */
+void
+dp_enumerate_types(struct svec *types)
{
- int error = ioctl(dpif->fd, cmd, arg) ? errno : 0;
- if (cmd_name) {
- if (error) {
- VLOG_WARN_RL(&error_rl, "%s: ioctl(%s) failed (%s)",
- dpif_name(dpif), cmd_name, strerror(error));
- } else {
- VLOG_DBG_RL(&dpmsg_rl, "%s: ioctl(%s): success",
- dpif_name(dpif), cmd_name);
- }
+ struct shash_node *node;
+
+ dp_initialize();
+ svec_clear(types);
+
+ SHASH_FOR_EACH(node, &dpif_classes) {
+ const struct registered_dpif_class *registered_class = node->data;
+ svec_add(types, registered_class->dpif_class->type);
 }
- return error;
}
+/* Clears 'names' and enumerates the names of all known created datapaths with
+ * the given 'type'. The caller must first initialize the svec. Returns 0 if
+ * successful, otherwise a positive errno value.
+ *
+ * Returns EAFNOSUPPORT, after logging a warning, if 'type' is not a
+ * registered provider type. 'names' has already been cleared in that case.
+ *
+ * Some kinds of datapaths might not be practically enumerable. This is not
+ * considered an error: a provider that has no 'enumerate' function yields an
+ * empty 'names' and a return value of 0. */
int
-dpif_create(const char *name, struct dpif **dpifp)
+dp_enumerate_names(const char *type, struct svec *names)
{
- unsigned int minor;
+ const struct registered_dpif_class *registered_class;
+ const struct dpif_class *dpif_class;
 int error;
- *dpifp = NULL;
- if (!get_minor_from_name(name, &minor)) {
- /* Minor was specified in 'name', go ahead and create it. */
- struct dpif *dpif;
+ dp_initialize();
+ svec_clear(names);
- error = open_by_minor(minor, &dpif);
- if (error) {
- return error;
- }
+ registered_class = shash_find_data(&dpif_classes, type);
+ if (!registered_class) {
+ VLOG_WARN("could not enumerate unknown type: %s", type);
+ return EAFNOSUPPORT;
+ }
- error = ioctl(dpif->fd, ODP_DP_CREATE, name) < 0 ? errno : 0;
- if (!error) {
- *dpifp = dpif;
- } else {
- dpif_close(dpif);
- }
- return error;
+ dpif_class = registered_class->dpif_class;
+ error = dpif_class->enumerate ? dpif_class->enumerate(names) : 0;
+
+ if (error) {
+ VLOG_WARN("failed to enumerate %s datapaths: %s", dpif_class->type,
+ strerror(error));
+ }
+
+ return error;
+}
+
+/* Parses 'datapath_name_', which is of the form [type@]name, into its
+ * component pieces, splitting at the first '@'. If there is no '@', the
+ * whole string becomes '*name' and '*type' is set to NULL. '*name' and
+ * '*type' (when non-null) must be freed by the caller. */
+void
+dp_parse_name(const char *datapath_name_, char **name, char **type)
+{
+ char *datapath_name = xstrdup(datapath_name_);
+ char *separator;
+
+ separator = strchr(datapath_name, '@');
+ if (separator) {
+ *separator = '\0'; /* Terminates the type part in place. */
+ *type = datapath_name;
+ *name = xstrdup(separator + 1);
 } else {
- for (minor = 0; minor < ODP_MAX; minor++) {
- struct dpif *dpif;
-
- error = open_by_minor(minor, &dpif);
- if (error) {
- return error;
- }
-
- error = ioctl(dpif->fd, ODP_DP_CREATE, name) < 0 ? errno : 0;
- if (!error) {
- *dpifp = dpif;
- return 0;
- }
- dpif_close(dpif);
- if (error != EBUSY) {
- return error;
- }
- }
- return ENOBUFS;
+ *name = datapath_name;
+ *type = NULL;
 }
}
-const char *
-dpif_name(const struct dpif *dpif)
+static int
+do_open(const char *name, const char *type, bool create, struct dpif **dpifp)
{
- return dpif->name;
+ struct dpif *dpif = NULL;
+ int error;
+ struct registered_dpif_class *registered_class;
+
+ dp_initialize();
+
+ if (!type || *type == '\0') {
+ type = "system"; /* Type used when the caller gives none. */
+ }
+
+ registered_class = shash_find_data(&dpif_classes, type);
+ if (!registered_class) {
+ VLOG_WARN("could not create datapath %s of unknown type %s", name,
+ type);
+ error = EAFNOSUPPORT;
+ goto exit;
+ }
+
+ error = registered_class->dpif_class->open(registered_class->dpif_class,
+ name, create, &dpif);
+ if (!error) {
+ assert(dpif->dpif_class == registered_class->dpif_class);
+ registered_class->refcount++; /* Dropped again by dpif_close(). */
+ }
+
+exit:
+ *dpifp = error ? NULL : dpif; /* '*dpifp' is always written, NULL on failure. */
+ return error;
}
+/* Tries to open an existing datapath named 'name' and type 'type'. Will fail
+ * if no datapath with 'name' and 'type' exists. 'type' may be either NULL or
+ * the empty string to specify the default system type. Returns 0 if
+ * successful, otherwise a positive errno value. On success stores a pointer
+ * to the datapath in '*dpifp', otherwise a null pointer.
+ *
+ * This is do_open() with 'create' set to false, so it returns EAFNOSUPPORT if
+ * 'type' is not a registered provider type. */
int
-dpif_delete(struct dpif *dpif)
+dpif_open(const char *name, const char *type, struct dpif **dpifp)
{
- COVERAGE_INC(dpif_destroy);
- return do_ioctl(dpif, ODP_DP_DESTROY, "ODP_DP_DESTROY", NULL);
+ return do_open(name, type, false, dpifp);
}
+/* Tries to create and open a new datapath with the given 'name' and 'type'.
+ * 'type' may be either NULL or the empty string to specify the default system
+ * type. Will fail if a datapath with 'name' and 'type' already exists.
+ * Returns 0 if successful, otherwise a positive errno value. On success
+ * stores a pointer to the datapath in '*dpifp', otherwise a null pointer.
+ *
+ * This is do_open() with 'create' set to true, so it returns EAFNOSUPPORT if
+ * 'type' is not a registered provider type. */
int
-dpif_get_dp_stats(const struct dpif *dpif, struct odp_stats *stats)
+dpif_create(const char *name, const char *type, struct dpif **dpifp)
{
- memset(stats, 0, sizeof *stats);
- return do_ioctl(dpif, ODP_DP_STATS, "ODP_DP_STATS", stats);
+ return do_open(name, type, true, dpifp);
}
+/* Tries to open a datapath with the given 'name' and 'type', creating it if it
+ * does not exist. 'type' may be either NULL or the empty string to specify
+ * the default system type. Returns 0 if successful, otherwise a positive
+ * errno value. On success stores a pointer to the datapath in '*dpifp',
+ * otherwise a null pointer.
+ *
+ * Creation is attempted first. dpif_open() is tried only if creation fails
+ * with EEXIST or EBUSY. Any error that is finally returned is also logged as
+ * a warning. */
int
-dpif_get_drop_frags(const struct dpif *dpif, bool *drop_frags)
+dpif_create_and_open(const char *name, const char *type, struct dpif **dpifp)
{
- int tmp;
- int error = do_ioctl(dpif, ODP_GET_DROP_FRAGS, "ODP_GET_DROP_FRAGS", &tmp);
- *drop_frags = error ? tmp & 1 : false;
+ int error;
+
+ error = dpif_create(name, type, dpifp);
+ if (error == EEXIST || error == EBUSY) {
+ error = dpif_open(name, type, dpifp);
+ if (error) {
+ VLOG_WARN("datapath %s already exists but cannot be opened: %s",
+ name, strerror(error));
+ }
+ } else if (error) {
+ VLOG_WARN("failed to create datapath %s: %s", name, strerror(error));
+ }
 return error;
}
+/* Closes and frees the connection to 'dpif'. Does not destroy the datapath
+ * itself; call dpif_delete() first, instead, if that is desirable.
+ *
+ * Does nothing if 'dpif' is null. Otherwise decrements the reference count
+ * of 'dpif''s registered class, which must still be registered and have a
+ * nonzero count (both asserted), and then passes 'dpif' to dpif_uninit(). */
+void
+dpif_close(struct dpif *dpif)
+{
+ if (dpif) {
+ struct registered_dpif_class *registered_class;
+
+ registered_class = shash_find_data(&dpif_classes,
+ dpif->dpif_class->type);
+ assert(registered_class);
+ assert(registered_class->refcount);
+
+ registered_class->refcount--;
+ dpif_uninit(dpif, true);
+ }
+}
+
+/* Returns the name of datapath 'dpif' prefixed with the type
+ * (for use in log messages).
+ *
+ * The result is 'dpif''s own 'full_name' member, not a copy. */
+const char *
+dpif_name(const struct dpif *dpif)
+{
+ return dpif->full_name;
+}
+
+/* Returns the name of datapath 'dpif' without the type
+ * (for use in device names).
+ *
+ * The result is 'dpif''s own 'base_name' member, not a copy. */
+const char *
+dpif_base_name(const struct dpif *dpif)
+{
+ return dpif->base_name;
+}
+
+/* Enumerates all names that may be used to open 'dpif' into 'all_names'. The
+ * Linux datapath, for example, supports opening a datapath both by number,
+ * e.g. "dp0", and by the name of the datapath's local port. For some
+ * datapaths, this might be an infinite set (e.g. in a file name, slashes may
+ * be duplicated any number of times), in which case only the names most likely
+ * to be used will be enumerated.
+ *
+ * The caller must already have initialized 'all_names'. Any existing names in
+ * 'all_names' will not be disturbed.
+ *
+ * If 'dpif''s class has a 'get_all_names' function, returns its result,
+ * logging a rate-limited warning when that result is nonzero. Otherwise adds
+ * only dpif_base_name(dpif) to 'all_names' and returns 0. */
int
-dpif_set_drop_frags(struct dpif *dpif, bool drop_frags)
+dpif_get_all_names(const struct dpif *dpif, struct svec *all_names)
{
- int tmp = drop_frags;
- return do_ioctl(dpif, ODP_SET_DROP_FRAGS, "ODP_SET_DROP_FRAGS", &tmp);
+ if (dpif->dpif_class->get_all_names) {
+ int error = dpif->dpif_class->get_all_names(dpif, all_names);
+ if (error) {
+ VLOG_WARN_RL(&error_rl,
+ "failed to retrieve names for datapath %s: %s",
+ dpif_name(dpif), strerror(error));
+ }
+ return error;
+ } else {
+ svec_add(all_names, dpif_base_name(dpif));
+ return 0;
+ }
}
+
+/* Destroys the datapath that 'dpif' is connected to, first removing all of its
+ * ports. After calling this function, it does not make sense to pass 'dpif'
+ * to any functions other than dpif_name() or dpif_close().
+ *
+ * Returns the result of the class's 'destroy' function, after passing it to
+ * log_operation(). */
int
-dpif_recv_purge(struct dpif *dpif)
+dpif_delete(struct dpif *dpif)
{
- struct odp_stats stats;
- unsigned int i;
 int error;
- COVERAGE_INC(dpif_purge);
+ COVERAGE_INC(dpif_destroy);
- error = dpif_get_dp_stats(dpif, &stats);
+ error = dpif->dpif_class->destroy(dpif);
+ log_operation(dpif, "delete", error);
+ return error;
+}
+
+/* Retrieves statistics for 'dpif' into 'stats'. Returns 0 if successful,
+ * otherwise a positive errno value. On failure, '*stats' is zeroed. */
+int
+dpif_get_dp_stats(const struct dpif *dpif, struct odp_stats *stats)
+{
+ int error = dpif->dpif_class->get_stats(dpif, stats);
 if (error) {
- return error;
+ memset(stats, 0, sizeof *stats);
 }
+ log_operation(dpif, "get_stats", error);
+ return error;
+}
- for (i = 0; i < stats.max_miss_queue + stats.max_action_queue; i++) {
- struct ofpbuf *buf;
- error = dpif_recv(dpif, &buf);
- if (error) {
- return error == EAGAIN ? 0 : error;
- }
- ofpbuf_delete(buf);
+/* Retrieves the current IP fragment handling policy for 'dpif' into
+ * '*drop_frags': true indicates that fragments are dropped, false indicates
+ * that fragments are treated in the same way as other IP packets (except that
+ * the L4 header cannot be read). Returns 0 if successful, otherwise a
+ * positive errno value. On failure, '*drop_frags' is set to false. */
+int
+dpif_get_drop_frags(const struct dpif *dpif, bool *drop_frags)
+{
+ int error = dpif->dpif_class->get_drop_frags(dpif, drop_frags);
+ if (error) {
+ *drop_frags = false;
 }
- return 0;
+ log_operation(dpif, "get_drop_frags", error);
+ return error;
+}
+
+/* Changes 'dpif''s treatment of IP fragments to 'drop_frags', whose meaning is
+ * the same as for the get_drop_frags member function. Returns 0 if
+ * successful, otherwise a positive errno value. The result is passed to
+ * log_operation() before it is returned. */
+int
+dpif_set_drop_frags(struct dpif *dpif, bool drop_frags)
+{
+ int error = dpif->dpif_class->set_drop_frags(dpif, drop_frags);
+ log_operation(dpif, "set_drop_frags", error);
+ return error;
}
+/* Attempts to add 'netdev' as a port on 'dpif'. If successful, returns 0 and
+ * sets '*port_nop' to the new port's port number (if 'port_nop' is non-null).
+ * On failure, returns a positive errno value and sets '*port_nop' to
+ * UINT16_MAX (if 'port_nop' is non-null).
+ *
+ * The outcome is logged either way, rate-limited: with VLOG_DBG_RL on
+ * success and with VLOG_WARN_RL on failure. */
int
-dpif_port_add(struct dpif *dpif, const char *devname, uint16_t flags,
- uint16_t *port_nop)
+dpif_port_add(struct dpif *dpif, struct netdev *netdev, uint16_t *port_nop)
{
- struct odp_port port;
+ const char *netdev_name = netdev_get_name(netdev);
uint16_t port_no;
int error;
COVERAGE_INC(dpif_port_add);
- memset(&port, 0, sizeof port);
- strncpy(port.devname, devname, sizeof port.devname);
- port.flags = flags;
-
- error = do_ioctl(dpif, ODP_PORT_ADD, NULL, &port);
+ error = dpif->dpif_class->port_add(dpif, netdev, &port_no);
if (!error) {
- port_no = port.port;
VLOG_DBG_RL(&dpmsg_rl, "%s: added %s as port %"PRIu16,
- dpif_name(dpif), devname, port_no);
+ dpif_name(dpif), netdev_name, port_no);
} else {
- port_no = UINT16_MAX;
VLOG_WARN_RL(&error_rl, "%s: failed to add %s as port: %s",
- dpif_name(dpif), devname, strerror(errno));
+ dpif_name(dpif), netdev_name, strerror(error));
+ port_no = UINT16_MAX;
}
if (port_nop) {
*port_nop = port_no;
}
return error;
}
+/* Attempts to remove 'dpif''s port number 'port_no'. Returns 0 if successful,
+ * otherwise a positive errno value.
+ *
+ * Success is logged with a message that names the port number. Failure is
+ * reported through log_operation(). */
int
dpif_port_del(struct dpif *dpif, uint16_t port_no)
{
- int tmp = port_no;
+ int error;
+
COVERAGE_INC(dpif_port_del);
- return do_ioctl(dpif, ODP_PORT_DEL, "ODP_PORT_DEL", &tmp);
+
+ error = dpif->dpif_class->port_del(dpif, port_no);
+ if (!error) {
+ VLOG_DBG_RL(&dpmsg_rl, "%s: port_del(%"PRIu16")",
+ dpif_name(dpif), port_no);
+ } else {
+ log_operation(dpif, "port_del", error);
+ }
+ return error;
}
+/* Makes a deep copy of 'src' into 'dst'.
+ *
+ * The 'name' and 'type' members of 'dst' are overwritten without being freed
+ * first, and are set to strings obtained from xstrdup(). */
+void
+dpif_port_clone(struct dpif_port *dst, const struct dpif_port *src)
+{
+ dst->name = xstrdup(src->name);
+ dst->type = xstrdup(src->type);
+ dst->port_no = src->port_no;
+}
+
+/* Frees memory allocated to members of 'dpif_port', namely its 'name' and
+ * 'type' strings. 'dpif_port' itself is not freed.
+ *
+ * Do not call this function on a dpif_port obtained from
+ * dpif_port_dump_next(): that function retains ownership of the data in the
+ * dpif_port. */
+void
+dpif_port_destroy(struct dpif_port *dpif_port)
+{
+ free(dpif_port->name);
+ free(dpif_port->type);
+}
+
+/* Looks up port number 'port_no' in 'dpif'. On success, returns 0 and
+ * initializes '*port' appropriately; on failure, returns a positive errno
+ * value, zeroes '*port' and logs a rate-limited warning.
+ *
+ * The caller owns the data in 'port' and must free it with
+ * dpif_port_destroy() when it is no longer needed. */
int
dpif_port_query_by_number(const struct dpif *dpif, uint16_t port_no,
- struct odp_port *port)
+ struct dpif_port *port)
{
- memset(port, 0, sizeof *port);
- port->port = port_no;
- if (!ioctl(dpif->fd, ODP_PORT_QUERY, port)) {
+ int error = dpif->dpif_class->port_query_by_number(dpif, port_no, port);
+ if (!error) {
VLOG_DBG_RL(&dpmsg_rl, "%s: port %"PRIu16" is device %s",
- dpif_name(dpif), port_no, port->devname);
- return 0;
+ dpif_name(dpif), port_no, port->name);
} else {
+ memset(port, 0, sizeof *port);
VLOG_WARN_RL(&error_rl, "%s: failed to query port %"PRIu16": %s",
- dpif_name(dpif), port_no, strerror(errno));
- return errno;
+ dpif_name(dpif), port_no, strerror(error));
}
+ return error;
}
+/* Looks up port named 'devname' in 'dpif'. On success, returns 0 and
+ * initializes '*port' appropriately; on failure, returns a positive errno
+ * value and zeroes '*port'.
+ *
+ * The caller owns the data in 'port' and must free it with
+ * dpif_port_destroy() when it is no longer needed. */
int
dpif_port_query_by_name(const struct dpif *dpif, const char *devname,
- struct odp_port *port)
+ struct dpif_port *port)
{
- memset(port, 0, sizeof *port);
- strncpy(port->devname, devname, sizeof port->devname);
- if (!ioctl(dpif->fd, ODP_PORT_QUERY, port)) {
+ int error = dpif->dpif_class->port_query_by_name(dpif, devname, port);
+ if (!error) {
VLOG_DBG_RL(&dpmsg_rl, "%s: device %s is on port %"PRIu16,
- dpif_name(dpif), devname, port->port);
- return 0;
+ dpif_name(dpif), devname, port->port_no);
} else {
+ memset(port, 0, sizeof *port);
+
/* Log level is DBG here because all the current callers are interested
* in whether 'dpif' actually has a port 'devname', so that it's not an
* issue worth logging if it doesn't. */
VLOG_DBG_RL(&error_rl, "%s: failed to query port %s: %s",
- dpif_name(dpif), devname, strerror(errno));
- return errno;
+ dpif_name(dpif), devname, strerror(error));
}
+ return error;
+}
+
+/* Returns one greater than the maximum port number accepted in flow
+ * actions. The value comes straight from the class's 'get_max_ports'
+ * function. */
+int
+dpif_get_max_ports(const struct dpif *dpif)
+{
+ return dpif->dpif_class->get_max_ports(dpif);
}
+/* Looks up port number 'port_no' in 'dpif'. On success, returns 0 and copies
+ * the port's name into the 'name_size' bytes in 'name', ensuring that the
+ * result is null-terminated. On failure, returns a positive errno value and
+ * makes 'name' the empty string.
+ *
+ * 'name_size' must be greater than 0 (asserted). */
int
dpif_port_get_name(struct dpif *dpif, uint16_t port_no,
char *name, size_t name_size)
{
- struct odp_port port;
+ struct dpif_port port;
int error;
assert(name_size > 0);
error = dpif_port_query_by_number(dpif, port_no, &port);
if (!error) {
- ovs_strlcpy(name, port.devname, name_size);
+ ovs_strlcpy(name, port.name, name_size);
+ dpif_port_destroy(&port);
} else {
*name = '\0';
}
return error;
}
-int
-dpif_port_list(const struct dpif *dpif,
- struct odp_port **ports, size_t *n_ports)
+/* Initializes 'dump' to begin dumping the ports in a dpif.
+ *
+ * This function provides no status indication. An error status for the entire
+ * dump operation is provided when it is completed by calling
+ * dpif_port_dump_done().
+ *
+ * The result of the class's 'port_dump_start' function is kept in
+ * 'dump->error', where dpif_port_dump_next() and dpif_port_dump_done() check
+ * it.
+ */
+void
+dpif_port_dump_start(struct dpif_port_dump *dump, const struct dpif *dpif)
{
- struct odp_portvec pv;
- struct odp_stats stats;
- int error;
+ dump->dpif = dpif;
+ dump->error = dpif->dpif_class->port_dump_start(dpif, &dump->state);
+ log_operation(dpif, "port_dump_start", dump->error);
+}
- do {
- error = dpif_get_dp_stats(dpif, &stats);
- if (error) {
- goto error;
- }
+/* Attempts to retrieve another port from 'dump', which must have been
+ * initialized with dpif_port_dump_start(). On success, stores a new dpif_port
+ * into 'port' and returns true. On failure, returns false.
+ *
+ * Failure might indicate an actual error or merely that the last port has been
+ * dumped. An error status for the entire dump operation is provided when it
+ * is completed by calling dpif_port_dump_done().
+ *
+ * When the class's 'port_dump_next' function returns nonzero (EOF included),
+ * this function calls the class's 'port_dump_done' function itself before
+ * returning false. Every later call returns false immediately, because
+ * 'dump->error' is then set.
+ *
+ * The dpif owns the data stored in 'port'. It will remain valid until at
+ * least the next time 'dump' is passed to dpif_port_dump_next() or
+ * dpif_port_dump_done(). */
+bool
+dpif_port_dump_next(struct dpif_port_dump *dump, struct dpif_port *port)
+{
+ const struct dpif *dpif = dump->dpif;
- *ports = xcalloc(1, stats.n_ports * sizeof **ports);
- pv.ports = *ports;
- pv.n_ports = stats.n_ports;
- error = do_ioctl(dpif, ODP_PORT_LIST, "ODP_PORT_LIST", &pv);
- if (error) {
- free(*ports);
- goto error;
- }
- } while (pv.n_ports != stats.n_ports);
- *n_ports = pv.n_ports;
- return 0;
+ if (dump->error) {
+ return false;
+ }
-error:
- *ports = NULL;
- *n_ports = 0;
- return error;
+ dump->error = dpif->dpif_class->port_dump_next(dpif, dump->state, port);
+ if (dump->error == EOF) {
+ VLOG_DBG_RL(&dpmsg_rl, "%s: dumped all ports", dpif_name(dpif));
+ } else {
+ log_operation(dpif, "port_dump_next", dump->error);
+ }
+
+ if (dump->error) {
+ dpif->dpif_class->port_dump_done(dpif, dump->state);
+ return false;
+ }
+ return true;
}
+/* Completes port table dump operation 'dump', which must have been initialized
+ * with dpif_port_dump_start(). Returns 0 if the dump operation was
+ * error-free, otherwise a positive errno value describing the problem.
+ *
+ * The class's 'port_dump_done' function is called here only if 'dump->error'
+ * is still zero. A stored status of EOF is reported to the caller as 0. */
int
-dpif_port_group_set(struct dpif *dpif, uint16_t group,
- const uint16_t ports[], size_t n_ports)
+dpif_port_dump_done(struct dpif_port_dump *dump)
{
- struct odp_port_group pg;
-
- COVERAGE_INC(dpif_port_group_set);
- assert(n_ports <= UINT16_MAX);
- pg.group = group;
- pg.ports = (uint16_t *) ports;
- pg.n_ports = n_ports;
- return do_ioctl(dpif, ODP_PORT_GROUP_SET, "ODP_PORT_GROUP_SET", &pg);
+ const struct dpif *dpif = dump->dpif;
+ if (!dump->error) {
+ dump->error = dpif->dpif_class->port_dump_done(dpif, dump->state);
+ log_operation(dpif, "port_dump_done", dump->error);
+ }
+ return dump->error == EOF ? 0 : dump->error;
}
+/* Polls for changes in the set of ports in 'dpif'. If the set of ports in
+ * 'dpif' has changed, this function does one of the following:
+ *
+ * - Stores the name of the device that was added to or deleted from 'dpif' in
+ * '*devnamep' and returns 0. The caller is responsible for freeing
+ * '*devnamep' (with free()) when it no longer needs it.
+ *
+ * - Returns ENOBUFS and sets '*devnamep' to NULL.
+ *
+ * This function may also return 'false positives', where it returns 0 and
+ * '*devnamep' names a device that was not actually added or deleted or it
+ * returns ENOBUFS without any change.
+ *
+ * Returns EAGAIN if the set of ports in 'dpif' has not changed. May also
+ * return other positive errno values to indicate that something has gone
+ * wrong.
+ *
+ * Whenever the return value is nonzero, '*devnamep' is set to NULL. */
int
-dpif_port_group_get(const struct dpif *dpif, uint16_t group,
- uint16_t **ports, size_t *n_ports)
+dpif_port_poll(const struct dpif *dpif, char **devnamep)
{
- int error;
-
- *ports = NULL;
- *n_ports = 0;
- for (;;) {
- struct odp_port_group pg;
- pg.group = group;
- pg.ports = *ports;
- pg.n_ports = *n_ports;
-
- error = do_ioctl(dpif, ODP_PORT_GROUP_GET, "ODP_PORT_GROUP_GET", &pg);
- if (error) {
- /* Hard error. */
- free(*ports);
- *ports = NULL;
- *n_ports = 0;
- break;
- } else if (pg.n_ports <= *n_ports) {
- /* Success. */
- *n_ports = pg.n_ports;
- break;
- } else {
- /* Soft error: there were more ports than we expected in the
- * group. Try again. */
- free(*ports);
- *ports = xcalloc(pg.n_ports, sizeof **ports);
- *n_ports = pg.n_ports;
- }
+ int error = dpif->dpif_class->port_poll(dpif, devnamep);
+ if (error) {
+ *devnamep = NULL;
 }
 return error;
}
-int
-dpif_flow_flush(struct dpif *dpif)
+/* Arranges for the poll loop to wake up when port_poll(dpif) will return a
+ * value other than EAGAIN. The work is done entirely by the class's
+ * 'port_poll_wait' function. */
+void
+dpif_port_poll_wait(const struct dpif *dpif)
{
- COVERAGE_INC(dpif_flow_flush);
- return do_ioctl(dpif, ODP_FLOW_FLUSH, "ODP_FLOW_FLUSH", NULL);
+ dpif->dpif_class->port_poll_wait(dpif);
}
-static enum vlog_level
-flow_message_log_level(int error)
+/* Appends a human-readable representation of 'stats' to 's'.
+ *
+ * The output holds the packet count, the byte count and a 'used' field. The
+ * 'used' field is "never" if 'stats->used' is zero. Otherwise it is the
+ * difference between time_msec() and 'stats->used', divided by 1000, printed
+ * with three decimals and an 's' suffix. */
+void
+dpif_flow_stats_format(const struct dpif_flow_stats *stats, struct ds *s)
{
- return error ? VLL_WARN : VLL_DBG;
+ ds_put_format(s, "packets:%"PRIu64", bytes:%"PRIu64", used:",
+ stats->n_packets, stats->n_bytes);
+ if (stats->used) {
+ ds_put_format(s, "%.3fs", (time_msec() - stats->used) / 1000.0);
+ } else {
+ ds_put_format(s, "never");
+ }
+ /* XXX tcp_flags? */
}
-static bool
-should_log_flow_message(int error)
+/* Deletes all flows from 'dpif'. Returns 0 if successful, otherwise a
+ * positive errno value. The result of the class's 'flow_flush' function is
+ * passed to log_operation() before it is returned. */
+int
+dpif_flow_flush(struct dpif *dpif)
{
- return !vlog_should_drop(THIS_MODULE, flow_message_log_level(error),
- error ? &error_rl : &dpmsg_rl);
-}
+ int error;
-static void
-log_flow_message(const struct dpif *dpif, int error,
- const char *operation,
- const flow_t *flow, const struct odp_flow_stats *stats,
- const union odp_action *actions, size_t n_actions)
-{
- struct ds ds = DS_EMPTY_INITIALIZER;
- ds_put_format(&ds, "%s: ", dpif_name(dpif));
- if (error) {
- ds_put_cstr(&ds, "failed to ");
- }
- ds_put_format(&ds, "%s ", operation);
- if (error) {
- ds_put_format(&ds, "(%s) ", strerror(error));
- }
- flow_format(&ds, flow);
- if (stats) {
- ds_put_cstr(&ds, ", ");
- format_odp_flow_stats(&ds, stats);
- }
- if (actions || n_actions) {
- ds_put_cstr(&ds, ", actions:");
- format_odp_actions(&ds, actions, n_actions);
- }
- vlog(THIS_MODULE, flow_message_log_level(error), "%s", ds_cstr(&ds));
- ds_destroy(&ds);
+ COVERAGE_INC(dpif_flow_flush);
+
+ error = dpif->dpif_class->flow_flush(dpif);
+ log_operation(dpif, "flow_flush", error);
+ return error;
}
-static int
-do_flow_ioctl(const struct dpif *dpif, int cmd, struct odp_flow *flow,
- const char *operation, bool show_stats)
+/* Queries 'dpif' for a flow entry. The flow is specified by the Netlink
+ * attributes with types ODP_KEY_ATTR_* in the 'key_len' bytes starting at
+ * 'key'.
+ *
+ * Returns 0 if successful. If no flow matches, returns ENOENT. On other
+ * failure, returns a positive errno value.
+ *
+ * If 'actionsp' is nonnull, then on success '*actionsp' will be set to an
+ * ofpbuf owned by the caller that contains the Netlink attributes for the
+ * flow's actions. The caller must free the ofpbuf (with ofpbuf_delete()) when
+ * it is no longer needed. On failure '*actionsp' is set to NULL.
+ *
+ * If 'stats' is nonnull, then on success it will be updated with the flow's
+ * statistics. On failure '*stats' is zeroed. */
+int
+dpif_flow_get(const struct dpif *dpif,
+ const struct nlattr *key, size_t key_len,
+ struct ofpbuf **actionsp, struct dpif_flow_stats *stats)
{
- int error = do_ioctl(dpif, cmd, NULL, flow);
- if (error && show_stats) {
- flow->n_actions = 0;
+ int error;
+
+ COVERAGE_INC(dpif_flow_get);
+
+ error = dpif->dpif_class->flow_get(dpif, key, key_len, actionsp, stats);
+ if (error) {
+ if (actionsp) {
+ *actionsp = NULL;
+ }
+ if (stats) {
+ memset(stats, 0, sizeof *stats);
+ }
 }
 if (should_log_flow_message(error)) {
- log_flow_message(dpif, error, operation, &flow->key,
- show_stats && !error ? &flow->stats : NULL,
- flow->actions, flow->n_actions);
+ const struct nlattr *actions;
+ size_t actions_len;
+
+ if (!error && actionsp) {
+ actions = (*actionsp)->data;
+ actions_len = (*actionsp)->size;
+ } else {
+ actions = NULL; /* No actions to show in the log message. */
+ actions_len = 0;
+ }
+ log_flow_message(dpif, error, "flow_get", key, key_len, stats,
+ actions, actions_len);
 }
 return error;
}
+/* Adds or modifies a flow in 'dpif'. The flow is specified by the Netlink
+ * attributes with types ODP_KEY_ATTR_* in the 'key_len' bytes starting at
+ * 'key'. The associated actions are specified by the Netlink attributes with
+ * types ODPAT_* in the 'actions_len' bytes starting at 'actions'.
+ *
+ * - If the flow's key does not exist in 'dpif', then the flow will be added if
+ * 'flags' includes DPIF_FP_CREATE. Otherwise the operation will fail with
+ * ENOENT.
+ *
+ * If the operation succeeds, then 'stats', if nonnull, will be zeroed.
+ *
+ * - If the flow's key does exist in 'dpif', then the flow's actions will be
+ * updated if 'flags' includes DPIF_FP_MODIFY. Otherwise the operation will
+ * fail with EEXIST. If the flow's actions are updated, then its statistics
+ * will be zeroed if 'flags' includes DPIF_FP_ZERO_STATS, and left as-is
+ * otherwise.
+ *
+ * If the operation succeeds, then 'stats', if nonnull, will be set to the
+ * flow's statistics before the update.
+ *
+ * 'flags' must not contain any bit other than DPIF_FP_CREATE, DPIF_FP_MODIFY
+ * and DPIF_FP_ZERO_STATS (asserted). If the operation fails, then 'stats',
+ * if nonnull, is zeroed.
+ */
int
-dpif_flow_put(struct dpif *dpif, struct odp_flow_put *put)
+dpif_flow_put(struct dpif *dpif, enum dpif_flow_put_flags flags,
+ const struct nlattr *key, size_t key_len,
+ const struct nlattr *actions, size_t actions_len,
+ struct dpif_flow_stats *stats)
{
- int error = do_ioctl(dpif, ODP_FLOW_PUT, NULL, put);
+ int error;
+
 COVERAGE_INC(dpif_flow_put);
+ assert(!(flags & ~(DPIF_FP_CREATE | DPIF_FP_MODIFY | DPIF_FP_ZERO_STATS)));
+
+ error = dpif->dpif_class->flow_put(dpif, flags, key, key_len,
+ actions, actions_len, stats);
+ if (error && stats) {
+ memset(stats, 0, sizeof *stats);
+ }
 if (should_log_flow_message(error)) {
- struct ds operation = DS_EMPTY_INITIALIZER;
- ds_put_cstr(&operation, "put");
- if (put->flags & ODPPF_CREATE) {
- ds_put_cstr(&operation, "[create]");
- }
- if (put->flags & ODPPF_MODIFY) {
- ds_put_cstr(&operation, "[modify]");
+ struct ds s;
+
+ ds_init(&s);
+ ds_put_cstr(&s, "put"); /* Operation label: "put" plus one tag per flag. */
+ if (flags & DPIF_FP_CREATE) {
+ ds_put_cstr(&s, "[create]");
 }
- if (put->flags & ODPPF_ZERO_STATS) {
- ds_put_cstr(&operation, "[zero]");
+ if (flags & DPIF_FP_MODIFY) {
+ ds_put_cstr(&s, "[modify]");
 }
-#define ODPPF_ALL (ODPPF_CREATE | ODPPF_MODIFY | ODPPF_ZERO_STATS)
- if (put->flags & ~ODPPF_ALL) {
- ds_put_format(&operation, "[%x]", put->flags & ~ODPPF_ALL);
+ if (flags & DPIF_FP_ZERO_STATS) {
+ ds_put_cstr(&s, "[zero]");
 }
- log_flow_message(dpif, error, ds_cstr(&operation), &put->flow.key,
- !error ? &put->flow.stats : NULL,
- put->flow.actions, put->flow.n_actions);
- ds_destroy(&operation);
+ log_flow_message(dpif, error, ds_cstr(&s), key, key_len, stats,
+ actions, actions_len);
+ ds_destroy(&s);
 }
 return error;
}
+/* Deletes a flow from 'dpif' and returns 0, or returns ENOENT if 'dpif' does
+ * not contain such a flow. The flow is specified by the Netlink attributes
+ * with types ODP_KEY_ATTR_* in the 'key_len' bytes starting at 'key'.
+ *
+ * If the operation succeeds, then 'stats', if nonnull, will be set to the
+ * flow's statistics before its deletion. */
int
-dpif_flow_del(struct dpif *dpif, struct odp_flow *flow)
+dpif_flow_del(struct dpif *dpif,
+ const struct nlattr *key, size_t key_len,
+ struct dpif_flow_stats *stats)
{
+ int error;
+
COVERAGE_INC(dpif_flow_del);
- check_rw_odp_flow(flow);
- memset(&flow->stats, 0, sizeof flow->stats);
- return do_flow_ioctl(dpif, ODP_FLOW_DEL, flow, "delete flow", true);
-}
-int
-dpif_flow_get(const struct dpif *dpif, struct odp_flow *flow)
-{
- COVERAGE_INC(dpif_flow_query);
- check_rw_odp_flow(flow);
- memset(&flow->stats, 0, sizeof flow->stats);
- return do_flow_ioctl(dpif, ODP_FLOW_GET, flow, "get flow", true);
+ error = dpif->dpif_class->flow_del(dpif, key, key_len, stats);
+ if (error && stats) {
+ memset(stats, 0, sizeof *stats);
+ }
+ if (should_log_flow_message(error)) {
+ log_flow_message(dpif, error, "flow_del", key, key_len,
+ !error ? stats : NULL, NULL, 0);
+ }
+ return error;
}
-int
-dpif_flow_get_multiple(const struct dpif *dpif,
- struct odp_flow flows[], size_t n)
+/* Initializes 'dump' to begin dumping the flows in a dpif.
+ *
+ * This function provides no status indication. An error status for the entire
+ * dump operation is provided when it is completed by calling
+ * dpif_flow_dump_done().
+ */
+void
+dpif_flow_dump_start(struct dpif_flow_dump *dump, const struct dpif *dpif)
{
- struct odp_flowvec fv;
- size_t i;
-
- COVERAGE_ADD(dpif_flow_query_multiple, n);
- fv.flows = flows;
- fv.n_flows = n;
- for (i = 0; i < n; i++) {
- check_rw_odp_flow(&flows[i]);
- }
- return do_ioctl(dpif, ODP_FLOW_GET_MULTIPLE, "ODP_FLOW_GET_MULTIPLE",
- &fv);
+ dump->dpif = dpif;
+ dump->error = dpif->dpif_class->flow_dump_start(dpif, &dump->state);
+ log_operation(dpif, "flow_dump_start", dump->error);
}
-int
-dpif_flow_list(const struct dpif *dpif, struct odp_flow flows[], size_t n,
- size_t *n_out)
+/* Attempts to retrieve another flow from 'dump', which must have been
+ * initialized with dpif_flow_dump_start(). On success, updates the output
+ * parameters as described below and returns true. Otherwise, returns false.
+ * Failure might indicate an actual error or merely the end of the flow table.
+ * An error status for the entire dump operation is provided when it is
+ * completed by calling dpif_flow_dump_done().
+ *
+ * On success, if 'key' and 'key_len' are nonnull then '*key' and '*key_len'
+ * will be set to Netlink attributes with types ODP_KEY_ATTR_* representing the
+ * dumped flow's key. If 'actions' and 'actions_len' are nonnull then they are
+ * set to Netlink attributes with types ODPAT_* representing the dumped flow's
+ * actions. If 'stats' is nonnull then it will be set to the dumped flow's
+ * statistics.
+ *
+ * All of the returned data is owned by the dpif being dumped, not by the
+ * caller, and the caller must not modify or free it. It is guaranteed to
+ * remain accessible and unchanging until at least the next call to
+ * dpif_flow_dump_next() or dpif_flow_dump_done() for 'dump'. */
+bool
+dpif_flow_dump_next(struct dpif_flow_dump *dump,
+ const struct nlattr **key, size_t *key_len,
+ const struct nlattr **actions, size_t *actions_len,
+ const struct dpif_flow_stats **stats)
{
- struct odp_flowvec fv;
- uint32_t i;
- int error;
+ const struct dpif *dpif = dump->dpif;
+ int error = dump->error;
- COVERAGE_INC(dpif_flow_query_list);
- fv.flows = flows;
- fv.n_flows = n;
- if (RUNNING_ON_VALGRIND) {
- memset(flows, 0, n * sizeof *flows);
- } else {
- for (i = 0; i < n; i++) {
- flows[i].actions = NULL;
- flows[i].n_actions = 0;
+ if (!error) {
+ error = dpif->dpif_class->flow_dump_next(dpif, dump->state,
+ key, key_len,
+ actions, actions_len,
+ stats);
+ if (error) {
+ dpif->dpif_class->flow_dump_done(dpif, dump->state);
}
}
- error = do_ioctl(dpif, ODP_FLOW_LIST, NULL, &fv);
if (error) {
- *n_out = 0;
- VLOG_WARN_RL(&error_rl, "%s: flow list failed (%s)",
- dpif_name(dpif), strerror(error));
- } else {
- COVERAGE_ADD(dpif_flow_query_list_n, fv.n_flows);
- *n_out = fv.n_flows;
- VLOG_DBG_RL(&dpmsg_rl, "%s: listed %zu flows",
- dpif_name(dpif), *n_out);
+ if (key) {
+ *key = NULL;
+ *key_len = 0;
+ }
+ if (actions) {
+ *actions = NULL;
+ *actions_len = 0;
+ }
+ if (stats) {
+ *stats = NULL;
+ }
}
- return error;
+ if (!dump->error) {
+ if (error == EOF) {
+ VLOG_DBG_RL(&dpmsg_rl, "%s: dumped all flows", dpif_name(dpif));
+ } else if (should_log_flow_message(error)) {
+ log_flow_message(dpif, error, "flow_dump",
+ key ? *key : NULL, key ? *key_len : 0,
+ stats ? *stats : NULL, actions ? *actions : NULL,
+ actions ? *actions_len : 0);
+ }
+ }
+ dump->error = error;
+ return !error;
}
+/* Completes flow table dump operation 'dump', which must have been initialized
+ * with dpif_flow_dump_start(). Returns 0 if the dump operation was
+ * error-free, otherwise a positive errno value describing the problem. */
int
-dpif_flow_list_all(const struct dpif *dpif,
- struct odp_flow **flowsp, size_t *np)
+dpif_flow_dump_done(struct dpif_flow_dump *dump)
{
- struct odp_stats stats;
- struct odp_flow *flows;
- size_t n_flows;
- int error;
-
- *flowsp = NULL;
- *np = 0;
-
- error = dpif_get_dp_stats(dpif, &stats);
- if (error) {
- return error;
+ const struct dpif *dpif = dump->dpif;
+ if (!dump->error) {
+ dump->error = dpif->dpif_class->flow_dump_done(dpif, dump->state);
+ log_operation(dpif, "flow_dump_done", dump->error);
}
-
- flows = xmalloc(sizeof *flows * stats.n_flows);
- error = dpif_flow_list(dpif, flows, stats.n_flows, &n_flows);
- if (error) {
- free(flows);
- return error;
- }
-
- if (stats.n_flows != n_flows) {
- VLOG_WARN_RL(&error_rl, "%s: datapath stats reported %"PRIu32" "
- "flows but flow listing reported %zu",
- dpif_name(dpif), stats.n_flows, n_flows);
- }
- *flowsp = flows;
- *np = n_flows;
- return 0;
+ return dump->error == EOF ? 0 : dump->error;
}
+/* Causes 'dpif' to perform the 'actions_len' bytes of actions in 'actions' on
+ * the Ethernet frame specified in 'buf'.
+ *
+ * Returns 0 if successful, otherwise a positive errno value. */
int
-dpif_execute(struct dpif *dpif, uint16_t in_port,
- const union odp_action actions[], size_t n_actions,
+dpif_execute(struct dpif *dpif,
+ const struct nlattr *actions, size_t actions_len,
const struct ofpbuf *buf)
{
int error;
COVERAGE_INC(dpif_execute);
- if (n_actions > 0) {
- struct odp_execute execute;
- memset(&execute, 0, sizeof execute);
- execute.in_port = in_port;
- execute.actions = (union odp_action *) actions;
- execute.n_actions = n_actions;
- execute.data = buf->data;
- execute.length = buf->size;
- error = do_ioctl(dpif, ODP_EXECUTE, NULL, &execute);
+ if (actions_len > 0) {
+ error = dpif->dpif_class->execute(dpif, actions, actions_len, buf);
} else {
error = 0;
}
struct ds ds = DS_EMPTY_INITIALIZER;
char *packet = ofp_packet_to_string(buf->data, buf->size, buf->size);
ds_put_format(&ds, "%s: execute ", dpif_name(dpif));
- format_odp_actions(&ds, actions, n_actions);
+ format_odp_actions(&ds, actions, actions_len);
if (error) {
ds_put_format(&ds, " failed (%s)", strerror(error));
}
return error;
}
+static bool OVS_UNUSED
+is_valid_listen_mask(int listen_mask)
+{
+ return !(listen_mask & ~((1u << DPIF_UC_MISS) |
+ (1u << DPIF_UC_ACTION) |
+ (1u << DPIF_UC_SAMPLE)));
+}
+
+/* Retrieves 'dpif''s "listen mask" into '*listen_mask'. A 1-bit of value 2**X
+ * set in '*listen_mask' indicates that dpif_recv() will receive messages of
+ * the type (from "enum dpif_upcall_type") with value X. Returns 0 if
+ * successful, otherwise a positive errno value. */
int
dpif_recv_get_mask(const struct dpif *dpif, int *listen_mask)
{
- int error = do_ioctl(dpif, ODP_GET_LISTEN_MASK, "ODP_GET_LISTEN_MASK",
- listen_mask);
+ int error = dpif->dpif_class->recv_get_mask(dpif, listen_mask);
if (error) {
*listen_mask = 0;
}
+ assert(is_valid_listen_mask(*listen_mask));
+ log_operation(dpif, "recv_get_mask", error);
return error;
}
+/* Sets 'dpif''s "listen mask" to 'listen_mask'. A 1-bit of value 2**X set in
+ * 'listen_mask' requests that dpif_recv() will receive messages of the type
+ * (from "enum dpif_upcall_type") with value X. Returns 0 if successful,
+ * otherwise a positive errno value. */
int
dpif_recv_set_mask(struct dpif *dpif, int listen_mask)
{
- return do_ioctl(dpif, ODP_SET_LISTEN_MASK, "ODP_SET_LISTEN_MASK",
- &listen_mask);
-}
-
-int
-dpif_recv(struct dpif *dpif, struct ofpbuf **bufp)
-{
- struct ofpbuf *buf;
- int retval;
int error;
- buf = ofpbuf_new(65536);
- retval = read(dpif->fd, ofpbuf_tail(buf), ofpbuf_tailroom(buf));
- if (retval < 0) {
- error = errno;
- if (error != EAGAIN) {
- VLOG_WARN_RL(&error_rl, "%s: read failed: %s",
- dpif_name(dpif), strerror(error));
- }
- } else if (retval >= sizeof(struct odp_msg)) {
- struct odp_msg *msg = buf->data;
- if (msg->length <= retval) {
- buf->size += retval;
- if (VLOG_IS_DBG_ENABLED()) {
- void *payload = msg + 1;
- size_t length = buf->size - sizeof *msg;
- char *s = ofp_packet_to_string(payload, length, length);
- VLOG_DBG_RL(&dpmsg_rl, "%s: received %s message of length "
- "%zu on port %"PRIu16": %s", dpif_name(dpif),
- (msg->type == _ODPL_MISS_NR ? "miss"
- : msg->type == _ODPL_ACTION_NR ? "action"
- : "<unknown>"),
- msg->length - sizeof(struct odp_msg),
- msg->port, s);
- free(s);
- }
- *bufp = buf;
- COVERAGE_INC(dpif_recv);
- return 0;
- } else {
- VLOG_WARN_RL(&error_rl, "%s: discarding message truncated "
- "from %zu bytes to %d",
- dpif_name(dpif), msg->length, retval);
- error = ERANGE;
- }
- } else if (!retval) {
- VLOG_WARN_RL(&error_rl, "%s: unexpected end of file", dpif_name(dpif));
- error = EPROTO;
- } else {
- VLOG_WARN_RL(&error_rl,
- "%s: discarding too-short message (%d bytes)",
- dpif_name(dpif), retval);
- error = ERANGE;
- }
+ assert(is_valid_listen_mask(listen_mask));
- *bufp = NULL;
- ofpbuf_delete(buf);
+ error = dpif->dpif_class->recv_set_mask(dpif, listen_mask);
+ log_operation(dpif, "recv_set_mask", error);
return error;
}
-void
-dpif_recv_wait(struct dpif *dpif)
-{
- poll_fd_wait(dpif->fd, POLLIN);
-}
-
-void
-dpif_get_netflow_ids(const struct dpif *dpif,
- uint8_t *engine_type, uint8_t *engine_id)
-{
- *engine_type = *engine_id = dpif->minor;
-}
-\f
-struct dpifmon {
- struct dpif *dpif;
- struct nl_sock *sock;
- int local_ifindex;
-};
-
+/* Retrieves the sFlow sampling probability. '*probability' is expressed as
+ * the number of packets out of UINT_MAX to sample, i.e. probability/UINT_MAX
+ * is the probability of sampling a given packet.
+ *
+ * Returns 0 if successful, otherwise a positive errno value. EOPNOTSUPP
+ * indicates that 'dpif' does not support sFlow sampling. */
int
-dpifmon_create(const char *datapath_name, struct dpifmon **monp)
+dpif_get_sflow_probability(const struct dpif *dpif, uint32_t *probability)
{
- struct dpifmon *mon;
- char local_name[IFNAMSIZ];
- int error;
-
- mon = *monp = xmalloc(sizeof *mon);
-
- error = dpif_open(datapath_name, &mon->dpif);
- if (error) {
- goto error;
- }
- error = dpif_port_get_name(mon->dpif, ODPP_LOCAL,
- local_name, sizeof local_name);
- if (error) {
- goto error_close_dpif;
- }
-
- mon->local_ifindex = if_nametoindex(local_name);
- if (!mon->local_ifindex) {
- error = errno;
- VLOG_WARN("could not get ifindex of %s device: %s",
- local_name, strerror(errno));
- goto error_close_dpif;
- }
-
- error = nl_sock_create(NETLINK_ROUTE, RTNLGRP_LINK, 0, 0, &mon->sock);
+ int error = (dpif->dpif_class->get_sflow_probability
+ ? dpif->dpif_class->get_sflow_probability(dpif, probability)
+ : EOPNOTSUPP);
if (error) {
- VLOG_WARN("could not create rtnetlink socket: %s", strerror(error));
- goto error_close_dpif;
+ *probability = 0;
}
-
- return 0;
-
-error_close_dpif:
- dpif_close(mon->dpif);
-error:
- free(mon);
- *monp = NULL;
+ log_operation(dpif, "get_sflow_probability", error);
return error;
}
-void
-dpifmon_destroy(struct dpifmon *mon)
+/* Sets the sFlow sampling probability. 'probability' is expressed as the
+ * number of packets out of UINT_MAX to sample, i.e. probability/UINT_MAX is
+ * the probability of sampling a given packet.
+ *
+ * Returns 0 if successful, otherwise a positive errno value. EOPNOTSUPP
+ * indicates that 'dpif' does not support sFlow sampling. */
+int
+dpif_set_sflow_probability(struct dpif *dpif, uint32_t probability)
{
- if (mon) {
- dpif_close(mon->dpif);
- nl_sock_destroy(mon->sock);
- }
+ int error = (dpif->dpif_class->set_sflow_probability
+ ? dpif->dpif_class->set_sflow_probability(dpif, probability)
+ : EOPNOTSUPP);
+ log_operation(dpif, "set_sflow_probability", error);
+ return error;
}
+/* Polls for an upcall from 'dpif'. If successful, stores the upcall into
+ * '*upcall'. Only upcalls of the types selected with dpif_recv_set_mask()
+ * will ordinarily be received (but if a message type is enabled and then
+ * later disabled, some stragglers might pop up).
+ *
+ * The caller takes ownership of the data that 'upcall' points to.
+ * 'upcall->key' and 'upcall->actions' (if nonnull) point into data owned by
+ * 'upcall->packet', so their memory cannot be freed separately. (This is
+ * hardly a great way to do things but it works out OK for the dpif providers
+ * and clients that exist so far.)
+ *
+ * Returns 0 if successful, otherwise a positive errno value. Returns EAGAIN
+ * if no upcall is immediately available. */
int
-dpifmon_poll(struct dpifmon *mon, char **devnamep)
+dpif_recv(struct dpif *dpif, struct dpif_upcall *upcall)
{
- static struct vlog_rate_limit slow_rl = VLOG_RATE_LIMIT_INIT(1, 5);
- static const struct nl_policy rtnlgrp_link_policy[] = {
- [IFLA_IFNAME] = { .type = NL_A_STRING },
- [IFLA_MASTER] = { .type = NL_A_U32, .optional = true },
- };
- struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)];
- struct ofpbuf *buf;
- int error;
-
- *devnamep = NULL;
-again:
- error = nl_sock_recv(mon->sock, &buf, false);
- switch (error) {
- case 0:
- if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg),
- rtnlgrp_link_policy,
- attrs, ARRAY_SIZE(rtnlgrp_link_policy))) {
- VLOG_WARN_RL(&slow_rl, "received bad rtnl message");
- error = ENOBUFS;
- } else {
- const char *devname = nl_attr_get_string(attrs[IFLA_IFNAME]);
- bool for_us;
-
- if (attrs[IFLA_MASTER]) {
- uint32_t master_ifindex = nl_attr_get_u32(attrs[IFLA_MASTER]);
- for_us = master_ifindex == mon->local_ifindex;
- } else {
- /* It's for us if that device is one of our ports. */
- struct odp_port port;
- for_us = !dpif_port_query_by_name(mon->dpif, devname, &port);
- }
-
- if (!for_us) {
- /* Not for us, try again. */
- ofpbuf_delete(buf);
- COVERAGE_INC(dpifmon_poll_false_wakeup);
- goto again;
- }
- COVERAGE_INC(dpifmon_poll_changed);
- *devnamep = xstrdup(devname);
- }
- ofpbuf_delete(buf);
- break;
-
- case EAGAIN:
- /* Nothing to do. */
- break;
-
- case ENOBUFS:
- VLOG_WARN_RL(&slow_rl, "dpifmon socket overflowed");
- break;
-
- default:
- VLOG_WARN_RL(&slow_rl, "error on dpifmon socket: %s", strerror(error));
- break;
+ int error = dpif->dpif_class->recv(dpif, upcall);
+ if (!error && !VLOG_DROP_DBG(&dpmsg_rl)) {
+ struct flow flow;
+ char *s;
+
+ s = ofp_packet_to_string(upcall->packet->data,
+ upcall->packet->size, upcall->packet->size);
+ odp_flow_key_to_flow(upcall->key, upcall->key_len, &flow);
+
+ VLOG_DBG("%s: %s upcall on port %"PRIu16": %s", dpif_name(dpif),
+ (upcall->type == DPIF_UC_MISS ? "miss"
+ : upcall->type == DPIF_UC_ACTION ? "action"
+ : upcall->type == DPIF_UC_SAMPLE ? "sample"
+ : "<unknown>"),
+ flow.in_port, s);
+ free(s);
}
return error;
}
+/* Discards all messages that would otherwise be received by dpif_recv() on
+ * 'dpif'. */
void
-dpifmon_run(struct dpifmon *mon UNUSED)
+dpif_recv_purge(struct dpif *dpif)
{
- /* Nothing to do in this implementation. */
+ COVERAGE_INC(dpif_purge);
+ if (dpif->dpif_class->recv_purge) {
+ dpif->dpif_class->recv_purge(dpif);
+ }
}
+/* Arranges for the poll loop to wake up when 'dpif' has a message queued to be
+ * received with dpif_recv(). */
void
-dpifmon_wait(struct dpifmon *mon)
+dpif_recv_wait(struct dpif *dpif)
{
- nl_sock_wait(mon->sock, POLLIN);
+ dpif->dpif_class->recv_wait(dpif);
}
-\f
-static int get_openvswitch_major(void);
-static int get_major(const char *target, int default_major);
-static int
-lookup_minor(const char *name, unsigned int *minor)
+/* Obtains the NetFlow engine type and engine ID for 'dpif' into '*engine_type'
+ * and '*engine_id', respectively. */
+void
+dpif_get_netflow_ids(const struct dpif *dpif,
+ uint8_t *engine_type, uint8_t *engine_id)
{
- struct ethtool_drvinfo drvinfo;
- struct ifreq ifr;
- int error;
- int sock;
-
- *minor = -1;
- sock = socket(AF_INET, SOCK_DGRAM, 0);
- if (sock < 0) {
- VLOG_WARN("socket(AF_INET) failed: %s", strerror(errno));
- error = errno;
- goto error;
- }
-
- memset(&ifr, 0, sizeof ifr);
- strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
- ifr.ifr_data = (caddr_t) &drvinfo;
-
- memset(&drvinfo, 0, sizeof drvinfo);
- drvinfo.cmd = ETHTOOL_GDRVINFO;
- if (ioctl(sock, SIOCETHTOOL, &ifr)) {
- VLOG_WARN("ioctl(SIOCETHTOOL) failed: %s", strerror(errno));
- error = errno;
- goto error_close_sock;
- }
-
- if (strcmp(drvinfo.driver, "openvswitch")) {
- VLOG_WARN("%s is not an openvswitch device", name);
- error = EOPNOTSUPP;
- goto error_close_sock;
- }
-
- if (!isdigit(drvinfo.bus_info[0])) {
- VLOG_WARN("%s ethtool info does not contain an openvswitch minor",
- name);
- error = EPROTOTYPE;
- goto error_close_sock;
- }
-
- *minor = atoi(drvinfo.bus_info);
- close(sock);
- return 0;
-
-error_close_sock:
- close(sock);
-error:
- return error;
+ *engine_type = dpif->netflow_engine_type;
+ *engine_id = dpif->netflow_engine_id;
}
-static int
-make_openvswitch_device(unsigned int minor, char **fnp)
+/* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a priority
+ * value for use in the ODPAT_SET_PRIORITY action. On success, returns 0 and
+ * stores the priority into '*priority'. On failure, returns a positive errno
+ * value and stores 0 into '*priority'. */
+int
+dpif_queue_to_priority(const struct dpif *dpif, uint32_t queue_id,
+ uint32_t *priority)
{
- dev_t dev = makedev(get_openvswitch_major(), minor);
- const char dirname[] = "/dev/net";
- struct stat s;
- char fn[128];
-
- *fnp = NULL;
- sprintf(fn, "%s/dp%d", dirname, minor);
- if (!stat(fn, &s)) {
- if (!S_ISCHR(s.st_mode)) {
- VLOG_WARN_RL(&error_rl, "%s is not a character device, fixing",
- fn);
- } else if (s.st_rdev != dev) {
- VLOG_WARN_RL(&error_rl,
- "%s is device %u:%u instead of %u:%u, fixing",
- fn, major(s.st_rdev), minor(s.st_rdev),
- major(dev), minor(dev));
- } else {
- goto success;
- }
- if (unlink(fn)) {
- VLOG_WARN_RL(&error_rl, "%s: unlink failed (%s)",
- fn, strerror(errno));
- return errno;
- }
- } else if (errno == ENOENT) {
- if (stat(dirname, &s)) {
- if (errno == ENOENT) {
- if (mkdir(dirname, 0755)) {
- VLOG_WARN_RL(&error_rl, "%s: mkdir failed (%s)",
- dirname, strerror(errno));
- return errno;
- }
- } else {
- VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)",
- dirname, strerror(errno));
- return errno;
- }
- }
- } else {
- VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)", fn, strerror(errno));
- return errno;
- }
-
- /* The device needs to be created. */
- if (mknod(fn, S_IFCHR | 0700, dev)) {
- VLOG_WARN_RL(&error_rl,
- "%s: creating character device %u:%u failed (%s)",
- fn, major(dev), minor(dev), strerror(errno));
- return errno;
+ int error = (dpif->dpif_class->queue_to_priority
+ ? dpif->dpif_class->queue_to_priority(dpif, queue_id,
+ priority)
+ : EOPNOTSUPP);
+ if (error) {
+ *priority = 0;
}
-
-success:
- *fnp = xstrdup(fn);
- return 0;
+ log_operation(dpif, "queue_to_priority", error);
+ return error;
}
-
-
-static int
-get_openvswitch_major(void)
+\f
+void
+dpif_init(struct dpif *dpif, const struct dpif_class *dpif_class,
+ const char *name,
+ uint8_t netflow_engine_type, uint8_t netflow_engine_id)
{
- static unsigned int openvswitch_major;
- if (!openvswitch_major) {
- enum { DEFAULT_MAJOR = 248 };
- openvswitch_major = get_major("openvswitch", DEFAULT_MAJOR);
- }
- return openvswitch_major;
+ dpif->dpif_class = dpif_class;
+ dpif->base_name = xstrdup(name);
+ dpif->full_name = xasprintf("%s@%s", dpif_class->type, name);
+ dpif->netflow_engine_type = netflow_engine_type;
+ dpif->netflow_engine_id = netflow_engine_id;
}
-static int
-get_major(const char *target, int default_major)
+/* Undoes the results of initialization.
+ *
+ * Normally this function only needs to be called from dpif_close().
+ * However, it may be called by providers due to an error on opening
+ * that occurs after initialization. In this case dpif_close() would
+ * never be called. */
+void
+dpif_uninit(struct dpif *dpif, bool close)
{
- const char fn[] = "/proc/devices";
- char line[128];
- FILE *file;
- int ln;
-
- file = fopen(fn, "r");
- if (!file) {
- VLOG_ERR("opening %s failed (%s)", fn, strerror(errno));
- goto error;
- }
+ char *base_name = dpif->base_name;
+ char *full_name = dpif->full_name;
- for (ln = 1; fgets(line, sizeof line, file); ln++) {
- char name[64];
- int major;
-
- if (!strncmp(line, "Character", 9) || line[0] == '\0') {
- /* Nothing to do. */
- } else if (!strncmp(line, "Block", 5)) {
- /* We only want character devices, so skip the rest of the file. */
- break;
- } else if (sscanf(line, "%d %63s", &major, name)) {
- if (!strcmp(name, target)) {
- fclose(file);
- return major;
- }
- } else {
- static bool warned;
- if (!warned) {
- VLOG_WARN("%s:%d: syntax error", fn, ln);
- }
- warned = true;
- }
+ if (close) {
+ dpif->dpif_class->close(dpif);
}
- VLOG_ERR("%s: %s major not found (is the module loaded?), using "
- "default major %d", fn, target, default_major);
-error:
- VLOG_INFO("using default major %d for %s", default_major, target);
- return default_major;
+ free(base_name);
+ free(full_name);
}
-
-static int
-name_to_minor(const char *name, unsigned int *minor)
+\f
+static void
+log_operation(const struct dpif *dpif, const char *operation, int error)
{
- if (!get_minor_from_name(name, minor)) {
- return 0;
+ if (!error) {
+ VLOG_DBG_RL(&dpmsg_rl, "%s: %s success", dpif_name(dpif), operation);
+ } else if (is_errno(error)) {
+ VLOG_WARN_RL(&error_rl, "%s: %s failed (%s)",
+ dpif_name(dpif), operation, strerror(error));
+ } else {
+ VLOG_WARN_RL(&error_rl, "%s: %s failed (%d/%d)",
+ dpif_name(dpif), operation,
+ get_ofp_err_type(error), get_ofp_err_code(error));
}
- return lookup_minor(name, minor);
}
-static int
-get_minor_from_name(const char *name, unsigned int *minor)
+static enum vlog_level
+flow_message_log_level(int error)
{
- if (!strncmp(name, "dp", 2) && isdigit(name[2])) {
- *minor = atoi(name + 2);
- return 0;
- } else {
- return EINVAL;
- }
+ return error ? VLL_WARN : VLL_DBG;
}
-static int
-open_by_minor(unsigned int minor, struct dpif **dpifp)
+static bool
+should_log_flow_message(int error)
{
- struct dpif *dpif;
- int error;
- char *fn;
- int fd;
+ return !vlog_should_drop(THIS_MODULE, flow_message_log_level(error),
+ error ? &error_rl : &dpmsg_rl);
+}
- *dpifp = NULL;
- error = make_openvswitch_device(minor, &fn);
+static void
+log_flow_message(const struct dpif *dpif, int error, const char *operation,
+ const struct nlattr *key, size_t key_len,
+ const struct dpif_flow_stats *stats,
+ const struct nlattr *actions, size_t actions_len)
+{
+ struct ds ds = DS_EMPTY_INITIALIZER;
+ ds_put_format(&ds, "%s: ", dpif_name(dpif));
if (error) {
- return error;
+ ds_put_cstr(&ds, "failed to ");
}
-
- fd = open(fn, O_RDONLY | O_NONBLOCK);
- if (fd < 0) {
- error = errno;
- VLOG_WARN("%s: open failed (%s)", fn, strerror(error));
- free(fn);
- return error;
+ ds_put_format(&ds, "%s ", operation);
+ if (error) {
+ ds_put_format(&ds, "(%s) ", strerror(error));
}
- free(fn);
-
- dpif = xmalloc(sizeof *dpif);
- dpif->name = xasprintf("dp%u", dpif->minor);
- dpif->minor = minor;
- dpif->fd = fd;
- *dpifp = dpif;
- return 0;
-}
-\f
-/* There is a tendency to construct odp_flow objects on the stack and to
- * forget to properly initialize their "actions" and "n_actions" members.
- * When this happens, we get memory corruption because the kernel
- * writes through the random pointer that is in the "actions" member.
- *
- * This function attempts to combat the problem by:
- *
- * - Forcing a segfault if "actions" points to an invalid region (instead
- * of just getting back EFAULT, which can be easily missed in the log).
- *
- * - Storing a distinctive value that is likely to cause an
- * easy-to-identify error later if it is dereferenced, etc.
- *
- * - Triggering a warning on uninitialized memory from Valgrind if
- * "actions" or "n_actions" was not initialized.
- */
-static void
-check_rw_odp_flow(struct odp_flow *flow)
-{
- if (flow->n_actions) {
- memset(&flow->actions[0], 0xcc, sizeof flow->actions[0]);
+ odp_flow_key_format(key, key_len, &ds);
+ if (stats) {
+ ds_put_cstr(&ds, ", ");
+ dpif_flow_stats_format(stats, &ds);
}
+ if (actions || actions_len) {
+ ds_put_cstr(&ds, ", actions:");
+ format_odp_actions(&ds, actions, actions_len);
+ }
+ vlog(THIS_MODULE, flow_message_log_level(error), "%s", ds_cstr(&ds));
+ ds_destroy(&ds);
}