#include "netlink.h"
#include "ofpbuf.h"
#include "openflow/openflow.h"
+#include "openvswitch/internal_dev.h"
#include "openvswitch/gre.h"
#include "packets.h"
#include "poll-loop.h"
#include "shash.h"
#include "svec.h"
-#ifndef GRE_IOCTL_ONLY
-#include <linux/if_link.h>
-#endif
-
#define THIS_MODULE VLM_netdev_linux
#include "vlog.h"
\f
#endif
static struct rtnetlink_notifier netdev_linux_cache_notifier;
-static struct shash cache_map = SHASH_INITIALIZER(&cache_map);
+static int cache_notifier_refcount;
enum {
VALID_IFINDEX = 1 << 0,
VALID_IN6 = 1 << 3,
VALID_MTU = 1 << 4,
VALID_CARRIER = 1 << 5,
- VALID_IS_INTERNAL = 1 << 6
+ VALID_IS_PSEUDO = 1 << 6 /* Represents is_internal and is_tap. */
};
struct tap_state {
int fd;
};
+/* Per-device state kept for "patch" devices. */
+struct patch_state {
+ /* Name of the peer device.  This is the xstrdup()'d copy produced by
+ * setup_patch(); destroy_patch() frees it. */
+ char *peer;
+};
+
struct netdev_dev_linux {
struct netdev_dev netdev_dev;
struct shash_node *shash_node;
unsigned int cache_valid;
+ /* The following are figured out "on demand" only. They are only valid
+ * when the corresponding VALID_* bit in 'cache_valid' is set. */
int ifindex;
uint8_t etheraddr[ETH_ADDR_LEN];
struct in_addr address, netmask;
struct in6_addr in6;
int mtu;
int carrier;
- bool is_internal;
+ bool is_internal; /* Is this an openvswitch internal device? */
+ bool is_tap; /* Is this a tuntap device? */
union {
struct tap_state tap;
+ struct patch_state patch;
} state;
};
struct netdev_linux {
struct netdev netdev;
-
- /* File descriptors. For ordinary network devices, the two fds below are
- * the same; for tap devices, they differ. */
- int netdev_fd; /* Network device. */
- int tap_fd; /* TAP character device, if any, otherwise the
- * network device. */
+ int fd;
};
/* An AF_INET socket (used for ioctl operations). */
static int af_inet_sock = -1;
-struct gre_config {
- uint32_t local_ip;
- uint32_t remote_ip;
- uint32_t in_key;
- uint32_t out_key;
- bool have_in_key;
- bool have_out_key;
- bool in_csum;
- bool out_csum;
-};
-
-static struct {
- union {
- struct nl_sock *nl_sock;
- int ioctl_fd;
- };
- bool use_ioctl;
-} gre_descriptors;
-
struct netdev_linux_notifier {
struct netdev_notifier notifier;
struct list node;
* additional log messages. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
-static int netdev_linux_do_ethtool(const struct netdev *, struct ethtool_cmd *,
+static int netdev_linux_init(void);
+
+static int netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *,
int cmd, const char *cmd_name);
static int netdev_linux_do_ioctl(const char *name, struct ifreq *, int cmd,
const char *cmd_name);
static int get_stats_via_netlink(int ifindex, struct netdev_stats *stats);
static int get_stats_via_proc(const char *netdev_name, struct netdev_stats *stats);
+/* Reports whether 'netdev_class' uses netdev_linux_init() as its 'init' hook,
+ * which is how the classes defined with that hook are told apart from
+ * others. */
+static bool
+is_netdev_linux_class(const struct netdev_class *netdev_class)
+{
+    bool uses_linux_init = netdev_linux_init == netdev_class->init;
+
+    return uses_linux_init;
+}
+
+/* Converts 'netdev_dev' into the netdev_dev_linux that embeds it.  Asserts
+ * that the device's class passes is_netdev_linux_class(). */
static struct netdev_dev_linux *
netdev_dev_linux_cast(const struct netdev_dev *netdev_dev)
{
- const char *type = netdev_dev_get_type(netdev_dev);
- assert(!strcmp(type, "system") || !strcmp(type, "tap")
- || !strcmp(type, "gre"));
+    const struct netdev_class *dev_class;
+
+    dev_class = netdev_dev_get_class(netdev_dev);
+    assert(is_netdev_linux_class(dev_class));
+
return CONTAINER_OF(netdev_dev, struct netdev_dev_linux, netdev_dev);
}
+/* Converts 'netdev' into the netdev_linux that embeds it.  Asserts that the
+ * class of the underlying device passes is_netdev_linux_class(). */
static struct netdev_linux *
netdev_linux_cast(const struct netdev *netdev)
{
- const char *type = netdev_get_type(netdev);
- assert(!strcmp(type, "system") || !strcmp(type, "tap")
- || !strcmp(type, "gre"));
+    const struct netdev_class *dev_class
+        = netdev_dev_get_class(netdev_get_dev(netdev));
+
+    assert(is_netdev_linux_class(dev_class));
+
return CONTAINER_OF(netdev, struct netdev_linux, netdev);
}
+/* rtnetlink notifier callback.  Clears 'cache_valid' so that every cached,
+ * on-demand attribute is treated as stale:
+ *
+ *   - If 'change' is nonnull, only the device named 'change->ifname' is
+ *     affected, and only if its class passes is_netdev_linux_class().
+ *
+ *   - If 'change' is null, every device that netdev_dev_get_devices()
+ *     reports for 'netdev_linux_class' is affected.
+ *
+ * 'aux' is unused. */
static void
netdev_linux_cache_cb(const struct rtnetlink_change *change,
- void *aux UNUSED)
+ void *aux OVS_UNUSED)
{
struct netdev_dev_linux *dev;
if (change) {
- dev = shash_find_data(&cache_map, change->ifname);
- if (dev) {
- dev->cache_valid = 0;
+ struct netdev_dev *base_dev = netdev_dev_from_name(change->ifname);
+ if (base_dev) {
+ const struct netdev_class *netdev_class =
+ netdev_dev_get_class(base_dev);
+
+ /* The name lookup is not limited to this file's classes, so check
+ * the class before downcasting. */
+ if (is_netdev_linux_class(netdev_class)) {
+ dev = netdev_dev_linux_cast(base_dev);
+ dev->cache_valid = 0;
+ }
}
} else {
+ /* NOTE(review): a null 'change' presumably means that individual
+ * notifications could not be delivered -- confirm against the
+ * rtnetlink notifier contract. */
+ struct shash device_shash;
struct shash_node *node;
- SHASH_FOR_EACH (node, &cache_map) {
+
+ shash_init(&device_shash);
+ netdev_dev_get_devices(&netdev_linux_class, &device_shash);
+ SHASH_FOR_EACH (node, &device_shash) {
dev = node->data;
dev->cache_valid = 0;
}
+ shash_destroy(&device_shash);
}
}
-/* The arguments are marked as unused to prevent warnings on platforms where
- * the Netlink interface isn't supported. */
+/* Brings network device 'name' up by issuing SIOCSIFFLAGS on 'af_inet_sock'.
+ *
+ * The request carries IFF_UP as the entire flags word, so it is meant for
+ * devices that were just created and have no other flags worth keeping.
+ *
+ * Returns 0 if successful, otherwise the errno value from the ioctl. */
static int
-setup_gre_netlink(const char *name UNUSED, struct gre_config *config UNUSED,
- bool create UNUSED)
+if_up(const char *name)
{
-#ifdef GRE_IOCTL_ONLY
- return EOPNOTSUPP;
-#else
- int error;
- struct ofpbuf request, *reply;
- unsigned int nl_flags;
- struct ifinfomsg ifinfomsg;
- struct nlattr *linkinfo_hdr;
- struct nlattr *info_data_hdr;
- uint16_t iflags = 0;
- uint16_t oflags = 0;
- uint8_t pmtudisc = 0;
-
- if (!gre_descriptors.nl_sock) {
- error = nl_sock_create(NETLINK_ROUTE, 0, 0, 0,
- &gre_descriptors.nl_sock);
- if (error) {
- VLOG_WARN("couldn't create netlink socket: %s\n", strerror(error));
- gre_descriptors.nl_sock = NULL;
- goto error;
- }
- }
+    struct ifreq ifr;
+    int error;
- ofpbuf_init(&request, 0);
+    /* Zero the whole request first, as netdev_linux_do_ethtool() does, so
+     * that no uninitialized stack bytes are handed to the kernel. */
+    memset(&ifr, 0, sizeof ifr);
+    strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
+    ifr.ifr_flags = IFF_UP;
- nl_flags = NLM_F_REQUEST;
- if (create) {
- nl_flags |= NLM_F_CREATE|NLM_F_EXCL;
+    if (ioctl(af_inet_sock, SIOCSIFFLAGS, &ifr) == -1) {
+        /* Save errno before logging, because the logging call may change
+         * it. */
+        error = errno;
+        VLOG_DBG_RL(&rl, "%s: failed to bring device up: %s",
+                    name, strerror(error));
+        return error;
}
- /* We over-reserve space, because we do some pointer arithmetic
- * and don't want the buffer address shifting under us. */
- nl_msg_put_nlmsghdr(&request, gre_descriptors.nl_sock, 2048, RTM_NEWLINK,
- nl_flags);
-
- memset(&ifinfomsg, 0, sizeof ifinfomsg);
- ifinfomsg.ifi_family = AF_UNSPEC;
- nl_msg_put(&request, &ifinfomsg, sizeof ifinfomsg);
-
- linkinfo_hdr = ofpbuf_tail(&request);
- nl_msg_put_unspec(&request, IFLA_LINKINFO, NULL, 0);
-
- nl_msg_put_unspec(&request, IFLA_INFO_KIND, "gretap", 6);
-
- info_data_hdr = ofpbuf_tail(&request);
- nl_msg_put_unspec(&request, IFLA_INFO_DATA, NULL, 0);
+    return 0;
+}
- /* Set flags */
- if (config->have_in_key) {
- iflags |= GRE_KEY;
- }
- if (config->have_out_key) {
- oflags |= GRE_KEY;
- }
+/* Writes a printf-style 'command' to /sys/class/net/veth_pairs.  A veth may
+ * be created using the 'command' "+<name>,<peer>".  A veth may be destroyed
+ * by using the 'command' "-<name>", where <name> can be either side of the
+ * device.
+ *
+ * Returns 0 if successful, ENODEV if the control file cannot be opened,
+ * otherwise the errno value from the failed write or close.
+ */
+static int
+modify_veth(const char *format, ...)
+{
+    FILE *veth_file;
+    va_list args;
+    int error = 0;
- if (config->in_csum) {
- iflags |= GRE_CSUM;
+    veth_file = fopen("/sys/class/net/veth_pairs", "w");
+    if (!veth_file) {
+        VLOG_WARN_RL(&rl, "could not open veth device. Are you running a "
+                     "supported XenServer with the kernel module loaded?");
+        return ENODEV;
}
- if (config->out_csum) {
- oflags |= GRE_CSUM;
- }
-
- /* Add options */
- nl_msg_put_u32(&request, IFLA_GRE_IKEY, config->in_key);
- nl_msg_put_u32(&request, IFLA_GRE_OKEY, config->out_key);
- nl_msg_put_u16(&request, IFLA_GRE_IFLAGS, iflags);
- nl_msg_put_u16(&request, IFLA_GRE_OFLAGS, oflags);
- nl_msg_put_u32(&request, IFLA_GRE_LOCAL, config->local_ip);
- nl_msg_put_u32(&request, IFLA_GRE_REMOTE, config->remote_ip);
- nl_msg_put_u8(&request, IFLA_GRE_PMTUDISC, pmtudisc);
- nl_msg_put_u8(&request, IFLA_GRE_TTL, 0);
- nl_msg_put_u8(&request, IFLA_GRE_TOS, 0);
-
- info_data_hdr->nla_len = (char *)ofpbuf_tail(&request)
- - (char *)info_data_hdr;
- linkinfo_hdr->nla_len = (char *)ofpbuf_tail(&request)
- - (char *)linkinfo_hdr;
+    /* Unbuffered, so the command reaches the kernel during vfprintf() and a
+     * rejected command is reported by vfprintf() itself. */
+    setvbuf(veth_file, NULL, _IONBF, 0);
- nl_msg_put_string(&request, IFLA_IFNAME, name);
+    va_start(args, format);
+    if (vfprintf(veth_file, format, args) < 0) {
+        /* Capture errno now: fclose() and the logging below may change
+         * it. */
+        error = errno;
+    }
+    va_end(args);
- error = nl_sock_transact(gre_descriptors.nl_sock, &request, &reply);
- ofpbuf_uninit(&request);
- if (error) {
- VLOG_WARN("couldn't transact netlink socket: %s\n", strerror(error));
- goto error;
+    if (fclose(veth_file) == EOF && !error) {
+        error = errno;
+    }
+    if (error) {
+        /* This function both creates and destroys pairs, so the message
+         * must not name just one of the two operations. */
+        VLOG_WARN_RL(&rl, "could not modify veth pairs: %s",
+                     strerror(error));
+        return error;
}
- ofpbuf_delete(reply);
-error:
- return error;
-#endif
+    return 0;
}
+/* Creates the veth pair 'name'/'peer' that backs a patch device and brings
+ * both ends up.
+ *
+ * If a netdev named 'peer' already exists, it must be a patch device whose
+ * own peer is 'name'; in that case the pair was already created from the
+ * other side and nothing more is done.  Any other existing 'peer' yields
+ * EINVAL.
+ *
+ * If the pair is created here but either end cannot be brought up, the pair
+ * is removed again before returning, so that a failed call leaves no kernel
+ * device behind without an owning netdev.
+ *
+ * Returns 0 if successful, otherwise a positive errno value. */
static int
-setup_gre_ioctl(const char *name, struct gre_config *config, bool create)
+create_patch(const char *name, const char *peer)
{
- struct ip_tunnel_parm p;
- struct ifreq ifr;
-
- memset(&p, 0, sizeof p);
-
- strncpy(p.name, name, IFNAMSIZ);
+    int retval;
+    struct netdev_dev *peer_nd;
- p.iph.version = 4;
- p.iph.ihl = 5;
- p.iph.protocol = IPPROTO_GRE;
- p.iph.saddr = config->local_ip;
- p.iph.daddr = config->remote_ip;
- if (config->have_in_key) {
- p.i_flags |= GRE_KEY;
- p.i_key = config->in_key;
- }
- if (config->have_out_key) {
- p.o_flags |= GRE_KEY;
- p.o_key = config->out_key;
+    /* Only create the veth if the peer didn't already do it. */
+    peer_nd = netdev_dev_from_name(peer);
+    if (peer_nd) {
+        if (!strcmp("patch", netdev_dev_get_type(peer_nd))) {
+            struct netdev_dev_linux *ndl = netdev_dev_linux_cast(peer_nd);
+            if (!strcmp(name, ndl->state.patch.peer)) {
+                return 0;
+            } else {
+                VLOG_WARN_RL(&rl, "peer '%s' already paired with '%s'",
+                             peer, ndl->state.patch.peer);
+                return EINVAL;
+            }
+        } else {
+            VLOG_WARN_RL(&rl, "peer '%s' exists and is not a patch", peer);
+            return EINVAL;
+        }
}
- if (config->in_csum) {
- p.i_flags |= GRE_CSUM;
- }
- if (config->out_csum) {
- p.o_flags |= GRE_CSUM;
+    retval = modify_veth("+%s,%s", name, peer);
+    if (retval) {
+        return retval;
}
- strncpy(ifr.ifr_name, create ? GRE_IOCTL_DEVICE : name, IFNAMSIZ);
- ifr.ifr_ifru.ifru_data = (void *)&p;
-
- if (!gre_descriptors.ioctl_fd) {
- gre_descriptors.ioctl_fd = socket(AF_INET, SOCK_DGRAM, 0);
- if (gre_descriptors.ioctl_fd < 0) {
- VLOG_WARN("couldn't create gre ioctl socket: %s\n", strerror(errno));
- gre_descriptors.ioctl_fd = 0;
- return errno;
- }
+    /* From here on the pair exists and was created by this call, so every
+     * failure path has to remove it again. */
+    retval = if_up(name);
+    if (retval) {
+        modify_veth("-%s", name);
+        return retval;
}
- if (ioctl(gre_descriptors.ioctl_fd, create ? SIOCADDGRETAP : SIOCCHGGRETAP,
- &ifr) < 0) {
- VLOG_WARN("couldn't do gre ioctl: %s\n", strerror(errno));
- return errno;
+    retval = if_up(peer);
+    if (retval) {
+        modify_veth("-%s", name);
+        return retval;
}
return 0;
}
+/* Validates 'args' for patch device 'name' and then creates the device via
+ * create_patch().
+ *
+ * 'args' must contain exactly one entry, "peer", whose value is shorter than
+ * IFNAMSIZ.  If validation fails, EINVAL is returned and '*peer_' is not
+ * written.
+ *
+ * Once validation passes, '*peer_' receives an xstrdup()'d copy of the peer
+ * name before create_patch() is called, so the caller owns that copy and
+ * must free it even when this function returns an error.
+ *
+ * Returns 0 if successful, otherwise a positive errno value. */
static int
-setup_gre(const char *name, const struct shash *args, bool create)
+setup_patch(const char *name, const struct shash *args, char **peer_)
{
- int error;
- struct in_addr in_addr;
- struct shash_node *node;
- struct gre_config config;
-
- memset(&config, 0, sizeof config);
- config.in_csum = true;
- config.out_csum = true;
-
- SHASH_FOR_EACH (node, args) {
- if (!strcmp(node->name, "remote_ip")) {
- if (lookup_ip(node->data, &in_addr)) {
- VLOG_WARN("bad 'remote_ip' for gre device %s ", name);
- } else {
- config.remote_ip = in_addr.s_addr;
- }
- } else if (!strcmp(node->name, "local_ip")) {
- if (lookup_ip(node->data, &in_addr)) {
- VLOG_WARN("bad 'local_ip' for gre device %s ", name);
- } else {
- config.local_ip = in_addr.s_addr;
- }
- } else if (!strcmp(node->name, "key")) {
- config.have_in_key = true;
- config.have_out_key = true;
- config.in_key = htonl(atoi(node->data));
- config.out_key = htonl(atoi(node->data));
- } else if (!strcmp(node->name, "in_key")) {
- config.have_in_key = true;
- config.in_key = htonl(atoi(node->data));
- } else if (!strcmp(node->name, "out_key")) {
- config.have_out_key = true;
- config.out_key = htonl(atoi(node->data));
- } else if (!strcmp(node->name, "csum")) {
- if (!strcmp(node->data, "false")) {
- config.in_csum = false;
- config.out_csum = false;
- }
- } else {
- VLOG_WARN("unknown gre argument '%s'", node->name);
- }
- }
+ const char *peer;
- if (!config.remote_ip) {
- VLOG_WARN("gre type requires valid 'remote_ip' argument");
- error = EINVAL;
- goto error;
+ peer = shash_find_data(args, "peer");
+ if (!peer) {
+ VLOG_WARN("patch type requires valid 'peer' argument");
+ return EINVAL;
}
- if (!gre_descriptors.use_ioctl) {
- error = setup_gre_netlink(name, &config, create);
- if (error == EOPNOTSUPP) {
- gre_descriptors.use_ioctl = true;
- }
+ /* "peer" is known to be present, so more than one entry means that some
+ * other, unsupported argument was supplied. */
+ if (shash_count(args) > 1) {
+ VLOG_WARN("patch type takes only a 'peer' argument");
+ return EINVAL;
}
- if (gre_descriptors.use_ioctl) {
- error = setup_gre_ioctl(name, &config, create);
+
+ /* The peer name must fit, with its terminator, in IFNAMSIZ bytes. */
+ if (strlen(peer) >= IFNAMSIZ) {
+ VLOG_WARN_RL(&rl, "patch 'peer' arg too long");
+ return EINVAL;
}
-error:
- return error;
+ *peer_ = xstrdup(peer);
+ return create_patch(name, peer);
}
/* Creates the netdev device of 'type' with 'name'. */
static int
-netdev_linux_create_system(const char *name, const char *type UNUSED,
+netdev_linux_create_system(const char *name, const char *type OVS_UNUSED,
const struct shash *args, struct netdev_dev **netdev_devp)
{
struct netdev_dev_linux *netdev_dev;
VLOG_WARN("%s: arguments for system devices should be empty", name);
}
- if (shash_is_empty(&cache_map)) {
+ if (!cache_notifier_refcount) {
error = rtnetlink_notifier_register(&netdev_linux_cache_notifier,
netdev_linux_cache_cb, NULL);
if (error) {
return error;
}
}
+ cache_notifier_refcount++;
netdev_dev = xzalloc(sizeof *netdev_dev);
- netdev_dev->shash_node = shash_add(&cache_map, name, &netdev_dev);
-
netdev_dev_init(&netdev_dev->netdev_dev, name, &netdev_linux_class);
+
*netdev_devp = &netdev_dev->netdev_dev;
return 0;
}
+/* For most types of netdevs we open the device for each call of
+ * netdev_open(). However, this is not the case with tap devices,
+ * since it is only possible to open the device once. In this
+ * situation we share a single file descriptor, and consequently
+ * buffers, across all readers. Therefore once data is read it will
+ * be unavailable to other reads for tap devices. */
static int
-netdev_linux_create_tap(const char *name, const char *type UNUSED,
+netdev_linux_create_tap(const char *name, const char *type OVS_UNUSED,
const struct shash *args, struct netdev_dev **netdev_devp)
{
struct netdev_dev_linux *netdev_dev;
}
static int
-if_up(const char *name)
-{
- struct ifreq ifr;
-
- strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
- ifr.ifr_flags = IFF_UP;
-
- if (ioctl(af_inet_sock, SIOCSIFFLAGS, &ifr) == -1) {
- VLOG_DBG_RL(&rl, "%s: failed to bring device up: %s",
- name, strerror(errno));
- return errno;
- }
-
- return 0;
-}
-
-static int
-netdev_linux_create_gre(const char *name, const char *type UNUSED,
+netdev_linux_create_patch(const char *name, const char *type OVS_UNUSED,
const struct shash *args, struct netdev_dev **netdev_devp)
{
struct netdev_dev_linux *netdev_dev;
+ char *peer = NULL;
int error;
- netdev_dev = xzalloc(sizeof *netdev_dev);
-
- error = setup_gre(name, args, true);
- if (error) {
- goto error;
- }
-
- error = if_up(name);
+ error = setup_patch(name, args, &peer);
if (error) {
- goto error;
+ free(peer);
+ return error;
}
- netdev_dev_init(&netdev_dev->netdev_dev, name, &netdev_gre_class);
+ netdev_dev = xzalloc(sizeof *netdev_dev);
+ netdev_dev->state.patch.peer = peer;
+ netdev_dev_init(&netdev_dev->netdev_dev, name, &netdev_patch_class);
*netdev_devp = &netdev_dev->netdev_dev;
- return 0;
-
-error:
- free(netdev_dev);
- return error;
-}
-
-static int
-netdev_linux_reconfigure_gre(struct netdev_dev *netdev_dev_,
- const struct shash *args)
-{
- const char *name = netdev_dev_get_name(netdev_dev_);
-
- return setup_gre(name, args, false);
-}
-
-/* The arguments are marked as unused to prevent warnings on platforms where
- * the Netlink interface isn't supported. */
-static int
-destroy_gre_netlink(struct netdev_dev_linux *netdev_dev UNUSED)
-{
-#ifdef GRE_IOCTL_ONLY
- return EOPNOTSUPP;
-#else
- const char *name = netdev_dev_get_name(&netdev_dev->netdev_dev);
- int error;
- struct ofpbuf request, *reply;
- struct ifinfomsg ifinfomsg;
- int ifindex;
-
- ofpbuf_init(&request, 0);
-
- nl_msg_put_nlmsghdr(&request, gre_descriptors.nl_sock, 0, RTM_DELLINK,
- NLM_F_REQUEST);
-
- memset(&ifinfomsg, 0, sizeof ifinfomsg);
- ifinfomsg.ifi_family = AF_UNSPEC;
- nl_msg_put(&request, &ifinfomsg, sizeof ifinfomsg);
-
- ifindex = do_get_ifindex(name);
- nl_msg_put_u32(&request, IFLA_LINK, ifindex);
-
- nl_msg_put_string(&request, IFLA_IFNAME, name);
-
- error = nl_sock_transact(gre_descriptors.nl_sock, &request, &reply);
- ofpbuf_uninit(&request);
- if (error) {
- VLOG_WARN("couldn't transact netlink socket: %s\n", strerror(error));
- goto error;
- }
- ofpbuf_delete(reply);
-error:
return 0;
-#endif
}
-static int
-destroy_gre_ioctl(struct netdev_dev_linux *netdev_dev)
+/* Closes the file descriptor stored in the tap state of 'netdev_dev', unless
+ * that descriptor is negative. */
+static void
+destroy_tap(struct netdev_dev_linux *netdev_dev)
{
- const char *name = netdev_dev_get_name(&netdev_dev->netdev_dev);
- struct ip_tunnel_parm p;
- struct ifreq ifr;
-
- memset(&p, 0, sizeof p);
- strncpy(p.name, name, IFNAMSIZ);
-
- strncpy(ifr.ifr_name, name, IFNAMSIZ);
- ifr.ifr_ifru.ifru_data = (void *)&p;
+    const int tap_fd = netdev_dev->state.tap.fd;
- if (ioctl(gre_descriptors.ioctl_fd, SIOCDELGRETAP, &ifr) < 0) {
- VLOG_WARN("couldn't do gre ioctl: %s\n", strerror(errno));
- return errno;
+    if (tap_fd >= 0) {
+        close(tap_fd);
}
-
- return 0;
}
+/* Releases the patch state of 'netdev_dev'.  The veth pair itself is removed
+ * only when no netdev named after the peer exists any longer; the stored
+ * peer name is always freed. */
static void
-destroy_tap(struct netdev_dev_linux *netdev_dev)
+destroy_patch(struct netdev_dev_linux *netdev_dev)
{
- struct tap_state *state = &netdev_dev->state.tap;
+    const char *own_name = netdev_dev_get_name(&netdev_dev->netdev_dev);
+    char *peer_name = netdev_dev->state.patch.peer;
- if (state->fd >= 0) {
- close(state->fd);
+    /* While the peer is still around as a netdev, the pair stays in use, so
+     * leave the veth alone in that case. */
+    if (!netdev_dev_from_name(peer_name)) {
+        modify_veth("-%s", own_name);
}
+    free(peer_name);
}
/* Destroys the netdev device 'netdev_dev_'. */
const char *type = netdev_dev_get_type(netdev_dev_);
if (!strcmp(type, "system")) {
- shash_delete(&cache_map, netdev_dev->shash_node);
+ cache_notifier_refcount--;
- if (shash_is_empty(&cache_map)) {
+ if (!cache_notifier_refcount) {
rtnetlink_notifier_unregister(&netdev_linux_cache_notifier);
}
} else if (!strcmp(type, "tap")) {
destroy_tap(netdev_dev);
- } else if (!strcmp(type, "gre")) {
- if (gre_descriptors.use_ioctl) {
- destroy_gre_ioctl(netdev_dev);
- } else {
- destroy_gre_netlink(netdev_dev);
- }
+ } else if (!strcmp(type, "patch")) {
+ destroy_patch(netdev_dev);
}
- free(netdev_dev_);
+ free(netdev_dev);
}
static int
-netdev_linux_open(struct netdev_dev *netdev_dev, int ethertype,
+netdev_linux_open(struct netdev_dev *netdev_dev_, int ethertype,
struct netdev **netdevp)
{
+ struct netdev_dev_linux *netdev_dev = netdev_dev_linux_cast(netdev_dev_);
struct netdev_linux *netdev;
enum netdev_flags flags;
int error;
/* Allocate network device. */
netdev = xzalloc(sizeof *netdev);
- netdev_init(&netdev->netdev, netdev_dev);
- netdev->netdev_fd = -1;
- netdev->tap_fd = -1;
-
- if (!strcmp(netdev_dev_get_type(netdev_dev), "tap")) {
- static const char tap_dev[] = "/dev/net/tun";
- struct ifreq ifr;
-
- /* Open tap device. */
- netdev->tap_fd = open(tap_dev, O_RDWR);
- if (netdev->tap_fd < 0) {
- error = errno;
- VLOG_WARN("opening \"%s\" failed: %s", tap_dev, strerror(error));
- goto error;
- }
-
- /* Create tap device. */
- ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
- strncpy(ifr.ifr_name, netdev_dev_get_name(netdev_dev),
- sizeof ifr.ifr_name);
- if (ioctl(netdev->tap_fd, TUNSETIFF, &ifr) == -1) {
- VLOG_WARN("%s: creating tap device failed: %s",
- netdev_dev_get_name(netdev_dev),
- strerror(errno));
- error = errno;
- goto error;
- }
-
- /* Make non-blocking. */
- error = set_nonblocking(netdev->tap_fd);
- if (error) {
- goto error;
- }
- }
+ netdev->fd = -1;
+ netdev_init(&netdev->netdev, netdev_dev_);
error = netdev_get_flags(&netdev->netdev, &flags);
if (error == ENODEV) {
goto error;
}
- if (netdev->tap_fd >= 0 || ethertype != NETDEV_ETH_TYPE_NONE) {
+ if (!strcmp(netdev_dev_get_type(netdev_dev_), "tap")) {
+ netdev->fd = netdev_dev->state.tap.fd;
+ } else if (ethertype != NETDEV_ETH_TYPE_NONE) {
struct sockaddr_ll sll;
int protocol;
int ifindex;
protocol = (ethertype == NETDEV_ETH_TYPE_ANY ? ETH_P_ALL
: ethertype == NETDEV_ETH_TYPE_802_2 ? ETH_P_802_2
: ethertype);
- netdev->netdev_fd = socket(PF_PACKET, SOCK_RAW, htons(protocol));
- if (netdev->netdev_fd < 0) {
+ netdev->fd = socket(PF_PACKET, SOCK_RAW, htons(protocol));
+ if (netdev->fd < 0) {
error = errno;
goto error;
}
- if (netdev->tap_fd < 0) {
- netdev->tap_fd = netdev->netdev_fd;
- }
/* Set non-blocking mode. */
- error = set_nonblocking(netdev->netdev_fd);
+ error = set_nonblocking(netdev->fd);
if (error) {
goto error;
}
memset(&sll, 0, sizeof sll);
sll.sll_family = AF_PACKET;
sll.sll_ifindex = ifindex;
- if (bind(netdev->netdev_fd,
+ if (bind(netdev->fd,
(struct sockaddr *) &sll, sizeof sll) < 0) {
error = errno;
- VLOG_ERR("bind to %s failed: %s", netdev_dev_get_name(netdev_dev),
+ VLOG_ERR("bind to %s failed: %s", netdev_dev_get_name(netdev_dev_),
strerror(error));
goto error;
}
* packets of the requested type on all system interfaces. We do not
* want to receive that data, but there is no way to avoid it. So we
* must now drain out the receive queue. */
- error = drain_rcvbuf(netdev->netdev_fd);
+ error = drain_rcvbuf(netdev->fd);
if (error) {
goto error;
}
{
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
- if (netdev->netdev_fd >= 0) {
- close(netdev->netdev_fd);
- }
- if (netdev->tap_fd >= 0 && netdev->netdev_fd != netdev->tap_fd) {
- close(netdev->tap_fd);
+ if (netdev->fd > 0 && strcmp(netdev_get_type(netdev_), "tap")) {
+ close(netdev->fd);
}
free(netdev);
}
{
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
- if (netdev->tap_fd < 0) {
+ if (netdev->fd < 0) {
/* Device was opened with NETDEV_ETH_TYPE_NONE. */
return -EAGAIN;
}
for (;;) {
- ssize_t retval = read(netdev->tap_fd, data, size);
+ ssize_t retval = read(netdev->fd, data, size);
if (retval >= 0) {
return retval;
} else if (errno != EINTR) {
netdev_linux_recv_wait(struct netdev *netdev_)
{
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
- if (netdev->tap_fd >= 0) {
- poll_fd_wait(netdev->tap_fd, POLLIN);
+ if (netdev->fd >= 0) {
+ poll_fd_wait(netdev->fd, POLLIN);
}
}
netdev_linux_drain(struct netdev *netdev_)
{
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
- if (netdev->tap_fd < 0 && netdev->netdev_fd < 0) {
+ if (netdev->fd < 0) {
return 0;
- } else if (netdev->tap_fd != netdev->netdev_fd) {
+ } else if (!strcmp(netdev_get_type(netdev_), "tap")) {
struct ifreq ifr;
int error = netdev_linux_do_ioctl(netdev_get_name(netdev_), &ifr,
SIOCGIFTXQLEN, "SIOCGIFTXQLEN");
if (error) {
return error;
}
- drain_fd(netdev->tap_fd, ifr.ifr_qlen);
+ drain_fd(netdev->fd, ifr.ifr_qlen);
return 0;
} else {
- return drain_rcvbuf(netdev->netdev_fd);
+ return drain_rcvbuf(netdev->fd);
}
}
/* XXX should support sending even if 'ethertype' was NETDEV_ETH_TYPE_NONE.
*/
- if (netdev->tap_fd < 0) {
+ if (netdev->fd < 0) {
return EPIPE;
}
for (;;) {
- ssize_t retval = write(netdev->tap_fd, data, size);
+ ssize_t retval = write(netdev->fd, data, size);
if (retval < 0) {
/* The Linux AF_PACKET implementation never blocks waiting for room
* for packets, instead returning ENOBUFS. Translate this into
netdev_linux_send_wait(struct netdev *netdev_)
{
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
- if (netdev->tap_fd < 0 && netdev->netdev_fd < 0) {
+ if (netdev->fd < 0) {
/* Nothing to do. */
- } else if (netdev->tap_fd == netdev->netdev_fd) {
- poll_fd_wait(netdev->tap_fd, POLLOUT);
+ } else if (strcmp(netdev_get_type(netdev_), "tap")) {
+ poll_fd_wait(netdev->fd, POLLOUT);
} else {
/* TAP device always accepts packets.*/
poll_immediate_wake();
}
}
+/* Makes sure that the 'is_tap' and 'is_internal' members of 'netdev_dev' are
+ * current.  Does nothing if VALID_IS_PSEUDO is already set in 'cache_valid'.
+ *
+ * A device is a tap if its type is "tap".  Any other device is internal if
+ * ETHTOOL_GDRVINFO succeeds and reports the "openvswitch" driver. */
+static void
+netdev_linux_update_is_pseudo(struct netdev_dev_linux *netdev_dev)
+{
+    const char *dev_name;
+    bool tap;
+    bool internal = false;
+
+    if (netdev_dev->cache_valid & VALID_IS_PSEUDO) {
+        return;
+    }
+
+    dev_name = netdev_dev_get_name(&netdev_dev->netdev_dev);
+    tap = !strcmp(netdev_dev_get_type(&netdev_dev->netdev_dev), "tap");
+    if (!tap) {
+        struct ethtool_drvinfo info;
+
+        memset(&info, 0, sizeof info);
+        if (!netdev_linux_do_ethtool(dev_name, (struct ethtool_cmd *)&info,
+                                     ETHTOOL_GDRVINFO, "ETHTOOL_GDRVINFO")
+            && !strcmp(info.driver, "openvswitch")) {
+            internal = true;
+        }
+    }
+
+    netdev_dev->is_tap = tap;
+    netdev_dev->is_internal = internal;
+    netdev_dev->cache_valid |= VALID_IS_PSEUDO;
+}
+
/* Retrieves current device stats for 'netdev'.
*
* XXX All of the members of struct netdev_stats are 64 bits wide, but on
COVERAGE_INC(netdev_get_stats);
- if (!(netdev_dev->cache_valid & VALID_IS_INTERNAL)) {
- netdev_dev->is_internal = !strcmp(netdev_get_type(netdev_),
- "tap");
-
- if (!netdev_dev->is_internal) {
- struct ethtool_drvinfo drvinfo;
-
- memset(&drvinfo, 0, sizeof drvinfo);
- error = netdev_linux_do_ethtool(netdev_,
- (struct ethtool_cmd *)&drvinfo,
- ETHTOOL_GDRVINFO,
- "ETHTOOL_GDRVINFO");
-
- if (!error) {
- netdev_dev->is_internal = !strcmp(drvinfo.driver,
- "openvswitch");
- }
- }
-
- netdev_dev->cache_valid |= VALID_IS_INTERNAL;
- }
-
+ netdev_linux_update_is_pseudo(netdev_dev);
if (netdev_dev->is_internal) {
collect_stats = &raw_stats;
}
* will appear to be swapped relative to the other ports since we are the
* one sending the data, not a remote computer. For consistency, we swap
* them back here. */
- if (netdev_dev->is_internal) {
+ if (!error && (netdev_dev->is_internal || netdev_dev->is_tap)) {
stats->rx_packets = raw_stats.tx_packets;
stats->tx_packets = raw_stats.rx_packets;
stats->rx_bytes = raw_stats.tx_bytes;
return error;
}
+/* Sets the rx/tx packet and byte counters of 'netdev' from 'stats' using the
+ * INTERNAL_DEV_SET_STATS ioctl.
+ *
+ * Returns EOPNOTSUPP if 'netdev' is not an Open vSwitch internal device,
+ * otherwise the result of the ioctl. */
+static int
+netdev_linux_set_stats(struct netdev *netdev,
+                       const struct netdev_stats *stats)
+{
+    struct netdev_dev_linux *dev;
+    struct internal_dev_stats new_stats;
+    struct ifreq request;
+
+    dev = netdev_dev_linux_cast(netdev_get_dev(netdev));
+
+    /* INTERNAL_DEV_SET_STATS lies in the "device private ioctls" range, so
+     * issuing it on anything other than an Open vSwitch internal port could
+     * trigger whatever that other driver happens to map to the same number.
+     * Refuse unless the device is known to be internal. */
+    netdev_linux_update_is_pseudo(dev);
+    if (!dev->is_internal) {
+        return EOPNOTSUPP;
+    }
+
+    /* Strictly speaking this sets only the *offset* that the internal device
+     * adds to its own counters.  For fake bond devices, which never carry
+     * traffic of their own, the effect is the same as setting the
+     * counters. */
+    new_stats.tx_bytes = stats->tx_bytes;
+    new_stats.tx_packets = stats->tx_packets;
+    new_stats.rx_bytes = stats->rx_bytes;
+    new_stats.rx_packets = stats->rx_packets;
+
+    request.ifr_data = (void *) &new_stats;
+    return netdev_linux_do_ioctl(netdev_get_name(netdev), &request,
+                                 INTERNAL_DEV_SET_STATS,
+                                 "INTERNAL_DEV_SET_STATS");
+}
+
/* Stores the features supported by 'netdev' into each of '*current',
* '*advertised', '*supported', and '*peer' that are non-null. Each value is a
* bitmap of "enum ofp_port_features" bits, in host byte order. Returns 0 if
int error;
memset(&ecmd, 0, sizeof ecmd);
- error = netdev_linux_do_ethtool(netdev, &ecmd,
+ error = netdev_linux_do_ethtool(netdev_get_name(netdev), &ecmd,
ETHTOOL_GSET, "ETHTOOL_GSET");
if (error) {
return error;
int error;
memset(&ecmd, 0, sizeof ecmd);
- error = netdev_linux_do_ethtool(netdev, &ecmd,
+ error = netdev_linux_do_ethtool(netdev_get_name(netdev), &ecmd,
ETHTOOL_GSET, "ETHTOOL_GSET");
if (error) {
return error;
if (advertise & OFPPF_PAUSE_ASYM) {
ecmd.advertising |= ADVERTISED_Asym_Pause;
}
- return netdev_linux_do_ethtool(netdev, &ecmd,
+ return netdev_linux_do_ethtool(netdev_get_name(netdev), &ecmd,
ETHTOOL_SSET, "ETHTOOL_SSET");
}
COVERAGE_INC(netdev_set_policing);
if (kbits_rate) {
if (!kbits_burst) {
- /* Default to 10 kilobits if not specified. */
- kbits_burst = 10;
+ /* Default to 1000 kilobits if not specified. */
+ kbits_burst = 1000;
}
/* xxx This should be more careful about only adding if it
/* Adds 'router' as a default IP gateway. */
static int
-netdev_linux_add_router(struct netdev *netdev UNUSED, struct in_addr router)
+netdev_linux_add_router(struct netdev *netdev OVS_UNUSED, struct in_addr router)
{
struct in_addr any = { INADDR_ANY };
struct rtentry rt;
static void
netdev_linux_poll_cb(const struct rtnetlink_change *change,
- void *aux UNUSED)
+ void *aux OVS_UNUSED)
{
if (change) {
struct list *list = shash_find_data(&netdev_linux_notifiers,
netdev_linux_get_ifindex,
netdev_linux_get_carrier,
netdev_linux_get_stats,
+ netdev_linux_set_stats,
netdev_linux_get_features,
netdev_linux_set_advertisements,
netdev_linux_get_ifindex,
netdev_linux_get_carrier,
netdev_linux_get_stats,
+ NULL, /* set_stats */
netdev_linux_get_features,
netdev_linux_set_advertisements,
netdev_linux_poll_remove,
};
-const struct netdev_class netdev_gre_class = {
- "gre",
+const struct netdev_class netdev_patch_class = {
+ "patch",
netdev_linux_init,
netdev_linux_run,
netdev_linux_wait,
- netdev_linux_create_gre,
+ netdev_linux_create_patch,
netdev_linux_destroy,
- netdev_linux_reconfigure_gre,
+ NULL, /* reconfigure */
netdev_linux_open,
netdev_linux_close,
netdev_linux_get_ifindex,
netdev_linux_get_carrier,
netdev_linux_get_stats,
+ NULL, /* set_stats */
netdev_linux_get_features,
netdev_linux_set_advertisements,
netdev_linux_poll_add,
netdev_linux_poll_remove,
};
+
\f
static int
get_stats_via_netlink(int ifindex, struct netdev_stats *stats)
}
static int
-netdev_linux_do_ethtool(const struct netdev *netdev, struct ethtool_cmd *ecmd,
+netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *ecmd,
int cmd, const char *cmd_name)
{
struct ifreq ifr;
memset(&ifr, 0, sizeof ifr);
- strncpy(ifr.ifr_name, netdev_get_name(netdev), sizeof ifr.ifr_name);
+ strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
ifr.ifr_data = (caddr_t) ecmd;
ecmd->cmd = cmd;
} else {
if (errno != EOPNOTSUPP) {
VLOG_WARN_RL(&rl, "ethtool command %s on network device %s "
- "failed: %s", cmd_name, netdev_get_name(netdev),
- strerror(errno));
+ "failed: %s", cmd_name, name, strerror(errno));
} else {
/* The device doesn't support this operation. That's pretty
* common, so there's no point in logging anything. */