#include "dynamic-string.h"
#include "fatal-signal.h"
#include "netdev-provider.h"
+#include "netdev-vport.h"
#include "netlink.h"
#include "ofpbuf.h"
#include "openflow/openflow.h"
-#include "openvswitch/internal_dev.h"
#include "openvswitch/gre.h"
#include "packets.h"
#include "poll-loop.h"
static int cache_notifier_refcount;
enum {
- VALID_IFINDEX = 1 << 0,
- VALID_ETHERADDR = 1 << 1,
- VALID_IN4 = 1 << 2,
- VALID_IN6 = 1 << 3,
- VALID_MTU = 1 << 4,
- VALID_CARRIER = 1 << 5,
- VALID_IS_PSEUDO = 1 << 6, /* Represents is_internal and is_tap. */
- VALID_POLICING = 1 << 7
+ VALID_IFINDEX = 1 << 0,
+ VALID_ETHERADDR = 1 << 1,
+ VALID_IN4 = 1 << 2,
+ VALID_IN6 = 1 << 3,
+ VALID_MTU = 1 << 4,
+ VALID_CARRIER = 1 << 5,
+ VALID_IS_PSEUDO = 1 << 6, /* Represents is_internal and is_tap. */
+ VALID_POLICING = 1 << 7,
+ VALID_HAVE_VPORT_STATS = 1 << 8 /* Represents have_vport_stats. */
};
struct tap_state {
int fd;
-};
-
-struct patch_state {
- char *peer;
+ bool opened; /* Has 'fd' already been handed to a netdev? */
};
struct netdev_dev_linux {
bool is_tap; /* Is this a tuntap device? */
uint32_t kbits_rate; /* Policing data. */
uint32_t kbits_burst;
+ bool have_vport_stats; /* Did the last netdev_vport_get_stats() succeed? */
union {
struct tap_state tap;
- struct patch_state patch;
} state;
};
}
}
-static int
-if_up(const char *name)
-{
- struct ifreq ifr;
-
- strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
- ifr.ifr_flags = IFF_UP;
-
- if (ioctl(af_inet_sock, SIOCSIFFLAGS, &ifr) == -1) {
- VLOG_DBG_RL(&rl, "%s: failed to bring device up: %s",
- name, strerror(errno));
- return errno;
- }
-
- return 0;
-}
-
-/* A veth may be created using the 'command' "+<name>,<peer>". A veth may
- * be destroyed by using the 'command' "-<name>", where <name> can be
- * either side of the device.
- */
-static int
-modify_veth(const char *format, ...)
-{
- FILE *veth_file;
- va_list args;
- int retval;
-
- veth_file = fopen("/sys/class/net/veth_pairs", "w");
- if (!veth_file) {
- VLOG_WARN_RL(&rl, "could not open veth device. Are you running a "
- "supported XenServer with the kernel module loaded?");
- return ENODEV;
- }
- setvbuf(veth_file, NULL, _IONBF, 0);
-
- va_start(args, format);
- retval = vfprintf(veth_file, format, args);
- va_end(args);
-
- fclose(veth_file);
- if (retval < 0) {
- VLOG_WARN_RL(&rl, "could not destroy patch: %s", strerror(errno));
- return errno;
- }
-
- return 0;
-}
-
-static int
-create_patch(const char *name, const char *peer)
-{
- int retval;
- struct netdev_dev *peer_nd;
-
-
- /* Only create the veth if the peer didn't already do it. */
- peer_nd = netdev_dev_from_name(peer);
- if (peer_nd) {
- if (!strcmp("patch", netdev_dev_get_type(peer_nd))) {
- struct netdev_dev_linux *ndl = netdev_dev_linux_cast(peer_nd);
- if (!strcmp(name, ndl->state.patch.peer)) {
- return 0;
- } else {
- VLOG_WARN_RL(&rl, "peer '%s' already paired with '%s'",
- peer, ndl->state.patch.peer);
- return EINVAL;
- }
- } else {
- VLOG_WARN_RL(&rl, "peer '%s' exists and is not a patch", peer);
- return EINVAL;
- }
- }
-
- retval = modify_veth("+%s,%s", name, peer);
- if (retval) {
- return retval;
- }
-
- retval = if_up(name);
- if (retval) {
- return retval;
- }
-
- retval = if_up(peer);
- if (retval) {
- return retval;
- }
-
- return 0;
-}
-
-static int
-setup_patch(const char *name, const struct shash *args, char **peer_)
-{
- const char *peer;
-
- peer = shash_find_data(args, "peer");
- if (!peer) {
- VLOG_WARN("patch type requires valid 'peer' argument");
- return EINVAL;
- }
-
- if (shash_count(args) > 1) {
- VLOG_WARN("patch type takes only a 'peer' argument");
- return EINVAL;
- }
-
- if (strlen(peer) >= IFNAMSIZ) {
- VLOG_WARN_RL(&rl, "patch 'peer' arg too long");
- return EINVAL;
- }
-
- *peer_ = xstrdup(peer);
- return create_patch(name, peer);
-}
-
/* Creates the netdev device of 'type' with 'name'. */
static int
netdev_linux_create_system(const char *name, const char *type OVS_UNUSED,
return error;
}
-static int
-netdev_linux_create_patch(const char *name, const char *type OVS_UNUSED,
- const struct shash *args, struct netdev_dev **netdev_devp)
-{
- struct netdev_dev_linux *netdev_dev;
- char *peer = NULL;
- int error;
-
- error = setup_patch(name, args, &peer);
- if (error) {
- free(peer);
- return error;
- }
-
- netdev_dev = xzalloc(sizeof *netdev_dev);
- netdev_dev->state.patch.peer = peer;
- netdev_dev_init(&netdev_dev->netdev_dev, name, &netdev_patch_class);
- *netdev_devp = &netdev_dev->netdev_dev;
-
- return 0;
-}
-
static void
destroy_tap(struct netdev_dev_linux *netdev_dev)
{
}
}
-static void
-destroy_patch(struct netdev_dev_linux *netdev_dev)
-{
- const char *name = netdev_dev_get_name(&netdev_dev->netdev_dev);
- struct patch_state *state = &netdev_dev->state.patch;
-
- /* Only destroy veth if 'peer' doesn't exist as an existing netdev. */
- if (!netdev_dev_from_name(state->peer)) {
- modify_veth("-%s", name);
- }
- free(state->peer);
-}
-
/* Destroys the netdev device 'netdev_dev_'. */
static void
netdev_linux_destroy(struct netdev_dev *netdev_dev_)
}
} else if (!strcmp(type, "tap")) {
destroy_tap(netdev_dev);
- } else if (!strcmp(type, "patch")) {
- destroy_patch(netdev_dev);
}
free(netdev_dev);
goto error;
}
- if (!strcmp(netdev_dev_get_type(netdev_dev_), "tap")) {
+ if (!strcmp(netdev_dev_get_type(netdev_dev_), "tap") &&
+ !netdev_dev->state.tap.opened) {
+
+ /* We assume that the first user of the tap device is the primary user
+ * and give them the tap FD. Subsequent users probably just expect
+ * this to be a system device so open it normally to avoid send/receive
+ * directions appearing to be reversed. */
netdev->fd = netdev_dev->state.tap.fd;
+ netdev_dev->state.tap.opened = true;
} else if (ethertype != NETDEV_ETH_TYPE_NONE) {
struct sockaddr_ll sll;
int protocol;
}
}
-/* Retrieves current device stats for 'netdev'.
- *
- * XXX All of the members of struct netdev_stats are 64 bits wide, but on
- * 32-bit architectures the Linux network stats are only 32 bits. */
+/* Exchanges the values stored in '*a' and '*b'.
+ *
+ * Goes through a temporary rather than the XOR trick so that the call is
+ * also safe when 'a' and 'b' point to the same object (XOR-swapping a value
+ * with itself would zero it). */
+static void
+swap_uint64(uint64_t *a, uint64_t *b)
+{
+    uint64_t tmp = *a;
+
+    *a = *b;
+    *b = tmp;
+}
+
+/* Retrieves current device stats for 'netdev'. */
static int
netdev_linux_get_stats(const struct netdev *netdev_,
struct netdev_stats *stats)
netdev_dev_linux_cast(netdev_get_dev(netdev_));
static int use_netlink_stats = -1;
int error;
- struct netdev_stats raw_stats;
- struct netdev_stats *collect_stats = stats;
COVERAGE_INC(netdev_get_stats);
- netdev_linux_update_is_pseudo(netdev_dev);
- if (netdev_dev->is_internal) {
- collect_stats = &raw_stats;
- }
+ if (netdev_dev->have_vport_stats ||
+ !(netdev_dev->cache_valid & VALID_HAVE_VPORT_STATS)) {
- if (use_netlink_stats < 0) {
- use_netlink_stats = check_for_working_netlink_stats();
+ error = netdev_vport_get_stats(netdev_, stats);
+ netdev_dev->have_vport_stats = !error;
+ netdev_dev->cache_valid |= VALID_HAVE_VPORT_STATS;
}
- if (use_netlink_stats) {
- int ifindex;
- error = get_ifindex(netdev_, &ifindex);
- if (!error) {
- error = get_stats_via_netlink(ifindex, collect_stats);
+ if (!netdev_dev->have_vport_stats) {
+ if (use_netlink_stats < 0) {
+ use_netlink_stats = check_for_working_netlink_stats();
+ }
+ if (use_netlink_stats) {
+ int ifindex;
+
+ error = get_ifindex(netdev_, &ifindex);
+ if (!error) {
+ error = get_stats_via_netlink(ifindex, stats);
+ }
+ } else {
+ error = get_stats_via_proc(netdev_get_name(netdev_), stats);
}
- } else {
- error = get_stats_via_proc(netdev_get_name(netdev_), collect_stats);
}
/* If this port is an internal port then the transmit and receive stats
* will appear to be swapped relative to the other ports since we are the
* one sending the data, not a remote computer. For consistency, we swap
- * them back here. */
- if (!error && (netdev_dev->is_internal || netdev_dev->is_tap)) {
- stats->rx_packets = raw_stats.tx_packets;
- stats->tx_packets = raw_stats.rx_packets;
- stats->rx_bytes = raw_stats.tx_bytes;
- stats->tx_bytes = raw_stats.rx_bytes;
- stats->rx_errors = raw_stats.tx_errors;
- stats->tx_errors = raw_stats.rx_errors;
- stats->rx_dropped = raw_stats.tx_dropped;
- stats->tx_dropped = raw_stats.rx_dropped;
- stats->multicast = raw_stats.multicast;
- stats->collisions = raw_stats.collisions;
+ * them back here. This does not apply if we are getting stats from the
+ * vport layer because it always tracks stats from the perspective of the
+ * switch. */
+ netdev_linux_update_is_pseudo(netdev_dev);
+ if (!error && !netdev_dev->have_vport_stats &&
+ (netdev_dev->is_internal || netdev_dev->is_tap)) {
+ swap_uint64(&stats->rx_packets, &stats->tx_packets);
+ swap_uint64(&stats->rx_bytes, &stats->tx_bytes);
+ swap_uint64(&stats->rx_errors, &stats->tx_errors);
+ swap_uint64(&stats->rx_dropped, &stats->tx_dropped);
stats->rx_length_errors = 0;
stats->rx_over_errors = 0;
stats->rx_crc_errors = 0;
return error;
}
-static int
-netdev_linux_set_stats(struct netdev *netdev,
- const struct netdev_stats *stats)
-{
- struct netdev_dev_linux *netdev_dev =
- netdev_dev_linux_cast(netdev_get_dev(netdev));
- struct internal_dev_stats dp_dev_stats;
- struct ifreq ifr;
-
- /* We must reject this call if 'netdev' is not an Open vSwitch internal
- * port, because the ioctl that we are about to execute is in the "device
- * private ioctls" range, which means that executing it on a device that
- * is not the type we expect could do any random thing.
- *
- * (Amusingly, these ioctl numbers are commented "THESE IOCTLS ARE
- * _DEPRECATED_ AND WILL DISAPPEAR IN 2.5.X" in linux/sockios.h. I guess
- * DaveM is a little behind on that.) */
- netdev_linux_update_is_pseudo(netdev_dev);
- if (!netdev_dev->is_internal) {
- return EOPNOTSUPP;
- }
-
- /* This actually only sets the *offset* that the dp_dev applies, but in our
- * usage for fake bond devices the dp_dev never has any traffic of it own
- * so it has the same effect. */
- dp_dev_stats.rx_packets = stats->rx_packets;
- dp_dev_stats.rx_bytes = stats->rx_bytes;
- dp_dev_stats.tx_packets = stats->tx_packets;
- dp_dev_stats.tx_bytes = stats->tx_bytes;
- ifr.ifr_data = (void *) &dp_dev_stats;
- return netdev_linux_do_ioctl(netdev_get_name(netdev), &ifr,
- INTERNAL_DEV_SET_STATS,
- "INTERNAL_DEV_SET_STATS");
-}
-
/* Stores the features supported by 'netdev' into each of '*current',
* '*advertised', '*supported', and '*peer' that are non-null. Each value is a
* bitmap of "enum ofp_port_features" bits, in host byte order. Returns 0 if
error = nl_sock_transact(rtnl_sock, &request, &reply);
ofpbuf_uninit(&request);
ofpbuf_delete(reply);
- if (error && error != ENOENT) {
+ if (error && error != ENOENT && error != EINVAL) {
VLOG_WARN_RL(&rl, "%s: removing policing failed: %s",
netdev_name, strerror(error));
return error;
netdev_linux_get_ifindex,
netdev_linux_get_carrier,
netdev_linux_get_stats,
- netdev_linux_set_stats,
+ netdev_vport_set_stats,
netdev_linux_get_features,
netdev_linux_set_advertisements,
netdev_linux_poll_remove,
};
-const struct netdev_class netdev_patch_class = {
- "patch",
-
- netdev_linux_init,
- netdev_linux_run,
- netdev_linux_wait,
-
- netdev_linux_create_patch,
- netdev_linux_destroy,
- NULL, /* reconfigure */
-
- netdev_linux_open,
- netdev_linux_close,
-
- NULL, /* enumerate */
-
- netdev_linux_recv,
- netdev_linux_recv_wait,
- netdev_linux_drain,
-
- netdev_linux_send,
- netdev_linux_send_wait,
-
- netdev_linux_set_etheraddr,
- netdev_linux_get_etheraddr,
- netdev_linux_get_mtu,
- netdev_linux_get_ifindex,
- netdev_linux_get_carrier,
- netdev_linux_get_stats,
- NULL, /* set_stats */
-
- netdev_linux_get_features,
- netdev_linux_set_advertisements,
- netdev_linux_get_vlan_vid,
- netdev_linux_set_policing,
-
- netdev_linux_get_in4,
- netdev_linux_set_in4,
- netdev_linux_get_in6,
- netdev_linux_add_router,
- netdev_linux_get_next_hop,
- netdev_linux_arp_lookup,
-
- netdev_linux_update_flags,
-
- netdev_linux_poll_add,
- netdev_linux_poll_remove,
-};
-
\f
static int
get_stats_via_netlink(int ifindex, struct netdev_stats *stats)