#include <fcntl.h>
#include <arpa/inet.h>
#include <inttypes.h>
+#include <linux/filter.h>
#include <linux/gen_stats.h>
#include <linux/if_ether.h>
#include <linux/if_tun.h>
VALID_DRVINFO = 1 << 7,
VALID_FEATURES = 1 << 8,
};
-
-struct tap_state {
- int fd;
-};
\f
/* Traffic control. */
/* State kept for one queue of a traffic-control instance.
 *
 * The class-setup code visible elsewhere in this file reaches this structure
 * as the 'tc_queue' member of a larger, separately allocated per-class object,
 * fills in 'queue_id', stamps 'created' with time_msec(), and then inserts
 * 'hmap_node' into the owning tc's "queues" hmap.  The queue statistics path
 * copies 'created' into the stats it hands back. */
struct tc_queue {
struct hmap_node hmap_node; /* In struct tc's "queues" hmap. */
unsigned int queue_id; /* OpenFlow queue ID. */
+ long long int created; /* Time queue was created, in msecs. */
};
/* A particular kind of traffic control. Each implementation generally maps to
struct netdev_linux {
struct netdev up;
- struct shash_node *shash_node;
unsigned int cache_valid;
unsigned int change_seq;
enum netdev_features current; /* Cached from ETHTOOL_GSET. */
enum netdev_features advertised; /* Cached from ETHTOOL_GSET. */
enum netdev_features supported; /* Cached from ETHTOOL_GSET. */
- enum netdev_features peer; /* Cached from ETHTOOL_GSET. */
struct ethtool_drvinfo drvinfo; /* Cached from ETHTOOL_GDRVINFO. */
struct tc *tc;
- union {
- struct tap_state tap;
- } state;
+ /* For devices of class netdev_tap_class only. */
+ int tap_fd;
};
struct netdev_rx_linux {
/* Sockets used for ioctl operations. */
static int af_inet_sock = -1; /* AF_INET, SOCK_DGRAM. */
-/* A Netlink routing socket that is not subscribed to any multicast groups. */
-static struct nl_sock *rtnl_sock;
-
/* This is set pretty low because we probably won't learn anything from the
* additional log messages. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
af_inet_sock = socket(AF_INET, SOCK_DGRAM, 0);
status = af_inet_sock >= 0 ? 0 : errno;
if (status) {
- VLOG_ERR("failed to create inet socket: %s", strerror(status));
- }
-
- /* Create rtnetlink socket. */
- if (!status) {
- status = nl_sock_create(NETLINK_ROUTE, &rtnl_sock);
- if (status) {
- VLOG_ERR_RL(&rl, "failed to create rtnetlink socket: %s",
- strerror(status));
- }
+ VLOG_ERR("failed to create inet socket: %s", ovs_strerror(status));
}
}
return status;
netdev_linux_cache_cb(const struct rtnetlink_link_change *change,
void *aux OVS_UNUSED)
{
- struct netdev_linux *dev;
if (change) {
struct netdev *base_dev = netdev_from_name(change->ifname);
if (base_dev && is_netdev_linux_class(netdev_get_class(base_dev))) {
netdev_linux_update(netdev_linux_cast(base_dev), change);
+ netdev_close(base_dev);
}
} else {
struct shash device_shash;
shash_init(&device_shash);
netdev_get_devices(&netdev_linux_class, &device_shash);
SHASH_FOR_EACH (node, &device_shash) {
+ struct netdev *netdev = node->data;
+ struct netdev_linux *dev = netdev_linux_cast(netdev);
unsigned int flags;
- dev = node->data;
-
get_flags(&dev->up, &flags);
netdev_linux_changed(dev, flags, 0);
+ netdev_close(netdev);
}
shash_destroy(&device_shash);
}
const char *name, struct netdev **netdevp)
{
struct netdev_linux *netdev;
- struct tap_state *state;
static const char tap_dev[] = "/dev/net/tun";
struct ifreq ifr;
int error;
netdev = xzalloc(sizeof *netdev);
- state = &netdev->state.tap;
+ netdev->change_seq = 1;
error = cache_notifier_ref();
if (error) {
}
/* Open tap device. */
- state->fd = open(tap_dev, O_RDWR);
- if (state->fd < 0) {
+ netdev->tap_fd = open(tap_dev, O_RDWR);
+ if (netdev->tap_fd < 0) {
error = errno;
- VLOG_WARN("opening \"%s\" failed: %s", tap_dev, strerror(error));
+ VLOG_WARN("opening \"%s\" failed: %s", tap_dev, ovs_strerror(error));
goto error_unref_notifier;
}
/* Create tap device. */
ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
ovs_strzcpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
- if (ioctl(state->fd, TUNSETIFF, &ifr) == -1) {
+ if (ioctl(netdev->tap_fd, TUNSETIFF, &ifr) == -1) {
VLOG_WARN("%s: creating tap device failed: %s", name,
- strerror(errno));
+ ovs_strerror(errno));
error = errno;
- goto error_unref_notifier;
+ goto error_close;
}
/* Make non-blocking. */
- error = set_nonblocking(state->fd);
+ error = set_nonblocking(netdev->tap_fd);
if (error) {
- goto error_unref_notifier;
+ goto error_close;
}
netdev_init(&netdev->up, name, &netdev_tap_class);
*netdevp = &netdev->up;
return 0;
+error_close:
+ close(netdev->tap_fd);
error_unref_notifier:
cache_notifier_unref();
error:
return error;
}
-static void
-destroy_tap(struct netdev_linux *netdev)
-{
- struct tap_state *state = &netdev->state.tap;
-
- if (state->fd >= 0) {
- close(state->fd);
- }
-}
-
-/* Destroys the netdev device 'netdev_'. */
static void
netdev_linux_destroy(struct netdev *netdev_)
{
netdev->tc->ops->tc_destroy(netdev->tc);
}
- if (netdev_get_class(netdev_) == &netdev_tap_class) {
- destroy_tap(netdev);
+ if (netdev_get_class(netdev_) == &netdev_tap_class
+ && netdev->tap_fd >= 0)
+ {
+ close(netdev->tap_fd);
}
free(netdev);
int fd;
if (is_tap) {
- fd = netdev->state.tap.fd;
+ fd = netdev->tap_fd;
} else {
struct sockaddr_ll sll;
int ifindex;
+ /* Result of tcpdump -dd inbound */
+ static struct sock_filter filt[] = {
+ { 0x28, 0, 0, 0xfffff004 }, /* ldh [0] */
+ { 0x15, 0, 1, 0x00000004 }, /* jeq #4 jt 2 jf 3 */
+ { 0x6, 0, 0, 0x00000000 }, /* ret #0 */
+ { 0x6, 0, 0, 0x0000ffff } /* ret #65535 */
+ };
+ static struct sock_fprog fprog = { ARRAY_SIZE(filt), filt };
/* Create file descriptor. */
fd = socket(PF_PACKET, SOCK_RAW, 0);
if (fd < 0) {
error = errno;
- VLOG_ERR("failed to create raw socket (%s)", strerror(error));
+ VLOG_ERR("failed to create raw socket (%s)", ovs_strerror(error));
goto error;
}
if (bind(fd, (struct sockaddr *) &sll, sizeof sll) < 0) {
error = errno;
VLOG_ERR("%s: failed to bind raw socket (%s)",
- netdev_get_name(netdev_), strerror(error));
+ netdev_get_name(netdev_), ovs_strerror(error));
+ goto error;
+ }
+
+ /* Filter for only inbound packets. */
+ error = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &fprog,
+ sizeof fprog);
+ if (error) {
+ error = errno;
+ VLOG_ERR("%s: failed attach filter (%s)",
+ netdev_get_name(netdev_), ovs_strerror(error));
goto error;
}
}
: recv(rx->fd, data, size, MSG_TRUNC));
} while (retval < 0 && errno == EINTR);
- if (retval > size) {
- return -EMSGSIZE;
- } else if (retval >= 0) {
- return retval;
+ if (retval >= 0) {
+ return retval > size ? -EMSGSIZE : retval;
} else {
if (errno != EAGAIN) {
VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
- strerror(errno), netdev_rx_get_name(rx_));
+ ovs_strerror(errno), netdev_rx_get_name(rx_));
}
return -errno;
}
/* Use the tap fd to send to this device. This is essential for
* tap devices, because packets sent to a tap device with an
* AF_PACKET socket will loop back to be *received* again on the
- * tap device. */
+ * tap device. This doesn't occur on other interface types
+ * because we attach a socket filter to the rx socket. */
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
- retval = write(netdev->state.tap.fd, data, size);
+ retval = write(netdev->tap_fd, data, size);
}
if (retval < 0) {
continue;
} else if (errno != EAGAIN) {
VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
- netdev_get_name(netdev_), strerror(errno));
+ netdev_get_name(netdev_), ovs_strerror(errno));
}
return errno;
} else if (retval != size) {
/* Tap devices must be brought down before setting the address. */
if (is_tap_netdev(netdev_)) {
- enum netdev_flags flags;
-
- if (!netdev_get_flags(netdev_, &flags) && (flags & NETDEV_UP)) {
- netdev_turn_flags_off(netdev_, NETDEV_UP, &sf);
- }
+ netdev_turn_flags_off(netdev_, NETDEV_UP, &sf);
}
error = set_etheraddr(netdev_get_name(netdev_), mac);
if (!error || error == ENODEV) {
shash_init(&device_shash);
netdev_get_devices(&netdev_linux_class, &device_shash);
SHASH_FOR_EACH (node, &device_shash) {
- struct netdev_linux *dev = node->data;
+ struct netdev *netdev = node->data;
+ struct netdev_linux *dev = netdev_linux_cast(netdev);
bool miimon;
if (dev->miimon_interval <= 0 || !timer_expired(&dev->miimon_timer)) {
+ netdev_close(netdev);
continue;
}
}
timer_set_duration(&dev->miimon_timer, dev->miimon_interval);
+ netdev_close(netdev);
}
shash_destroy(&device_shash);
shash_init(&device_shash);
netdev_get_devices(&netdev_linux_class, &device_shash);
SHASH_FOR_EACH (node, &device_shash) {
- struct netdev_linux *dev = node->data;
+ struct netdev *netdev = node->data;
+ struct netdev_linux *dev = netdev_linux_cast(netdev);
if (dev->miimon_interval > 0) {
timer_wait(&dev->miimon_timer);
}
+ netdev_close(netdev);
}
shash_destroy(&device_shash);
}
} else {
VLOG_INFO("RTM_GETLINK failed (%s), obtaining netdev stats "
"via proc (you are probably running a pre-2.6.19 "
- "kernel)", strerror(error));
+ "kernel)", ovs_strerror(error));
return false;
}
}
error = get_stats_via_vport__(netdev_, stats);
if (error && error != ENOENT) {
VLOG_WARN_RL(&rl, "%s: obtaining netdev stats via vport failed "
- "(%s)", netdev_get_name(netdev_), strerror(error));
+ "(%s)",
+ netdev_get_name(netdev_), ovs_strerror(error));
}
netdev->vport_stats_error = error;
netdev->cache_valid |= VALID_VPORT_STAT_ERROR;
netdev_linux_sys_get_stats(const struct netdev *netdev_,
struct netdev_stats *stats)
{
- static int use_netlink_stats = -1;
+ static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+ static int use_netlink_stats;
int error;
- if (use_netlink_stats < 0) {
+ if (ovsthread_once_start(&once)) {
use_netlink_stats = check_for_working_netlink_stats();
+ ovsthread_once_done(&once);
}
if (use_netlink_stats) {
/* Retrieves current device stats for 'netdev-tap' netdev or
* netdev-internal. */
static int
-netdev_tap_get_stats(const struct netdev *netdev_,
- struct netdev_stats *stats)
+netdev_tap_get_stats(const struct netdev *netdev_, struct netdev_stats *stats)
{
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
struct netdev_stats dev_stats;
netdev->current |= NETDEV_F_AUTONEG;
}
- /* Peer advertisements. */
- netdev->peer = 0; /* XXX */
-
out:
netdev->cache_valid |= VALID_FEATURES;
netdev->get_features_error = error;
}
-/* Stores the features supported by 'netdev' into each of '*current',
- * '*advertised', '*supported', and '*peer' that are non-null. Each value is a
- * bitmap of NETDEV_* bits. Returns 0 if successful, otherwise a positive
- * errno value. */
+/* Stores the features supported by 'netdev' into each of '*current',
+ * '*advertised', '*supported', and '*peer'. Each value is a bitmap of NETDEV_*
+ * bits. Returns 0 if successful, otherwise a positive errno value. */
static int
netdev_linux_get_features(const struct netdev *netdev_,
enum netdev_features *current,
*current = netdev->current;
*advertised = netdev->advertised;
*supported = netdev->supported;
- *peer = netdev->peer;
+ *peer = 0; /* XXX */
}
return netdev->get_features_error;
}
error = tc_add_del_ingress_qdisc(netdev_, false);
if (error) {
VLOG_WARN_RL(&rl, "%s: removing policing failed: %s",
- netdev_name, strerror(error));
+ netdev_name, ovs_strerror(error));
goto out;
}
error = tc_add_del_ingress_qdisc(netdev_, true);
if (error) {
VLOG_WARN_RL(&rl, "%s: adding policing qdisc failed: %s",
- netdev_name, strerror(error));
+ netdev_name, ovs_strerror(error));
goto out;
}
error = tc_add_policer(netdev_, kbits_rate, kbits_burst);
if (error){
VLOG_WARN_RL(&rl, "%s: adding policing action failed: %s",
- netdev_name, strerror(error));
+ netdev_name, ovs_strerror(error));
goto out;
}
}
return EOPNOTSUPP;
} else {
const struct tc_queue *queue = tc_find_queue(netdev_, queue_id);
- return (queue
- ? netdev->tc->ops->class_get_stats(netdev_, queue, stats)
- : ENOENT);
+ if (!queue) {
+ return ENOENT;
+ }
+ stats->created = queue->created;
+ return netdev->tc->ops->class_get_stats(netdev_, queue, stats);
}
}
return false;
}
tcmsg->tcm_parent = 0;
- nl_dump_start(dump, rtnl_sock, &request);
+ nl_dump_start(dump, NETLINK_ROUTE, &request);
ofpbuf_uninit(&request);
return true;
}
rt.rt_flags = RTF_UP | RTF_GATEWAY;
error = ioctl(af_inet_sock, SIOCADDRT, &rt) < 0 ? errno : 0;
if (error) {
- VLOG_WARN("ioctl(SIOCADDRT): %s", strerror(error));
+ VLOG_WARN("ioctl(SIOCADDRT): %s", ovs_strerror(error));
}
return error;
}
*netdev_name = NULL;
stream = fopen(fn, "r");
if (stream == NULL) {
- VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, strerror(errno));
+ VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, ovs_strerror(errno));
return errno;
}
memcpy(mac, r.arp_ha.sa_data, ETH_ADDR_LEN);
} else if (retval != ENXIO) {
VLOG_WARN_RL(&rl, "%s: could not look up ARP entry for "IP_FMT": %s",
- netdev_get_name(netdev), IP_ARGS(ip), strerror(retval));
+ netdev_get_name(netdev), IP_ARGS(ip),
+ ovs_strerror(retval));
}
return retval;
}
tc_get_major(handle), tc_get_minor(handle),
tc_get_major(parent), tc_get_minor(parent),
class->min_rate, class->max_rate,
- class->burst, class->priority, strerror(error));
+ class->burst, class->priority, ovs_strerror(error));
}
return error;
}
hcp = xmalloc(sizeof *hcp);
queue = &hcp->tc_queue;
queue->queue_id = queue_id;
+ queue->created = time_msec();
hmap_insert(&htb->tc.queues, &queue->hmap_node, hash);
}
hcp = xmalloc(sizeof *hcp);
queue = &hcp->tc_queue;
queue->queue_id = queue_id;
+ queue->created = time_msec();
hmap_insert(&hfsc->tc.queues, &queue->hmap_node, hash);
}
netdev_get_name(netdev),
tc_get_major(handle), tc_get_minor(handle),
tc_get_major(parent), tc_get_minor(parent),
- class->min_rate, class->max_rate, strerror(error));
+ class->min_rate, class->max_rate, ovs_strerror(error));
}
return error;
/* Performs one Netlink request/reply transaction for the traffic-control
 * code.
 *
 * 'request' is the message to send; 'replyp' is handed straight through to
 * the transact call.  The removed line sent the request over the file-level
 * 'rtnl_sock'; the added line names the NETLINK_ROUTE protocol instead and
 * lets nl_transact() pick the socket.
 *
 * 'request' is passed to ofpbuf_uninit() on every path, success or failure.
 * NOTE(review): callers presumably must not touch 'request' afterwards --
 * confirm against ofpbuf_uninit().
 *
 * Returns whatever the transact call returned, unchanged. */
static int
tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
{
- int error = nl_sock_transact(rtnl_sock, request, replyp);
+ int error = nl_transact(NETLINK_ROUTE, request, replyp);
ofpbuf_uninit(request);
return error;
}
* [5] 2.6.32.21.22 (approx.) from Ubuntu 10.04 on VMware Fusion
* [6] 2.6.34 from kernel.org on KVM
*/
+ static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
static const char fn[] = "/proc/net/psched";
unsigned int a, b, c, d;
FILE *stream;
+ if (!ovsthread_once_start(&once)) {
+ return;
+ }
+
ticks_per_s = 1.0;
buffer_hz = 100;
stream = fopen(fn, "r");
if (!stream) {
- VLOG_WARN("%s: open failed: %s", fn, strerror(errno));
- return;
+ VLOG_WARN("%s: open failed: %s", fn, ovs_strerror(errno));
+ goto exit;
}
if (fscanf(stream, "%x %x %x %x", &a, &b, &c, &d) != 4) {
VLOG_WARN("%s: read failed", fn);
fclose(stream);
- return;
+ goto exit;
}
VLOG_DBG("%s: psched parameters are: %u %u %u %u", fn, a, b, c, d);
fclose(stream);
if (!a || !c) {
VLOG_WARN("%s: invalid scheduler parameters", fn);
- return;
+ goto exit;
}
ticks_per_s = (double) a * c / b;
fn, a, b, c, d);
}
VLOG_DBG("%s: ticks_per_s=%f buffer_hz=%u", fn, ticks_per_s, buffer_hz);
+
+exit:
+ ovsthread_once_done(&once);
}
/* Returns the number of bytes that can be transmitted in 'ticks' ticks at a
static unsigned int
tc_ticks_to_bytes(unsigned int rate, unsigned int ticks)
{
    /* read_psched() is where 'ticks_per_s' and 'buffer_hz' get their values.
     * The removed lines called it only while 'buffer_hz' was still zero; the
     * added line calls it unconditionally, relying on the ovsthread_once
     * guard that this same change adds inside read_psched() to make repeat
     * calls return immediately. */
- if (!buffer_hz) {
- read_psched();
- }
+ read_psched();
    /* NOTE(review): 'rate * ticks' is evaluated in unsigned int before the
     * division, so the product wraps for large operands.  The sibling
     * tc_bytes_to_ticks() widens to unsigned long long first -- confirm
     * whether callers keep the product in range here. */
return (rate * ticks) / ticks_per_s;
}
/* Converts 'size' bytes at 'rate' into a tick count using 'ticks_per_s'.
 * Returns 0 when 'rate' is 0 rather than dividing by zero.
 *
 * NOTE(review): 'rate' is presumably in bytes per second, mirroring
 * tc_ticks_to_bytes() -- confirm against callers. */
static unsigned int
tc_bytes_to_ticks(unsigned int rate, unsigned int size)
{
    /* The removed lines guarded this call with '!buffer_hz'; the added line
     * calls read_psched() every time and leaves the run-once logic to the
     * ovsthread_once guard inside read_psched() itself. */
- if (!buffer_hz) {
- read_psched();
- }
+ read_psched();
    /* 'ticks_per_s' is converted to unsigned long long before the multiply,
     * so the product with 'size' is formed in 64-bit integer arithmetic.
     * NOTE(review): 'ticks_per_s' looks like a floating-point value (it is
     * assigned 1.0 and a double expression in read_psched()), in which case
     * the cast drops any fractional part -- confirm. */
return rate ? ((unsigned long long int) ticks_per_s * size) / rate : 0;
}
/* Returns 'rate' divided by 'buffer_hz', using integer division.
 *
 * NOTE(review): presumably this is the number of bytes that must be buffered
 * per kernel timer tick to sustain 'rate' -- confirm units against callers. */
static unsigned int
tc_buffer_per_jiffy(unsigned int rate)
{
    /* Make sure 'buffer_hz' has been loaded.  The old '!buffer_hz' test is
     * dropped in favour of calling read_psched() unconditionally; it guards
     * itself with an ovsthread_once in the same change. */
- if (!buffer_hz) {
- read_psched();
- }
+ read_psched();
    /* NOTE(review): this divides by 'buffer_hz'.  read_psched() is seen
     * assigning 100 before it parses /proc/net/psched; confirm that no later
     * assignment there can leave it 0. */
return rate / buffer_hz;
}
netdev_get_name(netdev),
tc_get_major(handle), tc_get_minor(handle),
tc_get_major(parent), tc_get_minor(parent),
- strerror(error));
+ ovs_strerror(error));
}
return error;
}
VLOG_WARN_RL(&rl, "delete %s class %u:%u failed (%s)",
netdev_get_name(netdev),
tc_get_major(handle), tc_get_minor(handle),
- strerror(error));
+ ovs_strerror(error));
}
return error;
}
} else {
/* Who knows? Maybe the device got deleted. */
VLOG_WARN_RL(&rl, "query %s qdisc failed (%s)",
- netdev_get_name(netdev_), strerror(error));
+ netdev_get_name(netdev_), ovs_strerror(error));
ops = &tc_ops_other;
}
ifi = ofpbuf_put_zeros(&request, sizeof *ifi);
ifi->ifi_family = PF_UNSPEC;
ifi->ifi_index = ifindex;
- error = nl_sock_transact(rtnl_sock, &request, &reply);
+ error = nl_transact(NETLINK_ROUTE, &request, &reply);
ofpbuf_uninit(&request);
if (error) {
return error;
stream = fopen(fn, "r");
if (!stream) {
- VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, strerror(errno));
+ VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, ovs_strerror(errno));
return errno;
}
COVERAGE_INC(netdev_get_ifindex);
if (ioctl(af_inet_sock, SIOCGIFINDEX, &ifr) < 0) {
VLOG_WARN_RL(&rl, "ioctl(SIOCGIFINDEX) on %s device failed: %s",
- netdev_name, strerror(errno));
+ netdev_name, ovs_strerror(errno));
return -errno;
}
return ifr.ifr_ifindex;
* to INFO for that case. */
VLOG(errno == ENODEV ? VLL_INFO : VLL_ERR,
"ioctl(SIOCGIFHWADDR) on %s device failed: %s",
- netdev_name, strerror(errno));
+ netdev_name, ovs_strerror(errno));
return errno;
}
hwaddr_family = ifr.ifr_hwaddr.sa_family;
COVERAGE_INC(netdev_set_hwaddr);
if (ioctl(af_inet_sock, SIOCSIFHWADDR, &ifr) < 0) {
VLOG_ERR("ioctl(SIOCSIFHWADDR) on %s device failed: %s",
- netdev_name, strerror(errno));
+ netdev_name, ovs_strerror(errno));
return errno;
}
return 0;
} else {
if (errno != EOPNOTSUPP) {
VLOG_WARN_RL(&rl, "ethtool command %s on network device %s "
- "failed: %s", cmd_name, name, strerror(errno));
+ "failed: %s", cmd_name, name, ovs_strerror(errno));
} else {
/* The device doesn't support this operation. That's pretty
* common, so there's no point in logging anything. */
ovs_strzcpy(ifr->ifr_name, name, sizeof ifr->ifr_name);
if (ioctl(af_inet_sock, cmd, ifr) == -1) {
VLOG_DBG_RL(&rl, "%s: ioctl(%s) failed: %s", name, cmd_name,
- strerror(errno));
+ ovs_strerror(errno));
return errno;
}
return 0;
ifr.ifr_addr.sa_family = AF_INET;
error = netdev_linux_do_ioctl(netdev_get_name(netdev), &ifr, cmd, cmd_name);
if (!error) {
- const struct sockaddr_in *sin = (struct sockaddr_in *) &ifr.ifr_addr;
+ const struct sockaddr_in *sin = ALIGNED_CAST(struct sockaddr_in *,
+ &ifr.ifr_addr);
*ip = sin->sin_addr;
}
return error;
static int
af_packet_sock(void)
{
- static int sock = INT_MIN;
+ static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+ static int sock;
- if (sock == INT_MIN) {
+ if (ovsthread_once_start(&once)) {
sock = socket(AF_PACKET, SOCK_RAW, 0);
if (sock >= 0) {
int error = set_nonblocking(sock);
}
} else {
sock = -errno;
- VLOG_ERR("failed to create packet socket: %s", strerror(errno));
+ VLOG_ERR("failed to create packet socket: %s",
+ ovs_strerror(errno));
}
+ ovsthread_once_done(&once);
}
return sock;