X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fnetdev-linux.c;h=301a7544dcb3853a437172e75e04ba679effa6dc;hb=d017eeb9f9ebcb46c24a67fd301b3e36cd26a04e;hp=d73115be92b0eee1d95163acdc9df61bb6e8323c;hpb=bbd5b6f44bcc798a6636f52ed843b1cef1372f43;p=sliver-openvswitch.git diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index d73115be9..301a7544d 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -147,6 +148,7 @@ struct tc { struct tc_queue { struct hmap_node hmap_node; /* In struct tc's "queues" hmap. */ unsigned int queue_id; /* OpenFlow queue ID. */ + long long int created; /* Time queue was created, in msecs. */ }; /* A particular kind of traffic control. Each implementation generally maps to @@ -408,9 +410,6 @@ static const struct netdev_rx_class netdev_rx_linux_class; /* Sockets used for ioctl operations. */ static int af_inet_sock = -1; /* AF_INET, SOCK_DGRAM. */ -/* A Netlink routing socket that is not subscribed to any multicast groups. */ -static struct nl_sock *rtnl_sock; - /* This is set pretty low because we probably won't learn anything from the * additional log messages. */ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); @@ -474,16 +473,7 @@ netdev_linux_init(void) af_inet_sock = socket(AF_INET, SOCK_DGRAM, 0); status = af_inet_sock >= 0 ? 0 : errno; if (status) { - VLOG_ERR("failed to create inet socket: %s", strerror(status)); - } - - /* Create rtnetlink socket. */ - if (!status) { - status = nl_sock_create(NETLINK_ROUTE, &rtnl_sock); - if (status) { - VLOG_ERR_RL(&rl, "failed to create rtnetlink socket: %s", - strerror(status)); - } + VLOG_ERR("failed to create inet socket: %s", ovs_strerror(status)); } } return status; @@ -671,7 +661,7 @@ netdev_linux_create_tap(const struct netdev_class *class OVS_UNUSED, state->fd = open(tap_dev, O_RDWR); if (state->fd < 0) { error = errno; - VLOG_WARN("opening \"%s\" failed: %s", tap_dev, strerror(error)); + VLOG_WARN("opening \"%s\" failed: %s", tap_dev, ovs_strerror(error)); goto error_unref_notifier; } @@ -680,7 +670,7 @@ netdev_linux_create_tap(const struct netdev_class *class OVS_UNUSED, ovs_strzcpy(ifr.ifr_name, name, sizeof ifr.ifr_name); if (ioctl(state->fd, TUNSETIFF, &ifr) == -1) { VLOG_WARN("%s: creating tap device failed: %s", name, - strerror(errno)); + ovs_strerror(errno)); error = errno; goto error_unref_notifier; } @@ -744,12 +734,20 @@ netdev_linux_rx_open(struct netdev *netdev_, struct netdev_rx **rxp) } else { struct sockaddr_ll sll; int ifindex; + /* Result of tcpdump -dd inbound */ + static struct sock_filter filt[] = { + { 0x28, 0, 0, 0xfffff004 }, /* ldh [0] */ + { 0x15, 0, 1, 0x00000004 }, /* jeq #4 jt 2 jf 3 */ + { 0x6, 0, 0, 0x00000000 }, /* ret #0 */ + { 0x6, 0, 0, 0x0000ffff } /* ret #65535 */ + }; + static struct sock_fprog fprog = { ARRAY_SIZE(filt), filt }; /* Create file descriptor. */ fd = socket(PF_PACKET, SOCK_RAW, 0); if (fd < 0) { error = errno; - VLOG_ERR("failed to create raw socket (%s)", strerror(error)); + VLOG_ERR("failed to create raw socket (%s)", ovs_strerror(error)); goto error; } @@ -773,7 +771,17 @@ netdev_linux_rx_open(struct netdev *netdev_, struct netdev_rx **rxp) if (bind(fd, (struct sockaddr *) &sll, sizeof sll) < 0) { error = errno; VLOG_ERR("%s: failed to bind raw socket (%s)", - netdev_get_name(netdev_), strerror(error)); + netdev_get_name(netdev_), ovs_strerror(error)); + goto error; + } + + /* Filter for only inbound packets. */ + error = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, + sizeof fprog); + if (error) { + error = errno; + VLOG_ERR("%s: failed attach filter (%s)", + netdev_get_name(netdev_), ovs_strerror(error)); goto error; } } @@ -816,14 +824,12 @@ netdev_rx_linux_recv(struct netdev_rx *rx_, void *data, size_t size) : recv(rx->fd, data, size, MSG_TRUNC)); } while (retval < 0 && errno == EINTR); - if (retval > size) { - return -EMSGSIZE; - } else if (retval >= 0) { - return retval; + if (retval >= 0) { + return retval > size ? -EMSGSIZE : retval; } else { if (errno != EAGAIN) { VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s", - strerror(errno), netdev_rx_get_name(rx_)); + ovs_strerror(errno), netdev_rx_get_name(rx_)); } return -errno; } @@ -910,7 +916,8 @@ netdev_linux_send(struct netdev *netdev_, const void *data, size_t size) /* Use the tap fd to send to this device. This is essential for * tap devices, because packets sent to a tap device with an * AF_PACKET socket will loop back to be *received* again on the - * tap device. */ + * tap device. This doesn't occur on other interface types + * because we attach a socket filter to the rx socket. */ struct netdev_linux *netdev = netdev_linux_cast(netdev_); retval = write(netdev->state.tap.fd, data, size); @@ -926,7 +933,7 @@ netdev_linux_send(struct netdev *netdev_, const void *data, size_t size) continue; } else if (errno != EAGAIN) { VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s", - netdev_get_name(netdev_), strerror(errno)); + netdev_get_name(netdev_), ovs_strerror(errno)); } return errno; } else if (retval != size) { @@ -1243,7 +1250,7 @@ check_for_working_netlink_stats(void) } else { VLOG_INFO("RTM_GETLINK failed (%s), obtaining netdev stats " "via proc (you are probably running a pre-2.6.19 " - "kernel)", strerror(error)); + "kernel)", ovs_strerror(error)); return false; } } @@ -1322,7 +1329,8 @@ get_stats_via_vport(const struct netdev *netdev_, error = get_stats_via_vport__(netdev_, stats); if (error && error != ENOENT) { VLOG_WARN_RL(&rl, "%s: obtaining netdev stats via vport failed " - "(%s)", netdev_get_name(netdev_), strerror(error)); + "(%s)", + netdev_get_name(netdev_), ovs_strerror(error)); } netdev->vport_stats_error = error; netdev->cache_valid |= VALID_VPORT_STAT_ERROR; @@ -1333,11 +1341,13 @@ static int netdev_linux_sys_get_stats(const struct netdev *netdev_, struct netdev_stats *stats) { - static int use_netlink_stats = -1; + static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; + static int use_netlink_stats; int error; - if (use_netlink_stats < 0) { + if (ovsthread_once_start(&once)) { use_netlink_stats = check_for_working_netlink_stats(); + ovsthread_once_done(&once); } if (use_netlink_stats) { @@ -1757,7 +1767,7 @@ netdev_linux_set_policing(struct netdev *netdev_, error = tc_add_del_ingress_qdisc(netdev_, false); if (error) { VLOG_WARN_RL(&rl, "%s: removing policing failed: %s", - netdev_name, strerror(error)); + netdev_name, ovs_strerror(error)); goto out; } @@ -1765,14 +1775,14 @@ netdev_linux_set_policing(struct netdev *netdev_, error = tc_add_del_ingress_qdisc(netdev_, true); if (error) { VLOG_WARN_RL(&rl, "%s: adding policing qdisc failed: %s", - netdev_name, strerror(error)); + netdev_name, ovs_strerror(error)); goto out; } error = tc_add_policer(netdev_, kbits_rate, kbits_burst); if (error){ VLOG_WARN_RL(&rl, "%s: adding policing action failed: %s", - netdev_name, strerror(error)); + netdev_name, ovs_strerror(error)); goto out; } } @@ -1989,9 +1999,11 @@ netdev_linux_get_queue_stats(const struct netdev *netdev_, return EOPNOTSUPP; } else { const struct tc_queue *queue = tc_find_queue(netdev_, queue_id); - return (queue - ? netdev->tc->ops->class_get_stats(netdev_, queue, stats) - : ENOENT); + if (!queue) { + return ENOENT; + } + stats->created = queue->created; + return netdev->tc->ops->class_get_stats(netdev_, queue, stats); } } @@ -2006,7 +2018,7 @@ start_queue_dump(const struct netdev *netdev, struct nl_dump *dump) return false; } tcmsg->tcm_parent = 0; - nl_dump_start(dump, rtnl_sock, &request); + nl_dump_start(dump, NETLINK_ROUTE, &request); ofpbuf_uninit(&request); return true; } @@ -2215,7 +2227,7 @@ netdev_linux_add_router(struct netdev *netdev OVS_UNUSED, struct in_addr router) rt.rt_flags = RTF_UP | RTF_GATEWAY; error = ioctl(af_inet_sock, SIOCADDRT, &rt) < 0 ? errno : 0; if (error) { - VLOG_WARN("ioctl(SIOCADDRT): %s", strerror(error)); + VLOG_WARN("ioctl(SIOCADDRT): %s", ovs_strerror(error)); } return error; } @@ -2232,7 +2244,7 @@ netdev_linux_get_next_hop(const struct in_addr *host, struct in_addr *next_hop, *netdev_name = NULL; stream = fopen(fn, "r"); if (stream == NULL) { - VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, strerror(errno)); + VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, ovs_strerror(errno)); return errno; } @@ -2344,7 +2356,8 @@ netdev_linux_arp_lookup(const struct netdev *netdev, memcpy(mac, r.arp_ha.sa_data, ETH_ADDR_LEN); } else if (retval != ENXIO) { VLOG_WARN_RL(&rl, "%s: could not look up ARP entry for "IP_FMT": %s", - netdev_get_name(netdev), IP_ARGS(ip), strerror(retval)); + netdev_get_name(netdev), IP_ARGS(ip), + ovs_strerror(retval)); } return retval; } @@ -2613,7 +2626,7 @@ htb_setup_class__(struct netdev *netdev, unsigned int handle, tc_get_major(handle), tc_get_minor(handle), tc_get_major(parent), tc_get_minor(parent), class->min_rate, class->max_rate, - class->burst, class->priority, strerror(error)); + class->burst, class->priority, ovs_strerror(error)); } return error; } @@ -2797,6 +2810,7 @@ htb_update_queue__(struct netdev *netdev, unsigned int queue_id, hcp = xmalloc(sizeof *hcp); queue = &hcp->tc_queue; queue->queue_id = queue_id; + queue->created = time_msec(); hmap_insert(&htb->tc.queues, &queue->hmap_node, hash); } @@ -3030,6 +3044,7 @@ hfsc_update_queue__(struct netdev *netdev, unsigned int queue_id, hcp = xmalloc(sizeof *hcp); queue = &hcp->tc_queue; queue->queue_id = queue_id; + queue->created = time_msec(); hmap_insert(&hfsc->tc.queues, &queue->hmap_node, hash); } @@ -3273,7 +3288,7 @@ hfsc_setup_class__(struct netdev *netdev, unsigned int handle, netdev_get_name(netdev), tc_get_major(handle), tc_get_minor(handle), tc_get_major(parent), tc_get_minor(parent), - class->min_rate, class->max_rate, strerror(error)); + class->min_rate, class->max_rate, ovs_strerror(error)); } return error; @@ -3624,7 +3639,7 @@ tc_make_request(const struct netdev *netdev, int type, unsigned int flags, static int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp) { - int error = nl_sock_transact(rtnl_sock, request, replyp); + int error = nl_transact(NETLINK_ROUTE, request, replyp); ofpbuf_uninit(request); return error; } @@ -3769,30 +3784,35 @@ read_psched(void) * [5] 2.6.32.21.22 (approx.) from Ubuntu 10.04 on VMware Fusion * [6] 2.6.34 from kernel.org on KVM */ + static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; static const char fn[] = "/proc/net/psched"; unsigned int a, b, c, d; FILE *stream; + if (!ovsthread_once_start(&once)) { + return; + } + ticks_per_s = 1.0; buffer_hz = 100; stream = fopen(fn, "r"); if (!stream) { - VLOG_WARN("%s: open failed: %s", fn, strerror(errno)); - return; + VLOG_WARN("%s: open failed: %s", fn, ovs_strerror(errno)); + goto exit; } if (fscanf(stream, "%x %x %x %x", &a, &b, &c, &d) != 4) { VLOG_WARN("%s: read failed", fn); fclose(stream); - return; + goto exit; } VLOG_DBG("%s: psched parameters are: %u %u %u %u", fn, a, b, c, d); fclose(stream); if (!a || !c) { VLOG_WARN("%s: invalid scheduler parameters", fn); - return; + goto exit; } ticks_per_s = (double) a * c / b; @@ -3803,6 +3823,9 @@ read_psched(void) fn, a, b, c, d); } VLOG_DBG("%s: ticks_per_s=%f buffer_hz=%u", fn, ticks_per_s, buffer_hz); + +exit: + ovsthread_once_done(&once); } /* Returns the number of bytes that can be transmitted in 'ticks' ticks at a @@ -3810,9 +3833,7 @@ read_psched(void) static unsigned int tc_ticks_to_bytes(unsigned int rate, unsigned int ticks) { - if (!buffer_hz) { - read_psched(); - } + read_psched(); return (rate * ticks) / ticks_per_s; } @@ -3821,9 +3842,7 @@ tc_ticks_to_bytes(unsigned int rate, unsigned int ticks) static unsigned int tc_bytes_to_ticks(unsigned int rate, unsigned int size) { - if (!buffer_hz) { - read_psched(); - } + read_psched(); return rate ? ((unsigned long long int) ticks_per_s * size) / rate : 0; } @@ -3832,9 +3851,7 @@ tc_bytes_to_ticks(unsigned int rate, unsigned int size) static unsigned int tc_buffer_per_jiffy(unsigned int rate) { - if (!buffer_hz) { - read_psched(); - } + read_psched(); return rate / buffer_hz; } @@ -3982,7 +3999,7 @@ tc_query_class(const struct netdev *netdev, netdev_get_name(netdev), tc_get_major(handle), tc_get_minor(handle), tc_get_major(parent), tc_get_minor(parent), - strerror(error)); + ovs_strerror(error)); } return error; } @@ -4007,7 +4024,7 @@ tc_delete_class(const struct netdev *netdev, unsigned int handle) VLOG_WARN_RL(&rl, "delete %s class %u:%u failed (%s)", netdev_get_name(netdev), tc_get_major(handle), tc_get_minor(handle), - strerror(error)); + ovs_strerror(error)); } return error; } @@ -4107,7 +4124,7 @@ tc_query_qdisc(const struct netdev *netdev_) } else { /* Who knows? Maybe the device got deleted. */ VLOG_WARN_RL(&rl, "query %s qdisc failed (%s)", - netdev_get_name(netdev_), strerror(error)); + netdev_get_name(netdev_), ovs_strerror(error)); ops = &tc_ops_other; } @@ -4300,7 +4317,7 @@ get_stats_via_netlink(int ifindex, struct netdev_stats *stats) ifi = ofpbuf_put_zeros(&request, sizeof *ifi); ifi->ifi_family = PF_UNSPEC; ifi->ifi_index = ifindex; - error = nl_sock_transact(rtnl_sock, &request, &reply); + error = nl_transact(NETLINK_ROUTE, &request, &reply); ofpbuf_uninit(&request); if (error) { return error; @@ -4336,7 +4353,7 @@ get_stats_via_proc(const char *netdev_name, struct netdev_stats *stats) stream = fopen(fn, "r"); if (!stream) { - VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, strerror(errno)); + VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, ovs_strerror(errno)); return errno; } @@ -4416,7 +4433,7 @@ do_get_ifindex(const char *netdev_name) COVERAGE_INC(netdev_get_ifindex); if (ioctl(af_inet_sock, SIOCGIFINDEX, &ifr) < 0) { VLOG_WARN_RL(&rl, "ioctl(SIOCGIFINDEX) on %s device failed: %s", - netdev_name, strerror(errno)); + netdev_name, ovs_strerror(errno)); return -errno; } return ifr.ifr_ifindex; @@ -4459,7 +4476,7 @@ get_etheraddr(const char *netdev_name, uint8_t ea[ETH_ADDR_LEN]) * to INFO for that case. */ VLOG(errno == ENODEV ? VLL_INFO : VLL_ERR, "ioctl(SIOCGIFHWADDR) on %s device failed: %s", - netdev_name, strerror(errno)); + netdev_name, ovs_strerror(errno)); return errno; } hwaddr_family = ifr.ifr_hwaddr.sa_family; @@ -4484,7 +4501,7 @@ set_etheraddr(const char *netdev_name, COVERAGE_INC(netdev_set_hwaddr); if (ioctl(af_inet_sock, SIOCSIFHWADDR, &ifr) < 0) { VLOG_ERR("ioctl(SIOCSIFHWADDR) on %s device failed: %s", - netdev_name, strerror(errno)); + netdev_name, ovs_strerror(errno)); return errno; } return 0; @@ -4506,7 +4523,7 @@ netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *ecmd, } else { if (errno != EOPNOTSUPP) { VLOG_WARN_RL(&rl, "ethtool command %s on network device %s " - "failed: %s", cmd_name, name, strerror(errno)); + "failed: %s", cmd_name, name, ovs_strerror(errno)); } else { /* The device doesn't support this operation. That's pretty * common, so there's no point in logging anything. */ @@ -4522,7 +4539,7 @@ netdev_linux_do_ioctl(const char *name, struct ifreq *ifr, int cmd, ovs_strzcpy(ifr->ifr_name, name, sizeof ifr->ifr_name); if (ioctl(af_inet_sock, cmd, ifr) == -1) { VLOG_DBG_RL(&rl, "%s: ioctl(%s) failed: %s", name, cmd_name, - strerror(errno)); + ovs_strerror(errno)); return errno; } return 0; @@ -4538,7 +4555,8 @@ netdev_linux_get_ipv4(const struct netdev *netdev, struct in_addr *ip, ifr.ifr_addr.sa_family = AF_INET; error = netdev_linux_do_ioctl(netdev_get_name(netdev), &ifr, cmd, cmd_name); if (!error) { - const struct sockaddr_in *sin = (struct sockaddr_in *) &ifr.ifr_addr; + const struct sockaddr_in *sin = ALIGNED_CAST(struct sockaddr_in *, + &ifr.ifr_addr); *ip = sin->sin_addr; } return error; @@ -4548,9 +4566,10 @@ netdev_linux_get_ipv4(const struct netdev *netdev, struct in_addr *ip, static int af_packet_sock(void) { - static int sock = INT_MIN; + static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; + static int sock; - if (sock == INT_MIN) { + if (ovsthread_once_start(&once)) { sock = socket(AF_PACKET, SOCK_RAW, 0); if (sock >= 0) { int error = set_nonblocking(sock); @@ -4560,8 +4579,10 @@ af_packet_sock(void) } } else { sock = -errno; - VLOG_ERR("failed to create packet socket: %s", strerror(errno)); + VLOG_ERR("failed to create packet socket: %s", + ovs_strerror(errno)); } + ovsthread_once_done(&once); } return sock;