From: Ben Pfaff Date: Tue, 7 Oct 2008 17:08:45 +0000 (-0700) Subject: Add support for TAP virtual network devices in netdev. X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=3bcc21747fd1c8b11a92624d97ce593ee3db1bd1;p=sliver-openvswitch.git Add support for TAP virtual network devices in netdev. --- diff --git a/include/netdev.h b/include/netdev.h index f36b920d2..7c841715a 100644 --- a/include/netdev.h +++ b/include/netdev.h @@ -66,11 +66,14 @@ enum netdev_pseudo_ethertype { }; struct netdev; + int netdev_open(const char *name, int ethertype, struct netdev **); +int netdev_open_tap(const char *name, struct netdev **); void netdev_close(struct netdev *); + int netdev_recv(struct netdev *, struct ofpbuf *); void netdev_recv_wait(struct netdev *); -void netdev_drain(struct netdev *); +int netdev_drain(struct netdev *); int netdev_send(struct netdev *, const struct ofpbuf *); const uint8_t *netdev_get_etheraddr(const struct netdev *); const char *netdev_get_name(const struct netdev *); diff --git a/include/socket-util.h b/include/socket-util.h index fe9c154e5..4ae106865 100644 --- a/include/socket-util.h +++ b/include/socket-util.h @@ -43,6 +43,7 @@ int lookup_ip(const char *host_name, struct in_addr *address); int get_socket_error(int sock); int check_connection_completion(int fd); int drain_rcvbuf(int fd); +void drain_fd(int fd, size_t n_packets); int make_unix_socket(int style, bool nonblock, bool passcred, const char *bind_path, const char *connect_path); int get_unix_name_len(socklen_t sun_len); diff --git a/lib/netdev.c b/lib/netdev.c index 3926fed5b..92c1a349a 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -17,7 +17,7 @@ * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS @@ -36,8 +36,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -70,11 +72,20 @@ struct netdev { struct list node; char *name; + + /* File descriptors. For ordinary network devices, the two fds below are + * the same; for tap devices, they differ. */ + int netdev_fd; /* Network device. */ + int tap_fd; /* TAP character device, if any, otherwise the + * network device. */ + + /* Cached network device information. */ int ifindex; - int fd; uint8_t etheraddr[ETH_ADDR_LEN]; + struct in6_addr in6; int speed; int mtu; + int txqlen; /* Bitmaps of OFPPF_* that describe features. All bits disabled if * unsupported or unavailable. */ @@ -83,7 +94,6 @@ struct netdev { uint32_t supported; /* Features supported by the port. */ uint32_t peer; /* Features advertised by the peer. */ - struct in6_addr in6; int save_flags; /* Initial device flags. */ int changed_flags; /* Flags that we changed. */ }; @@ -99,9 +109,11 @@ static int af_inet_sock = -1; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); static void init_netdev(void); +static int do_open_netdev(const char *name, int ethertype, int tap_fd, + struct netdev **netdev_); static int restore_flags(struct netdev *netdev); -static int get_flags(const struct netdev *, int *flagsp); -static int set_flags(struct netdev *, int flags); +static int get_flags(const struct netdev *, int fd, int *flagsp); +static int set_flags(struct netdev *, int fd, int flags); /* Obtains the IPv6 address for 'name' into 'in6'. 
*/ static void @@ -157,7 +169,7 @@ do_ethtool(struct netdev *netdev) memset(&ecmd, 0, sizeof ecmd); ecmd.cmd = ETHTOOL_GSET; - if (ioctl(netdev->fd, SIOCETHTOOL, &ifr) == 0) { + if (ioctl(netdev->netdev_fd, SIOCETHTOOL, &ifr) == 0) { if (ecmd.supported & SUPPORTED_10baseT_Half) { netdev->supported |= OFPPF_10MB_HD; } @@ -267,7 +279,7 @@ do_ethtool(struct netdev *netdev) } /* Opens the network device named 'name' (e.g. "eth0") and returns zero if - * successful, otherwise a positive errno value. On success, sets '*netdev' + * successful, otherwise a positive errno value. On success, sets '*netdevp' * to the new network device, otherwise to null. * * 'ethertype' may be a 16-bit Ethernet protocol value in host byte order to @@ -275,34 +287,98 @@ do_ethtool(struct netdev *netdev) * the 'enum netdev_pseudo_ethertype' values to receive frames in one of those * categories. */ int -netdev_open(const char *name, int ethertype, struct netdev **netdev_) +netdev_open(const char *name, int ethertype, struct netdev **netdevp) { - int fd; + if (!strncmp(name, "tap:", 4)) { + return netdev_open_tap(name + 4, netdevp); + } else { + return do_open_netdev(name, ethertype, -1, netdevp); + } +} + +/* Opens a TAP virtual network device. If 'name' is a nonnull, non-empty + * string, attempts to assign that name to the TAP device (failing if the name + * is already in use); otherwise, a name is automatically assigned. Returns + * zero if successful, otherwise a positive errno value. On success, sets + * '*netdevp' to the new network device, otherwise to null. 
*/ +int +netdev_open_tap(const char *name, struct netdev **netdevp) +{ + static const char tap_dev[] = "/dev/net/tun"; + struct ifreq ifr; + int error; + int tap_fd; + + tap_fd = open(tap_dev, O_RDWR); + if (tap_fd < 0) { + ofp_error(errno, "opening \"%s\" failed", tap_dev); + return errno; + } + + memset(&ifr, 0, sizeof ifr); + ifr.ifr_flags = IFF_TAP | IFF_NO_PI; + if (name) { + strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name); + } + if (ioctl(tap_fd, TUNSETIFF, &ifr) < 0) { + int error = errno; + ofp_error(error, "ioctl(TUNSETIFF) on \"%s\" failed", tap_dev); + close(tap_fd); + return error; + } + + error = set_nonblocking(tap_fd); + if (error) { + ofp_error(error, "set_nonblocking on \"%s\" failed", tap_dev); + close(tap_fd); + return error; + } + + error = do_open_netdev(ifr.ifr_name, NETDEV_ETH_TYPE_NONE, tap_fd, + netdevp); + if (error) { + close(tap_fd); + } + return error; +} + +static int +do_open_netdev(const char *name, int ethertype, int tap_fd, + struct netdev **netdev_) +{ + int netdev_fd; struct sockaddr_ll sll; struct ifreq ifr; unsigned int ifindex; uint8_t etheraddr[ETH_ADDR_LEN]; struct in6_addr in6; int mtu; + int txqlen; int error; struct netdev *netdev; - *netdev_ = NULL; init_netdev(); + *netdev_ = NULL; /* Create raw socket. */ - fd = socket(PF_PACKET, SOCK_RAW, - htons(ethertype == NETDEV_ETH_TYPE_NONE ? 0 - : ethertype == NETDEV_ETH_TYPE_ANY ? ETH_P_ALL - : ethertype == NETDEV_ETH_TYPE_802_2 ? ETH_P_802_2 - : ethertype)); - if (fd < 0) { + netdev_fd = socket(PF_PACKET, SOCK_RAW, + htons(ethertype == NETDEV_ETH_TYPE_NONE ? 0 + : ethertype == NETDEV_ETH_TYPE_ANY ? ETH_P_ALL + : ethertype == NETDEV_ETH_TYPE_802_2 ? ETH_P_802_2 + : ethertype)); + if (netdev_fd < 0) { return errno; } + /* Set non-blocking mode. */ + error = set_nonblocking(netdev_fd); + if (error) { + goto error_already_set; + } + /* Get ethernet device index. 
*/ strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name); - if (ioctl(fd, SIOCGIFINDEX, &ifr) < 0) { + if (ioctl(netdev_fd, SIOCGIFINDEX, &ifr) < 0) { VLOG_ERR("ioctl(SIOCGIFINDEX) on %s device failed: %s", name, strerror(errno)); goto error; @@ -313,7 +389,7 @@ netdev_open(const char *name, int ethertype, struct netdev **netdev_) memset(&sll, 0, sizeof sll); sll.sll_family = AF_PACKET; sll.sll_ifindex = ifindex; - if (bind(fd, (struct sockaddr *) &sll, sizeof sll) < 0) { + if (bind(netdev_fd, (struct sockaddr *) &sll, sizeof sll) < 0) { VLOG_ERR("bind to %s failed: %s", name, strerror(errno)); goto error; } @@ -323,14 +399,14 @@ netdev_open(const char *name, int ethertype, struct netdev **netdev_) * packets of the requested type on all system interfaces. We do not * want to receive that data, but there is no way to avoid it. So we * must now drain out the receive queue. */ - error = drain_rcvbuf(fd); + error = drain_rcvbuf(netdev_fd); if (error) { goto error; } } /* Get MAC address. */ - if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) { + if (ioctl(netdev_fd, SIOCGIFHWADDR, &ifr) < 0) { VLOG_ERR("ioctl(SIOCGIFHWADDR) on %s device failed: %s", name, strerror(errno)); goto error; @@ -343,20 +419,30 @@ netdev_open(const char *name, int ethertype, struct netdev **netdev_) memcpy(etheraddr, ifr.ifr_hwaddr.sa_data, sizeof etheraddr); /* Get MTU. */ - if (ioctl(fd, SIOCGIFMTU, &ifr) < 0) { + if (ioctl(netdev_fd, SIOCGIFMTU, &ifr) < 0) { VLOG_ERR("ioctl(SIOCGIFMTU) on %s device failed: %s", name, strerror(errno)); goto error; } mtu = ifr.ifr_mtu; + /* Get TX queue length. */ + if (ioctl(netdev_fd, SIOCGIFTXQLEN, &ifr) < 0) { + VLOG_ERR("ioctl(SIOCGIFTXQLEN) on %s device failed: %s", + name, strerror(errno)); + goto error; + } + txqlen = ifr.ifr_qlen; + get_ipv6_address(name, &in6); /* Allocate network device. 
*/ netdev = xmalloc(sizeof *netdev); netdev->name = xstrdup(name); netdev->ifindex = ifindex; - netdev->fd = fd; + netdev->txqlen = txqlen; + netdev->netdev_fd = netdev_fd; + netdev->tap_fd = tap_fd < 0 ? netdev_fd : tap_fd; memcpy(netdev->etheraddr, etheraddr, sizeof etheraddr); netdev->mtu = mtu; netdev->in6 = in6; @@ -365,9 +451,9 @@ netdev_open(const char *name, int ethertype, struct netdev **netdev_) do_ethtool(netdev); /* Save flags to restore at close or exit. */ - error = get_flags(netdev, &netdev->save_flags); + error = get_flags(netdev, netdev_fd, &netdev->save_flags); if (error) { - goto preset_error; + goto error_already_set; } netdev->changed_flags = 0; fatal_signal_block(); @@ -380,8 +466,11 @@ netdev_open(const char *name, int ethertype, struct netdev **netdev_) error: error = errno; -preset_error: - close(fd); +error_already_set: + close(netdev_fd); + if (tap_fd >= 0) { + close(tap_fd); + } return error; } @@ -404,7 +493,10 @@ netdev_close(struct netdev *netdev) /* Free. */ free(netdev->name); - close(netdev->fd); + close(netdev->netdev_fd); + if (netdev->netdev_fd != netdev->tap_fd) { + close(netdev->tap_fd); + } free(netdev); } } @@ -440,9 +532,8 @@ netdev_recv(struct netdev *netdev, struct ofpbuf *buffer) assert(buffer->size == 0); assert(ofpbuf_tailroom(buffer) >= ETH_TOTAL_MIN); do { - n_bytes = recv(netdev->fd, - ofpbuf_tail(buffer), ofpbuf_tailroom(buffer), - MSG_DONTWAIT); + n_bytes = read(netdev->tap_fd, + ofpbuf_tail(buffer), ofpbuf_tailroom(buffer)); } while (n_bytes < 0 && errno == EINTR); if (n_bytes < 0) { if (errno != EAGAIN) { @@ -468,14 +559,19 @@ netdev_recv(struct netdev *netdev, struct ofpbuf *buffer) void netdev_recv_wait(struct netdev *netdev) { - poll_fd_wait(netdev->fd, POLLIN); + poll_fd_wait(netdev->tap_fd, POLLIN); } /* Discards all packets waiting to be received from 'netdev'. 
*/ -void +int netdev_drain(struct netdev *netdev) { - drain_rcvbuf(netdev->fd); + if (netdev->tap_fd != netdev->netdev_fd) { + drain_fd(netdev->tap_fd, netdev->txqlen); + return 0; + } else { + return drain_rcvbuf(netdev->netdev_fd); + } } /* Sends 'buffer' on 'netdev'. Returns 0 if successful, otherwise a positive @@ -493,7 +589,7 @@ netdev_send(struct netdev *netdev, const struct ofpbuf *buffer) ssize_t n_bytes; do { - n_bytes = sendto(netdev->fd, buffer->data, buffer->size, 0, NULL, 0); + n_bytes = write(netdev->tap_fd, buffer->data, buffer->size); } while (n_bytes < 0 && errno == EINTR); if (n_bytes < 0) { @@ -527,7 +623,12 @@ netdev_send(struct netdev *netdev, const struct ofpbuf *buffer) void netdev_send_wait(struct netdev *netdev) { - poll_fd_wait(netdev->fd, POLLOUT); + if (netdev->tap_fd == netdev->netdev_fd) { + poll_fd_wait(netdev->tap_fd, POLLOUT); + } else { + /* TAP device always accepts packets.*/ + poll_immediate_wake(); + } } /* Returns a pointer to 'netdev''s MAC address. The caller must not modify or @@ -569,7 +670,7 @@ netdev_get_link_status(const struct netdev *netdev) memset(&edata, 0, sizeof edata); edata.cmd = ETHTOOL_GLINK; - if (ioctl(netdev->fd, SIOCETHTOOL, &ifr) == 0) { + if (ioctl(netdev->netdev_fd, SIOCETHTOOL, &ifr) == 0) { if (edata.data) { return 1; } else { @@ -708,7 +809,7 @@ netdev_get_flags(const struct netdev *netdev, enum netdev_flags *flagsp) { int error, flags; - error = get_flags(netdev, &flags); + error = get_flags(netdev, netdev->netdev_fd, &flags); if (error) { return error; } @@ -741,13 +842,13 @@ nd_to_iff_flags(enum netdev_flags nd) * will be reverted when 'netdev' is closed or the program exits. Returns 0 if * successful, otherwise a positive errno value. 
*/ static int -do_update_flags(struct netdev *netdev, enum netdev_flags off, +do_update_flags(struct netdev *netdev, int fd, enum netdev_flags off, enum netdev_flags on, bool permanent) { int old_flags, new_flags; int error; - error = get_flags(netdev, &old_flags); + error = get_flags(netdev, fd, &old_flags); if (error) { return error; } @@ -757,7 +858,7 @@ do_update_flags(struct netdev *netdev, enum netdev_flags off, netdev->changed_flags |= new_flags ^ old_flags; } if (new_flags != old_flags) { - error = set_flags(netdev, new_flags); + error = set_flags(netdev, fd, new_flags); } return error; } @@ -770,7 +871,7 @@ int netdev_set_flags(struct netdev *netdev, enum netdev_flags flags, bool permanent) { - return do_update_flags(netdev, -1, flags, permanent); + return do_update_flags(netdev, netdev->netdev_fd, -1, flags, permanent); } /* Turns on the specified 'flags' on 'netdev'. @@ -781,7 +882,7 @@ int netdev_turn_flags_on(struct netdev *netdev, enum netdev_flags flags, bool permanent) { - return do_update_flags(netdev, 0, flags, permanent); + return do_update_flags(netdev, netdev->netdev_fd, 0, flags, permanent); } /* Turns off the specified 'flags' on 'netdev'. @@ -792,7 +893,7 @@ int netdev_turn_flags_off(struct netdev *netdev, enum netdev_flags flags, bool permanent) { - return do_update_flags(netdev, flags, 0, permanent); + return do_update_flags(netdev, netdev->netdev_fd, flags, 0, permanent); } /* Looks up the ARP table entry for 'ip' on 'netdev'. If one exists and can be @@ -856,7 +957,7 @@ restore_flags(struct netdev *netdev) /* Get current flags. 
*/ strncpy(ifr.ifr_name, netdev->name, sizeof ifr.ifr_name); - if (ioctl(netdev->fd, SIOCGIFFLAGS, &ifr) < 0) { + if (ioctl(netdev->netdev_fd, SIOCGIFFLAGS, &ifr) < 0) { return errno; } @@ -865,7 +966,7 @@ restore_flags(struct netdev *netdev) if ((ifr.ifr_flags ^ netdev->save_flags) & restore_flags) { ifr.ifr_flags &= ~restore_flags; ifr.ifr_flags |= netdev->save_flags & restore_flags; - if (ioctl(netdev->fd, SIOCSIFFLAGS, &ifr) < 0) { + if (ioctl(netdev->netdev_fd, SIOCSIFFLAGS, &ifr) < 0) { return errno; } } @@ -885,11 +986,11 @@ restore_all_flags(void *aux UNUSED) } static int -get_flags(const struct netdev *netdev, int *flags) +get_flags(const struct netdev *netdev, int fd, int *flags) { struct ifreq ifr; strncpy(ifr.ifr_name, netdev->name, sizeof ifr.ifr_name); - if (ioctl(netdev->fd, SIOCGIFFLAGS, &ifr) < 0) { + if (ioctl(fd, SIOCGIFFLAGS, &ifr) < 0) { VLOG_ERR("ioctl(SIOCGIFFLAGS) on %s device failed: %s", netdev->name, strerror(errno)); return errno; @@ -899,12 +1000,12 @@ get_flags(const struct netdev *netdev, int *flags) } static int -set_flags(struct netdev *netdev, int flags) +set_flags(struct netdev *netdev, int fd, int flags) { struct ifreq ifr; strncpy(ifr.ifr_name, netdev->name, sizeof ifr.ifr_name); ifr.ifr_flags = flags; - if (ioctl(netdev->fd, SIOCSIFFLAGS, &ifr) < 0) { + if (ioctl(netdev->netdev_fd, SIOCSIFFLAGS, &ifr) < 0) { VLOG_ERR("ioctl(SIOCSIFFLAGS) on %s device failed: %s", netdev->name, strerror(errno)); return errno; diff --git a/lib/socket-util.c b/lib/socket-util.c index 7d54a4818..781bcb8fe 100644 --- a/lib/socket-util.c +++ b/lib/socket-util.c @@ -170,6 +170,24 @@ drain_rcvbuf(int fd) return 0; } +/* Reads and discards up to 'n' datagrams from 'fd', stopping as soon as no + * more data can be immediately read. ('fd' should therefore be in + * non-blocking mode.)*/ +void +drain_fd(int fd, size_t n_packets) +{ + for (; n_packets > 0; n_packets--) { + /* 'buffer' only needs to be 1 byte long in most circumstances. 
This + * size is defensive against the possibility that we someday want to + * use a Linux tap device without TUN_NO_PI, in which case a buffer + * smaller than sizeof(struct tun_pi) will give EINVAL on read. */ + char buffer[128]; + if (read(fd, buffer, sizeof buffer) <= 0) { + break; + } + } +} + /* Stores in '*un' a sockaddr_un that refers to file 'name'. Stores in * '*un_len' the size of the sockaddr_un. */ static void