2 * Copyright (c) 2012 Giuseppe Lettieri
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include <sys/types.h>
22 #include <arpa/inet.h>
23 #include <sys/ioctl.h>
24 #include <sys/socket.h>
26 #include <net/if_arp.h>
27 #include <linux/if_tun.h>
28 #include <netinet/in.h>
33 #include "dpif-netdev.h"
34 #include "netdev-provider.h"
36 #include "ofp-print.h"
39 #include "poll-loop.h"
43 #include "socket-util.h"
47 VLOG_DEFINE_THIS_MODULE(netdev_pltap);
49 /* Protects 'sync_list'. */
50 static struct ovs_mutex sync_list_mutex = OVS_MUTEX_INITIALIZER;
/* Devices whose flags still have to be synchronized.  Devices are linked
 * through their 'sync_list' member by sync_needed() and processed by
 * netdev_pltap_run(). */
52 static struct list sync_list OVS_GUARDED_BY(sync_list_mutex)
53 = LIST_INITIALIZER(&sync_list);
/* Link in the global 'sync_list'; inserted by sync_needed() and removed by
 * sync_done() / netdev_pltap_destruct(). */
59 struct list sync_list OVS_GUARDED_BY(sync_list_mutex);
61 /* Protects all members below. */
62 struct ovs_mutex mutex OVS_ACQ_AFTER(sync_list_mutex);
65 struct netdev_stats stats;
/* Target flags: what configuration and netdev_pltap_update_flags() asked
 * for.  netdev_pltap_sync_flags() drives the device towards this value. */
66 enum netdev_flags new_flags;
/* Current flags, refreshed from the device by get_flags() in
 * netdev_pltap_update_flags(). */
67 enum netdev_flags flags;
/* Local address; 'sin_addr' is filled from the "local_ip" config key. */
69 struct sockaddr_in local_addr;
/* True once the "local_netmask" config key has been accepted. */
72 bool valid_local_netmask;
/* True while the device is linked in the global 'sync_list'. */
73 bool sync_flags_needed;
/* Value returned by netdev_pltap_change_seq(); initialized to 1. */
74 unsigned int change_seq;
78 struct netdev_rxq_pltap {
/* Rate limiter shared by the rate-limited packet send/receive warnings. */
83 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
85 /* Protects 'pltap_netdevs'. */
86 static struct ovs_mutex pltap_netdevs_mutex = OVS_MUTEX_INITIALIZER;
/* Maps a netdev name to its 'struct netdev_pltap'.  Entries are added in
 * netdev_pltap_construct() and removed in netdev_pltap_destruct(). */
87 static struct shash pltap_netdevs OVS_GUARDED_BY(pltap_netdevs_mutex)
88 = SHASH_INITIALIZER(&pltap_netdevs);
/* Forward declarations for functions referenced before their definitions.
 *
 * The parameter of netdev_pltap_update_seq() is named 'dev' so that the
 * OVS_REQUIRES(dev->mutex) annotation refers to a declared identifier. */
90 static int netdev_pltap_construct(struct netdev *netdev_);
92 static void netdev_pltap_update_seq(struct netdev_pltap *dev)
93 OVS_REQUIRES(dev->mutex);
94 static int get_flags(struct netdev_pltap *dev, enum netdev_flags *flags)
95 OVS_REQUIRES(dev->mutex);
/* Returns true once both the local IP address and the local netmask of 'dev'
 * have been configured.  The caller must hold 'dev->mutex'. */
98 netdev_pltap_finalized(struct netdev_pltap *dev)
99 OVS_REQUIRES(dev->mutex)
101 return dev->valid_local_ip && dev->valid_local_netmask;
/* Returns true if 'class' is the pltap netdev class, recognized by its
 * 'construct' callback being netdev_pltap_construct(). */
105 is_netdev_pltap_class(const struct netdev_class *class)
107 return class->construct == netdev_pltap_construct;
/* Converts a generic 'netdev' into the 'struct netdev_pltap' that embeds it
 * as member 'up'.  Asserts that 'netdev' really belongs to the pltap class. */
110 static struct netdev_pltap *
111 netdev_pltap_cast(const struct netdev *netdev)
113 ovs_assert(is_netdev_pltap_class(netdev_get_class(netdev)));
114 return CONTAINER_OF(netdev, struct netdev_pltap, up);
/* Converts a generic receive queue 'rx' into the 'struct netdev_rxq_pltap'
 * that embeds it as member 'up'.  Asserts that the queue's netdev belongs to
 * the pltap class. */
117 static struct netdev_rxq_pltap*
118 netdev_rxq_pltap_cast(const struct netdev_rxq *rx)
120 ovs_assert(is_netdev_pltap_class(netdev_get_class(rx->netdev)));
121 return CONTAINER_OF(rx, struct netdev_rxq_pltap, up);
/* Marks 'dev' as needing a flag synchronization and links it into the global
 * 'sync_list'.  The caller must hold both 'dev->mutex' and 'sync_list_mutex'.
 *
 * NOTE(review): the statement guarded by the 'sync_flags_needed' check is not
 * visible here; presumably an early return so that a device is never inserted
 * twice. */
124 static void sync_needed(struct netdev_pltap *dev)
125 OVS_REQUIRES(dev->mutex, sync_list_mutex)
127 if (dev->sync_flags_needed)
130 dev->sync_flags_needed = true;
131 list_insert(&sync_list, &dev->sync_list);
/* Counterpart of sync_needed(): unlinks 'dev' from the global 'sync_list' and
 * clears 'sync_flags_needed'.  The caller must hold both 'dev->mutex' and
 * 'sync_list_mutex'.
 *
 * NOTE(review): the statement guarded by the '!sync_flags_needed' check is not
 * visible here; presumably an early return for devices that are not linked. */
134 static void sync_done(struct netdev_pltap *dev)
135 OVS_REQUIRES(dev->mutex, sync_list_mutex)
137 if (!dev->sync_flags_needed)
140 (void) list_remove(&dev->sync_list);
141 dev->sync_flags_needed = false;
/* Allocates a zero-filled 'struct netdev_pltap'. */
144 static struct netdev *
145 netdev_pltap_alloc(void)
147 struct netdev_pltap *netdev = xzalloc(sizeof *netdev);
/* Initializes a freshly allocated pltap netdev: resets its configuration
 * state, opens the underlying tap device, makes the descriptor non-blocking
 * and registers the device in 'pltap_netdevs' under its netdev name. */
152 netdev_pltap_construct(struct netdev *netdev_)
154 struct netdev_pltap *netdev = netdev_pltap_cast(netdev_);
157 ovs_mutex_init(&netdev->mutex);
/* Buffer that tun_alloc() below receives for the tap interface name. */
158 netdev->real_name = xzalloc(IFNAMSIZ + 1);
159 memset(&netdev->local_addr, 0, sizeof(netdev->local_addr));
/* Neither address nor netmask is known yet, so the device is not
 * "finalized" (see netdev_pltap_finalized()). */
160 netdev->valid_local_ip = false;
161 netdev->valid_local_netmask = false;
163 netdev->sync_flags_needed = false;
164 netdev->change_seq = 1;
167 /* Open tap device. */
168 netdev->fd = tun_alloc(IFF_TAP, netdev->real_name);
169 if (netdev->fd < 0) {
171 VLOG_WARN("tun_alloc(IFF_TAP, %s) failed: %s",
172 netdev_get_name(netdev_), ovs_strerror(error));
175 VLOG_DBG("real_name = %s", netdev->real_name);
177 /* Make non-blocking. */
178 error = set_nonblocking(netdev->fd);
/* Make the device discoverable by name, e.g. for
 * netdev_pltap_get_real_name(). */
183 ovs_mutex_lock(&pltap_netdevs_mutex);
184 shash_add(&pltap_netdevs, netdev_get_name(netdev_), netdev);
185 ovs_mutex_unlock(&pltap_netdevs_mutex);
/* Tears down a pltap netdev: unlinks it from 'sync_list' if a flag
 * synchronization is still pending, removes it from 'pltap_netdevs' and
 * destroys its mutex.
 *
 * NOTE(review): 'sync_flags_needed' is read here without 'netdev->mutex' held
 * in the visible lines, and 'sync_list_mutex' is taken while
 * 'pltap_netdevs_mutex' is held; confirm this is intended. */
190 netdev_pltap_destruct(struct netdev *netdev_)
192 struct netdev_pltap *netdev = netdev_pltap_cast(netdev_);
194 ovs_mutex_lock(&pltap_netdevs_mutex);
/* The statement guarded by this check is not visible here; presumably it
 * closes the tap descriptor. */
195 if (netdev->fd != -1)
198 if (netdev->sync_flags_needed) {
199 ovs_mutex_lock(&sync_list_mutex);
200 (void) list_remove(&netdev->sync_list);
201 ovs_mutex_unlock(&sync_list_mutex);
204 shash_find_and_delete(&pltap_netdevs,
205 netdev_get_name(netdev_));
206 ovs_mutex_unlock(&pltap_netdevs_mutex);
207 ovs_mutex_destroy(&netdev->mutex);
/* Deallocation callback for a pltap netdev; the release itself happens in
 * statements that are not visible here. */
211 netdev_pltap_dealloc(struct netdev *netdev_)
213 struct netdev_pltap *netdev = netdev_pltap_cast(netdev_);
/* Forward declaration: netdev_pltap_rxq_construct() calls this before its
 * definition. */
217 static int netdev_pltap_up(struct netdev_pltap *dev) OVS_REQUIRES(dev->mutex);
/* Allocates a zero-filled 'struct netdev_rxq_pltap'. */
219 static struct netdev_rxq *
220 netdev_pltap_rxq_alloc(void)
222 struct netdev_rxq_pltap *rx = xzalloc(sizeof *rx);
/* Sets up receive queue 'rx_' for its pltap netdev.  With the device mutex
 * held, checks whether the device is finalized and brings the interface up
 * through netdev_pltap_up().
 *
 * NOTE(review): the statement guarded by the finalized check is not visible
 * here; presumably it skips netdev_pltap_up() for unconfigured devices. */
227 netdev_pltap_rxq_construct(struct netdev_rxq *rx_)
229 struct netdev_rxq_pltap *rx = netdev_rxq_pltap_cast(rx_);
230 struct netdev *netdev_ = rx->up.netdev;
231 struct netdev_pltap *netdev =
232 netdev_pltap_cast(netdev_);
235 ovs_mutex_lock(&netdev->mutex);
237 if (!netdev_pltap_finalized(netdev))
239 error = netdev_pltap_up(netdev);
244 ovs_mutex_unlock(&netdev->mutex);
/* Receive-queue destruct callback; 'rx_' is marked unused. */
249 netdev_pltap_rxq_destruct(struct netdev_rxq *rx_ OVS_UNUSED)
/* Receive-queue deallocation callback; the release itself happens in
 * statements that are not visible here. */
254 netdev_pltap_rxq_dealloc(struct netdev_rxq *rx_)
256 struct netdev_rxq_pltap *rx = netdev_rxq_pltap_cast(rx_);
/* Runs one request/reply exchange with the vsys script 'script'.
 *
 * The request is built printf-style from 'format' and the variadic arguments
 * and written to "/vsys/<script>.in"; the reply is read from
 * "/vsys/<script>.out".  Both files are opened non-blocking and multiplexed
 * with select().
 *
 * If the script produced any output, it is NUL-terminated and either handed
 * to the caller through '*preply' (when 'preply' is nonnull; the buffer is
 * then deliberately not freed here) or logged at error level.
 *
 * At most 'reply_size - 1' bytes are read so that the terminating NUL always
 * fits inside the 'reply_size'-byte buffer. */
261 static int vsys_transaction(const char *script,
262 const char **preply, char *format, ...)
264 char *msg = NULL, *reply = NULL;
265 const size_t reply_size = 1024;
266 int ifd = -1, ofd = -1, maxfd;
267 size_t bytes_to_write, bytes_to_read,
268 bytes_written = 0, bytes_read = 0;
270 char *ofname = NULL, *ifname = NULL;
/* Format the request message. */
273 va_start(args, format);
274 msg = xvasprintf(format, args);
276 reply = (char*)xmalloc(reply_size);
277 if (!msg || !reply) {
278 VLOG_ERR("Out of memory");
/* Derive the names of the script's output and input files. */
283 ofname = xasprintf("/vsys/%s.out", script);
284 ifname = xasprintf("/vsys/%s.in", script);
285 if (!ofname || !ifname) {
286 VLOG_ERR("Out of memory");
291 ofd = open(ofname, O_RDONLY | O_NONBLOCK);
293 VLOG_ERR("Cannot open %s: %s", ofname, ovs_strerror(errno));
297 ifd = open(ifname, O_WRONLY | O_NONBLOCK);
299 VLOG_ERR("Cannot open %s: %s", ifname, ovs_strerror(errno));
/* select() needs the highest descriptor number plus one. */
303 maxfd = (ifd < ofd) ? ofd : ifd;
305 bytes_to_write = strlen(msg);
/* Reserve one byte of 'reply' for the terminating NUL. */
306 bytes_to_read = reply_size - 1;
307 while (bytes_to_write || bytes_to_read) {
308 fd_set readset, writeset, errorset;
/* Only wait for writability while part of the request is still unsent. */
313 if (bytes_to_write) {
314 FD_SET(ifd, &writeset);
315 FD_SET(ifd, &errorset);
317 FD_SET(ofd, &readset);
318 FD_SET(ofd, &errorset);
319 if (select(maxfd + 1, &readset, &writeset, &errorset, NULL) < 0) {
322 VLOG_ERR("select error: %s", ovs_strerror(errno));
326 if (FD_ISSET(ifd, &errorset) || FD_ISSET(ofd, &errorset)) {
327 VLOG_ERR("error condition on ifd or ofd");
330 if (FD_ISSET(ifd, &writeset)) {
331 ssize_t n = write(ifd, msg + bytes_written, bytes_to_write);
/* EAGAIN and EINTR are transient; anything else is reported. */
333 if (errno != EAGAIN && errno != EINTR) {
334 VLOG_ERR("write on %s: %s", ifname, ovs_strerror(errno));
341 if (bytes_to_write == 0)
345 if (FD_ISSET(ofd, &readset)) {
346 ssize_t n = read(ofd, reply + bytes_read, bytes_to_read);
348 if (errno != EAGAIN && errno != EINTR) {
349 VLOG_ERR("read on %s: %s", ofname, ovs_strerror(errno));
362 reply[bytes_read] = '\0';
365 reply = NULL; /* prevent freeing the reply msg */
367 VLOG_ERR("%s returned: %s", script, reply);
/* Brings the tap interface up by running the "vif_up" vsys script.  The
 * request has three newline-terminated fields: a string, the local IP address
 * and an integer.  The caller must hold 'dev->mutex'.
 *
 * NOTE(review): the first and third arguments and the body of the finalized
 * check are not visible here; presumably the real interface name, the netmask
 * and an early return for unconfigured devices. */
384 netdev_pltap_up(struct netdev_pltap *dev)
385 OVS_REQUIRES(dev->mutex)
387 if (!netdev_pltap_finalized(dev)) {
391 return vsys_transaction("vif_up", NULL, "%s\n"IP_FMT"\n%d\n",
393 IP_ARGS(dev->local_addr.sin_addr.s_addr),
/* Brings the tap interface down by sending its real name to the "vif_down"
 * vsys script.  The caller must hold 'dev->mutex'.
 *
 * NOTE(review): the body of the finalized check is not visible here. */
398 netdev_pltap_down(struct netdev_pltap *dev)
399 OVS_REQUIRES(dev->mutex)
401 if (!netdev_pltap_finalized(dev)) {
405 return vsys_transaction("vif_down", NULL, "%s\n", dev->real_name);
/* Switches promiscuous mode on or off through the "promisc" vsys script.
 * The request is a string followed by a newline; when 'promisc' is false an
 * additional "-\n" line is appended, otherwise nothing is.  The caller must
 * hold 'dev->mutex'.
 *
 * NOTE(review): the first format argument and the body of the finalized check
 * are not visible here; presumably the real interface name and an early
 * return for unconfigured devices. */
409 netdev_pltap_promisc(struct netdev_pltap *dev, bool promisc)
410 OVS_REQUIRES(dev->mutex)
412 if (!netdev_pltap_finalized(dev)) {
416 return vsys_transaction("promisc", NULL, "%s\n%s",
418 (promisc ? "" : "-\n"));
/* Drives the device from its current flags ('dev->flags') towards the
 * requested ones ('dev->new_flags') by running the up/down and promisc vsys
 * scripts as needed, then bumps the change sequence.  The caller must hold
 * 'sync_list_mutex'; 'dev->mutex' is taken here.
 *
 * The results of the vsys calls are deliberately ignored. */
422 netdev_pltap_sync_flags(struct netdev_pltap *dev)
423 OVS_REQUIRES(sync_list_mutex)
426 ovs_mutex_lock(&dev->mutex);
/* Nothing can be synchronized without an open tap descriptor and a complete
 * address configuration. */
428 if (dev->fd < 0 || !netdev_pltap_finalized(dev)) {
432 VLOG_DBG("sync_flags(%s): current: %s %s target: %s %s",
434 (dev->flags & NETDEV_UP ? "UP" : "-"),
435 (dev->flags & NETDEV_PROMISC ? "PROMISC" : "-"),
436 (dev->new_flags & NETDEV_UP ? "UP" : "-"),
437 (dev->new_flags & NETDEV_PROMISC ? "PROMISC" : "-"));
/* Change the UP state only when target and current state differ. */
439 if ((dev->new_flags & NETDEV_UP) && !(dev->flags & NETDEV_UP)) {
440 (void) netdev_pltap_up(dev);
441 } else if (!(dev->new_flags & NETDEV_UP) && (dev->flags & NETDEV_UP)) {
442 (void) netdev_pltap_down(dev);
/* XOR of the two PROMISC bits is nonzero exactly when they differ. */
445 if ((dev->new_flags & NETDEV_PROMISC) ^ (dev->flags & NETDEV_PROMISC)) {
446 (void) netdev_pltap_promisc(dev, dev->new_flags & NETDEV_PROMISC);
449 netdev_pltap_update_seq(dev);
453 ovs_mutex_unlock(&dev->mutex);
/* Reports the configured "local_ip" and "local_netmask" values into 'args',
 * each only if it has been set, while holding the device mutex.
 *
 * The netmask is stored as the plain host-order integer produced by atoi()
 * in netdev_pltap_set_config(), so it is formatted as-is.  Passing it through
 * ntohs() would byte-swap the value on little-endian hosts and report a
 * different number than the one that was configured. */
458 netdev_pltap_get_config(const struct netdev *dev_, struct smap *args)
460 struct netdev_pltap *netdev = netdev_pltap_cast(dev_);
462 ovs_mutex_lock(&netdev->mutex);
463 if (netdev->valid_local_ip)
464 smap_add_format(args, "local_ip", IP_FMT,
465 IP_ARGS(netdev->local_addr.sin_addr.s_addr));
466 if (netdev->valid_local_netmask)
467 smap_add_format(args, "local_netmask", "%"PRIu32,
468 (uint32_t) netdev->local_netmask);
469 ovs_mutex_unlock(&netdev->mutex);
/* Applies the key/value pairs in 'args' to the device.
 *
 * Recognized keys:
 *   - "local_ip": resolved with lookup_ip(); on success the address is stored
 *     and marked valid, on failure a warning is logged.
 *   - "local_netmask": parsed with atoi() and marked valid.
 * Any other key is reported with a warning.
 *
 * Once both values are known, NETDEV_UP is added to the requested flags.
 * Both 'sync_list_mutex' and the device mutex are held for the duration, in
 * that order.
 *
 * Warnings are prefixed with the netdev name, matching the "unknown argument"
 * message, rather than with the key that is already quoted in the text. */
474 netdev_pltap_set_config(struct netdev *dev_, const struct smap *args)
476 struct netdev_pltap *netdev = netdev_pltap_cast(dev_);
477 struct shash_node *node;
479 ovs_mutex_lock(&sync_list_mutex);
480 ovs_mutex_lock(&netdev->mutex);
481 VLOG_DBG("pltap_set_config(%s)", netdev_get_name(dev_));
482 SMAP_FOR_EACH(node, args) {
483 VLOG_DBG("arg: %s->%s", node->name, (char*)node->data);
484 if (!strcmp(node->name, "local_ip")) {
/* A nonzero result from lookup_ip() means the value could not be used. */
486 if (lookup_ip(node->data, &addr)) {
487 VLOG_WARN("%s: bad 'local_ip'", netdev_get_name(dev_));
489 netdev->local_addr.sin_addr = addr;
490 netdev->valid_local_ip = true;
492 } else if (!strcmp(node->name, "local_netmask")) {
493 netdev->local_netmask = atoi(node->data);
494 /* XXX The value is not checked for validity. */
495 netdev->valid_local_netmask = true;
497 VLOG_WARN("%s: unknown argument '%s'",
498 netdev_get_name(dev_), node->name);
/* A fully configured device is requested to come up. */
501 if (netdev_pltap_finalized(netdev)) {
502 netdev->new_flags |= NETDEV_UP;
505 ovs_mutex_unlock(&netdev->mutex);
506 ovs_mutex_unlock(&sync_list_mutex);
/* Receives one packet from the tap descriptor of 'rx_'.
 *
 * The read is a two-element scatter read: the first element receives the
 * 'pi' header, the second the frame itself, placed directly into the data
 * area of a newly allocated buffer sized for a VLAN-tagged Ethernet frame.
 *
 * EINTR is not treated as an error; EAGAIN is not logged; any other error
 * produces a rate-limited warning.
 *
 * NOTE(review): 'retval' from readv() counts the bytes of both elements, so
 * "buffer->size += retval" appears to include the size of 'pi' in the packet
 * length; confirm against the statements that are not visible here. */
511 netdev_pltap_rxq_recv(struct netdev_rxq *rx_, struct ofpbuf **packet, int *c)
513 struct netdev_rxq_pltap *rx = netdev_rxq_pltap_cast(rx_);
515 struct iovec iov[2] = {
516 { .iov_base = &pi, .iov_len = sizeof(pi) },
518 struct ofpbuf *buffer = NULL;
522 buffer = ofpbuf_new_with_headroom(VLAN_ETH_HEADER_LEN + ETH_PAYLOAD_MAX,
/* The frame may use all the tailroom of the new buffer. */
524 size = ofpbuf_tailroom(buffer);
525 iov[1].iov_base = buffer->data;
526 iov[1].iov_len = size;
529 retval = readv(rx->fd, iov, 2);
531 if (retval <= size) {
532 buffer->size += retval;
538 } else if (errno != EINTR) {
539 if (errno != EAGAIN) {
540 VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
541 netdev_rxq_get_name(rx_), ovs_strerror(errno));
549 ofpbuf_delete(buffer);
551 dp_packet_pad(buffer);
/* Registers the receive descriptor with the poll loop for readability, but
 * only when the descriptor is open and the device is fully configured.
 *
 * NOTE(review): netdev_pltap_finalized() is annotated as requiring
 * 'dev->mutex', yet no lock is taken in the visible lines; confirm. */
560 netdev_pltap_rxq_wait(struct netdev_rxq *rx_)
562 struct netdev_rxq_pltap *rx = netdev_rxq_pltap_cast(rx_);
563 struct netdev_pltap *netdev =
564 netdev_pltap_cast(rx->up.netdev);
565 if (rx->fd >= 0 && netdev_pltap_finalized(netdev)) {
566 poll_fd_wait(rx->fd, POLLIN);
/* Sends the data of 'pkt' on the tap descriptor of 'netdev_'.
 *
 * The write is a two-element gather write: the 'pi' header followed by the
 * packet data.  A short write is reported with a rate-limited warning.
 * EINTR is not treated as an error; EAGAIN is not logged; any other error
 * produces a rate-limited warning. */
571 netdev_pltap_send(struct netdev *netdev_, struct ofpbuf *pkt, bool may_steal)
573 const void *buffer = pkt->data;
574 size_t size = pkt->size;
575 struct netdev_pltap *dev =
576 netdev_pltap_cast(netdev_);
/* NOTE(review): the meaning of the initializer values { 0, 0x86 } is not
 * established by this file; confirm against the tun/tap header. */
578 struct tun_pi pi = { 0, 0x86 };
579 struct iovec iov[2] = {
580 { .iov_base = &pi, .iov_len = sizeof(pi) },
581 { .iov_base = (char*) buffer, .iov_len = size }
589 retval = writev(dev->fd, iov, 2);
/* Presumably the literal 4 stands for sizeof(pi), the header written ahead
 * of the packet data; TODO confirm. */
591 if (retval != size + 4) {
592 VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%"PRIdSIZE" bytes of %"PRIuSIZE") on %s",
593 retval, size + 4, netdev_get_name(netdev_));
596 } else if (errno != EINTR) {
597 if (errno != EAGAIN) {
598 VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
599 netdev_get_name(netdev_), ovs_strerror(errno));
/* Registers the tap descriptor with the poll loop for writability, but only
 * when the descriptor is open and the device is fully configured.
 *
 * NOTE(review): netdev_pltap_finalized() is annotated as requiring
 * 'dev->mutex', yet no lock is taken in the visible lines; confirm. */
613 netdev_pltap_send_wait(struct netdev *netdev_)
615 struct netdev_pltap *dev =
616 netdev_pltap_cast(netdev_);
617 if (dev->fd >= 0 && netdev_pltap_finalized(dev)) {
618 poll_fd_wait(dev->fd, POLLOUT);
/* Discards pending input on the receive descriptor by repeatedly reading into
 * a small scratch buffer with MSG_TRUNC.
 *
 * NOTE(review): recv() returns a byte count, or -1 with 'errno' set; it does
 * not return negative errno values.  The comparisons against -EAGAIN and
 * -EMSGSIZE therefore look like they can never match a real failure.  The
 * surrounding statements are not visible here, so this is flagged, not
 * changed. */
623 netdev_pltap_rxq_drain(struct netdev_rxq *rx_)
625 struct netdev_rxq_pltap *rx = netdev_rxq_pltap_cast(rx_);
632 error = recv(rx->fd, buffer, 128, MSG_TRUNC);
634 if (error == -EAGAIN)
636 else if (error != -EMSGSIZE)
/* Set-MAC-address callback.  Both parameters are marked unused, so the
 * requested address is not applied by this function. */
644 netdev_pltap_set_etheraddr(struct netdev *netdevi OVS_UNUSED,
645 const uint8_t mac[ETH_ADDR_LEN] OVS_UNUSED)
651 // XXX from netdev-linux.c
/* Reads the hardware address of the real tap interface with a SIOCGIFHWADDR
 * request and copies ETH_ADDR_LEN bytes of it into 'ea'.  An address family
 * other than AF_UNSPEC or ARPHRD_ETHER is reported with a warning.  The
 * caller must hold 'dev->mutex'. */
653 get_etheraddr(struct netdev_pltap *dev, uint8_t ea[ETH_ADDR_LEN])
654 OVS_REQUIRES(dev->mutex)
660 memset(&ifr, 0, sizeof ifr);
661 ovs_strzcpy(ifr.ifr_name, dev->real_name, sizeof ifr.ifr_name);
662 error = af_inet_ifreq_ioctl(dev->real_name, &ifr,
663 SIOCGIFHWADDR, "SIOCGIFHWADDR");
667 hwaddr_family = ifr.ifr_hwaddr.sa_family;
668 if (hwaddr_family != AF_UNSPEC && hwaddr_family != ARPHRD_ETHER) {
669 VLOG_WARN("%s device has unknown hardware address family %d",
670 dev->real_name, hwaddr_family);
672 memcpy(ea, ifr.ifr_hwaddr.sa_data, ETH_ADDR_LEN);
/* Reads the interface flags of the real tap interface with a SIOCGIFFLAGS
 * request and translates IFF_UP / IFF_PROMISC into the corresponding
 * NETDEV_* bits of '*flags'.  The caller must hold 'dev->mutex'.
 *
 * NOTE(review): the statement guarded by the IFF_UP test is not visible
 * here; presumably it sets NETDEV_UP. */
677 get_flags(struct netdev_pltap *dev, enum netdev_flags *flags)
678 OVS_REQUIRES(dev->mutex)
683 error = af_inet_ifreq_ioctl(dev->real_name, &ifr,
684 SIOCGIFFLAGS, "SIOCGIFFLAGS");
689 if (ifr.ifr_flags & IFF_UP)
691 if (ifr.ifr_flags & IFF_PROMISC)
692 *flags |= NETDEV_PROMISC;
/* Get-MAC-address callback: takes the device mutex and delegates to
 * get_etheraddr(), which stores the address into 'mac'. */
697 netdev_pltap_get_etheraddr(const struct netdev *netdev,
698 uint8_t mac[ETH_ADDR_LEN])
700 struct netdev_pltap *dev =
701 netdev_pltap_cast(netdev);
704 ovs_mutex_lock(&dev->mutex);
709 error = get_etheraddr(dev, mac);
712 ovs_mutex_unlock(&dev->mutex);
717 // XXX can we read stats in planetlab?
/* Get-statistics callback.  Both parameters are marked unused, so no
 * statistics are filled in by this function. */
719 netdev_pltap_get_stats(const struct netdev *netdev OVS_UNUSED, struct netdev_stats *stats OVS_UNUSED)
/* Set-statistics callback.  Both parameters are marked unused, so the
 * supplied statistics are not applied by this function. */
725 netdev_pltap_set_stats(struct netdev *netdev OVS_UNUSED, const struct netdev_stats *stats OVS_UNUSED)
/* Requests that the flags in 'on' be set and those in 'off' be cleared.
 *
 * Only NETDEV_UP and NETDEV_PROMISC are supported; any other bit in 'off' or
 * 'on' takes the branch at the top.  For a finalized device the current flags
 * are first refreshed from the interface; they are reported through
 * '*old_flagsp'.  The request itself is only recorded in 'new_flags'; it is
 * not applied here.
 *
 * Both 'sync_list_mutex' and the device mutex are held, in that order. */
732 netdev_pltap_update_flags(struct netdev *dev_,
733 enum netdev_flags off, enum netdev_flags on,
734 enum netdev_flags *old_flagsp)
736 struct netdev_pltap *netdev =
737 netdev_pltap_cast(dev_);
740 ovs_mutex_lock(&sync_list_mutex);
741 ovs_mutex_lock(&netdev->mutex);
742 if ((off | on) & ~(NETDEV_UP | NETDEV_PROMISC)) {
747 if (netdev_pltap_finalized(netdev)) {
748 error = get_flags(netdev, &netdev->flags);
750 *old_flagsp = netdev->flags;
751 netdev->new_flags |= on;
752 netdev->new_flags &= ~off;
753 if (netdev->flags != netdev->new_flags) {
754 /* We cannot sync here, since we may be in a signal handler. */
759 ovs_mutex_unlock(&netdev->mutex);
760 ovs_mutex_unlock(&sync_list_mutex);
/* Reads the device's change sequence number under the device mutex. */
765 netdev_pltap_change_seq(const struct netdev *netdev)
767 struct netdev_pltap *dev =
768 netdev_pltap_cast(netdev);
769 unsigned int change_seq;
771 ovs_mutex_lock(&dev->mutex);
772 change_seq = dev->change_seq;
773 ovs_mutex_unlock(&dev->mutex);
778 /* Helper functions. */
/* Updates the change sequence number of 'dev'.  The caller must hold
 * 'dev->mutex'.
 *
 * NOTE(review): only the zero check is visible here; presumably the counter
 * is incremented first and zero is skipped on wrap-around. */
781 netdev_pltap_update_seq(struct netdev_pltap *dev)
782 OVS_REQUIRES(dev->mutex)
785 if (!dev->change_seq) {
/* unixctl handler for "netdev-pltap/get-tapname".  Looks up the netdev named
 * by argv[1] in 'pltap_netdevs' and replies with the name of its real tap
 * interface.  It replies with an error if there is no such netdev or if the
 * netdev has no open tap descriptor.  'pltap_netdevs_mutex' is held during
 * the lookup and reply. */
791 netdev_pltap_get_real_name(struct unixctl_conn *conn,
792 int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED)
794 struct netdev_pltap *pltap_dev;
796 ovs_mutex_lock(&pltap_netdevs_mutex);
797 pltap_dev = shash_find_data(&pltap_netdevs, argv[1]);
799 unixctl_command_reply_error(conn, "no such pltap netdev");
802 if (pltap_dev->fd < 0) {
803 unixctl_command_reply_error(conn, "no real device attached");
807 unixctl_command_reply(conn, pltap_dev->real_name);
810 ovs_mutex_unlock(&pltap_netdevs_mutex);
/* Class initialization: registers the "netdev-pltap/get-tapname" unixctl
 * command, which takes exactly one argument ("port"). */
814 netdev_pltap_init(void)
816 unixctl_command_register("netdev-pltap/get-tapname", "port",
817 1, 1, netdev_pltap_get_real_name, NULL);
/* Periodic work: with 'sync_list_mutex' held, synchronizes the flags of every
 * device currently linked in 'sync_list'.  The SAFE iteration variant is
 * used, which allows the current entry to be unlinked while iterating. */
822 netdev_pltap_run(void)
824 struct netdev_pltap *iter, *next;
825 ovs_mutex_lock(&sync_list_mutex);
826 LIST_FOR_EACH_SAFE(iter, next, sync_list, &sync_list) {
827 netdev_pltap_sync_flags(iter);
829 ovs_mutex_unlock(&sync_list_mutex);
/* Wait callback: if any device still awaits a flag synchronization, asks the
 * poll loop to wake up immediately so that netdev_pltap_run() gets called
 * again. */
833 netdev_pltap_wait(void)
835 ovs_mutex_lock(&sync_list_mutex);
836 if (!list_is_empty(&sync_list)) {
837 VLOG_DBG("netdev_pltap: scheduling sync");
838 poll_immediate_wake();
840 ovs_mutex_unlock(&sync_list_mutex);
843 const struct netdev_class netdev_pltap_class = {
850 netdev_pltap_construct,
851 netdev_pltap_destruct,
852 netdev_pltap_dealloc,
853 netdev_pltap_get_config,
854 netdev_pltap_set_config,
855 NULL, /* get_tunnel_config */
858 netdev_pltap_send_wait,
860 netdev_pltap_set_etheraddr,
861 netdev_pltap_get_etheraddr,
864 NULL, /* get_ifindex */
865 NULL, /* get_carrier */
866 NULL, /* get_carrier_resets */
867 NULL, /* get_miimon */
868 netdev_pltap_get_stats,
869 netdev_pltap_set_stats,
871 NULL, /* get_features */
872 NULL, /* set_advertisements */
874 NULL, /* set_policing */
875 NULL, /* get_qos_types */
876 NULL, /* get_qos_capabilities */
879 NULL, /* get_queue */
880 NULL, /* set_queue */
881 NULL, /* delete_queue */
882 NULL, /* get_queue_stats */
883 NULL, /* queue_dump_start */
884 NULL, /* queue_dump_next */
885 NULL, /* queue_dump_done */
886 NULL, /* dump_queue_stats */
891 NULL, /* add_router */
892 NULL, /* get_next_hop */
893 NULL, /* get_drv_info */
894 NULL, /* arp_lookup */
896 netdev_pltap_update_flags,
898 netdev_pltap_rxq_alloc,
899 netdev_pltap_rxq_construct,
900 netdev_pltap_rxq_destruct,
901 netdev_pltap_rxq_dealloc,
902 netdev_pltap_rxq_recv,
903 netdev_pltap_rxq_wait,
904 netdev_pltap_rxq_drain,