2 * Copyright (c) 2012 Giuseppe Lettieri
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include <sys/types.h>
22 #include <arpa/inet.h>
23 #include <sys/ioctl.h>
24 #include <sys/socket.h>
26 #include <net/if_arp.h>
27 #include <linux/if_tun.h>
28 #include <netinet/in.h>
33 #include "netdev-provider.h"
35 #include "ofp-print.h"
38 #include "poll-loop.h"
42 #include "socket-util.h"
46 VLOG_DEFINE_THIS_MODULE(netdev_pltap);
/* Global list of pltap devices whose flags still have to be synchronized.
 * Devices are linked in by sync_needed() and visited by netdev_pltap_run(). */
48 /* Protects 'sync_list'. */
49 static struct ovs_mutex sync_list_mutex = OVS_MUTEX_INITIALIZER;
51 static struct list sync_list OVS_GUARDED_BY(sync_list_mutex)
52 = LIST_INITIALIZER(&sync_list);
/* Link of this device in the global 'sync_list'; inserted by sync_needed()
 * and removed by sync_done(). */
58 struct list sync_list OVS_GUARDED_BY(sync_list_mutex);
60 /* Protects all members below. */
61 struct ovs_mutex mutex OVS_ACQ_AFTER(sync_list_mutex);
64 struct netdev_stats stats;
/* Flags requested through netdev_pltap_update_flags()/set_config(); applied
 * to the device by netdev_pltap_sync_flags(). */
65 enum netdev_flags new_flags;
/* Flags last read from the interface by get_flags(). */
66 enum netdev_flags flags;
/* Local address; its 'sin_addr' is filled from the "local_ip" option. */
68 struct sockaddr_in local_addr;
/* Set once "local_netmask" has been configured. */
71 bool valid_local_netmask;
/* True while the device is linked in 'sync_list'. */
72 bool sync_flags_needed;
/* Change sequence number reported by netdev_pltap_change_seq(); initialized
 * to 1 by the constructor. */
73 unsigned int change_seq;
/* Receive handle of a pltap device.  The functions below use its 'up' member
 * (the generic netdev_rx) and its 'fd' member (descriptor that is read). */
77 struct netdev_rx_pltap {
/* Rate limiter shared by the VLOG_WARN_RL() calls in the packet send and
 * receive paths of this file. */
82 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
/* Map from netdev name to its struct netdev_pltap.  Entries are added by the
 * constructor, removed by the destructor, and looked up by the
 * "netdev-pltap/get-tapname" unixctl command. */
84 /* Protects 'pltap_netdevs' */
85 static struct ovs_mutex pltap_netdevs_mutex = OVS_MUTEX_INITIALIZER;
86 static struct shash pltap_netdevs OVS_GUARDED_BY(pltap_netdevs_mutex)
87 = SHASH_INITIALIZER(&pltap_netdevs);
/* Forward declarations for functions that are used before their definition.
 *
 * The parameter of netdev_pltap_update_seq() is named so that the
 * OVS_REQUIRES(dev->mutex) annotation refers to a declared identifier; with
 * an unnamed parameter the annotation cannot be resolved by thread-safety
 * analysis. */
89 static int netdev_pltap_construct(struct netdev *netdev_);
91 static void netdev_pltap_update_seq(struct netdev_pltap *dev)
92 OVS_REQUIRES(dev->mutex);
93 static int get_flags(struct netdev_pltap *dev, enum netdev_flags *flags)
94 OVS_REQUIRES(dev->mutex);
/* Returns true once 'dev' has both a valid local IP address and a valid local
 * netmask configured.  The caller must hold 'dev->mutex'. */
97 netdev_pltap_finalized(struct netdev_pltap *dev)
98 OVS_REQUIRES(dev->mutex)
100 return dev->valid_local_ip && dev->valid_local_netmask;
/* Returns true if 'class' is the pltap netdev class, recognized by its
 * 'construct' callback being netdev_pltap_construct(). */
104 is_netdev_pltap_class(const struct netdev_class *class)
106 return class->construct == netdev_pltap_construct;
/* Converts a generic 'netdev' into the struct netdev_pltap that embeds it as
 * member 'up'.  Asserts that 'netdev' really belongs to the pltap class. */
109 static struct netdev_pltap *
110 netdev_pltap_cast(const struct netdev *netdev)
112 ovs_assert(is_netdev_pltap_class(netdev_get_class(netdev)));
113 return CONTAINER_OF(netdev, struct netdev_pltap, up);
/* Converts a generic receive handle 'rx' into the struct netdev_rx_pltap that
 * embeds it as member 'up'.  Asserts that the netdev owning 'rx' belongs to
 * the pltap class. */
116 static struct netdev_rx_pltap*
117 netdev_rx_pltap_cast(const struct netdev_rx *rx)
119 ovs_assert(is_netdev_pltap_class(netdev_get_class(rx->netdev)));
120 return CONTAINER_OF(rx, struct netdev_rx_pltap, up);
/* Marks 'dev' as needing a flag synchronization and links it into the global
 * 'sync_list'.  Both 'dev->mutex' and 'sync_list_mutex' must be held.
 *
 * NOTE(review): the statement guarded by the initial test is not visible in
 * this excerpt; presumably it returns early so that a device that is already
 * pending is not inserted twice -- confirm. */
123 static void sync_needed(struct netdev_pltap *dev)
124 OVS_REQUIRES(dev->mutex, sync_list_mutex)
126 if (dev->sync_flags_needed)
129 dev->sync_flags_needed = true;
130 list_insert(&sync_list, &dev->sync_list);
/* Counterpart of sync_needed(): unlinks 'dev' from 'sync_list' and clears
 * 'sync_flags_needed'.  Both 'dev->mutex' and 'sync_list_mutex' must be held.
 *
 * NOTE(review): the statement guarded by the initial test is not visible in
 * this excerpt; presumably an early return for devices that are not pending
 * -- confirm. */
133 static void sync_done(struct netdev_pltap *dev)
134 OVS_REQUIRES(dev->mutex, sync_list_mutex)
136 if (!dev->sync_flags_needed)
/* The return value of list_remove() is deliberately discarded. */
139 (void) list_remove(&dev->sync_list);
140 dev->sync_flags_needed = false;
/* 'alloc' callback of the netdev class: obtains zero-filled storage for a
 * struct netdev_pltap with xzalloc(). */
143 static struct netdev *
144 netdev_pltap_alloc(void)
146 struct netdev_pltap *netdev = xzalloc(sizeof *netdev);
/* 'construct' callback: initializes the pltap-specific state of 'netdev_',
 * opens the tap device, makes its descriptor non-blocking and registers the
 * device in 'pltap_netdevs' under its netdev name.
 *
 * NOTE(review): several statements (error assignment, cleanup and return
 * paths) are not visible in this excerpt; what happens to 'real_name' and
 * 'fd' when a later step fails cannot be told from here. */
151 netdev_pltap_construct(struct netdev *netdev_)
153 struct netdev_pltap *netdev = netdev_pltap_cast(netdev_);
156 ovs_mutex_init(&netdev->mutex);
/* Zero-filled buffer of IFNAMSIZ + 1 bytes, passed to tun_alloc() below. */
157 netdev->real_name = xzalloc(IFNAMSIZ + 1);
158 memset(&netdev->local_addr, 0, sizeof(netdev->local_addr));
/* The device is not "finalized" until both items have been configured. */
159 netdev->valid_local_ip = false;
160 netdev->valid_local_netmask = false;
162 netdev->sync_flags_needed = false;
163 netdev->change_seq = 1;
166 /* Open tap device. */
167 netdev->fd = tun_alloc(IFF_TAP, netdev->real_name);
168 if (netdev->fd < 0) {
170 VLOG_WARN("tun_alloc(IFF_TAP, %s) failed: %s",
171 netdev_get_name(netdev_), ovs_strerror(error));
174 VLOG_DBG("real_name = %s", netdev->real_name);
176 /* Make non-blocking. */
177 error = set_nonblocking(netdev->fd);
/* Make the device discoverable by name (used by the unixctl command). */
182 ovs_mutex_lock(&pltap_netdevs_mutex);
183 shash_add(&pltap_netdevs, netdev_get_name(netdev_), netdev);
184 ovs_mutex_unlock(&pltap_netdevs_mutex);
/* 'destruct' callback: undoes netdev_pltap_construct().  While holding
 * 'pltap_netdevs_mutex' it unlinks the device from 'sync_list' if a
 * synchronization is pending, removes it from 'pltap_netdevs', and finally
 * destroys the per-device mutex.
 *
 * NOTE(review): the statement guarded by the 'fd != -1' test is not visible
 * in this excerpt (presumably it closes the descriptor).  The constructor
 * tests 'fd < 0' while this function tests 'fd != -1' -- confirm both agree
 * on the "no descriptor" value. */
189 netdev_pltap_destruct(struct netdev *netdev_)
191 struct netdev_pltap *netdev = netdev_pltap_cast(netdev_);
193 ovs_mutex_lock(&pltap_netdevs_mutex);
194 if (netdev->fd != -1)
/* 'sync_flags_needed' is read here without taking 'netdev->mutex'. */
197 if (netdev->sync_flags_needed) {
198 ovs_mutex_lock(&sync_list_mutex);
199 (void) list_remove(&netdev->sync_list);
200 ovs_mutex_unlock(&sync_list_mutex);
203 shash_find_and_delete(&pltap_netdevs,
204 netdev_get_name(netdev_));
205 ovs_mutex_unlock(&pltap_netdevs_mutex);
206 ovs_mutex_destroy(&netdev->mutex);
/* 'dealloc' callback: releases the storage obtained by netdev_pltap_alloc().
 * (The releasing statement itself is not visible in this excerpt.) */
210 netdev_pltap_dealloc(struct netdev *netdev_)
212 struct netdev_pltap *netdev = netdev_pltap_cast(netdev_);
/* Forward declaration: netdev_pltap_rx_construct() calls netdev_pltap_up()
 * before its definition.  The caller must hold 'dev->mutex'. */
216 static int netdev_pltap_up(struct netdev_pltap *dev) OVS_REQUIRES(dev->mutex);
/* 'rx_alloc' callback: obtains zero-filled storage for a receive handle
 * (struct netdev_rx_pltap) with xzalloc(). */
218 static struct netdev_rx *
219 netdev_pltap_rx_alloc(void)
221 struct netdev_rx_pltap *rx = xzalloc(sizeof *rx);
/* 'rx_construct' callback: prepares the receive handle 'rx_' for its pltap
 * device.  With the device mutex held, it checks whether the device is
 * finalized and brings the interface up with netdev_pltap_up().
 *
 * NOTE(review): the statement guarded by the finalized test and the error
 * handling after netdev_pltap_up() are not visible in this excerpt. */
226 netdev_pltap_rx_construct(struct netdev_rx *rx_)
228 struct netdev_rx_pltap *rx = netdev_rx_pltap_cast(rx_);
229 struct netdev *netdev_ = rx->up.netdev;
230 struct netdev_pltap *netdev =
231 netdev_pltap_cast(netdev_);
234 ovs_mutex_lock(&netdev->mutex);
236 if (!netdev_pltap_finalized(netdev))
238 error = netdev_pltap_up(netdev);
243 ovs_mutex_unlock(&netdev->mutex);
/* 'rx_destruct' callback.  The 'rx_' argument is marked unused; the body is
 * not visible in this excerpt. */
248 netdev_pltap_rx_destruct(struct netdev_rx *rx_ OVS_UNUSED)
/* 'rx_dealloc' callback: releases the storage obtained by
 * netdev_pltap_rx_alloc().  (The releasing statement itself is not visible in
 * this excerpt.) */
253 netdev_pltap_rx_dealloc(struct netdev_rx *rx_)
255 struct netdev_rx_pltap *rx = netdev_rx_pltap_cast(rx_);
/* Runs one request/reply exchange with the vsys script named 'script'.
 *
 * The request is the printf-style 'format' expanded with the variadic
 * arguments.  It is written to "/vsys/<script>.in" while the reply is read
 * from "/vsys/<script>.out"; both are opened non-blocking and multiplexed
 * with select().
 *
 * If any reply bytes were read they are NUL-terminated; the reply is either
 * handed over to the caller (the local pointer is cleared so that it is not
 * freed here) or logged as an error.  NOTE(review): the statements that store
 * into '*preply', compute the return value and release resources are not
 * visible in this excerpt.
 *
 * Fix: at most 'reply_size - 1' bytes are read, so that the terminating NUL
 * written at 'reply[bytes_read]' always lies inside the 'reply_size'-byte
 * buffer.  The original read up to 'reply_size' bytes and could write one
 * byte past the end. */
260 static int vsys_transaction(const char *script,
261 const char **preply, char *format, ...)
263 char *msg = NULL, *reply = NULL;
264 const size_t reply_size = 1024;
265 int ifd = -1, ofd = -1, maxfd;
266 size_t bytes_to_write, bytes_to_read,
267 bytes_written = 0, bytes_read = 0;
269 char *ofname = NULL, *ifname = NULL;
272 va_start(args, format);
273 msg = xvasprintf(format, args);
275 reply = (char*)xmalloc(reply_size);
276 if (!msg || !reply) {
277 VLOG_ERR("Out of memory");
282 ofname = xasprintf("/vsys/%s.out", script);
283 ifname = xasprintf("/vsys/%s.in", script);
284 if (!ofname || !ifname) {
285 VLOG_ERR("Out of memory");
/* Output pipe of the script: we read the reply from it. */
290 ofd = open(ofname, O_RDONLY | O_NONBLOCK);
292 VLOG_ERR("Cannot open %s: %s", ofname, ovs_strerror(errno));
/* Input pipe of the script: we write the request to it. */
296 ifd = open(ifname, O_WRONLY | O_NONBLOCK);
298 VLOG_ERR("Cannot open %s: %s", ifname, ovs_strerror(errno));
/* select() needs the highest descriptor number plus one. */
302 maxfd = (ifd < ofd) ? ofd : ifd;
304 bytes_to_write = strlen(msg);
/* Reserve one byte of 'reply' for the terminating NUL. */
305 bytes_to_read = reply_size - 1;
306 while (bytes_to_write || bytes_to_read) {
307 fd_set readset, writeset, errorset;
/* Only wait for writability while part of the request is still unsent. */
312 if (bytes_to_write) {
313 FD_SET(ifd, &writeset);
314 FD_SET(ifd, &errorset);
316 FD_SET(ofd, &readset);
317 FD_SET(ofd, &errorset);
318 if (select(maxfd + 1, &readset, &writeset, &errorset, NULL) < 0) {
321 VLOG_ERR("select error: %s", ovs_strerror(errno));
325 if (FD_ISSET(ifd, &errorset) || FD_ISSET(ofd, &errorset)) {
326 VLOG_ERR("error condition on ifd or ofd");
329 if (FD_ISSET(ifd, &writeset)) {
330 ssize_t n = write(ifd, msg + bytes_written, bytes_to_write);
/* EAGAIN and EINTR are transient: retry on the next loop iteration. */
332 if (errno != EAGAIN && errno != EINTR) {
333 VLOG_ERR("write on %s: %s", ifname, ovs_strerror(errno));
340 if (bytes_to_write == 0)
344 if (FD_ISSET(ofd, &readset)) {
345 ssize_t n = read(ofd, reply + bytes_read, bytes_to_read);
347 if (errno != EAGAIN && errno != EINTR) {
348 VLOG_ERR("read on %s: %s", ofname, ovs_strerror(errno));
361 reply[bytes_read] = '\0';
364 reply = NULL; /* prevent freeing the reply msg */
366 VLOG_ERR("%s returned: %s", script, reply);
/* Brings the interface up through the "vif_up" vsys script.  The request
 * consists of three newline-terminated fields; the second one is the local
 * IPv4 address.  The caller must hold 'dev->mutex'.
 *
 * NOTE(review): the first and third arguments and the statement executed
 * when the device is not yet finalized are not visible in this excerpt. */
383 netdev_pltap_up(struct netdev_pltap *dev)
384 OVS_REQUIRES(dev->mutex)
386 if (!netdev_pltap_finalized(dev)) {
390 return vsys_transaction("vif_up", NULL, "%s\n"IP_FMT"\n%d\n",
392 IP_ARGS(dev->local_addr.sin_addr.s_addr),
/* Brings the interface down through the "vif_down" vsys script, passing the
 * real device name followed by a newline.  The caller must hold 'dev->mutex'.
 *
 * NOTE(review): the statement executed when the device is not yet finalized
 * is not visible in this excerpt. */
397 netdev_pltap_down(struct netdev_pltap *dev)
398 OVS_REQUIRES(dev->mutex)
400 if (!netdev_pltap_finalized(dev)) {
404 return vsys_transaction("vif_down", NULL, "%s\n", dev->real_name);
/* Switches promiscuous mode on ('promisc' true) or off through the "promisc"
 * vsys script.  The request ends with an empty string to enable the mode and
 * with "-\n" to disable it.  The caller must hold 'dev->mutex'.
 *
 * Fix: the lock annotation read 'dev-mutex' (a subtraction involving an
 * undeclared identifier) instead of 'dev->mutex'.
 *
 * NOTE(review): the statement executed when the device is not yet finalized
 * and the first format argument are not visible in this excerpt. */
408 netdev_pltap_promisc(struct netdev_pltap *dev, bool promisc)
409 OVS_REQUIRES(dev->mutex)
411 if (!netdev_pltap_finalized(dev)) {
415 return vsys_transaction("promisc", NULL, "%s\n%s",
417 (promisc ? "" : "-\n"));
/* Applies the requested flags ('dev->new_flags') to the interface, comparing
 * them with the current ones ('dev->flags'): the interface is brought up or
 * down when NETDEV_UP differs, and promiscuous mode is switched when
 * NETDEV_PROMISC differs.  Results of the vsys calls are deliberately
 * ignored.  The change sequence number is updated afterwards.
 *
 * The caller must hold 'sync_list_mutex'; 'dev->mutex' is taken here.
 *
 * NOTE(review): the statement executed when the device has no descriptor or
 * is not finalized, and any statements between the sequence update and the
 * unlock, are not visible in this excerpt. */
421 netdev_pltap_sync_flags(struct netdev_pltap *dev)
422 OVS_REQUIRES(sync_list_mutex)
425 ovs_mutex_lock(&dev->mutex);
427 if (dev->fd < 0 || !netdev_pltap_finalized(dev)) {
431 VLOG_DBG("sync_flags(%s): current: %s %s target: %s %s",
433 (dev->flags & NETDEV_UP ? "UP" : "-"),
434 (dev->flags & NETDEV_PROMISC ? "PROMISC" : "-"),
435 (dev->new_flags & NETDEV_UP ? "UP" : "-"),
436 (dev->new_flags & NETDEV_PROMISC ? "PROMISC" : "-"));
/* UP requested but currently down -> bring up; the reverse -> bring down. */
438 if ((dev->new_flags & NETDEV_UP) && !(dev->flags & NETDEV_UP)) {
439 (void) netdev_pltap_up(dev);
440 } else if (!(dev->new_flags & NETDEV_UP) && (dev->flags & NETDEV_UP)) {
441 (void) netdev_pltap_down(dev);
/* XOR of the two masked values is nonzero exactly when PROMISC differs. */
444 if ((dev->new_flags & NETDEV_PROMISC) ^ (dev->flags & NETDEV_PROMISC)) {
445 (void) netdev_pltap_promisc(dev, dev->new_flags & NETDEV_PROMISC);
448 netdev_pltap_update_seq(dev);
452 ovs_mutex_unlock(&dev->mutex);
/* 'get_config' callback: adds the configured options of 'dev_' to 'args'.
 * "local_ip" is reported only when a valid address has been set and
 * "local_netmask" only when a valid netmask has been set.
 *
 * Fix: the netmask is stored by netdev_pltap_set_config() exactly as parsed
 * by atoi(), i.e. in host byte order, so it must be reported as is.  The
 * original passed it through ntohs(), which byte-swaps the value on
 * little-endian hosts.  The cast makes the argument match "%"PRIu32. */
457 netdev_pltap_get_config(const struct netdev *dev_, struct smap *args)
459 struct netdev_pltap *netdev = netdev_pltap_cast(dev_);
461 ovs_mutex_lock(&netdev->mutex);
462 if (netdev->valid_local_ip)
463 smap_add_format(args, "local_ip", IP_FMT,
464 IP_ARGS(netdev->local_addr.sin_addr.s_addr));
465 if (netdev->valid_local_netmask)
466 smap_add_format(args, "local_netmask", "%"PRIu32,
467 (uint32_t) netdev->local_netmask);
468 ovs_mutex_unlock(&netdev->mutex);
/* 'set_config' callback: applies the options in 'args' to 'dev_'.
 *
 * Recognized options:
 *   - "local_ip": resolved with lookup_ip(); on success it becomes the local
 *     address and marks the IP as valid, otherwise a warning is logged.
 *   - "local_netmask": parsed with atoi() and marked valid.
 * Any other option produces a warning.  Once both items are valid the device
 * is requested to come up by setting NETDEV_UP in 'new_flags'.
 *
 * Locks are taken in the order 'sync_list_mutex', then the device mutex.
 *
 * Fix: the bad-"local_ip" warning now names the device, like the
 * unknown-argument warning does; the original printed the option name twice
 * ("local_ip: bad 'local_ip'").
 *
 * NOTE(review): statements following the NETDEV_UP request (presumably
 * scheduling a sync) are not visible in this excerpt. */
473 netdev_pltap_set_config(struct netdev *dev_, const struct smap *args)
475 struct netdev_pltap *netdev = netdev_pltap_cast(dev_);
476 struct shash_node *node;
478 ovs_mutex_lock(&sync_list_mutex);
479 ovs_mutex_lock(&netdev->mutex);
480 VLOG_DBG("pltap_set_config(%s)", netdev_get_name(dev_));
481 SMAP_FOR_EACH(node, args) {
482 VLOG_DBG("arg: %s->%s", node->name, (char*)node->data);
483 if (!strcmp(node->name, "local_ip")) {
485 if (lookup_ip(node->data, &addr)) {
486 VLOG_WARN("%s: bad 'local_ip'", netdev_get_name(dev_));
488 netdev->local_addr.sin_addr = addr;
489 netdev->valid_local_ip = true;
491 } else if (!strcmp(node->name, "local_netmask")) {
492 netdev->local_netmask = atoi(node->data);
493 /* XXX check validity: the atoi() result is stored unchecked. */
494 netdev->valid_local_netmask = true;
496 VLOG_WARN("%s: unknown argument '%s'",
497 netdev_get_name(dev_), node->name);
500 if (netdev_pltap_finalized(netdev)) {
501 netdev->new_flags |= NETDEV_UP;
504 ovs_mutex_unlock(&netdev->mutex);
505 ovs_mutex_unlock(&sync_list_mutex);
/* 'rx_recv' callback: reads one packet from the receive descriptor into the
 * tail room of 'buffer'.  The read is a scatter read: the first element
 * receives the header ('pi') that precedes the frame, the second the frame
 * itself.  EINTR is retried/ignored silently and EAGAIN is not logged; other
 * errors produce a rate-limited warning.
 *
 * NOTE(review): 'retval' as returned by readv() counts the bytes of both
 * elements, including the 'pi' header, yet it is compared with 'size' and
 * added to 'buffer->size' unchanged -- confirm whether the header length
 * should be subtracted.  The log text contains the typo "receiveing". */
510 netdev_pltap_rx_recv(struct netdev_rx *rx_, struct ofpbuf *buffer)
512 size_t size = ofpbuf_tailroom(buffer);
513 struct netdev_rx_pltap *rx = netdev_rx_pltap_cast(rx_);
515 struct iovec iov[2] = {
516 { .iov_base = &pi, .iov_len = sizeof(pi) },
517 { .iov_base = buffer->data, .iov_len = size }
521 retval = readv(rx->fd, iov, 2);
523 if (retval <= size) {
524 buffer->size += retval;
529 } else if (errno != EINTR) {
530 if (errno != EAGAIN) {
531 VLOG_WARN_RL(&rl, "error receiveing Ethernet packet on %s: %s",
532 netdev_rx_get_name(rx_), ovs_strerror(errno));
/* 'rx_wait' callback: arranges for the poll loop to wake up when the receive
 * descriptor becomes readable, provided the descriptor is open and the device
 * is finalized.
 *
 * NOTE(review): netdev_pltap_finalized() is annotated as requiring
 * 'dev->mutex', but no lock acquisition is visible here -- confirm. */
540 netdev_pltap_rx_wait(struct netdev_rx *rx_)
542 struct netdev_rx_pltap *rx = netdev_rx_pltap_cast(rx_);
543 struct netdev_pltap *netdev =
544 netdev_pltap_cast(rx->up.netdev);
545 if (rx->fd >= 0 && netdev_pltap_finalized(netdev)) {
546 poll_fd_wait(rx->fd, POLLIN);
/* 'send' callback: writes the 'size'-byte frame in 'buffer' to the device
 * descriptor, preceded by a 'struct tun_pi' header, with a single gather
 * write.  A short write produces a rate-limited warning; EINTR is
 * retried/ignored silently and EAGAIN is not logged.
 *
 * NOTE(review): the literal 4 in 'size + 4' presumably stands for
 * sizeof(pi); the meaning of the header initializer { 0, 0x86 } cannot be
 * told from this excerpt -- confirm both. */
551 netdev_pltap_send(struct netdev *netdev_, const void *buffer, size_t size)
553 struct netdev_pltap *dev =
554 netdev_pltap_cast(netdev_);
555 struct tun_pi pi = { 0, 0x86 };
556 struct iovec iov[2] = {
557 { .iov_base = &pi, .iov_len = sizeof(pi) },
558 { .iov_base = (char*) buffer, .iov_len = size }
564 retval = writev(dev->fd, iov, 2);
566 if (retval != size + 4) {
567 VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%"PRIdSIZE" bytes of %"PRIuSIZE") on %s",
568 retval, size + 4, netdev_get_name(netdev_));
571 } else if (errno != EINTR) {
572 if (errno != EAGAIN) {
573 VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
574 netdev_get_name(netdev_), ovs_strerror(errno));
/* 'send_wait' callback: arranges for the poll loop to wake up when the device
 * descriptor becomes writable, provided the descriptor is open and the device
 * is finalized.
 *
 * NOTE(review): netdev_pltap_finalized() is annotated as requiring
 * 'dev->mutex', but no lock acquisition is visible here -- confirm. */
582 netdev_pltap_send_wait(struct netdev *netdev_)
584 struct netdev_pltap *dev =
585 netdev_pltap_cast(netdev_);
586 if (dev->fd >= 0 && netdev_pltap_finalized(dev)) {
587 poll_fd_wait(dev->fd, POLLOUT);
/* 'rx_drain' callback: discards pending input by repeatedly receiving into a
 * scratch buffer (128 bytes are requested per call, with MSG_TRUNC).
 *
 * NOTE(review): the value returned by recv() is compared with -EAGAIN and
 * -EMSGSIZE.  recv() reports failure by returning -1 and setting errno, and
 * returns a byte count on success, so these comparisons look wrong -- confirm
 * against the statements not visible in this excerpt. */
592 netdev_pltap_rx_drain(struct netdev_rx *rx_)
594 struct netdev_rx_pltap *rx = netdev_rx_pltap_cast(rx_);
601 error = recv(rx->fd, buffer, 128, MSG_TRUNC);
603 if (error == -EAGAIN)
605 else if (error != -EMSGSIZE)
/* 'set_etheraddr' callback.  Both parameters are marked unused; the body is
 * not visible in this excerpt. */
613 netdev_pltap_set_etheraddr(struct netdev *netdevi OVS_UNUSED,
614 const uint8_t mac[ETH_ADDR_LEN] OVS_UNUSED)
/* Reads the hardware (MAC) address of the real interface 'dev->real_name'
 * with the SIOCGIFHWADDR ioctl and copies ETH_ADDR_LEN bytes of it into 'ea'.
 * A warning is logged when the reported address family is neither AF_UNSPEC
 * nor ARPHRD_ETHER.  The caller must hold 'dev->mutex'.
 *
 * NOTE(review): the handling of an ioctl failure and the return statements
 * are not visible in this excerpt. */
620 // XXX from netdev-linux.c
622 get_etheraddr(struct netdev_pltap *dev, uint8_t ea[ETH_ADDR_LEN])
623 OVS_REQUIRES(dev->mutex)
629 memset(&ifr, 0, sizeof ifr);
630 ovs_strzcpy(ifr.ifr_name, dev->real_name, sizeof ifr.ifr_name);
631 error = af_inet_ifreq_ioctl(dev->real_name, &ifr,
632 SIOCGIFHWADDR, "SIOCGIFHWADDR");
636 hwaddr_family = ifr.ifr_hwaddr.sa_family;
637 if (hwaddr_family != AF_UNSPEC && hwaddr_family != ARPHRD_ETHER) {
638 VLOG_WARN("%s device has unknown hardware address family %d",
639 dev->real_name, hwaddr_family);
641 memcpy(ea, ifr.ifr_hwaddr.sa_data, ETH_ADDR_LEN);
/* Reads the flags of the real interface 'dev->real_name' with the
 * SIOCGIFFLAGS ioctl and translates them into netdev flags stored in
 * '*flags'; IFF_PROMISC maps to NETDEV_PROMISC.  The caller must hold
 * 'dev->mutex'.
 *
 * NOTE(review): the statement guarded by the IFF_UP test (presumably setting
 * NETDEV_UP), the initialization of '*flags' and the error handling are not
 * visible in this excerpt. */
646 get_flags(struct netdev_pltap *dev, enum netdev_flags *flags)
647 OVS_REQUIRES(dev->mutex)
652 error = af_inet_ifreq_ioctl(dev->real_name, &ifr,
653 SIOCGIFFLAGS, "SIOCGIFFLAGS");
658 if (ifr.ifr_flags & IFF_UP)
660 if (ifr.ifr_flags & IFF_PROMISC)
661 *flags |= NETDEV_PROMISC;
/* 'get_etheraddr' callback: stores the MAC address of 'netdev' into 'mac' by
 * calling get_etheraddr() with the device mutex held.
 *
 * NOTE(review): statements between taking the lock and the call (possibly a
 * precondition check) are not visible in this excerpt. */
666 netdev_pltap_get_etheraddr(const struct netdev *netdev,
667 uint8_t mac[ETH_ADDR_LEN])
669 struct netdev_pltap *dev =
670 netdev_pltap_cast(netdev);
673 ovs_mutex_lock(&dev->mutex);
678 error = get_etheraddr(dev, mac);
681 ovs_mutex_unlock(&dev->mutex);
/* 'get_stats' callback.  Both parameters are marked unused; the body is not
 * visible in this excerpt. */
686 // XXX can we read stats in PlanetLab?
688 netdev_pltap_get_stats(const struct netdev *netdev OVS_UNUSED, struct netdev_stats *stats OVS_UNUSED)
/* 'set_stats' callback.  Both parameters are marked unused; the body is not
 * visible in this excerpt. */
694 netdev_pltap_set_stats(struct netdev *netdev OVS_UNUSED, const struct netdev_stats *stats OVS_UNUSED)
/* 'update_flags' callback: records a request to turn the flags in 'off' off
 * and the flags in 'on' on, and reports the current flags in '*old_flagsp'.
 *
 * Only NETDEV_UP and NETDEV_PROMISC are handled; any other bit in 'off' or
 * 'on' takes the special branch at the top (its body is not visible in this
 * excerpt).  For a finalized device the current flags are first refreshed
 * with get_flags().  The request is accumulated in 'new_flags'; the actual
 * change is deferred, as the comment below explains.
 *
 * Locks are taken in the order 'sync_list_mutex', then the device mutex. */
701 netdev_pltap_update_flags(struct netdev *dev_,
702 enum netdev_flags off, enum netdev_flags on,
703 enum netdev_flags *old_flagsp)
705 struct netdev_pltap *netdev =
706 netdev_pltap_cast(dev_);
709 ovs_mutex_lock(&sync_list_mutex);
710 ovs_mutex_lock(&netdev->mutex);
711 if ((off | on) & ~(NETDEV_UP | NETDEV_PROMISC)) {
716 if (netdev_pltap_finalized(netdev)) {
717 error = get_flags(netdev, &netdev->flags);
719 *old_flagsp = netdev->flags;
720 netdev->new_flags |= on;
721 netdev->new_flags &= ~off;
722 if (netdev->flags != netdev->new_flags) {
723 /* we cannot sync here, since we may be in a signal handler */
728 ovs_mutex_unlock(&netdev->mutex);
729 ovs_mutex_unlock(&sync_list_mutex);
/* 'change_seq' callback: reads the change sequence number of 'netdev' into a
 * local copy while holding the device mutex. */
734 netdev_pltap_change_seq(const struct netdev *netdev)
736 struct netdev_pltap *dev =
737 netdev_pltap_cast(netdev);
738 unsigned int change_seq;
740 ovs_mutex_lock(&dev->mutex);
741 change_seq = dev->change_seq;
742 ovs_mutex_unlock(&dev->mutex);
747 /* Helper functions. */
/* Updates the change sequence number of 'dev'.  The caller must hold
 * 'dev->mutex'.
 *
 * NOTE(review): only the zero test is visible in this excerpt; presumably
 * the counter is incremented and the value 0 is skipped -- confirm. */
750 netdev_pltap_update_seq(struct netdev_pltap *dev)
751 OVS_REQUIRES(dev->mutex)
754 if (!dev->change_seq) {
/* unixctl handler for "netdev-pltap/get-tapname": looks up the pltap device
 * named by 'argv[1]' in 'pltap_netdevs' and replies with its real device
 * name.  Replies with an error when no such device exists or when the device
 * has no descriptor ('fd < 0').  The lookup and the replies happen with
 * 'pltap_netdevs_mutex' held. */
760 netdev_pltap_get_real_name(struct unixctl_conn *conn,
761 int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED)
763 struct netdev_pltap *pltap_dev;
765 ovs_mutex_lock(&pltap_netdevs_mutex);
766 pltap_dev = shash_find_data(&pltap_netdevs, argv[1]);
768 unixctl_command_reply_error(conn, "no such pltap netdev");
771 if (pltap_dev->fd < 0) {
772 unixctl_command_reply_error(conn, "no real device attached");
776 unixctl_command_reply(conn, pltap_dev->real_name);
779 ovs_mutex_unlock(&pltap_netdevs_mutex);
/* 'init' callback: registers the "netdev-pltap/get-tapname" unixctl command,
 * documented as taking a "port" argument and served by
 * netdev_pltap_get_real_name(). */
783 netdev_pltap_init(void)
785 unixctl_command_register("netdev-pltap/get-tapname", "port",
786 1, 1, netdev_pltap_get_real_name, NULL);
/* 'run' callback: with 'sync_list_mutex' held, synchronizes the flags of
 * every device linked in 'sync_list'.  The removal-safe list iterator is
 * used (presumably because the callee unlinks the device -- confirm). */
791 netdev_pltap_run(void)
793 struct netdev_pltap *iter, *next;
794 ovs_mutex_lock(&sync_list_mutex);
795 LIST_FOR_EACH_SAFE(iter, next, sync_list, &sync_list) {
796 netdev_pltap_sync_flags(iter);
798 ovs_mutex_unlock(&sync_list_mutex);
/* 'wait' callback: if any device is waiting for a flag synchronization,
 * makes the poll loop wake up immediately so that netdev_pltap_run() gets
 * called without delay. */
802 netdev_pltap_wait(void)
804 ovs_mutex_lock(&sync_list_mutex);
805 if (!list_is_empty(&sync_list)) {
806 VLOG_DBG("netdev_pltap: scheduling sync");
807 poll_immediate_wake();
809 ovs_mutex_unlock(&sync_list_mutex);
/* Callback table that plugs the functions of this file into the generic
 * netdev layer.  The initializer is positional, so the order of the entries
 * must match the member order of struct netdev_class; NULL marks an
 * operation this provider does not implement.
 *
 * NOTE(review): several entries are not visible in this excerpt, so the
 * slot of each visible entry cannot be verified from here. */
812 const struct netdev_class netdev_pltap_class = {
819 netdev_pltap_construct,
820 netdev_pltap_destruct,
821 netdev_pltap_dealloc,
822 netdev_pltap_get_config,
823 netdev_pltap_set_config,
824 NULL, /* get_tunnel_config */
827 netdev_pltap_send_wait,
829 netdev_pltap_set_etheraddr,
830 netdev_pltap_get_etheraddr,
833 NULL, /* get_ifindex */
834 NULL, /* get_carrier */
835 NULL, /* get_carrier_resets */
836 NULL, /* get_miimon */
837 netdev_pltap_get_stats,
838 netdev_pltap_set_stats,
840 NULL, /* get_features */
841 NULL, /* set_advertisements */
843 NULL, /* set_policing */
844 NULL, /* get_qos_types */
845 NULL, /* get_qos_capabilities */
848 NULL, /* get_queue */
849 NULL, /* set_queue */
850 NULL, /* delete_queue */
851 NULL, /* get_queue_stats */
852 NULL, /* queue_dump_start */
853 NULL, /* queue_dump_next */
854 NULL, /* queue_dump_done */
855 NULL, /* dump_queue_stats */
860 NULL, /* add_router */
861 NULL, /* get_next_hop */
862 NULL, /* get_drv_info */
863 NULL, /* arp_lookup */
865 netdev_pltap_update_flags,
/* Receive-handle (netdev_rx) operations. */
867 netdev_pltap_rx_alloc,
868 netdev_pltap_rx_construct,
869 netdev_pltap_rx_destruct,
870 netdev_pltap_rx_dealloc,
871 netdev_pltap_rx_recv,
872 netdev_pltap_rx_wait,
873 netdev_pltap_rx_drain,