2 * Copyright (c) 2012 Giuseppe Lettieri
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include <sys/types.h>
22 #include <arpa/inet.h>
23 #include <sys/ioctl.h>
24 #include <sys/socket.h>
26 #include <net/if_arp.h>
27 #include <linux/if_tun.h>
28 #include <netinet/in.h>
33 #include "netdev-provider.h"
35 #include "ofp-print.h"
38 #include "poll-loop.h"
42 #include "socket-util.h"
/* Declares the logging module name used by this file: netdev_pltap. */
46 VLOG_DEFINE_THIS_MODULE(netdev_pltap);
/*
 * Per-device state of a pltap network device.
 *
 * NOTE(review): code elsewhere in this file also uses the members real_name,
 * fd, local_netmask and valid_local_ip; their declarations are not visible in
 * this excerpt.
 */
48 struct netdev_dev_pltap {
/* Embedded generic device; netdev_dev_pltap_cast() applies CONTAINER_OF to
 * this member to get back to the enclosing structure. */
49 struct netdev_dev netdev_dev;
/* Not referenced by any code visible in this excerpt. */
51 struct netdev_stats stats;
/* Flags the device should end up with ("target" in the sync_flags log). */
52 enum netdev_flags new_flags;
/* Flags last read from the device ("current" in the sync_flags log). */
53 enum netdev_flags flags;
/* Local address; sin_addr is filled from the "local_ip" config key. */
55 struct sockaddr_in local_addr;
/* Set to true once a "local_netmask" config key has been stored. */
58 bool valid_local_netmask;
/* True while this device is linked on the file-level sync_list. */
59 bool sync_flags_needed;
/* List node used to link this device on the file-level sync_list. */
60 struct list sync_list;
/* Value returned by netdev_pltap_change_seq(). */
61 unsigned int change_seq;
/* Head of the list of devices whose flags still have to be synchronized;
 * initialized in netdev_pltap_init() and walked by netdev_pltap_run(). */
64 static struct list sync_list;
/* AF_INET datagram socket used as the ioctl() target in get_etheraddr() and
 * get_flags(); stays -1 until netdev_pltap_init() creates it. */
70 static int af_inet_sock = -1;
/* Rate limiter shared by the VLOG_WARN_RL() calls in the send/receive paths. */
72 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
/* Maps a device name to its struct netdev_dev_pltap; filled in by
 * netdev_pltap_create(), emptied by netdev_pltap_destroy() and queried by the
 * "netdev-pltap/get-tapname" unixctl command. */
74 static struct shash pltap_netdev_devs = SHASH_INITIALIZER(&pltap_netdev_devs);
/* Forward declarations for functions that are used before their definitions
 * (is_pltap_class(), netdev_pltap_sync_flags() and
 * netdev_pltap_update_flags() refer to them). */
76 static int netdev_pltap_create(const struct netdev_class *, const char *,
77 struct netdev_dev **);
79 static void netdev_pltap_update_seq(struct netdev_dev_pltap *);
80 static int get_flags(struct netdev_dev_pltap *dev, enum netdev_flags *flags);
/* Returns true once both a local IP address and a local netmask have been
 * configured for 'dev'.  Most operations in this file check this first and
 * take a separate path while it is false. */
83 netdev_pltap_finalized(struct netdev_dev_pltap *dev)
85 return dev->valid_local_ip && dev->valid_local_netmask;
/* Tells whether 'class' is the pltap class, recognized by its 'create'
 * callback being netdev_pltap_create(). */
89 is_pltap_class(const struct netdev_class *class)
91 return class->create == netdev_pltap_create;
/* Converts a generic 'netdev_dev' into the struct netdev_dev_pltap that
 * embeds it.  Asserts that the device belongs to the pltap class. */
94 static struct netdev_dev_pltap *
95 netdev_dev_pltap_cast(const struct netdev_dev *netdev_dev)
97 ovs_assert(is_pltap_class(netdev_dev_get_class(netdev_dev)));
98 return CONTAINER_OF(netdev_dev, struct netdev_dev_pltap, netdev_dev);
/* Converts a generic 'netdev' handle into the struct netdev_pltap that embeds
 * it.  Asserts that the underlying device belongs to the pltap class.
 * (The definition of struct netdev_pltap is not visible in this excerpt.) */
101 static struct netdev_pltap *
102 netdev_pltap_cast(const struct netdev *netdev)
104 struct netdev_dev *netdev_dev = netdev_get_dev(netdev);
105 ovs_assert(is_pltap_class(netdev_dev_get_class(netdev_dev)));
106 return CONTAINER_OF(netdev, struct netdev_pltap, netdev);
/* Marks 'dev' as needing a flag synchronization and links it on the
 * file-level sync_list so that netdev_pltap_run() will pick it up. */
109 static void sync_needed(struct netdev_dev_pltap *dev)
/* NOTE(review): the statement guarded by this test is not visible in this
 * excerpt; presumably an early return so a device is never inserted twice. */
111 if (dev->sync_flags_needed)
114 dev->sync_flags_needed = true;
115 list_insert(&sync_list, &dev->sync_list);
/* Reverses sync_needed(): unlinks 'dev' from the file-level sync_list and
 * clears its pending mark. */
119 static void sync_done(struct netdev_dev_pltap *dev)
/* NOTE(review): the statement guarded by this test is not visible in this
 * excerpt; presumably an early return when the device is not on the list. */
121 if (!dev->sync_flags_needed)
124 (void) list_remove(&dev->sync_list);
125 dev->sync_flags_needed = false;
/*
 * Creates the pltap device called 'name' and hands its embedded generic
 * device back through '*netdev_devp'.
 *
 * Visible steps: allocate the device, reset its configuration state, open a
 * tap through tun_alloc(), switch the descriptor to non-blocking mode,
 * initialize the generic part with netdev_pltap_class and register the device
 * in pltap_netdev_devs under 'name'.
 *
 * NOTE(review): the declaration of 'error', the error/cleanup paths and the
 * return statements are not visible in this excerpt.
 */
129 netdev_pltap_create(const struct netdev_class *class OVS_UNUSED, const char *name,
130 struct netdev_dev **netdev_devp)
132 struct netdev_dev_pltap *netdev_dev;
135 netdev_dev = xzalloc(sizeof *netdev_dev);
/* Name buffer passed to tun_alloc() below and logged right afterwards. */
137 netdev_dev->real_name = xzalloc(IFNAMSIZ + 1);
/* Start unconfigured: no address, no netmask, no flags, no pending sync. */
138 memset(&netdev_dev->local_addr, 0, sizeof(netdev_dev->local_addr));
139 netdev_dev->valid_local_ip = false;
140 netdev_dev->valid_local_netmask = false;
141 netdev_dev->flags = 0;
142 netdev_dev->sync_flags_needed = false;
143 list_init(&netdev_dev->sync_list);
146 /* Open tap device. */
147 netdev_dev->fd = tun_alloc(IFF_TAP, netdev_dev->real_name);
148 if (netdev_dev->fd < 0) {
150 VLOG_WARN("tun_alloc(IFF_TAP, %s) failed: %s", name, strerror(error));
153 VLOG_DBG("real_name = %s", netdev_dev->real_name);
155 /* Make non-blocking. */
156 error = set_nonblocking(netdev_dev->fd);
161 netdev_dev_init(&netdev_dev->netdev_dev, name, &netdev_pltap_class);
162 shash_add(&pltap_netdev_devs, name, netdev_dev);
163 *netdev_devp = &netdev_dev->netdev_dev;
/*
 * Tears down a pltap device: closes its tap descriptor, takes it off the
 * pending-sync list and removes it from pltap_netdev_devs.
 *
 * NOTE(review): releasing real_name and the device structure itself is not
 * visible in this excerpt.
 */
172 netdev_pltap_destroy(struct netdev_dev *netdev_dev_)
174 struct netdev_dev_pltap *netdev_dev = netdev_dev_pltap_cast(netdev_dev_);
/* NOTE(review): netdev_pltap_create() and the other users test "fd < 0",
 * while this test only excludes -1. */
176 if (netdev_dev->fd != -1)
177 close(netdev_dev->fd);
179 sync_done(netdev_dev);
181 shash_find_and_delete(&pltap_netdev_devs,
182 netdev_dev_get_name(netdev_dev_));
/* Opens a handle on 'netdev_dev_': allocates a struct netdev_pltap,
 * initializes its generic part against the device and returns it through
 * '*netdevp'.  The return statement is not visible in this excerpt. */
187 netdev_pltap_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
189 struct netdev_pltap *netdev;
191 netdev = xmalloc(sizeof *netdev);
192 netdev_init(&netdev->netdev, netdev_dev_);
194 *netdevp = &netdev->netdev;
/* Closes a handle obtained from netdev_pltap_open().
 * NOTE(review): only the cast is visible in this excerpt; presumably the
 * handle is freed on a line that is not shown. */
199 netdev_pltap_close(struct netdev *netdev_)
201 struct netdev_pltap *netdev = netdev_pltap_cast(netdev_);
/*
 * Runs one request/reply exchange with the vsys script named 'script'.
 *
 * The request is built printf-style from 'format' and the variadic arguments
 * and written to /vsys/<script>.in; the reply is read from
 * /vsys/<script>.out.  Both files are opened non-blocking and driven by a
 * select() loop that runs while request bytes remain to be written or reply
 * bytes may still be read.
 *
 * 'preply' is the caller's slot for the reply text.  The lines that store
 * into it are not visible in this excerpt, only the "reply = NULL" that keeps
 * the buffer from being freed afterwards.  The reply is otherwise reported
 * with VLOG_ERR together with the script name (presumably on the branch where
 * no slot was supplied -- the branch structure is not visible here).
 *
 * NOTE(review): the return statements and the cleanup code are not visible in
 * this excerpt; callers in this file pass the int result straight through.
 */
205 static int vsys_transaction(const char *script,
206 const char **preply, char *format, ...)
208 char *msg = NULL, *reply = NULL;
/* Maximum number of reply bytes accepted from the script. */
209 const size_t reply_size = 1024;
210 int ifd = -1, ofd = -1, maxfd;
211 size_t bytes_to_write, bytes_to_read,
212 bytes_written = 0, bytes_read = 0;
214 char *ofname = NULL, *ifname = NULL;
217 va_start(args, format);
218 msg = xvasprintf(format, args);
/* One byte more than the read budget: the reply is terminated with
 * reply[bytes_read] = '\0' below, and bytes_read may reach reply_size. */
220 reply = (char*)xmalloc(reply_size + 1);
221 if (!msg || !reply) {
222 VLOG_ERR("Out of memory");
227 ofname = xasprintf("/vsys/%s.out", script);
228 ifname = xasprintf("/vsys/%s.in", script);
229 if (!ofname || !ifname) {
230 VLOG_ERR("Out of memory");
/* The script's output side is opened first, then its input side. */
235 ofd = open(ofname, O_RDONLY | O_NONBLOCK);
237 VLOG_ERR("Cannot open %s: %s", ofname, strerror(errno));
241 ifd = open(ifname, O_WRONLY | O_NONBLOCK);
243 VLOG_ERR("Cannot open %s: %s", ifname, strerror(errno));
/* select() wants the highest descriptor number plus one. */
247 maxfd = (ifd < ofd) ? ofd : ifd;
249 bytes_to_write = strlen(msg);
250 bytes_to_read = reply_size;
251 while (bytes_to_write || bytes_to_read) {
252 fd_set readset, writeset, errorset;
/* Only watch the input side for writability while request bytes remain. */
257 if (bytes_to_write) {
258 FD_SET(ifd, &writeset);
259 FD_SET(ifd, &errorset);
261 FD_SET(ofd, &readset);
262 FD_SET(ofd, &errorset);
263 if (select(maxfd + 1, &readset, &writeset, &errorset, NULL) < 0) {
266 VLOG_ERR("selec error: %s", strerror(errno));
270 if (FD_ISSET(ifd, &errorset) || FD_ISSET(ofd, &errorset)) {
271 VLOG_ERR("error condition on ifd or ofd");
274 if (FD_ISSET(ifd, &writeset)) {
275 ssize_t n = write(ifd, msg + bytes_written, bytes_to_write);
/* EAGAIN and EINTR are not reported as write failures. */
277 if (errno != EAGAIN && errno != EINTR) {
278 VLOG_ERR("write on %s: %s", ifname, strerror(errno));
285 if (bytes_to_write == 0)
289 if (FD_ISSET(ofd, &readset)) {
290 ssize_t n = read(ofd, reply + bytes_read, bytes_to_read);
/* EAGAIN and EINTR are not reported as read failures. */
292 if (errno != EAGAIN && errno != EINTR) {
293 VLOG_ERR("read on %s: %s", ofname, strerror(errno));
/* Safe even for a full-size reply thanks to the extra byte allocated above. */
306 reply[bytes_read] = '\0';
309 reply = NULL; /* prevent freeing the reply msg */
311 VLOG_ERR("%s returned: %s", script, reply);
/*
 * Brings the interface up by running the "vif_up" vsys script.
 *
 * The request consists of a string, the local IP address and an integer, each
 * followed by a newline.  Only the IP argument is visible in this excerpt;
 * the string is presumably the tap's real name and the integer the netmask
 * -- confirm against the full source.
 *
 * The statement taken when the device is not finalized is not visible here.
 */
328 netdev_pltap_up(struct netdev_dev_pltap *dev)
330 if (!netdev_pltap_finalized(dev)) {
334 return vsys_transaction("vif_up", NULL, "%s\n"IP_FMT"\n%d\n",
336 IP_ARGS(dev->local_addr.sin_addr.s_addr),
/* Brings the interface down by running the "vif_down" vsys script with the
 * tap's real name as the only request line.  The statement taken when the
 * device is not finalized is not visible in this excerpt. */
341 netdev_pltap_down(struct netdev_dev_pltap *dev)
343 if (!netdev_pltap_finalized(dev)) {
347 return vsys_transaction("vif_down", NULL, "%s\n", dev->real_name);
/*
 * Switches promiscuous mode through the "promisc" vsys script.
 *
 * The request is one newline-terminated string (its argument is not visible
 * in this excerpt; presumably the tap's real name) followed by nothing when
 * 'promisc' is true, or by the line "-" when it is false.
 *
 * The statement taken when the device is not finalized is not visible here.
 */
351 netdev_pltap_promisc(struct netdev_dev_pltap *dev, bool promisc)
353 if (!netdev_pltap_finalized(dev)) {
357 return vsys_transaction("promisc", NULL, "%s\n%s",
359 (promisc ? "" : "-\n"));
/*
 * Pushes the requested flags (dev->new_flags) to the interface.
 *
 * Compares the requested flags with the last known ones (dev->flags) and runs
 * the up/down and promisc scripts for the bits that differ, then bumps the
 * change sequence number.  The script results are deliberately discarded.
 *
 * NOTE(review): the statements taken when the descriptor is invalid or the
 * device is not finalized are not visible in this excerpt, nor is any
 * refresh of dev->flags or removal from the sync list.
 */
363 netdev_pltap_sync_flags(struct netdev_dev_pltap *dev)
366 if (dev->fd < 0 || !netdev_pltap_finalized(dev)) {
371 VLOG_DBG("sync_flags(%s): current: %s %s target: %s %s",
373 (dev->flags & NETDEV_UP ? "UP" : "-"),
374 (dev->flags & NETDEV_PROMISC ? "PROMISC" : "-"),
375 (dev->new_flags & NETDEV_UP ? "UP" : "-"),
376 (dev->new_flags & NETDEV_PROMISC ? "PROMISC" : "-"));
/* UP requested but currently down: bring up.  The reverse: take down. */
378 if ((dev->new_flags & NETDEV_UP) && !(dev->flags & NETDEV_UP)) {
379 (void) netdev_pltap_up(dev);
380 } else if (!(dev->new_flags & NETDEV_UP) && (dev->flags & NETDEV_UP)) {
381 (void) netdev_pltap_down(dev);
/* XOR of the same bit from both sets: non-zero only when they disagree. */
384 if ((dev->new_flags & NETDEV_PROMISC) ^ (dev->flags & NETDEV_PROMISC)) {
385 (void) netdev_pltap_promisc(dev, dev->new_flags & NETDEV_PROMISC);
388 netdev_pltap_update_seq(dev);
/*
 * Reports the device configuration into 'args'.
 *
 * Adds "local_ip" when a local address has been configured and
 * "local_netmask" when a netmask has been configured; keys that were never
 * set are left out.  The return statement is not visible in this excerpt.
 */
394 netdev_pltap_get_config(struct netdev_dev *dev_, struct smap *args)
396 struct netdev_dev_pltap *netdev_dev = netdev_dev_pltap_cast(dev_);
398 if (netdev_dev->valid_local_ip)
399 smap_add_format(args, "local_ip", IP_FMT,
400 IP_ARGS(netdev_dev->local_addr.sin_addr.s_addr));
/* local_netmask is stored by netdev_pltap_set_config() straight from atoi(),
 * i.e. in host byte order, so it is reported as is.  Passing it through
 * ntohs() would byte-swap and truncate it on little-endian hosts. */
401 if (netdev_dev->valid_local_netmask)
402 smap_add_format(args, "local_netmask", "%"PRIu32,
403 (uint32_t) netdev_dev->local_netmask);
/*
 * Applies the configuration in 'args' to the device.
 *
 * Recognized keys:
 *   "local_ip"       resolved with lookup_ip() and stored in local_addr.
 *   "local_netmask"  converted with atoi() and stored in local_netmask.
 * Any other key is reported with a warning.
 *
 * Once both values are present the device is asked to come up: NETDEV_UP is
 * added to the requested flags and the device is queued for synchronization.
 *
 * The declaration of 'addr', some branch lines and the return statement are
 * not visible in this excerpt.
 */
408 netdev_pltap_set_config(struct netdev_dev *dev_, const struct smap *args)
410 struct netdev_dev_pltap *netdev_dev = netdev_dev_pltap_cast(dev_);
411 struct shash_node *node;
413 VLOG_DBG("pltap_set_config(%s)", netdev_dev_get_name(dev_));
414 SMAP_FOR_EACH(node, args) {
415 VLOG_DBG("arg: %s->%s", node->name, (char*)node->data);
416 if (!strcmp(node->name, "local_ip")) {
418 if (lookup_ip(node->data, &addr)) {
/* NOTE(review): node->name is "local_ip" on this branch, so the message reads
 * "local_ip: bad 'local_ip'"; the unknown-argument warning below prefixes the
 * device name instead. */
419 VLOG_WARN("%s: bad 'local_ip'", node->name);
421 netdev_dev->local_addr.sin_addr = addr;
422 netdev_dev->valid_local_ip = true;
424 } else if (!strcmp(node->name, "local_netmask")) {
/* NOTE(review): atoi() cannot report malformed input and no range check is
 * applied, so any string marks the netmask as valid. */
425 netdev_dev->local_netmask = atoi(node->data);
426 // XXX check validity
427 netdev_dev->valid_local_netmask = true;
429 VLOG_WARN("%s: unknown argument '%s'",
430 netdev_dev_get_name(dev_), node->name);
433 if (netdev_pltap_finalized(netdev_dev)) {
434 netdev_dev->new_flags |= NETDEV_UP;
435 sync_needed(netdev_dev);
/* Prepares the handle for receiving by bringing the interface up with
 * netdev_pltap_up().  The statement taken when the device is not finalized is
 * not visible in this excerpt.
 * NOTE(review): 'netdev_' is marked OVS_UNUSED although it is used below. */
441 netdev_pltap_listen(struct netdev *netdev_ OVS_UNUSED)
443 struct netdev_dev_pltap *dev =
444 netdev_dev_pltap_cast(netdev_get_dev(netdev_));
445 if (!netdev_pltap_finalized(dev))
447 return netdev_pltap_up(dev);
/*
 * Reads one packet from the tap descriptor into 'buffer' (capacity 'size').
 *
 * readv() scatters the data into a separate 4-byte 'prefix' followed by
 * 'buffer', so the leading 4 bytes delivered by the descriptor never reach
 * the caller's buffer.  Failures other than EINTR and EAGAIN are logged
 * through the rate limiter.
 *
 * NOTE(review): the declarations of 'prefix' and 'retval', the enclosing loop
 * and all return statements are not visible in this excerpt.  readv()'s
 * result covers the 4 prefix bytes as well; whether they are subtracted
 * before returning cannot be seen here.
 */
451 netdev_pltap_recv(struct netdev *netdev_, void *buffer, size_t size)
453 struct netdev_dev_pltap *dev =
454 netdev_dev_pltap_cast(netdev_get_dev(netdev_));
456 struct iovec iov[2] = {
457 { .iov_base = prefix, .iov_len = 4 },
458 { .iov_base = buffer, .iov_len = size }
462 retval = readv(dev->fd, iov, 2);
464 if (retval <= size) {
469 } else if (errno != EINTR) {
470 if (errno != EAGAIN) {
471 VLOG_WARN_RL(&rl, "error receiveing Ethernet packet on %s: %s",
472 netdev_get_name(netdev_), strerror(errno));
/* Registers the tap descriptor with the poll loop for readability (POLLIN).
 * Does nothing of the kind while the descriptor is invalid or the device is
 * not finalized. */
480 netdev_pltap_recv_wait(struct netdev *netdev_)
482 struct netdev_dev_pltap *dev =
483 netdev_dev_pltap_cast(netdev_get_dev(netdev_));
484 if (dev->fd >= 0 && netdev_pltap_finalized(dev)) {
485 poll_fd_wait(dev->fd, POLLIN);
/*
 * Writes the 'size' bytes at 'buffer' to the tap descriptor, preceded by a
 * fixed 4-byte prefix, in a single writev() call.
 *
 * A write that transfers fewer than size + 4 bytes is logged as a partial
 * packet; failures other than EINTR and EAGAIN are logged as send errors.
 * Both messages go through the rate limiter.
 *
 * NOTE(review): the meaning of the prefix bytes { 0, 0, 8, 6 } is not stated
 * in this file; presumably the tap packet-information header -- confirm.  The
 * declaration of 'retval', the enclosing loop and the return statements are
 * not visible in this excerpt.
 */
490 netdev_pltap_send(struct netdev *netdev_, const void *buffer, size_t size)
492 struct netdev_dev_pltap *dev =
493 netdev_dev_pltap_cast(netdev_get_dev(netdev_));
494 char prefix[4] = { 0, 0, 8, 6 };
495 struct iovec iov[2] = {
496 { .iov_base = prefix, .iov_len = 4 },
497 { .iov_base = (char*) buffer, .iov_len = size }
503 retval = writev(dev->fd, iov, 2);
505 if (retval != size + 4) {
506 VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%zd bytes of %zu) on %s",
507 retval, size + 4, netdev_get_name(netdev_));
510 } else if (errno != EINTR) {
511 if (errno != EAGAIN) {
512 VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
513 netdev_get_name(netdev_), strerror(errno));
/* Registers the tap descriptor with the poll loop for writability (POLLOUT).
 * Does nothing of the kind while the descriptor is invalid or the device is
 * not finalized. */
521 netdev_pltap_send_wait(struct netdev *netdev_)
523 struct netdev_dev_pltap *dev =
524 netdev_dev_pltap_cast(netdev_get_dev(netdev_));
525 if (dev->fd >= 0 && netdev_pltap_finalized(dev)) {
526 poll_fd_wait(dev->fd, POLLOUT);
/*
 * Discards packets queued on the tap descriptor by receiving them into a
 * scratch buffer.
 *
 * NOTE(review): 'error' holds the raw result of recv(), which is -1 with
 * errno set on failure and a byte count on success.  It therefore cannot
 * equal -EAGAIN or -EMSGSIZE as the tests below expect; the errno value
 * should be examined instead.  The loop structure, the buffer declaration and
 * the return statements are not visible in this excerpt.
 */
531 netdev_pltap_drain(struct netdev *netdev_)
533 struct netdev_dev_pltap *dev =
534 netdev_dev_pltap_cast(netdev_get_dev(netdev_));
541 error = recv(dev->fd, buffer, 128, MSG_TRUNC);
543 if (error == -EAGAIN)
545 else if (error != -EMSGSIZE)
/* Ethernet address setter for the class table.  Both parameters are marked
 * unused, so the requested address is ignored; the body, and hence the value
 * returned, is not visible in this excerpt. */
553 netdev_pltap_set_etheraddr(struct netdev *netdevi OVS_UNUSED,
554 const uint8_t mac[ETH_ADDR_LEN] OVS_UNUSED)
560 // XXX from netdev-linux.c
/*
 * Reads the hardware address of the tap named dev->real_name into 'ea'
 * using the SIOCGIFHWADDR ioctl on the file-level AF_INET socket.
 *
 * An ioctl failure is logged, at INFO level for ENODEV and ERR otherwise.  An
 * address family other than AF_UNSPEC or ARPHRD_ETHER is reported with a
 * warning.
 *
 * The declarations of 'ifr' and 'hwaddr_family' and the return statements are
 * not visible in this excerpt.
 */
562 get_etheraddr(struct netdev_dev_pltap *dev, uint8_t ea[ETH_ADDR_LEN])
567 memset(&ifr, 0, sizeof ifr);
568 ovs_strzcpy(ifr.ifr_name, dev->real_name, sizeof ifr.ifr_name);
569 if (ioctl(af_inet_sock, SIOCGIFHWADDR, &ifr) < 0) {
570 /* ENODEV probably means that a vif disappeared asynchronously and
571 * hasn't been removed from the database yet, so reduce the log level
572 * to INFO for that case. */
573 VLOG(errno == ENODEV ? VLL_INFO : VLL_ERR,
574 "ioctl(SIOCGIFHWADDR) on %s device failed: %s",
575 dev->real_name, strerror(errno));
578 hwaddr_family = ifr.ifr_hwaddr.sa_family;
579 if (hwaddr_family != AF_UNSPEC && hwaddr_family != ARPHRD_ETHER) {
580 VLOG_WARN("%s device has unknown hardware address family %d",
581 dev->real_name, hwaddr_family);
583 memcpy(ea, ifr.ifr_hwaddr.sa_data, ETH_ADDR_LEN);
/*
 * Queries the interface flags of the tap named dev->real_name with the
 * SIOCGIFFLAGS ioctl and translates IFF_UP and IFF_PROMISC into the
 * corresponding NETDEV_* bits in '*flags'.
 *
 * NOTE(review): the visible lines only OR bits into '*flags'; whether it is
 * cleared first, and what is returned on ioctl failure, is not visible in
 * this excerpt.
 */
588 get_flags(struct netdev_dev_pltap *dev, enum netdev_flags *flags)
592 memset(&ifr, 0, sizeof ifr);
593 ovs_strzcpy(ifr.ifr_name, dev->real_name, sizeof ifr.ifr_name);
594 if (ioctl(af_inet_sock, SIOCGIFFLAGS, &ifr) < 0)
597 if (ifr.ifr_flags & IFF_UP)
599 if (ifr.ifr_flags & IFF_PROMISC)
600 *flags |= NETDEV_PROMISC;
/* Class-table entry that stores the device's Ethernet address in 'mac' by
 * delegating to get_etheraddr().  Any check made before the call is not
 * visible in this excerpt. */
605 netdev_pltap_get_etheraddr(const struct netdev *netdev,
606 uint8_t mac[ETH_ADDR_LEN])
608 struct netdev_dev_pltap *dev =
609 netdev_dev_pltap_cast(netdev_get_dev(netdev));
612 return get_etheraddr(dev, mac);
616 // XXX can we read stats in planetlab?
/* Statistics getter for the class table.  Both parameters are marked unused,
 * so no statistics are produced; the body, and hence the value returned, is
 * not visible in this excerpt. */
618 netdev_pltap_get_stats(const struct netdev *netdev OVS_UNUSED, struct netdev_stats *stats OVS_UNUSED)
/* Statistics setter for the class table.  Both parameters are marked unused,
 * so the supplied statistics are ignored; the body, and hence the value
 * returned, is not visible in this excerpt. */
624 netdev_pltap_set_stats(struct netdev *netdev OVS_UNUSED, const struct netdev_stats *stats OVS_UNUSED)
/*
 * Requests a flag change: the bits in 'off' are cleared from and the bits in
 * 'on' are added to the device's requested flags, and the flags known before
 * the change are stored in '*old_flagsp'.
 *
 * Only NETDEV_UP and NETDEV_PROMISC may appear in 'off' or 'on'.  For a
 * finalized device the current flags are first refreshed with get_flags().
 * The change itself is not applied here (see the comment near the end).
 *
 * NOTE(review): the declaration of 'error', the statement taken for
 * unsupported flags, the statement under the final test (presumably a call
 * to sync_needed()) and the return statements are not visible in this
 * excerpt.
 */
631 netdev_pltap_update_flags(struct netdev *netdev,
632 enum netdev_flags off, enum netdev_flags on,
633 enum netdev_flags *old_flagsp)
635 struct netdev_dev_pltap *dev =
636 netdev_dev_pltap_cast(netdev_get_dev(netdev));
/* True when a bit other than UP or PROMISC is requested. */
639 if ((off | on) & ~(NETDEV_UP | NETDEV_PROMISC)) {
643 if (netdev_pltap_finalized(dev)) {
644 error = get_flags(dev, &dev->flags);
646 *old_flagsp = dev->flags;
647 dev->new_flags |= on;
648 dev->new_flags &= ~off;
649 if (dev->flags != dev->new_flags) {
650 /* we cannot sync here, since we may be in a signal handler */
/* Returns the current change sequence number of the device behind
 * 'netdev'. */
658 netdev_pltap_change_seq(const struct netdev *netdev)
660 return netdev_dev_pltap_cast(netdev_get_dev(netdev))->change_seq;
663 /* Helper functions. */
/* Advances the change sequence number of 'dev'.
 * NOTE(review): only the zero test is visible in this excerpt; presumably the
 * counter is incremented first and then bumped again if it wrapped to 0, so
 * that 0 is never reported -- confirm against the full source. */
666 netdev_pltap_update_seq(struct netdev_dev_pltap *dev)
669 if (!dev->change_seq) {
/*
 * unixctl handler registered as "netdev-pltap/get-tapname": replies with the
 * real name of the pltap device named by argv[1].
 *
 * Replies with an error when the device has no valid tap descriptor.  The
 * "no such pltap netdev" error is presumably sent when the lookup fails; the
 * test guarding it is not visible in this excerpt.
 */
675 netdev_pltap_get_real_name(struct unixctl_conn *conn,
676 int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED)
678 struct netdev_dev_pltap *pltap_dev;
680 pltap_dev = shash_find_data(&pltap_netdev_devs, argv[1]);
682 unixctl_command_reply_error(conn, "no such pltap netdev");
685 if (pltap_dev->fd < 0) {
686 unixctl_command_reply_error(conn, "no real device attached");
690 unixctl_command_reply(conn, pltap_dev->real_name);
/*
 * One-time setup for the pltap class: initializes the pending-sync list,
 * creates the AF_INET datagram socket used for ioctl() queries and registers
 * the "netdev-pltap/get-tapname" unixctl command, which takes exactly one
 * argument.
 *
 * A socket failure is logged.  Whether initialization then stops is not
 * visible in this excerpt, and neither are the return statements.
 */
694 netdev_pltap_init(void)
696 list_init(&sync_list);
697 af_inet_sock = socket(AF_INET, SOCK_DGRAM, 0);
698 if (af_inet_sock < 0) {
699 VLOG_ERR("failed to create inet socket: %s", strerror(errno));
701 unixctl_command_register("netdev-pltap/get-tapname", "port",
702 1, 1, netdev_pltap_get_real_name, NULL);
/* Periodic work for the pltap class: synchronizes the flags of every device
 * on the pending-sync list.  The _SAFE iteration form is used, which allows
 * the current entry to be unlinked while walking the list. */
707 netdev_pltap_run(void)
709 struct netdev_dev_pltap *iter, *next;
710 LIST_FOR_EACH_SAFE(iter, next, sync_list, &sync_list) {
711 netdev_pltap_sync_flags(iter);
/* Asks the poll loop to wake up immediately while devices are still waiting
 * on the pending-sync list, so that netdev_pltap_run() gets to them without
 * delay. */
716 netdev_pltap_wait(void)
718 if (!list_is_empty(&sync_list)) {
719 VLOG_DBG("netdev_pltap: scheduling sync");
720 poll_immediate_wake();
/*
 * Class table for the pltap provider.  The initializer is positional: every
 * entry must stay in the order of the members of struct netdev_class, and a
 * NULL entry means the operation is not provided by this class.
 *
 * NOTE(review): several entries of the table are not visible in this excerpt.
 */
724 const struct netdev_class netdev_pltap_class = {
731 netdev_pltap_destroy,
732 netdev_pltap_get_config,
733 netdev_pltap_set_config,
734 NULL, /* get_tunnel_config */
741 netdev_pltap_recv_wait,
745 netdev_pltap_send_wait,
747 netdev_pltap_set_etheraddr,
748 netdev_pltap_get_etheraddr,
751 NULL, /* get_ifindex */
752 NULL, /* get_carrier */
753 NULL, /* get_carrier_resets */
754 NULL, /* get_miimon */
755 netdev_pltap_get_stats,
756 netdev_pltap_set_stats,
758 NULL, /* get_features */
759 NULL, /* set_advertisements */
761 NULL, /* set_policing */
762 NULL, /* get_qos_types */
763 NULL, /* get_qos_capabilities */
766 NULL, /* get_queue */
767 NULL, /* set_queue */
768 NULL, /* delete_queue */
769 NULL, /* get_queue_stats */
770 NULL, /* dump_queues */
771 NULL, /* dump_queue_stats */
776 NULL, /* add_router */
777 NULL, /* get_next_hop */
778 NULL, /* get_drv_info */
779 NULL, /* arp_lookup */
781 netdev_pltap_update_flags,
783 netdev_pltap_change_seq