From: Giuseppe Lettieri Date: Sat, 17 Aug 2013 13:08:21 +0000 (+0200) Subject: Merge branch 'mainstream' X-Git-Tag: sliver-openvswitch-2.0.90-1~24 X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=77d14d9c7f9ce7245eff56aacd420646577892d0;hp=a0014912b714f41d34eb6a11ae838aec981491ab;p=sliver-openvswitch.git Merge branch 'mainstream' --- diff --git a/.gitignore b/.gitignore index 60ed30d3d..46b638781 100644 --- a/.gitignore +++ b/.gitignore @@ -49,4 +49,5 @@ Module.symvers TAGS cscope.* tags +myexp/ _debian diff --git a/.non-distfiles b/.non-distfiles new file mode 100644 index 000000000..146c4451e --- /dev/null +++ b/.non-distfiles @@ -0,0 +1,8 @@ +planetlab/exp-tool +planetlab/exp-tool/Makefile +planetlab/exp-tool/showgraph +planetlab/exp-tool/README +planetlab/scripts/refresh +sliver-openvswitch.spec +.gitignore +.non-distfiles diff --git a/Makefile.am b/Makefile.am index 5b9e0ac18..32e85d1e6 100644 --- a/Makefile.am +++ b/Makefile.am @@ -108,6 +108,7 @@ ro_shell = printf '\043 Generated automatically -- do not modify! 
-*- buffer- SUFFIXES += .in .in: + @mkdir -p $$(dirname $@) $(PERL) $(srcdir)/build-aux/soexpand.pl -I$(srcdir) < $< | \ sed \ -e 's,[@]PKIDIR[@],$(PKIDIR),g' \ @@ -149,7 +150,7 @@ dist-hook-git: distfiles (cd datapath && $(MAKE) distfiles); \ (cat distfiles; sed 's|^|datapath/|' datapath/distfiles) | \ sort -u > all-distfiles; \ - (cd $(srcdir) && git ls-files) | grep -v '\.gitignore$$' | \ + (cd $(srcdir) && git ls-files) | grep -vFf $(srcdir)/.non-distfiles | \ sort -u > all-gitfiles; \ comm -1 -3 all-distfiles all-gitfiles > missing-distfiles; \ if test -s missing-distfiles; then \ @@ -273,4 +274,5 @@ include rhel/automake.mk include xenserver/automake.mk include python/automake.mk include python/compat/automake.mk +include planetlab/automake.mk include tutorial/automake.mk diff --git a/lib/automake.mk b/lib/automake.mk index f936897bf..fa7f17332 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -93,6 +93,8 @@ lib_libopenvswitch_a_SOURCES = \ lib/multipath.c \ lib/multipath.h \ lib/netdev-dummy.c \ + lib/netdev-tunnel.c \ + lib/netdev-pltap.c \ lib/netdev-provider.h \ lib/netdev-vport.c \ lib/netdev-vport.h \ @@ -199,6 +201,8 @@ lib_libopenvswitch_a_SOURCES = \ lib/timeval.h \ lib/token-bucket.c \ lib/token-bucket.h \ + lib/tunalloc.c \ + lib/tunalloc.h \ lib/type-props.h \ lib/unaligned.h \ lib/unicode.c \ diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 8f79128c2..07c146775 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -201,10 +201,17 @@ dpif_netdev_class_is_dummy(const struct dpif_class *class) return class != &dpif_netdev_class; } +static bool +dpif_netdev_class_is_planetlab(const struct dpif_class *class) +{ + return class == &dpif_planetlab_class; +} + static const char * dpif_netdev_port_open_type(const struct dpif_class *class, const char *type) { return strcmp(type, "internal") ? type + : dpif_netdev_class_is_planetlab(class) ? "pltap" : dpif_netdev_class_is_dummy(class) ? 
"dummy" : "tap"; } @@ -232,7 +239,8 @@ choose_port(struct dp_netdev *dp, const char *name) { uint32_t port_no; - if (dp->class != &dpif_netdev_class) { + if (dp->class != &dpif_netdev_class && + dp->class != &dpif_planetlab_class) { const char *p; int start_no = 0; @@ -1297,41 +1305,49 @@ dp_netdev_execute_actions(struct dp_netdev *dp, dp_netdev_output_port, dp_netdev_action_userspace); } +#define DPIF_NETDEV_CLASS_FUNCTIONS \ + dpif_netdev_enumerate, \ + dpif_netdev_port_open_type, \ + dpif_netdev_open, \ + dpif_netdev_close, \ + dpif_netdev_destroy, \ + dpif_netdev_run, \ + dpif_netdev_wait, \ + dpif_netdev_get_stats, \ + dpif_netdev_port_add, \ + dpif_netdev_port_del, \ + dpif_netdev_port_query_by_number, \ + dpif_netdev_port_query_by_name, \ + dpif_netdev_get_max_ports, \ + NULL, /* port_get_pid */ \ + dpif_netdev_port_dump_start, \ + dpif_netdev_port_dump_next, \ + dpif_netdev_port_dump_done, \ + dpif_netdev_port_poll, \ + dpif_netdev_port_poll_wait, \ + dpif_netdev_flow_get, \ + dpif_netdev_flow_put, \ + dpif_netdev_flow_del, \ + dpif_netdev_flow_flush, \ + dpif_netdev_flow_dump_start, \ + dpif_netdev_flow_dump_next, \ + dpif_netdev_flow_dump_done, \ + dpif_netdev_execute, \ + NULL, /* operate */ \ + dpif_netdev_recv_set, \ + dpif_netdev_queue_to_priority, \ + dpif_netdev_recv, \ + dpif_netdev_recv_wait, \ + dpif_netdev_recv_purge, \ + const struct dpif_class dpif_netdev_class = { "netdev", - dpif_netdev_enumerate, - dpif_netdev_port_open_type, - dpif_netdev_open, - dpif_netdev_close, - dpif_netdev_destroy, - dpif_netdev_run, - dpif_netdev_wait, - dpif_netdev_get_stats, - dpif_netdev_port_add, - dpif_netdev_port_del, - dpif_netdev_port_query_by_number, - dpif_netdev_port_query_by_name, - dpif_netdev_get_max_ports, - NULL, /* port_get_pid */ - dpif_netdev_port_dump_start, - dpif_netdev_port_dump_next, - dpif_netdev_port_dump_done, - dpif_netdev_port_poll, - dpif_netdev_port_poll_wait, - dpif_netdev_flow_get, - dpif_netdev_flow_put, - dpif_netdev_flow_del, - 
dpif_netdev_flow_flush, - dpif_netdev_flow_dump_start, - dpif_netdev_flow_dump_next, - dpif_netdev_flow_dump_done, - dpif_netdev_execute, - NULL, /* operate */ - dpif_netdev_recv_set, - dpif_netdev_queue_to_priority, - dpif_netdev_recv, - dpif_netdev_recv_wait, - dpif_netdev_recv_purge, + DPIF_NETDEV_CLASS_FUNCTIONS +}; + +const struct dpif_class dpif_planetlab_class = { + "planetlab", + DPIF_NETDEV_CLASS_FUNCTIONS }; static void @@ -1403,3 +1419,4 @@ dpif_dummy_register(bool override) "DP PORT NEW-NUMBER", 3, 3, dpif_dummy_change_port_number, NULL); } + diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index d6315ed61..1609c122a 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -363,6 +363,7 @@ struct dpif_class { extern const struct dpif_class dpif_linux_class; extern const struct dpif_class dpif_netdev_class; +extern const struct dpif_class dpif_planetlab_class; #ifdef __cplusplus } diff --git a/lib/dpif.c b/lib/dpif.c index ae4da6201..1c1a5243f 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -61,6 +61,7 @@ static const struct dpif_class *base_dpif_classes[] = { &dpif_linux_class, #endif &dpif_netdev_class, + &dpif_planetlab_class, }; struct registered_dpif_class { diff --git a/lib/netdev-pltap.c b/lib/netdev-pltap.c new file mode 100644 index 000000000..c8c79896a --- /dev/null +++ b/lib/netdev-pltap.c @@ -0,0 +1,872 @@ +/* + * Copyright (c) 2012 Giuseppe Lettieri + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "flow.h" +#include "list.h" +#include "netdev-provider.h" +#include "odp-util.h" +#include "ofp-print.h" +#include "ofpbuf.h" +#include "packets.h" +#include "poll-loop.h" +#include "shash.h" +#include "sset.h" +#include "unixctl.h" +#include "socket-util.h" +#include "vlog.h" +#include "tunalloc.h" + +VLOG_DEFINE_THIS_MODULE(netdev_pltap); + +/* Protects 'sync_list'. */ +static struct ovs_mutex sync_list_mutex = OVS_MUTEX_INITIALIZER; + +static struct list sync_list OVS_GUARDED_BY(sync_list_mutex) + = LIST_INITIALIZER(&sync_list); + +struct netdev_pltap { + struct netdev up; + + /* In sync_list. */ + struct list sync_list OVS_GUARDED_BY(sync_list_mutex); + + /* Protects all members below. */ + struct ovs_mutex mutex OVS_ACQ_AFTER(sync_list_mutex); + + char *real_name; + struct netdev_stats stats; + enum netdev_flags new_flags; + enum netdev_flags flags; + int fd; + struct sockaddr_in local_addr; + int local_netmask; + bool valid_local_ip; + bool valid_local_netmask; + bool sync_flags_needed; + unsigned int change_seq; +}; + + +struct netdev_rx_pltap { + struct netdev_rx up; + int fd; +}; + +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); + +/* Protects 'pltap_netdevs' */ +static struct ovs_mutex pltap_netdevs_mutex = OVS_MUTEX_INITIALIZER; +static struct shash pltap_netdevs OVS_GUARDED_BY(pltap_netdevs_mutex) + = SHASH_INITIALIZER(&pltap_netdevs); + +static int netdev_pltap_construct(struct netdev *netdev_); + +static void netdev_pltap_update_seq(struct netdev_pltap *) + OVS_REQUIRES(dev->mutex); +static int get_flags(struct netdev_pltap *dev, enum netdev_flags *flags) + OVS_REQUIRES(dev->mutex); + +static bool +netdev_pltap_finalized(struct netdev_pltap *dev) + OVS_REQUIRES(dev->mutex) +{ + return dev->valid_local_ip && dev->valid_local_netmask; +} + +static bool +is_netdev_pltap_class(const struct 
netdev_class *class) +{ + return class->construct == netdev_pltap_construct; +} + +static struct netdev_pltap * +netdev_pltap_cast(const struct netdev *netdev) +{ + ovs_assert(is_netdev_pltap_class(netdev_get_class(netdev))); + return CONTAINER_OF(netdev, struct netdev_pltap, up); +} + +static struct netdev_rx_pltap* +netdev_rx_pltap_cast(const struct netdev_rx *rx) +{ + ovs_assert(is_netdev_pltap_class(netdev_get_class(rx->netdev))); + return CONTAINER_OF(rx, struct netdev_rx_pltap, up); +} + +static void sync_needed(struct netdev_pltap *dev) + OVS_REQUIRES(dev->mutex, sync_list_mutex) +{ + if (dev->sync_flags_needed) + return; + + dev->sync_flags_needed = true; + list_insert(&sync_list, &dev->sync_list); +} + +static void sync_done(struct netdev_pltap *dev) + OVS_REQUIRES(dev->mutex, sync_list_mutex) +{ + if (!dev->sync_flags_needed) + return; + + (void) list_remove(&dev->sync_list); + dev->sync_flags_needed = false; +} + +static struct netdev * +netdev_pltap_alloc(void) +{ + struct netdev_pltap *netdev = xzalloc(sizeof *netdev); + return &netdev->up; +} + +static int +netdev_pltap_construct(struct netdev *netdev_) +{ + struct netdev_pltap *netdev = netdev_pltap_cast(netdev_); + int error; + + ovs_mutex_init(&netdev->mutex, PTHREAD_MUTEX_NORMAL); + netdev->real_name = xzalloc(IFNAMSIZ + 1); + memset(&netdev->local_addr, 0, sizeof(netdev->local_addr)); + netdev->valid_local_ip = false; + netdev->valid_local_netmask = false; + netdev->flags = 0; + netdev->sync_flags_needed = false; + netdev->change_seq = 1; + + + /* Open tap device. */ + netdev->fd = tun_alloc(IFF_TAP, netdev->real_name); + if (netdev->fd < 0) { + error = errno; + VLOG_WARN("tun_alloc(IFF_TAP, %s) failed: %s", + netdev_get_name(netdev_), ovs_strerror(error)); + return error; + } + VLOG_DBG("real_name = %s", netdev->real_name); + + /* Make non-blocking. 
*/ + error = set_nonblocking(netdev->fd); + if (error) { + return error; + } + + ovs_mutex_lock(&pltap_netdevs_mutex); + shash_add(&pltap_netdevs, netdev_get_name(netdev_), netdev); + ovs_mutex_unlock(&pltap_netdevs_mutex); + return 0; +} + +static void +netdev_pltap_destruct(struct netdev *netdev_) +{ + struct netdev_pltap *netdev = netdev_pltap_cast(netdev_); + + ovs_mutex_lock(&pltap_netdevs_mutex); + if (netdev->fd != -1) + close(netdev->fd); + + if (netdev->sync_flags_needed) { + ovs_mutex_lock(&sync_list_mutex); + (void) list_remove(&netdev->sync_list); + ovs_mutex_unlock(&sync_list_mutex); + } + + shash_find_and_delete(&pltap_netdevs, + netdev_get_name(netdev_)); + ovs_mutex_unlock(&pltap_netdevs_mutex); + ovs_mutex_destroy(&netdev->mutex); +} + +static void +netdev_pltap_dealloc(struct netdev *netdev_) +{ + struct netdev_pltap *netdev = netdev_pltap_cast(netdev_); + free(netdev); +} + +static int netdev_pltap_up(struct netdev_pltap *dev) OVS_REQUIRES(dev->mutex); + +static struct netdev_rx * +netdev_pltap_rx_alloc(void) +{ + struct netdev_rx_pltap *rx = xzalloc(sizeof *rx); + return &rx->up; +} + +static int +netdev_pltap_rx_construct(struct netdev_rx *rx_) +{ + struct netdev_rx_pltap *rx = netdev_rx_pltap_cast(rx_); + struct netdev *netdev_ = rx->up.netdev; + struct netdev_pltap *netdev = + netdev_pltap_cast(netdev_); + int error = 0; + + ovs_mutex_lock(&netdev->mutex); + rx->fd = netdev->fd; + if (!netdev_pltap_finalized(netdev)) + goto out; + error = netdev_pltap_up(netdev); + if (error) { + goto out; + } +out: + ovs_mutex_unlock(&netdev->mutex); + return error; +} + +static void +netdev_pltap_rx_destruct(struct netdev_rx *rx_ OVS_UNUSED) +{ +} + +static void +netdev_pltap_rx_dealloc(struct netdev_rx *rx_) +{ + struct netdev_rx_pltap *rx = netdev_rx_pltap_cast(rx_); + + free(rx); +} + +static int vsys_transaction(const char *script, + const char **preply, char *format, ...) 
+{ + char *msg = NULL, *reply = NULL; + const size_t reply_size = 1024; + int ifd = -1, ofd = -1, maxfd; + size_t bytes_to_write, bytes_to_read, + bytes_written = 0, bytes_read = 0; + int error = 0; + char *ofname = NULL, *ifname = NULL; + va_list args; + + va_start(args, format); + msg = xvasprintf(format, args); + va_end(args); + reply = (char*)xmalloc(reply_size); + if (!msg || !reply) { + VLOG_ERR("Out of memory"); + error = ENOMEM; + goto cleanup; + } + + ofname = xasprintf("/vsys/%s.out", script); + ifname = xasprintf("/vsys/%s.in", script); + if (!ofname || !ifname) { + VLOG_ERR("Out of memory"); + error = ENOMEM; + goto cleanup; + } + + ofd = open(ofname, O_RDONLY | O_NONBLOCK); + if (ofd < 0) { + VLOG_ERR("Cannot open %s: %s", ofname, ovs_strerror(errno)); + error = errno; + goto cleanup; + } + ifd = open(ifname, O_WRONLY | O_NONBLOCK); + if (ifd < 0) { + VLOG_ERR("Cannot open %s: %s", ifname, ovs_strerror(errno)); + error = errno; + goto cleanup; + } + maxfd = (ifd < ofd) ? ofd : ifd; + + bytes_to_write = strlen(msg); + bytes_to_read = reply_size; + while (bytes_to_write || bytes_to_read) { + fd_set readset, writeset, errorset; + + FD_ZERO(&readset); + FD_ZERO(&writeset); + FD_ZERO(&errorset); + if (bytes_to_write) { + FD_SET(ifd, &writeset); + FD_SET(ifd, &errorset); + } + FD_SET(ofd, &readset); + FD_SET(ofd, &errorset); + if (select(maxfd + 1, &readset, &writeset, &errorset, NULL) < 0) { + if (errno == EINTR) + continue; + VLOG_ERR("selec error: %s", ovs_strerror(errno)); + error = errno; + goto cleanup; + } + if (FD_ISSET(ifd, &errorset) || FD_ISSET(ofd, &errorset)) { + VLOG_ERR("error condition on ifd or ofd"); + goto cleanup; + } + if (FD_ISSET(ifd, &writeset)) { + ssize_t n = write(ifd, msg + bytes_written, bytes_to_write); + if (n < 0) { + if (errno != EAGAIN && errno != EINTR) { + VLOG_ERR("write on %s: %s", ifname, ovs_strerror(errno)); + error = errno; + goto cleanup; + } + } else { + bytes_written += n; + bytes_to_write -= n; + if 
(bytes_to_write == 0) + close(ifd); + } + } + if (FD_ISSET(ofd, &readset)) { + ssize_t n = read(ofd, reply + bytes_read, bytes_to_read); + if (n < 0) { + if (errno != EAGAIN && errno != EINTR) { + VLOG_ERR("read on %s: %s", ofname, ovs_strerror(errno)); + error = errno; + goto cleanup; + } + } else if (n == 0) { + bytes_to_read = 0; + } else { + bytes_read += n; + bytes_to_read -= n; + } + } + } + if (bytes_read) { + reply[bytes_read] = '\0'; + if (preply) { + *preply = reply; + reply = NULL; /* prevent freeing the reply msg */ + } else { + VLOG_ERR("%s returned: %s", script, reply); + } + error = EAGAIN; + goto cleanup; + } + +cleanup: + free(msg); + free(reply); + free(ofname); + free(ifname); + close(ifd); + close(ofd); + return error; +} + +static int +netdev_pltap_up(struct netdev_pltap *dev) + OVS_REQUIRES(dev->mutex) +{ + if (!netdev_pltap_finalized(dev)) { + return 0; + } + + return vsys_transaction("vif_up", NULL, "%s\n"IP_FMT"\n%d\n", + dev->real_name, + IP_ARGS(dev->local_addr.sin_addr.s_addr), + dev->local_netmask); +} + +static int +netdev_pltap_down(struct netdev_pltap *dev) + OVS_REQUIRES(dev->mutex) +{ + if (!netdev_pltap_finalized(dev)) { + return 0; + } + + return vsys_transaction("vif_down", NULL, "%s\n", dev->real_name); +} + +static int +netdev_pltap_promisc(struct netdev_pltap *dev, bool promisc) + OVS_REQUIRES(dev-mutex) +{ + if (!netdev_pltap_finalized(dev)) { + return 0; + } + + return vsys_transaction("promisc", NULL, "%s\n%s", + dev->real_name, + (promisc ? "" : "-\n")); +} + +static void +netdev_pltap_sync_flags(struct netdev_pltap *dev) + OVS_REQUIRES(sync_list_mutex) +{ + + ovs_mutex_lock(&dev->mutex); + + if (dev->fd < 0 || !netdev_pltap_finalized(dev)) { + goto out; + } + + VLOG_DBG("sync_flags(%s): current: %s %s target: %s %s", + dev->real_name, + (dev->flags & NETDEV_UP ? "UP" : "-"), + (dev->flags & NETDEV_PROMISC ? "PROMISC" : "-"), + (dev->new_flags & NETDEV_UP ? "UP" : "-"), + (dev->new_flags & NETDEV_PROMISC ? 
"PROMISC" : "-")); + + if ((dev->new_flags & NETDEV_UP) && !(dev->flags & NETDEV_UP)) { + (void) netdev_pltap_up(dev); + } else if (!(dev->new_flags & NETDEV_UP) && (dev->flags & NETDEV_UP)) { + (void) netdev_pltap_down(dev); + } + + if ((dev->new_flags & NETDEV_PROMISC) ^ (dev->flags & NETDEV_PROMISC)) { + (void) netdev_pltap_promisc(dev, dev->new_flags & NETDEV_PROMISC); + } + + netdev_pltap_update_seq(dev); + +out: + sync_done(dev); + ovs_mutex_unlock(&dev->mutex); +} + + +static int +netdev_pltap_get_config(const struct netdev *dev_, struct smap *args) +{ + struct netdev_pltap *netdev = netdev_pltap_cast(dev_); + + ovs_mutex_lock(&netdev->mutex); + if (netdev->valid_local_ip) + smap_add_format(args, "local_ip", IP_FMT, + IP_ARGS(netdev->local_addr.sin_addr.s_addr)); + if (netdev->valid_local_netmask) + smap_add_format(args, "local_netmask", "%"PRIu32, + ntohs(netdev->local_netmask)); + ovs_mutex_unlock(&netdev->mutex); + return 0; +} + +static int +netdev_pltap_set_config(struct netdev *dev_, const struct smap *args) +{ + struct netdev_pltap *netdev = netdev_pltap_cast(dev_); + struct shash_node *node; + + ovs_mutex_lock(&sync_list_mutex); + ovs_mutex_lock(&netdev->mutex); + VLOG_DBG("pltap_set_config(%s)", netdev_get_name(dev_)); + SMAP_FOR_EACH(node, args) { + VLOG_DBG("arg: %s->%s", node->name, (char*)node->data); + if (!strcmp(node->name, "local_ip")) { + struct in_addr addr; + if (lookup_ip(node->data, &addr)) { + VLOG_WARN("%s: bad 'local_ip'", node->name); + } else { + netdev->local_addr.sin_addr = addr; + netdev->valid_local_ip = true; + } + } else if (!strcmp(node->name, "local_netmask")) { + netdev->local_netmask = atoi(node->data); + // XXX check valididy + netdev->valid_local_netmask = true; + } else { + VLOG_WARN("%s: unknown argument '%s'", + netdev_get_name(dev_), node->name); + } + } + if (netdev_pltap_finalized(netdev)) { + netdev->new_flags |= NETDEV_UP; + sync_needed(netdev); + } + ovs_mutex_unlock(&netdev->mutex); + 
ovs_mutex_unlock(&sync_list_mutex); + return 0; +} + +static int +netdev_pltap_rx_recv(struct netdev_rx *rx_, void *buffer, size_t size) +{ + struct netdev_rx_pltap *rx = netdev_rx_pltap_cast(rx_); + struct tun_pi pi; + struct iovec iov[2] = { + { .iov_base = &pi, .iov_len = sizeof(pi) }, + { .iov_base = buffer, .iov_len = size } + }; + for (;;) { + ssize_t retval; + retval = readv(rx->fd, iov, 2); + if (retval >= 0) { + if (retval <= size) { + return retval; + } else { + return -EMSGSIZE; + } + } else if (errno != EINTR) { + if (errno != EAGAIN) { + VLOG_WARN_RL(&rl, "error receiveing Ethernet packet on %s: %s", + netdev_rx_get_name(rx_), ovs_strerror(errno)); + } + return -errno; + } + } +} + +static void +netdev_pltap_rx_wait(struct netdev_rx *rx_) +{ + struct netdev_rx_pltap *rx = netdev_rx_pltap_cast(rx_); + struct netdev_pltap *netdev = + netdev_pltap_cast(rx->up.netdev); + if (rx->fd >= 0 && netdev_pltap_finalized(netdev)) { + poll_fd_wait(rx->fd, POLLIN); + } +} + +static int +netdev_pltap_send(struct netdev *netdev_, const void *buffer, size_t size) +{ + struct netdev_pltap *dev = + netdev_pltap_cast(netdev_); + struct tun_pi pi = { 0, 0x86 }; + struct iovec iov[2] = { + { .iov_base = &pi, .iov_len = sizeof(pi) }, + { .iov_base = (char*) buffer, .iov_len = size } + }; + if (dev->fd < 0) + return EAGAIN; + for (;;) { + ssize_t retval; + retval = writev(dev->fd, iov, 2); + if (retval >= 0) { + if (retval != size + 4) { + VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%zd bytes of %zu) on %s", + retval, size + 4, netdev_get_name(netdev_)); + } + return 0; + } else if (errno != EINTR) { + if (errno != EAGAIN) { + VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s", + netdev_get_name(netdev_), ovs_strerror(errno)); + } + return errno; + } + } +} + +static void +netdev_pltap_send_wait(struct netdev *netdev_) +{ + struct netdev_pltap *dev = + netdev_pltap_cast(netdev_); + if (dev->fd >= 0 && netdev_pltap_finalized(dev)) { + poll_fd_wait(dev->fd, 
POLLOUT); + } +} + +static int +netdev_pltap_rx_drain(struct netdev_rx *rx_) +{ + struct netdev_rx_pltap *rx = netdev_rx_pltap_cast(rx_); + char buffer[128]; + int error; + + if (rx->fd < 0) + return EAGAIN; + for (;;) { + error = recv(rx->fd, buffer, 128, MSG_TRUNC); + if (error) { + if (error == -EAGAIN) + break; + else if (error != -EMSGSIZE) + return error; + } + } + return 0; +} + +static int +netdev_pltap_set_etheraddr(struct netdev *netdevi OVS_UNUSED, + const uint8_t mac[ETH_ADDR_LEN] OVS_UNUSED) +{ + return ENOTSUP; +} + + +// XXX from netdev-linux.c +static int +get_etheraddr(struct netdev_pltap *dev, uint8_t ea[ETH_ADDR_LEN]) + OVS_REQUIRES(dev->mutex) +{ + struct ifreq ifr; + int hwaddr_family; + int error; + + memset(&ifr, 0, sizeof ifr); + ovs_strzcpy(ifr.ifr_name, dev->real_name, sizeof ifr.ifr_name); + error = af_inet_ifreq_ioctl(dev->real_name, &ifr, + SIOCGIFHWADDR, "SIOCGIFHWADDR"); + if (error) { + return error; + } + hwaddr_family = ifr.ifr_hwaddr.sa_family; + if (hwaddr_family != AF_UNSPEC && hwaddr_family != ARPHRD_ETHER) { + VLOG_WARN("%s device has unknown hardware address family %d", + dev->real_name, hwaddr_family); + } + memcpy(ea, ifr.ifr_hwaddr.sa_data, ETH_ADDR_LEN); + return 0; +} + +static int +get_flags(struct netdev_pltap *dev, enum netdev_flags *flags) + OVS_REQUIRES(dev->mutex) +{ + struct ifreq ifr; + int error; + + error = af_inet_ifreq_ioctl(dev->real_name, &ifr, + SIOCGIFFLAGS, "SIOCGIFFLAGS"); + if (error) { + return error; + } + *flags = 0; + if (ifr.ifr_flags & IFF_UP) + *flags |= NETDEV_UP; + if (ifr.ifr_flags & IFF_PROMISC) + *flags |= NETDEV_PROMISC; + return 0; +} + +static int +netdev_pltap_get_etheraddr(const struct netdev *netdev, + uint8_t mac[ETH_ADDR_LEN]) +{ + struct netdev_pltap *dev = + netdev_pltap_cast(netdev); + int error = 0; + + ovs_mutex_lock(&dev->mutex); + if (dev->fd < 0) { + error = EAGAIN; + goto out; + } + error = get_etheraddr(dev, mac); + +out: + ovs_mutex_unlock(&dev->mutex); + return error; +} 
+ + +// XXX can we read stats in planetlab? +static int +netdev_pltap_get_stats(const struct netdev *netdev OVS_UNUSED, struct netdev_stats *stats OVS_UNUSED) +{ + return ENOTSUP; +} + +static int +netdev_pltap_set_stats(struct netdev *netdev OVS_UNUSED, const struct netdev_stats *stats OVS_UNUSED) +{ + return ENOTSUP; +} + + +static int +netdev_pltap_update_flags(struct netdev *dev_, + enum netdev_flags off, enum netdev_flags on, + enum netdev_flags *old_flagsp) +{ + struct netdev_pltap *netdev = + netdev_pltap_cast(dev_); + int error = 0; + + ovs_mutex_lock(&sync_list_mutex); + ovs_mutex_lock(&netdev->mutex); + if ((off | on) & ~(NETDEV_UP | NETDEV_PROMISC)) { + error = EINVAL; + goto out; + } + + if (netdev_pltap_finalized(netdev)) { + error = get_flags(netdev, &netdev->flags); + } + *old_flagsp = netdev->flags; + netdev->new_flags |= on; + netdev->new_flags &= ~off; + if (netdev->flags != netdev->new_flags) { + /* we cannot sync here, since we may be in a signal handler */ + sync_needed(netdev); + } + +out: + ovs_mutex_unlock(&netdev->mutex); + ovs_mutex_unlock(&sync_list_mutex); + return error; +} + +static unsigned int +netdev_pltap_change_seq(const struct netdev *netdev) +{ + struct netdev_pltap *dev = + netdev_pltap_cast(netdev); + unsigned int change_seq; + + ovs_mutex_lock(&dev->mutex); + change_seq = dev->change_seq; + ovs_mutex_unlock(&dev->mutex); + + return change_seq; +} + +/* Helper functions. 
*/ + +static void +netdev_pltap_update_seq(struct netdev_pltap *dev) + OVS_REQUIRES(dev->mutex) +{ + dev->change_seq++; + if (!dev->change_seq) { + dev->change_seq++; + } +} + +static void +netdev_pltap_get_real_name(struct unixctl_conn *conn, + int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED) +{ + struct netdev_pltap *pltap_dev; + + ovs_mutex_lock(&pltap_netdevs_mutex); + pltap_dev = shash_find_data(&pltap_netdevs, argv[1]); + if (!pltap_dev) { + unixctl_command_reply_error(conn, "no such pltap netdev"); + goto out; + } + if (pltap_dev->fd < 0) { + unixctl_command_reply_error(conn, "no real device attached"); + goto out; + } + + unixctl_command_reply(conn, pltap_dev->real_name); + +out: + ovs_mutex_unlock(&pltap_netdevs_mutex); +} + +static int +netdev_pltap_init(void) +{ + unixctl_command_register("netdev-pltap/get-tapname", "port", + 1, 1, netdev_pltap_get_real_name, NULL); + return 0; +} + +static void +netdev_pltap_run(void) +{ + struct netdev_pltap *iter, *next; + ovs_mutex_lock(&sync_list_mutex); + LIST_FOR_EACH_SAFE(iter, next, sync_list, &sync_list) { + netdev_pltap_sync_flags(iter); + } + ovs_mutex_unlock(&sync_list_mutex); +} + +static void +netdev_pltap_wait(void) +{ + ovs_mutex_lock(&sync_list_mutex); + if (!list_is_empty(&sync_list)) { + VLOG_DBG("netdev_pltap: scheduling sync"); + poll_immediate_wake(); + } + ovs_mutex_unlock(&sync_list_mutex); +} + +const struct netdev_class netdev_pltap_class = { + "pltap", + netdev_pltap_init, + netdev_pltap_run, + netdev_pltap_wait, + + netdev_pltap_alloc, + netdev_pltap_construct, + netdev_pltap_destruct, + netdev_pltap_dealloc, + netdev_pltap_get_config, + netdev_pltap_set_config, + NULL, /* get_tunnel_config */ + + netdev_pltap_send, + netdev_pltap_send_wait, + + netdev_pltap_set_etheraddr, + netdev_pltap_get_etheraddr, + NULL, /* get_mtu */ + NULL, /* set_mtu */ + NULL, /* get_ifindex */ + NULL, /* get_carrier */ + NULL, /* get_carrier_resets */ + NULL, /* get_miimon */ + netdev_pltap_get_stats, 
+ netdev_pltap_set_stats, + + NULL, /* get_features */ + NULL, /* set_advertisements */ + + NULL, /* set_policing */ + NULL, /* get_qos_types */ + NULL, /* get_qos_capabilities */ + NULL, /* get_qos */ + NULL, /* set_qos */ + NULL, /* get_queue */ + NULL, /* set_queue */ + NULL, /* delete_queue */ + NULL, /* get_queue_stats */ + NULL, /* dump_queues */ + NULL, /* dump_queue_stats */ + + NULL, /* get_in4 */ + NULL, /* set_in4 */ + NULL, /* get_in6 */ + NULL, /* add_router */ + NULL, /* get_next_hop */ + NULL, /* get_drv_info */ + NULL, /* arp_lookup */ + + netdev_pltap_update_flags, + + netdev_pltap_change_seq, + + netdev_pltap_rx_alloc, + netdev_pltap_rx_construct, + netdev_pltap_rx_destruct, + netdev_pltap_rx_dealloc, + netdev_pltap_rx_recv, + netdev_pltap_rx_wait, + netdev_pltap_rx_drain, +}; diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 70a5188ed..23905d413 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -647,6 +647,9 @@ extern const struct netdev_class netdev_tap_class; extern const struct netdev_class netdev_bsd_class; #endif +extern const struct netdev_class netdev_tunnel_class; +extern const struct netdev_class netdev_pltap_class; + #ifdef __cplusplus } #endif diff --git a/lib/netdev-tunnel.c b/lib/netdev-tunnel.c new file mode 100644 index 000000000..f8eadabc8 --- /dev/null +++ b/lib/netdev-tunnel.c @@ -0,0 +1,629 @@ +/* + * Copyright (c) 2010, 2011, 2012 Nicira Networks. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include + +#include "flow.h" +#include "list.h" +#include "netdev-provider.h" +#include "odp-util.h" +#include "ofp-print.h" +#include "ofpbuf.h" +#include "packets.h" +#include "poll-loop.h" +#include "shash.h" +#include "sset.h" +#include "unixctl.h" +#include "socket-util.h" +#include "vlog.h" + +VLOG_DEFINE_THIS_MODULE(netdev_tunnel); + +struct netdev_tunnel { + struct netdev up; + + /* Protects all members below. */ + struct ovs_mutex mutex; + + uint8_t hwaddr[ETH_ADDR_LEN]; + struct netdev_stats stats; + enum netdev_flags flags; + int sockfd; + struct sockaddr_in local_addr; + struct sockaddr_in remote_addr; + bool valid_remote_ip; + bool valid_remote_port; + bool connected; + unsigned int change_seq; +}; + +struct netdev_rx_tunnel { + struct netdev_rx up; + int fd; +}; + +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); + +static struct ovs_mutex tunnel_netdevs_mutex = OVS_MUTEX_INITIALIZER; +static struct shash tunnel_netdevs OVS_GUARDED_BY(tunnel_netdevs_mutex) + = SHASH_INITIALIZER(&tunnel_netdevs); + +static int netdev_tunnel_construct(struct netdev *netdevp_); +static void netdev_tunnel_update_seq(struct netdev_tunnel *); + +static bool +is_netdev_tunnel_class(const struct netdev_class *class) +{ + return class->construct == netdev_tunnel_construct; +} + +static struct netdev_tunnel * +netdev_tunnel_cast(const struct netdev *netdev) +{ + ovs_assert(is_netdev_tunnel_class(netdev_get_class(netdev))); + return CONTAINER_OF(netdev, struct netdev_tunnel, up); +} + +static struct netdev_rx_tunnel * +netdev_rx_tunnel_cast(const struct netdev_rx *rx) +{ + ovs_assert(is_netdev_tunnel_class(netdev_get_class(rx->netdev))); + return CONTAINER_OF(rx, struct netdev_rx_tunnel, up); +} + +static struct netdev * +netdev_tunnel_alloc(void) +{ + struct netdev_tunnel *netdev = xzalloc(sizeof 
*netdev); + return &netdev->up; +} + +static int +netdev_tunnel_construct(struct netdev *netdev_) +{ + static atomic_uint next_n = ATOMIC_VAR_INIT(0); + struct netdev_tunnel *netdev = netdev_tunnel_cast(netdev_); + unsigned int n; + + atomic_add(&next_n, 1, &n); + + ovs_mutex_init(&netdev->mutex, PTHREAD_MUTEX_NORMAL); + netdev->hwaddr[0] = 0xfe; + netdev->hwaddr[1] = 0xff; + netdev->hwaddr[2] = 0xff; + netdev->hwaddr[3] = n >> 16; + netdev->hwaddr[4] = n >> 8; + netdev->hwaddr[5] = n; + netdev->flags = 0; + netdev->change_seq = 1; + memset(&netdev->remote_addr, 0, sizeof(netdev->remote_addr)); + netdev->valid_remote_ip = false; + netdev->valid_remote_port = false; + netdev->connected = false; + + + netdev->sockfd = inet_open_passive(SOCK_DGRAM, "", 0, &netdev->local_addr, 0); + if (netdev->sockfd < 0) { + return netdev->sockfd; + } + + + shash_add(&tunnel_netdevs, netdev_get_name(netdev_), netdev); + + n++; + + VLOG_DBG("tunnel_create: name=%s, fd=%d, port=%d", + netdev_get_name(netdev_), netdev->sockfd, netdev->local_addr.sin_port); + + return 0; + +} + +static void +netdev_tunnel_destruct(struct netdev *netdev_) +{ + struct netdev_tunnel *netdev = netdev_tunnel_cast(netdev_); + + ovs_mutex_lock(&tunnel_netdevs_mutex); + + if (netdev->sockfd != -1) + close(netdev->sockfd); + + shash_find_and_delete(&tunnel_netdevs, + netdev_get_name(netdev_)); + + ovs_mutex_destroy(&netdev->mutex); + ovs_mutex_unlock(&tunnel_netdevs_mutex); +} + +static void +netdev_tunnel_dealloc(struct netdev *netdev_) +{ + struct netdev_tunnel *netdev = netdev_tunnel_cast(netdev_); + free(netdev); +} + +static int +netdev_tunnel_get_config(const struct netdev *dev_, struct smap *args) +{ + struct netdev_tunnel *netdev = netdev_tunnel_cast(dev_); + + ovs_mutex_lock(&netdev->mutex); + if (netdev->valid_remote_ip) + smap_add_format(args, "remote_ip", IP_FMT, + IP_ARGS(netdev->remote_addr.sin_addr.s_addr)); + if (netdev->valid_remote_port) + smap_add_format(args, "remote_port", "%"PRIu16, + 
ntohs(netdev->remote_addr.sin_port)); + ovs_mutex_unlock(&netdev->mutex); + return 0; +} + +static int +netdev_tunnel_connect(struct netdev_tunnel *dev) + OVS_REQUIRES(dev->mutex) +{ + char buf[1024]; + if (dev->sockfd < 0) + return EBADF; + if (!dev->valid_remote_ip || !dev->valid_remote_port) + return 0; + dev->remote_addr.sin_family = AF_INET; + if (connect(dev->sockfd, (struct sockaddr*) &dev->remote_addr, sizeof(dev->remote_addr)) < 0) { + return errno; + } + dev->connected = true; + netdev_tunnel_update_seq(dev); + VLOG_DBG("%s: connected to (%s, %d)", netdev_get_name(&dev->up), + inet_ntop(AF_INET, &dev->remote_addr.sin_addr, buf, 1024), ntohs(dev->remote_addr.sin_port)); + return 0; +} + +static int +netdev_tunnel_set_config(struct netdev *dev_, const struct smap *args) +{ + struct netdev_tunnel *netdev = netdev_tunnel_cast(dev_); + struct shash_node *node; + int error; + + ovs_mutex_lock(&netdev->mutex); + VLOG_DBG("tunnel_set_config(%s)", netdev_get_name(dev_)); + SMAP_FOR_EACH(node, args) { + VLOG_DBG("arg: %s->%s", node->name, (char*)node->data); + if (!strcmp(node->name, "remote_ip")) { + struct in_addr addr; + if (lookup_ip(node->data, &addr)) { + VLOG_WARN("%s: bad 'remote_ip'", node->name); + } else { + netdev->remote_addr.sin_addr = addr; + netdev->valid_remote_ip = true; + } + } else if (!strcmp(node->name, "remote_port")) { + netdev->remote_addr.sin_port = htons(atoi(node->data)); + netdev->valid_remote_port = true; + } else { + VLOG_WARN("%s: unknown argument '%s'", + netdev_get_name(dev_), node->name); + } + } + error = netdev_tunnel_connect(netdev); + ovs_mutex_unlock(&netdev->mutex); + return error; +} + +static struct netdev_rx * +netdev_tunnel_rx_alloc(void) +{ + struct netdev_rx_tunnel *rx = xzalloc(sizeof *rx); + return &rx->up; +} + +static int +netdev_tunnel_rx_construct(struct netdev_rx *rx_) +{ + struct netdev_rx_tunnel *rx = netdev_rx_tunnel_cast(rx_); + struct netdev *netdev_ = rx->up.netdev; + struct netdev_tunnel *netdev = 
netdev_tunnel_cast(netdev_); + + ovs_mutex_lock(&netdev->mutex); + rx->fd = netdev->sockfd; + ovs_mutex_unlock(&netdev->mutex); + return 0; +} + +static void +netdev_tunnel_rx_destruct(struct netdev_rx *rx_ OVS_UNUSED) +{ +} + +static void +netdev_tunnel_rx_dealloc(struct netdev_rx *rx_) +{ + struct netdev_rx_tunnel *rx = netdev_rx_tunnel_cast(rx_); + + free(rx); +} + +static int +netdev_tunnel_rx_recv(struct netdev_rx *rx_, void *buffer, size_t size) +{ + struct netdev_rx_tunnel *rx = netdev_rx_tunnel_cast(rx_); + struct netdev_tunnel *netdev = + netdev_tunnel_cast(rx_->netdev); + if (!netdev->connected) + return -EAGAIN; + for (;;) { + ssize_t retval; + retval = recv(rx->fd, buffer, size, MSG_TRUNC); + VLOG_DBG("%s: recv(%"PRIxPTR", %zu, MSG_TRUNC) = %zd", + netdev_rx_get_name(rx_), (uintptr_t)buffer, size, retval); + if (retval >= 0) { + netdev->stats.rx_packets++; + netdev->stats.rx_bytes += retval; + if (retval <= size) { + return retval; + } else { + netdev->stats.rx_errors++; + netdev->stats.rx_length_errors++; + return -EMSGSIZE; + } + } else if (errno != EINTR) { + if (errno != EAGAIN) { + VLOG_WARN_RL(&rl, "error receiveing Ethernet packet on %s: %s", + netdev_rx_get_name(rx_), ovs_strerror(errno)); + netdev->stats.rx_errors++; + } + return -errno; + } + } +} + +static void +netdev_tunnel_rx_wait(struct netdev_rx *rx_) +{ + struct netdev_rx_tunnel *rx = + netdev_rx_tunnel_cast(rx_); + if (rx->fd >= 0) { + poll_fd_wait(rx->fd, POLLIN); + } +} + +static int +netdev_tunnel_send(struct netdev *netdev_, const void *buffer, size_t size) +{ + struct netdev_tunnel *dev = + netdev_tunnel_cast(netdev_); + if (!dev->connected) + return EAGAIN; + for (;;) { + ssize_t retval; + retval = send(dev->sockfd, buffer, size, 0); + VLOG_DBG("%s: send(%"PRIxPTR", %zu) = %zd", + netdev_get_name(netdev_), (uintptr_t)buffer, size, retval); + if (retval >= 0) { + dev->stats.tx_packets++; + dev->stats.tx_bytes += retval; + if (retval != size) { + VLOG_WARN_RL(&rl, "sent partial 
Ethernet packet (%zd bytes of " + "%zu) on %s", retval, size, netdev_get_name(netdev_)); + dev->stats.tx_errors++; + } + return 0; + } else if (errno != EINTR) { + if (errno != EAGAIN) { + VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s", + netdev_get_name(netdev_), ovs_strerror(errno)); + dev->stats.tx_errors++; + } + return errno; + } + } +} + +static void +netdev_tunnel_send_wait(struct netdev *netdev_) +{ + struct netdev_tunnel *dev = netdev_tunnel_cast(netdev_); + if (dev->sockfd >= 0) { + poll_fd_wait(dev->sockfd, POLLOUT); + } +} + +static int +netdev_tunnel_rx_drain(struct netdev_rx *rx_) +{ + struct netdev_tunnel *netdev = + netdev_tunnel_cast(rx_->netdev); + struct netdev_rx_tunnel *rx = + netdev_rx_tunnel_cast(rx_); + char buffer[128]; + int error; + + if (!netdev->connected) + return 0; + for (;;) { + error = recv(rx->fd, buffer, 128, MSG_TRUNC); + if (error) { + if (error == -EAGAIN) + break; + else if (error != -EMSGSIZE) + return error; + } + } + return 0; +} + +static int +netdev_tunnel_set_etheraddr(struct netdev *netdev, + const uint8_t mac[ETH_ADDR_LEN]) +{ + struct netdev_tunnel *dev = netdev_tunnel_cast(netdev); + + ovs_mutex_lock(&dev->mutex); + if (!eth_addr_equals(dev->hwaddr, mac)) { + memcpy(dev->hwaddr, mac, ETH_ADDR_LEN); + netdev_tunnel_update_seq(dev); + } + ovs_mutex_unlock(&dev->mutex); + + return 0; +} + +static int +netdev_tunnel_get_etheraddr(const struct netdev *netdev, + uint8_t mac[ETH_ADDR_LEN]) +{ + const struct netdev_tunnel *dev = netdev_tunnel_cast(netdev); + + ovs_mutex_lock(&dev->mutex); + memcpy(mac, dev->hwaddr, ETH_ADDR_LEN); + ovs_mutex_unlock(&dev->mutex); + return 0; +} + + +static int +netdev_tunnel_get_stats(const struct netdev *netdev, struct netdev_stats *stats) +{ + const struct netdev_tunnel *dev = netdev_tunnel_cast(netdev); + + ovs_mutex_lock(&dev->mutex); + *stats = dev->stats; + ovs_mutex_unlock(&dev->mutex); + return 0; +} + +static int +netdev_tunnel_set_stats(struct netdev *netdev, const 
struct netdev_stats *stats) +{ + struct netdev_tunnel *dev = netdev_tunnel_cast(netdev); + + ovs_mutex_lock(&dev->mutex); + dev->stats = *stats; + ovs_mutex_unlock(&dev->mutex); + return 0; +} + +static int +netdev_tunnel_update_flags(struct netdev *dev_, + enum netdev_flags off, enum netdev_flags on, + enum netdev_flags *old_flagsp) +{ + struct netdev_tunnel *netdev = + netdev_tunnel_cast(dev_); + int error = 0; + + ovs_mutex_lock(&netdev->mutex); + if ((off | on) & ~(NETDEV_UP | NETDEV_PROMISC)) { + error = EINVAL; + goto out; + } + + // XXX should we actually do something with these flags? + *old_flagsp = netdev->flags; + netdev->flags |= on; + netdev->flags &= ~off; + if (*old_flagsp != netdev->flags) { + netdev_tunnel_update_seq(netdev); + } + +out: + ovs_mutex_unlock(&netdev->mutex); + return error; +} + +static unsigned int +netdev_tunnel_change_seq(const struct netdev *netdev_) +{ + struct netdev_tunnel *netdev = netdev_tunnel_cast(netdev_); + unsigned int change_seq; + + + ovs_mutex_lock(&netdev->mutex); + change_seq = netdev->change_seq; + ovs_mutex_unlock(&netdev->mutex); + return change_seq; +} + +/* Helper functions. 
*/
+
+/* Advances the change sequence number of 'dev', skipping the value 0 when
+ * the counter wraps around. */
+static void
+netdev_tunnel_update_seq(struct netdev_tunnel *dev)
+    OVS_REQUIRES(dev->mutex)
+{
+    dev->change_seq++;
+    if (!dev->change_seq) {
+        dev->change_seq++;
+    }
+}
+
+/* unixctl handler for "netdev-tunnel/get-port NAME": replies with the local
+ * UDP port of tunnel netdev NAME, in host byte order. */
+static void
+netdev_tunnel_get_port(struct unixctl_conn *conn,
+                       int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED)
+{
+    struct netdev_tunnel *tunnel_dev;
+    char buf[6];
+
+    ovs_mutex_lock(&tunnel_netdevs_mutex);
+    tunnel_dev = shash_find_data(&tunnel_netdevs, argv[1]);
+    if (!tunnel_dev) {
+        unixctl_command_reply_error(conn, "no such tunnel netdev");
+        goto out;
+    }
+
+    ovs_mutex_lock(&tunnel_dev->mutex);
+    /* A 16-bit port needs at most 5 digits plus the terminator. */
+    snprintf(buf, sizeof buf, "%d", ntohs(tunnel_dev->local_addr.sin_port));
+    ovs_mutex_unlock(&tunnel_dev->mutex);
+
+    unixctl_command_reply(conn, buf);
+out:
+    ovs_mutex_unlock(&tunnel_netdevs_mutex);
+}
+
+/* unixctl handler for "netdev-tunnel/get-tx-bytes NAME": replies with the
+ * number of bytes sent so far on tunnel netdev NAME. */
+static void
+netdev_tunnel_get_tx_bytes(struct unixctl_conn *conn,
+                           int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED)
+{
+    struct netdev_tunnel *tunnel_dev;
+    char buf[128];
+
+    ovs_mutex_lock(&tunnel_netdevs_mutex);
+    tunnel_dev = shash_find_data(&tunnel_netdevs, argv[1]);
+    if (!tunnel_dev) {
+        unixctl_command_reply_error(conn, "no such tunnel netdev");
+        goto out;
+    }
+
+    ovs_mutex_lock(&tunnel_dev->mutex);
+    snprintf(buf, sizeof buf, "%"PRIu64, tunnel_dev->stats.tx_bytes);
+    ovs_mutex_unlock(&tunnel_dev->mutex);
+    unixctl_command_reply(conn, buf);
+out:
+    ovs_mutex_unlock(&tunnel_netdevs_mutex);
+}
+
+/* unixctl handler for "netdev-tunnel/get-rx-bytes NAME": replies with the
+ * number of bytes received so far on tunnel netdev NAME. */
+static void
+netdev_tunnel_get_rx_bytes(struct unixctl_conn *conn,
+                           int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED)
+{
+    struct netdev_tunnel *tunnel_dev;
+    char buf[128];
+
+    ovs_mutex_lock(&tunnel_netdevs_mutex);
+    tunnel_dev = shash_find_data(&tunnel_netdevs, argv[1]);
+    if (!tunnel_dev) {
+        unixctl_command_reply_error(conn, "no such tunnel netdev");
+        goto out;
+    }
+
+    /* Read the counter under the device mutex, exactly as the tx-bytes
+     * handler and netdev_tunnel_get_stats() do. */
+    ovs_mutex_lock(&tunnel_dev->mutex);
+    snprintf(buf, sizeof buf, "%"PRIu64, tunnel_dev->stats.rx_bytes);
+    ovs_mutex_unlock(&tunnel_dev->mutex);
+    unixctl_command_reply(conn, buf);
+out:
+    ovs_mutex_unlock(&tunnel_netdevs_mutex);
+}
+
+
+static int
+netdev_tunnel_init(void)
+{
+
unixctl_command_register("netdev-tunnel/get-port", "NAME", + 1, 1, netdev_tunnel_get_port, NULL); + unixctl_command_register("netdev-tunnel/get-tx-bytes", "NAME", + 1, 1, netdev_tunnel_get_tx_bytes, NULL); + unixctl_command_register("netdev-tunnel/get-rx-bytes", "NAME", + 1, 1, netdev_tunnel_get_rx_bytes, NULL); + return 0; +} + +static void +netdev_tunnel_run(void) +{ +} + +static void +netdev_tunnel_wait(void) +{ +} + +const struct netdev_class netdev_tunnel_class = { + "tunnel", + netdev_tunnel_init, + netdev_tunnel_run, + netdev_tunnel_wait, + + netdev_tunnel_alloc, + netdev_tunnel_construct, + netdev_tunnel_destruct, + netdev_tunnel_dealloc, + netdev_tunnel_get_config, + netdev_tunnel_set_config, + NULL, /* get_tunnel_config */ + + netdev_tunnel_send, + netdev_tunnel_send_wait, + + netdev_tunnel_set_etheraddr, + netdev_tunnel_get_etheraddr, + NULL, /* get_mtu */ + NULL, /* set_mtu */ + NULL, /* get_ifindex */ + NULL, /* get_carrier */ + NULL, /* get_carrier_resets */ + NULL, /* get_miimon */ + netdev_tunnel_get_stats, + netdev_tunnel_set_stats, + + NULL, /* get_features */ + NULL, /* set_advertisements */ + + NULL, /* set_policing */ + NULL, /* get_qos_types */ + NULL, /* get_qos_capabilities */ + NULL, /* get_qos */ + NULL, /* set_qos */ + NULL, /* get_queue */ + NULL, /* set_queue */ + NULL, /* delete_queue */ + NULL, /* get_queue_stats */ + NULL, /* dump_queues */ + NULL, /* dump_queue_stats */ + + NULL, /* get_in4 */ + NULL, /* set_in4 */ + NULL, /* get_in6 */ + NULL, /* add_router */ + NULL, /* get_next_hop */ + NULL, /* get_drv_info */ + NULL, /* arp_lookup */ + + netdev_tunnel_update_flags, + + netdev_tunnel_change_seq, + + netdev_tunnel_rx_alloc, + netdev_tunnel_rx_construct, + netdev_tunnel_rx_destruct, + netdev_tunnel_rx_dealloc, + netdev_tunnel_rx_recv, + netdev_tunnel_rx_wait, + netdev_tunnel_rx_drain, +}; diff --git a/lib/netdev.c b/lib/netdev.c index 088aea907..c70105ba0 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -109,6 +109,8 @@ 
netdev_initialize(void) netdev_register_provider(&netdev_tap_class); netdev_register_provider(&netdev_bsd_class); #endif + netdev_register_provider(&netdev_tunnel_class); + netdev_register_provider(&netdev_pltap_class); ovsthread_once_done(&once); } diff --git a/lib/tunalloc.c b/lib/tunalloc.c new file mode 100644 index 000000000..b2484b9e4 --- /dev/null +++ b/lib/tunalloc.c @@ -0,0 +1,106 @@ +/* Slice-side code to allocate tuntap interface in root slice + * Based on bmsocket.c + * Thom Haddow - 08/10/09 + * + * Call tun_alloc() with IFFTUN or IFFTAP as an argument to get back fd to + * new tuntap interface. Interface name can be acquired via TUNGETIFF ioctl. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define VSYS_TUNTAP "/vsys/fd_tuntap.control" + + +int tun_alloc(int iftype, char *if_name); + +/* Reads vif FD from "fd", writes interface name to vif_name, and returns vif FD. + * vif_name should be IFNAMSIZ chars long. */ +static +int receive_vif_fd(int fd, char *vif_name) +{ + struct msghdr msg; + struct iovec iov; + int rv; + size_t ccmsg[CMSG_SPACE(sizeof(int)) / sizeof(size_t)]; + struct cmsghdr *cmsg; + unsigned char *data; + + /* Use IOV to read interface name */ + iov.iov_base = vif_name; + iov.iov_len = IFNAMSIZ; + + msg.msg_name = 0; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + /* old BSD implementations should use msg_accrights instead of + * msg_control; the interface is different. 
*/ + msg.msg_control = ccmsg; + msg.msg_controllen = sizeof(ccmsg); + + while(((rv = recvmsg(fd, &msg, 0)) == -1) && errno == EINTR); + if (rv == -1) { + perror("recvmsg"); + return -1; + } + if(!rv) { + /* EOF */ + return -1; + } + + cmsg = CMSG_FIRSTHDR(&msg); + if (!cmsg->cmsg_type == SCM_RIGHTS) { + fprintf(stderr, "got control message of unknown type %d\n", + cmsg->cmsg_type); + return -1; + } + data = CMSG_DATA(cmsg); + return *(int*)data; +} + + +int tun_alloc(int iftype, char *if_name) +{ + int control_fd; + struct sockaddr_un addr; + int remotefd; + + control_fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (control_fd == -1) { + perror("Could not create UNIX socket\n"); + exit(-1); + } + + memset(&addr, 0, sizeof(struct sockaddr_un)); + /* Clear structure */ + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path, VSYS_TUNTAP, + sizeof(addr.sun_path) - 1); + + if (connect(control_fd, (struct sockaddr *) &addr, + sizeof(struct sockaddr_un)) == -1) { + perror("Could not connect to Vsys control socket"); + exit(-1); + } + + /* passing type param */ + if (send(control_fd, &iftype, sizeof(iftype), 0) != sizeof(iftype)) { + perror("Could not send paramater to Vsys control socket"); + exit(-1); + } + + remotefd = receive_vif_fd(control_fd, if_name); + + close(control_fd); + + return remotefd; +} diff --git a/lib/tunalloc.h b/lib/tunalloc.h new file mode 100644 index 000000000..3e5caae1d --- /dev/null +++ b/lib/tunalloc.h @@ -0,0 +1,6 @@ +#ifndef _TUNALLOC_H +#define _TUNALLOC_H + +int tun_alloc(int iftype, char *if_name); + +#endif diff --git a/planetlab/automake.mk b/planetlab/automake.mk new file mode 100644 index 000000000..f32a0839c --- /dev/null +++ b/planetlab/automake.mk @@ -0,0 +1,9 @@ + +# this Makefile is not intended to go on the sliver image - esp. 
not in /usr/sbin +# planetlab/scripts/Makefile +# same goes for showgraph +# planetlab/scripts/showgraph +dist_sbin_SCRIPTS += planetlab/scripts/sliver-ovs + +EXTRA_DIST += \ + planetlab/scripts/sliver-ovs.in diff --git a/planetlab/exp-tool/Makefile b/planetlab/exp-tool/Makefile new file mode 100644 index 000000000..ef48321e8 --- /dev/null +++ b/planetlab/exp-tool/Makefile @@ -0,0 +1,606 @@ +# see README +# conf.mk is expected to define +# HOST_ and IP_ for all nodes involved, as well as +# LINKS as a list of - elements + +# should work with any shell, but we have only tested bash +SHELL=/bin/bash + +.DELETE_ON_ERROR: + +# run make CONF=anotherconfig.mk if you need several configs + +CONF ?= conf.mk +include $(CONF) + +# if undefined in the conf file, use single dash +SEP?=- + +# bridge name (XXX the same on all nodes) +BRIDGE?=$(SLICE) + +### helper functions +# flip(1) = 2 +# flip(2) = 1 +flip=$(if $(findstring 1,$(1)),2,1) +# cutsep (x-y)-> x y +cutsep=$(subst $(SEP), ,$(1)) +# leftnode (x-y) -> x +leftnode=$(word 1,$(call cutsep,$(1))) +# rightnode (x-y) -> y +rightnode=$(word 2,$(call cutsep,$(1))) +# linkpart(x@y) = x +linkpart=$(word 1,$(subst @, ,$(1))) +# endpart(x@y) = y +endpart=$(word 2,$(subst @, ,$(1))) +# get(x-y@1) = x +# get(x-y@2) = y +get=$(word $(call endpart,$(1)),$(call cutsep,$(call linkpart,$(1)))) +# opp(x-y@1) = x-y@2 +# opp(x-y@2) = x-y@1 +opp=$(call linkpart,$(1))@$(call flip,$(call endpart,$(1))) +# rget(x-y@1) = y +# rget(x-y@2) = x +rget=$(call get,$(call opp,$(1))) +### +default=$(if $($(1)_$(2)),$($(1)_$(2)),$($(1))) +solve=$(HOST_$(1)) +solve_ip=$(IP_$(1)) +# can be redefined in conf.mk if that's not the expected behaviour +display?=host $(1) aka $(call solve,$(1)) + +# log file name +log=$(addprefix log/,$(notdir $(1))) + +#################### set variables after conf.mk +ifeq "$(SSH_KEY)" "" +SSH_KEY_OPTION ?= +else +SSH_KEY_OPTION ?= -i $(SSH_KEY) +endif + +SSH_OPTIONS ?= $(SSH_KEY_OPTION) -l $(SLICE) +SSH = ssh $(SSH_OPTIONS) + 
+SUDO ?= sudo -S + +ALL_NODE_IDS=$(sort $(foreach link,$(LINKS),$(call leftnode,$(link))) $(foreach link,$(LINKS),$(call rightnode,$(link)))) +ALL_NODE_HOSTS=$(foreach id,$(ALL_NODE_IDS),$(call solve,$(id))) +ALL_LINK_IDS=$(addsuffix @1,$(LINKS)) $(addsuffix @2,$(LINKS)) +CONF_NODE_IDS=$(subst HOST_,,$(filter HOST_%,$(.VARIABLES))) + +#################### +init_all: init all +init: + @[ -d L ] || ( echo Creating tmp dir L; mkdir L) + @[ -d log ] || (echo Creating tmp dir log; mkdir log) + @[ -d cache ] || ( echo Creating tmp dir cache; mkdir cache) +.PHONY: init_all init + +FORCE: + +.SECONDARY: + +LINKTARGETS=$(addprefix L/,$(LINKS)) +all: $(LINKTARGETS) +.PHONY: all + +# could also do make ++SLICE +showslice: ++SLICE FORCE + +shownodes: + @$(foreach id,$(ALL_NODE_IDS),echo $(id)=$(call display,$(id));) +showhostnames: ++ALL_NODE_HOSTS +# xxx missing: something that outputs raw python stuff like +# SEND_IP="10.0.16.1" +# that would belong in the export to OF +showips: + @$(foreach id,$(ALL_NODE_IDS),echo $(id)=$(call display,$(id)) has ip/network set to $(IP_$(id));) +showlinks: + @$(foreach link,$(LINKS), echo $(call display,$(call leftnode,$(link))) '====>' $(call display,$(call rightnode,$(link)));) +showcontrollers: + @$(foreach id,$(ALL_NODE_IDS), echo $(call display,$(id)) has controller $(call default,CONTROLLER,$(id));) +.PHONY: shownodes showhostnames showips showlinks showcontrollers + +sshchecks: $(foreach id,$(ALL_NODE_IDS),sshcheck-$(id)) +.PHONY: sshchecks +ovsversions: $(foreach id,$(ALL_NODE_IDS),ovsversion-$(id)) +.PHONY: ovsversions +# more general form; run with make sshs SSH-COMMAND="bla bla bla" +sshs: $(foreach id,$(ALL_NODE_IDS),ssh-$(id)) +.PHONY: sshs +showdpids: $(foreach id,$(ALL_NODE_IDS),showdpid-$(id)) +.PHONY: showdpids +showmacs: $(foreach id,$(ALL_NODE_IDS),showmac-$(id)) +.PHONY: showmacs +showports: $(foreach id,$(ALL_NODE_IDS),showports-$(id)) +.PHONY: showports + +DBS=$(foreach id,$(ALL_NODE_IDS),cache/db.$(id)) +dbs: $(DBS) 
+.PHONY: dbs + +SWITCHS=$(foreach id,$(ALL_NODE_IDS),cache/switch.$(id)) +switchs: $(SWITCHS) +.PHONY: switchs + +start: dbs switchs +.PHONY: start + +stop:$(foreach id,$(ALL_NODE_IDS),cache/stop.$(id)) +.PHONY: stop + +status:$(foreach id,$(ALL_NODE_IDS),cache/status.$(id)) +.PHONY: status + +BRIDGES=$(foreach id,$(ALL_NODE_IDS),cache/bridge.$(id)) +bridges: $(BRIDGES) +.PHONY: bridges + +CONTROLLERS=$(foreach id,$(ALL_NODE_IDS),cache/controller.$(id)) +controllers: $(CONTROLLERS) +.PHONY: controllers + +del-controllers: $(foreach id,$(ALL_NODE_IDS),del-controller-$(id)) +.PHONY: del-controllers + +### node-oriented targets +# check ssh connectivity +sshcheck-%: FORCE + @if $(SSH) $(HOST_$*) hostname > /dev/null 2>&1; then echo "ssh on" $(call display,$*) "OK" ; \ + else echo "ssh on" $(call display,$*) "KO !!!"; fi + +ovsversion-%: FORCE + @OVSVERSION=$$($(SSH) $(HOST_$*) rpm -q sliver-openvswitch); echo $(call display,$*) has $$OVSVERSION + +ssh-%: FORCE + @$(SSH) $(HOST_$*) $(SUDO) $(SSH-COMMAND) + +showdpid-%: FORCE + @echo $*_DPID=0x$$( $(SSH) $(HOST_$*) $(SUDO) ovs-vsctl get bridge $(BRIDGE) datapath_id | sed -e 's,",,g') + +showmac-%: FORCE + @echo $*_MAC=\"$$( $(SSH) $(HOST_$*) $(SUDO) sliver-ovs get-mac $(BRIDGE))\" + +showports-%: FORCE + @$(SSH) $(HOST_$*) $(SUDO) ovs-ofctl show $(BRIDGE) \ + | perl -nle '/(\d+)\(L(.*?)\Q$(SEP)\E(.*?)\):/ && \ + print "PORT_".("$$2" eq "$*" ? 
"$$2_$$3" : "$$3_$$2")."=$$1"' +# should probably replace sshcheck +cache/status.%: FORCE + @echo "=== DB and SWITCH processes on $(call display,$*)" + @$(SSH) $(HOST_$*) $(SUDO) sliver-ovs status ||: + +cache/host.%: + @echo "IP lookup for $(call display,$*)" + @host $(HOST_$*) | sed -n 's/^.*has address *//p' > $@ + +cache/db.%: + @echo "Starting db server on $(call display,$*) - logs in $(call log,$@)" + @$(SSH) $(HOST_$*) $(SUDO) sliver-ovs start-db > $(call log,$@) 2>&1 + @touch $@ + +cache/switch.%: | cache/db.% + @echo "Starting vswitchd on $(call display,$*) - logs in $(call log,$@)" + @$(SSH) $(HOST_$*) $(SUDO) sliver-ovs start-switch > $(call log,$@) 2>&1 + @touch $@ + +cache/bridge.%: | cache/db.% + @echo "Creating bridge on $(call display,$*) - logs in $(call log,$@)" + @$(SSH) $(HOST_$*) $(SUDO) \ + sliver-ovs create-bridge $(BRIDGE) $(IP_$*) $(call default,BROPTIONS,$*) > $(call log,$@) 2>&1 + @{ echo "IP_$*=$(IP_$*)"; echo "BROPTIONS_$*=$(call default,BROPTIONS,$*)"; } > $@ + +cache/controller.%: cache/bridge.% + @echo "Setting controller $(call default,CONTROLLER,$*) on $(call display,$*) - logs in $(call log,$@)" + @$(SSH) $(HOST_$*) $(SUDO) ovs-vsctl set-controller $(BRIDGE) $(call default,CONTROLLER,$*) > $(call log,$@) 2>&1 + @echo "CONTROLLER_$*=$(call default,CONTROLLER,$*)" > $@ + +# xxx this probably needs a more thorough cleanup in cache/ +cache/stop.%: del-bridge.% + @echo "Stopping switch & db on $(call display,$*)" + @$(SSH) $(HOST_$*) $(SUDO) sliver-ovs stop && rm cache/switch.$* cache/db.$* + +### link-oriented targets +# L/-: +# Establish a link between nodes and +L/%: cache/endpoint.%@1 cache/endpoint.%@2 + @touch $@ + @echo "Link $* is up" + +# U/- +# Tear down the link between nodes and +U/%: del-iface.%@1 del-iface.%@2 + @rm -f L/$* + @echo "Deleted link $*" + +# del-bridge.: Delete the bridge on node . 
+# +# We can do this only if the db on node is running, but +# we don't need to re-delete if the db is restarted (hence the +# order-only dependency). +# +# Deleting a bridge also deletes all interfaces of the bridge +# as a side effect. This in turn invalidates local tunnel +# port numbers and endpoint info on both sides of each affected tunnel. +# The corresponding links obviously go down. +# Controller information is also lost. +# We invalidate the cache accordingly. +del-bridge.%: | cache/db.% + @echo "Deleting bridge on $(call display,$*)" + @$(SSH) $(HOST_$*) $(SUDO) sliver-ovs del-bridge $(BRIDGE) + @rm -f cache/bridge.$* \ + cache/iface.$*$(SEP)*@1 cache/iface.*$(SEP)$*@2 \ + cache/port.$*$(SEP)*@1 cache/port.*$(SEP)$*@2 \ + cache/endpoint.$*$(SEP)*@? cache/endpoint.*$(SEP)$*@? \ + L/$*$(SEP)* L/*$(SEP)$* \ + cache/controller.$* + +# del-switch.: Stops the switch daemon on . +# +# As a side effect, the local port numbers of the tunnels +# are no longer valid (they will change when the daemon is restarted) +# and, therefore, the endpoint info on the remote side of +# the tunnels must be invalidated. The links also go down. +# Controller information is also lost. +# We invalidate the cache accordingly. +del-switch.%: + @echo "Shutting down switch on $(call display,$*)" + @$(SSH) $(HOST_$*) $(SUDO) sliver-ovs stop-switch + @rm -f cache/switch.$* \ + cache/port.$*$(SEP)*@1 cache/port.*$(SEP)$*@2 \ + cache/endpoint.$*$(SEP)*@2 cache/endpoint.*$(SEP)$*@1 \ + L/$*$(SEP)* L/*$(SEP)$* \ + cache/controller.$* + +# del-db.: Stops the db daemon on . +# +# This has no additional side effects. +del-db.%: + @echo "Shutting down db on $(call display,$*)" + @$(SSH) $(HOST_$*) $(SUDO) sliver-ovs stop-db + @rm -f cache/db.$* + +# del-controller.: +# Detaches from the controller. +# +# This has no additional side effects. 
+del-controller-%: | cache/db.% + @echo "Removing controller for $(call display,$*)" + @$(SSH) $(HOST_$*) $(SUDO) sliver-ovs del-controller $(BRIDGE) + @rm -f cache/controller.$* + +# del-links: Shortcut to delete all currently up links. +del-links: $(addprefix U/,$(notdir $(wildcard L/*))) + +# del-switchs: Shortcut to stop all currently running switch daemons. +del-switchs: $(addprefix del-,$(notdir $(wildcard cache/switch.*))) + +# del-dbs: Shortcut to stop all currently running db daemona.s +del-dbs: $(addprefix del-,$(notdir $(wildcard cache/db.*))) + +# shutdown: Shortcut to stop all currently running daemons. +shutdown: del-switchs del-dbs + +.PHONY: del-links del-switchs del-dbs shutdown + + +### snapshots +snapshot: + @echo BRIDGE=$(BRIDGE) + @cat cache/bridge.* 2>/dev/null || : + @cat cache/controller.* 2>/dev/null || : + @ls L | sed 's/^/LINKS += /' + +remote-snapshot: remote-snapshot-ips remote-snapshot-links + @true + +remote-snapshot-ips: $(addprefix cache/rsnap.ip.,$(CONF_NODE_IDS)) + @cat /dev/null $^ + +remote-snapshot-links: $(addprefix cache/rsnap.links.,$(CONF_NODE_IDS)) + @sort -u /dev/null $^ + +.PHONY: remote-snapshot remote-snapshot-ips remote-snapshot-links + + +cache/rsnap.ip.%: FORCE + @$(SSH) $(HOST_$*) $(SUDO) \ + sliver-ovs get-local-ip $(BRIDGE) | sed 's/^/IP_$*=/' > $@ + +cache/rsnap.links.%: FORCE + @$(SSH) $(HOST_$*) $(SUDO) \ + sliver-ovs get-local-links $(BRIDGE) | sed -n 's/^L/LINKS += /p' > $@ + +### update sliver-ovs +update: $(addprefix update-,$(CONF_NODE_IDS)) + @true + +update-%: FORCE + @[ -n "$(SLIVER_OVS)" ] || { echo "SLIVER_OVS not set" >&2; exit 1; } + @[ -f "$(SLIVER_OVS)" ] || { echo "$(SLIVER_OVS) not found" >&2; exit 1; } + @echo "Sending $(SLIVER_OVS) to $(call display,$*)" + @scp $(SSH_KEY_OPTION) -q $(SLIVER_OVS) $(SLICE)@$(call solve,$*): + @$(SSH) $(call solve,$*) $(SUDO) mv $(notdir $(SLIVER_OVS)) /usr/sbin/sliver-ovs + +.SECONDEXPANSION: + +# del-iface.-@: +# Deletes the interface of link - on either +# or 
, according to . +# +# We need a running db daemon to do this, but we do not have to +# redo the delete if the db daemon is restarted. +# +# This also invalidates the local port of the tunnel and the endpoint +# info on both sides of the tunnel. The link goes down. +del-iface.%: | cache/db.$$(call get,%) + @echo "Removing interface for link $(call linkpart,$*) from $(call get,$*)" + @$(SSH) $(HOST_$(call get,$*)) \ + $(SUDO) sliver-ovs del-port L$(call linkpart,$*) + @rm -f cache/iface.$* \ + cache/port.$* cache/endpoint.$* cache/endpoint.$(call opp,$*) \ + L/$(call linkpart,$*) + + +### '%' here is leftid-rightid@{1,2} +# we retrieve % as $(*F) +#linkid=$(call linkpart,%) +#nodeid=$(call get,%) +#bridgefile=cache/bridge.$(nodeid) + +# cache/iface.-@: +# Creates the interface for link - on +# >. +# +# The bridge of the local node must already exist, and we need to create +# the interface again if the bridge is re-created. +# We also need a running db daemon, but we do not need to do anything +# if the db daemon is restarted. +cache/iface.%: cache/bridge.$$(call get,%) | cache/db.$$(call get,%) + @echo "Creating interface for link $(call linkpart,$(*F)) on $(call display,$(call get,$(*F))) - logs in $(call log,$@)" + @$(SSH) $(call solve,$(call get,$(*F))) $(SUDO) sliver-ovs create-port $(BRIDGE) \ + L$(call linkpart,$(*F)) > $(call log,$@) 2>&1 + @touch $@ + +# cache/port.-@: +# Retrieves the local port of link - on +# node >. +# +# The local interface must have been created already and the +# switch daemon must be running. We need to retrieve the port +# number again if the interface is re-created, or the switch +# daemon is restarted. 
+cache/port.%: cache/iface.% cache/switch.$$(call get,%) + @echo "Getting port number for link $(call linkpart,$(*F)) on $(call display,$(call get,$(*F))) - logs in $(call log,$@)" + @$(SSH) $(call solve,$(call get,$(*F))) $(SUDO) \ + sliver-ovs get-local-endpoint L$(call linkpart,$(*F)) > $@ 2> $(call log,$@) + + +# linkid=$(call linkpart,%) +# nodeid=$(call get,%) +# iface1=cache/iface.% +# iface2=cache/iface.$(call opp,%) + +# cache/endpoint.-@: +# Sets the other side (IP address, UDP port) info for link - +# on >. +# +# We need the IP address and the UDP port of the other side and the interace of this side. +# We need to set the info again if any of these change. +cache/endpoint.%: cache/host.$$(call rget,%) cache/port.$$(call opp,%) cache/iface.% | cache/db.$$(call get,%) + @echo "Setting port number of link $(call linkpart,$(*F)) on $(call display,$(call get,$(*F))) - logs in $(call log,$@)" + @$(SSH) $(call solve,$(call get,$(*F))) $(SUDO) sliver-ovs set-remote-endpoint L$(call linkpart,$(*F)) \ + $$(cat cache/host.$(call rget,$(*F))) \ + $$(cat cache/port.$(call opp,$(*F))) 2> $(call log,$@) + @touch $@ + + +###################### +# testing +###################### + +test: $(foreach l,$(LINKS),test-$(l)) + +test-%: ping-% ping-$$(call get,$$*@2)$(SEP)$$(call get,$$*@1) + @true + +ping-%: FORCE + @echo "Testing connectivity $(call get,$*@1) ===> $(call get,$*@2) - logs in $(call log,$@)" + @$(SSH) $(call solve,$(call get,$*@1)) ping -c 1 $(call solve_ip_addr,$(call get,$*@2)) > $(call log,$@) 2>&1 + @echo "Connectivity $(call get,$*@1) ===> $(call get,$*@2) OK" + +#################### +CLEANTARGETS=$(addprefix del-,$(notdir $(wildcard cache/bridge.*))) +clean: $(CLEANTARGETS) +distclean: + rm -rf L log cache +.PHONY: clean distclean + +#################### +graph.dot: + ( echo "digraph $(SLICE) {"; ls L | sed 's/$(SEP)/->/;s/$$/;/'; echo "}" ) > $@ +graph.ps: graph.dot + dot -Tps < $^ > $@ + +#################### +# googlemap stuff +# Uses: +# GMAP_SERVER, 
the hostname that runs an ndnmap instance
+# GMAP_PROBES, a list of tuples of the form
+# ===
+# e.g.
+# SENDER-MUX=SENDER=3=0.5
+# which would mean, the link 'SENDER-MUX' should be monitored at the 'SENDER' node and is coded
+# in links.json with id=3, every half second
+# sliver-ovs gmap-probe is designed to run in background,
+# and so that a new run will kill any previously running instance
+gprobes: $(foreach probe,$(GMAP_PROBES),gprobe-$(probe))
+
+gprobe-%: FORCE
+	$(SSH) -n $(call solve,$(word 2,$(subst @, ,$(*F)))) $(SUDO) \
+	sliver-ovs gmap-probe L$(word 1,$(subst @, ,$(*F))) $(GMAP_SERVER) $(word 3,$(subst @, ,$(*F))) $(word 4,$(subst @, ,$(*F)))
+
+jsons: routers.json links.json geocode.json
+.PHONY: jsons
+
+routers.json: $(CONF)
+	@{ echo "["; \
+	sep=" "; \
+	for n in $(CONF_NODE_IDS); do \
+	echo " $$sep\"$$n\""; \
+	sep=", "; \
+	done; \
+	echo "]"; \
+	} > $@
+
+links.json: $(foreach l,$(LINKS),cache/json.$(l))
+	@{ echo "["; \
+	cat $^ | sed '2,$$s/^/, /'; \
+	echo "]"; \
+	} > $@
+
+cache/json.%: $(CONF)
+	@echo "{\"id\": $($*_linkid), \"start\": \"$(call get,$*@1)\", \"end\": \"$(call get,$*@2)\"}" > $@
+
+geocode.json: $(foreach n,$(CONF_NODE_IDS),cache/geocode.$(n))
+	@{ echo "{"; \
+	sep=" "; \
+	for f in $^; do \
+	echo -n "$$sep"; \
+	cat $$f; \
+	sep=", "; \
+	done; \
+	echo "}"; \
+	} > $@
+
+cache/geocode.%: cache/loc.% $(CONF)
+	@{ echo "\"$*\": {"; \
+	echo " \"name\": \"$(HOST_$*)\","; \
+	echo " \"shortname\": \"$*\","; \
+	echo " \"site\": \"\","; \
+	echo " \"backbone\": false,"; \
+	echo " \"position\": [ $(shell cat cache/loc.$*) ]"; \
+	echo " }"; \
+	} > $@
+
+cache/loc.%: $(CONF)
+	@wget -O - 'http://freegeoip.net/csv/$(HOST_$*)' | \
+	awk -F, '{ printf "%s, %s\n", $$8, $$9 }' > $@
+
+####################
+# 'virtual' targets in that there's no real file attached
+define node_shortcuts
+sshcheck.$(1): sshcheck-$(1) FORCE
+db.$(1): cache/db.$(1) FORCE
+switch.$(1): cache/switch.$(1) FORCE
+start.$(1): cache/start.$(1) FORCE
+stop.$(1): cache/stop.$(1) FORCE +status.$(1): cache/status.$(1) FORCE +bridge.$(1): cache/bridge.$(1) FORCE +host.$(1): cache/host.$(1) FORCE +controller.$(1): cache/controller.$(1) FORCE +# switch already depends on db, but well +cache/start.$(1): cache/db.$(1) cache/switch.$(1) FORCE +endef + +$(foreach id,$(ALL_NODE_IDS), $(eval $(call node_shortcuts,$(id)))) + +define link_shortcuts +iface.%: cache/iface.% +endpoint.%: cache/endpoint.% +endef + +$(foreach id,$(ALL_LINK_IDS), $(eval $(call link_shortcuts,$(id)))) + +help: + @cat Makefile.help + +#################### convenience, for debugging only +# make +foo : prints the value of $(foo) +# make ++foo : idem but verbose, i.e. foo=$(foo) +++%: varname=$(subst +,,$@) +++%: + @echo "$(varname)=$($(varname))" ++%: varname=$(subst +,,$@) ++%: + @echo "$($(varname))" + +# external nodes and links +# +ALL_LINKS := $(LINKS) $(patsubst L/%,%,$(filter L/%,$(MAKECMDGOALS))) +EXTERNAL_LINKS := $(filter $(foreach host,$(EXTERNAL_HOSTS),%-$(host)),$(ALL_LINKS)) + +$(foreach host,$(EXTERNAL_HOSTS),sshcheck-$(host)): ; +$(foreach host,$(EXTERNAL_HOSTS),ovsversion-$(host)): ; +$(foreach host,$(EXTERNAL_HOSTS),showdpid-$(host)): ; +$(foreach host,$(EXTERNAL_HOSTS),showmac-$(host)): ; +$(foreach host,$(EXTERNAL_HOSTS),showports-$(host)): ; +$(foreach host,$(EXTERNAL_HOSTS),update-$(host)): ; +$(foreach host,$(EXTERNAL_HOSTS),del-controller-$(host)): ; +$(foreach host,$(EXTERNAL_HOSTS),cache/status.$(host)): ; +$(foreach host,$(EXTERNAL_HOSTS),cache/controller.$(host)): ; + +$(foreach host,$(EXTERNAL_HOSTS),cache/rsnap.ip.$(host)): ; + @touch $@ + +$(foreach host,$(EXTERNAL_HOSTS),cache/rsnap.links.$(host)): ; + @touch $@ + +$(foreach host,$(EXTERNAL_HOSTS),cache/db.$(host)): + @touch $@ + +$(foreach host,$(EXTERNAL_HOSTS),del-db.$(host)): del-db.%: + @rm cache/db.$* + +$(foreach host,$(EXTERNAL_HOSTS),cache/switch.$(host)): ; + @touch $@ + +$(foreach host,$(EXTERNAL_HOSTS),del-switch.$(host)): del-switch.%: + @rm -f 
cache/switch.$* \ + cache/port.$*$(SEP)*@1 cache/port.*$(SEP)$*@2 \ + cache/endpoint.$*$(SEP)*@2 cache/endpoint.*$(SEP)$*@1 \ + L/$*$(SEP)* L/*$(SEP)$* \ + cache/controller.$* + +.SECONDEXPANSION: + +$(foreach host,$(EXTERNAL_HOSTS),cache/bridge.$(host)): cache/bridge.%: | cache/db.% + @echo "Creating fake bridge on external host $(call display,$*)" + @touch $@ + +$(foreach link,$(EXTERNAL_LINKS),cache/iface.$(link)@2): cache/iface.%: cache/bridge.$$(call get,%) | cache/db.$$(call get,%) + @echo "Creating fake interface for link $(call linkpart,$*) on external host $(call get,$*)" + @touch $@ + +$(foreach link,$(EXTERNAL_LINKS),cache/port.$(link)@2): cache/port.%: cache/iface.% cache/switch.$$(call get,%) + @echo "Getting port number for link $(call linkpart,$(*F)) on external host $(call display,$(call get,$(*F)))" + @echo $(call default,EXTERNAL_PORT,$(call rightnode,$*)) > $@ + + +$(foreach link,$(EXTERNAL_LINKS),del-iface.$(link)@2): del-iface.%: | cache/db.$$(call get,%) + @echo "Removing fake interface for link $(call linkpart,$*) from external host $(call get,$*)" + @rm -f cache/iface.$* \ + cache/port.$* cache/endpoint.$* cache/endpoint.$(call opp,$*) \ + L/$(call linkpart,$*) + +$(foreach host,$(EXTERNAL_HOSTS),del-bridge.$(host)): del-bridge.%: | cache/db.% + @echo "Deleting fake bridge on external host $(call display,$*)" + @rm -f cache/bridge.$* \ + cache/iface.$*$(SEP)*@1 cache/iface.*$(SEP)$*@2 \ + cache/port.$*$(SEP)*@1 cache/port.*$(SEP)$*@2 \ + cache/endpoint.$*$(SEP)*@? cache/endpoint.*$(SEP)$*@? \ + L/$*$(SEP)* L/*$(SEP)$* \ + cache/controller.$* + +$(foreach link,$(EXTERNAL_LINKS),ping-$(call get,$(link)@2)$(SEP)$(call get,$(link)@1)): ping-%: FORCE + @echo "Test from external host $(call get,$*@1) to $(call get,$*@2) skipped" + +# we assume that a program called tunproxy is available on the +# external nodes. 
+$(foreach link,$(EXTERNAL_LINKS),cache/endpoint.$(link)@2): cache/endpoint.%@2: cache/port.%@1 cache/host.$$(call leftnode,%) + @#echo $* [$<] [$^] $(call rightnode,$*) + @echo "===>" $(call rightnode,$*): \ + ./tunproxy -t $$(cat cache/host.$(call leftnode,$*)):$$(cat cache/port.$*@1) \ + -p $(call default,EXTERNAL_PORT,$(call rightnode,$*)) -e -d + diff --git a/planetlab/exp-tool/Makefile.help b/planetlab/exp-tool/Makefile.help new file mode 100644 index 000000000..d0f53cf5e --- /dev/null +++ b/planetlab/exp-tool/Makefile.help @@ -0,0 +1,84 @@ +in almost all cases below it is recommended to use the -j option to make +which was the point with using make in the first place +so that maximum degree of parallelism can be reached + +********************************************************** +Note: to get the greatest speedup from make -j you should also enable +connection reuse in your ssh setup, e.g., by having the following lines +in your .ssh/config: +host * + ControlMaster auto + ControlPath ~/.ssh/ssh_mux_%h_%p_%r + ControlPersist 4h + +Please note, however, that maximum concurrent per-node reuse is usually +set to something low (typically 10), and this is a problem if some node +in your topology has many links. In this case you should raise that limit +on the slivers, or limit makefile concurrency (e.g., by using make -j10). 
+********************************************************** + +==================== usual target +make [init+all] + shortcut for 'make init all' +make init + create util subdirs needed by this tool +make -j all + initialize the whole topology on all nodes +==================== inspect config +make showslice + show SLICE as defined in conf.mk +make shownodes +make showhostnames +make showips +make showlinks + show nodes and links declared in conf.mk in various formats +make sshchecks +make ovsversions + try out ssh connectivity to slivers in the slice, or show installed ovs version +make sshs SSH-COMMAND="bla bla bla" + run "bla bla bla" in all slivers (and under sudo) +==================== For changing the topology on the fly +make L/<id1>-<id2> + create a link between nodes with ids <id1> and <id2> +make U/<id1>-<id2> + delete link between nodes with ids <id1> and <id2> +==================== cleanup +make shutdown + tear down everything on the slivers, stops daemons and all +make clean + clean up whatever has been created on the slivers, keeps daemons running +make distclean + clean up local cache, useful when restarting from a clean slice +==================== configure (direct switches towards) OF controllers +make showcontrollers + show how OF controllers are configured from conf.mk + use CONTROLLER_<id> if set, or CONTROLLER as a default otherwise + e.g. 
CONTROLLER=tcp:112.34.23.56:6633 + +make controllers + configure the various switches so they use the configured controllers +make del-controllers + reset the switches to run in standalone mode +==================== devel +make update + push a new version of sliver-ovs into the slivers + use SLIVER_OVS that should point to the local new version +==================== manually/progressively deploy stuff (done in make all) +make dbs + initialize ovs db in all slivers +make switchs + start ovs switch in all slivers +make bridges + create a bridge interface linked to tap device in each sliver +make start + shortcut for make dbs switchs +make stop + stop all ovs switch in all slivers +make status + list status of db and switch processes in all slivers +==================== +make gprobes + Uses GMAP_SERVER and GMAP_PROBES to run probes in selected slivers + that can update a googlemap server for animating link speeds + This of course is mostly a demo thing, based on a one-host ndnmap deployment for now +==================== diff --git a/planetlab/exp-tool/README b/planetlab/exp-tool/README new file mode 100644 index 000000000..5494c0cc1 --- /dev/null +++ b/planetlab/exp-tool/README @@ -0,0 +1,185 @@ +* Introduction + +The Makefile contained in this directory can be used by an +experimenter to dynamically create an overlay network in a PlanetLab +slice, using the sliver-openvswitch distribution. + +The overlay network supported by the Makefile may consist of: + +- at most one Open vSwitch bridge per sliver; +- at most a pair of tunnels between each pair of slivers. + +(Please note that these limitations are due to the simple naming scheme +adopted by the Makefile, and are not limitations of sliver-openvswitch.) + +Each bridge is connected to a tap device in the sliver. The tap device +has an IP address chosen by the experimenter. The idea is to connect +all the tap devices through the overlay network made up of Open vSwitch +bridges and tunnels among them. 
+ +Please note that the tap device IP address is required for technical +reasons, even if you do not plan to use it. + + +* Installation + +In new slices, sliver-openvswitch comes preinstalled together with +the vsys scripts it needs. All you have to do is ask your administrator +for a private IP subnet for your slice. + +On older slices you may have to install sliver-openvswitch on each sliver +and obtain the following tags: + +NAME VALUE +vsys fd_tuntap +vsys vif_up +vsys vif_down +vsys promisc +vsys_net (some subnet) + + +On the experimenter box we need: + +- GNU make +- the openssh client +- the host program (usually distributed in bind-tools) +- (optionally) the dot program from the graphviz distribution + +Then, we can simply copy the Makefile in a working directory on the +experimenter box + +$ mkdir work +$ cp /path/to/Makefile work +$ cd work +$ make init + +The last command creates some subdirectories that are later used by the Makefile. + + +* Example usage + +Assume we have a PlanetLab slice called 'example_slice' which +contains four nodes: + +1) onelab7.iet.unipi.it +2) planet2.elte.hu +3) planetlab2.ics.forth.gr +4) planetlab2.urv.cat + + +Assume we have obtained subnet 10.0.9.0/24 for our slice. We are +going to build the following overlay network: + + 10.0.9.1/24 10.0.9.2/24 10.0.9.3/24 + 1 ----------- 2 ------------ 3 + | + | + | + 4 + 10.0.9.4/24 + + +In the same directory where we have put the Makefile we create a 'conf.mk' +file containing the following variables: + +---------- +SLICE=example_slice +HOST_1=onelab7.iet.unipi.it +IP_1=10.0.9.1/24 +HOST_2=planet2.elte.hu +IP_2=10.0.9.2/24 +HOST_3=planetlab2.ics.forth.gr +IP_3=10.0.9.3/24 +HOST_4=planetlab2.urv.cat +IP_4=10.0.9.4/24 + +LINKS := +LINKS += 1-2 +LINKS += 2-3 +LINKS += 2-4 +---------- + +NOTE. In this example we have chosen to use numbers (1,2,3,4) as ids +for nodes, you can use any other name that is convenient for you. +See the example files in this directory for an example of this. 
+ + +Then, we can just type: + +$ make -j + +Assuming everything has been setup correctly, this command will start +the Open vSwitch servers, create the bridges and setup the tunnels. We +can test that the network is up by logging into a node and pinging some +other node using the private subnet addresses: + +$ ssh -l example_slice onelab7.iet.unipi.it ping 10.0.9.4 + +Links can be destroyed and created dynamically. Assume we now want the +topology to match the following one: + + 10.0.9.1/24 10.0.9.2/24 + 1 ----------- 2 + | + | + | + 4 ----------- 3 + 10.0.9.4/24 10.0.9.3/24 + + +We can issue the following commands: + +$ make -j U/2-3 # unlink nodes 2 and 3 +$ make -j L/4-3 # link nodes 4 and 3 + +The current state of the links is represented as a set of files in the 'L' +directory. If dot is installed, we can obtain a graphical representation +of the topology by typing: + +$ make graph.ps + +The current state of the nodes and links can be obtained by typing + +$ make snapshot > snapshot.mk + +The snapshot.mk file follows the same format as conf.mk and can be used +to recreate the topology at a later time: + +$ make CONF=snapshot.mk + +* Command reference + +All targets can be issued with the '-j' flag to (greatly) speed up operations (*) +It may also be useful to use the '-k' flag, so that errors on some nodes do not +stop the setup on the other nodes. + + +all: do whatever is needed to setup all the links in the 'links' file. 
+ +clean: tear down all existing links + +L/N1-N2: setup a link between nodes $HOST_N1 and $HOST_N2 + +U/N1-N2: tear down the link (if it exists) between nodes $HOST_N1 + and $HOST_N2 + +del-bridge.N: delete the bridge running on node $HOST_N (this also tears down + all links that have an endpoint in $HOST_N) + +graph.ps create a postscript file containing a (simple) graphical + representation of the current topology + +==================================================== + +(*) To get the greatest speedup from make -j you should also enable +connection reuse in your ssh setup, e.g., by having the following lines +in your .ssh/config: +host * + ControlMaster auto + ControlPath ~/.ssh/ssh_mux_%h_%p_%r + ControlPersist 4h + +Please note, however, that maximum concurrent per-node reuse is usually +set to something low (typically 10), and this is a problem if some node +in your topology has many links. In this case you should raise that limit +on the slivers, or limit makefile concurrency (e.g., by using make -j10). diff --git a/planetlab/exp-tool/conf.mk.example b/planetlab/exp-tool/conf.mk.example new file mode 100644 index 000000000..e2fdaf832 --- /dev/null +++ b/planetlab/exp-tool/conf.mk.example @@ -0,0 +1,40 @@ +# the slice that you're using +SLICE=inri_sl1 + +# optionally, the OF controller that you'd like to use +# CONTROLLER = tcp:138.96.116.63:6633 +# you need to run 'make controllers' for this to take effect +# you can also specify a controller per node, e.g. 
+# CONTROLLER_SENDER = +# NOTE that hostnames do not seem to be supported as of this writing + +# optionally, the related ssh (private) key to use +SSH_KEY=key_user.rsa + +# optionally, you can change the way nodes are displayed +display="$(call solve,$(1))" + +#################### the nodes to use +HOST_SENDER=vnode09.pl.sophia.inria.fr +HOST_MUX=vnode02.pl.sophia.inria.fr +HOST_END1=vnode10.pl.sophia.inria.fr +HOST_END2=vnode07.pl.sophia.inria.fr + +# and their related IP and netmask +# note that all these MUST fall in the vsys_vnet tag as granted by your planetlab operator +# (in this example it is 10.0.100.0/24) + +IP_SENDER=10.0.100.1/24 +IP_MUX=10.0.100.2/24 +IP_END1=10.0.100.3/24 +IP_END2=10.0.100.4/24 + +#################### the links to create +LINKS := +# add one from SENDER to MUX +LINKS += SENDER-MUX +# one from MUX to each of the 2 receivers +LINKS += MUX-END1 +LINKS += MUX-END2 + +#################### diff --git a/planetlab/exp-tool/showgraph b/planetlab/exp-tool/showgraph new file mode 100755 index 000000000..85421cb3e --- /dev/null +++ b/planetlab/exp-tool/showgraph @@ -0,0 +1,7 @@ +make graph.ps +gv -watch -spartan graph.ps >/dev/null 2>&1 & +while : +do + inotifywait -e CREATE -e DELETE L + make graph.ps +done >/dev/null 2>&1 diff --git a/planetlab/exp-tool/tunproxy.txt b/planetlab/exp-tool/tunproxy.txt new file mode 100644 index 000000000..cea99afd2 --- /dev/null +++ b/planetlab/exp-tool/tunproxy.txt @@ -0,0 +1,144 @@ +/* + * tunproxy.c --- small demo program for tunneling over UDP with tun/tap + * + * Copyright (C) 2003 Philippe Biondi + * Copyright (C) 2013 Felician Nemeth + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * http://www.secdev.org/projects/tuntap_udp/files/tunproxy.c + */ + +#ifdef HAVE_CONFIG_H +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PERROR(x) do { perror(x); exit(1); } while (0) +#define ERROR(x, args ...) do { fprintf(stderr,"ERROR:" x, ## args); exit(1); } while (0) + +extern void exit(int); + +void usage() +{ + fprintf(stderr, "Usage: tunproxy -t target_ip:port [-p local_port] [-e]\n"); + exit(0); +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_in sin, sout, remote; + struct ifreq ifr; + int fd, s, remote_len, remote_port, local_port, l; + unsigned int soutlen; + char c, *p, *remote_ip = 0; + char buf[2000]; + fd_set fdset; + + int TUNMODE = IFF_TUN, DEBUG = 0; + + while ((c = getopt(argc, argv, "t:p:ehd")) != -1) { + switch (c) { + case 'h': + usage(); + case 'd': + DEBUG++; + break; + case 'p': + local_port = atoi(optarg); + break; + case 't': + p = memchr(optarg,':',16); + if (!p) ERROR("invalid argument : [%s]\n",optarg); + *p = 0; + remote_ip = optarg; + remote_port = atoi(p+1); + break; + case 'e': + TUNMODE = IFF_TAP; + break; + default: + usage(); + } + } + if (remote_ip == 0) usage(); + + if ( (fd = open("/dev/net/tun",O_RDWR)) < 0) PERROR("open"); + + memset(&ifr, 0, sizeof(ifr)); + ifr.ifr_flags = TUNMODE | IFF_NO_PI; + strncpy(ifr.ifr_name, "toto%d", IFNAMSIZ); + if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) PERROR("ioctl"); + + printf("Allocated interface %s. 
Configure and use it\n", ifr.ifr_name); + + s = socket(PF_INET, SOCK_DGRAM, 0); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(INADDR_ANY); + sin.sin_port = htons(local_port); + + if ( bind(s,(struct sockaddr *)&sin, sizeof(sin)) < 0) + PERROR("bind"); + + remote_len = sizeof(remote); + memset(&remote, 0, remote_len); + remote.sin_family = AF_INET; + remote.sin_port = htons(remote_port); + remote.sin_addr.s_addr=inet_addr(remote_ip); + + while (1) { + FD_ZERO(&fdset); + FD_SET(fd, &fdset); + FD_SET(s, &fdset); + if (select(fd+s+1, &fdset,NULL,NULL,NULL) < 0) PERROR("select"); + if (FD_ISSET(fd, &fdset)) { + if (DEBUG) + if (write(1,">", 1) < 0) PERROR("write"); + l = read(fd, buf, sizeof(buf)); + if (l < 0) + PERROR("read"); + if (sendto(s, buf, l, 0, (struct sockaddr *)&remote, remote_len) < 0) + PERROR("sendto"); + } + if (FD_ISSET(s, &fdset)) { + if (DEBUG) + if (write(1,"<", 1) < 0) PERROR("write"); + soutlen = sizeof(sout); + l = recvfrom(s, buf, sizeof(buf), 0, (struct sockaddr *)&sout, &soutlen); + if (l == -1) { + if (write(1,"(", 1) < 0) PERROR("write"); + fprintf(stderr, "[%s,%d]", strerror(errno), l); + continue; + } + if ((sout.sin_addr.s_addr != remote.sin_addr.s_addr) || + (sout.sin_port != remote.sin_port)) { + printf("Got packet from %s:%u instead of %s:%u\n", + inet_ntoa(sout.sin_addr), ntohs(sout.sin_port), + inet_ntoa(remote.sin_addr), ntohs(remote.sin_port)); + } + if (write(fd, buf, l) < 0) PERROR("write"); + } + } +} diff --git a/planetlab/scripts/refresh b/planetlab/scripts/refresh new file mode 100755 index 000000000..92b1d5fd3 --- /dev/null +++ b/planetlab/scripts/refresh @@ -0,0 +1,13 @@ +#!/bin/bash +# +# this script is not used by the build but can be convenient +# in a devel environment to ship an experimental sliver-ovs +# into running slices (using e.g. 
make update) +DIRNAME=$(dirname $0) +cd $DIRNAME +echo Refreshing sliver-ovs from sliver-ovs.in in $(pwd) +sed -e s,@RUNDIR@,/var/run/openvswitch,g \ + -e s,@DBDIR@,/etc/openvswitch,g \ + -e s,@pkgdatadir@,/usr/share/openvswitch,g \ + -e s,@LOGDIR@,/var/log/openvswitch,g \ + sliver-ovs.in > sliver-ovs diff --git a/planetlab/scripts/sliver-ovs.in b/planetlab/scripts/sliver-ovs.in new file mode 100755 index 000000000..edda1961b --- /dev/null +++ b/planetlab/scripts/sliver-ovs.in @@ -0,0 +1,396 @@ +#!/bin/bash +# -*-shell-mode-*- + +### expected to be run as root + +COMMAND=$0 + +#################### global vars +RUN_DIR=@RUNDIR@ +DB_CONF_FILE=@DBDIR@/conf.db +DB_SCHEMA=@pkgdatadir@/vswitch.ovsschema +DB_LOG=@LOGDIR@/ovs-db.log +## +DB_SOCKET=$RUN_DIR/db.sock +## +SWITCH_LOG=@LOGDIR@/ovs-switch.log + +#################### helper functions + +function kill_pltap_ovs () { + killall pltap-ovs 2>/dev/null || : +} + +function error { + echo "$@" >&2 + exit 1 +} + +function get_params { + params=$1; shift + err_msg="$COMMAND $SUBCOMMAND $(echo $params | perl -pe 's/\S+/<$&>/g')" + for p in $params; do + [[ -z "$@" ]] && error "$err_msg" + pname=$(echo -n $p|perl -pe 's/\W/_/g') + eval $pname="$1"; shift + done + [[ -n "$@" ]] && error "$err_msg" +} + +function is_switch_running { + ovs-appctl --target=ovs-vswitchd version >& /dev/null +} + +function is_db_running { + ovs-appctl --target=ovsdb-server version >& /dev/null +} + +function tapname () { + IP=$1; shift + echo $(ip addr show to "$IP/32" | perl -ne '/^\s*\d+:\s*([\w-]+):/ && print $1') +} + +function wait_server () { + pid_file=$1; shift + server_name=$1; shift + timeout=$1; shift + + expire=$(($(date +%s) + $timeout)) + + ## wait for it to be up - xxx todo - could use a timeout of some kind + while [ ! -f "$pid_file" ]; do + echo "Waiting for $server_name to start... 
$(($expire - $(date +%s)))s left" >&2 + sleep 1; + [ $(date +%s) -ge $expire ] && return 1 + done + cat "$pid_file" +} + +function wait_device () { + local tapname=$1; shift + local timeout=$1; shift + + local expire=$(($(date +%s) + $timeout)) + + while ! ip link show up | egrep -q "^[0-9]+: +$tapname:"; do + echo "Waiting for $tapname to come UP...$(($expire - $(date +%s)))s left" >&2 + sleep 1 + [ $(date +%s) -ge $expire ] && return 1 + done + return 0 +} + +######################################## startup +function start_db () { + get_params "" "$@" + + ## init conf + conf_dir=$(dirname $DB_CONF_FILE) + [ -d $conf_dir ] || mkdir -p $conf_dir + [ -f $DB_CONF_FILE ] || ovsdb-tool create $DB_CONF_FILE $DB_SCHEMA + + ## init run + [ -d $RUN_DIR ] || mkdir -p $RUN_DIR + + ## check + [ -f $DB_CONF_FILE ] || { echo "Could not initialize $DB_CONF_FILE - exiting" ; exit 1 ; } + [ -d $RUN_DIR ] || { echo "Could not initialize $RUN_DIR - exiting" ; exit 1 ; } + + ## run the stuff + if [ ! -f "$RUN_DIR/ovsdb-server.pid" ]; then + ovsdb-server $DB_CONF_FILE \ + --remote=punix:$DB_SOCKET \ + --remote=db:Open_vSwitch,Open_vSwitch,manager_options \ + --private-key=db:Open_vSwitch,SSL,private_key \ + --certificate=db:Open_vSwitch,SSL,certificate \ + --bootstrap-ca-cert=db:Open_vSwitch,SSL,ca_cert \ + --pidfile \ + --log-file=$DB_LOG \ + --detach >& /dev/null + else + echo 'ovsdb-server appears to be running already, *not* starting' + fi + wait_server $RUN_DIR/ovsdb-server.pid ovsdb-server 30 +} + +function start_switch () { + get_params "" "$@" + + # ensure ovsdb-server is running + is_db_running || { echo "ovsdb-server not running" >&2 ; exit 1 ; } + + if [ ! 
-f "$RUN_DIR/ovs-vswitchd.pid" ] ; then + ovs-vswitchd \ + --pidfile \ + --log-file=$SWITCH_LOG \ + --detach \ + unix:$DB_SOCKET >& /dev/null + else + echo 'ovs-vswitchd appears to be running already, *not* starting' + fi + wait_server $RUN_DIR/ovs-vswitchd.pid ovs-vswitchd 30 +} + +function stop_db () { + ovs-appctl --target=ovsdb-server exit || : +} + +function stop_switch () { + ovs-appctl --target=ovs-vswitchd exit || : +} + +function status () { + pids=$(pgrep '^ovs') + [ -n "$pids" ] && ps $pids +} + +function start () { + start_db + start_switch +} + +function stop () { + stop_switch + stop_db +} + +#################### create functions +function create_bridge () { + + get_params "bridge IP/PREFIX" "$1" "$2" + shift; shift; + + IP=${IP_PREFIX%/*} + PREFIX=${IP_PREFIX#*/} + + W= + if ! is_switch_running; then + # we can create the bridge even if ovs-vswitchd is not running, + # but we need a running ovsdb-server + is_db_running || { echo "ovsdb-server not running" >&2; exit 1; } + W="--no-wait" + fi + + + set -e + ovs-vsctl --db=unix:$DB_SOCKET $W -- --may-exist add-br "$bridge" \ + -- set bridge "$bridge" datapath_type=planetlab \ + -- set interface "$bridge" options:local_ip="$IP" option:local_netmask="$PREFIX" \ + -- "$@" + + # check that the bridge has actually been created + if [ -z "$W" ]; then + local tap=$(ovs-appctl netdev-pltap/get-tapname "$bridge") + wait_device $tap 10 + fi +} + +function create_port () { + + get_params "bridge port" "$@" + + W= + if ! 
is_switch_running; then + # we can create the port even if ovs-vswitchd is not running, + # but we need a running ovsdb-server + is_db_running || { echo "ovsdb-server not running" >&2; exit 1; } + W="--no-wait" + fi + + set -e + ovs-vsctl --db=unix:$DB_SOCKET $W -- --may-exist add-port "$bridge" "$port" \ + -- set interface "$port" type=tunnel +} + +function get_local_endpoint () { + + get_params "local_port" "$@" + + is_switch_running || { echo "ovs-vswitchd not running" >&2; exit 1; } + + set -e + ovs-appctl --target=ovs-vswitchd netdev-tunnel/get-port "$local_port" +} + +function set_remote_endpoint () { + + get_params "local_port remote_ip remote_UDP_port" "$@" + + W= + if ! is_switch_running; then + # we can store the info even if ovs-vswitchd is not running, + # but we need a running ovsdb-server + is_db_running || { echo "ovsdb-server not running" >&2; exit 1; } + W="--no-wait" + fi + + set -e + ovs-vsctl --db=unix:$DB_SOCKET $W set interface $local_port \ + options:remote_ip=$remote_ip \ + options:remote_port=$remote_UDP_port +} + +function set_controller () { + + get_params "bridge_name controller" "$@" + + # ensure ovs-vswitchd is running + is_switch_running || { echo "ovs-vswitchd not running" >&2 ; exit 1 ; } + + set -e + ovs-vsctl --db=unix:$DB_SOCKET set-controller "$bridge_name" "$controller" +} + +function del_controller () { + + get_params "bridge_name" "$@" + + # ensure ovs-vswitchd is running + is_switch_running || { echo "ovs-vswitchd not running" >&2 ; exit 1 ; } + + set -e + ovs-vsctl --db=unix:$DB_SOCKET del-controller "$bridge_name" +} + +#################### del functions +function del_bridge () { + + get_params "bridge_name" "$@" + + W= + if ! 
is_switch_running; then + # we can delete the bridge even if ovs-vswitchd is not running, + # but we need a running ovsdb-server + is_db_running || { echo "ovsdb-server not running" >&2; exit 1; } + W="--no-wait" + fi + + ovs-vsctl --db=unix:$DB_SOCKET $W -- --if-exists del-br $bridge_name +} + +function del_port () { + + get_params "port" "$@" + + W= + if ! is_switch_running; then + # we can delete the port even if ovs-vswitchd is not running, + # but we need a running ovsdb-server + is_db_running || { echo "ovsdb-server not running" >&2; exit 1; } + W="--no-wait" + fi + + ovs-vsctl --db=unix:$DB_SOCKET $W -- --if-exists del-port "$port" +} + +function show () { + + get_params "" "$@" + + is_db_running || { echo "ovsdb-server not running" >&2; exit 1; } + + ovs-vsctl --db=unix:$DB_SOCKET show +} + +function get_local_ip () { + + get_params "bridge" "$@" + + set -e + ovs-vsctl --db=unix:$DB_SOCKET br-exists "$bridge" || return + local ip=$(ovs-vsctl get interface "$bridge" options:local_ip) + local netmask=$(ovs-vsctl get interface "$bridge" options:local_netmask) + eval echo $ip/$netmask +} + +function get_local_links () { + + get_params "bridge" "$@" + + set -e + ovs-vsctl --db=unix:$DB_SOCKET br-exists "$bridge" || return + ovs-vsctl --db=unix:$DB_SOCKET list-ifaces "$bridge" +} + +function get_mac () { + + get_params "bridge" "$@" + + set -e + local tap=$(ovs-appctl netdev-pltap/get-tapname "$bridge") + ifconfig "$tap" | awk '/HWaddr/ { print $5 }' +} + +### for demos - connect to an ndnmap deployment to visualize links bandwidth +# this expects 3 arguments +# an interface name, L- based on your ids in conf.mk +# the hostname for a ndnmap deployment +# a linkid, this is the id that this link has in your ndnmap scenario (hard-coded in some json file) +# this one-shot function writes the current statistics onto the ndnmap site +# it needs to be called regularly so that ndnmap can do the bw computations +# would make sense for the caller to redirect stderr onto 
some relevant location +function gmap_probe_once () { + iface=$1; shift + hostname=$1; shift + linkid=$1; shift + rx_bytes=$(ovs-appctl netdev-tunnel/get-rx-bytes $iface) + tx_bytes=$(ovs-appctl netdev-tunnel/get-tx-bytes $iface) + rx_bits=$(($rx_bytes*8)) + tx_bits=$(($tx_bytes*8)) + now=$(date +%s).$(date +%N) + trigger=http://${hostname}/bw/${linkid}/${now}/${rx_bits}/${tx_bits} +# curl -s -L $trigger | grep -q "Got it" || echo Missed event with $trigger +# echo $trigger + curl -s -L $trigger >& /dev/null +} + +### the front end, manages pid and so on +function gmap_probe () { + iface=$1; shift + hostname=$1; shift + linkid=$1; shift + looptime=$1; shift + [ -z "$looptime" ] && looptime=1 + pid_file=/var/run/openvswitch/gmap-$iface.pid + if [ -f $pid_file ] ; then + pid=$(cat $pid_file) + [ -n "$pid" ] && kill $pid >& /dev/null + rm $pid_file + fi + # close std fds so that ssh invokations can return + exec <&- + exec >&- + while true; do + gmap_probe_once $iface $hostname $linkid + sleep $looptime + done & + # this is the pid for the background process + echo $! > $pid_file +} + +#################### +SUPPORTED_SUBCOMMANDS="start stop status +start_db stop_db start_switch stop_switch +create_bridge create_port del_bridge del_port +show get_local_endpoint set_remote_endpoint +set_controller del_controller gmap_probe +get_local_ip get_local_links get_mac" + +function main () { + message="Usage: $COMMAND ... 
+Supported subcommands are (dash or underscore is the same): +$SUPPORTED_SUBCOMMANDS" + [[ -z "$@" ]] && error "$message" + + SUBCOMMAND=$1; shift + # support dashes instead of underscores + SUBCOMMAND=$(echo $SUBCOMMAND | sed -e s,-,_,g) + found="" + for supported in $SUPPORTED_SUBCOMMANDS; do [ "$SUBCOMMAND" = "$supported" ] && found=yes; done + + [ -z "$found" ] && error $message + + $SUBCOMMAND "$@" +} + +main "$@" diff --git a/sliver-openvswitch.spec b/sliver-openvswitch.spec new file mode 100644 index 000000000..360f60d1a --- /dev/null +++ b/sliver-openvswitch.spec @@ -0,0 +1,106 @@ +%define name sliver-openvswitch +# to check for any change: +# grep AC_INIT configure.ac +%define version 1.11.90 +%define taglevel 1 + +%define debug_package %{nil} + +%define release %{taglevel}%{?pldistro:.%{pldistro}}%{?date:.%{date}} + +Vendor: OneLab +Packager: OneLab +Distribution: PlanetLab %{plrelease} +URL: %{SCMURL} +# Dependencies +# mar 2013 - because of the move to f18 I have to turn off auto requires +# this is because rpm would otherwise find deps to /bin/python and /bin/perl +# In other modules I was able to solve this by referring to /usr/bin/python +# instead of just python in the builds scripts, but here it looks too complex +AutoReq: no + +Summary: Openvswitch modified for running from a PlanetLab sliver +Name: %{name} +Version: %{version} +Release: %{release} +License: GPL +Group: System Environment/Applications +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot +Source0: sliver-openvswitch-%{version}.tar.gz + +%description +Openvswitch tuned for running within a PlanetLab sliver + +%prep +%setup -q + +%build +./boot.sh +# let's be as close as the regular linux/fedora layout +./configure --prefix=/usr --sysconfdir=/etc --localstatedir=/var --with-logdir=/var/log +make + +%install +make install DESTDIR=$RPM_BUILD_ROOT + +%clean +rm -rf $RPM_BUILD_ROOT + +%files +/usr/bin/* +/usr/sbin/* +/usr/share/openvswitch +/usr/share/man + +%post + +%postun + 
+%changelog +* Sat Jul 06 2013 Giuseppe Lettieri - sliver-openvswitch-1.11.90-1 +- merge with mainstream + +* Sat Jul 06 2013 Giuseppe Lettieri - sliver-openvswitch-1.10.90-3 +- merge with mainstream + +* Wed May 01 2013 Giuseppe Lettieri - sliver-openvswitch-1.10.90-2 +- - fixed several bugs in the external-nodes support in exp-tool/Makefile +- - let sliver-ovs return an error if tap device configuration failed + +* Mon Apr 22 2013 Thierry Parmentelat - sliver-openvswitch-1.10.90-1 +- merged with upstream (develoment version 1.10.90) +- integrated ALLEGRA contributions for the termination of virtual cables in external nodes. + +* Fri Feb 22 2013 Thierry Parmentelat - sliver-openvswitch-1.9.90-3 +- pulled mainstream - amazingly this is still known as 1.9.90 despite the size of changes + +* Fri Dec 21 2012 Thierry Parmentelat - sliver-openvswitch-1.9.90-2 +- merged with upstream +- handling of promisc &up/down flags for tap devices +- small improvements to the Makefile + +* Fri Nov 23 2012 Thierry Parmentelat - sliver-openvswitch-1.8.90-6 +- fixes in the exp-tool makefile (bash redirections, scp with key..) + +* Tue Oct 16 2012 Thierry Parmentelat - sliver-openvswitch-1.8.90-5 +- numerous additional make targets for finer control (use make help) +- including gprobe for reporting traffic to an ndnmap instance +- related, more functions in sliver-ovs as well, like exposing +- detailed info (mac, dpids..) 
relevant to the OF controller +- retrieving rx_bytes/tx_bytes (fixed) accessible through ovs-appctl + +* Fri Sep 28 2012 Thierry Parmentelat - sliver-openvswitch-1.8.90-4 +- fix file descriptor leaks + +* Fri Sep 28 2012 Thierry Parmentelat - sliver-openvswitch-1.8.90-3 +- can specify OpenFlow controller ip/port for each ovs instance +- through $(CONTROLLER_), or $(CONTROLLER) by default + +* Thu Sep 27 2012 Thierry Parmentelat - sliver-openvswitch-1.8.90-2 +- add/skip packet information on tap send/recv + +* Wed Sep 26 2012 Thierry Parmentelat - sliver-openvswitch-1.8.90-1 +- merged mainstream 1.8.90 +- planetlab extensions to the openvswitch: single helper command tool 'sliver-ovs' in /usr/sbin +- planetlab exp-tool : single config file (conf.mk) +- planetlab exp-tool : can retrieve and save current topology