From: Giuseppe Lettieri Date: Thu, 13 Sep 2012 09:16:45 +0000 (+0200) Subject: Merge branch 'master' of git://openvswitch.org/openvswitch X-Git-Tag: sliver-openvswitch-1.8.90-0~15 X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=e8d780af73b8571f5998cae8d3fa97069e49c9fe;hp=225b582a8c218eec242921b0eed291cf6ec19b76;p=sliver-openvswitch.git Merge branch 'master' of git://openvswitch.org/openvswitch --- diff --git a/.gitignore b/.gitignore index 702cc6ca1..cc8cb2325 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,4 @@ Module.symvers TAGS cscope.* tags +myexp/ diff --git a/.non-distfiles b/.non-distfiles new file mode 100644 index 000000000..0c43af554 --- /dev/null +++ b/.non-distfiles @@ -0,0 +1,7 @@ +planetlab/exp-tool +planetlab/exp-tool/Makefile +planetlab/exp-tool/showgraph +planetlab/exp-tool/README +sliver-openvswitch.spec +.gitignore +.non-distfiles diff --git a/Makefile.am b/Makefile.am index 1b148719f..932f495f0 100644 --- a/Makefile.am +++ b/Makefile.am @@ -145,7 +145,7 @@ dist-hook-git: distfiles (cd datapath && $(MAKE) distfiles); \ (cat distfiles; sed 's|^|datapath/|' datapath/distfiles) | \ sort -u > all-distfiles; \ - (cd $(srcdir) && git ls-files) | grep -v '\.gitignore$$' | \ + (cd $(srcdir) && git ls-files) | grep -vFf $(srcdir)/.non-distfiles | \ sort -u > all-gitfiles; \ comm -1 -3 all-distfiles all-gitfiles > missing-distfiles; \ if test -s missing-distfiles; then \ @@ -223,3 +223,4 @@ include rhel/automake.mk include xenserver/automake.mk include python/automake.mk include python/compat/automake.mk +include planetlab/automake.mk diff --git a/lib/automake.mk b/lib/automake.mk index 94b86f68c..a7f469c8f 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -88,6 +88,7 @@ lib_libopenvswitch_a_SOURCES = \ lib/multipath.c \ lib/multipath.h \ lib/netdev-dummy.c \ + lib/netdev-tunnel.c \ lib/netdev-provider.h \ lib/netdev.c \ lib/netdev.h \ @@ -181,6 +182,8 @@ lib_libopenvswitch_a_SOURCES = \ lib/timeval.h \ lib/token-bucket.c \ 
lib/token-bucket.h \ + lib/tunalloc.c \ + lib/tunalloc.h \ lib/type-props.h \ lib/unaligned.h \ lib/unicode.c \ diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 144b6b633..48bc92d91 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -316,6 +316,15 @@ dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats) return 0; } +static const char* internal_port_type(const struct dp_netdev* dp) +{ + if (dp->class == &dpif_netdev_class) + return "tap"; + if (dp->class == &dpif_planetlab_class) + return "tap_pl"; + return "dummy"; +} + static int do_add_port(struct dp_netdev *dp, const char *devname, const char *type, uint16_t port_no) @@ -329,9 +338,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type, /* XXX reject devices already in some dp_netdev. */ /* Open and validate network device. */ - open_type = (strcmp(type, "internal") ? type - : dp->class != &dpif_netdev_class ? "dummy" - : "tap"); + open_type = (strcmp(type, "internal") ? type : internal_port_type(dp)); error = netdev_open(devname, open_type, &netdev); if (error) { return error; @@ -376,7 +383,9 @@ choose_port(struct dpif *dpif, struct netdev *netdev) struct dp_netdev *dp = get_dp_netdev(dpif); int port_no; - if (dpif->dpif_class != &dpif_netdev_class) { + if (dpif->dpif_class != &dpif_netdev_class && + dpif->dpif_class != &dpif_planetlab_class) + { /* If the port name contains a number, try to assign that port number. * This can make writing unit tests easier because port numbers are * predictable. 
*/ @@ -1265,40 +1274,48 @@ dp_netdev_execute_actions(struct dp_netdev *dp, } } +#define DPIF_NETDEV_CLASS_FUNCTIONS \ + dpif_netdev_enumerate, \ + dpif_netdev_open, \ + dpif_netdev_close, \ + dpif_netdev_destroy, \ + dpif_netdev_run, \ + dpif_netdev_wait, \ + dpif_netdev_get_stats, \ + dpif_netdev_port_add, \ + dpif_netdev_port_del, \ + dpif_netdev_port_query_by_number, \ + dpif_netdev_port_query_by_name, \ + dpif_netdev_get_max_ports, \ + NULL, /* port_get_pid */ \ + dpif_netdev_port_dump_start, \ + dpif_netdev_port_dump_next, \ + dpif_netdev_port_dump_done, \ + dpif_netdev_port_poll, \ + dpif_netdev_port_poll_wait, \ + dpif_netdev_flow_get, \ + dpif_netdev_flow_put, \ + dpif_netdev_flow_del, \ + dpif_netdev_flow_flush, \ + dpif_netdev_flow_dump_start, \ + dpif_netdev_flow_dump_next, \ + dpif_netdev_flow_dump_done, \ + dpif_netdev_execute, \ + NULL, /* operate */ \ + dpif_netdev_recv_set, \ + dpif_netdev_queue_to_priority, \ + dpif_netdev_recv, \ + dpif_netdev_recv_wait, \ + dpif_netdev_recv_purge, \ + const struct dpif_class dpif_netdev_class = { "netdev", - dpif_netdev_enumerate, - dpif_netdev_open, - dpif_netdev_close, - dpif_netdev_destroy, - dpif_netdev_run, - dpif_netdev_wait, - dpif_netdev_get_stats, - dpif_netdev_port_add, - dpif_netdev_port_del, - dpif_netdev_port_query_by_number, - dpif_netdev_port_query_by_name, - dpif_netdev_get_max_ports, - NULL, /* port_get_pid */ - dpif_netdev_port_dump_start, - dpif_netdev_port_dump_next, - dpif_netdev_port_dump_done, - dpif_netdev_port_poll, - dpif_netdev_port_poll_wait, - dpif_netdev_flow_get, - dpif_netdev_flow_put, - dpif_netdev_flow_del, - dpif_netdev_flow_flush, - dpif_netdev_flow_dump_start, - dpif_netdev_flow_dump_next, - dpif_netdev_flow_dump_done, - dpif_netdev_execute, - NULL, /* operate */ - dpif_netdev_recv_set, - dpif_netdev_queue_to_priority, - dpif_netdev_recv, - dpif_netdev_recv_wait, - dpif_netdev_recv_purge, + DPIF_NETDEV_CLASS_FUNCTIONS +}; + +const struct dpif_class dpif_planetlab_class = { + 
"planetlab", + DPIF_NETDEV_CLASS_FUNCTIONS }; static void @@ -1331,3 +1348,4 @@ dpif_dummy_register(bool override) dpif_dummy_register__("dummy"); } + diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index 317e61706..f264e1b85 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -340,6 +340,7 @@ struct dpif_class { extern const struct dpif_class dpif_linux_class; extern const struct dpif_class dpif_netdev_class; +extern const struct dpif_class dpif_planetlab_class; #ifdef __cplusplus } diff --git a/lib/dpif.c b/lib/dpif.c index 296896674..7be7b2a77 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -62,6 +62,7 @@ static const struct dpif_class *base_dpif_classes[] = { &dpif_linux_class, #endif &dpif_netdev_class, + &dpif_planetlab_class, }; struct registered_dpif_class { diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 412a92deb..08937de46 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -69,6 +69,7 @@ #include "sset.h" #include "timer.h" #include "vlog.h" +#include "tunalloc.h" VLOG_DEFINE_THIS_MODULE(netdev_linux); @@ -732,7 +733,7 @@ netdev_linux_destroy(struct netdev_dev *netdev_dev_) netdev_dev->tc->ops->tc_destroy(netdev_dev->tc); } - if (class == &netdev_tap_class) { + if (class == &netdev_tap_class || class == &netdev_tap_pl_class) { destroy_tap(netdev_dev); } free(netdev_dev); @@ -768,7 +769,7 @@ netdev_linux_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp) } } - if (!strcmp(netdev_dev_get_type(netdev_dev_), "tap") && + if (!strncmp(netdev_dev_get_type(netdev_dev_), "tap", 3) && !netdev_dev->state.tap.opened) { /* We assume that the first user of the tap device is the primary user @@ -793,7 +794,7 @@ netdev_linux_close(struct netdev *netdev_) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); - if (netdev->fd > 0 && strcmp(netdev_get_type(netdev_), "tap")) { + if (netdev->fd > 0 && strncmp(netdev_get_type(netdev_), "tap", 3)) { close(netdev->fd); } free(netdev); @@ -867,7 +868,8 @@ netdev_linux_recv(struct 
netdev *netdev_, void *data, size_t size) for (;;) { ssize_t retval; - retval = (netdev_->netdev_dev->netdev_class == &netdev_tap_class + retval = ((netdev_->netdev_dev->netdev_class == &netdev_tap_class || + netdev_->netdev_dev->netdev_class == &netdev_tap_pl_class) ? read(netdev->fd, data, size) : recv(netdev->fd, data, size, MSG_TRUNC)); if (retval >= 0) { @@ -900,7 +902,7 @@ netdev_linux_drain(struct netdev *netdev_) struct netdev_linux *netdev = netdev_linux_cast(netdev_); if (netdev->fd < 0) { return 0; - } else if (!strcmp(netdev_get_type(netdev_), "tap")) { + } else if (!strncmp(netdev_get_type(netdev_), "tap", 3)) { struct ifreq ifr; int error = netdev_linux_do_ioctl(netdev_get_name(netdev_), &ifr, SIOCGIFTXQLEN, "SIOCGIFTXQLEN"); @@ -1011,7 +1013,7 @@ netdev_linux_send_wait(struct netdev *netdev_) struct netdev_linux *netdev = netdev_linux_cast(netdev_); if (netdev->fd < 0) { /* Nothing to do. */ - } else if (strcmp(netdev_get_type(netdev_), "tap")) { + } else if (strncmp(netdev_get_type(netdev_), "tap", 3)) { poll_fd_wait(netdev->fd, POLLOUT); } else { /* TAP device always accepts packets.*/ @@ -1786,6 +1788,51 @@ netdev_linux_get_qos_types(const struct netdev *netdev OVS_UNUSED, return 0; } +static int +netdev_linux_create_tap_pl(const struct netdev_class *class OVS_UNUSED, + const char *name, struct netdev_dev **netdev_devp) +{ + struct netdev_dev_linux *netdev_dev; + struct tap_state *state; + char real_name[IFNAMSIZ]; + int error; + + netdev_dev = xzalloc(sizeof *netdev_dev); + state = &netdev_dev->state.tap; + + error = cache_notifier_ref(); + if (error) { + goto error; + } + + /* Open tap device. */ + state->fd = tun_alloc(IFF_TAP, real_name); + if (state->fd < 0) { + error = errno; + VLOG_WARN("tun_alloc(IFF_TAP, %s) failed: %s", name, strerror(error)); + goto error_unref_notifier; + } + if (strcmp(name, real_name)) { + VLOG_WARN("tap_pl: requested %s, created %s", name, real_name); + } + + /* Make non-blocking. 
*/ + error = set_nonblocking(state->fd); + if (error) { + goto error_unref_notifier; + } + + netdev_dev_init(&netdev_dev->netdev_dev, name, &netdev_tap_pl_class); + *netdev_devp = &netdev_dev->netdev_dev; + return 0; + +error_unref_notifier: + cache_notifier_unref(); +error: + free(netdev_dev); + return error; +} + static const struct tc_ops * tc_lookup_ovs_name(const char *name) { @@ -2377,6 +2424,13 @@ netdev_linux_update_flags(struct netdev *netdev, enum netdev_flags off, return error; } +static int +netdev_tap_pl_update_flags(struct netdev *netdev OVS_UNUSED, enum netdev_flags off OVS_UNUSED, + enum netdev_flags on OVS_UNUSED, enum netdev_flags *old_flagsp OVS_UNUSED) +{ + return 0; +} + static unsigned int netdev_linux_change_seq(const struct netdev *netdev) { @@ -2384,7 +2438,8 @@ netdev_linux_change_seq(const struct netdev *netdev) } #define NETDEV_LINUX_CLASS(NAME, CREATE, GET_STATS, SET_STATS, \ - GET_FEATURES, GET_STATUS) \ + GET_FEATURES, GET_STATUS, \ + UPDATE_FLAGS) \ { \ NAME, \ \ @@ -2442,7 +2497,7 @@ netdev_linux_change_seq(const struct netdev *netdev) GET_STATUS, \ netdev_linux_arp_lookup, \ \ - netdev_linux_update_flags, \ + UPDATE_FLAGS, \ \ netdev_linux_change_seq \ } @@ -2454,7 +2509,8 @@ const struct netdev_class netdev_linux_class = netdev_linux_get_stats, NULL, /* set_stats */ netdev_linux_get_features, - netdev_linux_get_drv_info); + netdev_linux_get_drv_info, + netdev_linux_update_flags); const struct netdev_class netdev_tap_class = NETDEV_LINUX_CLASS( @@ -2463,7 +2519,8 @@ const struct netdev_class netdev_tap_class = netdev_tap_get_stats, NULL, /* set_stats */ netdev_linux_get_features, - netdev_linux_get_drv_info); + netdev_linux_get_drv_info, + netdev_linux_update_flags); const struct netdev_class netdev_internal_class = NETDEV_LINUX_CLASS( @@ -2472,7 +2529,18 @@ const struct netdev_class netdev_internal_class = netdev_internal_get_stats, netdev_vport_set_stats, NULL, /* get_features */ - netdev_internal_get_drv_info); + 
netdev_internal_get_drv_info, + netdev_linux_update_flags); + +const struct netdev_class netdev_tap_pl_class = + NETDEV_LINUX_CLASS( + "tap_pl", + netdev_linux_create_tap_pl, + netdev_tap_get_stats, + NULL, /* set_stats */ + netdev_linux_get_features, + netdev_linux_get_drv_info, + netdev_tap_pl_update_flags); /* HTB traffic control class. */ diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 94f60af72..f56a0cefb 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -599,6 +599,9 @@ extern const struct netdev_class netdev_tap_class; #ifdef __FreeBSD__ extern const struct netdev_class netdev_bsd_class; #endif +extern const struct netdev_class netdev_tap_pl_class; + +extern const struct netdev_class netdev_tunnel_class; #ifdef __cplusplus } diff --git a/lib/netdev-tunnel.c b/lib/netdev-tunnel.c new file mode 100644 index 000000000..d2318dbd1 --- /dev/null +++ b/lib/netdev-tunnel.c @@ -0,0 +1,510 @@ +/* + * Copyright (c) 2010, 2011, 2012 Nicira Networks. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include +#include + +#include "flow.h" +#include "list.h" +#include "netdev-provider.h" +#include "odp-util.h" +#include "ofp-print.h" +#include "ofpbuf.h" +#include "packets.h" +#include "poll-loop.h" +#include "shash.h" +#include "sset.h" +#include "unixctl.h" +#include "socket-util.h" +#include "vlog.h" + +VLOG_DEFINE_THIS_MODULE(netdev_tunnel); + +struct netdev_dev_tunnel { + struct netdev_dev netdev_dev; + uint8_t hwaddr[ETH_ADDR_LEN]; + struct netdev_stats stats; + enum netdev_flags flags; + int sockfd; + struct sockaddr_in local_addr; + struct sockaddr_in remote_addr; + bool valid_remote_ip; + bool valid_remote_port; + bool connected; + unsigned int change_seq; +}; + +struct netdev_tunnel { + struct netdev netdev; +} ; + +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); + +static struct shash tunnel_netdev_devs = SHASH_INITIALIZER(&tunnel_netdev_devs); + +static int netdev_tunnel_create(const struct netdev_class *, const char *, + struct netdev_dev **); +static void netdev_tunnel_update_seq(struct netdev_dev_tunnel *); + +static bool +is_tunnel_class(const struct netdev_class *class) +{ + return class->create == netdev_tunnel_create; +} + +static struct netdev_dev_tunnel * +netdev_dev_tunnel_cast(const struct netdev_dev *netdev_dev) +{ + assert(is_tunnel_class(netdev_dev_get_class(netdev_dev))); + return CONTAINER_OF(netdev_dev, struct netdev_dev_tunnel, netdev_dev); +} + +static struct netdev_tunnel * +netdev_tunnel_cast(const struct netdev *netdev) +{ + struct netdev_dev *netdev_dev = netdev_get_dev(netdev); + assert(is_tunnel_class(netdev_dev_get_class(netdev_dev))); + return CONTAINER_OF(netdev, struct netdev_tunnel, netdev); +} + +static int +netdev_tunnel_create(const struct netdev_class *class, const char *name, + struct netdev_dev **netdev_devp) +{ + static unsigned int n = 0xaa550000; + struct netdev_dev_tunnel *netdev_dev; + int error; + + netdev_dev = xzalloc(sizeof *netdev_dev); + 
netdev_dev_init(&netdev_dev->netdev_dev, name, class); + netdev_dev->hwaddr[0] = 0x55; + netdev_dev->hwaddr[1] = 0xaa; + netdev_dev->hwaddr[2] = n >> 24; + netdev_dev->hwaddr[3] = n >> 16; + netdev_dev->hwaddr[4] = n >> 8; + netdev_dev->hwaddr[5] = n; + netdev_dev->flags = 0; + netdev_dev->change_seq = 1; + memset(&netdev_dev->remote_addr, 0, sizeof(netdev_dev->remote_addr)); + netdev_dev->valid_remote_ip = false; + netdev_dev->valid_remote_port = false; + netdev_dev->connected = false; + + + netdev_dev->sockfd = inet_open_passive(SOCK_DGRAM, "", 0, &netdev_dev->local_addr, 0); + if (netdev_dev->sockfd < 0) { + error = netdev_dev->sockfd; + goto error; + } + + + shash_add(&tunnel_netdev_devs, name, netdev_dev); + + n++; + + *netdev_devp = &netdev_dev->netdev_dev; + + VLOG_DBG("tunnel_create: name=%s, fd=%d, port=%d", name, netdev_dev->sockfd, netdev_dev->local_addr.sin_port); + + return 0; + +error: + free(netdev_dev); + return error; +} + +static void +netdev_tunnel_destroy(struct netdev_dev *netdev_dev_) +{ + struct netdev_dev_tunnel *netdev_dev = netdev_dev_tunnel_cast(netdev_dev_); + + if (netdev_dev->sockfd != -1) + close(netdev_dev->sockfd); + + shash_find_and_delete(&tunnel_netdev_devs, + netdev_dev_get_name(netdev_dev_)); + free(netdev_dev); +} + +static int +netdev_tunnel_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp) +{ + struct netdev_tunnel *netdev; + + netdev = xmalloc(sizeof *netdev); + netdev_init(&netdev->netdev, netdev_dev_); + + *netdevp = &netdev->netdev; + return 0; +} + +static void +netdev_tunnel_close(struct netdev *netdev_) +{ + struct netdev_tunnel *netdev = netdev_tunnel_cast(netdev_); + free(netdev); +} + +static int +netdev_tunnel_get_config(struct netdev_dev *dev_, struct smap *args) +{ + struct netdev_dev_tunnel *netdev_dev = netdev_dev_tunnel_cast(dev_); + + if (netdev_dev->valid_remote_ip) + smap_add(args, "remote_ip", + xasprintf(IP_FMT, IP_ARGS(&netdev_dev->remote_addr.sin_addr))); + if 
(netdev_dev->valid_remote_port) + smap_add(args, "remote_port", + xasprintf("%"PRIu16, ntohs(netdev_dev->remote_addr.sin_port))); + return 0; +} + +static int +netdev_tunnel_connect(struct netdev_dev_tunnel *dev) +{ + if (dev->sockfd < 0) + return EBADF; + if (!dev->valid_remote_ip || !dev->valid_remote_port) + return 0; + dev->remote_addr.sin_family = AF_INET; + if (connect(dev->sockfd, (struct sockaddr*) &dev->remote_addr, sizeof(dev->remote_addr)) < 0) { + return errno; + } + dev->connected = true; + netdev_tunnel_update_seq(dev); + VLOG_DBG("%s: connected to (%s, %d)", netdev_dev_get_name(&dev->netdev_dev), + inet_ntoa(dev->remote_addr.sin_addr), ntohs(dev->remote_addr.sin_port)); + return 0; +} + +static int +netdev_tunnel_set_config(struct netdev_dev *dev_, const struct smap *args) +{ + struct netdev_dev_tunnel *netdev_dev = netdev_dev_tunnel_cast(dev_); + struct shash_node *node; + + VLOG_DBG("tunnel_set_config(%s)", netdev_dev_get_name(dev_)); + SMAP_FOR_EACH(node, args) { + VLOG_DBG("arg: %s->%s", node->name, (char*)node->data); + if (!strcmp(node->name, "remote_ip")) { + struct in_addr addr; + if (lookup_ip(node->data, &addr)) { + VLOG_WARN("%s: bad 'remote_ip'", node->name); + } else { + netdev_dev->remote_addr.sin_addr = addr; + netdev_dev->valid_remote_ip = true; + } + } else if (!strcmp(node->name, "remote_port")) { + netdev_dev->remote_addr.sin_port = htons(atoi(node->data)); + netdev_dev->valid_remote_port = true; + } else { + VLOG_WARN("%s: unknown argument '%s'", + netdev_dev_get_name(dev_), node->name); + } + } + return netdev_tunnel_connect(netdev_dev); +} + +static int +netdev_tunnel_listen(struct netdev *netdev_ OVS_UNUSED) +{ + return 0; +} + +static int +netdev_tunnel_recv(struct netdev *netdev_, void *buffer, size_t size) +{ + struct netdev_dev_tunnel *dev = + netdev_dev_tunnel_cast(netdev_get_dev(netdev_)); + if (!dev->connected) + return -EAGAIN; + for (;;) { + ssize_t retval; + retval = recv(dev->sockfd, buffer, size, MSG_TRUNC); + 
VLOG_DBG("%s: recv(%"PRIxPTR", %"PRIu64", MSG_TRUNC) = %"PRId64, + netdev_get_name(netdev_), (uintptr_t)buffer, size, retval); + if (retval >= 0) { + dev->stats.rx_packets++; + dev->stats.rx_bytes += retval; + if (retval <= size) { + return retval; + } else { + dev->stats.rx_errors++; + dev->stats.rx_length_errors++; + return -EMSGSIZE; + } + } else if (errno != EINTR) { + if (errno != EAGAIN) { + VLOG_WARN_RL(&rl, "error receiveing Ethernet packet on %s: %s", + netdev_get_name(netdev_), strerror(errno)); + dev->stats.rx_errors++; + } + return -errno; + } + } +} + +static void +netdev_tunnel_recv_wait(struct netdev *netdev_) +{ + struct netdev_dev_tunnel *dev = + netdev_dev_tunnel_cast(netdev_get_dev(netdev_)); + if (dev->sockfd >= 0) { + poll_fd_wait(dev->sockfd, POLLIN); + } +} + +static int +netdev_tunnel_send(struct netdev *netdev_, const void *buffer, size_t size) +{ + struct netdev_dev_tunnel *dev = + netdev_dev_tunnel_cast(netdev_get_dev(netdev_)); + if (!dev->connected) + return EAGAIN; + for (;;) { + ssize_t retval; + retval = send(dev->sockfd, buffer, size, 0); + VLOG_DBG("%s: send(%"PRIxPTR", %"PRIu64") = %"PRId64, + netdev_get_name(netdev_), (uintptr_t)buffer, size, retval); + if (retval >= 0) { + dev->stats.tx_packets++; + dev->stats.tx_bytes++; + if (retval != size) { + VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%"PRId64" bytes of " + "%"PRIu64") on %s", retval, size, netdev_get_name(netdev_)); + dev->stats.tx_errors++; + } + return 0; + } else if (errno != EINTR) { + if (errno != EAGAIN) { + VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s", + netdev_get_name(netdev_), strerror(errno)); + dev->stats.tx_errors++; + } + return errno; + } + } +} + +static void +netdev_tunnel_send_wait(struct netdev *netdev_) +{ + struct netdev_dev_tunnel *dev = + netdev_dev_tunnel_cast(netdev_get_dev(netdev_)); + if (dev->sockfd >= 0) { + poll_fd_wait(dev->sockfd, POLLOUT); + } +} + +static int +netdev_tunnel_drain(struct netdev *netdev_) +{ + struct 
netdev_dev_tunnel *dev = + netdev_dev_tunnel_cast(netdev_get_dev(netdev_)); + char buffer[128]; + int error; + + if (!dev->connected) + return 0; + for (;;) { + error = recv(dev->sockfd, buffer, 128, MSG_TRUNC); + if (error) { + if (error == -EAGAIN) + break; + else if (error != -EMSGSIZE) + return error; + } + } + return 0; +} + +static int +netdev_tunnel_set_etheraddr(struct netdev *netdev, + const uint8_t mac[ETH_ADDR_LEN]) +{ + struct netdev_dev_tunnel *dev = + netdev_dev_tunnel_cast(netdev_get_dev(netdev)); + + if (!eth_addr_equals(dev->hwaddr, mac)) { + memcpy(dev->hwaddr, mac, ETH_ADDR_LEN); + netdev_tunnel_update_seq(dev); + } + + return 0; +} + +static int +netdev_tunnel_get_etheraddr(const struct netdev *netdev, + uint8_t mac[ETH_ADDR_LEN]) +{ + const struct netdev_dev_tunnel *dev = + netdev_dev_tunnel_cast(netdev_get_dev(netdev)); + + memcpy(mac, dev->hwaddr, ETH_ADDR_LEN); + return 0; +} + + +static int +netdev_tunnel_get_stats(const struct netdev *netdev, struct netdev_stats *stats) +{ + const struct netdev_dev_tunnel *dev = + netdev_dev_tunnel_cast(netdev_get_dev(netdev)); + + *stats = dev->stats; + return 0; +} + +static int +netdev_tunnel_set_stats(struct netdev *netdev, const struct netdev_stats *stats) +{ + struct netdev_dev_tunnel *dev = + netdev_dev_tunnel_cast(netdev_get_dev(netdev)); + + dev->stats = *stats; + return 0; +} + +static int +netdev_tunnel_update_flags(struct netdev *netdev, + enum netdev_flags off, enum netdev_flags on, + enum netdev_flags *old_flagsp) +{ + struct netdev_dev_tunnel *dev = + netdev_dev_tunnel_cast(netdev_get_dev(netdev)); + + if ((off | on) & ~(NETDEV_UP | NETDEV_PROMISC)) { + return EINVAL; + } + + // XXX should we actually do something with this flags? 
+ *old_flagsp = dev->flags; + dev->flags |= on; + dev->flags &= ~off; + if (*old_flagsp != dev->flags) { + netdev_tunnel_update_seq(dev); + } + return 0; +} + +static unsigned int +netdev_tunnel_change_seq(const struct netdev *netdev) +{ + return netdev_dev_tunnel_cast(netdev_get_dev(netdev))->change_seq; +} + +/* Helper functions. */ + +static void +netdev_tunnel_update_seq(struct netdev_dev_tunnel *dev) +{ + dev->change_seq++; + if (!dev->change_seq) { + dev->change_seq++; + } +} + +static void +netdev_tunnel_get_port(struct unixctl_conn *conn, + int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED) +{ + struct netdev_dev_tunnel *tunnel_dev; + char buf[6]; + + tunnel_dev = shash_find_data(&tunnel_netdev_devs, argv[1]); + if (!tunnel_dev) { + unixctl_command_reply_error(conn, "no such tunnel netdev"); + return; + } + + sprintf(buf, "%d", ntohs(tunnel_dev->local_addr.sin_port)); + unixctl_command_reply(conn, buf); +} + + +static int +netdev_tunnel_init(void) +{ + unixctl_command_register("netdev-tunnel/get-port", "NAME", + 1, 1, netdev_tunnel_get_port, NULL); + return 0; +} + +const struct netdev_class netdev_tunnel_class = { + "tunnel", + netdev_tunnel_init, /* init */ + NULL, /* run */ + NULL, /* wait */ + + netdev_tunnel_create, + netdev_tunnel_destroy, + netdev_tunnel_get_config, + netdev_tunnel_set_config, + + netdev_tunnel_open, + netdev_tunnel_close, + + netdev_tunnel_listen, + netdev_tunnel_recv, + netdev_tunnel_recv_wait, + netdev_tunnel_drain, + + netdev_tunnel_send, + netdev_tunnel_send_wait, + + netdev_tunnel_set_etheraddr, + netdev_tunnel_get_etheraddr, + NULL, /* get_mtu */ + NULL, /* set_mtu */ + NULL, /* get_ifindex */ + NULL, /* get_carrier */ + NULL, /* get_carrier_resets */ + NULL, /* get_miimon */ + netdev_tunnel_get_stats, + netdev_tunnel_set_stats, + + NULL, /* get_features */ + NULL, /* set_advertisements */ + + NULL, /* set_policing */ + NULL, /* get_qos_types */ + NULL, /* get_qos_capabilities */ + NULL, /* get_qos */ + NULL, /* 
set_qos */ + NULL, /* get_queue */ + NULL, /* set_queue */ + NULL, /* delete_queue */ + NULL, /* get_queue_stats */ + NULL, /* dump_queues */ + NULL, /* dump_queue_stats */ + + NULL, /* get_in4 */ + NULL, /* set_in4 */ + NULL, /* get_in6 */ + NULL, /* add_router */ + NULL, /* get_next_hop */ + NULL, /* get_drv_info */ + NULL, /* arp_lookup */ + + netdev_tunnel_update_flags, + + netdev_tunnel_change_seq +}; diff --git a/lib/netdev.c b/lib/netdev.c index 394d8957f..2b4dec4cb 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -79,12 +79,14 @@ netdev_initialize(void) netdev_register_provider(&netdev_linux_class); netdev_register_provider(&netdev_internal_class); netdev_register_provider(&netdev_tap_class); + netdev_register_provider(&netdev_tap_pl_class); netdev_vport_register(); #endif #ifdef __FreeBSD__ netdev_register_provider(&netdev_tap_class); netdev_register_provider(&netdev_bsd_class); #endif + netdev_register_provider(&netdev_tunnel_class); } } diff --git a/lib/tunalloc.c b/lib/tunalloc.c new file mode 100644 index 000000000..12c2a70e2 --- /dev/null +++ b/lib/tunalloc.c @@ -0,0 +1,90 @@ +/* Slice-side code to allocate tuntap interface in root slice + * Based on bmsocket.c + * Thom Haddow - 08/10/09 + * + * Call tun_alloc() with IFFTUN or IFFTAP as an argument to get back fd to + * new tuntap interface. Interface name can be acquired via TUNGETIFF ioctl. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tunalloc.h" + +#define VSYS_TUNTAP "/var/run/pl-ovs.control" + +/* Reads vif FD from "fd", writes interface name to vif_name, and returns vif FD. + * vif_name should be IFNAMSIZ chars long. 
*/ +static int receive_vif_fd(int fd, char *vif_name) +{ + struct msghdr msg; + struct iovec iov; + int rv; + size_t ccmsg[CMSG_SPACE(sizeof(int)) / sizeof(size_t)]; + struct cmsghdr *cmsg; + + /* Use IOV to read interface name */ + iov.iov_base = vif_name; + iov.iov_len = IFNAMSIZ; + + msg.msg_name = 0; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + /* old BSD implementations should use msg_accrights instead of + * msg_control; the interface is different. */ + msg.msg_control = ccmsg; + msg.msg_controllen = sizeof(ccmsg); + + while(((rv = recvmsg(fd, &msg, 0)) == -1) && errno == EINTR); + if (rv == -1) { + return -1; + } + if(!rv) { + /* EOF */ + return -1; + } + + cmsg = CMSG_FIRSTHDR(&msg); + if (!cmsg->cmsg_type == SCM_RIGHTS) { + return -1; + } + return *(int*)CMSG_DATA(cmsg); +} + + +int tun_alloc(int iftype, char *if_name) +{ + int control_fd; + struct sockaddr_un addr; + int remotefd; + + control_fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (control_fd == -1) { + return -1; + } + + memset(&addr, 0, sizeof(struct sockaddr_un)); + /* Clear structure */ + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path, VSYS_TUNTAP, + sizeof(addr.sun_path) - 1); + + if (connect(control_fd, (struct sockaddr *) &addr, + sizeof(struct sockaddr_un)) == -1) { + return -1; + } + + remotefd = receive_vif_fd(control_fd, if_name); + + close(control_fd); + + return remotefd; +} diff --git a/lib/tunalloc.h b/lib/tunalloc.h new file mode 100644 index 000000000..3e5caae1d --- /dev/null +++ b/lib/tunalloc.h @@ -0,0 +1,6 @@ +#ifndef _TUNALLOC_H +#define _TUNALLOC_H + +int tun_alloc(int iftype, char *if_name); + +#endif diff --git a/planetlab/automake.mk b/planetlab/automake.mk new file mode 100644 index 000000000..ccc4f7d73 --- /dev/null +++ b/planetlab/automake.mk @@ -0,0 +1,16 @@ +sbin_PROGRAMS += planetlab/pltap-ovs/pltap-ovs +sbin_PROGRAMS += planetlab/vsysc/vsysc + +# this Makefile is not intended to go on the sliver image - esp. 
not in /usr/sbin +# planetlab/scripts/Makefile +# same goes for showgraph +# planetlab/scripts/showgraph +dist_sbin_SCRIPTS += planetlab/scripts/sliver-ovs + +planetlab_pltap_ovs_pltap_ovs_SOURCES = +planetlab_pltap_ovs_pltap_ovs_SOURCES += planetlab/pltap-ovs/pltap-ovs.c +planetlab_pltap_ovs_pltap_ovs_SOURCES += planetlab/pltap-ovs/tunalloc.c +planetlab_pltap_ovs_pltap_ovs_SOURCES += planetlab/pltap-ovs/tunalloc.h + +planetlab_vsysc_vsysc_SOURCES = +planetlab_vsysc_vsysc_SOURCES += planetlab/vsysc/vsysc.c diff --git a/planetlab/exp-tool/Makefile b/planetlab/exp-tool/Makefile new file mode 100644 index 000000000..ae2700866 --- /dev/null +++ b/planetlab/exp-tool/Makefile @@ -0,0 +1,259 @@ +# see README +# conf.mk is expected to define +# HOST_ and IP_ for all nodes involved, as well as +# LINKS as a list of - elements + +# run make CONF=anotherconfig.mk if you need several configs + +CONF ?= conf.mk +include $(CONF) + +# if undefined in the conf file, use single dash +SEP?=- + +### helper functions +# flip(1) = 2 +# flip(2) = 1 +flip=$(if $(findstring 1,$(1)),2,1) +# cutsep (x-y)-> x y +cutsep=$(subst $(SEP), ,$(1)) +# leftnode (x-y) -> x +leftnode=$(word 1,$(call cutsep,$(1))) +# rightnode (x-y) -> y +rightnode=$(word 2,$(call cutsep,$(1))) +# linkpart(x@y) = x +linkpart=$(word 1,$(subst @, ,$(1))) +# endpart(x@y) = y +endpart=$(word 2,$(subst @, ,$(1))) +# get(x-y@1) = x +# get(x-y@2) = y +get=$(word $(call endpart,$(1)),$(call cutsep,$(call linkpart,$(1)))) +# opp(x-y@1) = x-y@2 +# opp(x-y@2) = x-y@1 +opp=$(call linkpart,$(1))@$(call flip,$(call endpart,$(1))) +# rget(x-y@1) = y +# rget(x-y@2) = x +rget=$(call get,$(call opp,$(1))) +### +solve=$(HOST_$(1)) +solve_ip=$(IP_$(1)) +# can be redefined in conf.mk if that's not the expected behaviour +display?=host $(1) aka $(call solve,$(1)) + +#################### set variables after conf.mk +ifeq "$(SSH_KEY)" "" +SSH_KEY_OPTION ?= +else +SSH_KEY_OPTION ?= -i $(SSH_KEY) +endif + +SSH_OPTIONS ?= $(SSH_KEY_OPTION) -l 
$(SLICE) +SSH = ssh $(SSH_OPTIONS) + +SUDO ?= sudo -S + +ALL_NODE_IDS=$(sort $(foreach link,$(LINKS),$(call leftnode,$(link))) $(foreach link,$(LINKS),$(call rightnode,$(link)))) +ALL_LINK_IDS=$(addsuffix @1,$(LINKS)) $(addsuffix @2,$(LINKS)) + +#################### +all+init: init all +init: + @[ -d L ] || ( echo Creating tmp dir L; mkdir L) + @[ -d U ] || ( echo Creating tmp dir U; mkdir U) + @[ -d cache ] || ( echo Creating tmp dir cache; mkdir cache) +.PHONY: all+init init + +FORCE: + +.SECONDARY: + +LINKTARGETS=$(addprefix L/,$(LINKS)) +all: $(LINKTARGETS) +.PHONY: all + +# could also do make ++SLICE +showslice: ++SLICE FORCE + +shownodes: + @$(foreach id,$(ALL_NODE_IDS),echo $(id)=$(call display,$(id));) +showips: + @$(foreach id,$(ALL_NODE_IDS),echo $(id)=$(call display,$(id)) has ip/network set to $(IP_$(id));) +showlinks: + @$(foreach link,$(LINKS), echo $(call display,$(call leftnode,$(link))) '====>' $(call display,$(call rightnode,$(link)));) +.PHONY: shownodes showips showlinks + +sshchecks: $(foreach id,$(ALL_NODE_IDS),cache/sshcheck.$(id)) +.PHONY: sshchecks + +DBS=$(foreach id,$(ALL_NODE_IDS),cache/db.$(id)) +dbs: $(DBS) +.PHONY: dbs + +SWITCHS=$(foreach id,$(ALL_NODE_IDS),cache/switch.$(id)) +switchs: $(SWITCHS) +.PHONY: switchs + +start: dbs switchs +.PHONY: start + +stop:$(foreach id,$(ALL_NODE_IDS),cache/stop.$(id)) +.PHONY: stop + +status:$(foreach id,$(ALL_NODE_IDS),cache/status.$(id)) +.PHONY: status + +BRIDGES=$(foreach id,$(ALL_NODE_IDS),cache/bridge.$(id)) +bridges: $(BRIDGES) +.PHONY: bridges + +### node-oriented targets +# check ssh connectivity +cache/sshcheck.%: FORCE + @if $(SSH) $(HOST_$*) hostname 2> /dev/null; then echo "ssh on" $(call display,$*) "OK" ; \ + else echo "ssh on" $(call display,$*) "KO !!!"; fi + +# should probably replace sshcheck +cache/status.%: FORCE + @echo "=== DB and SWITCH processes on $(call display,$*)" + @$(SSH) $(HOST_$*) $(SUDO) sliver-ovs status + +cache/host.%: + @echo "IP lookup for $(call display,$*)" 
+ @host $(HOST_$*) | sed -n 's/^.*has address *//p' > $@ + +cache/db.%: + @echo "Starting db server on $(call display,$*) - logs in $@.log" + @$(SSH) $(HOST_$*) $(SUDO) sliver-ovs start-db &> $@.log && touch $@ + +cache/switch.%: cache/db.% + @echo "Starting vswitchd on $(call display,$*) - logs in $@.log" + @$(SSH) $(HOST_$*) $(SUDO) sliver-ovs start-switch &> $@.log && touch $@ + +cache/bridge.%: cache/switch.% + @echo "Creating bridge on $(call display,$*) - logs in $@.log" + @$(SSH) $(HOST_$*) $(SUDO) sliver-ovs create-bridge $(IP_$*) > $@ 2> $@.log || { rm $@; exit 1; } + @echo Created bridge $$(cat $@) on $(HOST_$*) + +# xxx this probably needs a more thorough cleanup in cache/ +cache/stop.%: del-bridge.% + @echo "Stopping switch & db on $(call display,$*)" + @$(SSH) $(HOST_$*) $(SUDO) sliver-ovs stop && rm -f cache/switch.$* cache/db.$* + +### link-oriented targets +# L/- +L/%: cache/link.%@1 cache/link.%@2 + @touch $@ + @echo "Created link $*" + +U/%: del-iface.%@1 del-iface.%@2 + @rm -f L/$* + @echo "Deleted link $*" + +del-bridge.%: cache/db.% + @echo "Deleting bridge on $(call display,$*)" + @if [ -f cache/bridge.$* ]; then \ + $(SSH) $(HOST_$*) $(SUDO) sliver-ovs del-bridge $$(cat cache/bridge.$*);\ + fi + @rm -f cache/bridge.$* \ + cache/iface.$*$(SEP)*@1 cache/iface.*$(SEP)$*@2 \ + cache/link.$*$(SEP)*@? cache/link.*$(SEP)$*@?
\ + L/$*$(SEP)* L/*$(SEP)$* + +del-switch.%: del-bridge.% + @echo "Shutting down switch on $(call display,$*)" + @[ -f cache/switch.$* ] && $(SSH) $(HOST_$*) $(SUDO) sliver-ovs stop-switch + @rm -f cache/switch.$* + +del-db.%: + @echo "Shutting down db on $(call display,$*)" + @[ -f cache/db.$* ] && $(SSH) $(HOST_$*) $(SUDO) sliver-ovs stop-db + @rm -f cache/db.$* + +del-links: $(addprefix U/,$(notdir $(filter-out %.log,$(wildcard L/*)))) + +del-switchs: $(addprefix del-,$(notdir $(filter-out %.log,$(wildcard cache/switch.*)))) + +del-dbs: $(addprefix del-,$(notdir $(filter-out %.log,$(wildcard cache/db.*)))) + +shutdown: del-switchs del-dbs + +.PHONY: del-links del-switchs del-dbs shutdown + +.SECONDEXPANSION: + +del-iface.%: cache/db.$$(call get,%) + @echo "Removing interface for link $(call linkpart,$*) from $(call get,$*)" + @$(SSH) $(HOST_$(call get,$*)) \ + $(SUDO) sliver-ovs del-port L$(call linkpart,$*) + @rm -f cache/iface.$* cache/link.$* cache/link.$(call opp,$*) + + +### '%' here is leftid-rightid@{1,2} +# we retrieve % as $(*F) +#linkid=$(call linkpart,%) +#nodeid=$(call get,%) +#bridgefile=cache/bridge.$(nodeid) +cache/iface.%: cache/bridge.$$(call get,%) + @echo "Creating interface for link $(call linkpart,$(*F)) on $(call display,$(call get,$(*F))) - logs in $@.log" + @$(SSH) $(call solve,$(call get,$(*F))) $(SUDO) sliver-ovs create-port $$(cat cache/bridge.$(call get,$(*F))) L$(call linkpart,$(*F)) > $@ 2> $@.log || { rm $@; exit 1; } + echo cache/bridge.$(call get,$(*F)) + echo cache/host.$(call get,$(*F)) cache/iface.$(*F) cache/iface.$(call opp,$(*F)) + + +# linkid=$(call linkpart,%) +# nodeid=$(call get,%) +# iface1=cache/iface.% +# iface2=cache/iface.$(call opp,%) +cache/link.%: cache/host.$$(call get,%) cache/iface.% cache/iface.$$(call opp,%) + @echo "Setting port number of link $(call linkpart,$(*F)) on $(call display,$(call get,$(*F))) - logs in $@.log" + @$(SSH) $(call solve,$(call get,$(*F))) $(SUDO) sliver-ovs set-remote-endpoint 
L$(call linkpart,$(*F)) \ + $$(cat cache/host.$(call rget,$(*F))) \ + $$(cat cache/iface.$(call opp,$(*F))) 2> $@.log \ + && touch $@ + +#################### +CLEANTARGETS=$(addprefix del-,$(notdir $(filter-out %.log,$(wildcard cache/bridge.*)))) +clean: $(CLEANTARGETS) +distclean: + rm -rf L U cache +.PHONY: clean distclean + +#################### +graph.dot: + ( echo "digraph $(SLICE) {"; ls L | sed 's/$(SEP)/->/;s/$$/;/'; echo "}" ) > $@ +graph.ps: graph.dot + dot -Tps < $^ > $@ + +#################### +# 'virtual' targets in that there's no real file attached +define node_shortcuts +sshcheck.$(1): cache/sshcheck.$(1) FORCE +db.$(1): cache/db.$(1) FORCE +switch.$(1): cache/switch.$(1) FORCE +start.$(1): cache/start.$(1) FORCE +stop.$(1): cache/stop.$(1) FORCE +status.$(1): cache/status.$(1) FORCE +bridge.$(1): cache/bridge.$(1) FORCE +host.$(1): cache/host.$(1) FORCE +# switch already depends on db, but well +cache/start.$(1): cache/db.$(1) cache/switch.$(1) FORCE +endef + +$(foreach id,$(ALL_NODE_IDS), $(eval $(call node_shortcuts,$(id)))) + +define link_shortcuts +iface.%: cache/iface.% +link.%: cache/link.% +endef + +$(foreach id,$(ALL_LINK_IDS), $(eval $(call link_shortcuts,$(id)))) + +#################### convenience, for debugging only +# make +foo : prints the value of $(foo) +# make ++foo : idem but verbose, i.e. foo=$(foo) +++%: varname=$(subst +,,$@) +++%: + @echo "$(varname)=$($(varname))" ++%: varname=$(subst +,,$@) ++%: + @echo "$($(varname))" diff --git a/planetlab/exp-tool/README b/planetlab/exp-tool/README new file mode 100644 index 000000000..641aca23a --- /dev/null +++ b/planetlab/exp-tool/README @@ -0,0 +1,169 @@ +* Introduction + +The Makefile contained in this directory can be used by an +experimenter to dynamically create an overlay network in a PlanetLab +slice, using the sliver-openvswitch distribution. At present, the +Makefile only supports the creation of the basic topology (nodes and +links). 
+All the additional configuration of the bridges/switches (in +particular, connecting the switches to OpenFlow controllers or +enabling the Spanning Tree Protocol aka STP) has to be done using the +tools available in the Open vSwitch distribution. This may change in +the future. + +The overlay network supported by the Makefile may consist of: + +- at most one Open vSwitch bridge per sliver; +- at most a pair of tunnels between each pair of slivers. + +(Please note that these limitations are due to the simple naming scheme +adopted by the Makefile, and are not limitations of sliver-openvswitch.) + +Each bridge is connected to a tap device in the sliver. The tap device +has an IP address chosen by the experimenter. The idea is to connect +all the tap devices through the overlay network made up of Open vSwitch +bridges and tunnels among them. + + +* Installation + +On each sliver we need to install sliver-openvswitch and obtain the +following tags: + +NAME VALUE +vsys fd_tuntap +vsys vif_up +vsys vif_down +vsys_net (some subnet) + + +On the experimenter box we need: + +- GNU make +- the openssh client +- the host program (usually distributed in bind-tools) +- (optionally) the dot program from the graphviz distribution + +Then, we can simply copy the Makefile into a working directory on the +experimenter box. The directory must also contain subdirectories 'L' +and 'cache': + +$ mkdir work +$ cp /path/to/Makefile work +$ cd work +$ mkdir -p L cache + + +* Example usage + +Assume we have a PlanetLab slice called 'myslice' which +contains four nodes: + +1) onelab7.iet.unipi.it +2) planet2.elte.hu +3) planetlab2.ics.forth.gr +4) planetlab2.urv.cat + + +Assume we have reserved subnet 10.0.9.0/24 using vsys_net.
We are +going to build the following overlay network: + + 10.0.9.1/24 10.0.9.2/24 10.0.9.3/24 + 1 ----------- 2 ------------ 3 + | + | + | + 4 + 10.0.9.4/24 + + +In the same directory where we have put the Makefile we create a 'conf.mk' +file containing the following variables: + +---------- +SLICE=myslice +HOST_1=onelab7.iet.unipi.it +IP_1=10.0.9.1/24 +HOST_2=planet2.elte.hu +IP_2=10.0.9.2/24 +HOST_3=planetlab2.ics.forth.gr +IP_3=10.0.9.3/24 +HOST_4=planetlab2.urv.cat +IP_4=10.0.9.4/24 + +LINKS := +LINKS += 1-2 +LINKS += 2-3 +LINKS += 2-4 +---------- + +NOTE. In this example we have chosen to use numbers (1,2,3,4) as ids +for nodes; you can use any other name that is convenient for you. +See the example files in this directory for an example of this. + + +Then, we can just type: + +$ make -j + +Assuming everything has been set up correctly, this command will start +the Open vSwitch servers, create the bridges and set up the tunnels. We +can test that the network is up by logging into a node and pinging some +other node using the private subnet addresses: + +$ source conf.mk +$ ssh -l $SLICE $HOST_1 ping 10.0.9.4 + +Links can be destroyed and created dynamically. Assume we now want +the topology to match the following one: + + 10.0.9.1/24 10.0.9.2/24 + 1 ----------- 2 + | + | + | + 4 ----------- 3 + 10.0.9.4/24 10.0.9.3/24 + + +We can issue the following commands: + +$ make -j U/2-3 # unlink nodes 2 and 3 +$ make -j L/4-3 # link nodes 4 and 3 + +The current state of the links is represented as a set of files in the 'L' +directory.
If dot is installed, we can obtain a graphical representation +of the topology by typing: + +$ make graph.ps + +#### BEG WARNING xxx this feature is broken now that LINKS are defined in the same conf.mk file +Or we can save the current state in the 'links' file (so that we can +recreate it later): + +$ ls L > links +#### END WARNING xxx this feature is broken now that LINKS are defined in the same conf.mk file + + +* Command reference + +All targets can be issued with the '-j' flag to (greatly) speed up operations. +It may also be useful to use the '-k' flag, so that errors on some nodes do not +stop the setup on the other nodes. + + +all: do whatever is needed to set up all the links listed in the LINKS variable of 'conf.mk'. + +clean: tear down all existing links + +L/N1-N2: set up a link between nodes HOST_N1 and HOST_N2 + +U/N1-N2: tear down the link (if it exists) between nodes HOST_N1 + and HOST_N2 + +del-bridge.N: delete the bridge running on node HOST_N (this also tears down + all links that have an endpoint in N) + +graph.ps: create a postscript file containing a (simple) graphical + representation + of the current topology diff --git a/planetlab/exp-tool/conf.mk.example b/planetlab/exp-tool/conf.mk.example new file mode 100644 index 000000000..f4d9a86f2 --- /dev/null +++ b/planetlab/exp-tool/conf.mk.example @@ -0,0 +1,35 @@ +# the slice that you're using +SLICE=inri_sl1 + +# optionally, the related ssh (private) key to use +SSH_KEY=key_user.rsa + +# optionally, you can change the way nodes are displayed +display="$(call solve,$(1))" + +#################### the nodes to use +HOST_SENDER=vnode09.pl.sophia.inria.fr +HOST_MUX=vnode02.pl.sophia.inria.fr +HOST_END1=vnode10.pl.sophia.inria.fr +HOST_END2=vnode07.pl.sophia.inria.fr + +# and their related IP and netmask +# note that all these MUST fall in the vsys_vnet tag as granted by your planetlab operator +# (in this example it is 10.0.100.0/24) + +IP_SENDER=10.0.100.1/24 +IP_MUX=10.0.100.2/24 +IP_END1=10.0.100.3/24
+IP_END2=10.0.100.4/24 + +#################### the links to create +LINKS := +# add one from SENDER to MUX +LINKS += SENDER-MUX +# one from MUX to each of the 2 receivers +LINKS += MUX-END1 +LINKS += MUX-END2 + +#################### + + diff --git a/planetlab/exp-tool/showgraph b/planetlab/exp-tool/showgraph new file mode 100755 index 000000000..85421cb3e --- /dev/null +++ b/planetlab/exp-tool/showgraph @@ -0,0 +1,7 @@ +make graph.ps +gv -watch -spartan graph.ps >/dev/null 2>&1 & +while : +do + inotifywait -e CREATE -e DELETE L + make graph.ps +done >/dev/null 2>&1 diff --git a/planetlab/pltap-ovs/pltap-ovs.c b/planetlab/pltap-ovs/pltap-ovs.c new file mode 100644 index 000000000..e1660b09d --- /dev/null +++ b/planetlab/pltap-ovs/pltap-ovs.c @@ -0,0 +1,141 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tunalloc.h" + +#define OVS_SOCK "/var/run/pl-ovs.control" + +void send_fd(int p, int fd, char* vif_name); + +char *appname; + +#define ERROR(msg) \ + do { \ + fprintf(stderr, "%s: %s: %s", appname, msg, strerror(errno)); \ + exit(1); \ + } while (0) + + +static +int send_vif_fd(int sock_fd, int vif_fd, char *vif_name) +{ + int retval; + struct msghdr msg; + struct cmsghdr *p_cmsg; + struct iovec vec; + size_t cmsgbuf[CMSG_SPACE(sizeof(vif_fd)) / sizeof(size_t)]; + int *p_fds; + + + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + p_cmsg = CMSG_FIRSTHDR(&msg); + p_cmsg->cmsg_level = SOL_SOCKET; + p_cmsg->cmsg_type = SCM_RIGHTS; + p_cmsg->cmsg_len = CMSG_LEN(sizeof(vif_fd)); + p_fds = (int *) CMSG_DATA(p_cmsg); + *p_fds = vif_fd; + msg.msg_controllen = p_cmsg->cmsg_len; + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = &vec; + msg.msg_iovlen = 1; + msg.msg_flags = 0; + + /* Send the interface name as the iov */ + vec.iov_base = vif_name; + vec.iov_len = strlen(vif_name)+1; + + while ((retval = sendmsg(sock_fd, &msg, 0)) == -1 && errno == EINTR); + if 
(retval == -1) { + ERROR("sending file descriptor"); + } + return 0; +} + +void send_fd(int p, int fd, char* vif_name) +{ + int control_fd; + int accept_fd; + struct sockaddr_un addr, accept_addr; + socklen_t addr_len = sizeof(accept_addr); + + control_fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (control_fd == -1 && errno != ENOENT) { + ERROR("Could not create UNIX socket"); + } + + memset(&addr, 0, sizeof(struct sockaddr_un)); + /* Clear structure */ + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path, OVS_SOCK, + sizeof(addr.sun_path) - 1); + + if (unlink(OVS_SOCK) == -1 && errno != ENOENT) { + ERROR("Could not unlink " OVS_SOCK " control socket"); + } + + if (bind(control_fd, (struct sockaddr *) &addr, + sizeof(struct sockaddr_un)) == -1) { + ERROR("Could not bind to " OVS_SOCK " control socket"); + } + + if (listen(control_fd, 5) == -1) { + ERROR("listen on " OVS_SOCK " failed"); + } + if (write(p, "1", 1) != 1) { + ERROR("writing on the synch pipe"); + } + if ((accept_fd = accept(control_fd, (struct sockaddr*) &accept_addr, + &addr_len)) == -1) { + ERROR("accept on " OVS_SOCK " failed"); + } + send_vif_fd(accept_fd, fd, vif_name); +} + +int main(int argc, char* argv[]) +{ + char if_name[IFNAMSIZ]; + int p[2]; // synchronization pipe + char dummy; + int tun_fd; + + (void) argc; // unused + + if (pipe(p) < 0) { + ERROR("pipe"); + } + + tun_fd = tun_alloc(IFF_TAP, if_name); + + appname = argv[0]; + + switch(fork()) { + case -1: + ERROR("fork"); + exit(1); + case 0: + close(1); + open("/dev/null", O_WRONLY); + close(p[0]); + send_fd(p[1], tun_fd, if_name); + exit(0); + default: + close(p[1]); + if (read(p[0], &dummy, 1) != 1) { + ERROR("reading from the synch pipe"); + } + printf("%s\n", if_name); + } + return 0; +} diff --git a/planetlab/pltap-ovs/tunalloc.c b/planetlab/pltap-ovs/tunalloc.c new file mode 100644 index 000000000..1f083e517 --- /dev/null +++ b/planetlab/pltap-ovs/tunalloc.c @@ -0,0 +1,101 @@ +/* Slice-side code to allocate tuntap interface in root 
slice + * Based on bmsocket.c + * Thom Haddow - 08/10/09 + * + * Call tun_alloc() with IFFTUN or IFFTAP as an argument to get back fd to + * new tuntap interface. Interface name can be acquired via TUNGETIFF ioctl. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define VSYS_TUNTAP "/vsys/fd_tuntap.control" + + +int tun_alloc(int iftype, char *if_name); + +/* Reads vif FD from "fd", writes interface name to vif_name, and returns vif FD. + * vif_name should be IFNAMSIZ chars long. */ +static +int receive_vif_fd(int fd, char *vif_name) +{ + struct msghdr msg; + struct iovec iov; + int rv; + size_t ccmsg[CMSG_SPACE(sizeof(int)) / sizeof(size_t)]; + struct cmsghdr *cmsg; + unsigned char *data; + + /* Use IOV to read interface name */ + iov.iov_base = vif_name; + iov.iov_len = IFNAMSIZ; + + msg.msg_name = 0; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + /* old BSD implementations should use msg_accrights instead of + * msg_control; the interface is different. 
*/ + msg.msg_control = ccmsg; + msg.msg_controllen = sizeof(ccmsg); + + while(((rv = recvmsg(fd, &msg, 0)) == -1) && errno == EINTR); + if (rv == -1) { + perror("recvmsg"); + return -1; + } + if(!rv) { + /* EOF */ + return -1; + } + + cmsg = CMSG_FIRSTHDR(&msg); + if (cmsg == NULL || cmsg->cmsg_type != SCM_RIGHTS) { /* no ancillary data, or not a passed fd */ + fprintf(stderr, "got control message of unknown type %d\n", + cmsg ? cmsg->cmsg_type : -1); + return -1; + } + data = CMSG_DATA(cmsg); + return *(int*)data; +} + + +int tun_alloc(int iftype, char *if_name) +{ + int control_fd; + struct sockaddr_un addr; + int remotefd; + + control_fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (control_fd == -1) { + perror("Could not create UNIX socket\n"); + exit(-1); + } + + memset(&addr, 0, sizeof(struct sockaddr_un)); + /* Clear structure */ + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path, VSYS_TUNTAP, + sizeof(addr.sun_path) - 1); + + if (connect(control_fd, (struct sockaddr *) &addr, + sizeof(struct sockaddr_un)) == -1) { + perror("Could not connect to Vsys control socket"); + exit(-1); + } + + /* passing type param */ + if (send(control_fd, &iftype, sizeof(iftype), 0) != sizeof(iftype)) { + perror("Could not send paramater to Vsys control socket"); + exit(-1); + } + + remotefd = receive_vif_fd(control_fd, if_name); + return remotefd; +} diff --git a/planetlab/pltap-ovs/tunalloc.h b/planetlab/pltap-ovs/tunalloc.h new file mode 100644 index 000000000..3e5caae1d --- /dev/null +++ b/planetlab/pltap-ovs/tunalloc.h @@ -0,0 +1,6 @@ +#ifndef _TUNALLOC_H +#define _TUNALLOC_H + +int tun_alloc(int iftype, char *if_name); + +#endif diff --git a/planetlab/scripts/sliver-ovs b/planetlab/scripts/sliver-ovs new file mode 100755 index 000000000..87f173e2b --- /dev/null +++ b/planetlab/scripts/sliver-ovs @@ -0,0 +1,304 @@ +#!/bin/bash +# -*-shell-mode-*- + +### expected to be run as root + +COMMAND=$0 + +#################### global vars +RUN_DIR=/var/run/openvswitch +DB_CONF_FILE=/etc/openvswitch/conf.db
+DB_SCHEMA=/usr/share/openvswitch/vswitch.ovsschema +DB_PID_FILE=/var/run/openvswitch/db.pid +DB_LOG=/var/log/ovs-db.log +DB_CTL_PATTERN='ovsdb-server.*.ctl' +## +DB_SOCKET=/var/run/openvswitch/db.sock +## +SWITCH_PID_FILE=/var/run/openvswitch/switch.pid +SWITCH_LOG=/var/log/ovs-switch.log +SWITCH_SOCKET=/var/run/openvswitch/switch.sock + +#################### helper functions + +function kill_pltap_ovs () { + killall pltap-ovs 2>/dev/null || : +} + +function error { + echo "$@" >&2 + exit 1 +} + +function get_params { + params=$1; shift + err_msg="$COMMAND $SUBCOMMAND $(echo $params | perl -pe 's/\S+/<$&>/g')" + for p in $(echo $params); do + [[ -z "$@" ]] && error "$err_msg" + pname=$(echo -n $p|perl -pe 's/\W/_/g') + eval $pname="$1"; shift + done + [[ -n "$@" ]] && error "$err_msg" +} + +function is_switch_running { + ovs-appctl --target=$SWITCH_SOCKET version >& /dev/null +} + +function is_db_running { + ovs-appctl --target=$DB_CTRL_SOCKET version >& /dev/null +} + +function tapname () { + IP=$1; shift + echo $(ip addr show to "$IP/32" | perl -ne '/^\s*\d+:\s*([\w-]+):/ && print $1') +} + +function wait_server () { + pid_file=$1; shift + server_name=$1; shift + timeout=$1; shift + + expire=$(($(date +%s) + $timeout)) + + ## wait for it to be up - xxx todo - could use a timeout of some kind + while [ ! -f "$pid_file" ]; do + echo "Waiting for $server_name to start... $(($expire - $(date +%s)))s left" >&2 + sleep 1; + [ $(date +%s) -ge $expire ] && return 1 + done + cat "$pid_file" +} + +function wait_device () { + tapname=$1; shift + timeout=$1; shift + + expire=$(($(date +%s) + $timeout)) + + while ! 
ip link show up | egrep -q "^[0-9]+: +$tapname:"; do + echo "Waiting for $tapname to come UP...$(($expire - $(date +%s)))s left" >&2 + sleep 1 + [ $(date +%s) -ge $expire ] && return 1 + done + return 0 +} + +######################################## startup +function start_db () { + get_params "" "$@" + + ## init conf + conf_dir=$(dirname $DB_CONF_FILE) + [ -d $conf_dir ] || mkdir -p $conf_dir + [ -f $DB_CONF_FILE ] || ovsdb-tool create $DB_CONF_FILE $DB_SCHEMA + + ## init run + [ -d $RUN_DIR ] || mkdir -p $RUN_DIR + + ## check + [ -f $DB_CONF_FILE ] || { echo "Could not initialize $DB_CONF_FILE - exiting" ; exit 1 ; } + [ -d $RUN_DIR ] || { echo "Could not initialize $RUN_DIR - exiting" ; exit 1 ; } + + ## run the stuff + if [ ! -f "$DB_PID_FILE" ]; then + ovsdb-server --remote=punix:$DB_SOCKET \ + --remote=db:Open_vSwitch,manager_options \ + --private-key=db:SSL,private_key \ + --certificate=db:SSL,certificate \ + --bootstrap-ca-cert=db:SSL,ca_cert \ + --pidfile=$DB_PID_FILE \ + --log-file=$DB_LOG \ + --unixctl=$DB_CTRL_SOCKET \ + --detach >& /dev/null + else + echo 'ovsdb-server appears to be running already, *not* starting' + fi + wait_server $DB_PID_FILE ovsdb-server 30 + echo $DB_PID_FILE +} + +function start_switch () { + get_params "" "$@" + + # ensure ovsdb-server is running + is_db_running || { echo "ovsdb-server not running" >&2 ; exit 1 ; } + + if [ ! 
-f "$SWITCH_PID_FILE" ] ; then + ovs-vswitchd \ + --pidfile=$SWITCH_PID_FILE \ + --log-file=$SWITCH_LOG \ + --unixctl=$SWITCH_SOCKET \ + --detach \ + unix:$DB_SOCKET >& /dev/null + else + echo 'ovs-vswitchd appears to be running already, *not* starting' + fi + wait_server $SWITCH_PID_FILE ovs-vswitchd 30 +} + +# first dumb stab just read "pkill ovsdb-server" and "pkill ovs-vswitchd" +# quick and dirty : we locate the control file through a search in /var/run +# caller should be requested to remember and provide this pid instead +function stop_db () { + controlfile=$(ls $RUN_DIR/$DB_CTL_PATTERN) + [ -f $controlfile ] && ovs-appctl --target=$controlfile exit +} + +function stop_switch () { + ovs-appctl --target=$SWITCH_SOCKET exit || : +} + +function status () { + pids=$(pgrep '^ovs') + [ -n "$pids" ] && ps $pids +} + +function start () { + start_db + start_switch +} + +function stop () { + stop_switch + stop_db +} + +#################### create functions +function create_bridge () { + + get_params "IP/PREFIX" "$@" + + IP=${IP_PREFIX%/*} + PREFIX=${IP_PREFIX#*/} + + set -e + # ensure ovs-vswitchd is running + is_switch_running || { echo "ovs-vswitchd not running" >&2 ; exit 1 ; } + + # check whether the address is already assigned + TAPNAME=$(tapname $IP) + if [ ! -z "$TAPNAME" ]; then + if ovs-vsctl --db=unix:$DB_SOCKET br-exists "$TAPNAME"; then + echo $TAPNAME + exit 0 + fi + kill_pltap_ovs + error "$IP already assigned to $TAPNAME" + fi + + # we're clear + TAPNAME=$(pltap-ovs) + trap kill_pltap_ovs EXIT + # xxx wouldn't that be safer if left-aligned ? + vsysc vif_up << EOF + $TAPNAME + $IP + $PREFIX +EOF + wait_device $TAPNAME 60 && \ + ovs-vsctl --db=unix:$DB_SOCKET add-br $TAPNAME -- set bridge $TAPNAME datapath_type=planetlab + echo $TAPNAME + return 0 +} + +function create_port () { + + get_params "bridge port" "$@" + + # ensure ovs-vswitchd is running + is_switch_running || { echo "ovs-vswitchd not running" >&2 ; exit 1 ; } + + set -e + if ! 
ovs-vsctl --db=unix:$DB_SOCKET list-ports "$bridge" | grep -q "^$port\$"; then + ovs-vsctl --db=unix:$DB_SOCKET add-port "$bridge" "$port" -- set interface "$port" type=tunnel + fi + ovs-appctl --target=$SWITCH_SOCKET netdev-tunnel/get-port "$port" + return 0 +} + +function set_remote_endpoint () { + + get_params "local_port remote_ip remote_UDP_port" "$@" + + # ensure ovs-vswitchd is running + is_switch_running || { echo "ovs-vswitchd not running" >&2 ; exit 1 ; } + + set -e + ovs-vsctl --db=unix:$DB_SOCKET set interface $local_port \ + options:remote_ip=$remote_ip \ + options:remote_port=$remote_UDP_port + return 0 +} + +#################### del functions +function del_bridge () { + + get_params "bridge_name" "$@" + + W= + if ! is_switch_running; then + # we can delete the bridge even if ovs-vswitchd is not running, + # but we need a running ovsdb-server + is_db_running || { echo "ovsdb-server not running" >&2; exit 1; } + W="--no-wait" + fi + + if ovs-vsctl --db=unix:$DB_SOCKET br-exists "$bridge_name"; then + ovs-vsctl --db=unix:$DB_SOCKET $W del-br $bridge_name + fi + return 0 +} + +function del_port () { + + get_params "port" "$@" + + W= + if ! is_switch_running; then + # we can delete the port even if ovs-vswitchd is not running, + # but we need a running ovsdb-server + is_db_running || { echo "ovsdb-server not running" >&2; exit 1; } + W="--no-wait" + fi + + set -e + if ovs-vsctl --db=unix:$DB_SOCKET port-to-br "$port" >/dev/null 2>&1; then + ovs-vsctl --db=unix:$DB_SOCKET $W del-port "$port" + fi + return 0 +} + +function show () { + + get_params "" "$@" + + is_db_running || { echo "ovsdb-server not running" >&2; exit 1; } + + ovs-vsctl --db=unix:$DB_SOCKET show +} + +#################### +SUPPORTED_SUBCOMMANDS="start stop status +start_db stop_db start_switch stop_switch +create_bridge create_port del_bridge del_port +show set_remote_endpoint" + +function main () { + message="Usage: $COMMAND ... 
+Supported subcommands are (dash or underscore is the same): +$SUPPORTED_SUBCOMMANDS" + [[ -z "$@" ]] && error "$message" + + SUBCOMMAND=$1; shift + # support dashes instead of underscores + SUBCOMMAND=$(echo $SUBCOMMAND | sed -e s,-,_,g) + found="" + for supported in $SUPPORTED_SUBCOMMANDS; do [ "$SUBCOMMAND" = "$supported" ] && found=yes; done + + [ -z "$found" ] && error $message + + $SUBCOMMAND "$@" +} + +main "$@" diff --git a/planetlab/vsysc/vsysc.c b/planetlab/vsysc/vsysc.c new file mode 100644 index 000000000..98c90e772 --- /dev/null +++ b/planetlab/vsysc/vsysc.c @@ -0,0 +1,191 @@ +#include +#include +#include +#include +#include +#include +#include + +#define VSYS_PATH "/vsys" + +#define MAXPATH 1024 +#define BUFSIZE 4096 + +#define IN 0 +#define OUT 1 + +char ctrl[2][MAXPATH]; /* paths of vsys.in & vsys.out */ + +static void mkpath(int dir, const char* vsys) +{ + static const char *suffix[] = { "in", "out" }; + int n; + + if ( (n = snprintf(ctrl[dir], MAXPATH, "%s/%s.%s", VSYS_PATH, vsys, suffix[dir])) < 0) { + perror(vsys); + exit(EXIT_FAILURE); + } else if (n >= MAXPATH) { + fprintf(stderr, "argument too long\n"); + exit(EXIT_FAILURE); + } +} + +static int open_ctrl(int dir) +{ + int fd; + + if ( (fd = open(ctrl[dir], (dir == IN ? 
O_WRONLY : O_RDONLY) | O_NONBLOCK)) < 0) { + perror(ctrl[dir]); + exit(EXIT_FAILURE); + } + return fd; +} + + +static void set_nonblocking(int fd) +{ + int val; + + if ( (val = fcntl(fd, F_GETFL, 0)) < 0) { + perror("fcntl F_GETFL"); + exit(EXIT_FAILURE); + } + if (fcntl(fd, F_SETFL, val | O_NONBLOCK) < 0) { + perror("fcntl F_SETFL"); + exit(EXIT_FAILURE); + } +} + +#if 0 +static void print_set(const char* name, int max, const fd_set* set) +{ + int i, n = 0; + fprintf(stderr, "%s: {", name); + for (i = 0; i < max; i++) { + if (FD_ISSET(i, set)) { + if (n++) fprintf(stderr, ", "); + fprintf(stderr, "%d", i); + } + } + fprintf(stderr, "}\n"); +} +#endif + +struct channel { + const char *name; + int active; + int writing; + char buf[BUFSIZE]; + char *rp, *wp; + int rfd, wfd; +}; + +static int active_channels = 0; + +static void channel_init(struct channel *c, const char* name, int rfd, int wfd) +{ + c->name = name; + c->rp = c->buf; + c->wp = c->buf; + c->rfd = rfd; + c->wfd = wfd; + c->active = 1; + active_channels++; +} + +static void channel_fdset(struct channel *c, fd_set* readset, fd_set* writeset) +{ + if (!c->active) + return; + if (c->writing) { + FD_SET(c->wfd, writeset); + } else { + FD_SET(c->rfd, readset); + } +} + +static void channel_run(struct channel *c, const fd_set* readset, const fd_set* writeset) +{ + int n; + + if (!c->active) + return; + if (c->writing) { + if (FD_ISSET(c->wfd, writeset)) { + if ( (n = write(c->wfd, c->wp, c->rp - c->wp)) < 0) { + perror(c->name); + exit(EXIT_FAILURE); + } + c->wp += n; + if (c->wp == c->rp) { + c->wp = c->rp = c->buf; + c->writing = 0; + } + } + } else { + if (FD_ISSET(c->rfd, readset)) { + if ( (n = read(c->rfd, c->rp, BUFSIZE)) < 0) { + perror(c->name); + exit(EXIT_FAILURE); + } + if (n) { + c->wp = c->rp; + c->rp += n; + c->writing = 1; + } else { + close(c->wfd); + c->active = 0; + active_channels--; + } + } + } +} + +static struct channel channels[2]; + + +int main(int argc, char *argv[]) +{ + int fd[2]; /* 
fds of vsys.in & vsys.out */ + int maxfd; + + fd_set readset, writeset; + + if (argc != 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + exit(EXIT_FAILURE); + } + + mkpath(IN, argv[1]); + mkpath(OUT, argv[1]); + + maxfd = (STDOUT_FILENO > STDIN_FILENO ? STDOUT_FILENO : STDIN_FILENO); + + fd[OUT] = open_ctrl(OUT); + if (fd[OUT] > maxfd) + maxfd = fd[OUT]; + fd[IN] = open_ctrl(IN); + if (fd[IN] > maxfd) + maxfd = fd[IN]; + + set_nonblocking(STDIN_FILENO); + set_nonblocking(STDOUT_FILENO); + + channel_init(&channels[IN], "IN", STDIN_FILENO, fd[IN]); + channel_init(&channels[OUT], "OUT", fd[OUT], STDOUT_FILENO); + + while (active_channels) { + FD_ZERO(&readset); + FD_ZERO(&writeset); + channel_fdset(&channels[IN], &readset, &writeset); + channel_fdset(&channels[OUT], &readset, &writeset); + if (select(maxfd + 1, &readset, &writeset, NULL, NULL) < 0) { + perror("select"); + exit(EXIT_FAILURE); + } + channel_run(&channels[IN], &readset, &writeset); + channel_run(&channels[OUT], &readset, &writeset); + } + return EXIT_SUCCESS; +} + diff --git a/sliver-openvswitch.spec b/sliver-openvswitch.spec new file mode 100644 index 000000000..d9241aa76 --- /dev/null +++ b/sliver-openvswitch.spec @@ -0,0 +1,51 @@ +%define name sliver-openvswitch +# to check for any change: +# grep AC_INIT configure.ac +%define version 1.6.90 +%define taglevel 1 + +%define debug_package %{nil} + +%define release %{taglevel}%{?pldistro:.%{pldistro}}%{?date:.%{date}} + +Vendor: OneLab +Packager: OneLab +Distribution: PlanetLab %{plrelease} +URL: %{SCMURL} +#Requires: + +Summary: Openvswitch modified for running from a PlanetLab sliver +Name: %{name} +Version: %{version} +Release: %{release} +License: GPL +Group: System Environment/Applications +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot +Source0: sliver-openvswitch-%{version}.tar.gz + +%description +Openvswitch tuned for running within a PlanetLab sliver + +%prep +%setup -q + +%build +./boot.sh +# let's be as close as the regular 
linux/fedora layout +./configure --prefix=/usr --sysconfdir=/etc --localstatedir=/var +make + +%install +make install DESTDIR=$RPM_BUILD_ROOT + +%clean +rm -rf $RPM_BUILD_ROOT + +%files +/usr + +%post + +%postun + +%changelog