Merge branch 'master' of git://openvswitch.org/openvswitch
author Giuseppe Lettieri <g.lettieri@iet.unipi.it>
Thu, 13 Sep 2012 09:16:45 +0000 (11:16 +0200)
committer Giuseppe Lettieri <g.lettieri@iet.unipi.it>
Thu, 13 Sep 2012 09:16:45 +0000 (11:16 +0200)
24 files changed:
.gitignore
.non-distfiles [new file with mode: 0644]
Makefile.am
lib/automake.mk
lib/dpif-netdev.c
lib/dpif-provider.h
lib/dpif.c
lib/netdev-linux.c
lib/netdev-provider.h
lib/netdev-tunnel.c [new file with mode: 0644]
lib/netdev.c
lib/tunalloc.c [new file with mode: 0644]
lib/tunalloc.h [new file with mode: 0644]
planetlab/automake.mk [new file with mode: 0644]
planetlab/exp-tool/Makefile [new file with mode: 0644]
planetlab/exp-tool/README [new file with mode: 0644]
planetlab/exp-tool/conf.mk.example [new file with mode: 0644]
planetlab/exp-tool/showgraph [new file with mode: 0755]
planetlab/pltap-ovs/pltap-ovs.c [new file with mode: 0644]
planetlab/pltap-ovs/tunalloc.c [new file with mode: 0644]
planetlab/pltap-ovs/tunalloc.h [new file with mode: 0644]
planetlab/scripts/sliver-ovs [new file with mode: 0755]
planetlab/vsysc/vsysc.c [new file with mode: 0644]
sliver-openvswitch.spec [new file with mode: 0644]

index 702cc6c..cc8cb23 100644 (file)
@@ -49,3 +49,4 @@ Module.symvers
 TAGS
 cscope.*
 tags
+myexp/
diff --git a/.non-distfiles b/.non-distfiles
new file mode 100644 (file)
index 0000000..0c43af5
--- /dev/null
@@ -0,0 +1,7 @@
+planetlab/exp-tool
+planetlab/exp-tool/Makefile
+planetlab/exp-tool/showgraph
+planetlab/exp-tool/README
+sliver-openvswitch.spec
+.gitignore
+.non-distfiles
index 1b14871..932f495 100644 (file)
@@ -145,7 +145,7 @@ dist-hook-git: distfiles
          (cd datapath && $(MAKE) distfiles);                               \
          (cat distfiles; sed 's|^|datapath/|' datapath/distfiles) |        \
            sort -u > all-distfiles;                                        \
-         (cd $(srcdir) && git ls-files) | grep -v '\.gitignore$$' |        \
+         (cd $(srcdir) && git ls-files) | grep -vFf $(srcdir)/.non-distfiles |     \
            sort -u > all-gitfiles;                                         \
          comm -1 -3 all-distfiles all-gitfiles > missing-distfiles;        \
          if test -s missing-distfiles; then                                \
@@ -223,3 +223,4 @@ include rhel/automake.mk
 include xenserver/automake.mk
 include python/automake.mk
 include python/compat/automake.mk
+include planetlab/automake.mk
index 94b86f6..a7f469c 100644 (file)
@@ -88,6 +88,7 @@ lib_libopenvswitch_a_SOURCES = \
        lib/multipath.c \
        lib/multipath.h \
        lib/netdev-dummy.c \
+       lib/netdev-tunnel.c \
        lib/netdev-provider.h \
        lib/netdev.c \
        lib/netdev.h \
@@ -181,6 +182,8 @@ lib_libopenvswitch_a_SOURCES = \
        lib/timeval.h \
        lib/token-bucket.c \
        lib/token-bucket.h \
+       lib/tunalloc.c \
+       lib/tunalloc.h \
        lib/type-props.h \
        lib/unaligned.h \
        lib/unicode.c \
index 144b6b6..48bc92d 100644 (file)
@@ -316,6 +316,15 @@ dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
     return 0;
 }
 
+/* Returns the netdev type that do_add_port() opens for a port of type
+ * "internal" on 'dp': "tap" for the plain netdev datapath class, "tap_pl" for
+ * the PlanetLab datapath class, and "dummy" for any other class. */
+static const char *
+internal_port_type(const struct dp_netdev *dp)
+{
+    return (dp->class == &dpif_netdev_class ? "tap"
+            : dp->class == &dpif_planetlab_class ? "tap_pl"
+            : "dummy");
+}
+
 static int
 do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
             uint16_t port_no)
@@ -329,9 +338,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
     /* XXX reject devices already in some dp_netdev. */
 
     /* Open and validate network device. */
-    open_type = (strcmp(type, "internal") ? type
-                 : dp->class != &dpif_netdev_class ? "dummy"
-                 : "tap");
+    open_type = (strcmp(type, "internal") ? type : internal_port_type(dp));
     error = netdev_open(devname, open_type, &netdev);
     if (error) {
         return error;
@@ -376,7 +383,9 @@ choose_port(struct dpif *dpif, struct netdev *netdev)
     struct dp_netdev *dp = get_dp_netdev(dpif);
     int port_no;
 
-    if (dpif->dpif_class != &dpif_netdev_class) {
+    if (dpif->dpif_class != &dpif_netdev_class &&
+        dpif->dpif_class != &dpif_planetlab_class)
+    {
         /* If the port name contains a number, try to assign that port number.
          * This can make writing unit tests easier because port numbers are
          * predictable. */
@@ -1265,40 +1274,48 @@ dp_netdev_execute_actions(struct dp_netdev *dp,
     }
 }
 
+/* Callback portion of a 'struct dpif_class' initializer.  It is expanded
+ * after the class name string in both dpif_netdev_class and
+ * dpif_planetlab_class, so the two classes share every callback and differ
+ * only in name.  The entries are positional: their order must match the
+ * member order of 'struct dpif_class'. */
+#define DPIF_NETDEV_CLASS_FUNCTIONS                    \
+    dpif_netdev_enumerate,                             \
+    dpif_netdev_open,                                  \
+    dpif_netdev_close,                                 \
+    dpif_netdev_destroy,                               \
+    dpif_netdev_run,                                   \
+    dpif_netdev_wait,                                  \
+    dpif_netdev_get_stats,                             \
+    dpif_netdev_port_add,                              \
+    dpif_netdev_port_del,                              \
+    dpif_netdev_port_query_by_number,                  \
+    dpif_netdev_port_query_by_name,                    \
+    dpif_netdev_get_max_ports,                         \
+    NULL,                       /* port_get_pid */     \
+    dpif_netdev_port_dump_start,                       \
+    dpif_netdev_port_dump_next,                                \
+    dpif_netdev_port_dump_done,                                \
+    dpif_netdev_port_poll,                             \
+    dpif_netdev_port_poll_wait,                                \
+    dpif_netdev_flow_get,                              \
+    dpif_netdev_flow_put,                              \
+    dpif_netdev_flow_del,                              \
+    dpif_netdev_flow_flush,                            \
+    dpif_netdev_flow_dump_start,                       \
+    dpif_netdev_flow_dump_next,                                \
+    dpif_netdev_flow_dump_done,                                \
+    dpif_netdev_execute,                               \
+    NULL,                       /* operate */          \
+    dpif_netdev_recv_set,                              \
+    dpif_netdev_queue_to_priority,                     \
+    dpif_netdev_recv,                                  \
+    dpif_netdev_recv_wait,                             \
+    dpif_netdev_recv_purge,                            \
+
 const struct dpif_class dpif_netdev_class = {
     "netdev",
-    dpif_netdev_enumerate,
-    dpif_netdev_open,
-    dpif_netdev_close,
-    dpif_netdev_destroy,
-    dpif_netdev_run,
-    dpif_netdev_wait,
-    dpif_netdev_get_stats,
-    dpif_netdev_port_add,
-    dpif_netdev_port_del,
-    dpif_netdev_port_query_by_number,
-    dpif_netdev_port_query_by_name,
-    dpif_netdev_get_max_ports,
-    NULL,                       /* port_get_pid */
-    dpif_netdev_port_dump_start,
-    dpif_netdev_port_dump_next,
-    dpif_netdev_port_dump_done,
-    dpif_netdev_port_poll,
-    dpif_netdev_port_poll_wait,
-    dpif_netdev_flow_get,
-    dpif_netdev_flow_put,
-    dpif_netdev_flow_del,
-    dpif_netdev_flow_flush,
-    dpif_netdev_flow_dump_start,
-    dpif_netdev_flow_dump_next,
-    dpif_netdev_flow_dump_done,
-    dpif_netdev_execute,
-    NULL,                       /* operate */
-    dpif_netdev_recv_set,
-    dpif_netdev_queue_to_priority,
-    dpif_netdev_recv,
-    dpif_netdev_recv_wait,
-    dpif_netdev_recv_purge,
+    DPIF_NETDEV_CLASS_FUNCTIONS
+};
+
+/* "planetlab" datapath class: the same callbacks as dpif_netdev_class under a
+ * different name.  internal_port_type() tells the two apart by address, so
+ * "internal" ports on this class are opened as "tap_pl" netdevs. */
+const struct dpif_class dpif_planetlab_class = {
+    "planetlab",
+    DPIF_NETDEV_CLASS_FUNCTIONS
 };
 
 static void
@@ -1331,3 +1348,4 @@ dpif_dummy_register(bool override)
 
     dpif_dummy_register__("dummy");
 }
+
index 317e617..f264e1b 100644 (file)
@@ -340,6 +340,7 @@ struct dpif_class {
 
 extern const struct dpif_class dpif_linux_class;
 extern const struct dpif_class dpif_netdev_class;
+extern const struct dpif_class dpif_planetlab_class;
 
 #ifdef  __cplusplus
 }
index 2968966..7be7b2a 100644 (file)
@@ -62,6 +62,7 @@ static const struct dpif_class *base_dpif_classes[] = {
     &dpif_linux_class,
 #endif
     &dpif_netdev_class,
+    &dpif_planetlab_class,
 };
 
 struct registered_dpif_class {
index 412a92d..08937de 100644 (file)
@@ -69,6 +69,7 @@
 #include "sset.h"
 #include "timer.h"
 #include "vlog.h"
+#include "tunalloc.h"
 
 VLOG_DEFINE_THIS_MODULE(netdev_linux);
 
@@ -732,7 +733,7 @@ netdev_linux_destroy(struct netdev_dev *netdev_dev_)
         netdev_dev->tc->ops->tc_destroy(netdev_dev->tc);
     }
 
-    if (class == &netdev_tap_class) {
+    if (class == &netdev_tap_class || class == &netdev_tap_pl_class) {
         destroy_tap(netdev_dev);
     }
     free(netdev_dev);
@@ -768,7 +769,7 @@ netdev_linux_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
         }
     }
 
-    if (!strcmp(netdev_dev_get_type(netdev_dev_), "tap") &&
+    if (!strncmp(netdev_dev_get_type(netdev_dev_), "tap", 3) &&
         !netdev_dev->state.tap.opened) {
 
         /* We assume that the first user of the tap device is the primary user
@@ -793,7 +794,7 @@ netdev_linux_close(struct netdev *netdev_)
 {
     struct netdev_linux *netdev = netdev_linux_cast(netdev_);
 
-    if (netdev->fd > 0 && strcmp(netdev_get_type(netdev_), "tap")) {
+    if (netdev->fd > 0 && strncmp(netdev_get_type(netdev_), "tap", 3)) {
         close(netdev->fd);
     }
     free(netdev);
@@ -867,7 +868,8 @@ netdev_linux_recv(struct netdev *netdev_, void *data, size_t size)
     for (;;) {
         ssize_t retval;
 
-        retval = (netdev_->netdev_dev->netdev_class == &netdev_tap_class
+        retval = ((netdev_->netdev_dev->netdev_class == &netdev_tap_class ||
+                   netdev_->netdev_dev->netdev_class == &netdev_tap_pl_class)
                   ? read(netdev->fd, data, size)
                   : recv(netdev->fd, data, size, MSG_TRUNC));
         if (retval >= 0) {
@@ -900,7 +902,7 @@ netdev_linux_drain(struct netdev *netdev_)
     struct netdev_linux *netdev = netdev_linux_cast(netdev_);
     if (netdev->fd < 0) {
         return 0;
-    } else if (!strcmp(netdev_get_type(netdev_), "tap")) {
+    } else if (!strncmp(netdev_get_type(netdev_), "tap", 3)) {
         struct ifreq ifr;
         int error = netdev_linux_do_ioctl(netdev_get_name(netdev_), &ifr,
                                           SIOCGIFTXQLEN, "SIOCGIFTXQLEN");
@@ -1011,7 +1013,7 @@ netdev_linux_send_wait(struct netdev *netdev_)
     struct netdev_linux *netdev = netdev_linux_cast(netdev_);
     if (netdev->fd < 0) {
         /* Nothing to do. */
-    } else if (strcmp(netdev_get_type(netdev_), "tap")) {
+    } else if (strncmp(netdev_get_type(netdev_), "tap", 3)) {
         poll_fd_wait(netdev->fd, POLLOUT);
     } else {
         /* TAP device always accepts packets.*/
@@ -1786,6 +1788,51 @@ netdev_linux_get_qos_types(const struct netdev *netdev OVS_UNUSED,
     return 0;
 }
 
+/* Creates a "tap_pl" network device named 'name'.
+ *
+ * The tap file descriptor is not opened locally: it is obtained from
+ * tun_alloc(), which also reports the name of the interface that was actually
+ * created.  A mismatch between the requested and the created name is only
+ * logged; the device is still registered under 'name'.
+ *
+ * On success stores the new device in '*netdev_devp' and returns 0.  On
+ * failure returns a positive errno value and releases everything acquired so
+ * far (notifier reference, tap fd, device memory). */
+static int
+netdev_linux_create_tap_pl(const struct netdev_class *class OVS_UNUSED,
+                           const char *name, struct netdev_dev **netdev_devp)
+{
+    struct netdev_dev_linux *netdev_dev;
+    struct tap_state *state;
+    char real_name[IFNAMSIZ];
+    int error;
+
+    netdev_dev = xzalloc(sizeof *netdev_dev);
+    state = &netdev_dev->state.tap;
+
+    error = cache_notifier_ref();
+    if (error) {
+        goto error;
+    }
+
+    /* Open tap device. */
+    state->fd = tun_alloc(IFF_TAP, real_name);
+    if (state->fd < 0) {
+        /* tun_alloc() can fail without a failing system call (e.g. the
+         * control peer closed the connection), leaving errno at 0.  Never
+         * report success on this path. */
+        error = errno ? errno : EIO;
+        VLOG_WARN("tun_alloc(IFF_TAP, %s) failed: %s", name, strerror(error));
+        goto error_unref_notifier;
+    }
+    if (strcmp(name, real_name)) {
+        VLOG_WARN("tap_pl: requested %s, created %s", name, real_name);
+    }
+
+    /* Make non-blocking. */
+    error = set_nonblocking(state->fd);
+    if (error) {
+        goto error_close;
+    }
+
+    netdev_dev_init(&netdev_dev->netdev_dev, name, &netdev_tap_pl_class);
+    *netdev_devp = &netdev_dev->netdev_dev;
+    return 0;
+
+error_close:
+    /* The fd is owned by this function until the device is initialized. */
+    close(state->fd);
+error_unref_notifier:
+    cache_notifier_unref();
+error:
+    free(netdev_dev);
+    return error;
+}
+
 static const struct tc_ops *
 tc_lookup_ovs_name(const char *name)
 {
@@ -2377,6 +2424,13 @@ netdev_linux_update_flags(struct netdev *netdev, enum netdev_flags off,
     return error;
 }
 
+/* 'update_flags' callback for the "tap_pl" class: ignores every argument and
+ * reports success without changing anything.
+ *
+ * NOTE(review): '*old_flagsp' is never written here, whereas
+ * netdev_tunnel_update_flags() stores the previous flags through it.  If
+ * callers read the old flags after a successful return they will see an
+ * uninitialized value -- confirm against the caller in netdev.c. */
+static int
+netdev_tap_pl_update_flags(struct netdev *netdev OVS_UNUSED, enum netdev_flags off OVS_UNUSED,
+                          enum netdev_flags on OVS_UNUSED, enum netdev_flags *old_flagsp OVS_UNUSED)
+{
+    return 0;
+}
+
 static unsigned int
 netdev_linux_change_seq(const struct netdev *netdev)
 {
@@ -2384,7 +2438,8 @@ netdev_linux_change_seq(const struct netdev *netdev)
 }
 
 #define NETDEV_LINUX_CLASS(NAME, CREATE, GET_STATS, SET_STATS,  \
-                           GET_FEATURES, GET_STATUS)            \
+                           GET_FEATURES, GET_STATUS,            \
+                           UPDATE_FLAGS)                        \
 {                                                               \
     NAME,                                                       \
                                                                 \
@@ -2442,7 +2497,7 @@ netdev_linux_change_seq(const struct netdev *netdev)
     GET_STATUS,                                                 \
     netdev_linux_arp_lookup,                                    \
                                                                 \
-    netdev_linux_update_flags,                                  \
+    UPDATE_FLAGS,                                               \
                                                                 \
     netdev_linux_change_seq                                     \
 }
@@ -2454,7 +2509,8 @@ const struct netdev_class netdev_linux_class =
         netdev_linux_get_stats,
         NULL,                    /* set_stats */
         netdev_linux_get_features,
-        netdev_linux_get_drv_info);
+        netdev_linux_get_drv_info,
+        netdev_linux_update_flags);
 
 const struct netdev_class netdev_tap_class =
     NETDEV_LINUX_CLASS(
@@ -2463,7 +2519,8 @@ const struct netdev_class netdev_tap_class =
         netdev_tap_get_stats,
         NULL,                   /* set_stats */
         netdev_linux_get_features,
-        netdev_linux_get_drv_info);
+        netdev_linux_get_drv_info,
+        netdev_linux_update_flags);
 
 const struct netdev_class netdev_internal_class =
     NETDEV_LINUX_CLASS(
@@ -2472,7 +2529,18 @@ const struct netdev_class netdev_internal_class =
         netdev_internal_get_stats,
         netdev_vport_set_stats,
         NULL,                  /* get_features */
-        netdev_internal_get_drv_info);
+        netdev_internal_get_drv_info,
+        netdev_linux_update_flags);
+
+/* "tap_pl" netdev class.  Differs from netdev_tap_class in how the device is
+ * created (netdev_linux_create_tap_pl(), which obtains the fd via tun_alloc())
+ * and in its no-op update_flags callback; stats, features and driver info use
+ * the same helpers as the regular tap class. */
+const struct netdev_class netdev_tap_pl_class =
+    NETDEV_LINUX_CLASS(
+        "tap_pl",
+        netdev_linux_create_tap_pl,
+        netdev_tap_get_stats,
+        NULL,                   /* set_stats */
+        netdev_linux_get_features,
+        netdev_linux_get_drv_info,
+       netdev_tap_pl_update_flags);    
 \f
 /* HTB traffic control class. */
 
index 94f60af..f56a0ce 100644 (file)
@@ -599,6 +599,9 @@ extern const struct netdev_class netdev_tap_class;
 #ifdef __FreeBSD__
 extern const struct netdev_class netdev_bsd_class;
 #endif
+extern const struct netdev_class netdev_tap_pl_class;
+
+extern const struct netdev_class netdev_tunnel_class;
 
 #ifdef  __cplusplus
 }
diff --git a/lib/netdev-tunnel.c b/lib/netdev-tunnel.c
new file mode 100644 (file)
index 0000000..d2318db
--- /dev/null
@@ -0,0 +1,510 @@
+/*
+ * Copyright (c) 2010, 2011, 2012 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include <unistd.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <errno.h>
+
+#include "flow.h"
+#include "list.h"
+#include "netdev-provider.h"
+#include "odp-util.h"
+#include "ofp-print.h"
+#include "ofpbuf.h"
+#include "packets.h"
+#include "poll-loop.h"
+#include "shash.h"
+#include "sset.h"
+#include "unixctl.h"
+#include "socket-util.h"
+#include "vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(netdev_tunnel);
+
+/* Per-device state of a "tunnel" netdev: Ethernet frames are carried as
+ * datagrams over a socket opened with inet_open_passive(SOCK_DGRAM, ...). */
+struct netdev_dev_tunnel {
+    struct netdev_dev netdev_dev;   /* Base device; used with CONTAINER_OF. */
+    uint8_t hwaddr[ETH_ADDR_LEN];   /* MAC address; generated at creation. */
+    struct netdev_stats stats;      /* Counters updated by recv/send. */
+    enum netdev_flags flags;        /* Maintained by update_flags only. */
+    int sockfd;                     /* Datagram socket carrying the frames. */
+    struct sockaddr_in local_addr;  /* Filled in by inet_open_passive(). */
+    struct sockaddr_in remote_addr; /* Peer, built up by set_config. */
+    bool valid_remote_ip;           /* "remote_ip" has been configured. */
+    bool valid_remote_port;         /* "remote_port" has been configured. */
+    bool connected;                 /* connect() to the peer succeeded. */
+    unsigned int change_seq;        /* Bumped on changes; never left at 0. */
+};
+
+/* Per-open handle on a tunnel device.  It carries no state beyond the base
+ * 'struct netdev'; all tunnel state lives in struct netdev_dev_tunnel. */
+struct netdev_tunnel {
+    struct netdev netdev;
+} ;
+
+/* Rate limiter shared by the VLOG_WARN_RL() calls in the recv and send
+ * paths. */
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+
+/* All existing tunnel devices, keyed by device name.  Entries are added in
+ * netdev_tunnel_create(), removed in netdev_tunnel_destroy(), and looked up by
+ * the "netdev-tunnel/get-port" unixctl command. */
+static struct shash tunnel_netdev_devs = SHASH_INITIALIZER(&tunnel_netdev_devs);
+
+/* Forward declarations: is_tunnel_class() compares against
+ * netdev_tunnel_create, and netdev_tunnel_update_seq() is defined after its
+ * first use. */
+static int netdev_tunnel_create(const struct netdev_class *, const char *,
+                               struct netdev_dev **);
+static void netdev_tunnel_update_seq(struct netdev_dev_tunnel *);
+
+/* Returns true if 'class' is a tunnel netdev class.  A class is recognized
+ * by its 'create' callback being netdev_tunnel_create(). */
+static bool
+is_tunnel_class(const struct netdev_class *class)
+{
+    if (class->create == netdev_tunnel_create) {
+        return true;
+    }
+    return false;
+}
+
+/* Converts the generic 'netdev_dev' into the tunnel device that embeds it.
+ * Asserts that 'netdev_dev' really belongs to a tunnel class. */
+static struct netdev_dev_tunnel *
+netdev_dev_tunnel_cast(const struct netdev_dev *netdev_dev)
+{
+    bool is_tunnel = is_tunnel_class(netdev_dev_get_class(netdev_dev));
+
+    assert(is_tunnel);
+    return CONTAINER_OF(netdev_dev, struct netdev_dev_tunnel, netdev_dev);
+}
+
+/* Converts the generic 'netdev' handle into the tunnel handle that embeds
+ * it.  Asserts that the underlying device belongs to a tunnel class. */
+static struct netdev_tunnel *
+netdev_tunnel_cast(const struct netdev *netdev)
+{
+    bool is_tunnel =
+        is_tunnel_class(netdev_dev_get_class(netdev_get_dev(netdev)));
+
+    assert(is_tunnel);
+    return CONTAINER_OF(netdev, struct netdev_tunnel, netdev);
+}
+
+/* Creates a tunnel device named 'name' of class 'class'.
+ *
+ * A datagram socket is opened with inet_open_passive() and its address is
+ * recorded in 'local_addr'.  The device gets a generated MAC address of the
+ * form 55:aa:xx:xx:xx:xx taken from a per-process counter, and starts with no
+ * remote endpoint configured and therefore unconnected.
+ *
+ * On success stores the device in '*netdev_devp' and returns 0.  On failure
+ * returns a positive errno value, like the other netdev 'create' callbacks. */
+static int
+netdev_tunnel_create(const struct netdev_class *class, const char *name,
+                     struct netdev_dev **netdev_devp)
+{
+    static unsigned int n = 0xaa550000;
+    struct netdev_dev_tunnel *netdev_dev;
+    int sockfd;
+
+    netdev_dev = xzalloc(sizeof *netdev_dev);
+
+    /* Do the only step that can fail before netdev_dev_init(), as
+     * netdev_linux_create_tap_pl() does, so the error path never frees a
+     * device that has already been initialized. */
+    sockfd = inet_open_passive(SOCK_DGRAM, "", 0, &netdev_dev->local_addr, 0);
+    if (sockfd < 0) {
+        /* inet_open_passive() reports failure as a negative errno value;
+         * this callback must return a positive one. */
+        free(netdev_dev);
+        return -sockfd;
+    }
+
+    netdev_dev_init(&netdev_dev->netdev_dev, name, class);
+    netdev_dev->sockfd = sockfd;
+    netdev_dev->hwaddr[0] = 0x55;
+    netdev_dev->hwaddr[1] = 0xaa;
+    netdev_dev->hwaddr[2] = n >> 24;
+    netdev_dev->hwaddr[3] = n >> 16;
+    netdev_dev->hwaddr[4] = n >> 8;
+    netdev_dev->hwaddr[5] = n;
+    netdev_dev->flags = 0;
+    netdev_dev->change_seq = 1;
+    memset(&netdev_dev->remote_addr, 0, sizeof netdev_dev->remote_addr);
+    netdev_dev->valid_remote_ip = false;
+    netdev_dev->valid_remote_port = false;
+    netdev_dev->connected = false;
+
+    shash_add(&tunnel_netdev_devs, name, netdev_dev);
+
+    n++;
+
+    *netdev_devp = &netdev_dev->netdev_dev;
+
+    /* sin_port is in network byte order (see netdev_tunnel_get_port()). */
+    VLOG_DBG("tunnel_create: name=%s, fd=%d, port=%d", name,
+             netdev_dev->sockfd, ntohs(netdev_dev->local_addr.sin_port));
+
+    return 0;
+}
+
+/* Destroys tunnel device 'netdev_dev_': closes its socket (unless the fd is
+ * -1), removes it from 'tunnel_netdev_devs' and frees it. */
+static void
+netdev_tunnel_destroy(struct netdev_dev *netdev_dev_)
+{
+    struct netdev_dev_tunnel *tunnel = netdev_dev_tunnel_cast(netdev_dev_);
+    int fd = tunnel->sockfd;
+
+    if (fd != -1) {
+        close(fd);
+    }
+
+    shash_find_and_delete(&tunnel_netdev_devs,
+                          netdev_dev_get_name(netdev_dev_));
+    free(tunnel);
+}
+
+/* Opens a new handle on tunnel device 'netdev_dev_' and stores it in
+ * '*netdevp'.  Always returns 0. */
+static int
+netdev_tunnel_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
+{
+    struct netdev_tunnel *handle = xmalloc(sizeof *handle);
+
+    netdev_init(&handle->netdev, netdev_dev_);
+    *netdevp = &handle->netdev;
+
+    return 0;
+}
+
+/* Releases the handle 'netdev_'.  The underlying device and its socket are
+ * left untouched. */
+static void
+netdev_tunnel_close(struct netdev *netdev_)
+{
+    free(netdev_tunnel_cast(netdev_));
+}
+
+/* Adds the configured remote endpoint of 'dev_' to 'args': "remote_ip" and
+ * "remote_port", each only if it has been set through set_config.  Always
+ * returns 0. */
+static int
+netdev_tunnel_get_config(struct netdev_dev *dev_, struct smap *args)
+{
+    struct netdev_dev_tunnel *netdev_dev = netdev_dev_tunnel_cast(dev_);
+
+    /* smap_add() stores its own copy of the value, so passing it the result
+     * of xasprintf() leaks that string.  smap_add_format() formats directly
+     * into the map instead. */
+    if (netdev_dev->valid_remote_ip) {
+        smap_add_format(args, "remote_ip", IP_FMT,
+                        IP_ARGS(&netdev_dev->remote_addr.sin_addr));
+    }
+    if (netdev_dev->valid_remote_port) {
+        smap_add_format(args, "remote_port", "%"PRIu16,
+                        ntohs(netdev_dev->remote_addr.sin_port));
+    }
+    return 0;
+}
+
+/* Connects the socket of 'dev' to its remote endpoint once both the remote
+ * IP and the remote port are known.
+ *
+ * Returns EBADF if the device has no socket, 0 if the endpoint is still
+ * incomplete (nothing is done) or the connect succeeded, and the errno value
+ * from connect() otherwise.  A successful connect marks the device connected
+ * and bumps its change sequence number. */
+static int
+netdev_tunnel_connect(struct netdev_dev_tunnel *dev)
+{
+    struct sockaddr *peer = (struct sockaddr *) &dev->remote_addr;
+
+    if (dev->sockfd < 0) {
+        return EBADF;
+    }
+    if (!(dev->valid_remote_ip && dev->valid_remote_port)) {
+        return 0;
+    }
+
+    dev->remote_addr.sin_family = AF_INET;
+    if (connect(dev->sockfd, peer, sizeof dev->remote_addr) < 0) {
+        return errno;
+    }
+
+    dev->connected = true;
+    netdev_tunnel_update_seq(dev);
+    VLOG_DBG("%s: connected to (%s, %d)", netdev_dev_get_name(&dev->netdev_dev),
+             inet_ntoa(dev->remote_addr.sin_addr),
+             ntohs(dev->remote_addr.sin_port));
+    return 0;
+}
+
+/* Applies the configuration in 'args' to tunnel device 'dev_'.
+ *
+ * Recognized keys:
+ *   - "remote_ip":   peer address, resolved with lookup_ip().
+ *   - "remote_port": peer UDP port, a decimal number in 1...65535.
+ *
+ * A bad value or an unknown key is logged and skipped; it does not make the
+ * call fail.  After all keys are processed the socket is connected if both
+ * parts of the endpoint are known; the return value is that of
+ * netdev_tunnel_connect(). */
+static int
+netdev_tunnel_set_config(struct netdev_dev *dev_, const struct smap *args)
+{
+    struct netdev_dev_tunnel *netdev_dev = netdev_dev_tunnel_cast(dev_);
+    const char *dev_name = netdev_dev_get_name(dev_);
+    struct smap_node *node;     /* An smap holds smap_nodes, not shash_nodes. */
+
+    VLOG_DBG("tunnel_set_config(%s)", dev_name);
+    SMAP_FOR_EACH (node, args) {
+        VLOG_DBG("arg: %s->%s", node->key, node->value);
+        if (!strcmp(node->key, "remote_ip")) {
+            struct in_addr addr;
+
+            if (lookup_ip(node->value, &addr)) {
+                VLOG_WARN("%s: bad 'remote_ip'", dev_name);
+            } else {
+                netdev_dev->remote_addr.sin_addr = addr;
+                netdev_dev->valid_remote_ip = true;
+            }
+        } else if (!strcmp(node->key, "remote_port")) {
+            char *end;
+            long port;
+
+            /* atoi() cannot report errors and silently wraps out-of-range
+             * values when they are narrowed to 16 bits. */
+            errno = 0;
+            port = strtol(node->value, &end, 10);
+            if (errno || end == node->value || *end != '\0'
+                || port < 1 || port > 65535) {
+                VLOG_WARN("%s: bad 'remote_port'", dev_name);
+            } else {
+                netdev_dev->remote_addr.sin_port = htons(port);
+                netdev_dev->valid_remote_port = true;
+            }
+        } else {
+            VLOG_WARN("%s: unknown argument '%s'", dev_name, node->key);
+        }
+    }
+    return netdev_tunnel_connect(netdev_dev);
+}
+
+/* 'listen' callback: nothing needs to be set up before receiving on a tunnel
+ * device, so this ignores 'netdev_' and returns 0. */
+static int
+netdev_tunnel_listen(struct netdev *netdev_ OVS_UNUSED)
+{
+    return 0;
+}
+
+/* Receives one packet from 'netdev_' into 'buffer', which has room for 'size'
+ * bytes.
+ *
+ * Returns the packet length on success.  Otherwise returns a negative errno
+ * value: -EAGAIN if the tunnel is not connected yet or no packet is waiting,
+ * -EMSGSIZE if the datagram was longer than 'size' (it is discarded), or the
+ * negated error from recv().  EINTR is retried internally. */
+static int
+netdev_tunnel_recv(struct netdev *netdev_, void *buffer, size_t size)
+{
+    struct netdev_dev_tunnel *dev =
+        netdev_dev_tunnel_cast(netdev_get_dev(netdev_));
+
+    if (!dev->connected) {
+        return -EAGAIN;
+    }
+    for (;;) {
+        /* MSG_TRUNC makes recv() return the full datagram length even when
+         * it did not fit, which is how truncation is detected below. */
+        ssize_t retval = recv(dev->sockfd, buffer, size, MSG_TRUNC);
+        /* Capture errno before logging, which may overwrite it. */
+        int error = retval < 0 ? errno : 0;
+
+        VLOG_DBG("%s: recv(%"PRIxPTR", %zu, MSG_TRUNC) = %zd",
+                 netdev_get_name(netdev_), (uintptr_t) buffer, size, retval);
+        if (retval >= 0) {
+            dev->stats.rx_packets++;
+            dev->stats.rx_bytes += retval;
+            if ((size_t) retval <= size) {
+                return retval;
+            }
+            dev->stats.rx_errors++;
+            dev->stats.rx_length_errors++;
+            return -EMSGSIZE;
+        } else if (error != EINTR) {
+            if (error != EAGAIN) {
+                VLOG_WARN_RL(&rl, "error receiveing Ethernet packet on %s: %s",
+                             netdev_get_name(netdev_), strerror(error));
+                dev->stats.rx_errors++;
+            }
+            return -error;
+        }
+    }
+}
+
+/* Arranges for the poll loop to wake up when the tunnel socket of 'netdev_'
+ * becomes readable.  Does nothing if the device has no valid socket. */
+static void
+netdev_tunnel_recv_wait(struct netdev *netdev_)
+{
+    struct netdev_dev_tunnel *tunnel =
+        netdev_dev_tunnel_cast(netdev_get_dev(netdev_));
+
+    if (tunnel->sockfd < 0) {
+        return;
+    }
+    poll_fd_wait(tunnel->sockfd, POLLIN);
+}
+
+/* Sends the 'size' bytes in 'buffer' as one datagram on 'netdev_'.
+ *
+ * Returns 0 if the datagram was handed to the socket (a short send is logged
+ * and counted as a tx error but still returns 0).  Otherwise returns a
+ * positive errno value: EAGAIN if the tunnel is not connected yet or the
+ * socket would block, or the error from send().  EINTR is retried
+ * internally. */
+static int
+netdev_tunnel_send(struct netdev *netdev_, const void *buffer, size_t size)
+{
+    struct netdev_dev_tunnel *dev =
+        netdev_dev_tunnel_cast(netdev_get_dev(netdev_));
+
+    if (!dev->connected) {
+        return EAGAIN;
+    }
+    for (;;) {
+        ssize_t retval = send(dev->sockfd, buffer, size, 0);
+        /* Capture errno before logging, which may overwrite it. */
+        int error = retval < 0 ? errno : 0;
+
+        VLOG_DBG("%s: send(%"PRIxPTR", %zu) = %zd",
+                 netdev_get_name(netdev_), (uintptr_t) buffer, size, retval);
+        if (retval >= 0) {
+            dev->stats.tx_packets++;
+            /* Account for the bytes actually sent, not one per packet. */
+            dev->stats.tx_bytes += retval;
+            if ((size_t) retval != size) {
+                VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%zd bytes of "
+                             "%zu) on %s", retval, size,
+                             netdev_get_name(netdev_));
+                dev->stats.tx_errors++;
+            }
+            return 0;
+        } else if (error != EINTR) {
+            if (error != EAGAIN) {
+                VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
+                             netdev_get_name(netdev_), strerror(error));
+                dev->stats.tx_errors++;
+            }
+            return error;
+        }
+    }
+}
+
+/* Arranges for the poll loop to wake up when the tunnel socket of 'netdev_'
+ * becomes writable.  Does nothing if the device has no valid socket. */
+static void
+netdev_tunnel_send_wait(struct netdev *netdev_)
+{
+    struct netdev_dev_tunnel *tunnel =
+        netdev_dev_tunnel_cast(netdev_get_dev(netdev_));
+
+    if (tunnel->sockfd < 0) {
+        return;
+    }
+    poll_fd_wait(tunnel->sockfd, POLLOUT);
+}
+
+/* Discards every datagram currently queued on the socket of 'netdev_'.
+ *
+ * Returns 0 once the queue is empty (or if the tunnel is not connected), or a
+ * positive errno value if recv() fails for a reason other than "nothing left
+ * to read".
+ *
+ * NOTE(review): termination relies on the socket being non-blocking, so that
+ * recv() fails with EAGAIN when the queue is empty -- confirm that
+ * inet_open_passive() sets this up. */
+static int
+netdev_tunnel_drain(struct netdev *netdev_)
+{
+    struct netdev_dev_tunnel *dev =
+        netdev_dev_tunnel_cast(netdev_get_dev(netdev_));
+    char buffer[128];
+
+    if (!dev->connected) {
+        return 0;
+    }
+    for (;;) {
+        /* recv() signals failure by returning -1 with the cause in errno; a
+         * non-negative return means one datagram (possibly truncated into
+         * 'buffer') was consumed and there may be more. */
+        ssize_t retval = recv(dev->sockfd, buffer, sizeof buffer, MSG_TRUNC);
+
+        if (retval < 0) {
+            if (errno == EAGAIN) {
+                return 0;
+            } else if (errno != EINTR) {
+                return errno;
+            }
+        }
+    }
+}
+
+/* Sets the MAC address of 'netdev' to 'mac'.  The change sequence number is
+ * bumped only if the address actually changes.  Always returns 0. */
+static int
+netdev_tunnel_set_etheraddr(struct netdev *netdev,
+                            const uint8_t mac[ETH_ADDR_LEN])
+{
+    struct netdev_dev_tunnel *tunnel =
+        netdev_dev_tunnel_cast(netdev_get_dev(netdev));
+
+    if (eth_addr_equals(tunnel->hwaddr, mac)) {
+        return 0;
+    }
+
+    memcpy(tunnel->hwaddr, mac, ETH_ADDR_LEN);
+    netdev_tunnel_update_seq(tunnel);
+    return 0;
+}
+
+/* Copies the MAC address of 'netdev' into 'mac'.  Always returns 0. */
+static int
+netdev_tunnel_get_etheraddr(const struct netdev *netdev,
+                            uint8_t mac[ETH_ADDR_LEN])
+{
+    const struct netdev_dev_tunnel *tunnel;
+
+    tunnel = netdev_dev_tunnel_cast(netdev_get_dev(netdev));
+    memcpy(mac, tunnel->hwaddr, ETH_ADDR_LEN);
+
+    return 0;
+}
+
+
+/* Copies the counters kept for 'netdev' into '*stats'.  Always returns 0. */
+static int
+netdev_tunnel_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
+{
+    const struct netdev_dev_tunnel *tunnel;
+
+    tunnel = netdev_dev_tunnel_cast(netdev_get_dev(netdev));
+    memcpy(stats, &tunnel->stats, sizeof *stats);
+
+    return 0;
+}
+
+/* Overwrites the counters kept for 'netdev' with '*stats'.  Always returns
+ * 0. */
+static int
+netdev_tunnel_set_stats(struct netdev *netdev, const struct netdev_stats *stats)
+{
+    struct netdev_dev_tunnel *tunnel;
+
+    tunnel = netdev_dev_tunnel_cast(netdev_get_dev(netdev));
+    memcpy(&tunnel->stats, stats, sizeof tunnel->stats);
+
+    return 0;
+}
+
+/* Turns on the flags in 'on' and off the flags in 'off' for 'netdev', storing
+ * the previous flags in '*old_flagsp'.  When a flag appears in both, 'off'
+ * wins.
+ *
+ * Only NETDEV_UP and NETDEV_PROMISC are accepted; any other flag makes the
+ * call return EINVAL without touching anything.  Otherwise returns 0, and
+ * bumps the change sequence number if the flags changed. */
+static int
+netdev_tunnel_update_flags(struct netdev *netdev,
+                           enum netdev_flags off, enum netdev_flags on,
+                           enum netdev_flags *old_flagsp)
+{
+    struct netdev_dev_tunnel *tunnel =
+        netdev_dev_tunnel_cast(netdev_get_dev(netdev));
+    enum netdev_flags before;
+
+    if ((off | on) & ~(NETDEV_UP | NETDEV_PROMISC)) {
+        return EINVAL;
+    }
+
+    /* XXX Should we actually do something with these flags?  For now they
+     * are only recorded. */
+    before = tunnel->flags;
+    *old_flagsp = before;
+    tunnel->flags = (before | on) & ~off;
+    if (tunnel->flags != before) {
+        netdev_tunnel_update_seq(tunnel);
+    }
+    return 0;
+}
+
+/* Returns the current change sequence number of the device behind
+ * 'netdev'. */
+static unsigned int
+netdev_tunnel_change_seq(const struct netdev *netdev)
+{
+    const struct netdev_dev_tunnel *tunnel =
+        netdev_dev_tunnel_cast(netdev_get_dev(netdev));
+
+    return tunnel->change_seq;
+}
+\f
+/* Helper functions. */
+
+/* Advances the change sequence number of 'dev', skipping 0 when the counter
+ * wraps around. */
+static void
+netdev_tunnel_update_seq(struct netdev_dev_tunnel *dev)
+{
+    unsigned int next = dev->change_seq + 1;
+
+    dev->change_seq = next ? next : 1;
+}
+
+/* unixctl handler for "netdev-tunnel/get-port NAME": replies with the local
+ * port, in host byte order, of the tunnel device called NAME, or with an
+ * error if no such device exists. */
+static void
+netdev_tunnel_get_port(struct unixctl_conn *conn,
+                       int argc OVS_UNUSED, const char *argv[],
+                       void *aux OVS_UNUSED)
+{
+    const char *dev_name = argv[1];
+    struct netdev_dev_tunnel *tunnel_dev;
+    char reply[6];              /* Up to "65535" plus the terminator. */
+
+    tunnel_dev = shash_find_data(&tunnel_netdev_devs, dev_name);
+    if (tunnel_dev) {
+        snprintf(reply, sizeof reply, "%d",
+                 ntohs(tunnel_dev->local_addr.sin_port));
+        unixctl_command_reply(conn, reply);
+    } else {
+        unixctl_command_reply_error(conn, "no such tunnel netdev");
+    }
+}
+
+
+/* 'init' callback of the tunnel class: registers the
+ * "netdev-tunnel/get-port NAME" unixctl command, which takes exactly one
+ * argument and is served by netdev_tunnel_get_port().  Always returns 0. */
+static int
+netdev_tunnel_init(void)
+{
+    unixctl_command_register("netdev-tunnel/get-port", "NAME",
+                             1, 1, netdev_tunnel_get_port, NULL);
+    return 0;
+}
+
+/* The "tunnel" netdev class.  The initializer is positional, so the entries
+ * must stay in the member order of 'struct netdev_class'.  Only creation,
+ * configuration, packet I/O, MAC address, statistics, flags and the change
+ * sequence number are implemented; every other callback is NULL. */
+const struct netdev_class netdev_tunnel_class = {
+    "tunnel",
+    netdev_tunnel_init,         /* init */
+    NULL,                       /* run */
+    NULL,                       /* wait */
+
+    netdev_tunnel_create,
+    netdev_tunnel_destroy,
+    netdev_tunnel_get_config,
+    netdev_tunnel_set_config, 
+
+    netdev_tunnel_open,
+    netdev_tunnel_close,
+
+    netdev_tunnel_listen,
+    netdev_tunnel_recv,
+    netdev_tunnel_recv_wait,
+    netdev_tunnel_drain,
+
+    netdev_tunnel_send, 
+    netdev_tunnel_send_wait,  
+
+    netdev_tunnel_set_etheraddr,
+    netdev_tunnel_get_etheraddr,
+    NULL,                      /* get_mtu */
+    NULL,                      /* set_mtu */
+    NULL,                       /* get_ifindex */
+    NULL,                      /* get_carrier */
+    NULL,                       /* get_carrier_resets */
+    NULL,                       /* get_miimon */
+    netdev_tunnel_get_stats,
+    netdev_tunnel_set_stats,
+
+    NULL,                       /* get_features */
+    NULL,                       /* set_advertisements */
+
+    NULL,                       /* set_policing */
+    NULL,                       /* get_qos_types */
+    NULL,                       /* get_qos_capabilities */
+    NULL,                       /* get_qos */
+    NULL,                       /* set_qos */
+    NULL,                       /* get_queue */
+    NULL,                       /* set_queue */
+    NULL,                       /* delete_queue */
+    NULL,                       /* get_queue_stats */
+    NULL,                       /* dump_queues */
+    NULL,                       /* dump_queue_stats */
+
+    NULL,                       /* get_in4 */
+    NULL,                       /* set_in4 */
+    NULL,                       /* get_in6 */
+    NULL,                       /* add_router */
+    NULL,                       /* get_next_hop */
+    NULL,                       /* get_drv_info */
+    NULL,                       /* arp_lookup */
+
+    netdev_tunnel_update_flags,
+
+    netdev_tunnel_change_seq
+};
index 394d895..2b4dec4 100644 (file)
@@ -79,12 +79,14 @@ netdev_initialize(void)
         netdev_register_provider(&netdev_linux_class);
         netdev_register_provider(&netdev_internal_class);
         netdev_register_provider(&netdev_tap_class);
+       netdev_register_provider(&netdev_tap_pl_class);
         netdev_vport_register();
 #endif
 #ifdef __FreeBSD__
         netdev_register_provider(&netdev_tap_class);
         netdev_register_provider(&netdev_bsd_class);
 #endif
+       netdev_register_provider(&netdev_tunnel_class);
     }
 }
 
diff --git a/lib/tunalloc.c b/lib/tunalloc.c
new file mode 100644 (file)
index 0000000..12c2a70
--- /dev/null
@@ -0,0 +1,90 @@
+/* Slice-side code to allocate tuntap interface in root slice
+ * Based on bmsocket.c
+ *  Thom Haddow - 08/10/09
+ *
+ * Call tun_alloc() with IFF_TUN or IFF_TAP as an argument to get back an fd to
+ * a new tuntap interface. The interface name is stored in the if_name argument
+ * (it can also be acquired via the TUNGETIFF ioctl).
+ */
+
+#include <sys/un.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+
+#include "tunalloc.h"
+
+#define VSYS_TUNTAP "/var/run/pl-ovs.control"
+
+/* Receives one message from the connected UNIX socket "fd".  The message
+ * payload (the interface name, at most IFNAMSIZ bytes) is stored in
+ * "vif_name"; the file descriptor carried as SCM_RIGHTS ancillary data is
+ * returned.  Returns -1 on a receive error, on EOF, or when the message does
+ * not carry a file descriptor.
+ * vif_name should be IFNAMSIZ chars long. */
+static int receive_vif_fd(int fd, char *vif_name)
+{
+       struct msghdr msg;
+       struct iovec iov;
+       int rv;
+       int vif_fd;
+       /* an array of size_t rather than char, presumably so that the buffer
+        * is suitably aligned for a struct cmsghdr */
+       size_t ccmsg[CMSG_SPACE(sizeof(int)) / sizeof(size_t)];
+       struct cmsghdr *cmsg;
+
+       /* Use IOV to read interface name */
+       iov.iov_base = vif_name;
+       iov.iov_len = IFNAMSIZ;
+
+       msg.msg_name = 0;
+       msg.msg_namelen = 0;
+       msg.msg_iov = &iov;
+       msg.msg_iovlen = 1;
+       /* old BSD implementations should use msg_accrights instead of
+        * msg_control; the interface is different. */
+       msg.msg_control = ccmsg;
+       msg.msg_controllen = sizeof(ccmsg);
+       msg.msg_flags = 0;
+
+       /* retry when interrupted by a signal */
+       while(((rv = recvmsg(fd, &msg, 0)) == -1) && errno == EINTR);
+       if (rv == -1) {
+               return -1;
+       }
+       if(!rv) {
+               /* EOF */
+               return -1;
+       }
+
+       /* CMSG_FIRSTHDR() yields NULL when no ancillary data came along, and
+        * only a complete SOL_SOCKET/SCM_RIGHTS message holds a descriptor. */
+       cmsg = CMSG_FIRSTHDR(&msg);
+       if (cmsg == NULL
+           || cmsg->cmsg_level != SOL_SOCKET
+           || cmsg->cmsg_type != SCM_RIGHTS
+           || cmsg->cmsg_len < CMSG_LEN(sizeof(int))) {
+               return -1;
+       }
+       /* copy instead of casting: CMSG_DATA() returns an unsigned char * */
+       memcpy(&vif_fd, CMSG_DATA(cmsg), sizeof(vif_fd));
+       return vif_fd;
+}
+
+
+/* Connects to the UNIX socket VSYS_TUNTAP and receives from it a tuntap file
+ * descriptor together with the interface name, which is stored in "if_name"
+ * (a buffer of at least IFNAMSIZ bytes).  "iftype" is not used by this
+ * implementation.  Returns the received descriptor, or -1 on failure. */
+int tun_alloc(int iftype, char *if_name)
+{
+    int control_fd;
+    struct sockaddr_un addr;
+    int remotefd;
+    int saved_errno;
+
+    (void) iftype; /* unused */
+
+    control_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (control_fd == -1) {
+        return -1;
+    }
+
+    /* Clear structure; since strncpy() below copies at most
+     * sizeof(sun_path) - 1 bytes, sun_path stays NUL-terminated. */
+    memset(&addr, 0, sizeof(struct sockaddr_un));
+    addr.sun_family = AF_UNIX;
+    strncpy(addr.sun_path, VSYS_TUNTAP,
+            sizeof(addr.sun_path) - 1);
+
+    if (connect(control_fd, (struct sockaddr *) &addr,
+                sizeof(struct sockaddr_un)) == -1) {
+        /* do not leak the socket; keep connect()'s errno for the caller */
+        saved_errno = errno;
+        close(control_fd);
+        errno = saved_errno;
+        return -1;
+    }
+
+    remotefd = receive_vif_fd(control_fd, if_name);
+
+    /* the control connection is no longer needed once the fd has arrived */
+    close(control_fd);
+
+    return remotefd;
+}
diff --git a/lib/tunalloc.h b/lib/tunalloc.h
new file mode 100644 (file)
index 0000000..3e5caae
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef _TUNALLOC_H
+#define _TUNALLOC_H
+
+int tun_alloc(int iftype, char *if_name);
+
+#endif
diff --git a/planetlab/automake.mk b/planetlab/automake.mk
new file mode 100644 (file)
index 0000000..ccc4f7d
--- /dev/null
@@ -0,0 +1,16 @@
+sbin_PROGRAMS += planetlab/pltap-ovs/pltap-ovs 
+sbin_PROGRAMS += planetlab/vsysc/vsysc
+
+# this Makefile is not intended to go on the sliver image - esp. not in /usr/sbin
+#      planetlab/scripts/Makefile
+# same goes for showgraph
+#      planetlab/scripts/showgraph
+dist_sbin_SCRIPTS += planetlab/scripts/sliver-ovs 
+
+planetlab_pltap_ovs_pltap_ovs_SOURCES =
+planetlab_pltap_ovs_pltap_ovs_SOURCES += planetlab/pltap-ovs/pltap-ovs.c
+planetlab_pltap_ovs_pltap_ovs_SOURCES += planetlab/pltap-ovs/tunalloc.c
+planetlab_pltap_ovs_pltap_ovs_SOURCES += planetlab/pltap-ovs/tunalloc.h
+
+planetlab_vsysc_vsysc_SOURCES =
+planetlab_vsysc_vsysc_SOURCES += planetlab/vsysc/vsysc.c
diff --git a/planetlab/exp-tool/Makefile b/planetlab/exp-tool/Makefile
new file mode 100644 (file)
index 0000000..ae27008
--- /dev/null
@@ -0,0 +1,259 @@
+# see README
+# conf.mk is expected to define
+# HOST_<id> and IP_<id> for all nodes involved, as well as 
+# LINKS as a list of <node_id>-<node_id> elements
+
+# run make CONF=anotherconfig.mk if you need several configs
+
+CONF ?= conf.mk
+include $(CONF)
+
+# if undefined in the conf file, use single dash
+SEP?=-
+
+### helper functions
+# flip(1) = 2
+# flip(2) = 1
+flip=$(if $(findstring 1,$(1)),2,1)
+# cutsep (x-y)-> x y
+cutsep=$(subst $(SEP), ,$(1))
+# leftnode (x-y) -> x
+leftnode=$(word 1,$(call cutsep,$(1)))
+# rightnode (x-y) -> y
+rightnode=$(word 2,$(call cutsep,$(1)))
+# linkpart(x@y) = x
+linkpart=$(word 1,$(subst @, ,$(1)))
+# endpart(x@y) = y
+endpart=$(word 2,$(subst @, ,$(1)))
+# get(x-y@1) = x
+# get(x-y@2) = y
+get=$(word $(call endpart,$(1)),$(call cutsep,$(call linkpart,$(1))))
+# opp(x-y@1) = x-y@2
+# opp(x-y@2) = x-y@1
+opp=$(call linkpart,$(1))@$(call flip,$(call endpart,$(1)))
+# rget(x-y@1) = y
+# rget(x-y@2) = x
+rget=$(call get,$(call opp,$(1)))
+###
+solve=$(HOST_$(1))
+solve_ip=$(IP_$(1))
+# can be redefined in conf.mk if that's not the expected behaviour
+display?=host $(1) aka $(call solve,$(1))
+
+#################### set variables after conf.mk
+ifeq "$(SSH_KEY)" ""
+SSH_KEY_OPTION ?=
+else
+SSH_KEY_OPTION ?= -i $(SSH_KEY)
+endif
+
+SSH_OPTIONS ?= $(SSH_KEY_OPTION) -l $(SLICE)
+SSH = ssh $(SSH_OPTIONS)
+
+SUDO ?= sudo -S
+
+ALL_NODE_IDS=$(sort $(foreach link,$(LINKS),$(call leftnode,$(link))) $(foreach link,$(LINKS),$(call rightnode,$(link))))
+ALL_LINK_IDS=$(addsuffix @1,$(LINKS)) $(addsuffix @2,$(LINKS))
+
+####################
+all+init: init all
+init:
+       @[ -d L ] || ( echo Creating tmp dir L; mkdir L)
+       @[ -d U ] || ( echo Creating tmp dir U; mkdir U)
+       @[ -d cache ] || ( echo Creating tmp dir cache; mkdir cache)
+.PHONY: all+init init
+
+FORCE:
+
+.SECONDARY:
+
+LINKTARGETS=$(addprefix L/,$(LINKS))
+all: $(LINKTARGETS)
+.PHONY: all
+
+# could also do make ++SLICE
+showslice: ++SLICE FORCE
+
+shownodes:
+       @$(foreach id,$(ALL_NODE_IDS),echo $(id)=$(call display,$(id));)
+showips:
+       @$(foreach id,$(ALL_NODE_IDS),echo $(id)=$(call display,$(id)) has ip/network set to $(IP_$(id));)
+showlinks:
+       @$(foreach link,$(LINKS), echo $(call display,$(call leftnode,$(link))) '====>' $(call display,$(call rightnode,$(link)));)
+.PHONY: shownodes showips showlinks
+
+sshchecks: $(foreach id,$(ALL_NODE_IDS),cache/sshcheck.$(id))
+.PHONY: sshchecks
+
+DBS=$(foreach id,$(ALL_NODE_IDS),cache/db.$(id))
+dbs: $(DBS)
+.PHONY: dbs
+
+SWITCHS=$(foreach id,$(ALL_NODE_IDS),cache/switch.$(id))
+switchs: $(SWITCHS)
+.PHONY: switchs
+
+start: dbs switchs
+.PHONY: start
+
+stop:$(foreach id,$(ALL_NODE_IDS),cache/stop.$(id))
+.PHONY: stop
+
+status:$(foreach id,$(ALL_NODE_IDS),cache/status.$(id))
+.PHONY: status
+
+BRIDGES=$(foreach id,$(ALL_NODE_IDS),cache/bridge.$(id))
+bridges: $(BRIDGES)
+.PHONY: bridges
+
+### node-oriented targets
+# check ssh connectivity
+cache/sshcheck.%: FORCE
+       @if $(SSH) $(HOST_$*) hostname 2> /dev/null; then echo "ssh on" $(call display,$*) "OK" ; \
+        else echo "ssh on" $(call display,$*) "KO !!!"; fi
+
+# should probably replace sshcheck
+cache/status.%: FORCE
+       @echo "=== DB and SWITCH processes on $(call display,$*)"
+       @$(SSH) $(HOST_$*) $(SUDO) sliver-ovs status
+
+cache/host.%:
+       @echo "IP lookup for $(call display,$*)"
+       @host $(HOST_$*) | sed -n 's/^.*has address *//p' > $@
+
+cache/db.%:
+       @echo "Starting db server on $(call display,$*) - logs in $@.log"
+       @$(SSH) $(HOST_$*) $(SUDO) sliver-ovs start-db &> $@.log && touch $@
+
+cache/switch.%: cache/db.%
+       @echo "Starting vswitchd on $(call display,$*) - logs in $@.log"
+       @$(SSH) $(HOST_$*) $(SUDO) sliver-ovs start-switch &> $@.log && touch $@
+
+cache/bridge.%: cache/switch.%
+       @echo "Creating bridge on $(call display,$*) - logs in $@.log"
+       @$(SSH) $(HOST_$*) $(SUDO) sliver-ovs create-bridge $(IP_$*) > $@ 2> $@.log || { rm $@; exit 1; }
+       @echo Created bridge $$(cat $@) on $(HOST_$*)
+
+# xxx this probably needs a more thorough cleanup in cache/
+# Deletes the bridge on node %, stops switch and db there, then removes the
+# corresponding state files.  '%' is not substituted inside a recipe, so the
+# stem has to be spelled $*.
+cache/stop.%: del-bridge.%
+       @echo "Stopping switch & db on $(call display,$*)"
+       @$(SSH) $(HOST_$*) $(SUDO) sliver-ovs stop && rm -f cache/switch.$* cache/db.$*
+
+### link-oriented targets
+# L/<nodeid>-<node_id>
+L/%: cache/link.%@1 cache/link.%@2
+       @touch $@
+       @echo "Created link $*"
+
+U/%: del-iface.%@1 del-iface.%@2
+       @rm -f L/$*
+       @echo "Deleted link $*"
+
+del-bridge.%: cache/db.%
+       @echo "Deleting bridge on $(call display,$*)"
+       @if [ -f cache/bridge.$* ]; then \
+               $(SSH) $(HOST_$*) $(SUDO) sliver-ovs del-bridge $$(cat cache/bridge.$*);\
+        fi
+       @rm -f cache/bridge.$* \
+             cache/iface.$*$(SEP)*@1 cache/iface.*$(SEP)$*@2 \
+             cache/link.$*$(SEP)*@?  cache/link.*$(SEP)$*@?  \
+             L/$*$(SEP)*             L/*$(SEP)$*
+
+del-switch.%: del-bridge.%
+       @echo "Shutting down switch on $(call display,$*)"
+       @[ -f cache/switch.$* ] && $(SSH) $(HOST_$*) $(SUDO) sliver-ovs stop-switch
+       @rm -f cache/switch.$*
+
+del-db.%:
+       @echo "Shutting down db on $(call display,$*)"
+       @[ -f cache/db.$* ] && $(SSH) $(HOST_$*) $(SUDO) sliver-ovs stop-db
+       @rm -f cache/db.$*
+
+del-links: $(addprefix U/,$(notdir $(filter-out %.log,$(wildcard L/*))))
+
+del-switchs: $(addprefix del-,$(notdir $(filter-out %.log,$(wildcard cache/switch.*))))
+
+del-dbs: $(addprefix del-,$(notdir $(filter-out %.log,$(wildcard cache/db.*))))
+
+shutdown: del-switchs del-dbs
+
+.PHONY: del-links del-switchs del-dbs shutdown
+
+.SECONDEXPANSION:
+
+del-iface.%: cache/db.$$(call get,%)
+       @echo "Removing interface for link $(call linkpart,$*) from $(call get,$*)"
+       @$(SSH) $(HOST_$(call get,$*)) \
+               $(SUDO) sliver-ovs del-port L$(call linkpart,$*)
+       @rm -f cache/iface.$* cache/link.$* cache/link.$(call opp,$*)
+
+
+### '%' here is leftid-rightid@{1,2}
+# we retrieve % as $(*F)
+#linkid=$(call linkpart,%)
+#nodeid=$(call get,%)
+#bridgefile=cache/bridge.$(nodeid)
+cache/iface.%: cache/bridge.$$(call get,%)
+       @echo "Creating interface for link $(call linkpart,$(*F)) on $(call display,$(call get,$(*F))) - logs in $@.log"
+       @$(SSH) $(call solve,$(call get,$(*F))) $(SUDO) sliver-ovs create-port $$(cat cache/bridge.$(call get,$(*F))) L$(call linkpart,$(*F)) > $@ 2> $@.log || { rm $@; exit 1; }
+       echo cache/bridge.$(call get,$(*F))
+       echo cache/host.$(call get,$(*F)) cache/iface.$(*F) cache/iface.$(call opp,$(*F))
+
+
+# linkid=$(call linkpart,%)
+# nodeid=$(call get,%)
+# iface1=cache/iface.%
+# iface2=cache/iface.$(call opp,%)
+cache/link.%: cache/host.$$(call get,%) cache/iface.% cache/iface.$$(call opp,%)
+       @echo "Setting port number of link $(call linkpart,$(*F)) on $(call display,$(call get,$(*F))) - logs in $@.log"
+       @$(SSH) $(call solve,$(call get,$(*F))) $(SUDO) sliver-ovs set-remote-endpoint L$(call linkpart,$(*F)) \
+                       $$(cat cache/host.$(call rget,$(*F))) \
+                       $$(cat cache/iface.$(call opp,$(*F))) 2> $@.log \
+        && touch $@
+
+####################
+CLEANTARGETS=$(addprefix del-,$(notdir $(filter-out %.log,$(wildcard cache/bridge.*))))
+clean: $(CLEANTARGETS)
+distclean:
+       rm -rf L U cache
+.PHONY: clean distclean
+
+####################
+graph.dot:
+       ( echo "digraph $(SLICE) {"; ls L | sed 's/$(SEP)/->/;s/$$/;/'; echo "}" ) > $@
+graph.ps: graph.dot
+       dot -Tps < $^ > $@      
+
+####################
+# 'virtual' targets in that there's no real file attached
+define node_shortcuts
+sshcheck.$(1): cache/sshcheck.$(1) FORCE
+db.$(1): cache/db.$(1) FORCE
+switch.$(1): cache/switch.$(1) FORCE
+start.$(1): cache/start.$(1) FORCE
+stop.$(1): cache/stop.$(1) FORCE
+status.$(1): cache/status.$(1) FORCE
+bridge.$(1): cache/bridge.$(1) FORCE
+host.$(1): cache/host.$(1) FORCE
+# switch already depends on db, but well
+cache/start.$(1): cache/db.$(1) cache/switch.$(1) FORCE
+endef
+
+$(foreach id,$(ALL_NODE_IDS), $(eval $(call node_shortcuts,$(id))))
+
+define link_shortcuts
+iface.%: cache/iface.%
+link.%: cache/link.%
+endef
+
+$(foreach id,$(ALL_LINK_IDS), $(eval $(call link_shortcuts,$(id))))
+
+#################### convenience, for debugging only
+# make +foo : prints the value of $(foo)
+# make ++foo : idem but verbose, i.e. foo=$(foo)
+++%: varname=$(subst +,,$@)
+++%:
+       @echo "$(varname)=$($(varname))"
++%: varname=$(subst +,,$@)
++%:
+       @echo "$($(varname))"
diff --git a/planetlab/exp-tool/README b/planetlab/exp-tool/README
new file mode 100644 (file)
index 0000000..641aca2
--- /dev/null
@@ -0,0 +1,169 @@
+* Introduction
+
+The Makefile contained in this directory can be used by an
+experimenter to dynamically create an overlay network in a PlanetLab
+slice, using the sliver-openvswitch distribution. At present, the
+Makefile only supports the creation of the basic topology (nodes and
+links). 
+All the additional configuration of the bridges/switches (in
+particular, connecting the switches to OpenFlow controllers or
+enabling the Spanning Tree Protocol aka STP) has to be done using the
+tools available in the Open vSwitch distribution. This may change in
+the future.
+
+The overlay network supported by the Makefile may consist of:
+
+- at most one Open vSwitch bridge per sliver;
+- at most a pair of tunnels between each pair of slivers.
+
+(Please note that these limitations are due to the simple naming scheme
+adopted by the Makefile, and are not limitations of sliver-openvswitch.)
+
+Each bridge is connected to a tap device in the sliver. The tap device
+has an IP address chosen by the experimenter. The idea is to connect
+all the tap devices through the overlay network made up of Open vSwitch
+bridges and tunnels among them.
+
+
+* Installation
+
+On each sliver we need to install sliver-openvswitch and obtain the
+following tags:
+
+NAME           VALUE
+vsys           fd_tuntap
+vsys           vif_up
+vsys           vif_down
+vsys_net       (some subnet)
+
+
+On the experimenter box we need:
+
+- GNU make
+- the openssh client
+- the host program (usually distributed in bind-tools)
+- (optionally) the dot program from the graphviz distribution
+
+Then, we can simply copy the Makefile in a working directory on the
+experimenter box. The directory must also contain subdirectories 'L'
+and 'cache':
+
+$ mkdir work
+$ cp /path/to/Makefile work
+$ cd work
+$ mkdir -p L cache
+
+
+* Example usage
+
+Assume we have a PlanetLab slice called 'myslice' which
+contains four nodes:
+
+1) onelab7.iet.unipi.it
+2) planet2.elte.hu
+3) planetlab2.ics.forth.gr
+4) planetlab2.urv.cat
+
+
+Assume we have reserved subnet 10.0.9.0/24 using vsys_net.  We are
+going to build the following overlay network:
+
+   10.0.9.1/24   10.0.9.2/24   10.0.9.3/24
+       1 ----------- 2 ------------ 3
+                     |
+                     |
+                      |
+                     4 
+                 10.0.9.4/24
+
+
+In the same directory where we have put the Makefile we create a 'conf.mk'
+file containing the following variables:
+
+----------
+SLICE=myslice
+HOST_1=onelab7.iet.unipi.it
+IP_1=10.0.9.1/24
+HOST_2=planet2.elte.hu
+IP_2=10.0.9.2/24
+HOST_3=planetlab2.ics.forth.gr
+IP_3=10.0.9.3/24
+HOST_4=planetlab2.urv.cat
+IP_4=10.0.9.4/24
+
+LINKS :=
+LINKS += 1-2
+LINKS += 2-3
+LINKS += 2-4
+----------
+
+NOTE. In this example we have chosen to use numbers (1,2,3,4) as ids
+for nodes, you can use any other name that is convenient for you.
+See the example files in this directory for an example of this.
+
+
+Then, we can just type:
+
+$ make -j
+
+Assuming everything has been set up correctly, this command will start
+the Open vSwitch servers, create the bridges and setup the tunnels. We
+can test that network is up by logging into a node and pinging some
+other node using the private subnet addresses:
+
+$ source conf.mk
+$ ssh -l $SLICE $HOST_1 ping 10.0.9.4
+
+Links can be destroyed and created dynamically. Assume we now want
+the topology to match the following one:
+
+   10.0.9.1/24   10.0.9.2/24 
+       1 ----------- 2 
+                     |
+                     |
+                      |
+                     4 ----------- 3 
+                 10.0.9.4/24  10.0.9.3/24
+
+
+We can issue the following commands:
+
+$ make -j U/2-3      # unlink nodes 2 and 3
+$ make -j L/4-3      # link nodes 4 and 3
+
+The current state of the links is represented as a set of files in the 'L'
+directory.  If dot is installed, we can obtain a graphical representation
+of the topology by typing:
+
+$ make graph.ps
+
+#### BEG WARNING xxx this feature is broken now that LINKS are defined in the same conf.mk file
+Or we can save the current state in the 'links' file (so that we can
+recreate it later):
+
+$ ls L > links
+#### END WARNING xxx this feature is broken now that LINKS are defined in the same conf.mk file
+
+
+* Command reference
+
+All targets can be issued with the '-j' flag to (greatly) speed up operations.
+It may also be useful to use the '-k' flag, so that errors on some nodes do not
+stop the setup on the other nodes.
+
+
+all:           do whatever is needed to set up all the links listed in the LINKS
+               variable of conf.mk.
+
+clean:         tear down all existing links
+
+L/N1-N2:       setup a link between nodes HOST_N1 and HOST_N2
+
+U/N1-N2:       tear down the link (if it exists) between nodes HOST_N1
+               and HOST_N2
+
+del-bridge.N:  delete the bridge running on node HOST_N (this also tears down
+               all links that have an endpoint in N)
+
+graph.ps       create a postscript file containing a (simple) graphical
+               representation
+               of the current topology
diff --git a/planetlab/exp-tool/conf.mk.example b/planetlab/exp-tool/conf.mk.example
new file mode 100644 (file)
index 0000000..f4d9a86
--- /dev/null
@@ -0,0 +1,35 @@
+# the slice that you're using
+SLICE=inri_sl1
+
+# optionally, the related ssh (private) key to use
+SSH_KEY=key_user.rsa
+
+# optionally, you can change the way nodes are displayed
+display="$(call solve,$(1))"
+
+#################### the nodes to use
+HOST_SENDER=vnode09.pl.sophia.inria.fr
+HOST_MUX=vnode02.pl.sophia.inria.fr
+HOST_END1=vnode10.pl.sophia.inria.fr
+HOST_END2=vnode07.pl.sophia.inria.fr
+
+# and their related IP and netmask
+# note that all these MUST fall in the vsys_net tag as granted by your planetlab operator
+# (in this example it is 10.0.100.0/24)
+
+IP_SENDER=10.0.100.1/24
+IP_MUX=10.0.100.2/24
+IP_END1=10.0.100.3/24
+IP_END2=10.0.100.4/24
+
+#################### the links to create
+LINKS :=
+# add one from SENDER to MUX
+LINKS += SENDER-MUX 
+# one from MUX to each of the 2 receivers
+LINKS += MUX-END1
+LINKS += MUX-END2
+
+####################
+
+
diff --git a/planetlab/exp-tool/showgraph b/planetlab/exp-tool/showgraph
new file mode 100755 (executable)
index 0000000..85421cb
--- /dev/null
@@ -0,0 +1,7 @@
+make graph.ps
+gv -watch -spartan graph.ps >/dev/null 2>&1 &
+while :
+do
+       inotifywait -e CREATE -e DELETE L
+       make graph.ps 
+done >/dev/null 2>&1
diff --git a/planetlab/pltap-ovs/pltap-ovs.c b/planetlab/pltap-ovs/pltap-ovs.c
new file mode 100644 (file)
index 0000000..e1660b0
--- /dev/null
@@ -0,0 +1,141 @@
+#include <sys/un.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <errno.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+
+#include "tunalloc.h"
+
+#define OVS_SOCK "/var/run/pl-ovs.control"
+
+void send_fd(int p, int fd, char* vif_name);
+
+char *appname;
+
+#define ERROR(msg)                                                             \
+        do {                                                                   \
+                fprintf(stderr, "%s: %s: %s", appname, msg, strerror(errno));  \
+                exit(1);                                                       \
+        } while (0)
+
+
+/* Sends the NUL-terminated "vif_name" over the connected UNIX socket
+ * "sock_fd", attaching "vif_fd" as SCM_RIGHTS ancillary data.  A sendmsg()
+ * interrupted by a signal is retried; any other failure terminates the
+ * process through ERROR().  Returns 0 otherwise. */
+static
+int send_vif_fd(int sock_fd, int vif_fd, char *vif_name)
+{
+        size_t control[CMSG_SPACE(sizeof(vif_fd)) / sizeof(size_t)];
+        struct iovec payload;
+        struct msghdr hdr;
+        struct cmsghdr *rights;
+        int sent;
+
+        /* The interface name, terminator included, is the regular payload. */
+        payload.iov_base = vif_name;
+        payload.iov_len = strlen(vif_name)+1;
+
+        hdr.msg_name = NULL;
+        hdr.msg_namelen = 0;
+        hdr.msg_iov = &payload;
+        hdr.msg_iovlen = 1;
+        hdr.msg_flags = 0;
+        hdr.msg_control = control;
+        hdr.msg_controllen = sizeof(control);
+
+        /* A single control message carrying the descriptor. */
+        rights = CMSG_FIRSTHDR(&hdr);
+        rights->cmsg_level = SOL_SOCKET;
+        rights->cmsg_type = SCM_RIGHTS;
+        rights->cmsg_len = CMSG_LEN(sizeof(vif_fd));
+        *(int *) CMSG_DATA(rights) = vif_fd;
+        hdr.msg_controllen = rights->cmsg_len;
+
+        do {
+                sent = sendmsg(sock_fd, &hdr, 0);
+        } while (sent == -1 && errno == EINTR);
+
+        if (sent == -1) {
+                ERROR("sending file descriptor");
+        }
+        return 0;
+}
+
+/* Hands the tap descriptor "fd" and its name "vif_name" to the first client
+ * that connects to the OVS_SOCK control socket.
+ * Removes any stale OVS_SOCK, binds and listens on it, writes one byte to the
+ * pipe "p" to signal that the socket is ready, then accepts one connection
+ * and sends the descriptor over it.  Any failure terminates the process
+ * through ERROR(). */
+void send_fd(int p, int fd, char* vif_name)
+{
+        int control_fd;
+        int accept_fd;
+        struct sockaddr_un addr, accept_addr;
+        socklen_t addr_len = sizeof(accept_addr);
+
+        control_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+        if (control_fd == -1) {
+                ERROR("Could not create UNIX socket");
+        }
+
+        /* Clear structure; since strncpy() below copies at most
+         * sizeof(sun_path) - 1 bytes, sun_path stays NUL-terminated. */
+        memset(&addr, 0, sizeof(struct sockaddr_un));
+        addr.sun_family = AF_UNIX;
+        strncpy(addr.sun_path, OVS_SOCK,
+                        sizeof(addr.sun_path) - 1);
+
+        /* a socket file left over from a previous run would make bind() fail */
+        if (unlink(OVS_SOCK) == -1 && errno != ENOENT) {
+                ERROR("Could not unlink " OVS_SOCK " control socket");
+        }
+
+        if (bind(control_fd, (struct sockaddr *) &addr,
+                                sizeof(struct sockaddr_un)) == -1) {
+                ERROR("Could not bind to " OVS_SOCK " control socket");
+        }
+
+        if (listen(control_fd, 5) == -1) {
+                ERROR("listen on " OVS_SOCK " failed");
+        }
+        /* tell the reader of the synch pipe that clients may connect now */
+        if (write(p, "1", 1) != 1) {
+                ERROR("writing on the synch pipe");
+        }
+        if ((accept_fd = accept(control_fd, (struct sockaddr*) &accept_addr,
+                                                &addr_len)) == -1) {
+                ERROR("accept on " OVS_SOCK " failed");
+        }
+        send_vif_fd(accept_fd, fd, vif_name);
+
+        /* only one client is served: release both sockets */
+        close(accept_fd);
+        close(control_fd);
+}
+
+/* Allocates a tap device through tun_alloc(), then forks.  The child
+ * redirects its stdout to /dev/null and serves the tap descriptor through
+ * send_fd().  The parent waits on the synchronization pipe until the child
+ * reports that its control socket is listening, prints the interface name on
+ * stdout and returns 0. */
+int main(int argc, char* argv[])
+{
+        char if_name[IFNAMSIZ];
+        int p[2]; // synchronization pipe
+        char dummy;
+        int tun_fd;
+
+        (void) argc; // unused
+
+        /* must precede the first ERROR(), which prints appname */
+        appname = argv[0];
+
+        if (pipe(p) < 0) {
+                ERROR("pipe");
+        }
+
+        tun_fd = tun_alloc(IFF_TAP, if_name);
+        if (tun_fd < 0) {
+                /* errno is not necessarily meaningful here, so ERROR() is
+                 * not used */
+                fprintf(stderr, "%s: could not allocate a tap device\n", appname);
+                exit(1);
+        }
+
+        switch(fork()) {
+        case -1:
+                ERROR("fork");
+                exit(1);
+        case 0:
+                /* NOTE(review): stdout is replaced by /dev/null, presumably
+                 * so that a caller capturing our output is not kept waiting
+                 * by this background child - confirm */
+                close(1);
+                open("/dev/null", O_WRONLY);
+                close(p[0]);
+                send_fd(p[1], tun_fd, if_name);
+                exit(0);
+        default:
+                close(p[1]);
+                /* blocks until the child has written its "ready" byte */
+                if (read(p[0], &dummy, 1) != 1) {
+                        ERROR("reading from the synch pipe");
+                }
+                printf("%s\n", if_name);
+        }
+        return 0;
+}
diff --git a/planetlab/pltap-ovs/tunalloc.c b/planetlab/pltap-ovs/tunalloc.c
new file mode 100644 (file)
index 0000000..1f083e5
--- /dev/null
@@ -0,0 +1,101 @@
+/* Slice-side code to allocate tuntap interface in root slice
+ * Based on bmsocket.c
+ *  Thom Haddow - 08/10/09
+ *
+ * Call tun_alloc() with IFF_TUN or IFF_TAP as an argument to get back an fd to
+ * a new tuntap interface. The interface name is stored in the if_name argument
+ * (it can also be acquired via the TUNGETIFF ioctl).
+ */
+
+#include <sys/un.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/socket.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+
+#define VSYS_TUNTAP "/vsys/fd_tuntap.control"
+
+
+int tun_alloc(int iftype, char *if_name);
+
+/* Receives one message from the connected UNIX socket "fd".  The message
+ * payload (the interface name, at most IFNAMSIZ bytes) is stored in
+ * "vif_name"; the file descriptor carried as SCM_RIGHTS ancillary data is
+ * returned.  Returns -1 on a receive error, on EOF, or when the message does
+ * not carry a file descriptor.
+ * vif_name should be IFNAMSIZ chars long. */
+static
+int receive_vif_fd(int fd, char *vif_name)
+{
+       struct msghdr msg;
+       struct iovec iov;
+       int rv;
+       int vif_fd;
+       /* an array of size_t rather than char, presumably so that the buffer
+        * is suitably aligned for a struct cmsghdr */
+       size_t ccmsg[CMSG_SPACE(sizeof(int)) / sizeof(size_t)];
+       struct cmsghdr *cmsg;
+
+       /* Use IOV to read interface name */
+       iov.iov_base = vif_name;
+       iov.iov_len = IFNAMSIZ;
+
+       msg.msg_name = 0;
+       msg.msg_namelen = 0;
+       msg.msg_iov = &iov;
+       msg.msg_iovlen = 1;
+       /* old BSD implementations should use msg_accrights instead of
+        * msg_control; the interface is different. */
+       msg.msg_control = ccmsg;
+       msg.msg_controllen = sizeof(ccmsg);
+       msg.msg_flags = 0;
+
+       /* retry when interrupted by a signal */
+       while(((rv = recvmsg(fd, &msg, 0)) == -1) && errno == EINTR);
+       if (rv == -1) {
+               perror("recvmsg");
+               return -1;
+       }
+       if(!rv) {
+               /* EOF */
+               return -1;
+       }
+
+       /* CMSG_FIRSTHDR() yields NULL when no ancillary data came along */
+       cmsg = CMSG_FIRSTHDR(&msg);
+       if (cmsg == NULL) {
+               fprintf(stderr, "got no control message\n");
+               return -1;
+       }
+       /* only a complete SOL_SOCKET/SCM_RIGHTS message holds a descriptor */
+       if (cmsg->cmsg_level != SOL_SOCKET
+           || cmsg->cmsg_type != SCM_RIGHTS
+           || cmsg->cmsg_len < CMSG_LEN(sizeof(int))) {
+               fprintf(stderr, "got control message of unknown type %d\n",
+                       cmsg->cmsg_type);
+               return -1;
+       }
+       /* copy instead of casting: CMSG_DATA() returns an unsigned char * */
+       memcpy(&vif_fd, CMSG_DATA(cmsg), sizeof(vif_fd));
+       return vif_fd;
+}
+
+
+/* Connects to the UNIX socket VSYS_TUNTAP, sends "iftype" over it as a raw
+ * int and then receives a tuntap file descriptor together with the interface
+ * name, which is stored in "if_name" (a buffer of at least IFNAMSIZ bytes).
+ * Failing to create, connect or write to the socket terminates the process;
+ * otherwise the result of receive_vif_fd() is returned, which is -1 when no
+ * descriptor could be received. */
+int tun_alloc(int iftype, char *if_name)
+{
+    int control_fd;
+    struct sockaddr_un addr;
+    int remotefd;
+
+    control_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (control_fd == -1) {
+        /* perror() appends ": <error>\n" itself, so no newline here */
+        perror("Could not create UNIX socket");
+        exit(-1);
+    }
+
+    /* Clear structure; since strncpy() below copies at most
+     * sizeof(sun_path) - 1 bytes, sun_path stays NUL-terminated. */
+    memset(&addr, 0, sizeof(struct sockaddr_un));
+    addr.sun_family = AF_UNIX;
+    strncpy(addr.sun_path, VSYS_TUNTAP,
+            sizeof(addr.sun_path) - 1);
+
+    if (connect(control_fd, (struct sockaddr *) &addr,
+                sizeof(struct sockaddr_un)) == -1) {
+        perror("Could not connect to Vsys control socket");
+        exit(-1);
+    }
+
+    /* passing type param */
+    if (send(control_fd, &iftype, sizeof(iftype), 0) != sizeof(iftype)) {
+        perror("Could not send paramater to Vsys control socket");
+        exit(-1);
+    }
+
+    /* NOTE(review): control_fd is left open on return - confirm whether the
+     * vsys side relies on that before closing it here */
+    remotefd = receive_vif_fd(control_fd, if_name);
+    return remotefd;
+}
diff --git a/planetlab/pltap-ovs/tunalloc.h b/planetlab/pltap-ovs/tunalloc.h
new file mode 100644 (file)
index 0000000..3e5caae
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef _TUNALLOC_H
+#define _TUNALLOC_H
+
+int tun_alloc(int iftype, char *if_name);
+
+#endif
diff --git a/planetlab/scripts/sliver-ovs b/planetlab/scripts/sliver-ovs
new file mode 100755 (executable)
index 0000000..87f173e
--- /dev/null
@@ -0,0 +1,304 @@
+#!/bin/bash
+# -*-shell-mode-*-
+
+### expected to be run as root
+
+COMMAND=$0
+
+#################### global vars
+RUN_DIR=/var/run/openvswitch
+DB_CONF_FILE=/etc/openvswitch/conf.db
+DB_SCHEMA=/usr/share/openvswitch/vswitch.ovsschema
+DB_PID_FILE=/var/run/openvswitch/db.pid
+DB_LOG=/var/log/ovs-db.log
+DB_CTL_PATTERN='ovsdb-server.*.ctl'
+##
+DB_SOCKET=/var/run/openvswitch/db.sock
+##
+SWITCH_PID_FILE=/var/run/openvswitch/switch.pid
+SWITCH_LOG=/var/log/ovs-switch.log
+SWITCH_SOCKET=/var/run/openvswitch/switch.sock
+
+#################### helper functions
+
+function kill_pltap_ovs () {
+    killall pltap-ovs 2>/dev/null || :
+}
+
+function error {
+    echo "$@" >&2
+    exit 1
+}
+
+function get_params {
+    params=$1; shift
+    err_msg="$COMMAND $SUBCOMMAND $(echo $params | perl -pe 's/\S+/<$&>/g')"
+    for p in $(echo $params); do
+        [[ -z "$@" ]] && error "$err_msg"
+        pname=$(echo -n $p|perl -pe 's/\W/_/g')
+        eval $pname="$1"; shift
+    done
+    [[ -n "$@" ]] && error "$err_msg"
+}
+
+# Succeeds when "ovs-appctl version" gets an answer on $SWITCH_SOCKET; all
+# output is discarded.
+function is_switch_running {
+    ovs-appctl --target=$SWITCH_SOCKET version > /dev/null 2>&1
+}
+
+function is_db_running {
+    ovs-appctl --target=$DB_CTRL_SOCKET version >& /dev/null
+}
+
+function tapname () {
+    IP=$1; shift
+    echo $(ip addr show to "$IP/32" | perl -ne '/^\s*\d+:\s*([\w-]+):/ && print $1')
+}
+    
+function wait_server () {
+    pid_file=$1; shift
+    server_name=$1; shift
+    timeout=$1; shift
+
+    expire=$(($(date +%s) + $timeout))
+
+    ## wait for it to be up - xxx todo - could use a timeout of some kind
+    while [ ! -f "$pid_file" ]; do
+       echo "Waiting for $server_name to start... $(($expire - $(date +%s)))s left" >&2
+       sleep 1;
+       [ $(date +%s) -ge $expire ] && return 1
+    done
+    cat "$pid_file"
+}
+
+function wait_device () {
+    tapname=$1; shift
+    timeout=$1; shift
+
+    expire=$(($(date +%s) + $timeout))
+
+    while ! ip link show up | egrep -q "^[0-9]+: +$tapname:"; do
+       echo "Waiting for $tapname to come UP...$(($expire - $(date +%s)))s left" >&2
+       sleep 1
+       [ $(date +%s) -ge $expire ] && return 1
+    done
+    return 0
+}
+
+######################################## startup
+function start_db () {
+    get_params "" "$@"
+
+    ## init conf
+    conf_dir=$(dirname $DB_CONF_FILE)
+    [ -d $conf_dir ] || mkdir -p $conf_dir
+    [ -f $DB_CONF_FILE ] || ovsdb-tool create $DB_CONF_FILE $DB_SCHEMA
+
+    ## init run
+    [ -d $RUN_DIR ] || mkdir -p $RUN_DIR
+
+    ## check 
+    [ -f $DB_CONF_FILE ] || { echo "Could not initialize $DB_CONF_FILE - exiting" ; exit 1 ; }
+    [ -d $RUN_DIR ] || { echo "Could not initialize $RUN_DIR - exiting" ; exit 1 ; }
+
+    ## run the stuff
+    if [ ! -f "$DB_PID_FILE" ]; then
+       ovsdb-server --remote=punix:$DB_SOCKET \
+           --remote=db:Open_vSwitch,manager_options \
+           --private-key=db:SSL,private_key \
+           --certificate=db:SSL,certificate \
+           --bootstrap-ca-cert=db:SSL,ca_cert \
+           --pidfile=$DB_PID_FILE \
+           --log-file=$DB_LOG \
+           --unixctl=$DB_CTRL_SOCKET \
+           --detach >& /dev/null
+    else
+       echo 'ovsdb-server appears to be running already, *not* starting'
+    fi
+    wait_server $DB_PID_FILE ovsdb-server 30
+    echo $DB_PID_FILE
+}
+
+function start_switch () {
+    get_params "" "$@"
+
+    # ensure ovsdb-server is running
+    is_db_running || { echo "ovsdb-server not running" >&2 ; exit 1 ; }
+
+    if [ ! -f "$SWITCH_PID_FILE" ] ; then
+       ovs-vswitchd \
+           --pidfile=$SWITCH_PID_FILE \
+           --log-file=$SWITCH_LOG \
+           --unixctl=$SWITCH_SOCKET \
+           --detach \
+           unix:$DB_SOCKET >& /dev/null
+    else
+       echo 'ovs-vswitchd appears to be running already, *not* starting'
+    fi
+    wait_server $SWITCH_PID_FILE ovs-vswitchd 30
+}
+
+# first dumb stab just read "pkill ovsdb-server" and "pkill ovs-vswitchd"
+# quick and dirty : we locate the control file through a search in /var/run
+# caller should be requested to remember and provide this pid instead
+function stop_db () { 
+    controlfile=$(ls $RUN_DIR/$DB_CTL_PATTERN)
+    [ -f $controlfile ] && ovs-appctl --target=$controlfile exit 
+}
+
+function stop_switch () { 
+    ovs-appctl --target=$SWITCH_SOCKET exit || :
+}
+
+function status () {
+    pids=$(pgrep '^ovs')
+    [ -n "$pids" ] && ps $pids
+}
+
+# Starts the database server, then the switch.
+function start () {
+    start_db
+    start_switch
+}
+
+# Stops the switch, then the database server.
+function stop () {
+    stop_switch
+    stop_db
+}
+
+#################### create functions
+# Creates a bridge on top of a freshly allocated tap device.
+#   $1 : "IP/PREFIX" to assign to the tap device
+# Prints the name of the tap device, which is also the name of the bridge.
+# When the address is already assigned to an interface that is a bridge, its
+# name is printed and the script exits with status 0; when it is assigned to
+# an interface that is not a bridge, the script exits through error.
+function create_bridge () {
+    
+    get_params "IP/PREFIX" "$@"
+
+    # split "IP/PREFIX" around the slash
+    IP=${IP_PREFIX%/*}
+    PREFIX=${IP_PREFIX#*/}
+
+    set -e
+    # ensure ovs-vswitchd is running
+    is_switch_running || { echo "ovs-vswitchd not running" >&2 ; exit 1 ; }
+
+    # check whether the address is already assigned
+    TAPNAME=$(tapname $IP)
+    if [ ! -z "$TAPNAME" ]; then
+       if ovs-vsctl --db=unix:$DB_SOCKET br-exists "$TAPNAME"; then
+           echo $TAPNAME
+           exit 0
+       fi
+       kill_pltap_ovs
+       error "$IP already assigned to $TAPNAME"
+    fi
+
+    # we're clear
+    # pltap-ovs prints the name of the tap device it has allocated
+    TAPNAME=$(pltap-ovs)
+    # from now on, any exit of this script kills the pltap-ovs processes
+    trap kill_pltap_ovs EXIT
+    # xxx wouldn't that be safer if left-aligned ?
+    vsysc vif_up << EOF
+       $TAPNAME
+       $IP
+       $PREFIX
+EOF
+    # NOTE(review): wait_device is the left-hand side of '&&', so 'set -e'
+    # does not abort when it times out; the name is then still printed and 0
+    # is returned although no bridge was added - confirm this is intended.
+    wait_device $TAPNAME 60 && \
+       ovs-vsctl --db=unix:$DB_SOCKET add-br $TAPNAME -- set bridge $TAPNAME datapath_type=planetlab
+    echo $TAPNAME
+    return 0
+}
+
+function create_port () {
+
+    get_params "bridge port" "$@"
+
+    # ensure ovs-vswitchd is running
+    is_switch_running || { echo "ovs-vswitchd not running" >&2 ; exit 1 ; }
+
+    set -e
+    if ! ovs-vsctl --db=unix:$DB_SOCKET list-ports "$bridge" | grep -q "^$port\$"; then
+       ovs-vsctl --db=unix:$DB_SOCKET add-port "$bridge" "$port" -- set interface "$port" type=tunnel
+    fi
+    ovs-appctl --target=$SWITCH_SOCKET netdev-tunnel/get-port "$port"
+    return 0
+}
+
+function set_remote_endpoint () {
+
+    get_params "local_port remote_ip remote_UDP_port" "$@"
+
+    # ensure ovs-vswitchd is running
+    is_switch_running || { echo "ovs-vswitchd not running" >&2 ; exit 1 ; }
+
+    set -e
+    ovs-vsctl --db=unix:$DB_SOCKET set interface $local_port \
+        options:remote_ip=$remote_ip \
+       options:remote_port=$remote_UDP_port
+    return 0
+}
+
+#################### del functions
+function del_bridge () {
+    
+    get_params "bridge_name" "$@"
+
+    W=
+    if ! is_switch_running; then
+       # we can delete the bridge even if ovs-vswitchd is not running,
+       # but we need a running ovsdb-server
+       is_db_running || { echo "ovsdb-server not running" >&2; exit 1; }
+       W="--no-wait"
+    fi
+
+    if ovs-vsctl --db=unix:$DB_SOCKET br-exists "$bridge_name"; then
+       ovs-vsctl --db=unix:$DB_SOCKET $W del-br $bridge_name
+    fi
+    return 0
+}
+
+function del_port () {
+    
+    get_params "port" "$@"
+
+    W=
+    if ! is_switch_running; then
+       # we can delete the port even if ovs-vswitchd is not running,
+       # but we need a running ovsdb-server
+       is_db_running || { echo "ovsdb-server not running" >&2; exit 1; }
+       W="--no-wait"
+    fi
+
+    set -e
+    if ovs-vsctl --db=unix:$DB_SOCKET port-to-br "$port" >/dev/null 2>&1; then
+       ovs-vsctl --db=unix:$DB_SOCKET $W del-port "$port"
+    fi
+    return 0
+}
+
+function show () {
+
+    get_params "" "$@"
+
+    is_db_running || { echo "ovsdb-server not running" >&2; exit 1; }
+
+    ovs-vsctl --db=unix:$DB_SOCKET show
+}
+
+####################
+SUPPORTED_SUBCOMMANDS="start stop status 
+start_db stop_db start_switch stop_switch
+create_bridge create_port del_bridge del_port
+show set_remote_endpoint"
+
+function main () {
+       message="Usage: $COMMAND <subcommand> ...
+Supported subcommands are (dash or underscore is the same):
+$SUPPORTED_SUBCOMMANDS"
+       [[ -z "$@" ]] && error "$message"
+
+       SUBCOMMAND=$1; shift
+       # support dashes instead of underscores
+       SUBCOMMAND=$(echo $SUBCOMMAND | sed -e s,-,_,g)
+        found=""
+        for supported in $SUPPORTED_SUBCOMMANDS; do [ "$SUBCOMMAND" = "$supported" ] && found=yes; done
+
+       [ -z "$found" ] && error $message
+
+       $SUBCOMMAND "$@"
+}
+
+main "$@"
diff --git a/planetlab/vsysc/vsysc.c b/planetlab/vsysc/vsysc.c
new file mode 100644 (file)
index 0000000..98c90e7
--- /dev/null
@@ -0,0 +1,191 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/select.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/* directory prefix prepended to the vsys control file names */
+#define VSYS_PATH "/vsys"
+
+/* size of each control file path buffer, terminating NUL included */
+#define MAXPATH 1024
+/* capacity of each channel's transfer buffer, in bytes */
+#define BUFSIZE 4096
+
+/* direction indices, used to subscript ctrl[], fd[] and channels[] */
+#define IN 0
+#define OUT 1
+
+char ctrl[2][MAXPATH]; /* paths of vsys.in & vsys.out */
+
+/*
+ * Store in ctrl[dir] the path of the control file of the given vsys
+ * script for direction dir (IN or OUT), i.e. VSYS_PATH/<vsys>.in or
+ * VSYS_PATH/<vsys>.out.  Terminates the program if formatting fails or
+ * the resulting path does not fit in MAXPATH bytes.
+ */
+static void mkpath(int dir, const char* vsys)
+{
+       static const char *suffix[] = { "in", "out" };
+       int len;
+
+       len = snprintf(ctrl[dir], MAXPATH, "%s/%s.%s", VSYS_PATH, vsys, suffix[dir]);
+       if (len < 0) {
+               perror(vsys);
+               exit(EXIT_FAILURE);
+       }
+       if (len >= MAXPATH) {
+               /* snprintf had to truncate the path */
+               fprintf(stderr, "argument too long\n");
+               exit(EXIT_FAILURE);
+       }
+}
+
+/*
+ * Open the control file whose path was stored in ctrl[dir] and return
+ * its descriptor.  The IN file is opened write-only and the OUT file
+ * read-only, both in non-blocking mode.  Terminates the program if the
+ * file cannot be opened.
+ */
+static int open_ctrl(int dir)
+{
+       int flags = O_NONBLOCK;
+       int fd;
+
+       if (dir == IN)
+               flags |= O_WRONLY;
+       else
+               flags |= O_RDONLY;
+
+       fd = open(ctrl[dir], flags);
+       if (fd < 0) {
+               perror(ctrl[dir]);
+               exit(EXIT_FAILURE);
+       }
+       return fd;
+}
+
+
+/*
+ * Add O_NONBLOCK to the file status flags of fd, leaving the other
+ * flags untouched.  Terminates the program if either fcntl() call fails.
+ */
+static void set_nonblocking(int fd)
+{
+       int flags = fcntl(fd, F_GETFL, 0);
+
+       if (flags < 0) {
+               perror("fcntl F_GETFL");
+               exit(EXIT_FAILURE);
+       }
+
+       flags |= O_NONBLOCK;
+       if (fcntl(fd, F_SETFL, flags) < 0) {
+               perror("fcntl F_SETFL");
+               exit(EXIT_FAILURE);
+       }
+}
+
+#if 0
+/*
+ * Debugging aid (compiled out): print on stderr, as "name: {a, b, ...}",
+ * the descriptors in [0, max) that are members of set.
+ */
+static void print_set(const char* name, int max, const fd_set* set)
+{
+       const char *sep = "";
+       int fd;
+
+       fprintf(stderr, "%s: {", name);
+       for (fd = 0; fd < max; fd++) {
+               if (!FD_ISSET(fd, set))
+                       continue;
+               fprintf(stderr, "%s%d", sep, fd);
+               sep = ", ";
+       }
+       fprintf(stderr, "}\n");
+}
+#endif
+
+/*
+ * One direction of the relay: data is read from rfd into buf and then
+ * written out to wfd.
+ */
+struct channel {
+       const char *name;       /* label passed to perror() on I/O errors */
+       int active;             /* cleared once rfd has reached end of file */
+       int writing;            /* non-zero while buf holds data still to be written */
+       char buf[BUFSIZE];      /* transfer buffer */
+       char *rp;               /* end of the data in buf; where read() stores */
+       char *wp;               /* next byte of buf to be passed to write() */
+       int rfd;                /* descriptor the channel reads from */
+       int wfd;                /* descriptor the channel writes to */
+};
+
+/* number of channels that have not yet seen end of file on their input */
+static int active_channels = 0;
+
+/*
+ * Set up channel c to relay data from rfd to wfd, with an empty buffer,
+ * and count it among the active channels.  name is kept by reference
+ * and used to label error messages.
+ */
+static void channel_init(struct channel *c, const char* name, int rfd, int wfd)
+{
+       c->name = name;
+       c->rp = c->buf;
+       c->wp = c->buf;
+       c->rfd = rfd;
+       c->wfd = wfd;
+       /* start in reading state explicitly, rather than relying on the
+        * channel having been zero-initialized by its storage class */
+       c->writing = 0;
+       c->active = 1;
+       active_channels++;
+}
+
+/*
+ * Register the descriptor channel c is currently waiting on: wfd in
+ * writeset while buffered data is pending, rfd in readset otherwise.
+ * Inactive channels register nothing.
+ */
+static void channel_fdset(struct channel *c, fd_set* readset, fd_set* writeset)
+{
+       if (c->active) {
+               if (!c->writing) {
+                       FD_SET(c->rfd, readset);
+               } else {
+                       FD_SET(c->wfd, writeset);
+               }
+       }
+}
+
+/* true if err only means "try again later" on a non-blocking descriptor */
+static int channel_transient(int err)
+{
+       return err == EAGAIN || err == EWOULDBLOCK || err == EINTR;
+}
+
+/*
+ * Perform one I/O step on channel c, according to the descriptor sets
+ * filled in by select().
+ *
+ * While writing, push as much pending data as wfd accepts and go back
+ * to reading once the buffer has been drained.  While reading, fetch
+ * new data from rfd and switch to writing; on end of file close wfd and
+ * deactivate the channel.
+ *
+ * Transient failures (EAGAIN, EWOULDBLOCK, EINTR) leave the channel
+ * untouched, so the operation is retried after the next select(); any
+ * other I/O error terminates the program.
+ */
+static void channel_run(struct channel *c, const fd_set* readset, const fd_set* writeset)
+{
+       ssize_t n;
+
+       if (!c->active)
+               return;
+       if (c->writing) {
+               if (!FD_ISSET(c->wfd, writeset))
+                       return;
+               n = write(c->wfd, c->wp, c->rp - c->wp);
+               if (n < 0) {
+                       if (channel_transient(errno))
+                               return;
+                       perror(c->name);
+                       exit(EXIT_FAILURE);
+               }
+               c->wp += n;
+               if (c->wp == c->rp) {
+                       /* everything flushed: rewind and read again */
+                       c->wp = c->rp = c->buf;
+                       c->writing = 0;
+               }
+       } else {
+               if (!FD_ISSET(c->rfd, readset))
+                       return;
+               /* never ask for more than what is left of buf after rp */
+               n = read(c->rfd, c->rp, (c->buf + BUFSIZE) - c->rp);
+               if (n < 0) {
+                       if (channel_transient(errno))
+                               return;
+                       perror(c->name);
+                       exit(EXIT_FAILURE);
+               }
+               if (n) {
+                       c->wp = c->rp;
+                       c->rp += n;
+                       c->writing = 1;
+               } else {
+                       /* end of file: propagate it to the other side */
+                       close(c->wfd);
+                       c->active = 0;
+                       active_channels--;
+               }
+       }
+}
+
+/* the two relay directions, indexed by IN and OUT */
+static struct channel channels[2];
+
+
+/*
+ * Usage: vsysc <vsys>
+ *
+ * Connect the standard streams to the control files of the given vsys
+ * script: standard input is relayed to <vsys>.in, and <vsys>.out is
+ * relayed to standard output, until both directions have reached end of
+ * file.
+ */
+int main(int argc, char *argv[])
+{
+       fd_set readset, writeset;
+       int fd[2]; /* fds of vsys.in & vsys.out */
+       int maxfd;
+       int i;
+
+       if (argc != 2) {
+               fprintf(stderr, "Usage: %s <vsys>\n", argv[0]);
+               exit(EXIT_FAILURE);
+       }
+
+       mkpath(IN,  argv[1]);
+       mkpath(OUT, argv[1]);
+
+       /* the output side is opened first, then the input side */
+       fd[OUT] = open_ctrl(OUT);
+       fd[IN]  = open_ctrl(IN);
+
+       /* select() needs the highest descriptor among the four in use */
+       maxfd = STDIN_FILENO;
+       if (STDOUT_FILENO > maxfd)
+               maxfd = STDOUT_FILENO;
+       if (fd[OUT] > maxfd)
+               maxfd = fd[OUT];
+       if (fd[IN] > maxfd)
+               maxfd = fd[IN];
+
+       set_nonblocking(STDIN_FILENO);
+       set_nonblocking(STDOUT_FILENO);
+
+       channel_init(&channels[IN], "IN", STDIN_FILENO, fd[IN]);
+       channel_init(&channels[OUT], "OUT", fd[OUT], STDOUT_FILENO);
+
+       while (active_channels != 0) {
+               FD_ZERO(&readset);
+               FD_ZERO(&writeset);
+               for (i = 0; i < 2; i++)
+                       channel_fdset(&channels[i], &readset, &writeset);
+
+               if (select(maxfd + 1, &readset, &writeset, NULL, NULL) < 0) {
+                       perror("select");
+                       exit(EXIT_FAILURE);
+               }
+
+               for (i = 0; i < 2; i++)
+                       channel_run(&channels[i], &readset, &writeset);
+       }
+       return EXIT_SUCCESS;
+}
+
diff --git a/sliver-openvswitch.spec b/sliver-openvswitch.spec
new file mode 100644 (file)
index 0000000..d9241aa
--- /dev/null
@@ -0,0 +1,51 @@
+%define name sliver-openvswitch
+# to check for any change:
+# grep AC_INIT configure.ac 
+%define version 1.6.90
+%define taglevel 1
+
+%define debug_package %{nil}
+
+%define release %{taglevel}%{?pldistro:.%{pldistro}}%{?date:.%{date}}
+
+Vendor: OneLab
+Packager: OneLab <support@planet-lab.eu>
+Distribution: PlanetLab %{plrelease}
+URL: %{SCMURL}
+#Requires: 
+
+Summary: Openvswitch modified for running from a PlanetLab sliver
+Name: %{name}
+Version: %{version}
+Release: %{release}
+License: GPL
+Group: System Environment/Applications
+BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot
+Source0: sliver-openvswitch-%{version}.tar.gz
+
+%description
+Openvswitch tuned for running within a PlanetLab sliver
+
+%prep 
+%setup -q
+
+%build
+./boot.sh
+# stay as close as possible to the regular linux/fedora layout
+./configure --prefix=/usr --sysconfdir=/etc --localstatedir=/var
+make
+
+%install
+make install DESTDIR=$RPM_BUILD_ROOT
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%files
+/usr
+
+%post
+
+%postun
+
+%changelog