Merge branch 'master' into next
author Justin Pettit <jpettit@nicira.com>
Fri, 5 Feb 2010 23:58:27 +0000 (15:58 -0800)
committer Justin Pettit <jpettit@nicira.com>
Sat, 6 Feb 2010 01:14:55 +0000 (17:14 -0800)
Conflicts:
COPYING
datapath/datapath.h
lib/automake.mk
lib/dpif-provider.h
lib/dpif.c
lib/hmap.h
lib/netdev-provider.h
lib/netdev.c
lib/stream-ssl.h
ofproto/executer.c
ofproto/ofproto.c
ofproto/ofproto.h
tests/automake.mk
utilities/ovs-ofctl.c
utilities/ovs-vsctl.in
vswitchd/ovs-vswitchd.conf.5.in
xenserver/etc_init.d_vswitch
xenserver/etc_xensource_scripts_vif
xenserver/opt_xensource_libexec_interface-reconfigure

57 files changed:
COPYING
ChangeLog
README
acinclude.m4
configure.ac
datapath/actions.c
datapath/datapath.c
datapath/datapath.h
datapath/dp_dev.c
include/openvswitch/datapath-protocol.h
lib/automake.mk
lib/dpif-linux.c
lib/dpif-netdev.c
lib/dpif-provider.h
lib/dpif.c
lib/dpif.h
lib/hmap.h
lib/netdev-provider.h
lib/netdev.c
lib/netdev.h
lib/sflow.h [new file with mode: 0644]
lib/sflow_agent.c [new file with mode: 0644]
lib/sflow_api.h [new file with mode: 0644]
lib/sflow_poller.c [new file with mode: 0644]
lib/sflow_receiver.c [new file with mode: 0644]
lib/sflow_sampler.c [new file with mode: 0644]
lib/stream-ssl.h
lib/vlog-modules.def
ofproto/automake.mk
ofproto/collectors.c
ofproto/collectors.h
ofproto/ofproto-sflow.c [new file with mode: 0644]
ofproto/ofproto-sflow.h [new file with mode: 0644]
ofproto/ofproto.c
ofproto/ofproto.h
tests/automake.mk
tests/library.at
tests/test-strtok_r.c [new file with mode: 0644]
utilities/automake.mk
utilities/ovs-discover.c
utilities/ovs-dpctl.c
utilities/ovs-ofctl.c
vswitchd/automake.mk
vswitchd/bridge.c
vswitchd/ovs-vswitchd.8.in
vswitchd/vswitch-idl.ann
vswitchd/vswitch.ovsschema
xenserver/automake.mk
xenserver/etc_init.d_vswitch
xenserver/etc_init.d_vswitch-xapi-update
xenserver/etc_xensource_scripts_vif
xenserver/opt_xensource_libexec_InterfaceReconfigure.py [new file with mode: 0644]
xenserver/opt_xensource_libexec_InterfaceReconfigureBridge.py [new file with mode: 0644]
xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py [new file with mode: 0644]
xenserver/opt_xensource_libexec_interface-reconfigure
xenserver/usr_sbin_xen-bugtool
xenserver/vswitch-xen.spec

diff --git a/COPYING b/COPYING
index 3f0659d..39dd45c 100644 (file)
--- a/COPYING
+++ b/COPYING
@@ -46,3 +46,7 @@ The files under ovsdb/simplejson are covered by the following license:
     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     DEALINGS IN THE SOFTWARE.
+
+Files lib/sflow*.[ch] are licensed under the terms of the InMon sFlow
+licence that is available at:
+        http://www.inmon.com/technology/sflowlicense.txt
diff --git a/ChangeLog b/ChangeLog
index eff97aa..88653d3 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+v0.99.1 - 25 Jan 2010
+---------------------
+    - Add support for sFlow(R)
+    - Make headers compatible with C++
+    - Bug fixes
+
+v0.99.0 - 14 Jan 2010
+---------------------
+    - User-space forwarding engine
+    - Bug fixes
+
 v0.90.6 - 6 Oct 2009
 --------------------
     - Bug fixes
diff --git a/README b/README
index 7871c76..a0c9a2e 100644 (file)
--- a/README
+++ b/README
@@ -6,8 +6,8 @@ What is Open vSwitch?
 Open vSwitch is a multilayer software switch licensed under the open
 source Apache 2 license.  Our goal is to implement a production
 quality switch platform that supports standard management interfaces
-(e.g. NetFlow, RSPAN, ERSPAN, IOS-like CLI), and opens the forwarding
-functions to programmatic extension and control.
+(e.g. NetFlow, sFlow(R), RSPAN, ERSPAN, IOS-like CLI), and opens the
+forwarding functions to programmatic extension and control.
 
 Open vSwitch is well suited to function as a virtual switch in VM
 environments.  In addition to exposing standard control and visibility
@@ -20,7 +20,8 @@ The bulk of the code is written in platform-independent C and is
 easily ported to other environments.  The current release of Open
 vSwitch supports the following features:
 
-    * Visibility into inter-VM communication via NetFlow, SPAN, and RSPAN
+    * Visibility into inter-VM communication via NetFlow, sFlow, SPAN,
+      and RSPAN
     * Standard 802.1Q VLAN model with trunking
     * Per VM policing
     * NIC bonding with source-MAC load balancing
index d33a7c4..e38676f 100644 (file)
@@ -215,6 +215,7 @@ AC_DEFUN([OVS_CHECK_STRTOK_R],
                            char *token1, *token2;
                            token1 = strtok_r(string, ":", &save_ptr);
                            token2 = strtok_r(NULL, ":", &save_ptr);
+                           freopen ("/dev/null", "w", stdout);
                            printf ("%s %s\n", token1, token2);
                            return 0;
                           ]])],
@@ -259,4 +260,14 @@ dnl Example: OVS_ENABLE_OPTION([-Wdeclaration-after-statement])
 AC_DEFUN([OVS_ENABLE_OPTION], 
   [OVS_CHECK_CC_OPTION([$1], [WARNING_FLAGS="$WARNING_FLAGS $1"])
    AC_SUBST([WARNING_FLAGS])])
+
+dnl OVS_CONDITIONAL_CC_OPTION([OPTION], [CONDITIONAL])
+dnl Check whether the given C compiler OPTION is accepted.
+dnl If so, enable the given Automake CONDITIONAL.
+
+dnl Example: OVS_CONDITIONAL_CC_OPTION([-Wno-unused], [HAVE_WNO_UNUSED])
+AC_DEFUN([OVS_CONDITIONAL_CC_OPTION],
+  [OVS_CHECK_CC_OPTION(
+    [$1], [ovs_have_cc_option=yes], [ovs_have_cc_option=no])
+   AM_CONDITIONAL([$2], [test $ovs_have_cc_option = yes])])
 dnl ----------------------------------------------------------------------
index 2f5c87f..6a8a1ea 100644 (file)
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 AC_PREREQ(2.63)
-AC_INIT(openvswitch, 0.90.6, ovs-bugs@openvswitch.org)
+AC_INIT(openvswitch, 0.99.1, ovs-bugs@openvswitch.org)
 NX_BUILDNR
 AC_CONFIG_SRCDIR([datapath/datapath.c])
 AC_CONFIG_MACRO_DIR([m4])
@@ -76,6 +76,7 @@ OVS_ENABLE_OPTION([-Wold-style-definition])
 OVS_ENABLE_OPTION([-Wmissing-prototypes])
 OVS_ENABLE_OPTION([-Wmissing-field-initializers])
 OVS_ENABLE_OPTION([-Wno-override-init])
+OVS_CONDITIONAL_CC_OPTION([-Wno-unused], [HAVE_WNO_UNUSED])
 
 AC_ARG_VAR(KARCH, [Kernel Architecture String])
 AC_SUBST(KARCH)
index b059cc0..4fc0a4a 100644 (file)
@@ -400,6 +400,28 @@ output_control(struct datapath *dp, struct sk_buff *skb, u32 arg, gfp_t gfp)
        return dp_output_control(dp, skb, _ODPL_ACTION_NR, arg);
 }
 
+/* Send a copy of this packet up to the sFlow agent, along with extra
+ * information about what happened to it. */
+static void sflow_sample(struct datapath *dp, struct sk_buff *skb,
+                        const union odp_action *a, int n_actions,
+                        gfp_t gfp, struct net_bridge_port *nbp)
+{
+       struct odp_sflow_sample_header *hdr;
+       unsigned int actlen = n_actions * sizeof(union odp_action);
+       unsigned int hdrlen = sizeof(struct odp_sflow_sample_header);
+       struct sk_buff *nskb;
+
+       nskb = skb_copy_expand(skb, actlen + hdrlen, 0, gfp);
+       if (!nskb)
+               return;
+
+       memcpy(__skb_push(nskb, actlen), a, actlen);
+       hdr = (struct odp_sflow_sample_header*)__skb_push(nskb, hdrlen);
+       hdr->n_actions = n_actions;
+       hdr->sample_pool = atomic_read(&nbp->sflow_pool);
+       dp_output_control(dp, nskb, _ODPL_SFLOW_NR, 0);
+}
+
 /* Execute a list of actions against 'skb'. */
 int execute_actions(struct datapath *dp, struct sk_buff *skb,
                    struct odp_flow_key *key,
@@ -412,6 +434,17 @@ int execute_actions(struct datapath *dp, struct sk_buff *skb,
         * is slightly obscure just to avoid that. */
        int prev_port = -1;
        int err;
+
+       if (dp->sflow_probability) {
+               struct net_bridge_port *p = skb->dev->br_port;
+               if (p) {
+                       atomic_inc(&p->sflow_pool);
+                       if (dp->sflow_probability == UINT_MAX ||
+                           net_random() < dp->sflow_probability)
+                               sflow_sample(dp, skb, a, n_actions, gfp, p);
+               }
+       }
+
        for (; n_actions > 0; a++, n_actions--) {
                WARN_ON_ONCE(skb_shared(skb));
                if (prev_port != -1) {
index 116fd98..6a3b9ec 100644 (file)
@@ -349,6 +349,7 @@ static int new_nbp(struct datapath *dp, struct net_device *dev, int port_no)
        p->port_no = port_no;
        p->dp = dp;
        p->dev = dev;
+       atomic_set(&p->sflow_pool, 0);
        if (!is_dp_dev(dev))
                rcu_assign_pointer(dev->br_port, p);
        else {
@@ -646,9 +647,7 @@ int vswitch_skb_checksum_setup(struct sk_buff *skb)
 out:
        return err;
 }
-#else
-int vswitch_skb_checksum_setup(struct sk_buff *skb) { return 0; }
-#endif /* CONFIG_XEN && linux == 2.6.18 */
+#endif /* CONFIG_XEN && HAVE_PROTO_DATA_VALID */
 
  /* Types of checksums that we can receive (these all refer to L4 checksums):
  * 1. CHECKSUM_NONE: Device that did not compute checksum, contains full
@@ -796,8 +795,7 @@ dp_output_control(struct datapath *dp, struct sk_buff *skb, int queue_no,
        int err;
 
        WARN_ON_ONCE(skb_shared(skb));
-       BUG_ON(queue_no != _ODPL_MISS_NR && queue_no != _ODPL_ACTION_NR);
-
+       BUG_ON(queue_no != _ODPL_MISS_NR && queue_no != _ODPL_ACTION_NR && queue_no != _ODPL_SFLOW_NR);
        queue = &dp->queues[queue_no];
        err = -ENOBUFS;
        if (skb_queue_len(queue) >= DP_MAX_QUEUE_LEN)
@@ -1499,6 +1497,7 @@ static long openvswitch_ioctl(struct file *f, unsigned int cmd,
        int dp_idx = iminor(f->f_dentry->d_inode);
        struct datapath *dp;
        int drop_frags, listeners, port_no;
+       unsigned int sflow_probability;
        int err;
 
        /* Handle commands with special locking requirements up front. */
@@ -1562,6 +1561,16 @@ static long openvswitch_ioctl(struct file *f, unsigned int cmd,
                set_listen_mask(f, listeners);
                break;
 
+       case ODP_GET_SFLOW_PROBABILITY:
+               err = put_user(dp->sflow_probability, (unsigned int __user *)argp);
+               break;
+
+       case ODP_SET_SFLOW_PROBABILITY:
+               err = get_user(sflow_probability, (unsigned int __user *)argp);
+               if (!err)
+                       dp->sflow_probability = sflow_probability;
+               break;
+
        case ODP_PORT_QUERY:
                err = query_port(dp, (struct odp_port __user *)argp);
                break;
index d6883db..6732b59 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009 Nicira Networks.
+ * Copyright (c) 2009, 2010 Nicira Networks.
  * Distributed under the terms of the GNU GPL version 2.
  *
  * Significant portions of this file may be copied from parts of the Linux
@@ -79,9 +79,22 @@ struct dp_bucket {
        struct sw_flow *flows[];
 };
 
-#define DP_N_QUEUES 2
+#define DP_N_QUEUES 3
 #define DP_MAX_QUEUE_LEN 100
 
+/**
+ * struct dp_stats_percpu - per-cpu packet processing statistics for a given
+ * datapath.
+ * @n_frags: Number of IP fragments processed by datapath.
+ * @n_hit: Number of received packets for which a matching flow was found in
+ * the flow table.
+ * @n_miss: Number of received packets that had no matching flow in the flow
+ * table.  The sum of @n_hit and @n_miss is the number of packets that have
+ * been received by the datapath.
+ * @n_lost: Number of received packets that had no matching flow in the flow
+ * table that could not be sent to userspace (normally due to an overflow in
+ * one of the datapath's queues).
+ */
 struct dp_stats_percpu {
        u64 n_frags;
        u64 n_hit;
@@ -95,10 +108,29 @@ struct dp_port_group {
        u16 ports[];
 };
 
+/**
+ * struct datapath - datapath for flow-based packet switching
+ * @mutex: Mutual exclusion for ioctls.
+ * @dp_idx: Datapath number (index into the dps[] array in datapath.c).
+ * @ifobj: Represents /sys/class/net/<devname>/brif.
+ * @drop_frags: Drop all IP fragments if nonzero.
+ * @queues: %DP_N_QUEUES sets of queued packets for userspace to handle.
+ * @waitqueue: Waitqueue, for waiting for new packets in @queues.
+ * @n_flows: Number of flows currently in flow table.
+ * @table: Current flow table (RCU protected).
+ * @groups: Port groups, used by ODPAT_OUTPUT_GROUP action (RCU protected).
+ * @n_ports: Number of ports currently in @ports.
+ * @ports: Map from port number to &struct net_bridge_port.  %ODPP_LOCAL port
+ * always exists, other ports may be %NULL.
+ * @port_list: List of all ports in @ports in arbitrary order.
+ * @stats_percpu: Per-CPU datapath statistics.
+ * @sflow_probability: Number of packets out of UINT_MAX to sample to the
+ * %ODPL_SFLOW queue, i.e. (@sflow_probability/UINT_MAX) is the probability of
+ * sampling a given packet.
+ */
 struct datapath {
        struct mutex mutex;
        int dp_idx;
-
        struct kobject ifobj;
 
        int drop_frags;
@@ -117,19 +149,37 @@ struct datapath {
        /* Switch ports. */
        unsigned int n_ports;
        struct net_bridge_port *ports[DP_MAX_PORTS];
-       struct list_head port_list; /* All ports, including local_port. */
+       struct list_head port_list;
 
        /* Stats. */
        struct dp_stats_percpu *stats_percpu;
+
+       /* sFlow Sampling */
+       unsigned int sflow_probability;
 };
 
+/**
+ * struct net_bridge_port - one port within a datapath
+ * @port_no: Index into @dp's @ports array.
+ * @dp: Datapath to which this port belongs.
+ * @dev: The network device attached to this port.  The @br_port member in @dev
+ * points back to this &struct net_bridge_port.
+ * @kobj: Represents /sys/class/net/<devname>/brport.
+ * @linkname: The name of the link from /sys/class/net/<datapath>/brif to this
+ * &struct net_bridge_port.  (We keep this around so that we can delete it
+ * if @dev gets renamed.)  Set to the null string when no link exists.
+ * @node: Element in @dp's @port_list.
+ * @sflow_pool: Number of packets that were candidates for sFlow sampling,
+ * regardless of whether they were actually chosen and sent up to userspace.
+ */
 struct net_bridge_port {
        u16 port_no;
        struct datapath *dp;
        struct net_device *dev;
        struct kobject kobj;
        char linkname[IFNAMSIZ];
-       struct list_head node;   /* Element in datapath.ports. */
+       struct list_head node;
+       atomic_t sflow_pool;
 };
 
 extern struct notifier_block dp_device_notifier;
@@ -160,16 +210,15 @@ static inline const char *dp_name(const struct datapath *dp)
        return dp->ports[ODPP_LOCAL]->dev->name;
 }
 
-#ifdef CONFIG_XEN
-int skb_checksum_setup(struct sk_buff *skb);
+#if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID)
+int vswitch_skb_checksum_setup(struct sk_buff *skb);
 #else
-static inline int skb_checksum_setup(struct sk_buff *skb)
+static inline int vswitch_skb_checksum_setup(struct sk_buff *skb)
 {
        return 0;
 }
 #endif
 
-int vswitch_skb_checksum_setup(struct sk_buff *skb);
 void forward_ip_summed(struct sk_buff *skb);
 
 #endif /* datapath.h */
index 284a6b5..5b434c1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009 Nicira Networks.
+ * Copyright (c) 2009, 2010 Nicira Networks.
  * Distributed under the terms of the GNU GPL version 2.
  *
  * Significant portions of this file may be copied from parts of the Linux
@@ -10,6 +10,7 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
+#include <linux/preempt.h>
 #include <linux/rcupdate.h>
 #include <linux/skbuff.h>
 #include <linux/workqueue.h>
@@ -62,9 +63,13 @@ int dp_dev_recv(struct net_device *netdev, struct sk_buff *skb)
        else
                netif_rx_ni(skb);
        netdev->last_rx = jiffies;
+
+       preempt_disable();
        lb_stats = per_cpu_ptr(dp_dev->lstats, smp_processor_id());
        lb_stats->rx_packets++;
        lb_stats->rx_bytes += len;
+       preempt_enable();
+
        return len;
 }
 
index ab7eb9e..b079f52 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009 Nicira Networks.
+ * Copyright (c) 2009, 2010 Nicira Networks.
  *
  * This file is offered under your choice of two licenses: Apache 2.0 or GNU
  * GPL 2.0 or later.  The permission statements for each of these licenses is
@@ -77,6 +77,9 @@
 
 #define ODP_EXECUTE             _IOR('O', 18, struct odp_execute)
 
+#define ODP_SET_SFLOW_PROBABILITY _IOR('O', 19, int)
+#define ODP_GET_SFLOW_PROBABILITY _IOW('O', 20, int)
+
 struct odp_stats {
     /* Flows. */
     __u32 n_flows;              /* Number of flows in flow table. */
@@ -98,6 +101,7 @@ struct odp_stats {
     /* Queues. */
     __u16 max_miss_queue;       /* Max length of ODPL_MISS queue. */
     __u16 max_action_queue;     /* Max length of ODPL_ACTION queue. */
+    __u16 max_sflow_queue;      /* Max length of ODPL_SFLOW queue. */
 };
 
 /* Logical ports. */
@@ -109,16 +113,51 @@ struct odp_stats {
 #define ODPL_MISS       (1 << _ODPL_MISS_NR)
 #define _ODPL_ACTION_NR 1       /* Packet output to ODPP_CONTROLLER. */
 #define ODPL_ACTION     (1 << _ODPL_ACTION_NR)
-#define ODPL_ALL        (ODPL_MISS | ODPL_ACTION)
-
-/* Format of messages read from datapath fd. */
+#define _ODPL_SFLOW_NR  2       /* sFlow samples. */
+#define ODPL_SFLOW      (1 << _ODPL_SFLOW_NR)
+#define ODPL_ALL        (ODPL_MISS | ODPL_ACTION | ODPL_SFLOW)
+
+/**
+ * struct odp_msg - format of messages read from datapath fd.
+ * @type: One of the %_ODPL_* constants.
+ * @length: Total length of message, including this header.
+ * @port: Port that received the packet embedded in this message.
+ * @reserved: Not currently used.  Should be set to 0.
+ * @arg: Argument value whose meaning depends on @type.
+ *
+ * For @type == %_ODPL_MISS_NR, the header is followed by packet data.  The
+ * @arg member is unused and set to 0.
+ *
+ * For @type == %_ODPL_ACTION_NR, the header is followed by packet data.  The
+ * @arg member is copied from the &struct odp_action_controller that caused
+ * the &struct odp_msg to be composed.
+ *
+ * For @type == %_ODPL_SFLOW_NR, the header is followed by &struct
+ * odp_sflow_sample_header, then by an array of &union odp_action (the number
+ * of which is specified in &struct odp_sflow_sample_header), then by packet
+ * data.
+ */
 struct odp_msg {
-    __u32 type;                 /* _ODPL_MISS_NR or _ODPL_ACTION_NR. */
-    __u32 length;               /* Message length, including header. */
-    __u16 port;                 /* Port on which frame was received. */
+    __u32 type;
+    __u32 length;
+    __u16 port;
     __u16 reserved;
-    __u32 arg;                  /* Argument value specified in action. */
-    /* Followed by packet data. */
+    __u32 arg;
+};
+
+/**
+ * struct odp_sflow_sample_header - header added to sFlow sampled packet.
+ * @sample_pool: Number of packets that were candidates for sFlow sampling,
+ * regardless of whether they were actually chosen and sent up to userspace.
+ * @n_actions: Number of "union odp_action"s immediately following this header.
+ *
+ * This header follows &struct odp_msg when that structure's @type is
+ * %_ODPL_SFLOW_NR, and it is itself followed by an array of &union odp_action
+ * (the number of which is specified in @n_actions) and then by packet data.
+ */
+struct odp_sflow_sample_header {
+    __u32 sample_pool;
+    __u32 n_actions;
 };
 
 #define ODP_PORT_INTERNAL (1 << 0) /* This port is simulated. */
index 31cab8a..51d3c11 100644 (file)
@@ -1,4 +1,4 @@
-# Copyright (C) 2009 Nicira Networks, Inc.
+# Copyright (C) 2009, 2010 Nicira Networks, Inc.
 #
 # Copying and distribution of this file, with or without modification,
 # are permitted in any medium without royalty provided the copyright
@@ -125,6 +125,7 @@ lib_libopenvswitch_a_SOURCES = \
        lib/stream-unix.c \
        lib/stream.c \
        lib/stream.h \
+       lib/string.h \
        lib/svec.c \
        lib/svec.h \
        lib/tag.c \
@@ -154,6 +155,19 @@ nodist_lib_libopenvswitch_a_SOURCES = \
        lib/dirs.c
 CLEANFILES += $(nodist_lib_libopenvswitch_a_SOURCES)
 
+noinst_LIBRARIES += lib/libsflow.a
+lib_libsflow_a_SOURCES = \
+       lib/sflow_api.h \
+       lib/sflow.h \
+       lib/sflow_agent.c \
+       lib/sflow_sampler.c \
+       lib/sflow_poller.c \
+       lib/sflow_receiver.c
+lib_libsflow_a_CFLAGS = $(AM_CFLAGS)
+if HAVE_WNO_UNUSED
+lib_libsflow_a_CFLAGS += -Wno-unused
+endif
+
 if HAVE_NETLINK
 lib_libopenvswitch_a_SOURCES += \
        lib/netlink-protocol.h \
index 94c3cf5..d707b46 100644 (file)
@@ -196,6 +196,7 @@ dpif_linux_delete(struct dpif *dpif_)
 static int
 dpif_linux_get_stats(const struct dpif *dpif_, struct odp_stats *stats)
 {
+    memset(stats, 0, sizeof *stats);
     return do_ioctl(dpif_, ODP_DP_STATS, stats);
 }
 
@@ -395,6 +396,19 @@ dpif_linux_recv_set_mask(struct dpif *dpif_, int listen_mask)
     return do_ioctl(dpif_, ODP_SET_LISTEN_MASK, &listen_mask);
 }
 
+static int
+dpif_linux_get_sflow_probability(const struct dpif *dpif_,
+                                 uint32_t *probability)
+{
+    return do_ioctl(dpif_, ODP_GET_SFLOW_PROBABILITY, probability);
+}
+
+static int
+dpif_linux_set_sflow_probability(struct dpif *dpif_, uint32_t probability)
+{
+    return do_ioctl(dpif_, ODP_SET_SFLOW_PROBABILITY, &probability);
+}
+
 static int
 dpif_linux_recv(struct dpif *dpif_, struct ofpbuf **bufp)
 {
@@ -474,6 +488,8 @@ const struct dpif_class dpif_linux_class = {
     dpif_linux_execute,
     dpif_linux_recv_get_mask,
     dpif_linux_recv_set_mask,
+    dpif_linux_get_sflow_probability,
+    dpif_linux_set_sflow_probability,
     dpif_linux_recv,
     dpif_linux_recv_wait,
 };
@@ -554,13 +570,14 @@ make_openvswitch_device(int minor, char **fnp)
     struct stat s;
     char fn[128];
 
+    *fnp = NULL;
+
     major = get_openvswitch_major();
     if (major < 0) {
         return -major;
     }
     dev = makedev(major, minor);
 
-    *fnp = NULL;
     sprintf(fn, "%s/dp%d", dirname, minor);
     if (!stat(fn, &s)) {
         if (!S_ISCHR(s.st_mode)) {
index e9fb216..8abf1f9 100644 (file)
@@ -1331,6 +1331,8 @@ const struct dpif_class dpif_netdev_class = {
     dpif_netdev_execute,
     dpif_netdev_recv_get_mask,
     dpif_netdev_recv_set_mask,
+    NULL,                       /* get_sflow_probability */
+    NULL,                       /* set_sflow_probability */
     dpif_netdev_recv,
     dpif_netdev_recv_wait,
 };
index 699f724..fddc8ea 100644 (file)
 #include <assert.h>
 #include "dpif.h"
 
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
 /* Open vSwitch datapath interface.
  *
  * This structure should be treated as opaque by dpif implementations. */
 struct dpif {
-    const struct dpif_class *class;
+    const struct dpif_class *dpif_class;
     char *base_name;
     char *full_name;
     uint8_t netflow_engine_type;
@@ -39,9 +43,9 @@ void dpif_init(struct dpif *, const struct dpif_class *, const char *name,
 void dpif_uninit(struct dpif *dpif, bool close);
 
 static inline void dpif_assert_class(const struct dpif *dpif,
-                                     const struct dpif_class *class)
+                                     const struct dpif_class *dpif_class)
 {
-    assert(dpif->class == class);
+    assert(dpif->dpif_class == dpif_class);
 }
 
 /* Datapath interface class structure, to be defined by each implementation of
@@ -114,7 +118,7 @@ struct dpif_class {
      *
      * If successful, 'dpif' will not be used again except as an argument for
      * the 'close' member function. */
-    int (*delete)(struct dpif *dpif);
+    int (*destroy)(struct dpif *dpif);
 
     /* Retrieves statistics for 'dpif' into 'stats'. */
     int (*get_stats)(const struct dpif *dpif, struct odp_stats *stats);
@@ -275,6 +279,25 @@ struct dpif_class {
      * corresponding type when it calls the recv member function. */
     int (*recv_set_mask)(struct dpif *dpif, int listen_mask);
 
+    /* Retrieves 'dpif''s sFlow sampling probability into '*probability'.
+     * Return value is 0 or a positive errno value.  EOPNOTSUPP indicates that
+     * the datapath does not support sFlow, as does a null pointer.
+     *
+     * '*probability' is expressed as the number of packets out of UINT_MAX to
+     * sample, i.e. probability/UINT_MAX is the probability of sampling a given
+     * packet. */
+    int (*get_sflow_probability)(const struct dpif *dpif,
+                                 uint32_t *probability);
+
+    /* Sets 'dpif''s sFlow sampling probability to 'probability'.  Return value
+     * is 0 or a positive errno value.  EOPNOTSUPP indicates that the datapath
+     * does not support sFlow, as does a null pointer.
+     *
+     * 'probability' is expressed as the number of packets out of UINT_MAX to
+     * sample, i.e. probability/UINT_MAX is the probability of sampling a given
+     * packet. */
+    int (*set_sflow_probability)(struct dpif *dpif, uint32_t probability);
+
     /* Attempts to receive a message from 'dpif'.  If successful, stores the
      * message into '*packetp'.  The message, if one is received, must begin
      * with 'struct odp_msg' as a header.  Only messages of the types selected
@@ -292,4 +315,8 @@ struct dpif_class {
 extern const struct dpif_class dpif_linux_class;
 extern const struct dpif_class dpif_netdev_class;
 
+#ifdef  __cplusplus
+}
+#endif
+
 #endif /* dpif-provider.h */
index f3d6713..315f11f 100644 (file)
@@ -319,7 +319,8 @@ dpif_close(struct dpif *dpif)
     if (dpif) {
         struct registered_dpif_class *registered_class;
 
-        registered_class = shash_find_data(&dpif_classes, dpif->class->type);
+        registered_class = shash_find_data(&dpif_classes, 
+                dpif->dpif_class->type);
         assert(registered_class);
         assert(registered_class->refcount);
 
@@ -356,8 +357,8 @@ dpif_base_name(const struct dpif *dpif)
 int
 dpif_get_all_names(const struct dpif *dpif, struct svec *all_names)
 {
-    if (dpif->class->get_all_names) {
-        int error = dpif->class->get_all_names(dpif, all_names);
+    if (dpif->dpif_class->get_all_names) {
+        int error = dpif->dpif_class->get_all_names(dpif, all_names);
         if (error) {
             VLOG_WARN_RL(&error_rl,
                          "failed to retrieve names for datpath %s: %s",
@@ -380,7 +381,7 @@ dpif_delete(struct dpif *dpif)
 
     COVERAGE_INC(dpif_destroy);
 
-    error = dpif->class->delete(dpif);
+    error = dpif->dpif_class->destroy(dpif);
     log_operation(dpif, "delete", error);
     return error;
 }
@@ -390,7 +391,7 @@ dpif_delete(struct dpif *dpif)
 int
 dpif_get_dp_stats(const struct dpif *dpif, struct odp_stats *stats)
 {
-    int error = dpif->class->get_stats(dpif, stats);
+    int error = dpif->dpif_class->get_stats(dpif, stats);
     if (error) {
         memset(stats, 0, sizeof *stats);
     }
@@ -406,7 +407,7 @@ dpif_get_dp_stats(const struct dpif *dpif, struct odp_stats *stats)
 int
 dpif_get_drop_frags(const struct dpif *dpif, bool *drop_frags)
 {
-    int error = dpif->class->get_drop_frags(dpif, drop_frags);
+    int error = dpif->dpif_class->get_drop_frags(dpif, drop_frags);
     if (error) {
         *drop_frags = false;
     }
@@ -420,7 +421,7 @@ dpif_get_drop_frags(const struct dpif *dpif, bool *drop_frags)
 int
 dpif_set_drop_frags(struct dpif *dpif, bool drop_frags)
 {
-    int error = dpif->class->set_drop_frags(dpif, drop_frags);
+    int error = dpif->dpif_class->set_drop_frags(dpif, drop_frags);
     log_operation(dpif, "set_drop_frags", error);
     return error;
 }
@@ -439,7 +440,7 @@ dpif_port_add(struct dpif *dpif, const char *devname, uint16_t flags,
 
     COVERAGE_INC(dpif_port_add);
 
-    error = dpif->class->port_add(dpif, devname, flags, &port_no);
+    error = dpif->dpif_class->port_add(dpif, devname, flags, &port_no);
     if (!error) {
         VLOG_DBG_RL(&dpmsg_rl, "%s: added %s as port %"PRIu16,
                     dpif_name(dpif), devname, port_no);
@@ -463,7 +464,7 @@ dpif_port_del(struct dpif *dpif, uint16_t port_no)
 
     COVERAGE_INC(dpif_port_del);
 
-    error = dpif->class->port_del(dpif, port_no);
+    error = dpif->dpif_class->port_del(dpif, port_no);
     log_operation(dpif, "port_del", error);
     return error;
 }
@@ -475,7 +476,7 @@ int
 dpif_port_query_by_number(const struct dpif *dpif, uint16_t port_no,
                           struct odp_port *port)
 {
-    int error = dpif->class->port_query_by_number(dpif, port_no, port);
+    int error = dpif->dpif_class->port_query_by_number(dpif, port_no, port);
     if (!error) {
         VLOG_DBG_RL(&dpmsg_rl, "%s: port %"PRIu16" is device %s",
                     dpif_name(dpif), port_no, port->devname);
@@ -494,7 +495,7 @@ int
 dpif_port_query_by_name(const struct dpif *dpif, const char *devname,
                         struct odp_port *port)
 {
-    int error = dpif->class->port_query_by_name(dpif, devname, port);
+    int error = dpif->dpif_class->port_query_by_name(dpif, devname, port);
     if (!error) {
         VLOG_DBG_RL(&dpmsg_rl, "%s: device %s is on port %"PRIu16,
                     dpif_name(dpif), devname, port->port);
@@ -559,7 +560,7 @@ dpif_port_list(const struct dpif *dpif,
         }
 
         ports = xcalloc(stats.n_ports, sizeof *ports);
-        retval = dpif->class->port_list(dpif, ports, stats.n_ports);
+        retval = dpif->dpif_class->port_list(dpif, ports, stats.n_ports);
         if (retval < 0) {
             /* Hard error. */
             error = -retval;
@@ -607,7 +608,7 @@ exit:
 int
 dpif_port_poll(const struct dpif *dpif, char **devnamep)
 {
-    int error = dpif->class->port_poll(dpif, devnamep);
+    int error = dpif->dpif_class->port_poll(dpif, devnamep);
     if (error) {
         *devnamep = NULL;
     }
@@ -619,7 +620,7 @@ dpif_port_poll(const struct dpif *dpif, char **devnamep)
 void
 dpif_port_poll_wait(const struct dpif *dpif)
 {
-    dpif->class->port_poll_wait(dpif);
+    dpif->dpif_class->port_poll_wait(dpif);
 }
 
 /* Retrieves a list of the port numbers in port group 'group' in 'dpif'.
@@ -640,8 +641,8 @@ dpif_port_group_get(const struct dpif *dpif, uint16_t group,
     *ports = NULL;
     *n_ports = 0;
     for (;;) {
-        int retval = dpif->class->port_group_get(dpif, group,
-                                                 *ports, *n_ports);
+        int retval = dpif->dpif_class->port_group_get(dpif, group,
+                                                      *ports, *n_ports);
         if (retval < 0) {
             /* Hard error. */
             error = -retval;
@@ -679,7 +680,7 @@ dpif_port_group_set(struct dpif *dpif, uint16_t group,
 
     COVERAGE_INC(dpif_port_group_set);
 
-    error = dpif->class->port_group_set(dpif, group, ports, n_ports);
+    error = dpif->dpif_class->port_group_set(dpif, group, ports, n_ports);
     log_operation(dpif, "port_group_set", error);
     return error;
 }
@@ -693,7 +694,7 @@ dpif_flow_flush(struct dpif *dpif)
 
     COVERAGE_INC(dpif_flow_flush);
 
-    error = dpif->class->flow_flush(dpif);
+    error = dpif->dpif_class->flow_flush(dpif);
     log_operation(dpif, "flow_flush", error);
     return error;
 }
@@ -719,7 +720,7 @@ dpif_flow_get(const struct dpif *dpif, struct odp_flow *flow)
     COVERAGE_INC(dpif_flow_get);
 
     check_rw_odp_flow(flow);
-    error = dpif->class->flow_get(dpif, flow, 1);
+    error = dpif->dpif_class->flow_get(dpif, flow, 1);
     if (!error) {
         error = flow->stats.error;
     }
@@ -769,7 +770,7 @@ dpif_flow_get_multiple(const struct dpif *dpif,
         check_rw_odp_flow(&flows[i]);
     }
 
-    error = dpif->class->flow_get(dpif, flows, n);
+    error = dpif->dpif_class->flow_get(dpif, flows, n);
     log_operation(dpif, "flow_get_multiple", error);
     return error;
 }
@@ -797,7 +798,7 @@ dpif_flow_put(struct dpif *dpif, struct odp_flow_put *put)
 
     COVERAGE_INC(dpif_flow_put);
 
-    error = dpif->class->flow_put(dpif, put);
+    error = dpif->dpif_class->flow_put(dpif, put);
     if (should_log_flow_message(error)) {
         log_flow_put(dpif, error, put);
     }
@@ -819,7 +820,7 @@ dpif_flow_del(struct dpif *dpif, struct odp_flow *flow)
     check_rw_odp_flow(flow);
     memset(&flow->stats, 0, sizeof flow->stats);
 
-    error = dpif->class->flow_del(dpif, flow);
+    error = dpif->dpif_class->flow_del(dpif, flow);
     if (should_log_flow_message(error)) {
         log_flow_operation(dpif, "delete flow", error, flow);
     }
@@ -848,7 +849,7 @@ dpif_flow_list(const struct dpif *dpif, struct odp_flow flows[], size_t n,
             flows[i].n_actions = 0;
         }
     }
-    retval = dpif->class->flow_list(dpif, flows, n);
+    retval = dpif->dpif_class->flow_list(dpif, flows, n);
     if (retval < 0) {
         *n_out = 0;
         VLOG_WARN_RL(&error_rl, "%s: flow list failed (%s)",
@@ -925,7 +926,8 @@ dpif_execute(struct dpif *dpif, uint16_t in_port,
 
     COVERAGE_INC(dpif_execute);
     if (n_actions > 0) {
-        error = dpif->class->execute(dpif, in_port, actions, n_actions, buf);
+        error = dpif->dpif_class->execute(dpif, in_port, actions,
+                                          n_actions, buf);
     } else {
         error = 0;
     }
@@ -952,7 +954,7 @@ dpif_execute(struct dpif *dpif, uint16_t in_port,
 int
 dpif_recv_get_mask(const struct dpif *dpif, int *listen_mask)
 {
-    int error = dpif->class->recv_get_mask(dpif, listen_mask);
+    int error = dpif->dpif_class->recv_get_mask(dpif, listen_mask);
     if (error) {
         *listen_mask = 0;
     }
@@ -966,11 +968,46 @@ dpif_recv_get_mask(const struct dpif *dpif, int *listen_mask)
 int
 dpif_recv_set_mask(struct dpif *dpif, int listen_mask)
 {
-    int error = dpif->class->recv_set_mask(dpif, listen_mask);
+    int error = dpif->dpif_class->recv_set_mask(dpif, listen_mask);
     log_operation(dpif, "recv_set_mask", error);
     return error;
 }
 
+/* Retrieves the sFlow sampling probability.  '*probability' is expressed as
+ * the number of packets out of UINT_MAX to sample, i.e. probability/UINT_MAX
+ * is the probability of sampling a given packet.
+ *
+ * Returns 0 if successful, otherwise a positive errno value.  EOPNOTSUPP
+ * indicates that 'dpif' does not support sFlow sampling. */
+int
+dpif_get_sflow_probability(const struct dpif *dpif, uint32_t *probability)
+{
+    int error = (dpif->dpif_class->get_sflow_probability
+                 ? dpif->dpif_class->get_sflow_probability(dpif, probability)
+                 : EOPNOTSUPP);
+    if (error) {
+        *probability = 0;
+    }
+    log_operation(dpif, "get_sflow_probability", error);
+    return error;
+}
+
+/* Sets the sFlow sampling probability.  'probability' is expressed as the
+ * number of packets out of UINT_MAX to sample, i.e. probability/UINT_MAX is
+ * the probability of sampling a given packet.
+ *
+ * Returns 0 if successful, otherwise a positive errno value.  EOPNOTSUPP
+ * indicates that 'dpif' does not support sFlow sampling. */
+int
+dpif_set_sflow_probability(struct dpif *dpif, uint32_t probability)
+{
+    int error = (dpif->dpif_class->set_sflow_probability
+                 ? dpif->dpif_class->set_sflow_probability(dpif, probability)
+                 : EOPNOTSUPP);
+    log_operation(dpif, "set_sflow_probability", error);
+    return error;
+}
+
 /* Attempts to receive a message from 'dpif'.  If successful, stores the
  * message into '*packetp'.  The message, if one is received, will begin with
  * 'struct odp_msg' as a header.  Only messages of the types selected with
@@ -982,7 +1019,7 @@ dpif_recv_set_mask(struct dpif *dpif, int listen_mask)
 int
 dpif_recv(struct dpif *dpif, struct ofpbuf **packetp)
 {
-    int error = dpif->class->recv(dpif, packetp);
+    int error = dpif->dpif_class->recv(dpif, packetp);
     if (!error) {
         if (VLOG_IS_DBG_ENABLED()) {
             struct ofpbuf *buf = *packetp;
@@ -994,6 +1031,7 @@ dpif_recv(struct dpif *dpif, struct ofpbuf **packetp)
                         "%zu on port %"PRIu16": %s", dpif_name(dpif),
                         (msg->type == _ODPL_MISS_NR ? "miss"
                          : msg->type == _ODPL_ACTION_NR ? "action"
+                         : msg->type == _ODPL_SFLOW_NR ? "sFlow"
                          : "<unknown>"),
                         payload_len, msg->port, s);
             free(s);
@@ -1020,7 +1058,7 @@ dpif_recv_purge(struct dpif *dpif)
         return error;
     }
 
-    for (i = 0; i < stats.max_miss_queue + stats.max_action_queue; i++) {
+    for (i = 0; i < stats.max_miss_queue + stats.max_action_queue + stats.max_sflow_queue; i++) {
         struct ofpbuf *buf;
         error = dpif_recv(dpif, &buf);
         if (error) {
@@ -1036,7 +1074,7 @@ dpif_recv_purge(struct dpif *dpif)
 void
 dpif_recv_wait(struct dpif *dpif)
 {
-    dpif->class->recv_wait(dpif);
+    dpif->dpif_class->recv_wait(dpif);
 }
 
 /* Obtains the NetFlow engine type and engine ID for 'dpif' into '*engine_type'
@@ -1050,12 +1088,13 @@ dpif_get_netflow_ids(const struct dpif *dpif,
 }
 \f
 void
-dpif_init(struct dpif *dpif, const struct dpif_class *class, const char *name,
+dpif_init(struct dpif *dpif, const struct dpif_class *dpif_class,
+          const char *name,
           uint8_t netflow_engine_type, uint8_t netflow_engine_id)
 {
-    dpif->class = class;
+    dpif->dpif_class = dpif_class;
     dpif->base_name = xstrdup(name);
-    dpif->full_name = xasprintf("%s@%s", class->type, name);
+    dpif->full_name = xasprintf("%s@%s", dpif_class->type, name);
     dpif->netflow_engine_type = netflow_engine_type;
     dpif->netflow_engine_id = netflow_engine_id;
 }
@@ -1073,7 +1112,7 @@ dpif_uninit(struct dpif *dpif, bool close)
     char *full_name = dpif->full_name;
 
     if (close) {
-        dpif->class->close(dpif);
+        dpif->dpif_class->close(dpif);
     }
 
     free(base_name);
index dae0ef8..b171793 100644 (file)
@@ -88,6 +88,8 @@ int dpif_execute(struct dpif *, uint16_t in_port,
 
 int dpif_recv_get_mask(const struct dpif *, int *listen_mask);
 int dpif_recv_set_mask(struct dpif *, int listen_mask);
+int dpif_get_sflow_probability(const struct dpif *, uint32_t *probability);
+int dpif_set_sflow_probability(struct dpif *, uint32_t probability);
 int dpif_recv(struct dpif *, struct ofpbuf **);
 int dpif_recv_purge(struct dpif *);
 void dpif_recv_wait(struct dpif *);
index abf380b..2f4a302 100644 (file)
 #include <stdlib.h>
 #include "util.h"
 
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
 /* A hash map node, to be embedded inside the data structure being mapped. */
 struct hmap_node {
     size_t hash;                /* Hash value. */
@@ -66,7 +70,7 @@ struct hmap {
 void hmap_init(struct hmap *);
 void hmap_destroy(struct hmap *);
 void hmap_swap(struct hmap *a, struct hmap *b);
-void hmap_moved(struct hmap *);
+void hmap_moved(struct hmap *hmap);
 static inline size_t hmap_count(const struct hmap *);
 static inline bool hmap_is_empty(const struct hmap *);
 
@@ -80,6 +84,7 @@ static inline void hmap_insert_fast(struct hmap *,
                                     struct hmap_node *, size_t hash);
 static inline void hmap_insert(struct hmap *, struct hmap_node *, size_t hash);
 static inline void hmap_remove(struct hmap *, struct hmap_node *);
+
 void hmap_node_moved(struct hmap *, struct hmap_node *, struct hmap_node *);
 static inline void hmap_replace(struct hmap *, const struct hmap_node *old,
                                 struct hmap_node *new);
@@ -207,24 +212,24 @@ hmap_remove(struct hmap *hmap, struct hmap_node *node)
     hmap->n--;
 }
 
-/* Puts 'new' in the position in 'hmap' currently occupied by 'old'.  The 'new'
- * node must hash to the same value as 'old'.  The client is responsible for
- * ensuring that the replacement does not violate any client-imposed
- * invariants (e.g. uniqueness of keys within a map).
+/* Puts 'new_node' in the position in 'hmap' currently occupied by 'old_node'.
+ * The 'new_node' must hash to the same value as 'old_node'.  The client is
+ * responsible for ensuring that the replacement does not violate any
+ * client-imposed invariants (e.g. uniqueness of keys within a map).
  *
- * Afterward, 'old' is not part of 'hmap', and the client is responsible for
- * freeing it (if this is desirable). */
+ * Afterward, 'old_node' is not part of 'hmap', and the client is responsible
+ * for freeing it (if this is desirable). */
 static inline void
 hmap_replace(struct hmap *hmap,
-             const struct hmap_node *old, struct hmap_node *new)
+             const struct hmap_node *old_node, struct hmap_node *new_node)
 {
-    struct hmap_node **bucket = &hmap->buckets[old->hash & hmap->mask];
-    while (*bucket != old) {
+    struct hmap_node **bucket = &hmap->buckets[old_node->hash & hmap->mask];
+    while (*bucket != old_node) {
         bucket = &(*bucket)->next;
     }
-    *bucket = new;
-    new->hash = old->hash;
-    new->next = old->next;
+    *bucket = new_node;
+    new_node->hash = old_node->hash;
+    new_node->next = old_node->next;
 }
 
 static inline struct hmap_node *
@@ -316,4 +321,8 @@ hmap_next(const struct hmap *hmap, const struct hmap_node *node)
             : hmap_next__(hmap, (node->hash & hmap->mask) + 1));
 }
 
+#ifdef  __cplusplus
+}
+#endif
+
 #endif /* hmap.h */
index 43a330c..1eb1b1e 100644 (file)
 #include "list.h"
 #include "shash.h"
 
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
 struct arg {
     char *key;
     char *value;
@@ -36,7 +40,8 @@ struct arg {
  * implementations. */
 struct netdev_dev {
     char *name;                         /* Name of network device. */
-    const struct netdev_class *class;   /* Functions to control this device. */
+    const struct netdev_class *netdev_class; /* Functions to control
+                                              * this device. */
     int ref_cnt;                        /* Times this devices was opened. */
     struct shash_node *node;            /* Pointer to element in global map. */
     struct arg *args;                   /* Argument list from last config. */
@@ -53,9 +58,9 @@ void netdev_dev_get_devices(const struct netdev_class *,
                             struct shash *device_list);
 
 static inline void netdev_dev_assert_class(const struct netdev_dev *netdev_dev,
-                                           const struct netdev_class *class)
+                                           const struct netdev_class *class_)
 {
-    assert(netdev_dev->class == class);
+    assert(netdev_dev->netdev_class == class_);
 }
 
 /* A instance of an open network device.
@@ -75,9 +80,9 @@ void netdev_uninit(struct netdev *, bool close);
 struct netdev_dev *netdev_get_dev(const struct netdev *);
 
 static inline void netdev_assert_class(const struct netdev *netdev,
-                                       const struct netdev_class *class)
+                                       const struct netdev_class *netdev_class)
 {
-    netdev_dev_assert_class(netdev_get_dev(netdev), class);
+    netdev_dev_assert_class(netdev_get_dev(netdev), netdev_class);
 }
 
 /* A network device notifier.
@@ -358,4 +363,8 @@ extern const struct netdev_class netdev_linux_class;
 extern const struct netdev_class netdev_tap_class;
 extern const struct netdev_class netdev_gre_class;
 
+#ifdef  __cplusplus
+}
+#endif
+
 #endif /* netdev.h */
index 88ba017..ddd6e92 100644 (file)
@@ -32,6 +32,7 @@
 #include "list.h"
 #include "netdev-provider.h"
 #include "ofpbuf.h"
+#include "openflow/openflow.h"
 #include "packets.h"
 #include "poll-loop.h"
 #include "shash.h"
@@ -158,7 +159,7 @@ netdev_unregister_provider(const char *type)
 
     SHASH_FOR_EACH(netdev_dev_node, &netdev_dev_shash) {
         struct netdev_dev *netdev_dev = netdev_dev_node->data;
-        if (!strcmp(netdev_dev->class->type, type)) {
+        if (!strcmp(netdev_dev->netdev_class->type, type)) {
             VLOG_WARN("attempted to unregister in use netdev provider: %s",
                       type);
             return EBUSY;
@@ -340,7 +341,8 @@ netdev_open(struct netdev_options *options, struct netdev **netdevp)
         return EEXIST;
     }
 
-    error = netdev_dev->class->open(netdev_dev, options->ethertype, netdevp);
+    error = netdev_dev->netdev_class->open(netdev_dev, options->ethertype, 
+                netdevp);
 
     if (!error) {
         netdev_dev->ref_cnt++;
@@ -380,10 +382,10 @@ netdev_reconfigure(struct netdev *netdev, const struct shash *args)
         args = &empty_args;
     }
 
-    if (netdev_dev->class->reconfigure) {
+    if (netdev_dev->netdev_class->reconfigure) {
         if (!compare_device_args(netdev_dev, args)) {
             update_device_args(netdev_dev, args);
-            return netdev_dev->class->reconfigure(netdev_dev, args);
+            return netdev_dev->netdev_class->reconfigure(netdev_dev, args);
         }
     } else if (!shash_is_empty(args)) {
         VLOG_WARN("%s: arguments provided to device that does not have a "
@@ -479,7 +481,7 @@ netdev_recv(struct netdev *netdev, struct ofpbuf *buffer)
     assert(buffer->size == 0);
     assert(ofpbuf_tailroom(buffer) >= ETH_TOTAL_MIN);
 
-    retval = netdev_get_dev(netdev)->class->recv(netdev, buffer->data,
+    retval = netdev_get_dev(netdev)->netdev_class->recv(netdev, buffer->data,
              ofpbuf_tailroom(buffer));
     if (retval >= 0) {
         COVERAGE_INC(netdev_received);
@@ -498,14 +500,14 @@ netdev_recv(struct netdev *netdev, struct ofpbuf *buffer)
 void
 netdev_recv_wait(struct netdev *netdev)
 {
-    netdev_get_dev(netdev)->class->recv_wait(netdev);
+    netdev_get_dev(netdev)->netdev_class->recv_wait(netdev);
 }
 
 /* Discards all packets waiting to be received from 'netdev'. */
 int
 netdev_drain(struct netdev *netdev)
 {
-    return netdev_get_dev(netdev)->class->drain(netdev);
+    return netdev_get_dev(netdev)->netdev_class->drain(netdev);
 }
 
 /* Sends 'buffer' on 'netdev'.  Returns 0 if successful, otherwise a positive
@@ -520,8 +522,8 @@ netdev_drain(struct netdev *netdev)
 int
 netdev_send(struct netdev *netdev, const struct ofpbuf *buffer)
 {
-    int error = netdev_get_dev(netdev)->class->send(netdev, buffer->data,
-                                                    buffer->size);
+    int error = netdev_get_dev(netdev)->netdev_class->send(netdev, 
+            buffer->data, buffer->size);
     if (!error) {
         COVERAGE_INC(netdev_sent);
     }
@@ -538,7 +540,7 @@ netdev_send(struct netdev *netdev, const struct ofpbuf *buffer)
 void
 netdev_send_wait(struct netdev *netdev)
 {
-    return netdev_get_dev(netdev)->class->send_wait(netdev);
+    return netdev_get_dev(netdev)->netdev_class->send_wait(netdev);
 }
 
 /* Attempts to set 'netdev''s MAC address to 'mac'.  Returns 0 if successful,
@@ -546,7 +548,7 @@ netdev_send_wait(struct netdev *netdev)
 int
 netdev_set_etheraddr(struct netdev *netdev, const uint8_t mac[ETH_ADDR_LEN])
 {
-    return netdev_get_dev(netdev)->class->set_etheraddr(netdev, mac);
+    return netdev_get_dev(netdev)->netdev_class->set_etheraddr(netdev, mac);
 }
 
 /* Retrieves 'netdev''s MAC address.  If successful, returns 0 and copies the
@@ -555,7 +557,7 @@ netdev_set_etheraddr(struct netdev *netdev, const uint8_t mac[ETH_ADDR_LEN])
 int
 netdev_get_etheraddr(const struct netdev *netdev, uint8_t mac[ETH_ADDR_LEN])
 {
-    return netdev_get_dev(netdev)->class->get_etheraddr(netdev, mac);
+    return netdev_get_dev(netdev)->netdev_class->get_etheraddr(netdev, mac);
 }
 
 /* Returns the name of the network device that 'netdev' represents,
@@ -576,7 +578,7 @@ netdev_get_name(const struct netdev *netdev)
 int
 netdev_get_mtu(const struct netdev *netdev, int *mtup)
 {
-    int error = netdev_get_dev(netdev)->class->get_mtu(netdev, mtup);
+    int error = netdev_get_dev(netdev)->netdev_class->get_mtu(netdev, mtup);
     if (error) {
         VLOG_WARN_RL(&rl, "failed to retrieve MTU for network device %s: %s",
                      netdev_get_name(netdev), strerror(error));
@@ -597,7 +599,7 @@ netdev_get_mtu(const struct netdev *netdev, int *mtup)
 int
 netdev_get_ifindex(const struct netdev *netdev)
 {
-    return netdev_get_dev(netdev)->class->get_ifindex(netdev);
+    return netdev_get_dev(netdev)->netdev_class->get_ifindex(netdev);
 }
 
 /* Stores the features supported by 'netdev' into each of '*current',
@@ -626,23 +628,51 @@ netdev_get_features(struct netdev *netdev,
         peer = &dummy[3];
     }
 
-    error = netdev_get_dev(netdev)->class->get_features(netdev, current,
-                                                        advertised, supported,
-                                                        peer);
+    error = netdev_get_dev(netdev)->netdev_class->get_features(netdev, current,
+            advertised, supported, peer);
     if (error) {
         *current = *advertised = *supported = *peer = 0;
     }
     return error;
 }
 
+/* Returns the maximum speed of a network connection that has the "enum
+ * ofp_port_features" bits in 'features', in bits per second.  If no bits that
+ * indicate a speed are set in 'features', assumes 100Mbps. */
+uint64_t
+netdev_features_to_bps(uint32_t features)
+{
+    enum {
+        F_10000MB = OFPPF_10GB_FD,
+        F_1000MB = OFPPF_1GB_HD | OFPPF_1GB_FD,
+        F_100MB = OFPPF_100MB_HD | OFPPF_100MB_FD,
+        F_10MB = OFPPF_10MB_HD | OFPPF_10MB_FD
+    };
+
+    return (  features & F_10000MB  ? UINT64_C(10000000000)
+            : features & F_1000MB   ? UINT64_C(1000000000)
+            : features & F_100MB    ? UINT64_C(100000000)
+            : features & F_10MB     ? UINT64_C(10000000)
+                                    : UINT64_C(100000000));
+}
+
+/* Returns true if any of the "enum ofp_port_features" bits that indicate a
+ * full-duplex link are set in 'features', otherwise false. */
+bool
+netdev_features_is_full_duplex(uint32_t features)
+{
+    return (features & (OFPPF_10MB_FD | OFPPF_100MB_FD | OFPPF_1GB_FD
+                        | OFPPF_10GB_FD)) != 0;
+}
+
 /* Set the features advertised by 'netdev' to 'advertise'.  Returns 0 if
  * successful, otherwise a positive errno value. */
 int
 netdev_set_advertisements(struct netdev *netdev, uint32_t advertise)
 {
-    return (netdev_get_dev(netdev)->class->set_advertisements
-            ? netdev_get_dev(netdev)->class->set_advertisements(netdev,
-                                                                advertise)
+    return (netdev_get_dev(netdev)->netdev_class->set_advertisements
+            ? netdev_get_dev(netdev)->netdev_class->set_advertisements(
+                    netdev, advertise)
             : EOPNOTSUPP);
 }
 
@@ -666,9 +696,9 @@ netdev_get_in4(const struct netdev *netdev,
     struct in_addr netmask;
     int error;
 
-    error = (netdev_get_dev(netdev)->class->get_in4
-             ? netdev_get_dev(netdev)->class->get_in4(netdev, &address,
-                                                      &netmask)
+    error = (netdev_get_dev(netdev)->netdev_class->get_in4
+             ? netdev_get_dev(netdev)->netdev_class->get_in4(netdev, 
+                    &address, &netmask)
              : EOPNOTSUPP);
     if (address_) {
         address_->s_addr = error ? 0 : address.s_addr;
@@ -685,8 +715,8 @@ netdev_get_in4(const struct netdev *netdev,
 int
 netdev_set_in4(struct netdev *netdev, struct in_addr addr, struct in_addr mask)
 {
-    return (netdev_get_dev(netdev)->class->set_in4
-            ? netdev_get_dev(netdev)->class->set_in4(netdev, addr, mask)
+    return (netdev_get_dev(netdev)->netdev_class->set_in4
+            ? netdev_get_dev(netdev)->netdev_class->set_in4(netdev, addr, mask)
             : EOPNOTSUPP);
 }
 
@@ -696,8 +726,8 @@ int
 netdev_add_router(struct netdev *netdev, struct in_addr router)
 {
     COVERAGE_INC(netdev_add_router);
-    return (netdev_get_dev(netdev)->class->add_router
-            ? netdev_get_dev(netdev)->class->add_router(netdev, router)
+    return (netdev_get_dev(netdev)->netdev_class->add_router
+            ? netdev_get_dev(netdev)->netdev_class->add_router(netdev, router)
             : EOPNOTSUPP);
 }
 
@@ -713,9 +743,9 @@ netdev_get_next_hop(const struct netdev *netdev,
                     const struct in_addr *host, struct in_addr *next_hop,
                     char **netdev_name)
 {
-    int error = (netdev_get_dev(netdev)->class->get_next_hop
-                 ? netdev_get_dev(netdev)->class->get_next_hop(host, next_hop,
-                                                               netdev_name)
+    int error = (netdev_get_dev(netdev)->netdev_class->get_next_hop
+                 ? netdev_get_dev(netdev)->netdev_class->get_next_hop(
+                        host, next_hop, netdev_name)
                  : EOPNOTSUPP);
     if (error) {
         next_hop->s_addr = 0;
@@ -741,9 +771,9 @@ netdev_get_in6(const struct netdev *netdev, struct in6_addr *in6)
     struct in6_addr dummy;
     int error;
 
-    error = (netdev_get_dev(netdev)->class->get_in6
-             ? netdev_get_dev(netdev)->class->get_in6(netdev, in6 ? in6
-                                                                  : &dummy)
+    error = (netdev_get_dev(netdev)->netdev_class->get_in6
+             ? netdev_get_dev(netdev)->netdev_class->get_in6(netdev, 
+                    in6 ? in6 : &dummy)
              : EOPNOTSUPP);
     if (error && in6) {
         memset(in6, 0, sizeof *in6);
@@ -763,8 +793,8 @@ do_update_flags(struct netdev *netdev, enum netdev_flags off,
     enum netdev_flags old_flags;
     int error;
 
-    error = netdev_get_dev(netdev)->class->update_flags(netdev, off & ~on, on,
-                                                        &old_flags);
+    error = netdev_get_dev(netdev)->netdev_class->update_flags(netdev, 
+                off & ~on, on, &old_flags);
     if (error) {
         VLOG_WARN_RL(&rl, "failed to %s flags for network device %s: %s",
                      off || on ? "set" : "get", netdev_get_name(netdev),
@@ -837,8 +867,9 @@ int
 netdev_arp_lookup(const struct netdev *netdev,
                   uint32_t ip, uint8_t mac[ETH_ADDR_LEN])
 {
-    int error = (netdev_get_dev(netdev)->class->arp_lookup
-                 ? netdev_get_dev(netdev)->class->arp_lookup(netdev, ip, mac)
+    int error = (netdev_get_dev(netdev)->netdev_class->arp_lookup
+                 ? netdev_get_dev(netdev)->netdev_class->arp_lookup(netdev, 
+                        ip, mac)
                  : EOPNOTSUPP);
     if (error) {
         memset(mac, 0, ETH_ADDR_LEN);
@@ -851,8 +882,9 @@ netdev_arp_lookup(const struct netdev *netdev,
 int
 netdev_get_carrier(const struct netdev *netdev, bool *carrier)
 {
-    int error = (netdev_get_dev(netdev)->class->get_carrier
-                 ? netdev_get_dev(netdev)->class->get_carrier(netdev, carrier)
+    int error = (netdev_get_dev(netdev)->netdev_class->get_carrier
+                 ? netdev_get_dev(netdev)->netdev_class->get_carrier(netdev, 
+                        carrier)
                  : EOPNOTSUPP);
     if (error) {
         *carrier = false;
@@ -867,8 +899,8 @@ netdev_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
     int error;
 
     COVERAGE_INC(netdev_get_stats);
-    error = (netdev_get_dev(netdev)->class->get_stats
-             ? netdev_get_dev(netdev)->class->get_stats(netdev, stats)
+    error = (netdev_get_dev(netdev)->netdev_class->get_stats
+             ? netdev_get_dev(netdev)->netdev_class->get_stats(netdev, stats)
              : EOPNOTSUPP);
     if (error) {
         memset(stats, 0xff, sizeof *stats);
@@ -883,9 +915,9 @@ int
 netdev_set_policing(struct netdev *netdev, uint32_t kbits_rate,
                     uint32_t kbits_burst)
 {
-    return (netdev_get_dev(netdev)->class->set_policing
-            ? netdev_get_dev(netdev)->class->set_policing(netdev, kbits_rate,
-                                                          kbits_burst)
+    return (netdev_get_dev(netdev)->netdev_class->set_policing
+            ? netdev_get_dev(netdev)->netdev_class->set_policing(netdev, 
+                    kbits_rate, kbits_burst)
             : EOPNOTSUPP);
 }
 
@@ -897,8 +929,9 @@ netdev_set_policing(struct netdev *netdev, uint32_t kbits_rate,
 int
 netdev_get_vlan_vid(const struct netdev *netdev, int *vlan_vid)
 {
-    int error = (netdev_get_dev(netdev)->class->get_vlan_vid
-                 ? netdev_get_dev(netdev)->class->get_vlan_vid(netdev, vlan_vid)
+    int error = (netdev_get_dev(netdev)->netdev_class->get_vlan_vid
+                 ? netdev_get_dev(netdev)->netdev_class->get_vlan_vid(netdev, 
+                        vlan_vid)
                  : ENOENT);
     if (error) {
         *vlan_vid = 0;
@@ -935,19 +968,19 @@ exit:
 }
 \f
 /* Initializes 'netdev_dev' as a netdev device named 'name' of the
- * specified 'class'.
+ * specified 'class_'.
  *
  * This function adds 'netdev_dev' to a netdev-owned shash, so it is
  * very important that 'netdev_dev' only be freed after calling
  * the refcount drops to zero.  */
 void
 netdev_dev_init(struct netdev_dev *netdev_dev, const char *name,
-                const struct netdev_class *class)
+                const struct netdev_class *class_)
 {
     assert(!shash_find(&netdev_dev_shash, name));
 
     memset(netdev_dev, 0, sizeof *netdev_dev);
-    netdev_dev->class = class;
+    netdev_dev->netdev_class = class_;
     netdev_dev->name = xstrdup(name);
     netdev_dev->node = shash_add(&netdev_dev_shash, name, netdev_dev);
 }
@@ -970,7 +1003,7 @@ netdev_dev_uninit(struct netdev_dev *netdev_dev, bool destroy)
     update_device_args(netdev_dev, NULL);
 
     if (destroy) {
-        netdev_dev->class->destroy(netdev_dev);
+        netdev_dev->netdev_class->destroy(netdev_dev);
     }
     free(name);
 }
@@ -981,7 +1014,7 @@ netdev_dev_uninit(struct netdev_dev *netdev_dev, bool destroy)
 const char *
 netdev_dev_get_type(const struct netdev_dev *netdev_dev)
 {
-    return netdev_dev->class->type;
+    return netdev_dev->netdev_class->type;
 }
 
 /* Returns the name of 'netdev_dev'.
@@ -1007,14 +1040,14 @@ netdev_dev_from_name(const char *name)
  * The caller is responsible for initializing and destroying 'device_list'
  * but the contained netdev_devs must not be freed. */
 void
-netdev_dev_get_devices(const struct netdev_class *class,
+netdev_dev_get_devices(const struct netdev_class *class_,
                        struct shash *device_list)
 {
     struct shash_node *node;
     SHASH_FOR_EACH (node, &netdev_dev_shash) {
         struct netdev_dev *dev = node->data;
 
-        if (dev->class == class) {
+        if (dev->netdev_class == class_) {
             shash_add(device_list, node->name, node->data);
         }
     }
@@ -1050,7 +1083,7 @@ netdev_uninit(struct netdev *netdev, bool close)
     }
 
     if (close) {
-        netdev_get_dev(netdev)->class->close(netdev);
+        netdev_get_dev(netdev)->netdev_class->close(netdev);
     }
 }
 
@@ -1061,7 +1094,7 @@ netdev_uninit(struct netdev *netdev, bool close)
 const char *
 netdev_get_type(const struct netdev *netdev)
 {
-    return netdev_get_dev(netdev)->class->type;
+    return netdev_get_dev(netdev)->netdev_class->type;
 }
 
 struct netdev_dev *
@@ -1107,7 +1140,8 @@ netdev_monitor_destroy(struct netdev_monitor *monitor)
 
         SHASH_FOR_EACH (node, &monitor->polled_netdevs) {
             struct netdev_notifier *notifier = node->data;
-            netdev_get_dev(notifier->netdev)->class->poll_remove(notifier);
+            netdev_get_dev(notifier->netdev)->netdev_class->poll_remove(
+                    notifier);
         }
 
         shash_destroy(&monitor->polled_netdevs);
@@ -1137,12 +1171,11 @@ netdev_monitor_add(struct netdev_monitor *monitor, struct netdev *netdev)
     const char *netdev_name = netdev_get_name(netdev);
     int error = 0;
     if (!shash_find(&monitor->polled_netdevs, netdev_name)
-        && netdev_get_dev(netdev)->class->poll_add)
+            && netdev_get_dev(netdev)->netdev_class->poll_add)
     {
         struct netdev_notifier *notifier;
-        error = netdev_get_dev(netdev)->class->poll_add(netdev,
-                                                        netdev_monitor_cb,
-                                                        monitor, &notifier);
+        error = netdev_get_dev(netdev)->netdev_class->poll_add(netdev,
+                    netdev_monitor_cb, monitor, &notifier);
         if (!error) {
             assert(notifier->netdev == netdev);
             shash_add(&monitor->polled_netdevs, netdev_name, notifier);
@@ -1164,7 +1197,7 @@ netdev_monitor_remove(struct netdev_monitor *monitor, struct netdev *netdev)
     if (node) {
         /* Cancel future notifications. */
         struct netdev_notifier *notifier = node->data;
-        netdev_get_dev(netdev)->class->poll_remove(notifier);
+        netdev_get_dev(netdev)->netdev_class->poll_remove(notifier);
         shash_delete(&monitor->polled_netdevs, node);
 
         /* Drop any pending notification. */
@@ -1222,7 +1255,7 @@ restore_flags(struct netdev *netdev)
     if (netdev->changed_flags) {
         enum netdev_flags restore = netdev->save_flags & netdev->changed_flags;
         enum netdev_flags old_flags;
-        return netdev_get_dev(netdev)->class->update_flags(netdev,
+        return netdev_get_dev(netdev)->netdev_class->update_flags(netdev,
                                            netdev->changed_flags & ~restore,
                                            restore, &old_flags);
     }
index e0cdd82..e3a3176 100644 (file)
@@ -123,6 +123,8 @@ int netdev_get_carrier(const struct netdev *, bool *carrier);
 int netdev_get_features(struct netdev *,
                         uint32_t *current, uint32_t *advertised,
                         uint32_t *supported, uint32_t *peer);
+uint64_t netdev_features_to_bps(uint32_t features);
+bool netdev_features_is_full_duplex(uint32_t features);
 int netdev_set_advertisements(struct netdev *, uint32_t advertise);
 
 int netdev_get_in4(const struct netdev *, struct in_addr *address,
diff --git a/lib/sflow.h b/lib/sflow.h
new file mode 100644 (file)
index 0000000..397ae2d
--- /dev/null
@@ -0,0 +1,548 @@
+/* Copyright (c) 2002-2009 InMon Corp. Licensed under the terms of the InMon sFlow licence: */
+/* http://www.inmon.com/technology/sflowlicense.txt */
+
+#ifndef SFLOW_H
+#define SFLOW_H 1
+
+enum SFLAddress_type {
+    SFLADDRESSTYPE_IP_V4 = 1,
+    SFLADDRESSTYPE_IP_V6 = 2
+};
+
+typedef struct {
+    u_int32_t addr;
+} SFLIPv4;
+
+typedef struct {
+    u_char addr[16];
+} SFLIPv6;
+
+typedef union _SFLAddress_value {
+    SFLIPv4 ip_v4;
+    SFLIPv6 ip_v6;
+} SFLAddress_value;
+
+typedef struct _SFLAddress {
+    u_int32_t type;           /* enum SFLAddress_type */
+    SFLAddress_value address;
+} SFLAddress;
+
+/* Packet header data */
+
+#define SFL_DEFAULT_HEADER_SIZE 128
+#define SFL_DEFAULT_COLLECTOR_PORT 6343
+#define SFL_DEFAULT_SAMPLING_RATE 400
+#define SFL_DEFAULT_POLLING_INTERVAL 30
+
+/* The header protocol describes the format of the sampled header */
+enum SFLHeader_protocol {
+    SFLHEADER_ETHERNET_ISO8023     = 1,
+    SFLHEADER_ISO88024_TOKENBUS    = 2,
+    SFLHEADER_ISO88025_TOKENRING   = 3,
+    SFLHEADER_FDDI                 = 4,
+    SFLHEADER_FRAME_RELAY          = 5,
+    SFLHEADER_X25                  = 6,
+    SFLHEADER_PPP                  = 7,
+    SFLHEADER_SMDS                 = 8,
+    SFLHEADER_AAL5                 = 9,
+    SFLHEADER_AAL5_IP              = 10, /* e.g. Cisco AAL5 mux */
+    SFLHEADER_IPv4                 = 11,
+    SFLHEADER_IPv6                 = 12,
+    SFLHEADER_MPLS                 = 13
+};
+
+/* raw sampled header */
+
+typedef struct _SFLSampled_header {
+    u_int32_t header_protocol;            /* (enum SFLHeader_protocol) */
+    u_int32_t frame_length;               /* Original length of packet before sampling */
+    u_int32_t stripped;                   /* header/trailer bytes stripped by sender */
+    u_int32_t header_length;              /* length of sampled header bytes to follow */
+    u_int8_t *header_bytes;               /* Header bytes */
+} SFLSampled_header;
+
+/* decoded ethernet header */
+
+typedef struct _SFLSampled_ethernet {
+    u_int32_t eth_len;       /* The length of the MAC packet excluding 
+                               lower layer encapsulations */
+    u_int8_t src_mac[8];    /* 6 bytes + 2 pad */
+    u_int8_t dst_mac[8];
+    u_int32_t eth_type;
+} SFLSampled_ethernet;
+
+/* decoded IP version 4 header */
+
+typedef struct _SFLSampled_ipv4 {
+    u_int32_t length;      /* The length of the IP packet
+                             excluding lower layer encapsulations */
+    u_int32_t protocol;    /* IP Protocol type (for example, TCP = 6, UDP = 17) */
+    SFLIPv4   src_ip;      /* Source IP Address */
+    SFLIPv4   dst_ip;      /* Destination IP Address */
+    u_int32_t src_port;    /* TCP/UDP source port number or equivalent */
+    u_int32_t dst_port;    /* TCP/UDP destination port number or equivalent */
+    u_int32_t tcp_flags;   /* TCP flags */
+    u_int32_t tos;         /* IP type of service */
+} SFLSampled_ipv4;
+
+/* decoded IP version 6 data */
+
+typedef struct _SFLSampled_ipv6 {
+    u_int32_t length;       /* The length of the IP packet
+                              excluding lower layer encapsulations */
+    u_int32_t protocol;     /* IP Protocol type (for example, TCP = 6, UDP = 17) */
+    SFLIPv6   src_ip;       /* Source IP Address */
+    SFLIPv6   dst_ip;       /* Destination IP Address */
+    u_int32_t src_port;     /* TCP/UDP source port number or equivalent */
+    u_int32_t dst_port;     /* TCP/UDP destination port number or equivalent */
+    u_int32_t tcp_flags;    /* TCP flags */
+    u_int32_t priority;     /* IP priority */
+} SFLSampled_ipv6;
+
+/* Extended data types */
+
+/* Extended switch data */
+
+typedef struct _SFLExtended_switch {
+    u_int32_t src_vlan;       /* The 802.1Q VLAN id of the incoming frame */
+    u_int32_t src_priority;   /* The 802.1p priority of the incoming frame */
+    u_int32_t dst_vlan;       /* The 802.1Q VLAN id of the outgoing frame */
+    u_int32_t dst_priority;   /* The 802.1p priority of the outgoing frame */
+} SFLExtended_switch;
+
+/* Extended router data */
+
+typedef struct _SFLExtended_router {
+    SFLAddress nexthop;               /* IP address of next hop router */
+    u_int32_t src_mask;               /* Source address prefix mask bits */
+    u_int32_t dst_mask;               /* Destination address prefix mask bits */
+} SFLExtended_router;
+
+/* Extended gateway data */
+enum SFLExtended_as_path_segment_type {
+    SFLEXTENDED_AS_SET = 1,      /* Unordered set of ASs */
+    SFLEXTENDED_AS_SEQUENCE = 2  /* Ordered sequence of ASs */
+};
+  
+typedef struct _SFLExtended_as_path_segment {
+    u_int32_t type;   /* enum SFLExtended_as_path_segment_type */
+    u_int32_t length; /* number of AS numbers in set/sequence */
+    union {
+       u_int32_t *set;
+       u_int32_t *seq;
+    } as;
+} SFLExtended_as_path_segment;
+
+typedef struct _SFLExtended_gateway {
+    SFLAddress nexthop;                       /* Address of the border router that should
+                                                be used for the destination network */
+    u_int32_t as;                             /* AS number for this gateway */
+    u_int32_t src_as;                         /* AS number of source (origin) */
+    u_int32_t src_peer_as;                    /* AS number of source peer */
+    u_int32_t dst_as_path_segments;           /* number of segments in path */
+    SFLExtended_as_path_segment *dst_as_path; /* list of seqs or sets */
+    u_int32_t communities_length;             /* number of communities */
+    u_int32_t *communities;                   /* set of communities */
+    u_int32_t localpref;                      /* LocalPref associated with this route */
+} SFLExtended_gateway;
+
+typedef struct _SFLString {
+    u_int32_t len;
+    char *str;
+} SFLString;
+
+/* Extended user data */
+
+typedef struct _SFLExtended_user {
+    u_int32_t src_charset;  /* MIBEnum value of character set used to encode a string - See RFC 2978
+                              Where possible UTF-8 encoding (MIBEnum=106) should be used. A value
+                              of zero indicates an unknown encoding. */
+    SFLString src_user;
+    u_int32_t dst_charset;
+    SFLString dst_user;
+} SFLExtended_user;
+
+/* Extended URL data */
+
+enum SFLExtended_url_direction {
+    SFLEXTENDED_URL_SRC = 1, /* URL is associated with source address */
+    SFLEXTENDED_URL_DST = 2  /* URL is associated with destination address */
+};
+
+typedef struct _SFLExtended_url {
+    u_int32_t direction;   /* enum SFLExtended_url_direction */
+    SFLString url;         /* URL associated with the packet flow.
+                             Must be URL encoded */
+    SFLString host;        /* The host field from the HTTP header */
+} SFLExtended_url;
+
+/* Extended MPLS data */
+
+typedef struct _SFLLabelStack {
+    u_int32_t depth;
+    u_int32_t *stack; /* first entry is top of stack - see RFC 3032 for encoding */
+} SFLLabelStack;
+
+typedef struct _SFLExtended_mpls {
+    SFLAddress nextHop;        /* Address of the next hop */ 
+    SFLLabelStack in_stack;
+    SFLLabelStack out_stack;
+} SFLExtended_mpls;
+
+/* Extended NAT data
+   Packet header records report addresses as seen at the sFlowDataSource.
+   The extended_nat structure reports on translated source and/or destination
+   addresses for this packet. If an address was not translated it should
+   be equal to that reported for the header. */
+
+typedef struct _SFLExtended_nat {
+    SFLAddress src;    /* Source address */
+    SFLAddress dst;    /* Destination address */
+} SFLExtended_nat;
+
+/* additional Extended MPLS structs */
+
+typedef struct _SFLExtended_mpls_tunnel {
+    SFLString tunnel_lsp_name;  /* Tunnel name */
+    u_int32_t tunnel_id;        /* Tunnel ID */
+    u_int32_t tunnel_cos;       /* Tunnel COS value */
+} SFLExtended_mpls_tunnel;
+
+typedef struct _SFLExtended_mpls_vc {
+    SFLString vc_instance_name; /* VC instance name */
+    u_int32_t vll_vc_id;        /* VLL/VC instance ID */
+    u_int32_t vc_label_cos;     /* VC Label COS value */
+} SFLExtended_mpls_vc;
+
+/* Extended MPLS FEC
+   - Definitions from MPLS-FTN-STD-MIB mplsFTNTable */
+
+typedef struct _SFLExtended_mpls_FTN {
+    SFLString mplsFTNDescr;
+    u_int32_t mplsFTNMask;
+} SFLExtended_mpls_FTN;
+
+/* Extended MPLS LVP FEC
+   - Definition from MPLS-LDP-STD-MIB mplsFecTable
+   Note: mplsFecAddrType, mplsFecAddr information available
+   from packet header */
+
+typedef struct _SFLExtended_mpls_LDP_FEC {
+    u_int32_t mplsFecAddrPrefixLength;
+} SFLExtended_mpls_LDP_FEC;
+
+/* Extended VLAN tunnel information 
+   Record outer VLAN encapsulations that have 
+   been stripped. extended_vlantunnel information 
+   should only be reported if all the following conditions are satisfied: 
+   1. The packet has nested vlan tags, AND 
+   2. The reporting device is VLAN aware, AND 
+   3. One or more VLAN tags have been stripped, either 
+   because they represent proprietary encapsulations, or 
+   because switch hardware automatically strips the outer VLAN 
+   encapsulation. 
+   Reporting extended_vlantunnel information is not a substitute for 
+   reporting extended_switch information. extended_switch data must 
+   always be reported to describe the ingress/egress VLAN information 
+   for the packet. The extended_vlantunnel information only applies to 
+   nested VLAN tags, and then only when one or more tags has been 
+   stripped. */ 
+
+typedef SFLLabelStack SFLVlanStack;
+typedef struct _SFLExtended_vlan_tunnel { 
+    SFLVlanStack stack;  /* List of stripped 802.1Q TPID/TCI layers. Each 
+                           TPID,TCI pair is represented as a single 32 bit 
+                           integer. Layers listed from outermost to 
+                           innermost. */ 
+} SFLExtended_vlan_tunnel;
+
+enum SFLFlow_type_tag {
+    /* enterprise = 0, format = ... */
+    SFLFLOW_HEADER    = 1,      /* Packet headers are sampled */
+    SFLFLOW_ETHERNET  = 2,      /* MAC layer information */
+    SFLFLOW_IPV4      = 3,      /* IP version 4 data */
+    SFLFLOW_IPV6      = 4,      /* IP version 6 data */
+    SFLFLOW_EX_SWITCH    = 1001,      /* Extended switch information */
+    SFLFLOW_EX_ROUTER    = 1002,      /* Extended router information */
+    SFLFLOW_EX_GATEWAY   = 1003,      /* Extended gateway router information */
+    SFLFLOW_EX_USER      = 1004,      /* Extended TACACS/RADIUS user information */
+    SFLFLOW_EX_URL       = 1005,      /* Extended URL information */
+    SFLFLOW_EX_MPLS      = 1006,      /* Extended MPLS information */
+    SFLFLOW_EX_NAT       = 1007,      /* Extended NAT information */
+    SFLFLOW_EX_MPLS_TUNNEL  = 1008,   /* additional MPLS information */
+    SFLFLOW_EX_MPLS_VC      = 1009,
+    SFLFLOW_EX_MPLS_FTN     = 1010,
+    SFLFLOW_EX_MPLS_LDP_FEC = 1011,
+    SFLFLOW_EX_VLAN_TUNNEL  = 1012,   /* VLAN stack */
+};
+
+typedef union _SFLFlow_type {
+    SFLSampled_header header;
+    SFLSampled_ethernet ethernet;
+    SFLSampled_ipv4 ipv4;
+    SFLSampled_ipv6 ipv6;
+    SFLExtended_switch sw;
+    SFLExtended_router router;
+    SFLExtended_gateway gateway;
+    SFLExtended_user user;
+    SFLExtended_url url;
+    SFLExtended_mpls mpls;
+    SFLExtended_nat nat;
+    SFLExtended_mpls_tunnel mpls_tunnel;
+    SFLExtended_mpls_vc mpls_vc;
+    SFLExtended_mpls_FTN mpls_ftn;
+    SFLExtended_mpls_LDP_FEC mpls_ldp_fec;
+    SFLExtended_vlan_tunnel vlan_tunnel;
+} SFLFlow_type;
+
+typedef struct _SFLFlow_sample_element {
+    struct _SFLFlow_sample_element *nxt;
+    u_int32_t tag;  /* SFLFlow_type_tag */
+    u_int32_t length;
+    SFLFlow_type flowType;
+} SFLFlow_sample_element;
+
+enum SFL_sample_tag {
+    SFLFLOW_SAMPLE = 1,              /* enterprise = 0 : format = 1 */
+    SFLCOUNTERS_SAMPLE = 2,          /* enterprise = 0 : format = 2 */
+    SFLFLOW_SAMPLE_EXPANDED = 3,     /* enterprise = 0 : format = 3 */
+    SFLCOUNTERS_SAMPLE_EXPANDED = 4  /* enterprise = 0 : format = 4 */
+};
+  
+/* Format of a single flow sample */
+
+typedef struct _SFLFlow_sample {
+    /* u_int32_t tag;    */         /* SFL_sample_tag -- enterprise = 0 : format = 1 */
+    /* u_int32_t length; */
+    u_int32_t sequence_number;      /* Incremented with each flow sample
+                                      generated */
+    u_int32_t source_id;            /* fsSourceId */
+    u_int32_t sampling_rate;        /* fsPacketSamplingRate */
+    u_int32_t sample_pool;          /* Total number of packets that could have been
+                                      sampled (i.e. packets skipped by sampling
+                                      process + total number of samples) */
+    u_int32_t drops;                /* Number of times a packet was dropped due to
+                                      lack of resources */
+    u_int32_t input;                /* SNMP ifIndex of input interface.
+                                      0 if interface is not known. */
+    u_int32_t output;               /* SNMP ifIndex of output interface,
+                                      0 if interface is not known.
+                                      Set most significant bit to indicate
+                                      multiple destination interfaces
+                                      (i.e. in case of broadcast or multicast)
+                                      and set lower order bits to indicate
+                                      number of destination interfaces.
+                                      Examples:
+                                      0x00000002  indicates ifIndex = 2
+                                      0x00000000  ifIndex unknown.
+                                      0x80000007  indicates a packet sent
+                                      to 7 interfaces.
+                                      0x80000000  indicates a packet sent to
+                                      an unknown number of
+                                      interfaces greater than 1.*/
+    u_int32_t num_elements;
+    SFLFlow_sample_element *elements;
+} SFLFlow_sample;
+
+/* Same record as SFLFlow_sample, but in the expanded layout: the data source
+   is split into ds_class/ds_index and each interface carries its own format
+   word, so full 32-bit ifIndex numbers can be reported. */
+
+typedef struct _SFLFlow_sample_expanded {
+    /* u_int32_t tag;    */         /* SFL_sample_tag -- enterprise = 0 : format = 3 (SFLFLOW_SAMPLE_EXPANDED) */
+    /* u_int32_t length; */
+    u_int32_t sequence_number;      /* Incremented with each flow sample
+                                      generated */
+    u_int32_t ds_class;             /* EXPANDED */
+    u_int32_t ds_index;             /* EXPANDED */
+    u_int32_t sampling_rate;        /* fsPacketSamplingRate */
+    u_int32_t sample_pool;          /* Total number of packets that could have been
+                                      sampled (i.e. packets skipped by sampling
+                                      process + total number of samples) */
+    u_int32_t drops;                /* Number of times a packet was dropped due to
+                                      lack of resources */
+    u_int32_t inputFormat;          /* EXPANDED */
+    u_int32_t input;                /* SNMP ifIndex of input interface.
+                                      0 if interface is not known. */
+    u_int32_t outputFormat;         /* EXPANDED */
+    u_int32_t output;               /* SNMP ifIndex of output interface,
+                                      0 if interface is not known. */
+    u_int32_t num_elements;         /* presumably the number of entries on 'elements' -- confirm against the encoder */
+    SFLFlow_sample_element *elements; /* singly linked through each element's 'nxt' field */
+} SFLFlow_sample_expanded;
+
+/* Counter types */
+
+/* Generic interface counters - see RFC 1573, 2233 */
+
+typedef struct _SFLIf_counters {
+    u_int32_t ifIndex;
+    u_int32_t ifType;
+    u_int64_t ifSpeed;
+    u_int32_t ifDirection;        /* Derived from MAU MIB (RFC 2668)
+                                    0 = unknown, 1 = full-duplex,
+                                    2 = half-duplex, 3 = in, 4 = out */
+    u_int32_t ifStatus;           /* bit field with the following bits assigned:
+                                    bit 0 = ifAdminStatus (0 = down, 1 = up)
+                                    bit 1 = ifOperStatus (0 = down, 1 = up) */
+    u_int64_t ifInOctets;
+    u_int32_t ifInUcastPkts;
+    u_int32_t ifInMulticastPkts;
+    u_int32_t ifInBroadcastPkts;
+    u_int32_t ifInDiscards;
+    u_int32_t ifInErrors;
+    u_int32_t ifInUnknownProtos;
+    u_int64_t ifOutOctets;
+    u_int32_t ifOutUcastPkts;
+    u_int32_t ifOutMulticastPkts;
+    u_int32_t ifOutBroadcastPkts;
+    u_int32_t ifOutDiscards;
+    u_int32_t ifOutErrors;
+    u_int32_t ifPromiscuousMode;
+} SFLIf_counters;
+
+/* Ethernet interface counters - see RFC 2358 */
+typedef struct _SFLEthernet_counters {
+    u_int32_t dot3StatsAlignmentErrors;
+    u_int32_t dot3StatsFCSErrors;
+    u_int32_t dot3StatsSingleCollisionFrames;
+    u_int32_t dot3StatsMultipleCollisionFrames;
+    u_int32_t dot3StatsSQETestErrors;
+    u_int32_t dot3StatsDeferredTransmissions;
+    u_int32_t dot3StatsLateCollisions;
+    u_int32_t dot3StatsExcessiveCollisions;
+    u_int32_t dot3StatsInternalMacTransmitErrors;
+    u_int32_t dot3StatsCarrierSenseErrors;
+    u_int32_t dot3StatsFrameTooLongs;
+    u_int32_t dot3StatsInternalMacReceiveErrors;
+    u_int32_t dot3StatsSymbolErrors;
+} SFLEthernet_counters;
+
+/* Token ring counters - see RFC 1748 */
+
+typedef struct _SFLTokenring_counters {
+    u_int32_t dot5StatsLineErrors;
+    u_int32_t dot5StatsBurstErrors;
+    u_int32_t dot5StatsACErrors;
+    u_int32_t dot5StatsAbortTransErrors;
+    u_int32_t dot5StatsInternalErrors;
+    u_int32_t dot5StatsLostFrameErrors;
+    u_int32_t dot5StatsReceiveCongestions;
+    u_int32_t dot5StatsFrameCopiedErrors;
+    u_int32_t dot5StatsTokenErrors;
+    u_int32_t dot5StatsSoftErrors;
+    u_int32_t dot5StatsHardErrors;
+    u_int32_t dot5StatsSignalLoss;
+    u_int32_t dot5StatsTransmitBeacons;
+    u_int32_t dot5StatsRecoverys;
+    u_int32_t dot5StatsLobeWires;
+    u_int32_t dot5StatsRemoves;
+    u_int32_t dot5StatsSingles;
+    u_int32_t dot5StatsFreqErrors;
+} SFLTokenring_counters;
+
+/* 100 BaseVG interface counters - see RFC 2020 */
+
+typedef struct _SFLVg_counters {
+    u_int32_t dot12InHighPriorityFrames;
+    u_int64_t dot12InHighPriorityOctets;
+    u_int32_t dot12InNormPriorityFrames;
+    u_int64_t dot12InNormPriorityOctets;
+    u_int32_t dot12InIPMErrors;
+    u_int32_t dot12InOversizeFrameErrors;
+    u_int32_t dot12InDataErrors;
+    u_int32_t dot12InNullAddressedFrames;
+    u_int32_t dot12OutHighPriorityFrames;
+    u_int64_t dot12OutHighPriorityOctets;
+    u_int32_t dot12TransitionIntoTrainings;
+    u_int64_t dot12HCInHighPriorityOctets;
+    u_int64_t dot12HCInNormPriorityOctets;
+    u_int64_t dot12HCOutHighPriorityOctets;
+} SFLVg_counters;
+
+typedef struct _SFLVlan_counters {
+    u_int32_t vlan_id;
+    u_int64_t octets;
+    u_int32_t ucastPkts;
+    u_int32_t multicastPkts;
+    u_int32_t broadcastPkts;
+    u_int32_t discards;
+} SFLVlan_counters;
+
+/* Counters data */
+
+enum SFLCounters_type_tag {
+    /* enterprise = 0, format = ... */
+    SFLCOUNTERS_GENERIC      = 1,
+    SFLCOUNTERS_ETHERNET     = 2,
+    SFLCOUNTERS_TOKENRING    = 3,
+    SFLCOUNTERS_VG           = 4,
+    SFLCOUNTERS_VLAN         = 5
+};
+
+typedef union _SFLCounters_type {
+    SFLIf_counters generic;
+    SFLEthernet_counters ethernet;
+    SFLTokenring_counters tokenring;
+    SFLVg_counters vg;
+    SFLVlan_counters vlan;
+} SFLCounters_type;
+
+typedef struct _SFLCounters_sample_element {
+    struct _SFLCounters_sample_element *nxt; /* linked list */
+    u_int32_t tag; /* SFLCounters_type_tag */
+    u_int32_t length;
+    SFLCounters_type counterBlock;
+} SFLCounters_sample_element;
+
+typedef struct _SFLCounters_sample {
+    /* u_int32_t tag;    */       /* SFL_sample_tag -- enterprise = 0 : format = 2 */
+    /* u_int32_t length; */
+    u_int32_t sequence_number;    /* Incremented with each counters sample
+                                    generated by this source_id */
+    u_int32_t source_id;          /* fsSourceId */
+    u_int32_t num_elements;
+    SFLCounters_sample_element *elements;
+} SFLCounters_sample;
+
+/* Same record as SFLCounters_sample, but in the expanded layout, so ds_index
+   can be a full 32 bits */
+typedef struct _SFLCounters_sample_expanded {
+    /* u_int32_t tag;    */       /* SFL_sample_tag -- enterprise = 0 : format = 4 (SFLCOUNTERS_SAMPLE_EXPANDED) */
+    /* u_int32_t length; */
+    u_int32_t sequence_number;    /* Incremented with each counters sample
+                                    generated by this source_id */
+    u_int32_t ds_class;           /* EXPANDED */
+    u_int32_t ds_index;           /* EXPANDED */
+    u_int32_t num_elements;       /* presumably the number of entries on 'elements' -- confirm against the encoder */
+    SFLCounters_sample_element *elements; /* singly linked through each element's 'nxt' field */
+} SFLCounters_sample_expanded;
+
+#define SFLADD_ELEMENT(_sm, _el) do { (_el)->nxt = (_sm)->elements; (_sm)->elements = (_el); } while(0) /* Pushes element _el onto the front of sample _sm's 'elements' list. Does not touch num_elements. Each argument is evaluated twice, so pass side-effect-free expressions. */
+
+/* Format of a sample datagram */
+
+enum SFLDatagram_version {
+    SFLDATAGRAM_VERSION2 = 2,
+    SFLDATAGRAM_VERSION4 = 4,
+    SFLDATAGRAM_VERSION5 = 5
+};
+
+typedef struct _SFLSample_datagram_hdr {
+    u_int32_t datagram_version;      /* (enum SFLDatagram_version) = VERSION5 = 5 */
+    SFLAddress agent_address;        /* IP address of sampling agent */
+    u_int32_t sub_agent_id;          /* Used to distinguish between datagram
+                                       streams from separate agent sub-entities
+                                       within a device. */
+    u_int32_t sequence_number;       /* Incremented with each sample datagram
+                                       generated */
+    u_int32_t uptime;                /* Current time (in milliseconds since device
+                                       last booted). Should be set as close to
+                                       datagram transmission time as possible.*/
+    u_int32_t num_records;           /* Number of tag-len-val flow/counter records to follow */
+} SFLSample_datagram_hdr;
+
+#define SFL_MAX_DATAGRAM_SIZE 1500
+#define SFL_MIN_DATAGRAM_SIZE 200
+#define SFL_DEFAULT_DATAGRAM_SIZE 1400
+
+#define SFL_DATA_PAD 400
+
+#endif /* SFLOW_H */
diff --git a/lib/sflow_agent.c b/lib/sflow_agent.c
new file mode 100644 (file)
index 0000000..4b25c25
--- /dev/null
@@ -0,0 +1,492 @@
+/* Copyright (c) 2002-2009 InMon Corp. Licensed under the terms of the InMon sFlow licence: */
+/* http://www.inmon.com/technology/sflowlicense.txt */
+
+#include "sflow_api.h"
+
+static void * sflAlloc(SFLAgent *agent, size_t bytes);
+static void sflFree(SFLAgent *agent, void *obj);
+static void sfl_agent_jumpTableAdd(SFLAgent *agent, SFLSampler *sampler);
+static void sfl_agent_jumpTableRemove(SFLAgent *agent, SFLSampler *sampler);
+
+/*________________--------------------------__________________
+  ________________    sfl_agent_init        __________________
+  ----------------__________________________------------------
+
+  Initialises a caller-provided SFLAgent.  The whole structure is zeroed
+  first, so any state it held before (list heads, counters, sockets) is
+  discarded; then the address, sub-agent id, timestamps, opaque 'magic'
+  pointer and the four callbacks are stored.
+
+  myIP is dereferenced unconditionally and must not be NULL.
+
+  Only when built with SFLOW_DO_SOCKET and sendFn is NULL does the agent
+  open its own IPv4 and IPv6 UDP sockets.  A failure to open either one is
+  reported through sfl_agent_sysError() and initialisation carries on.
+*/
+
+void sfl_agent_init(SFLAgent *agent,
+                   SFLAddress *myIP, /* IP address of this agent in net byte order */
+                   u_int32_t subId,  /* agent_sub_id */
+                   time_t bootTime,  /* agent boot time */
+                   time_t now,       /* time now */
+                   void *magic,      /* ptr to pass back in logging and alloc fns */
+                   allocFn_t allocFn,
+                   freeFn_t freeFn,
+                   errorFn_t errorFn,
+                   sendFn_t sendFn)
+{
+    /* first clear everything (this also empties the sampler/poller/receiver lists) */
+    memset(agent, 0, sizeof(*agent));
+    /* now copy in the parameters */
+    agent->myIP = *myIP; /* structure copy */
+    agent->subId = subId;
+    agent->bootTime = bootTime;
+    agent->now = now;
+    agent->magic = magic;
+    agent->allocFn = allocFn;
+    agent->freeFn = freeFn;
+    agent->errorFn = errorFn;
+    agent->sendFn = sendFn;
+
+#ifdef SFLOW_DO_SOCKET  
+    if(sendFn == NULL) {
+       /* no send callback supplied, so the agent needs sockets of its own.
+          open the socket - really need one for v4 and another for v6? */
+       if((agent->receiverSocket4 = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)) == -1)
+           sfl_agent_sysError(agent, "agent", "IPv4 socket open failed");
+       if((agent->receiverSocket6 = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP)) == -1)
+           sfl_agent_sysError(agent, "agent", "IPv6 socket open failed");
+    }
+#endif
+}
+
+/*_________________---------------------------__________________
+  _________________   sfl_agent_release       __________________
+  -----------------___________________________------------------
+
+  Hands every node on the agent's sampler, poller and receiver lists to
+  sflFree() and resets the three list heads to NULL.  The SFLAgent
+  structure itself is not freed.
+
+  When built with SFLOW_DO_SOCKET, a receiver socket is closed only if its
+  descriptor is greater than zero: sfl_agent_init() zeroes the structure,
+  so 0 is what a never-opened socket looks like, and -1 is a failed open.
+
+  NOTE(review): agent->jumpTable is not cleared here, so its entries still
+  point at the samplers that were just freed -- confirm that callers never
+  look up samplers by ifIndex on a released agent.
+*/
+
+void sfl_agent_release(SFLAgent *agent)
+{
+    /* release and free the samplers, pollers and receivers */
+    SFLSampler *sm = agent->samplers;
+    SFLPoller *pl = agent->pollers;
+    SFLReceiver *rcv = agent->receivers;
+
+    for(; sm != NULL; ) {
+       SFLSampler *nextSm = sm->nxt;   /* read the link before the node is freed */
+       sflFree(agent, sm);
+       sm = nextSm;
+    }
+    agent->samplers = NULL;
+
+    for(; pl != NULL; ) {
+       SFLPoller *nextPl = pl->nxt;    /* read the link before the node is freed */
+       sflFree(agent, pl);
+       pl = nextPl;
+    }
+    agent->pollers = NULL;
+
+    for(; rcv != NULL; ) {
+       SFLReceiver *nextRcv = rcv->nxt; /* read the link before the node is freed */
+       sflFree(agent, rcv);
+       rcv = nextRcv;
+    }
+    agent->receivers = NULL;
+
+#ifdef SFLOW_DO_SOCKET
+    /* close the sockets */
+    if(agent->receiverSocket4 > 0) close(agent->receiverSocket4);
+    if(agent->receiverSocket6 > 0) close(agent->receiverSocket6);
+#endif
+}
+
+
+/*_________________---------------------------__________________
+  _________________   sfl_agent_set_*         __________________
+  -----------------___________________________------------------
+
+  Setters for the agent's identity fields.  Each one only writes when the
+  new value differs from the stored one.
+*/
+
+void sfl_agent_set_agentAddress(SFLAgent *agent, SFLAddress *addr)
+{
+    /* A NULL addr is ignored.  The comparison covers every byte of the
+       structure, not just the bytes of the active address family. */
+    if(addr && memcmp(addr, &agent->myIP, sizeof(agent->myIP)) != 0) {
+       /* change of address */
+       agent->myIP = *addr; /* structure copy */
+       /* reset sequence numbers here? */
+    }
+}
+
+void sfl_agent_set_agentSubId(SFLAgent *agent, u_int32_t subId)
+{
+    if(subId != agent->subId) {
+       /* change of subId - store the new sub-agent id */
+       agent->subId = subId;
+       /* reset sequence numbers here? (open question: nothing is reset at present) */
+    }
+}
+
+/*_________________---------------------------__________________
+  _________________   sfl_agent_tick          __________________
+  -----------------___________________________------------------
+
+  Records 'now' as the agent's current time, then forwards the tick to
+  every receiver, every sampler and every poller on the agent's lists, in
+  that order.
+*/
+
+void sfl_agent_tick(SFLAgent *agent, time_t now)
+{
+    SFLReceiver *rcv = agent->receivers;
+    SFLSampler *sm = agent->samplers;
+    SFLPoller *pl = agent->pollers;
+    agent->now = now;
+    /* receivers use ticks to flush send data */
+    for(; rcv != NULL; rcv = rcv->nxt) sfl_receiver_tick(rcv, now);
+    /* samplers use ticks to decide when they are sampling too fast */
+    for(; sm != NULL; sm = sm->nxt) sfl_sampler_tick(sm, now);
+    /* pollers use ticks to decide when to ask for counters */
+    for(; pl != NULL; pl = pl->nxt) sfl_poller_tick(pl, now);
+}
+
+/*_________________---------------------------__________________
+  _________________   sfl_agent_addReceiver   __________________
+  -----------------___________________________------------------
+
+  Allocates a receiver with sflAlloc(), initialises it with
+  sfl_receiver_init() and appends it to the tail of the agent's receiver
+  list.  Returns the new receiver.
+
+  NOTE(review): the pointer returned by sflAlloc() is used without a NULL
+  check -- confirm the allocator cannot fail.
+*/
+
+SFLReceiver *sfl_agent_addReceiver(SFLAgent *agent)
+{
+    SFLReceiver *rcv = (SFLReceiver *)sflAlloc(agent, sizeof(SFLReceiver));
+    sfl_receiver_init(rcv, agent);
+    /* add to end of list - to preserve the receiver index numbers for existing receivers */
+    {
+       SFLReceiver *r, *prev = NULL;
+       /* empty-bodied loop: walk to the end so that prev is the last node (NULL if the list is empty) */
+       for(r = agent->receivers; r != NULL; prev = r, r = r->nxt);
+       if(prev) prev->nxt = rcv;
+       else agent->receivers = rcv;
+       rcv->nxt = NULL;
+    }
+    return rcv;
+}
+
+/*_________________---------------------------__________________
+  _________________     sfl_dsi_compare       __________________
+  -----------------___________________________------------------
+
+  Note that if there is a mixture of ds_classes for this agent, then
+  the simple numeric comparison may not be correct - the sort order (for
+  the purposes of the SNMP MIB) should really be determined by the OID
+  that these numeric ds_class numbers are a shorthand for.  For example,
+  ds_class == 0 means ifIndex, which is the oid "1.3.6.1.2.1.2.2.1"
+*/
+
+static inline int sfl_dsi_compare(SFLDataSource_instance *pdsi1, SFLDataSource_instance *pdsi2) {
+    /* Orders two data-source instances by ds_class, then ds_index, then
+       ds_instance.  Returns 0 when all three fields match, a positive value
+       when pdsi2 is the greater of the two and a negative value when it is
+       the smaller (the same sign convention as the earlier pdsi2 - pdsi1
+       arithmetic).
+
+       The fields are compared with relational tests rather than by
+       subtraction: a difference narrowed to int reports the wrong sign (or
+       overflows) once the two values are far enough apart, and a comparison
+       that is not consistent breaks the ordering of the sorted sampler and
+       poller lists.  memcmp() is still avoided because it would not give a
+       numeric ordering on little-endian platforms. */
+    if(pdsi2->ds_class != pdsi1->ds_class)
+       return (pdsi2->ds_class > pdsi1->ds_class) ? 1 : -1;
+    if(pdsi2->ds_index != pdsi1->ds_index)
+       return (pdsi2->ds_index > pdsi1->ds_index) ? 1 : -1;
+    if(pdsi2->ds_instance != pdsi1->ds_instance)
+       return (pdsi2->ds_instance > pdsi1->ds_instance) ? 1 : -1;
+    return 0;
+}
+
+/*_________________---------------------------__________________
+  _________________   sfl_agent_addSampler    __________________
+  -----------------___________________________------------------
+
+  Returns the sampler for data source *pdsi, creating it if necessary.
+
+  The agent's sampler list is kept in sfl_dsi_compare() order.  If a
+  sampler with an equal data source is already on the list it is returned
+  unchanged.  Otherwise a new one is allocated with sflAlloc(),
+  initialised with sfl_sampler_init() and linked in at the insert point.
+
+  A new sampler whose SFL_DS_CLASS is 0 is also considered for the ifIndex
+  jumpTable: it is added when sfl_agent_getSamplerByIfIndex() finds no
+  sampler for that index, or when it has a lower ds_instance than the one
+  found, which it then replaces.
+
+  NOTE(review): the pointer returned by sflAlloc() is used without a NULL
+  check -- confirm the allocator cannot fail.
+*/
+
+SFLSampler *sfl_agent_addSampler(SFLAgent *agent, SFLDataSource_instance *pdsi)
+{
+    /* Keep the list sorted. */
+    SFLSampler *prev = NULL, *sm = agent->samplers;
+    for(; sm != NULL; prev = sm, sm = sm->nxt) {
+       int64_t cmp = sfl_dsi_compare(pdsi, &sm->dsi);
+       if(cmp == 0) return sm;  /* found - return existing one */
+       if(cmp < 0) break;       /* insert here, in front of sm */
+    }
+    /* either we found the insert point, or reached the end of the list...*/
+    
+    {
+       SFLSampler *newsm = (SFLSampler *)sflAlloc(agent, sizeof(SFLSampler));
+       sfl_sampler_init(newsm, agent, pdsi);
+       /* splice in between prev and sm (prev == NULL means new list head) */
+       if(prev) prev->nxt = newsm;
+       else agent->samplers = newsm;
+       newsm->nxt = sm;
+       
+       /* see if we should go in the ifIndex jumpTable */
+       if(SFL_DS_CLASS(newsm->dsi) == 0) {
+           SFLSampler *test = sfl_agent_getSamplerByIfIndex(agent, SFL_DS_INDEX(newsm->dsi));
+           if(test && (SFL_DS_INSTANCE(newsm->dsi) < SFL_DS_INSTANCE(test->dsi))) {
+               /* replace with this new one because it has a lower ds_instance number */
+               sfl_agent_jumpTableRemove(agent, test);
+               test = NULL;
+           }
+           if(test == NULL) sfl_agent_jumpTableAdd(agent, newsm);
+       }
+       return newsm;
+    }
+}
+
+/*_________________---------------------------__________________
+  _________________   sfl_agent_addPoller     __________________
+  -----------------___________________________------------------
+
+  Returns the poller for data source *pdsi, creating it if necessary.
+
+  The agent's poller list is kept in sfl_dsi_compare() order.  If a poller
+  with an equal data source is already on the list it is returned as is,
+  and the magic and getCountersFn arguments are not applied to it.
+  Otherwise a new poller is allocated with sflAlloc(), initialised with
+  sfl_poller_init() and linked in at the insert point.
+
+  NOTE(review): the pointer returned by sflAlloc() is used without a NULL
+  check -- confirm the allocator cannot fail.
+*/
+
+SFLPoller *sfl_agent_addPoller(SFLAgent *agent,
+                              SFLDataSource_instance *pdsi,
+                              void *magic,         /* ptr to pass back in getCountersFn() */
+                              getCountersFn_t getCountersFn)
+{
+    /* keep the list sorted */
+    SFLPoller *prev = NULL, *pl = agent->pollers;
+    for(; pl != NULL; prev = pl, pl = pl->nxt) {
+       int64_t cmp = sfl_dsi_compare(pdsi, &pl->dsi);
+       if(cmp == 0) return pl;  /* found - return existing one */
+       if(cmp < 0) break;       /* insert here, in front of pl */
+    }
+    /* either we found the insert point, or reached the end of the list... */
+    {
+       SFLPoller *newpl = (SFLPoller *)sflAlloc(agent, sizeof(SFLPoller));
+       sfl_poller_init(newpl, agent, pdsi, magic, getCountersFn);
+       /* splice in between prev and pl (prev == NULL means new list head) */
+       if(prev) prev->nxt = newpl;
+       else agent->pollers = newpl;
+       newpl->nxt = pl;
+       return newpl;
+    }
+}
+
+/*_________________---------------------------__________________
+  _________________  sfl_agent_removeSampler  __________________
+  -----------------___________________________------------------
+
+  Removes the sampler whose data source equals *pdsi: unlinks it from the
+  agent's sampler list, takes it out of the ifIndex jumpTable and frees it
+  with sflFree().  Returns 1 if a sampler was removed, 0 if none matched.
+
+  NOTE(review): no other sampler with the same ifIndex is put into the
+  jumpTable in place of the removed one -- confirm whether that matters
+  when several instances share an ifIndex.
+*/
+
+int sfl_agent_removeSampler(SFLAgent *agent, SFLDataSource_instance *pdsi)
+{
+    /* find it, unlink it and free it */
+    SFLSampler *prev = NULL, *sm = agent->samplers;
+    for(; sm != NULL; prev = sm, sm = sm->nxt) {
+       if(sfl_dsi_compare(pdsi, &sm->dsi) == 0) {
+           if(prev == NULL) agent->samplers = sm->nxt;
+           else prev->nxt = sm->nxt;
+           /* drop the hash-chain reference before the memory is released */
+           sfl_agent_jumpTableRemove(agent, sm);
+           sflFree(agent, sm);
+           return 1;
+       }
+    }
+    /* not found */
+    return 0;
+}
+
+/*_________________---------------------------__________________
+  _________________  sfl_agent_removePoller   __________________
+  -----------------___________________________------------------
+*/
+
+/* Unlink and free the poller whose data source instance equals *pdsi.
+   Returns 1 if a poller was removed, 0 if no match was found. */
+int sfl_agent_removePoller(SFLAgent *agent, SFLDataSource_instance *pdsi)
+{
+    /* walk the list through the link that points at the current node */
+    SFLPoller **link = &agent->pollers;
+    while(*link != NULL) {
+        SFLPoller *victim = *link;
+        if(sfl_dsi_compare(pdsi, &victim->dsi) == 0) {
+            *link = victim->nxt;
+            sflFree(agent, victim);
+            return 1;
+        }
+        link = &victim->nxt;
+    }
+    /* no poller with that data source instance */
+    return 0;
+}
+
+/*_________________--------------------------------__________________
+  _________________  sfl_agent_jumpTableAdd        __________________
+  -----------------________________________________------------------
+*/
+
+/* Push the sampler onto the front of the jump-table bucket selected by
+   its ds_index (modulo SFL_HASHTABLE_SIZ). */
+static void sfl_agent_jumpTableAdd(SFLAgent *agent, SFLSampler *sampler)
+{
+    SFLSampler **bucket = &agent->jumpTable[SFL_DS_INDEX(sampler->dsi) % SFL_HASHTABLE_SIZ];
+    sampler->hash_nxt = *bucket;
+    *bucket = sampler;
+}
+
+/*_________________--------------------------------__________________
+  _________________  sfl_agent_jumpTableRemove     __________________
+  -----------------________________________________------------------
+*/
+
+/* Take the sampler out of its jump-table bucket, if it is there.
+   The match is by pointer identity; the sampler's hash_nxt is cleared
+   once it has been unlinked. */
+static void sfl_agent_jumpTableRemove(SFLAgent *agent, SFLSampler *sampler)
+{
+    SFLSampler **slot = &agent->jumpTable[SFL_DS_INDEX(sampler->dsi) % SFL_HASHTABLE_SIZ];
+
+    /* advance to the link that points at the sampler (or to the chain end) */
+    while(*slot != NULL && *slot != sampler) slot = &(*slot)->hash_nxt;
+
+    if(*slot != NULL) {
+        /* found - unlink */
+        *slot = sampler->hash_nxt;
+        sampler->hash_nxt = NULL;
+    }
+}
+
+/*_________________--------------------------------__________________
+  _________________  sfl_agent_getSamplerByIfIndex __________________
+  -----------------________________________________------------------
+  fast lookup (pointers cached in hash table).  If there are multiple
+  sampler instances for a given ifIndex, then this fn will return
+  the one with the lowest instance number.  Since the samplers
+  list is sorted, this means the other instances will be accessible
+  by following the sampler->nxt pointer (until the ds_class
+  or ds_index changes).  This is helpful if you need to offer
+  the same flowSample to multiple samplers.
+*/
+
+/* Jump-table lookup: returns the first sampler in the bucket for ifIndex
+   whose ds_index equals ifIndex, or NULL if there is none. */
+SFLSampler *sfl_agent_getSamplerByIfIndex(SFLAgent *agent, u_int32_t ifIndex)
+{
+    SFLSampler *cur = agent->jumpTable[ifIndex % SFL_HASHTABLE_SIZ];
+    while(cur != NULL && SFL_DS_INDEX(cur->dsi) != ifIndex) {
+        cur = cur->hash_nxt;
+    }
+    return cur;
+}
+
+/*_________________---------------------------__________________
+  _________________  sfl_agent_getSampler     __________________
+  -----------------___________________________------------------
+*/
+
+/* Linear search of the agent's sampler list for the entry whose data
+   source instance equals *pdsi.  Returns NULL when there is no match. */
+SFLSampler *sfl_agent_getSampler(SFLAgent *agent, SFLDataSource_instance *pdsi)
+{
+    SFLSampler *cur = agent->samplers;
+    while(cur != NULL) {
+        if(sfl_dsi_compare(pdsi, &cur->dsi) == 0) break;
+        cur = cur->nxt;
+    }
+    /* cur is NULL here if we ran off the end of the list */
+    return cur;
+}
+
+/*_________________---------------------------__________________
+  _________________  sfl_agent_getPoller      __________________
+  -----------------___________________________------------------
+*/
+
+/* Linear search of the agent's poller list for the entry whose data
+   source instance equals *pdsi.  Returns NULL when there is no match. */
+SFLPoller *sfl_agent_getPoller(SFLAgent *agent, SFLDataSource_instance *pdsi)
+{
+    SFLPoller *cur = agent->pollers;
+    while(cur != NULL) {
+        if(sfl_dsi_compare(pdsi, &cur->dsi) == 0) break;
+        cur = cur->nxt;
+    }
+    /* cur is NULL here if we ran off the end of the list */
+    return cur;
+}
+
+/*_________________---------------------------__________________
+  _________________  sfl_agent_getReceiver    __________________
+  -----------------___________________________------------------
+*/
+
+/* Return the receiver at 1-based position receiverIndex in the agent's
+   receiver list, or NULL if the list is shorter than that.  Index 0
+   therefore never matches. */
+SFLReceiver *sfl_agent_getReceiver(SFLAgent *agent, u_int32_t receiverIndex)
+{
+    SFLReceiver *cur;
+    u_int32_t position = 1;
+
+    for(cur = agent->receivers; cur != NULL; cur = cur->nxt, position++) {
+        if(position == receiverIndex) return cur;
+    }
+
+    /* not found - ran off the end of the table */
+    return NULL;
+}
+
+/*_________________---------------------------__________________
+  _________________ sfl_agent_getNextSampler  __________________
+  -----------------___________________________------------------
+*/
+
+/* Return the sampler that follows the one identified by *pdsi in the
+   agent's sampler list (the list is assumed to be kept in sorted order).
+   Returns NULL if *pdsi is not present or is the last entry. */
+SFLSampler *sfl_agent_getNextSampler(SFLAgent *agent, SFLDataSource_instance *pdsi)
+{
+    SFLSampler *match = sfl_agent_getSampler(agent, pdsi);
+    if(match == NULL) return NULL;
+    return match->nxt;
+}
+
+/*_________________---------------------------__________________
+  _________________ sfl_agent_getNextPoller   __________________
+  -----------------___________________________------------------
+*/
+
+/* Return the poller that follows the one identified by *pdsi in the
+   agent's poller list (the list is assumed to be kept in sorted order).
+   Returns NULL if *pdsi is not present or is the last entry. */
+SFLPoller *sfl_agent_getNextPoller(SFLAgent *agent, SFLDataSource_instance *pdsi)
+{
+    SFLPoller *match = sfl_agent_getPoller(agent, pdsi);
+    if(match == NULL) return NULL;
+    return match->nxt;
+}
+
+/*_________________---------------------------__________________
+  _________________ sfl_agent_getNextReceiver __________________
+  -----------------___________________________------------------
+*/
+
+/* Return the receiver whose 1-based index is receiverIndex + 1, or NULL
+   if there is no such receiver. */
+SFLReceiver *sfl_agent_getNextReceiver(SFLAgent *agent, u_int32_t receiverIndex)
+{
+    u_int32_t followingIndex = receiverIndex + 1;
+    return sfl_agent_getReceiver(agent, followingIndex);
+}
+
+
+/*_________________---------------------------__________________
+  _________________ sfl_agent_resetReceiver   __________________
+  -----------------___________________________------------------
+*/
+
+/* Detach every sampler and poller that is currently pointed at the given
+   receiver by setting its receiver index to 0.  Does nothing if the
+   receiver is not in the agent's receiver list. */
+void sfl_agent_resetReceiver(SFLAgent *agent, SFLReceiver *receiver)
+{
+    u_int32_t rcvIdx = 0;
+    SFLReceiver *rcv;
+    SFLSampler *sm;
+    SFLPoller *pl;
+
+    /* step 1: work out the 1-based index of this receiver */
+    for(rcv = agent->receivers; rcv != NULL; rcv = rcv->nxt) {
+        rcvIdx++;
+        if(rcv == receiver) break;
+    }
+    if(rcv == NULL) return; /* not one of ours */
+
+    /* step 2: tell anyone that is using it to stop */
+    for(sm = agent->samplers; sm != NULL; sm = sm->nxt) {
+        if(sfl_sampler_get_sFlowFsReceiver(sm) == rcvIdx) {
+            sfl_sampler_set_sFlowFsReceiver(sm, 0);
+        }
+    }
+
+    for(pl = agent->pollers; pl != NULL; pl = pl->nxt) {
+        if(sfl_poller_get_sFlowCpReceiver(pl) == rcvIdx) {
+            sfl_poller_set_sFlowCpReceiver(pl, 0);
+        }
+    }
+}
+  
+/*_________________---------------------------__________________
+  _________________     sfl_agent_error       __________________
+  -----------------___________________________------------------
+*/
+#define MAX_ERRMSG_LEN 1000
+
+/* Format an error message of the form "sfl_agent_error: <modName>: <msg>\n"
+   and hand it to the agent's errorFn callback, or print it to stderr
+   (followed by a further newline) when no callback is set.
+     agent   - agent whose errorFn/magic are used
+     modName - name of the reporting module
+     msg     - error text
+   The formatted message is truncated to fit MAX_ERRMSG_LEN bytes. */
+void sfl_agent_error(SFLAgent *agent, char *modName, char *msg)
+{
+    char errm[MAX_ERRMSG_LEN];
+    /* bounded write: modName and msg are caller-supplied and of unknown
+       length, so an unbounded sprintf could overrun errm */
+    snprintf(errm, sizeof(errm), "sfl_agent_error: %s: %s\n", modName, msg);
+    if(agent->errorFn) (*agent->errorFn)(agent->magic, agent, errm);
+    else {
+       fprintf(stderr, "%s\n", errm);
+       fflush(stderr);
+    }
+}
+
+/*_________________---------------------------__________________
+  _________________     sfl_agent_sysError    __________________
+  -----------------___________________________------------------
+*/
+
+/* Like sfl_agent_error(), but appends the current errno value and its
+   strerror() text: "sfl_agent_sysError: <modName>: <msg> (errno = N - text)\n".
+   The message goes to the agent's errorFn callback, or to stderr
+   (followed by a further newline) when no callback is set.
+   The formatted message is truncated to fit MAX_ERRMSG_LEN bytes. */
+void sfl_agent_sysError(SFLAgent *agent, char *modName, char *msg)
+{
+    char errm[MAX_ERRMSG_LEN];
+    /* capture errno first so the number and the text always refer to the
+       same error, whatever the library calls below do to errno */
+    int savedErrno = errno;
+    /* bounded write: modName and msg are caller-supplied and of unknown
+       length, so an unbounded sprintf could overrun errm */
+    snprintf(errm, sizeof(errm), "sfl_agent_sysError: %s: %s (errno = %d - %s)\n",
+             modName, msg, savedErrno, strerror(savedErrno));
+    if(agent->errorFn) (*agent->errorFn)(agent->magic, agent, errm);
+    else {
+       fprintf(stderr, "%s\n", errm);
+       fflush(stderr);
+    }
+}
+
+
+/*_________________---------------------------__________________
+  _________________       alloc and free      __________________
+  -----------------___________________________------------------
+*/
+
+/* Allocate `bytes` bytes through the agent's allocFn callback when one is
+   installed, otherwise through SFL_ALLOC. */
+static void * sflAlloc(SFLAgent *agent, size_t bytes)
+{
+    if(agent->allocFn == NULL) return SFL_ALLOC(bytes);
+    return (*agent->allocFn)(agent->magic, agent, bytes);
+}
+
+/* Release `obj` through the agent's freeFn callback when one is installed,
+   otherwise through SFL_FREE. */
+static void sflFree(SFLAgent *agent, void *obj)
+{
+    if(agent->freeFn == NULL) {
+        SFL_FREE(obj);
+        return;
+    }
+    (*agent->freeFn)(agent->magic, agent, obj);
+}
diff --git a/lib/sflow_api.h b/lib/sflow_api.h
new file mode 100644 (file)
index 0000000..be8d997
--- /dev/null
@@ -0,0 +1,340 @@
+/* Copyright (c) 2002-2009 InMon Corp. Licensed under the terms of the InMon sFlow licence: */
+/* http://www.inmon.com/technology/sflowlicense.txt */
+
+#ifndef SFLOW_API_H
+#define SFLOW_API_H 1
+
+/* define SFLOW_DO_SOCKET to 1 if you want the agent
+   to send the packets itself, otherwise set the sendFn
+   callback in sfl_agent_init.*/
+/* #define SFLOW_DO_SOCKET */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <arpa/inet.h> /* for htonl */
+
+#ifdef SFLOW_DO_SOCKET
+#include <sys/socket.h>
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#endif
+
+#include "sflow.h"
+  
+/* define SFLOW_SOFTWARE_SAMPLING to 1 if you need to use the
+   sfl_sampler_takeSample routine and give it every packet */
+/* #define SFLOW_SOFTWARE_SAMPLING */
+
+/*
+  uncomment this preprocessor flag  (or compile with -DSFL_USE_32BIT_INDEX)
+  if your ds_index numbers can ever be >= 2^30-1 (i.e. >= 0x3FFFFFFF)
+*/
+/* #define SFL_USE_32BIT_INDEX */
+
+
+/* Used to combine ds_class, ds_index and instance into
+   a single 64-bit number like this:
+   __________________________________
+   | cls|  index     |   instance     |
+   ----------------------------------
+   but now is opened up to a 12-byte struct to ensure
+   that ds_index has a full 32-bit field, and to make
+   accessing the components simpler. The macros have
+   the same behavior as before, so this change should
+   be transparent.  The only difference is that these
+   objects are now passed around by reference instead
+   of by value, and the comparison is done using a fn.
+*/
+
+typedef struct _SFLDataSource_instance {
+    u_int32_t ds_class;
+    u_int32_t ds_index;
+    u_int32_t ds_instance;
+} SFLDataSource_instance;
+
+#ifdef SFL_USE_32BIT_INDEX
+#define SFL_FLOW_SAMPLE_TYPE SFLFlow_sample_expanded
+#define SFL_COUNTERS_SAMPLE_TYPE SFLCounters_sample_expanded
+#else
+#define SFL_FLOW_SAMPLE_TYPE SFLFlow_sample
+#define SFL_COUNTERS_SAMPLE_TYPE SFLCounters_sample
+/* if index numbers are not going to use all 32 bits, then we can use
+   the more compact encoding, with the dataSource class and index merged */
+#define SFL_DS_DATASOURCE(dsi) (((dsi).ds_class << 24) + (dsi).ds_index)
+#endif
+
+#define SFL_DS_INSTANCE(dsi) (dsi).ds_instance
+#define SFL_DS_CLASS(dsi) (dsi).ds_class
+#define SFL_DS_INDEX(dsi) (dsi).ds_index
+#define SFL_DS_SET(dsi,clss,indx,inst)         \
+    do {                                       \
+       (dsi).ds_class = (clss);                \
+       (dsi).ds_index = (indx);                \
+       (dsi).ds_instance = (inst);             \
+    } while(0)
+
+typedef struct _SFLSampleCollector {
+    u_int32_t data[(SFL_MAX_DATAGRAM_SIZE + SFL_DATA_PAD) / sizeof(u_int32_t)];
+    u_int32_t *datap; /* packet fill pointer */
+    u_int32_t pktlen; /* accumulated size */
+    u_int32_t packetSeqNo;
+    u_int32_t numSamples;
+} SFLSampleCollector;
+
+struct _SFLAgent;  /* forward decl */
+
+typedef struct _SFLReceiver {
+    struct _SFLReceiver *nxt;
+    /* MIB fields */
+    char *sFlowRcvrOwner;
+    time_t sFlowRcvrTimeout;
+    u_int32_t sFlowRcvrMaximumDatagramSize;
+    SFLAddress sFlowRcvrAddress;
+    u_int32_t sFlowRcvrPort;
+    u_int32_t sFlowRcvrDatagramVersion;
+    /* public fields */
+    struct _SFLAgent *agent;    /* pointer to my agent */
+    /* private fields */
+    SFLSampleCollector sampleCollector;
+#ifdef SFLOW_DO_SOCKET
+    struct sockaddr_in receiver4;
+    struct sockaddr_in6 receiver6;
+#endif
+} SFLReceiver;
+
+typedef struct _SFLSampler {
+    /* for linked list (the agent's `samplers` list) */
+    struct _SFLSampler *nxt;
+    /* for hash lookup table (next sampler in the same agent->jumpTable bucket) */
+    struct _SFLSampler *hash_nxt;
+    /* MIB fields */
+    SFLDataSource_instance dsi; /* identity of this sampler; key used for lookup and removal */
+    u_int32_t sFlowFsReceiver; /* receiver index; sfl_agent_resetReceiver() sets it to 0 through the setter */
+    u_int32_t sFlowFsPacketSamplingRate;
+    u_int32_t sFlowFsMaximumHeaderSize;
+    /* public fields */
+    struct _SFLAgent *agent; /* pointer to my agent */
+    /* private fields */
+    SFLReceiver *myReceiver;
+    u_int32_t skip;
+    u_int32_t samplePool;
+    u_int32_t flowSampleSeqNo;
+    /* rate checking */
+    u_int32_t samplesThisTick;
+    u_int32_t samplesLastTick;
+    u_int32_t backoffThreshold; /* see sfl_sampler_set_backoffThreshold(); 0 disables the back-off mechanism */
+} SFLSampler;
+
+/* declare */
+struct _SFLPoller;
+
+typedef void (*getCountersFn_t)(void *magic,                   /* callback to get counters */
+                               struct _SFLPoller *sampler,    /* called with self */
+                               SFL_COUNTERS_SAMPLE_TYPE *cs); /* struct to fill in */
+
+typedef struct _SFLPoller {
+    /* for linked list (the agent's `pollers` list; preserved across sfl_poller_init) */
+    struct _SFLPoller *nxt;
+    /* MIB fields */
+    SFLDataSource_instance dsi; /* identity of this poller; key used for sorting, lookup and removal */
+    u_int32_t sFlowCpReceiver; /* 1-based index into the agent's receiver list; 0 = no receiver (sfl_poller_tick does nothing) */
+    time_t sFlowCpInterval; /* value the countdown is reloaded with after each poll, counted in sfl_poller_tick calls */
+    /* public fields */
+    struct _SFLAgent *agent; /* pointer to my agent */
+    void *magic;             /* ptr to pass back in getCountersFn() */
+    getCountersFn_t getCountersFn; /* may be NULL, in which case sfl_poller_tick only reloads the countdown */
+    u_int32_t bridgePort; /* port number local to bridge */
+    /* private fields */
+    SFLReceiver *myReceiver; /* cached pointer to the receiver selected by sFlowCpReceiver */
+    time_t countersCountdown; /* sfl_poller_tick calls left until the next poll; 0 = polling not enabled */
+    u_int32_t countersSampleSeqNo; /* pre-incremented for each counters sample written */
+} SFLPoller;
+
+typedef void *(*allocFn_t)(void *magic,               /* callback to allocate space on heap */
+                          struct _SFLAgent *agent,   /* called with self */
+                          size_t bytes);             /* bytes requested */
+
+typedef int (*freeFn_t)(void *magic,                  /* callback to free space on heap */
+                       struct _SFLAgent *agent,      /* called with self */
+                       void *obj);                   /* obj to free */
+
+typedef void (*errorFn_t)(void *magic,                /* callback to log error message */
+                         struct _SFLAgent *agent,    /* called with self */
+                         char *msg);                 /* error message */
+
+typedef void (*sendFn_t)(void *magic,                 /* optional override fn to send packet */
+                        struct _SFLAgent *agent,
+                        SFLReceiver *receiver,
+                        u_char *pkt,
+                        u_int32_t pktLen);
+
+
+/* prime numbers are good for hash tables */
+#define SFL_HASHTABLE_SIZ 199
+
+typedef struct _SFLAgent {
+    SFLSampler *jumpTable[SFL_HASHTABLE_SIZ]; /* fast lookup table for samplers (by ifIndex), chained via hash_nxt */
+    SFLSampler *samplers;   /* the list of samplers (linked via nxt) */
+    SFLPoller  *pollers;    /* the list of pollers (linked via nxt, kept sorted by sfl_dsi_compare) */
+    SFLReceiver *receivers; /* the linked list of receivers; a receiver's index is its 1-based position */
+    time_t bootTime;        /* time when we booted or started */
+    time_t now;             /* time now */
+    SFLAddress myIP;        /* IP address of this node */
+    u_int32_t subId;        /* sub_agent_id */
+    void *magic;            /* ptr to pass back in logging and alloc fns */
+    allocFn_t allocFn;      /* optional; SFL_ALLOC is used when NULL */
+    freeFn_t freeFn;        /* optional; SFL_FREE is used when NULL */
+    errorFn_t errorFn;      /* optional; error messages go to stderr when NULL */
+    sendFn_t sendFn;        /* optional override fn to send packet */
+#ifdef SFLOW_DO_SOCKET
+    int receiverSocket4;
+    int receiverSocket6;
+#endif
+} SFLAgent;
+
+/* call this at the start with a newly created agent */
+void sfl_agent_init(SFLAgent *agent,
+                   SFLAddress *myIP, /* IP address of this agent */
+                   u_int32_t subId,  /* agent_sub_id */
+                   time_t bootTime,  /* agent boot time */
+                   time_t now,       /* time now */
+                   void *magic,      /* ptr to pass back in logging and alloc fns */
+                   allocFn_t allocFn,
+                   freeFn_t freeFn,
+                   errorFn_t errorFn,
+                   sendFn_t sendFn);
+
+/* call this to create samplers */
+SFLSampler *sfl_agent_addSampler(SFLAgent *agent, SFLDataSource_instance *pdsi);
+
+/* call this to create pollers */
+SFLPoller *sfl_agent_addPoller(SFLAgent *agent,
+                              SFLDataSource_instance *pdsi,
+                              void *magic, /* ptr to pass back in getCountersFn() */
+                              getCountersFn_t getCountersFn);
+
+/* call this to create receivers */
+SFLReceiver *sfl_agent_addReceiver(SFLAgent *agent);
+
+/* call this to remove samplers */
+int sfl_agent_removeSampler(SFLAgent *agent, SFLDataSource_instance *pdsi);
+
+/* call this to remove pollers */
+int sfl_agent_removePoller(SFLAgent *agent, SFLDataSource_instance *pdsi);
+
+/* note: receivers should not be removed. Typically the receivers
+   list will be created at init time and never changed */
+
+/* call these fns to retrieve sampler, poller or receiver (e.g. for SNMP GET or GETNEXT operation) */
+SFLSampler  *sfl_agent_getSampler(SFLAgent *agent, SFLDataSource_instance *pdsi);
+SFLSampler  *sfl_agent_getNextSampler(SFLAgent *agent, SFLDataSource_instance *pdsi);
+SFLPoller   *sfl_agent_getPoller(SFLAgent *agent, SFLDataSource_instance *pdsi);
+SFLPoller   *sfl_agent_getNextPoller(SFLAgent *agent, SFLDataSource_instance *pdsi);
+SFLReceiver *sfl_agent_getReceiver(SFLAgent *agent, u_int32_t receiverIndex);
+SFLReceiver *sfl_agent_getNextReceiver(SFLAgent *agent, u_int32_t receiverIndex);
+
+/* jump table access - for performance */
+SFLSampler *sfl_agent_getSamplerByIfIndex(SFLAgent *agent, u_int32_t ifIndex);
+
+/* call these functions to GET and SET MIB values */
+
+/* receiver */
+char *      sfl_receiver_get_sFlowRcvrOwner(SFLReceiver *receiver);
+void        sfl_receiver_set_sFlowRcvrOwner(SFLReceiver *receiver, char *sFlowRcvrOwner);
+time_t      sfl_receiver_get_sFlowRcvrTimeout(SFLReceiver *receiver);
+void        sfl_receiver_set_sFlowRcvrTimeout(SFLReceiver *receiver, time_t sFlowRcvrTimeout);
+u_int32_t   sfl_receiver_get_sFlowRcvrMaximumDatagramSize(SFLReceiver *receiver);
+void        sfl_receiver_set_sFlowRcvrMaximumDatagramSize(SFLReceiver *receiver, u_int32_t sFlowRcvrMaximumDatagramSize);
+SFLAddress *sfl_receiver_get_sFlowRcvrAddress(SFLReceiver *receiver);
+void        sfl_receiver_set_sFlowRcvrAddress(SFLReceiver *receiver, SFLAddress *sFlowRcvrAddress);
+u_int32_t   sfl_receiver_get_sFlowRcvrPort(SFLReceiver *receiver);
+void        sfl_receiver_set_sFlowRcvrPort(SFLReceiver *receiver, u_int32_t sFlowRcvrPort);
+/* sampler */
+u_int32_t sfl_sampler_get_sFlowFsReceiver(SFLSampler *sampler);
+void      sfl_sampler_set_sFlowFsReceiver(SFLSampler *sampler, u_int32_t sFlowFsReceiver);
+u_int32_t sfl_sampler_get_sFlowFsPacketSamplingRate(SFLSampler *sampler);
+void      sfl_sampler_set_sFlowFsPacketSamplingRate(SFLSampler *sampler, u_int32_t sFlowFsPacketSamplingRate);
+u_int32_t sfl_sampler_get_sFlowFsMaximumHeaderSize(SFLSampler *sampler);
+void      sfl_sampler_set_sFlowFsMaximumHeaderSize(SFLSampler *sampler, u_int32_t sFlowFsMaximumHeaderSize);
+u_int32_t sfl_sampler_get_samplesLastTick(SFLSampler *sampler);
+/* poller */
+u_int32_t sfl_poller_get_sFlowCpReceiver(SFLPoller *poller);
+void      sfl_poller_set_sFlowCpReceiver(SFLPoller *poller, u_int32_t sFlowCpReceiver);
+u_int32_t sfl_poller_get_sFlowCpInterval(SFLPoller *poller);
+void      sfl_poller_set_sFlowCpInterval(SFLPoller *poller, u_int32_t sFlowCpInterval);
+
+/* fns to set the sflow agent address or sub-id */
+void sfl_agent_set_agentAddress(SFLAgent *agent, SFLAddress *addr);
+void sfl_agent_set_agentSubId(SFLAgent *agent, u_int32_t subId);
+
+/* The poller may need a separate number to reference the local bridge port
+   to get counters if it is not the same as the global ifIndex */
+void sfl_poller_set_bridgePort(SFLPoller *poller, u_int32_t port_no);
+u_int32_t sfl_poller_get_bridgePort(SFLPoller *poller);
+
+/* call this to indicate a discontinuity with a counter like samplePool so that the
+   sflow collector will ignore the next delta */
+void sfl_sampler_resetFlowSeqNo(SFLSampler *sampler);
+
+/* call this to indicate a discontinuity with one or more of the counters so that the
+   sflow collector will ignore the next delta */
+void sfl_poller_resetCountersSeqNo(SFLPoller *poller);
+
+/* The guard used to test the misspelled name SFLOW_SOFTWARE_SAMLING, so the
+   prototype was never visible when SFLOW_SOFTWARE_SAMPLING (the name this
+   header tells users to define) was set.  The misspelling is still accepted
+   so that any build already defining it keeps working. */
+#if defined(SFLOW_SOFTWARE_SAMPLING) || defined(SFLOW_SOFTWARE_SAMLING)
+/* software sampling: call this with every packet - returns non-zero if the packet
+   should be sampled (in which case you then call sfl_sampler_writeFlowSample()) */
+int sfl_sampler_takeSample(SFLSampler *sampler);
+#endif
+
+/* call this to set a maximum samples-per-second threshold. If the sampler reaches this
+   threshold it will automatically back off the sampling rate. A value of 0 disables the
+   mechanism */
+void sfl_sampler_set_backoffThreshold(SFLSampler *sampler, u_int32_t samplesPerSecond);
+u_int32_t sfl_sampler_get_backoffThreshold(SFLSampler *sampler);
+
+/* call this once per second (N.B. not on interrupt stack i.e. not hard real-time) */
+void sfl_agent_tick(SFLAgent *agent, time_t now);
+
+/* call this with each flow sample */
+void sfl_sampler_writeFlowSample(SFLSampler *sampler, SFL_FLOW_SAMPLE_TYPE *fs);
+
+/* call this to push counters samples (usually done in the getCountersFn callback) */
+void sfl_poller_writeCountersSample(SFLPoller *poller, SFL_COUNTERS_SAMPLE_TYPE *cs);
+
+/* call this to deallocate resources */
+void sfl_agent_release(SFLAgent *agent);
+
+
+/* internal fns */
+
+void sfl_receiver_init(SFLReceiver *receiver, SFLAgent *agent);
+void sfl_sampler_init(SFLSampler *sampler, SFLAgent *agent, SFLDataSource_instance *pdsi);
+void sfl_poller_init(SFLPoller *poller, SFLAgent *agent, SFLDataSource_instance *pdsi, void *magic, getCountersFn_t getCountersFn);
+
+
+void sfl_receiver_tick(SFLReceiver *receiver, time_t now);
+void sfl_poller_tick(SFLPoller *poller, time_t now);
+void sfl_sampler_tick(SFLSampler *sampler, time_t now);
+
+int sfl_receiver_writeFlowSample(SFLReceiver *receiver, SFL_FLOW_SAMPLE_TYPE *fs);
+int sfl_receiver_writeCountersSample(SFLReceiver *receiver, SFL_COUNTERS_SAMPLE_TYPE *cs);
+
+void sfl_agent_resetReceiver(SFLAgent *agent, SFLReceiver *receiver);
+
+void sfl_agent_error(SFLAgent *agent, char *modName, char *msg);
+void sfl_agent_sysError(SFLAgent *agent, char *modName, char *msg);
+
+u_int32_t sfl_receiver_samplePacketsSent(SFLReceiver *receiver);
+
+#define SFL_ALLOC malloc
+#define SFL_FREE free
+
+#endif /* SFLOW_API_H */
+
+
diff --git a/lib/sflow_poller.c b/lib/sflow_poller.c
new file mode 100644 (file)
index 0000000..ffd09d3
--- /dev/null
@@ -0,0 +1,142 @@
+/* Copyright (c) 2002-2009 InMon Corp. Licensed under the terms of the InMon sFlow licence: */
+/* http://www.inmon.com/technology/sflowlicense.txt */
+
+#include "sflow_api.h"
+
+/*_________________--------------------------__________________
+  _________________    sfl_poller_init       __________________
+  -----------------__________________________------------------
+*/
+
+/* (Re)initialise a poller: every field is zeroed except the list link
+   `nxt`, which is kept so that a poller already linked into the agent's
+   list can be reset in place.  The agent, data source instance, magic
+   pointer and counters callback are then filled in from the arguments.
+   pdsi may point at poller->dsi itself. */
+void sfl_poller_init(SFLPoller *poller,
+                    SFLAgent *agent,
+                    SFLDataSource_instance *pdsi,
+                    void *magic,         /* ptr to pass back in getCountersFn() */
+                    getCountersFn_t getCountersFn)
+{
+    /* take copies of what must survive the wipe: the dsi (pdsi may alias
+       poller->dsi) and the linked-list pointer */
+    SFLDataSource_instance savedDsi = *pdsi;
+    SFLPoller *savedLink = poller->nxt;
+
+    /* wipe the whole structure */
+    memset(poller, 0, sizeof(*poller));
+
+    /* fill in the caller's parameters */
+    poller->getCountersFn = getCountersFn;
+    poller->magic = magic;
+    poller->dsi = savedDsi; /* structure copy */
+    poller->agent = agent;
+
+    /* put the list link back */
+    poller->nxt = savedLink;
+}
+
+/*_________________--------------------------__________________
+  _________________       reset              __________________
+  -----------------__________________________------------------
+*/
+
+/* Re-run sfl_poller_init() on the poller with its own agent, dsi, magic
+   and callback, which clears every other field except the list link. */
+static void reset(SFLPoller *poller)
+{
+    /* sfl_poller_init() copies *pdsi before it clears the poller, so the
+       poller's own dsi can be passed in directly */
+    sfl_poller_init(poller, poller->agent, &poller->dsi, poller->magic, poller->getCountersFn);
+}
+
+/*_________________---------------------------__________________
+  _________________      MIB access           __________________
+  -----------------___________________________------------------
+*/
+/* MIB getter: index of the receiver this poller sends to (0 = none). */
+u_int32_t sfl_poller_get_sFlowCpReceiver(SFLPoller *poller)
+{
+    const u_int32_t receiverIndex = poller->sFlowCpReceiver;
+    return receiverIndex;
+}
+
+/* MIB setter: select the receiver (by index) that this poller sends to.
+   An index of 0 resets the poller; any other index is resolved through
+   sfl_agent_getReceiver() and the result is cached in myReceiver. */
+void sfl_poller_set_sFlowCpReceiver(SFLPoller *poller, u_int32_t sFlowCpReceiver)
+{
+    poller->sFlowCpReceiver = sFlowCpReceiver;
+
+    if(sFlowCpReceiver == 0) {
+        reset(poller);
+        return;
+    }
+
+    /* retrieve and cache a direct pointer to my receiver */
+    poller->myReceiver = sfl_agent_getReceiver(poller->agent, sFlowCpReceiver);
+}
+
+/* MIB getter: the configured counter polling interval. */
+u_int32_t sfl_poller_get_sFlowCpInterval(SFLPoller *poller)
+{
+    /* the field is stored as time_t; hand it back as u_int32_t */
+    return (u_int32_t)poller->sFlowCpInterval;
+}
+
+/* MIB setter: set the counter polling interval and (re)start the countdown.
+     poller          - poller to configure
+     sFlowCpInterval - polling interval; 0 disables counter polling
+   A non-zero interval starts the countdown at a random value between 1 and
+   sFlowCpInterval.  An interval of 0 sets the countdown to 0, which
+   sfl_poller_tick() treats as "counters retrieval not enabled". */
+void sfl_poller_set_sFlowCpInterval(SFLPoller *poller, u_int32_t sFlowCpInterval) {
+    poller->sFlowCpInterval = sFlowCpInterval;
+    if(sFlowCpInterval != 0) {
+       /* Set the countersCountdown to be a randomly selected value between 1 and
+          sFlowCpInterval. That way the counter polling would be desynchronised
+          (on a 200-port switch, polling all the counters in one second could be harmful). */
+       poller->countersCountdown = 1 + (random() % sFlowCpInterval);
+    }
+    else {
+       /* avoid the modulo-by-zero: an interval of 0 switches polling off */
+       poller->countersCountdown = 0;
+    }
+}
+
+/*_________________---------------------------------__________________
+  _________________          bridge port            __________________
+  -----------------_________________________________------------------
+  May need a separate number to reference the local bridge port
+  to get counters if it is not the same as the global ifIndex.
+*/
+
+/* Record the bridge-local port number associated with this poller. */
+void sfl_poller_set_bridgePort(SFLPoller *poller, u_int32_t port_no)
+{
+    poller->bridgePort = port_no;
+}
+
+/* Return the bridge-local port number stored by sfl_poller_set_bridgePort(). */
+u_int32_t sfl_poller_get_bridgePort(SFLPoller *poller)
+{
+    const u_int32_t port_no = poller->bridgePort;
+    return port_no;
+}
+
+/*_________________---------------------------------__________________
+  _________________   sequence number reset         __________________
+  -----------------_________________________________------------------
+  Used to indicate a counter discontinuity
+  so that the sflow collector will know to ignore the next delta.
+*/
+void sfl_poller_resetCountersSeqNo(SFLPoller *poller)
+{
+    /* restart the counters sample sequence from zero */
+    poller->countersSampleSeqNo = 0;
+}
+
+/*_________________---------------------------__________________
+  _________________    sfl_poller_tick        __________________
+  -----------------___________________________------------------
+*/
+
+/* Advance the poller's countdown by one tick.  When the countdown reaches
+   zero the getCountersFn callback (if any) is invoked with a zeroed
+   counters sample, and the countdown is reloaded from sFlowCpInterval.
+   Nothing happens while the countdown is 0 or no receiver is selected.
+   The `now` argument is not used here. */
+void sfl_poller_tick(SFLPoller *poller, time_t now)
+{
+    /* counters retrieval not enabled, or nobody to send to */
+    if(poller->countersCountdown == 0 || poller->sFlowCpReceiver == 0) return;
+
+    poller->countersCountdown--;
+    if(poller->countersCountdown != 0) return; /* not due yet */
+
+    if(poller->getCountersFn != NULL) {
+        /* call out for counters; the callback is expected to fill in some
+           counter block elements and then call
+           sfl_poller_writeCountersSample(poller, &cs); */
+        SFL_COUNTERS_SAMPLE_TYPE cs;
+        memset(&cs, 0, sizeof(cs));
+        poller->getCountersFn(poller->magic, poller, &cs);
+    }
+
+    /* start the next countdown */
+    poller->countersCountdown = poller->sFlowCpInterval;
+}
+
+/*_________________---------------------------------__________________
+  _________________ sfl_poller_writeCountersSample  __________________
+  -----------------_________________________________------------------
+*/
+
+/* Complete the header of a counters sample (sequence number and data
+   source identification) and pass it to the poller's cached receiver,
+   if there is one. */
+void sfl_poller_writeCountersSample(SFLPoller *poller, SFL_COUNTERS_SAMPLE_TYPE *cs)
+{
+    SFLReceiver *dest;
+
+    /* next sequence number for this poller */
+    poller->countersSampleSeqNo++;
+    cs->sequence_number = poller->countersSampleSeqNo;
+
+    /* identify the data source, in expanded or compact form */
+#ifdef SFL_USE_32BIT_INDEX
+    cs->ds_class = SFL_DS_CLASS(poller->dsi);
+    cs->ds_index = SFL_DS_INDEX(poller->dsi);
+#else
+    cs->source_id = SFL_DS_DATASOURCE(poller->dsi);
+#endif
+
+    /* hand over to my receiver */
+    dest = poller->myReceiver;
+    if(dest != NULL) sfl_receiver_writeCountersSample(dest, cs);
+}
+
diff --git a/lib/sflow_receiver.c b/lib/sflow_receiver.c
new file mode 100644 (file)
index 0000000..7fccab3
--- /dev/null
@@ -0,0 +1,832 @@
+/* Copyright (c) 2002-2009 InMon Corp. Licensed under the terms of the InMon sFlow licence: */
+/* http://www.inmon.com/technology/sflowlicense.txt */
+
+#include <assert.h>
+#include "sflow_api.h"
+
+static void resetSampleCollector(SFLReceiver *receiver);
+static void sendSample(SFLReceiver *receiver);
+static void sflError(SFLReceiver *receiver, char *errm);
+inline static void putNet32(SFLReceiver *receiver, u_int32_t val);
+inline static void putAddress(SFLReceiver *receiver, SFLAddress *addr);
+#ifdef SFLOW_DO_SOCKET
+static void initSocket(SFLReceiver *receiver);
+#endif
+
+/*_________________--------------------------__________________
+  _________________    sfl_receiver_init     __________________
+  -----------------__________________________------------------
+
+  Zeroes *receiver, attaches it to 'agent', applies the default datagram
+  size and collector port, and pre-writes the leading datagram header
+  words (version, agent address, sub-agent id) into the sample collector
+  buffer.  Also used by reset() to return a receiver to this state.
+
+  NOTE(review): the memset() wipes every field of *receiver.  If
+  SFLReceiver carries a list-link pointer (sfl_sampler_init preserves one
+  for SFLSampler) it is lost here - confirm against sflow_api.h.
+*/
+
+void sfl_receiver_init(SFLReceiver *receiver, SFLAgent *agent)
+{
+    /* first clear everything */
+    memset(receiver, 0, sizeof(*receiver));
+
+    /* now copy in the parameters */
+    receiver->agent = agent;
+
+    /* set defaults */
+    receiver->sFlowRcvrMaximumDatagramSize = SFL_DEFAULT_DATAGRAM_SIZE;
+    receiver->sFlowRcvrPort = SFL_DEFAULT_COLLECTOR_PORT;
+
+#ifdef SFLOW_DO_SOCKET
+    /* initialize the socket address */
+    initSocket(receiver);
+#endif
+
+    /* preset the leading header words; sendSample() fills in the remaining ones */
+    receiver->sampleCollector.datap = receiver->sampleCollector.data; /* write from the top of the buffer */
+    putNet32(receiver, SFLDATAGRAM_VERSION5);
+    putAddress(receiver, &agent->myIP);
+    putNet32(receiver, agent->subId);
+
+    /* prepare to receive the first sample (positions datap just after the header) */
+    resetSampleCollector(receiver);
+}
+
+/*_________________---------------------------__________________
+  _________________      reset                __________________
+  -----------------___________________________------------------
+
+  called on timeout, or when owner string is cleared
+*/
+
+static void reset(SFLReceiver *receiver)
+{
+    /* step 1: the agent tells samplers and pollers to stop sending to this receiver */
+    sfl_agent_resetReceiver(receiver->agent, receiver);
+
+    /* step 2: return the receiver itself to its initial state */
+    sfl_receiver_init(receiver, receiver->agent);
+}
+
+#ifdef SFLOW_DO_SOCKET
+/*_________________---------------------------__________________
+  _________________      initSocket           __________________
+  -----------------___________________________------------------
+*/
+
+static void initSocket(SFLReceiver *receiver)
+{
+    /* Fill in the sockaddr that matches the collector address type.
+       Anything that is not IPv6 is set up as an IPv4 destination. */
+    if(receiver->sFlowRcvrAddress.type != SFLADDRESSTYPE_IP_V6) {
+        struct sockaddr_in *dst4 = &receiver->receiver4;
+        dst4->sin_port = htons((u_int16_t)receiver->sFlowRcvrPort);
+        dst4->sin_family = AF_INET;
+        dst4->sin_addr = receiver->sFlowRcvrAddress.address.ip_v4;
+        return;
+    }
+
+    {
+        struct sockaddr_in6 *dst6 = &receiver->receiver6;
+        dst6->sin6_port = htons((u_int16_t)receiver->sFlowRcvrPort);
+        dst6->sin6_family = AF_INET6;
+        dst6->sin6_addr = receiver->sFlowRcvrAddress.address.ip_v6;
+    }
+}
+#endif
+
+/*_________________----------------------------------------_____________
+  _________________          MIB Vars                      _____________
+  -----------------________________________________________-------------
+*/
+
+char * sfl_receiver_get_sFlowRcvrOwner(SFLReceiver *receiver)
+{
+    /* MIB read: the owner string pointer exactly as it was stored */
+    return receiver->sFlowRcvrOwner;
+}
+void sfl_receiver_set_sFlowRcvrOwner(SFLReceiver *receiver, char *sFlowRcvrOwner)
+{
+    /* the pointer is stored as given; the string itself is not copied */
+    receiver->sFlowRcvrOwner = sFlowRcvrOwner;
+
+    /* a non-empty owner needs nothing further */
+    if(sFlowRcvrOwner != NULL && sFlowRcvrOwner[0] != '\0') return;
+
+    /* NULL or "" means the owner string was cleared: that is the reset condition */
+    reset(receiver);
+}
+time_t sfl_receiver_get_sFlowRcvrTimeout(SFLReceiver *receiver)
+{
+    /* MIB read: the timeout value that sfl_receiver_tick() counts down */
+    return receiver->sFlowRcvrTimeout;
+}
+void sfl_receiver_set_sFlowRcvrTimeout(SFLReceiver *receiver, time_t sFlowRcvrTimeout)
+{
+    /* MIB write: stored as given; sfl_receiver_tick() never counts down
+       a value of 0 or 0xFFFFFFFF */
+    receiver->sFlowRcvrTimeout = sFlowRcvrTimeout;
+}
+u_int32_t sfl_receiver_get_sFlowRcvrMaximumDatagramSize(SFLReceiver *receiver)
+{
+    /* MIB read: the size limit applied when samples are packed into a datagram */
+    return receiver->sFlowRcvrMaximumDatagramSize;
+}
+void sfl_receiver_set_sFlowRcvrMaximumDatagramSize(SFLReceiver *receiver, u_int32_t sFlowRcvrMaximumDatagramSize)
+{
+    /* requests below the minimum are raised to SFL_MIN_DATAGRAM_SIZE.
+       NOTE(review): no upper bound is applied here - confirm that the buffer
+       behind sampleCollector.data can hold the largest size a caller may set. */
+    if(sFlowRcvrMaximumDatagramSize < SFL_MIN_DATAGRAM_SIZE) {
+        receiver->sFlowRcvrMaximumDatagramSize = SFL_MIN_DATAGRAM_SIZE;
+    }
+    else {
+        receiver->sFlowRcvrMaximumDatagramSize = sFlowRcvrMaximumDatagramSize;
+    }
+}
+SFLAddress *sfl_receiver_get_sFlowRcvrAddress(SFLReceiver *receiver)
+{
+    /* MIB read: points at the address stored inside the receiver (not a copy) */
+    SFLAddress *stored = &receiver->sFlowRcvrAddress;
+    return stored;
+}
+void sfl_receiver_set_sFlowRcvrAddress(SFLReceiver *receiver, SFLAddress *sFlowRcvrAddress)
+{
+    /* a NULL argument leaves the stored collector address untouched */
+    if(sFlowRcvrAddress != NULL) {
+        receiver->sFlowRcvrAddress = *sFlowRcvrAddress; /* copied by value */
+    }
+#ifdef SFLOW_DO_SOCKET
+    /* rebuild the socket address from the stored address and port */
+    initSocket(receiver);
+#endif
+}
+u_int32_t sfl_receiver_get_sFlowRcvrPort(SFLReceiver *receiver)
+{
+    /* MIB read: the collector port as stored (host byte order; initSocket applies htons) */
+    return receiver->sFlowRcvrPort;
+}
+void sfl_receiver_set_sFlowRcvrPort(SFLReceiver *receiver, u_int32_t sFlowRcvrPort)
+{
+    /* MIB write: remember the new collector port ... */
+    receiver->sFlowRcvrPort = sFlowRcvrPort;
+#ifdef SFLOW_DO_SOCKET
+    /* ... and carry it into the socket address structure */
+    initSocket(receiver);
+#endif
+}
+
+/*_________________---------------------------__________________
+  _________________   sfl_receiver_tick       __________________
+  -----------------___________________________------------------
+*/
+
+void sfl_receiver_tick(SFLReceiver *receiver, time_t now)
+{
+    /* flush whatever has accumulated since the last send ('now' is not used) */
+    if(receiver->sampleCollector.numSamples > 0) {
+        sendSample(receiver);
+    }
+
+    /* a timeout of 0 or 0xFFFFFFFF is never counted down */
+    if(receiver->sFlowRcvrTimeout == 0) return;
+    if((u_int32_t)receiver->sFlowRcvrTimeout == 0xFFFFFFFF) return;
+
+    /* one tick has elapsed; reset the receiver when the timeout reaches 0 */
+    receiver->sFlowRcvrTimeout--;
+    if(receiver->sFlowRcvrTimeout == 0) reset(receiver);
+}
+
+/*_________________-----------------------------__________________
+  _________________   receiver write utilities  __________________
+  -----------------_____________________________------------------
+*/
+inline static void put32(SFLReceiver *receiver, u_int32_t val)
+{
+    /* store the word exactly as given (no byte-order conversion), then step past it */
+    receiver->sampleCollector.datap[0] = val;
+    receiver->sampleCollector.datap++;
+}
+
+inline static void putNet32(SFLReceiver *receiver, u_int32_t val)
+{
+    /* convert to network byte order, store at the cursor, then step past it */
+    u_int32_t wire = htonl(val);
+    *receiver->sampleCollector.datap = wire;
+    receiver->sampleCollector.datap += 1;
+}
+
+inline static void putNet32_run(SFLReceiver *receiver, void *obj, size_t quads)
+{
+    /* treat obj as an array of 'quads' 32-bit words and append each one
+       in network byte order */
+    u_int32_t *words = (u_int32_t *)obj;
+    size_t i;
+    for(i = 0; i < quads; i++) {
+        putNet32(receiver, words[i]);
+    }
+}
+
+inline static void putNet64(SFLReceiver *receiver, u_int64_t val64) /* append a 64-bit value as two 32-bit words in network byte order */
+{
+    u_int32_t *firstQuadPtr = receiver->sampleCollector.datap; /* where the first of the two words lands */
+    // first copy the bytes in
+    memcpy((u_char *)firstQuadPtr, &val64, 8); /* raw host-order copy; datap is not advanced yet */
+    if(htonl(1) != 1) { /* true when host byte order differs from network byte order */
+       // swap the bytes, and reverse the quads too
+       u_int32_t tmp = *receiver->sampleCollector.datap++; /* save word 0, move on to word 1 */
+       *firstQuadPtr = htonl(*receiver->sampleCollector.datap); /* word 1, byte-swapped, becomes word 0 */
+       *receiver->sampleCollector.datap++ = htonl(tmp); /* old word 0, byte-swapped, becomes word 1 */
+    }
+    else receiver->sampleCollector.datap += 2; /* bytes are already in network order; just skip both words */
+}
+
+inline static void put128(SFLReceiver *receiver, u_char *val)
+{
+    /* 16 bytes are copied verbatim; that fills four 32-bit words of the buffer */
+    u_int32_t *dst = receiver->sampleCollector.datap;
+    memcpy(dst, val, 16);
+    receiver->sampleCollector.datap = dst + 4;
+}
+
+/* Append an XDR-style string: a 32-bit length in network byte order followed
+   by s->len bytes taken from s->str, padded out to a 4-byte boundary.
+   The pad bytes are written as zeros.  resetSampleCollector() only rewinds
+   the write cursor, so without this the padding would carry bytes left in
+   the buffer by an earlier datagram. */
+inline static void putString(SFLReceiver *receiver, SFLString *s)
+{
+    u_int32_t paddedLen = ((s->len + 3) / 4) * 4; /* same rounding as stringEncodingLength() */
+    u_char *dst;
+
+    putNet32(receiver, s->len);
+    dst = (u_char *)receiver->sampleCollector.datap;
+    memcpy(dst, s->str, s->len);
+    if(paddedLen > s->len) memset(dst + s->len, 0, paddedLen - s->len); /* zero the padding */
+    receiver->sampleCollector.datap += paddedLen / 4; /* datap counts 32-bit words */
+}
+
+inline static u_int32_t stringEncodingLength(SFLString *s)
+{
+    /* result is in bytes: the payload rounded up to a whole number of
+       4-byte words (hence the multiply by 4), plus 4 bytes for the length word */
+    u_int32_t words = (s->len + 3) / 4;
+    return 4 + (words * 4);
+}
+
+/* Append an address as a type word followed by the address bytes.
+   IPv6 addresses take 16 bytes and IPv4 addresses 4 bytes; the address bytes
+   are copied without byte-order conversion.  An unspecified address (type 0)
+   or any type that is neither IPv4 nor IPv6 is encoded as IPV4:0.0.0.0.
+   This keeps the bytes written equal to addressEncodingLength(), which
+   reports 20 for IPv6 and 8 for every other type, and to the header offsets
+   used by sendSample() and resetSampleCollector(). */
+inline static void putAddress(SFLReceiver *receiver, SFLAddress *addr)
+{
+    if(addr->type == 0
+       || (addr->type != SFLADDRESSTYPE_IP_V4 && addr->type != SFLADDRESSTYPE_IP_V6)) {
+        /* unspecified or unrecognized: 8 bytes, matching addressEncodingLength() */
+        putNet32(receiver, SFLADDRESSTYPE_IP_V4);
+        put32(receiver, 0);
+    }
+    else if(addr->type == SFLADDRESSTYPE_IP_V4) {
+        putNet32(receiver, SFLADDRESSTYPE_IP_V4);
+        put32(receiver, addr->address.ip_v4.addr);
+    }
+    else {
+        putNet32(receiver, SFLADDRESSTYPE_IP_V6);
+        put128(receiver, addr->address.ip_v6.addr);
+    }
+}
+
+inline static u_int32_t addressEncodingLength(SFLAddress *addr)
+{
+    /* 4-byte type word plus the address: 16 bytes for IPv6, otherwise 4
+       (an unspecified address is counted as IPv4) */
+    if(addr->type == SFLADDRESSTYPE_IP_V6) return 20;
+    return 8;
+}
+
+/* Append a 6-byte MAC address.  It occupies two 32-bit words (8 bytes) of the
+   datagram; the 2 trailing pad bytes are zeroed so that bytes left in the
+   buffer by an earlier datagram are not sent. */
+inline static void putMACAddress(SFLReceiver *receiver, u_int8_t *mac)
+{
+    u_char *dst = (u_char *)receiver->sampleCollector.datap;
+    memcpy(dst, mac, 6);
+    memset(dst + 6, 0, 2); /* zero the padding */
+    receiver->sampleCollector.datap += 2;
+}
+
+inline static void putSwitch(SFLReceiver *receiver, SFLExtended_switch *sw)
+{
+    /* four words: the src VLAN/priority pair, then the dst VLAN/priority pair */
+    putNet32(receiver, sw->src_vlan);
+    putNet32(receiver, sw->src_priority);
+
+    putNet32(receiver, sw->dst_vlan);
+    putNet32(receiver, sw->dst_priority);
+}
+
+inline static void putRouter(SFLReceiver *receiver, SFLExtended_router *router)
+{
+    /* next-hop address first, then the src and dst mask words */
+    SFLAddress *hop = &router->nexthop;
+    putAddress(receiver, hop);
+    putNet32(receiver, router->src_mask);
+    putNet32(receiver, router->dst_mask);
+}
+
+inline static u_int32_t routerEncodingLength(SFLExtended_router *router)
+{
+    /* encoded next-hop address plus two 4-byte mask words */
+    u_int32_t len = addressEncodingLength(&router->nexthop);
+    len += 8;
+    return len;
+}
+
+inline static void putGateway(SFLReceiver *receiver, SFLExtended_gateway *gw)
+{
+    u_int32_t i;
+
+    putAddress(receiver, &gw->nexthop);
+    putNet32(receiver, gw->as);
+    putNet32(receiver, gw->src_as);
+    putNet32(receiver, gw->src_peer_as);
+
+    /* dst AS path: a segment count, then each segment as a type word,
+       a length word and 'length' further words */
+    putNet32(receiver, gw->dst_as_path_segments);
+    for(i = 0; i < gw->dst_as_path_segments; i++) {
+        putNet32(receiver, gw->dst_as_path[i].type);
+        putNet32(receiver, gw->dst_as_path[i].length);
+        putNet32_run(receiver, gw->dst_as_path[i].as.seq, gw->dst_as_path[i].length);
+    }
+
+    /* communities: a count word followed by that many words */
+    putNet32(receiver, gw->communities_length);
+    putNet32_run(receiver, gw->communities, gw->communities_length);
+
+    putNet32(receiver, gw->localpref);
+}
+
+inline static u_int32_t gatewayEncodingLength(SFLExtended_gateway *gw)
+{
+    u_int32_t i;
+    /* next hop, then 16 bytes for as, src_as, src_peer_as, dst_as_path_segments */
+    u_int32_t total = addressEncodingLength(&gw->nexthop) + 16;
+
+    for(i = 0; i < gw->dst_as_path_segments; i++) {
+        /* per segment: type and length words, then 'length' 4-byte entries */
+        total += 8 + (4 * gw->dst_as_path[i].length);
+    }
+
+    /* communities_length word, the communities themselves, localpref word */
+    total += 4 + (4 * gw->communities_length) + 4;
+    return total;
+}
+
+inline static void putUser(SFLReceiver *receiver, SFLExtended_user *user)
+{
+    /* src side: charset word, then the user string */
+    putNet32(receiver, user->src_charset);
+    putString(receiver, &user->src_user);
+
+    /* dst side, same layout */
+    putNet32(receiver, user->dst_charset);
+    putString(receiver, &user->dst_user);
+}
+
+inline static u_int32_t userEncodingLength(SFLExtended_user *user)
+{
+    /* each side is a 4-byte charset word plus its encoded string */
+    u_int32_t srcLen = 4 + stringEncodingLength(&user->src_user);
+    u_int32_t dstLen = 4 + stringEncodingLength(&user->dst_user);
+    return srcLen + dstLen;
+}
+
+inline static void putUrl(SFLReceiver *receiver, SFLExtended_url *url)
+{
+    /* direction word, then the url string, then the host string */
+    SFLString *urlStr = &url->url;
+    SFLString *hostStr = &url->host;
+
+    putNet32(receiver, url->direction);
+    putString(receiver, urlStr);
+    putString(receiver, hostStr);
+}
+
+inline static u_int32_t urlEncodingLength(SFLExtended_url *url)
+{
+    /* 4-byte direction word plus the two encoded strings */
+    u_int32_t len = 4;
+    len += stringEncodingLength(&url->url);
+    len += stringEncodingLength(&url->host);
+    return len;
+}
+
+inline static void putLabelStack(SFLReceiver *receiver, SFLLabelStack *labelStack)
+{
+    /* the depth word comes first ... */
+    putNet32(receiver, labelStack->depth);
+
+    /* ... followed by 'depth' words taken from the stack array */
+    putNet32_run(receiver, labelStack->stack, labelStack->depth);
+}
+
+inline static u_int32_t labelStackEncodingLength(SFLLabelStack *labelStack)
+{
+    /* 4-byte depth word plus 4 bytes per stack entry */
+    u_int32_t entryBytes = 4 * labelStack->depth;
+    return 4 + entryBytes;
+}
+
+inline static void putMpls(SFLReceiver *receiver, SFLExtended_mpls *mpls)
+{
+    /* next-hop address, then the in and out label stacks */
+    putAddress(receiver, &mpls->nextHop);
+
+    putLabelStack(receiver, &mpls->in_stack);
+    putLabelStack(receiver, &mpls->out_stack);
+}
+
+inline static u_int32_t mplsEncodingLength(SFLExtended_mpls *mpls)
+{
+    /* sum of the encoded next hop and both encoded label stacks */
+    u_int32_t len = addressEncodingLength(&mpls->nextHop);
+    len += labelStackEncodingLength(&mpls->in_stack);
+    len += labelStackEncodingLength(&mpls->out_stack);
+    return len;
+}
+
+inline static void putNat(SFLReceiver *receiver, SFLExtended_nat *nat)
+{
+    /* two addresses back to back: src first, then dst */
+    SFLAddress *from = &nat->src;
+    SFLAddress *to = &nat->dst;
+
+    putAddress(receiver, from);
+    putAddress(receiver, to);
+}
+
+inline static u_int32_t natEncodingLength(SFLExtended_nat *nat)
+{
+    /* the encoded src address plus the encoded dst address */
+    u_int32_t srcLen = addressEncodingLength(&nat->src);
+    u_int32_t dstLen = addressEncodingLength(&nat->dst);
+    return srcLen + dstLen;
+}
+
+inline static void putMplsTunnel(SFLReceiver *receiver, SFLExtended_mpls_tunnel *tunnel)
+{
+    /* the LSP name string, then the tunnel id and cos words */
+    putString(receiver, &tunnel->tunnel_lsp_name);
+
+    putNet32(receiver, tunnel->tunnel_id);
+    putNet32(receiver, tunnel->tunnel_cos);
+}
+
+inline static u_int32_t mplsTunnelEncodingLength(SFLExtended_mpls_tunnel *tunnel)
+{
+    /* the encoded name string plus two 4-byte words (tunnel_id, tunnel_cos) */
+    u_int32_t nameLen = stringEncodingLength(&tunnel->tunnel_lsp_name);
+    return nameLen + 8;
+}
+
+inline static void putMplsVc(SFLReceiver *receiver, SFLExtended_mpls_vc *vc)
+{
+    /* the instance name string, then the vll_vc_id and vc_label_cos words */
+    putString(receiver, &vc->vc_instance_name);
+
+    putNet32(receiver, vc->vll_vc_id);
+    putNet32(receiver, vc->vc_label_cos);
+}
+
+inline static u_int32_t mplsVcEncodingLength(SFLExtended_mpls_vc *vc)
+{
+    /* the encoded name string plus two 4-byte words (vll_vc_id, vc_label_cos) */
+    u_int32_t nameLen = stringEncodingLength(&vc->vc_instance_name);
+    return nameLen + 8;
+}
+
+inline static void putMplsFtn(SFLReceiver *receiver, SFLExtended_mpls_FTN *ftn)
+{
+    /* the description string followed by the mask word */
+    SFLString *descr = &ftn->mplsFTNDescr;
+    putString(receiver, descr);
+    putNet32(receiver, ftn->mplsFTNMask);
+}
+
+inline static u_int32_t mplsFtnEncodingLength(SFLExtended_mpls_FTN *ftn)
+{
+    /* the encoded description string plus the 4-byte mask word */
+    u_int32_t descrLen = stringEncodingLength(&ftn->mplsFTNDescr);
+    return descrLen + 4;
+}
+
+inline static void putMplsLdpFec(SFLReceiver *receiver, SFLExtended_mpls_LDP_FEC *ldpfec)
+{
+    /* this element is a single word: the address prefix length */
+    putNet32(receiver, ldpfec->mplsFecAddrPrefixLength);
+}
+
+inline static u_int32_t mplsLdpFecEncodingLength(SFLExtended_mpls_LDP_FEC *ldpfec)
+{
+    /* always one 4-byte word; the argument is not inspected */
+    return 4;
+}
+
+inline static void putVlanTunnel(SFLReceiver *receiver, SFLExtended_vlan_tunnel *vlanTunnel)
+{
+    /* the element consists of nothing but its label stack */
+    SFLLabelStack *stack = &vlanTunnel->stack;
+    putLabelStack(receiver, stack);
+}
+
+inline static u_int32_t vlanTunnelEncodingLength(SFLExtended_vlan_tunnel *vlanTunnel)
+{
+    /* same size as the label stack it wraps */
+    u_int32_t len = labelStackEncodingLength(&vlanTunnel->stack);
+    return len;
+}
+
+
+inline static void putGenericCounters(SFLReceiver *receiver, SFLIf_counters *counters)
+{
+    SFLIf_counters *c = counters;
+
+    /* interface identity and state (ifSpeed is a 64-bit field) */
+    putNet32(receiver, c->ifIndex);
+    putNet32(receiver, c->ifType);
+    putNet64(receiver, c->ifSpeed);
+    putNet32(receiver, c->ifDirection);
+    putNet32(receiver, c->ifStatus);
+
+    /* input side: 64-bit octet count, then six 32-bit counters */
+    putNet64(receiver, c->ifInOctets);
+    putNet32(receiver, c->ifInUcastPkts);
+    putNet32(receiver, c->ifInMulticastPkts);
+    putNet32(receiver, c->ifInBroadcastPkts);
+    putNet32(receiver, c->ifInDiscards);
+    putNet32(receiver, c->ifInErrors);
+    putNet32(receiver, c->ifInUnknownProtos);
+
+    /* output side: 64-bit octet count, then five 32-bit counters */
+    putNet64(receiver, c->ifOutOctets);
+    putNet32(receiver, c->ifOutUcastPkts);
+    putNet32(receiver, c->ifOutMulticastPkts);
+    putNet32(receiver, c->ifOutBroadcastPkts);
+    putNet32(receiver, c->ifOutDiscards);
+    putNet32(receiver, c->ifOutErrors);
+
+    putNet32(receiver, c->ifPromiscuousMode);
+}
+
+
+/*_________________-----------------------------__________________
+  _________________      computeFlowSampleSize  __________________
+  -----------------_____________________________------------------
+
+  Returns the number of bytes that sfl_receiver_writeFlowSample() will
+  write for 'fs', tag and length words included, or -1 (after reporting
+  through sflError) when an element has an unrecognized tag.
+
+  Side effects: fs->num_elements is recounted from the element list, and
+  each element's encoded size is cached in elem->length for the writer.
+
+  NOTE(review): the ETHERNET, IPV4, IPV6 and EX_SWITCH sizes come from
+  sizeof() on the in-memory structs, so they match the bytes written only
+  if those structs have the same size as their encodings - confirm
+  against the struct definitions.
+*/
+
+static int computeFlowSampleSize(SFLReceiver *receiver, SFL_FLOW_SAMPLE_TYPE *fs)
+{
+    SFLFlow_sample_element *elem = fs->elements;
+#ifdef SFL_USE_32BIT_INDEX
+    u_int siz = 52; /* tag, length, sequence_number, ds_class, ds_index, sampling_rate,
+                      sample_pool, drops, inputFormat, input, outputFormat, output, number of elements */
+#else
+    u_int siz = 40; /* tag, length, sequence_number, source_id, sampling_rate,
+                      sample_pool, drops, input, output, number of elements */
+#endif
+
+    fs->num_elements = 0; /* we're going to count them again even if this was set by the client */
+    for(; elem != NULL; elem = elem->nxt) {
+       u_int elemSiz = 0;
+       fs->num_elements++;
+       siz += 8; /* tag, length */
+       switch(elem->tag) {
+       case SFLFLOW_HEADER:
+           elemSiz = 16; /* header_protocol, frame_length, stripped, header_length */
+           elemSiz += ((elem->flowType.header.header_length + 3) / 4) * 4; /* header, rounded up to nearest 4 bytes */
+           break;
+       case SFLFLOW_ETHERNET: elemSiz = sizeof(SFLSampled_ethernet); break;
+       case SFLFLOW_IPV4: elemSiz = sizeof(SFLSampled_ipv4); break;
+       case SFLFLOW_IPV6: elemSiz = sizeof(SFLSampled_ipv6); break;
+       case SFLFLOW_EX_SWITCH: elemSiz = sizeof(SFLExtended_switch); break;
+       case SFLFLOW_EX_ROUTER: elemSiz = routerEncodingLength(&elem->flowType.router); break;
+       case SFLFLOW_EX_GATEWAY: elemSiz = gatewayEncodingLength(&elem->flowType.gateway); break;
+       case SFLFLOW_EX_USER: elemSiz = userEncodingLength(&elem->flowType.user); break;
+       case SFLFLOW_EX_URL: elemSiz = urlEncodingLength(&elem->flowType.url); break;
+       case SFLFLOW_EX_MPLS: elemSiz = mplsEncodingLength(&elem->flowType.mpls); break;
+       case SFLFLOW_EX_NAT: elemSiz = natEncodingLength(&elem->flowType.nat); break;
+       case SFLFLOW_EX_MPLS_TUNNEL: elemSiz = mplsTunnelEncodingLength(&elem->flowType.mpls_tunnel); break;
+       case SFLFLOW_EX_MPLS_VC: elemSiz = mplsVcEncodingLength(&elem->flowType.mpls_vc); break;
+       case SFLFLOW_EX_MPLS_FTN: elemSiz = mplsFtnEncodingLength(&elem->flowType.mpls_ftn); break;
+       case SFLFLOW_EX_MPLS_LDP_FEC: elemSiz = mplsLdpFecEncodingLength(&elem->flowType.mpls_ldp_fec); break;
+       case SFLFLOW_EX_VLAN_TUNNEL: elemSiz = vlanTunnelEncodingLength(&elem->flowType.vlan_tunnel); break;
+       default:
+           sflError(receiver, "unexpected packet_data_tag"); /* also resets the sample collector */
+           return -1;
+           break;
+       }
+       // cache the element size, and accumulate it into the overall FlowSample size
+       elem->length = elemSiz;
+       siz += elemSiz;
+    }
+
+    return siz;
+}
+
+/*_________________-------------------------------__________________
+  _________________ sfl_receiver_writeFlowSample  __________________
+  -----------------_______________________________------------------
+*/
+
+int sfl_receiver_writeFlowSample(SFLReceiver *receiver, SFL_FLOW_SAMPLE_TYPE *fs)
+{
+    int packedSize;
+    if(fs == NULL) return -1;
+    if((packedSize = computeFlowSampleSize(receiver, fs)) == -1) return -1;
+
+    // check in case this one sample alone is too big for the datagram
+    // in fact - if it is even half as big then we should ditch it. Very
+    // important to avoid overruning the packet buffer.
+    if(packedSize > (int)(receiver->sFlowRcvrMaximumDatagramSize / 2)) {
+       sflError(receiver, "flow sample too big for datagram");
+       return -1;
+    }
+
+    // if the sample pkt is full enough so that this sample might put
+    // it over the limit, then we should send it now before going on.
+    if((receiver->sampleCollector.pktlen + packedSize) >= receiver->sFlowRcvrMaximumDatagramSize)
+       sendSample(receiver);
+    
+    receiver->sampleCollector.numSamples++;
+
+#ifdef SFL_USE_32BIT_INDEX
+    putNet32(receiver, SFLFLOW_SAMPLE_EXPANDED);
+#else
+    putNet32(receiver, SFLFLOW_SAMPLE);
+#endif
+
+    putNet32(receiver, packedSize - 8); // don't include tag and len
+    putNet32(receiver, fs->sequence_number);
+
+#ifdef SFL_USE_32BIT_INDEX
+    putNet32(receiver, fs->ds_class);
+    putNet32(receiver, fs->ds_index);
+#else
+    putNet32(receiver, fs->source_id);
+#endif
+
+    putNet32(receiver, fs->sampling_rate);
+    putNet32(receiver, fs->sample_pool);
+    putNet32(receiver, fs->drops);
+
+#ifdef SFL_USE_32BIT_INDEX
+    putNet32(receiver, fs->inputFormat);
+    putNet32(receiver, fs->input);
+    putNet32(receiver, fs->outputFormat);
+    putNet32(receiver, fs->output);
+#else
+    putNet32(receiver, fs->input);
+    putNet32(receiver, fs->output);
+#endif
+
+    putNet32(receiver, fs->num_elements);
+
+    {
+       SFLFlow_sample_element *elem = fs->elements;
+       for(; elem != NULL; elem = elem->nxt) {
+           
+           putNet32(receiver, elem->tag);
+           putNet32(receiver, elem->length); // length cached in computeFlowSampleSize()
+           
+           switch(elem->tag) {
+           case SFLFLOW_HEADER:
+               putNet32(receiver, elem->flowType.header.header_protocol);
+               putNet32(receiver, elem->flowType.header.frame_length);
+               putNet32(receiver, elem->flowType.header.stripped);
+               putNet32(receiver, elem->flowType.header.header_length);
+               /* the header */
+               memcpy(receiver->sampleCollector.datap, elem->flowType.header.header_bytes, elem->flowType.header.header_length);
+               /* round up to multiple of 4 to preserve alignment */
+               receiver->sampleCollector.datap += ((elem->flowType.header.header_length + 3) / 4);
+               break;
+           case SFLFLOW_ETHERNET:
+               putNet32(receiver, elem->flowType.ethernet.eth_len);
+               putMACAddress(receiver, elem->flowType.ethernet.src_mac);
+               putMACAddress(receiver, elem->flowType.ethernet.dst_mac);
+               putNet32(receiver, elem->flowType.ethernet.eth_type);
+               break;
+           case SFLFLOW_IPV4:
+               putNet32(receiver, elem->flowType.ipv4.length);
+               putNet32(receiver, elem->flowType.ipv4.protocol);
+               put32(receiver, elem->flowType.ipv4.src_ip.addr);
+               put32(receiver, elem->flowType.ipv4.dst_ip.addr);
+               putNet32(receiver, elem->flowType.ipv4.src_port);
+               putNet32(receiver, elem->flowType.ipv4.dst_port);
+               putNet32(receiver, elem->flowType.ipv4.tcp_flags);
+               putNet32(receiver, elem->flowType.ipv4.tos);
+               break;
+           case SFLFLOW_IPV6:
+               putNet32(receiver, elem->flowType.ipv6.length);
+               putNet32(receiver, elem->flowType.ipv6.protocol);
+               put128(receiver, elem->flowType.ipv6.src_ip.addr);
+               put128(receiver, elem->flowType.ipv6.dst_ip.addr);
+               putNet32(receiver, elem->flowType.ipv6.src_port);
+               putNet32(receiver, elem->flowType.ipv6.dst_port);
+               putNet32(receiver, elem->flowType.ipv6.tcp_flags);
+               putNet32(receiver, elem->flowType.ipv6.priority);
+               break;
+           case SFLFLOW_EX_SWITCH: putSwitch(receiver, &elem->flowType.sw); break;
+           case SFLFLOW_EX_ROUTER: putRouter(receiver, &elem->flowType.router); break;
+           case SFLFLOW_EX_GATEWAY: putGateway(receiver, &elem->flowType.gateway); break;
+           case SFLFLOW_EX_USER: putUser(receiver, &elem->flowType.user); break;
+           case SFLFLOW_EX_URL: putUrl(receiver, &elem->flowType.url); break;
+           case SFLFLOW_EX_MPLS: putMpls(receiver, &elem->flowType.mpls); break;
+           case SFLFLOW_EX_NAT: putNat(receiver, &elem->flowType.nat); break;
+           case SFLFLOW_EX_MPLS_TUNNEL: putMplsTunnel(receiver, &elem->flowType.mpls_tunnel); break;
+           case SFLFLOW_EX_MPLS_VC: putMplsVc(receiver, &elem->flowType.mpls_vc); break;
+           case SFLFLOW_EX_MPLS_FTN: putMplsFtn(receiver, &elem->flowType.mpls_ftn); break;
+           case SFLFLOW_EX_MPLS_LDP_FEC: putMplsLdpFec(receiver, &elem->flowType.mpls_ldp_fec); break;
+           case SFLFLOW_EX_VLAN_TUNNEL: putVlanTunnel(receiver, &elem->flowType.vlan_tunnel); break;
+           default:
+               sflError(receiver, "unexpected packet_data_tag");
+               return -1;
+               break;
+           }
+       }
+    }
+
+    // sanity check
+    assert(((u_char *)receiver->sampleCollector.datap
+           - (u_char *)receiver->sampleCollector.data
+           - receiver->sampleCollector.pktlen)  == (u_int32_t)packedSize);
+
+    // update the pktlen
+    receiver->sampleCollector.pktlen = (u_char *)receiver->sampleCollector.datap - (u_char *)receiver->sampleCollector.data;
+    return packedSize;
+}
+
+/*_________________-----------------------------__________________
+  _________________ computeCountersSampleSize   __________________
+  -----------------_____________________________------------------
+
+  Returns the number of bytes that sfl_receiver_writeCountersSample()
+  is expected to write for 'cs', tag and length words included, or -1
+  (after reporting through sflError) on an unrecognized element tag.
+
+  Side effects: cs->num_elements is recounted from the element list, and
+  each element's size is cached in elem->length for the writer.
+
+  NOTE(review): every element size is sizeof() of the in-memory counter
+  block.  The writer emits the VG and VLAN blocks field by field with a
+  mix of 32-bit and 64-bit values; if the compiler pads those structs,
+  sizeof() exceeds the bytes written and the writer's assert() fires -
+  confirm against the struct definitions.
+*/
+
+static int computeCountersSampleSize(SFLReceiver *receiver, SFL_COUNTERS_SAMPLE_TYPE *cs)
+{
+    SFLCounters_sample_element *elem = cs->elements;
+#ifdef SFL_USE_32BIT_INDEX
+    u_int siz = 24; /* tag, length, sequence_number, ds_class, ds_index, number of elements */
+#else
+    u_int siz = 20; /* tag, length, sequence_number, source_id, number of elements */
+#endif
+
+    cs->num_elements = 0; /* we're going to count them again even if this was set by the client */
+    for(; elem != NULL; elem = elem->nxt) {
+       u_int elemSiz = 0;
+       cs->num_elements++;
+       siz += 8; /* tag, length */
+       switch(elem->tag) {
+       case SFLCOUNTERS_GENERIC:  elemSiz = sizeof(elem->counterBlock.generic); break;
+       case SFLCOUNTERS_ETHERNET: elemSiz = sizeof(elem->counterBlock.ethernet); break;
+       case SFLCOUNTERS_TOKENRING: elemSiz = sizeof(elem->counterBlock.tokenring); break;
+       case SFLCOUNTERS_VG: elemSiz = sizeof(elem->counterBlock.vg); break;
+       case SFLCOUNTERS_VLAN: elemSiz = sizeof(elem->counterBlock.vlan); break;
+       default:
+           sflError(receiver, "unexpected counters_tag"); /* also resets the sample collector */
+           return -1;
+           break;
+       }
+       // cache the element size, and accumulate it into the overall CountersSample size
+       elem->length = elemSiz;
+       siz += elemSiz;
+    }
+    return siz;
+}
+
+/*_________________----------------------------------__________________
+  _________________ sfl_receiver_writeCountersSample __________________
+  -----------------__________________________________------------------
+
+  Encodes one counters sample into the pending datagram.
+
+  Returns the number of bytes appended (tag and length words included),
+  or -1 when cs is NULL, when an element carries an unrecognized tag, or
+  when the sample is larger than half of sFlowRcvrMaximumDatagramSize.
+  If appending would reach the datagram size limit, the pending datagram
+  is sent first.
+*/
+
+int sfl_receiver_writeCountersSample(SFLReceiver *receiver, SFL_COUNTERS_SAMPLE_TYPE *cs)
+{
+    int packedSize;
+    if(cs == NULL) return -1;
+    // work out the encoded size first; this also caches each element's length
+    // and recounts cs->num_elements (-1 means an unrecognized element tag).
+    if((packedSize = computeCountersSampleSize(receiver, cs)) == -1) return -1;
+  
+    // check in case this one sample alone is too big for the datagram
+    // in fact - if it is even half as big then we should ditch it. Very
+    // important to avoid overrunning the packet buffer.
+    if(packedSize > (int)(receiver->sFlowRcvrMaximumDatagramSize / 2)) {
+       sflError(receiver, "counters sample too big for datagram");
+       return -1;
+    }
+  
+    if((receiver->sampleCollector.pktlen + packedSize) >= receiver->sFlowRcvrMaximumDatagramSize) // this sample might put the pkt over the limit:
+       sendSample(receiver); // send what is pending before going on
+  
+    receiver->sampleCollector.numSamples++;
+  
+#ifdef SFL_USE_32BIT_INDEX
+    putNet32(receiver, SFLCOUNTERS_SAMPLE_EXPANDED);
+#else
+    putNet32(receiver, SFLCOUNTERS_SAMPLE);
+#endif
+
+    putNet32(receiver, packedSize - 8); // tag and length not included
+    putNet32(receiver, cs->sequence_number);
+
+#ifdef SFL_USE_32BIT_INDEX
+    putNet32(receiver, cs->ds_class);
+    putNet32(receiver, cs->ds_index);
+#else
+    putNet32(receiver, cs->source_id);
+#endif
+
+    putNet32(receiver, cs->num_elements);
+  
+    {
+       SFLCounters_sample_element *elem = cs->elements;
+       for(; elem != NULL; elem = elem->nxt) {
+           
+           putNet32(receiver, elem->tag);
+           putNet32(receiver, elem->length); // length cached in computeCountersSampleSize()
+           
+           switch(elem->tag) {
+           case SFLCOUNTERS_GENERIC:
+               putGenericCounters(receiver, &(elem->counterBlock.generic));
+               break;
+           case SFLCOUNTERS_ETHERNET:
+               // all these counters are 32-bit
+               putNet32_run(receiver, &elem->counterBlock.ethernet, sizeof(elem->counterBlock.ethernet) / 4);
+               break;
+           case SFLCOUNTERS_TOKENRING:
+               // all these counters are 32-bit
+               putNet32_run(receiver, &elem->counterBlock.tokenring, sizeof(elem->counterBlock.tokenring) / 4);
+               break;
+           case SFLCOUNTERS_VG:
+               // mixed sizes
+               putNet32(receiver, elem->counterBlock.vg.dot12InHighPriorityFrames);
+               putNet64(receiver, elem->counterBlock.vg.dot12InHighPriorityOctets);
+               putNet32(receiver, elem->counterBlock.vg.dot12InNormPriorityFrames);
+               putNet64(receiver, elem->counterBlock.vg.dot12InNormPriorityOctets);
+               putNet32(receiver, elem->counterBlock.vg.dot12InIPMErrors);
+               putNet32(receiver, elem->counterBlock.vg.dot12InOversizeFrameErrors);
+               putNet32(receiver, elem->counterBlock.vg.dot12InDataErrors);
+               putNet32(receiver, elem->counterBlock.vg.dot12InNullAddressedFrames);
+               putNet32(receiver, elem->counterBlock.vg.dot12OutHighPriorityFrames);
+               putNet64(receiver, elem->counterBlock.vg.dot12OutHighPriorityOctets);
+               putNet32(receiver, elem->counterBlock.vg.dot12TransitionIntoTrainings);
+               putNet64(receiver, elem->counterBlock.vg.dot12HCInHighPriorityOctets);
+               putNet64(receiver, elem->counterBlock.vg.dot12HCInNormPriorityOctets);
+               putNet64(receiver, elem->counterBlock.vg.dot12HCOutHighPriorityOctets);
+               break;
+           case SFLCOUNTERS_VLAN:
+               // mixed sizes
+               putNet32(receiver, elem->counterBlock.vlan.vlan_id);
+               putNet64(receiver, elem->counterBlock.vlan.octets);
+               putNet32(receiver, elem->counterBlock.vlan.ucastPkts);
+               putNet32(receiver, elem->counterBlock.vlan.multicastPkts);
+               putNet32(receiver, elem->counterBlock.vlan.broadcastPkts);
+               putNet32(receiver, elem->counterBlock.vlan.discards);
+               break;
+           default:
+               sflError(receiver, "unexpected counters_tag"); // computeCountersSampleSize() has already rejected unknown tags
+               return -1;
+               break;
+           }
+       }
+    }
+    // sanity check: the bytes written must equal the size computed up front
+    assert(((u_char *)receiver->sampleCollector.datap
+           - (u_char *)receiver->sampleCollector.data
+           - receiver->sampleCollector.pktlen)  == (u_int32_t)packedSize);
+
+    // update the pktlen
+    receiver->sampleCollector.pktlen = (u_char *)receiver->sampleCollector.datap - (u_char *)receiver->sampleCollector.data;
+    return packedSize;
+}
+
+/*_________________---------------------------------__________________
+  _________________ sfl_receiver_samplePacketsSent  __________________
+  -----------------_________________________________------------------
+*/
+
+u_int32_t sfl_receiver_samplePacketsSent(SFLReceiver *receiver)
+{
+    /* packetSeqNo is incremented once per sendSample() call, so it doubles
+       as the count of datagrams sent */
+    u_int32_t sent = receiver->sampleCollector.packetSeqNo;
+    return sent;
+}
+
+/*_________________---------------------------__________________
+  _________________     sendSample            __________________
+  -----------------___________________________------------------
+
+  Completes the datagram header in place (sequence number, uptime,
+  sample count), transmits the first pktlen bytes of the buffer, then
+  rewinds the sample collector for the next datagram.
+
+  Transmission goes through agent->sendFn when one is set.  Otherwise,
+  and only when built with SFLOW_DO_SOCKET, the datagram is sent with
+  sendto() on the agent's IPv4 or IPv6 socket.  Without either, nothing
+  is transmitted and the pending samples are dropped by the rewind.
+*/
+
+static void sendSample(SFLReceiver *receiver)
+{  
+    /* construct and send out the sample, then reset for the next one... */
+    /* first fill in the header with the latest values */
+    /* version, agent_address and sub_agent_id were pre-set. */
+    u_int32_t hdrIdx = (receiver->agent->myIP.type == SFLADDRESSTYPE_IP_V6) ? 7 : 4; /* first word after the pre-set fields */
+    receiver->sampleCollector.data[hdrIdx++] = htonl(++receiver->sampleCollector.packetSeqNo); /* seq no */
+    receiver->sampleCollector.data[hdrIdx++] = htonl((receiver->agent->now - receiver->agent->bootTime) * 1000); /* uptime - NOTE(review): presumably seconds scaled to ms; confirm units of now/bootTime */
+    receiver->sampleCollector.data[hdrIdx++] = htonl(receiver->sampleCollector.numSamples); /* num samples */
+    /* send */
+    if(receiver->agent->sendFn) (*receiver->agent->sendFn)(receiver->agent->magic,
+                                                          receiver->agent,
+                                                          receiver,
+                                                          (u_char *)receiver->sampleCollector.data, 
+                                                          receiver->sampleCollector.pktlen);
+    else {
+#ifdef SFLOW_DO_SOCKET
+       /* send it myself */
+       if (receiver->sFlowRcvrAddress.type == SFLADDRESSTYPE_IP_V6) {
+           u_int32_t soclen = sizeof(struct sockaddr_in6);
+           int result = sendto(receiver->agent->receiverSocket6,
+                               receiver->sampleCollector.data,
+                               receiver->sampleCollector.pktlen,
+                               0,
+                               (struct sockaddr *)&receiver->receiver6,
+                               soclen);
+           if(result == -1 && errno != EINTR) sfl_agent_sysError(receiver->agent, "receiver", "IPv6 socket sendto error"); /* EINTR is not reported */
+           if(result == 0) sfl_agent_error(receiver->agent, "receiver", "IPv6 socket sendto returned 0");
+       }
+       else {
+           u_int32_t soclen = sizeof(struct sockaddr_in);
+           int result = sendto(receiver->agent->receiverSocket4,
+                               receiver->sampleCollector.data,
+                               receiver->sampleCollector.pktlen,
+                               0,
+                               (struct sockaddr *)&receiver->receiver4,
+                               soclen);
+           if(result == -1 && errno != EINTR) sfl_agent_sysError(receiver->agent, "receiver", "socket sendto error"); /* EINTR is not reported */
+           if(result == 0) sfl_agent_error(receiver->agent, "receiver", "socket sendto returned 0");
+       }
+#endif
+    }
+
+    /* reset for the next time (done whether or not the send succeeded) */
+    resetSampleCollector(receiver);
+}
+
+/*_________________---------------------------__________________
+  _________________   resetSampleCollector    __________________
+  -----------------___________________________------------------
+*/
+
+static void resetSampleCollector(SFLReceiver *receiver)
+{
+    /* nothing is pending any more */
+    receiver->sampleCollector.pktlen = 0;
+    receiver->sampleCollector.numSamples = 0;
+
+    /* the write cursor goes back to the first word after the datagram header:
+       word 10 when the agent address is IPv6, word 7 otherwise */
+    if(receiver->agent->myIP.type == SFLADDRESSTYPE_IP_V6) {
+        receiver->sampleCollector.datap = receiver->sampleCollector.data + 10;
+    }
+    else {
+        receiver->sampleCollector.datap = receiver->sampleCollector.data + 7;
+    }
+
+    /* the header bytes count towards the packet length */
+    receiver->sampleCollector.pktlen = (u_char *)receiver->sampleCollector.datap - (u_char *)receiver->sampleCollector.data;
+}
+
+/*_________________---------------------------__________________
+  _________________         sflError          __________________
+  -----------------___________________________------------------
+*/
+
+static void sflError(SFLReceiver *receiver, char *msg)
+{
+    /* report through the agent under the "receiver" module name ... */
+    sfl_agent_error(receiver->agent, "receiver", msg);
+
+    /* ... and throw away whatever was being assembled, including any
+       samples already queued in the pending datagram */
+    resetSampleCollector(receiver);
+}
diff --git a/lib/sflow_sampler.c b/lib/sflow_sampler.c
new file mode 100644 (file)
index 0000000..759b5a2
--- /dev/null
@@ -0,0 +1,183 @@
+/* Copyright (c) 2002-2009 InMon Corp. Licensed under the terms of the InMon sFlow licence: */
+/* http://www.inmon.com/technology/sflowlicense.txt */
+
+#include "sflow_api.h"
+
+
+/*_________________--------------------------__________________
+  _________________   sfl_sampler_init       __________________
+  -----------------__________________________------------------
+*/
+
+/* (Re)initializes 'sampler' for data source '*pdsi' under 'agent'.  Every
+   field is zeroed except the 'nxt' list link, which is preserved; the maximum
+   header size and packet sampling rate are then set to the library defaults.
+   'pdsi' may point into 'sampler' itself. */
+void sfl_sampler_init(SFLSampler *sampler, SFLAgent *agent, SFLDataSource_instance *pdsi)
+{
+    /* copy the dsi in case it points to sampler->dsi, which we are about to clear.
+       (Thanks to Jagjit Choudray of Force 10 Networks for pointing out this bug) */
+    SFLDataSource_instance dsi = *pdsi;
+
+    /* preserve the *nxt pointer too, in case we are resetting this sampler and it is
+       already part of the agent's linked list (thanks to Matt Woodly for pointing this out) */
+    SFLSampler *nxtPtr = sampler->nxt;
+  
+    /* clear everything */
+    memset(sampler, 0, sizeof(*sampler));
+  
+    /* restore the linked list ptr */
+    sampler->nxt = nxtPtr;
+  
+    /* now copy in the parameters */
+    sampler->agent = agent;
+    sampler->dsi = dsi;
+  
+    /* set defaults */
+    sampler->sFlowFsMaximumHeaderSize = SFL_DEFAULT_HEADER_SIZE;
+    sampler->sFlowFsPacketSamplingRate = SFL_DEFAULT_SAMPLING_RATE;
+}
+
+/*_________________--------------------------__________________
+  _________________       reset              __________________
+  -----------------__________________________------------------
+*/
+
+/* Returns 'sampler' to its freshly-initialized state while keeping its agent
+   and data source.  The dsi is copied to a local first, although
+   sfl_sampler_init() also copes with a pointer into the sampler. */
+static void reset(SFLSampler *sampler)
+{
+    SFLDataSource_instance dsi = sampler->dsi;
+    sfl_sampler_init(sampler, sampler->agent, &dsi);
+}
+
+/*_________________---------------------------__________________
+  _________________      MIB access           __________________
+  -----------------___________________________------------------
+*/
+/* returns the receiver index this sampler is currently bound to */
+u_int32_t sfl_sampler_get_sFlowFsReceiver(SFLSampler *sampler) {
+    return sampler->sFlowFsReceiver;
+}
+/* binds the sampler to receiver index 'sFlowFsReceiver'.  An index of 0 resets
+   the sampler to its defaults (which also clears the cached receiver pointer);
+   any other index looks the receiver up in the agent and caches the result,
+   whatever sfl_agent_getReceiver() returns. */
+void sfl_sampler_set_sFlowFsReceiver(SFLSampler *sampler, u_int32_t sFlowFsReceiver) {
+    sampler->sFlowFsReceiver = sFlowFsReceiver;
+    if(sFlowFsReceiver == 0) reset(sampler);
+    else {
+       /* retrieve and cache a direct pointer to my receiver */
+       sampler->myReceiver = sfl_agent_getReceiver(sampler->agent, sampler->sFlowFsReceiver);
+    }
+}
+/* returns the configured packet sampling rate */
+u_int32_t sfl_sampler_get_sFlowFsPacketSamplingRate(SFLSampler *sampler) {
+    return sampler->sFlowFsPacketSamplingRate;
+}
+/* stores a new packet sampling rate; no range checking is done here */
+void sfl_sampler_set_sFlowFsPacketSamplingRate(SFLSampler *sampler, u_int32_t sFlowFsPacketSamplingRate) {
+    sampler->sFlowFsPacketSamplingRate = sFlowFsPacketSamplingRate;
+}
+/* returns the configured maximum sampled-header size */
+u_int32_t sfl_sampler_get_sFlowFsMaximumHeaderSize(SFLSampler *sampler) {
+    return sampler->sFlowFsMaximumHeaderSize;
+}
+/* stores a new maximum sampled-header size; no range checking is done here */
+void sfl_sampler_set_sFlowFsMaximumHeaderSize(SFLSampler *sampler, u_int32_t sFlowFsMaximumHeaderSize) {
+    sampler->sFlowFsMaximumHeaderSize = sFlowFsMaximumHeaderSize;
+}
+
+/* call this to set a maximum samples-per-second threshold. If the sampler exceeds this
+   threshold during a tick, sfl_sampler_tick() will automatically back off (double) the
+   sampling rate. A value of 0 disables the mechanism */
+void sfl_sampler_set_backoffThreshold(SFLSampler *sampler, u_int32_t samplesPerSecond) {
+    sampler->backoffThreshold = samplesPerSecond;
+}
+/* returns the current backoff threshold (0 = backoff disabled) */
+u_int32_t sfl_sampler_get_backoffThreshold(SFLSampler *sampler) {
+    return sampler->backoffThreshold;
+}
+/* returns the number of flow samples written during the tick interval that
+   most recently ended, as recorded by sfl_sampler_tick() */
+u_int32_t sfl_sampler_get_samplesLastTick(SFLSampler *sampler) {
+    return sampler->samplesLastTick;
+}
+
+/*_________________---------------------------------__________________
+  _________________   sequence number reset         __________________
+  -----------------_________________________________------------------
+  Used by the agent to indicate a samplePool discontinuity
+  so that the sflow collector will know to ignore the next delta.
+*/
+void sfl_sampler_resetFlowSeqNo(SFLSampler *sampler)
+{
+    /* restart numbering from zero; sfl_sampler_writeFlowSample() pre-increments,
+       so the next sample written will carry sequence number 1 */
+    sampler->flowSampleSeqNo = 0;
+}
+
+
+/*_________________---------------------------__________________
+  _________________    sfl_sampler_tick       __________________
+  -----------------___________________________------------------
+*/
+
+/* Called by the agent once per tick.  If a backoff threshold is set and more
+   samples than that were written during the tick just ended, the sampling rate
+   is doubled (i.e. fewer packets are sampled).  The per-tick sample counter is
+   then rolled over into samplesLastTick.  'now' is not used. */
+void sfl_sampler_tick(SFLSampler *sampler, time_t now)
+{
+    if(sampler->backoffThreshold && sampler->samplesThisTick > sampler->backoffThreshold) {
+       /* automatic backoff.  If using hardware sampling then this is where you have to
+        * call out to change the sampling rate and make sure that any other registers/variables
+        * that hold this value are updated.
+        *
+        * Only double while the result still fits in 32 bits: a wrapped rate of 0
+        * would be treated as "sample every packet", which is the opposite of
+        * backing off.
+        */
+       if(sampler->sFlowFsPacketSamplingRate <= 0x7FFFFFFFU) {
+           sampler->sFlowFsPacketSamplingRate *= 2;
+       }
+    }
+    sampler->samplesLastTick = sampler->samplesThisTick;
+    sampler->samplesThisTick = 0;
+}
+
+
+
+/*_________________------------------------------__________________
+  _________________ sfl_sampler_writeFlowSample  __________________
+  -----------------______________________________------------------
+*/
+
+/* Completes the header of flow sample 'fs' on behalf of 'sampler' and hands it
+   to the sampler's receiver, if it has one.  A null 'fs' is ignored.  The
+   sample is counted against the current tick and given the next sequence
+   number even when there is no receiver to send it to. */
+void sfl_sampler_writeFlowSample(SFLSampler *sampler, SFL_FLOW_SAMPLE_TYPE *fs)
+{
+    if(fs == NULL) return;
+    /* counted for the backoff check in sfl_sampler_tick() */
+    sampler->samplesThisTick++;
+    /* increment the sequence number */
+    fs->sequence_number = ++sampler->flowSampleSeqNo;
+    /* copy the other header fields in */
+#ifdef SFL_USE_32BIT_INDEX
+    fs->ds_class = SFL_DS_CLASS(sampler->dsi);
+    fs->ds_index = SFL_DS_INDEX(sampler->dsi);
+#else
+    fs->source_id = SFL_DS_DATASOURCE(sampler->dsi);
+#endif
+    /* the sampling rate may have been set already. */
+    if(fs->sampling_rate == 0) fs->sampling_rate = sampler->sFlowFsPacketSamplingRate;
+    /* the samplePool may be maintained upstream too. */
+    if( fs->sample_pool == 0) fs->sample_pool = sampler->samplePool;
+    /* sent to my receiver */
+    if(sampler->myReceiver) sfl_receiver_writeFlowSample(sampler->myReceiver, fs);
+}
+
+#ifdef SFLOW_SOFTWARE_SAMPLING
+
+/* ================== software sampling ========================*/
+
+/*_________________---------------------------__________________
+  _________________     nextRandomSkip        __________________
+  -----------------___________________________------------------
+*/
+
+inline static u_int32_t nextRandomSkip(u_int32_t mean)
+{
+    u_int32_t span;
+
+    /* a mean of 0 or 1 always yields a skip of 1, i.e. every packet is taken */
+    if(mean <= 1) return 1;
+
+    /* pick from 1 .. (2*mean - 1), a range centred on mean */
+    span = (2 * mean) - 1;
+    return (random() % span) + 1;
+}
+
+/*_________________---------------------------__________________
+  _________________  sfl_sampler_takeSample   __________________
+  -----------------___________________________------------------
+*/
+
+/* Software sampling decision, to be called once per packet seen on this
+   sampler's data source.  Returns 1 if the packet should be sampled and 0
+   otherwise.  Every call increments samplePool.  Note that the first call
+   (skip == 0, which is also the state after the sampler has been reset)
+   reseeds the process-wide random() generator via srandom(). */
+int sfl_sampler_takeSample(SFLSampler *sampler)
+{
+    if(sampler->skip == 0) {
+       /* first time - seed the random number generator */
+       srandom(SFL_DS_INDEX(sampler->dsi));
+       sampler->skip = nextRandomSkip(sampler->sFlowFsPacketSamplingRate);
+    }
+
+    /* increment the samplePool */
+    sampler->samplePool++;
+
+    if(--sampler->skip == 0) {
+       /* reached zero. Set the next skip and return true. */
+       sampler->skip = nextRandomSkip(sampler->sFlowFsPacketSamplingRate);
+       return 1;
+    }
+    return 0;
+}
+
+#endif /* SFLOW_SOFTWARE_SAMPLING */
index 3c2a898..dd2a16e 100644 (file)
@@ -25,7 +25,12 @@ void stream_ssl_set_certificate_file(const char *file_name);
 void stream_ssl_set_ca_cert_file(const char *file_name, bool bootstrap);
 void stream_ssl_set_peer_ca_cert_file(const char *file_name);
 
-#define STREAM_SSL_LONG_OPTIONS                     \
+/* Define the long options for SSL support.
+ *
+ * Note that the definition includes a final comma, and therefore a comma 
+ * must not be supplied when using the definition.  This is done so that 
+ * compilation succeeds whether or not HAVE_OPENSSL is defined. */
+#define STREAM_SSL_LONG_OPTIONS                      \
         {"private-key", required_argument, 0, 'p'}, \
         {"certificate", required_argument, 0, 'c'}, \
         {"ca-cert",     required_argument, 0, 'C'},
index 8506516..f012e10 100644 (file)
@@ -66,6 +66,7 @@ VLOG_MODULE(process)
 VLOG_MODULE(rconn)
 VLOG_MODULE(reconnect)
 VLOG_MODULE(rtnetlink)
+VLOG_MODULE(sflow)
 VLOG_MODULE(stp)
 VLOG_MODULE(stream_fd)
 VLOG_MODULE(stream_ssl)
index 0f05e53..0c99b49 100644 (file)
@@ -19,6 +19,8 @@ ofproto_libofproto_a_SOURCES = \
        ofproto/netflow.h \
        ofproto/ofproto.c \
        ofproto/ofproto.h \
+       ofproto/ofproto-sflow.c \
+       ofproto/ofproto-sflow.h \
        ofproto/pktbuf.c \
        ofproto/pktbuf.h \
        ofproto/pinsched.c \
index f7cb1db..4589f32 100644 (file)
@@ -121,3 +121,9 @@ collectors_send(const struct collectors *c, const void *payload, size_t n)
         }
     }
 }
+
+/* Returns 'c->n_fds', or 0 if 'c' is null.  A null 'c' is accepted because
+ * callers keep a null pointer when no collectors are configured. */
+int
+collectors_count(const struct collectors *c)
+{
+    return c ? c->n_fds : 0;
+}
index a4abb63..ac70f37 100644 (file)
@@ -28,4 +28,6 @@ void collectors_destroy(struct collectors *);
 
 void collectors_send(const struct collectors *, const void *, size_t);
 
+int collectors_count(const struct collectors *);
+
 #endif /* collectors.h */
diff --git a/ofproto/ofproto-sflow.c b/ofproto/ofproto-sflow.c
new file mode 100644 (file)
index 0000000..1b659d1
--- /dev/null
@@ -0,0 +1,607 @@
+/*
+ * Copyright (c) 2009, 2010 InMon Corp.
+ * Copyright (c) 2009 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "ofproto-sflow.h"
+#include <inttypes.h>
+#include <stdlib.h>
+#include "collectors.h"
+#include "dpif.h"
+#include "compiler.h"
+#include "netdev.h"
+#include "ofpbuf.h"
+#include "ofproto.h"
+#include "poll-loop.h"
+#include "port-array.h"
+#include "sflow_api.h"
+#include "socket-util.h"
+#include "timeval.h"
+
+#define THIS_MODULE VLM_sflow
+#include "vlog.h"
+
+/* Per-port sFlow state, stored in 'struct ofproto_sflow''s port array.  The
+ * netdev is opened in ofproto_sflow_add_port() and closed in
+ * ofproto_sflow_del_port(). */
+struct ofproto_sflow_port {
+    struct netdev *netdev;      /* Underlying network device, for stats. */
+    SFLDataSource_instance dsi; /* sFlow library's notion of port number. */
+};
+
+/* sFlow state for one datapath. */
+struct ofproto_sflow {
+    struct ofproto *ofproto;    /* NOTE(review): not assigned in this file. */
+    struct collectors *collectors; /* Null when sFlow is disabled. */
+    SFLAgent *sflow_agent;      /* Null until options are applied. */
+    struct ofproto_sflow_options *options; /* Private copy; null if cleared. */
+    struct dpif *dpif;          /* Datapath whose sampling rate we control. */
+    time_t next_tick;           /* Earliest time to call sfl_agent_tick(). */
+    size_t n_flood, n_all;      /* Sizes of the FLOOD and ALL port groups. */
+    struct port_array ports;    /* Indexed by ODP port number. */
+};
+
+#define RECEIVER_INDEX 1
+
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+/* Returns true if 'a' and 'b' are both null or are both non-null strings with
+ * identical contents, false otherwise. */
+static bool
+nullable_string_is_equal(const char *a, const char *b)
+{
+    if (!a || !b) {
+        /* A null string matches only another null string. */
+        return !a && !b;
+    }
+    return !strcmp(a, b);
+}
+
+/* Returns true if option sets 'a' and 'b' would configure sFlow identically:
+ * same targets, same numeric settings, and same (possibly null) agent device
+ * and control IP strings. */
+static bool
+ofproto_sflow_options_equal(const struct ofproto_sflow_options *a,
+                            const struct ofproto_sflow_options *b)
+{
+    /* Collector targets. */
+    if (!svec_equal(&a->targets, &b->targets)) {
+        return false;
+    }
+
+    /* Numeric settings. */
+    if (a->sampling_rate != b->sampling_rate
+        || a->polling_interval != b->polling_interval
+        || a->header_len != b->header_len
+        || a->sub_id != b->sub_id) {
+        return false;
+    }
+
+    /* Optional strings. */
+    if (!nullable_string_is_equal(a->agent_device, b->agent_device)) {
+        return false;
+    }
+    return nullable_string_is_equal(a->control_ip, b->control_ip);
+}
+
+/* Returns a deep copy of 'old'.  The copy owns its own target vector and
+ * strings and must be released with ofproto_sflow_options_destroy(). */
+static struct ofproto_sflow_options *
+ofproto_sflow_options_clone(const struct ofproto_sflow_options *old)
+{
+    /* Start from a bitwise copy, then replace each owned member so the copy
+     * shares no storage with 'old'. */
+    struct ofproto_sflow_options *copy = xmemdup(old, sizeof *old);
+
+    svec_clone(&copy->targets, &old->targets);
+
+    if (old->agent_device) {
+        copy->agent_device = xstrdup(old->agent_device);
+    } else {
+        copy->agent_device = NULL;
+    }
+
+    if (old->control_ip) {
+        copy->control_ip = xstrdup(old->control_ip);
+    } else {
+        copy->control_ip = NULL;
+    }
+
+    return copy;
+}
+
+/* Frees 'options' together with the target vector and strings it owns.  A
+ * null 'options' is a no-op. */
+static void
+ofproto_sflow_options_destroy(struct ofproto_sflow_options *options)
+{
+    if (!options) {
+        return;
+    }
+
+    svec_destroy(&options->targets);
+    free(options->agent_device);
+    free(options->control_ip);
+    free(options);
+}
+
+/* sFlow library callback to allocate memory.  Returns 'bytes' bytes of
+ * zero-filled memory from calloc(), or a null pointer if calloc() fails; the
+ * failure is not reported here. */
+static void *
+sflow_agent_alloc_cb(void *magic UNUSED, SFLAgent *agent UNUSED, size_t bytes)
+{
+    return calloc(1, bytes);
+}
+
+/* sFlow library callback to free memory obtained from
+ * sflow_agent_alloc_cb().  Always returns 0. */
+static int
+sflow_agent_free_cb(void *magic UNUSED, SFLAgent *agent UNUSED, void *obj)
+{
+    free(obj);
+    return 0;
+}
+
+/* sFlow library callback to report error.  Logs 'msg' at warning level; note
+ * that this log call is not rate-limited. */
+static void
+sflow_agent_error_cb(void *magic UNUSED, SFLAgent *agent UNUSED, char *msg)
+{
+    VLOG_WARN("sFlow agent error: %s", msg);
+}
+
+/* sFlow library callback to send datagram.  'os_' is the 'struct
+ * ofproto_sflow' that was registered with sfl_agent_init(); the 'pktLen' bytes
+ * at 'pkt' are passed to collectors_send() for that instance's collectors. */
+static void
+sflow_agent_send_packet_cb(void *os_, SFLAgent *agent UNUSED,
+                           SFLReceiver *receiver UNUSED, u_char *pkt,
+                           uint32_t pktLen)
+{
+    struct ofproto_sflow *os = os_;
+    collectors_send(os->collectors, pkt, pktLen);
+}
+
+/* sFlow library callback invoked for 'poller' to collect interface counters.
+ * 'os_' is the 'struct ofproto_sflow' registered with sfl_agent_addPoller().
+ * Fills a generic interface counters element from the port's network device,
+ * attaches it to 'cs' and submits the sample through the poller.  Does
+ * nothing if the poller's bridge port is no longer in the port table. */
+static void
+sflow_agent_get_counters(void *os_, SFLPoller *poller,
+                         SFL_COUNTERS_SAMPLE_TYPE *cs)
+{
+    struct ofproto_sflow *os = os_;
+    SFLCounters_sample_element elem;
+    struct ofproto_sflow_port *osp;
+    SFLIf_counters *counters;
+    struct netdev_stats stats;
+    enum netdev_flags flags;
+    uint32_t current;
+
+    osp = port_array_get(&os->ports, poller->bridgePort);
+    if (!osp) {
+        return;
+    }
+
+    elem.tag = SFLCOUNTERS_GENERIC;
+    counters = &elem.counterBlock.generic;
+    counters->ifIndex = SFL_DS_INDEX(poller->dsi);
+    counters->ifType = 6;       /* Presumably ethernetCsmacd -- confirm. */
+    if (!netdev_get_features(osp->netdev, &current, NULL, NULL, NULL)) {
+      /* The values of ifDirection come from MAU MIB (RFC 2668): 0 = unknown,
+         1 = full-duplex, 2 = half-duplex, 3 = in, 4=out */
+        counters->ifSpeed = netdev_features_to_bps(current);
+        counters->ifDirection = (netdev_features_is_full_duplex(current)
+                                 ? 1 : 2);
+    } else {
+        /* Features unavailable: report a fixed fallback speed and unknown
+         * direction. */
+        counters->ifSpeed = 100000000;
+        counters->ifDirection = 0;
+    }
+    if (!netdev_get_flags(osp->netdev, &flags) && flags & NETDEV_UP) {
+        bool carrier;
+
+        counters->ifStatus = 1; /* ifAdminStatus up. */
+        if (!netdev_get_carrier(osp->netdev, &carrier) && carrier) {
+            counters->ifStatus |= 2; /* ifOperStatus up. */
+        }
+    } else {
+        counters->ifStatus = 0;  /* Down. */
+    }
+
+    /* XXX
+       1. Is the multicast counter filled in?
+       2. Does the multicast counter include broadcasts?
+       3. Does the rx_packets counter include multicasts/broadcasts?
+    */
+    /* NOTE(review): the return value of netdev_get_stats() is ignored, so
+     * this relies on 'stats' being left in a defined state on failure --
+     * confirm.  Counters with no source in 'stats' are set to -1. */
+    netdev_get_stats(osp->netdev, &stats);
+    counters->ifInOctets = stats.rx_bytes;
+    counters->ifInUcastPkts = stats.rx_packets;
+    counters->ifInMulticastPkts = stats.multicast;
+    counters->ifInBroadcastPkts = -1;
+    counters->ifInDiscards = stats.rx_dropped;
+    counters->ifInErrors = stats.rx_errors;
+    counters->ifInUnknownProtos = -1;
+    counters->ifOutOctets = stats.tx_bytes;
+    counters->ifOutUcastPkts = stats.tx_packets;
+    counters->ifOutMulticastPkts = -1;
+    counters->ifOutBroadcastPkts = -1;
+    counters->ifOutDiscards = stats.tx_dropped;
+    counters->ifOutErrors = stats.tx_errors;
+    counters->ifPromiscuousMode = 0;
+
+    SFLADD_ELEMENT(cs, &elem);
+    sfl_poller_writeCountersSample(poller, cs);
+}
+
+/* Obtains an address to use for the local sFlow agent and stores it into
+ * '*agent_addr'.  Returns true if successful, false on failure.
+ *
+ * The sFlow agent address should be a local IP address that is persistent and
+ * reachable over the network, if possible.  The IP address associated with
+ * 'agent_device' is used if it has one, and otherwise 'control_ip', the IP
+ * address used to talk to the controller. */
+static bool
+sflow_choose_agent_address(const char *agent_device, const char *control_ip,
+                           SFLAddress *agent_addr)
+{
+    struct in_addr in4;
+    bool have_addr = false;
+
+    memset(agent_addr, 0, sizeof *agent_addr);
+    agent_addr->type = SFLADDRESSTYPE_IP_V4;
+
+    /* First choice: the IPv4 address configured on 'agent_device'. */
+    if (agent_device) {
+        struct netdev *netdev;
+
+        if (!netdev_open_default(agent_device, &netdev)) {
+            have_addr = !netdev_get_in4(netdev, &in4, NULL);
+            netdev_close(netdev);
+        }
+    }
+
+    /* Second choice: whatever 'control_ip' resolves to. */
+    if (!have_addr && control_ip) {
+        have_addr = !lookup_ip(control_ip, &in4);
+    }
+
+    if (!have_addr) {
+        VLOG_ERR("could not determine IP address for sFlow agent");
+        return false;
+    }
+
+    agent_addr->address.ip_v4.addr = in4.s_addr;
+    return true;
+}
+
+/* Disables sFlow on 'os': releases and frees the agent, destroys the
+ * collectors, discards the stored options, removes every port, and sets the
+ * datapath's sFlow sampling probability to 0.  'os' itself remains valid.
+ *
+ * NOTE(review): because the port table is emptied here, ports registered
+ * before a clear are unknown to a later ofproto_sflow_set_options() call
+ * unless the caller adds them again -- confirm that this is intended. */
+void
+ofproto_sflow_clear(struct ofproto_sflow *os)
+{
+    struct ofproto_sflow_port *osp;
+    unsigned int odp_port;
+
+    if (os->sflow_agent) {
+        /* The SFLAgent structure is allocated by
+         * ofproto_sflow_set_options(), so it has to be freed here as well as
+         * released; otherwise it leaks on every clear. */
+        sfl_agent_release(os->sflow_agent);
+        free(os->sflow_agent);
+        os->sflow_agent = NULL;
+    }
+    collectors_destroy(os->collectors);
+    os->collectors = NULL;
+    ofproto_sflow_options_destroy(os->options);
+    os->options = NULL;
+
+    /* The agent is already gone, so this only closes the netdevs and frees
+     * the per-port records. */
+    PORT_ARRAY_FOR_EACH (osp, &os->ports, odp_port) {
+        ofproto_sflow_del_port(os, odp_port);
+    }
+    port_array_clear(&os->ports);
+
+    /* Turn off sampling to save CPU cycles. */
+    dpif_set_sflow_probability(os->dpif, 0);
+}
+
+/* Returns true if 'os' currently has a collectors object, which is this
+ * module's indicator that sFlow is configured and active. */
+bool
+ofproto_sflow_is_enabled(const struct ofproto_sflow *os)
+{
+    return os->collectors != NULL;
+}
+
+/* Creates and returns a new, disabled sFlow instance bound to 'dpif'.  It has
+ * no agent, collectors, options or ports until they are configured.  The
+ * caller releases it with ofproto_sflow_destroy(). */
+struct ofproto_sflow *
+ofproto_sflow_create(struct dpif *dpif)
+{
+    struct ofproto_sflow *os;
+
+    os = xcalloc(1, sizeof *os);
+    os->dpif = dpif;
+    os->next_tick = time_now() + 1;
+    port_array_init(&os->ports);
+    return os;
+}
+
+/* Shuts down and frees 'os'.  A null 'os' is a no-op. */
+void
+ofproto_sflow_destroy(struct ofproto_sflow *os)
+{
+    if (!os) {
+        return;
+    }
+
+    ofproto_sflow_clear(os);
+    port_array_destroy(&os->ports);
+    free(os);
+}
+
+/* Registers a counter poller for port 'osp' (ODP port number 'odp_port') with
+ * the agent, using the polling interval from 'os->options' and
+ * RECEIVER_INDEX as its receiver.  Requires 'os->sflow_agent' and
+ * 'os->options' to be non-null. */
+static void
+ofproto_sflow_add_poller(struct ofproto_sflow *os,
+                         struct ofproto_sflow_port *osp, uint16_t odp_port)
+{
+    SFLPoller *poller = sfl_agent_addPoller(os->sflow_agent, &osp->dsi, os,
+                                            sflow_agent_get_counters);
+    sfl_poller_set_sFlowCpInterval(poller, os->options->polling_interval);
+    sfl_poller_set_sFlowCpReceiver(poller, RECEIVER_INDEX);
+    /* Lets sflow_agent_get_counters() find the port from the poller. */
+    sfl_poller_set_bridgePort(poller, odp_port);
+}
+
+/* Registers a flow sampler for port 'osp' with the agent, configured with
+ * 'sampling_rate' and a maximum sampled header size of 'header_len', and
+ * bound to RECEIVER_INDEX.  Requires 'os->sflow_agent' to be non-null. */
+static void
+ofproto_sflow_add_sampler(struct ofproto_sflow *os,
+                         struct ofproto_sflow_port *osp,
+                         u_int32_t sampling_rate, u_int32_t header_len)
+{
+    SFLSampler *sampler = sfl_agent_addSampler(os->sflow_agent, &osp->dsi);
+    sfl_sampler_set_sFlowFsPacketSamplingRate(sampler, sampling_rate);
+    sfl_sampler_set_sFlowFsMaximumHeaderSize(sampler, header_len);
+    sfl_sampler_set_sFlowFsReceiver(sampler, RECEIVER_INDEX);
+}
+
+/* Registers ODP port 'odp_port', backed by network device 'netdev_name', with
+ * 'os', replacing any port previously registered under the same number.  If
+ * the device cannot be opened a rate-limited warning is logged and the port is
+ * left unregistered.
+ *
+ * The port's sFlow data source index is the device's ifindex when it has one;
+ * otherwise an index is synthesized from the agent's sub-agent ID (0 if there
+ * is no agent yet) and 'odp_port'.
+ *
+ * If the sFlow agent already exists, a poller and a sampler are attached to
+ * the new port right away. */
+void
+ofproto_sflow_add_port(struct ofproto_sflow *os, uint16_t odp_port,
+                       const char *netdev_name)
+{
+    struct ofproto_sflow_port *osp;
+    struct netdev *netdev;
+    uint32_t ds_index;
+    int ifindex;
+    int error;
+
+    ofproto_sflow_del_port(os, odp_port);
+
+    /* Open network device. */
+    error = netdev_open_default(netdev_name, &netdev);
+    if (error) {
+        VLOG_WARN_RL(&rl, "failed to open network device \"%s\": %s",
+                     netdev_name, strerror(error));
+        return;
+    }
+
+    /* Add to table of ports. */
+    osp = xmalloc(sizeof *osp);
+    osp->netdev = netdev;
+
+    /* Keep the lookup result signed so that a negative error return is
+     * recognized instead of being taken for a very large ifindex. */
+    ifindex = netdev_get_ifindex(netdev);
+    if (ifindex > 0) {
+        ds_index = ifindex;
+    } else if (os->sflow_agent) {
+        ds_index = (os->sflow_agent->subId << 16) + odp_port;
+    } else {
+        /* Ports may be added before the agent is created. */
+        ds_index = odp_port;
+    }
+    SFL_DS_SET(osp->dsi, 0, ds_index, 0);
+    port_array_set(&os->ports, odp_port, osp);
+
+    /* Add poller and sampler. */
+    if (os->sflow_agent) {
+        ofproto_sflow_add_poller(os, osp, odp_port);
+        if (os->options) {
+            /* Without a sampler, packets received on this port would be
+             * rejected by ofproto_sflow_received(). */
+            ofproto_sflow_add_sampler(os, osp, os->options->sampling_rate,
+                                      os->options->header_len);
+        }
+    }
+}
+
+/* Unregisters ODP port 'odp_port' from 'os': detaches its poller and sampler
+ * from the agent (if there is one), closes its network device and frees its
+ * record.  Does nothing if the port is not registered. */
+void
+ofproto_sflow_del_port(struct ofproto_sflow *os, uint16_t odp_port)
+{
+    struct ofproto_sflow_port *port = port_array_get(&os->ports, odp_port);
+
+    if (!port) {
+        return;
+    }
+
+    if (os->sflow_agent) {
+        sfl_agent_removePoller(os->sflow_agent, &port->dsi);
+        sfl_agent_removeSampler(os->sflow_agent, &port->dsi);
+    }
+    netdev_close(port->netdev);
+    free(port);
+    port_array_set(&os->ports, odp_port, NULL);
+}
+
+/* Applies 'options' to 'os'.
+ *
+ * If 'options' names no targets or has a zero sampling rate, sFlow is simply
+ * cleared.  Otherwise the collectors are (re)opened when the options changed
+ * or when fewer collectors are open than targets are configured, and, if the
+ * options changed, the agent is rebuilt from scratch: a new agent address is
+ * chosen, a fresh agent and receiver are created, the datapath sampling
+ * probability is set, and a poller and a sampler are attached to every port
+ * currently registered.
+ *
+ * On any failure (no collector could be opened, no agent address could be
+ * determined) sFlow is cleared. */
+void
+ofproto_sflow_set_options(struct ofproto_sflow *os,
+                          const struct ofproto_sflow_options *options)
+{
+    struct ofproto_sflow_port *osp;
+    bool options_changed;
+    SFLReceiver *receiver;
+    unsigned int odp_port;
+    SFLAddress agentIP;
+    time_t now;
+
+    if (!options->targets.n || !options->sampling_rate) {
+        /* No point in doing any work if there are no targets or nothing to
+         * sample. */
+        ofproto_sflow_clear(os);
+        return;
+    }
+
+    options_changed = (!os->options
+                       || !ofproto_sflow_options_equal(options, os->options));
+
+    /* Configure collectors if options have changed or if we're shortchanged in
+     * collectors (which indicates that opening one or more of the configured
+     * collectors failed, so that we should retry). */
+    if (options_changed
+        || collectors_count(os->collectors) < options->targets.n) {
+        collectors_destroy(os->collectors);
+        os->collectors = NULL;  /* Never leave a dangling pointer behind. */
+
+        /* Success is judged by whether any collector was opened at all, not
+         * by the return value, so that a partial failure still leaves sFlow
+         * running with the collectors that did open. */
+        collectors_create(&options->targets,
+                          SFL_DEFAULT_COLLECTOR_PORT, &os->collectors);
+        if (os->collectors == NULL) {
+            VLOG_WARN_RL(&rl, "no collectors could be initialized, "
+                         "sFlow disabled");
+            ofproto_sflow_clear(os);
+            return;
+        }
+    }
+
+    /* Avoid reconfiguring if options didn't change. */
+    if (!options_changed) {
+        return;
+    }
+    ofproto_sflow_options_destroy(os->options);
+    os->options = ofproto_sflow_options_clone(options);
+
+    /* Choose agent IP address. */
+    if (!sflow_choose_agent_address(options->agent_device,
+                                    options->control_ip, &agentIP)) {
+        ofproto_sflow_clear(os);
+        return;
+    }
+
+    /* Create agent. */
+    VLOG_INFO("creating sFlow agent %d", options->sub_id);
+    if (os->sflow_agent) {
+        /* Release the old agent's resources and free the structure itself,
+         * which was allocated below on a previous call. */
+        sfl_agent_release(os->sflow_agent);
+        free(os->sflow_agent);
+    }
+    os->sflow_agent = xcalloc(1, sizeof *os->sflow_agent);
+    now = time_now();
+    sfl_agent_init(os->sflow_agent,
+                   &agentIP,
+                   options->sub_id,
+                   now,         /* Boot time. */
+                   now,         /* Current time. */
+                   os,          /* Pointer supplied to callbacks. */
+                   sflow_agent_alloc_cb,
+                   sflow_agent_free_cb,
+                   sflow_agent_error_cb,
+                   sflow_agent_send_packet_cb);
+
+    receiver = sfl_agent_addReceiver(os->sflow_agent);
+    sfl_receiver_set_sFlowRcvrOwner(receiver, "Open vSwitch sFlow");
+    sfl_receiver_set_sFlowRcvrTimeout(receiver, 0xffffffff);
+
+    /* Set the sampling_rate down in the datapath. */
+    dpif_set_sflow_probability(os->dpif,
+                               MAX(1, UINT32_MAX / options->sampling_rate));
+
+    /* Add samplers and pollers for the currently known ports.  Ports that
+     * were registered before the agent existed have neither yet. */
+    PORT_ARRAY_FOR_EACH (osp, &os->ports, odp_port) {
+        ofproto_sflow_add_poller(os, osp, odp_port);
+        ofproto_sflow_add_sampler(os, osp,
+                                  options->sampling_rate, options->header_len);
+    }
+}
+
+/* Returns the sFlow data source index recorded for ODP port 'odp_port', or 0
+ * if that port is not registered with 'os'. */
+static int
+ofproto_sflow_odp_port_to_ifindex(const struct ofproto_sflow *os,
+                                  uint16_t odp_port)
+{
+    struct ofproto_sflow_port *port = port_array_get(&os->ports, odp_port);
+
+    if (!port) {
+        return 0;
+    }
+    return SFL_DS_INDEX(port->dsi);
+}
+
+/* Processes sFlow sample message 'msg' received from the datapath.
+ *
+ * The message is expected to consist of the odp_msg header, an
+ * odp_sflow_sample_header, 'n_actions' datapath actions, and finally the
+ * sampled packet.  After validating those lengths, builds a flow sample
+ * (sampled header plus extended switch data, with the output port derived
+ * from the actions) and submits it to the sampler for the input port.
+ *
+ * Malformed messages and samples from ports without a sampler are dropped
+ * with a rate-limited warning.  Messages arriving while no agent exists are
+ * dropped silently. */
+void
+ofproto_sflow_received(struct ofproto_sflow *os, struct odp_msg *msg)
+{
+    SFL_FLOW_SAMPLE_TYPE fs;
+    SFLFlow_sample_element hdrElem;
+    SFLSampled_header *header;
+    SFLFlow_sample_element switchElem;
+    SFLSampler *sampler;
+    const struct odp_sflow_sample_header *hdr;
+    const union odp_action *actions;
+    struct ofpbuf payload;
+    size_t n_actions, n_outputs;
+    size_t min_size;
+    flow_t flow;
+    size_t i;
+
+    /* Samples may still be queued after sFlow has been cleared; there is no
+     * agent to give them to in that case. */
+    if (!os->sflow_agent) {
+        return;
+    }
+
+    /* Get odp_sflow_sample_header. */
+    min_size = sizeof *msg + sizeof *hdr;
+    if (min_size > msg->length) {
+        VLOG_WARN_RL(&rl, "sFlow packet too small (%"PRIu32" < %zu)",
+                     msg->length, min_size);
+        return;
+    }
+    hdr = (const struct odp_sflow_sample_header *) (msg + 1);
+
+    /* Get actions. */
+    n_actions = hdr->n_actions;
+    if (n_actions > 65536 / sizeof *actions) {
+        /* Actual count first, limit second, to match the "(a > b)" text. */
+        VLOG_WARN_RL(&rl, "too many actions in sFlow packet (%zu > %zu)",
+                     n_actions, 65536 / sizeof *actions);
+        return;
+    }
+    min_size += n_actions * sizeof *actions;
+    if (min_size > msg->length) {
+        VLOG_WARN_RL(&rl, "sFlow packet with %zu actions too small "
+                     "(%"PRIu32" < %zu)",
+                     n_actions, msg->length, min_size);
+        return;
+    }
+    actions = (const union odp_action *) (hdr + 1);
+
+    /* Get packet payload and extract flow. */
+    payload.data = (union odp_action *) (actions + n_actions);
+    payload.size = msg->length - min_size;
+    flow_extract(&payload, msg->port, &flow);
+
+    /* Build a flow sample */
+    memset(&fs, 0, sizeof fs);
+    fs.input = ofproto_sflow_odp_port_to_ifindex(os, msg->port);
+    fs.output = 0;              /* Filled in correctly below. */
+    fs.sample_pool = hdr->sample_pool;
+
+    /* We are going to give it to the sampler that represents this input port.
+     * By implementing "ingress-only" sampling like this we ensure that we
+     * never have to offer the same sample to more than one sampler. */
+    sampler = sfl_agent_getSamplerByIfIndex(os->sflow_agent, fs.input);
+    if (!sampler) {
+        VLOG_WARN_RL(&rl, "no sampler for input ifIndex (%"PRIu32")",
+                     fs.input);
+        return;
+    }
+
+    /* Sampled header. */
+    memset(&hdrElem, 0, sizeof hdrElem);
+    hdrElem.tag = SFLFLOW_HEADER;
+    header = &hdrElem.flowType.header;
+    header->header_protocol = SFLHEADER_ETHERNET_ISO8023;
+    header->frame_length = payload.size;
+    header->stripped = 4; /* Ethernet FCS stripped off. */
+    header->header_length = MIN(payload.size,
+                                sampler->sFlowFsMaximumHeaderSize);
+    header->header_bytes = payload.data;
+
+    /* Add extended switch element. */
+    memset(&switchElem, 0, sizeof(switchElem));
+    switchElem.tag = SFLFLOW_EX_SWITCH;
+    switchElem.flowType.sw.src_vlan = ntohs(flow.dl_vlan);
+    switchElem.flowType.sw.src_priority = -1; /* XXX */
+    switchElem.flowType.sw.dst_vlan = -1;     /* Filled in correctly below. */
+    switchElem.flowType.sw.dst_priority = switchElem.flowType.sw.src_priority;
+
+    /* Figure out the output ports. */
+    n_outputs = 0;
+    for (i = 0; i < n_actions; i++) {
+        const union odp_action *a = &actions[i];
+
+        switch (a->type) {
+        case ODPAT_OUTPUT:
+            fs.output = ofproto_sflow_odp_port_to_ifindex(os, a->output.port);
+            n_outputs++;
+            break;
+
+        case ODPAT_OUTPUT_GROUP:
+            n_outputs += (a->output_group.group == DP_GROUP_FLOOD ? os->n_flood
+                          : a->output_group.group == DP_GROUP_ALL ? os->n_all
+                          : 0);
+            break;
+
+        case ODPAT_SET_VLAN_VID:
+            switchElem.flowType.sw.dst_vlan = ntohs(a->vlan_vid.vlan_vid);
+            break;
+
+        case ODPAT_SET_VLAN_PCP:
+            switchElem.flowType.sw.dst_priority = a->vlan_pcp.vlan_pcp;
+            break;
+
+        default:
+            break;
+        }
+    }
+
+    /* Set output port, as defined by http://www.sflow.org/sflow_version_5.txt
+       (search for "Input/output port information"). */
+    if (!n_outputs) {
+        /* This value indicates that the packet was dropped for an unknown
+         * reason. */
+        fs.output = 0x40000000 | 256;
+    } else if (n_outputs > 1 || !fs.output) {
+        /* Setting the high bit means "multiple output ports". */
+        fs.output = 0x80000000 | n_outputs;
+    }
+
+    /* Submit the flow sample to be encoded into the next datagram. */
+    SFLADD_ELEMENT(&fs, &hdrElem);
+    SFLADD_ELEMENT(&fs, &switchElem);
+    sfl_sampler_writeFlowSample(sampler, &fs);
+}
+
+/* Records the number of ports in the FLOOD and ALL port groups.
+ * ofproto_sflow_received() uses these counts as the number of outputs when a
+ * sampled packet's actions send it to one of those groups. */
+void
+ofproto_sflow_set_group_sizes(struct ofproto_sflow *os,
+                              size_t n_flood, size_t n_all)
+{
+    os->n_flood = n_flood;
+    os->n_all = n_all;
+}
+
+/* Periodic work for 'os': while sFlow is enabled, ticks the agent whenever
+ * the scheduled tick time has been reached and schedules the next tick one
+ * second later. */
+void
+ofproto_sflow_run(struct ofproto_sflow *os)
+{
+    time_t now;
+
+    if (!ofproto_sflow_is_enabled(os)) {
+        return;
+    }
+
+    now = time_now();
+    if (now < os->next_tick) {
+        return;                 /* Not due yet. */
+    }
+
+    sfl_agent_tick(os->sflow_agent, now);
+    os->next_tick = now + 1;
+}
+
+/* Arranges for the poll loop to wake up when the next agent tick is due, if
+ * sFlow is enabled on 'os'. */
+void
+ofproto_sflow_wait(struct ofproto_sflow *os)
+{
+    if (ofproto_sflow_is_enabled(os)) {
+        /* Multiply in 'long long' so that the conversion from seconds to
+         * milliseconds cannot overflow where time_t is only 32 bits wide. */
+        poll_timer_wait(os->next_tick * 1000LL - time_msec());
+    }
+}
diff --git a/ofproto/ofproto-sflow.h b/ofproto/ofproto-sflow.h
new file mode 100644 (file)
index 0000000..ec86d11
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2009 InMon Corp.
+ * Copyright (c) 2009 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OFPROTO_SFLOW_H
+#define OFPROTO_SFLOW_H 1
+
+#include <stdint.h>
+#include "svec.h"
+
+struct dpif;
+struct odp_msg;
+struct ofproto_sflow_options;
+
+struct ofproto_sflow *ofproto_sflow_create(struct dpif *);
+void ofproto_sflow_destroy(struct ofproto_sflow *);
+void ofproto_sflow_set_options(struct ofproto_sflow *,
+                               const struct ofproto_sflow_options *);
+void ofproto_sflow_clear(struct ofproto_sflow *);
+bool ofproto_sflow_is_enabled(const struct ofproto_sflow *);
+
+void ofproto_sflow_add_port(struct ofproto_sflow *, uint16_t odp_port,
+                            const char *netdev_name);
+void ofproto_sflow_del_port(struct ofproto_sflow *, uint16_t odp_port);
+void ofproto_sflow_set_group_sizes(struct ofproto_sflow *,
+                                   size_t n_flood, size_t n_all);
+
+void ofproto_sflow_run(struct ofproto_sflow *);
+void ofproto_sflow_wait(struct ofproto_sflow *);
+
+void ofproto_sflow_received(struct ofproto_sflow *, struct odp_msg *);
+
+#endif /* ofproto/ofproto-sflow.h */
index 10b4796..c44762c 100644 (file)
@@ -34,6 +34,7 @@
 #include "netflow.h"
 #include "odp-util.h"
 #include "ofp-print.h"
+#include "ofproto-sflow.h"
 #include "ofpbuf.h"
 #include "openflow/nicira-ext.h"
 #include "openflow/openflow.h"
 #define THIS_MODULE VLM_ofproto
 #include "vlog.h"
 
-enum {
-    DP_GROUP_FLOOD = 0,
-    DP_GROUP_ALL = 1
-};
+#include "sflow_api.h"
 
 enum {
     TABLEID_HASH = 0,
@@ -205,6 +203,7 @@ struct ofproto {
     struct fail_open *fail_open;
     struct pinsched *miss_sched, *action_sched;
     struct netflow *netflow;
+    struct ofproto_sflow *sflow;
 
     /* Flow table. */
     struct classifier cls;
@@ -249,7 +248,8 @@ static void handle_odp_msg(struct ofproto *, struct ofpbuf *);
 static void handle_openflow(struct ofconn *, struct ofproto *,
                             struct ofpbuf *);
 
-static void refresh_port_group(struct ofproto *, unsigned int group);
+static void refresh_port_groups(struct ofproto *);
+
 static void update_port(struct ofproto *, const char *devname);
 static int init_ports(struct ofproto *);
 static void reinit_ports(struct ofproto *);
@@ -279,7 +279,7 @@ ofproto_create(const char *datapath, const char *datapath_type,
         dpif_close(dpif);
         return error;
     }
-    error = dpif_recv_set_mask(dpif, ODPL_MISS | ODPL_ACTION);
+    error = dpif_recv_set_mask(dpif, ODPL_MISS | ODPL_ACTION | ODPL_SFLOW);
     if (error) {
         VLOG_ERR("failed to listen on datapath %s: %s",
                  datapath, strerror(error));
@@ -312,6 +312,7 @@ ofproto_create(const char *datapath, const char *datapath_type,
     p->fail_open = NULL;
     p->miss_sched = p->action_sched = NULL;
     p->netflow = NULL;
+    p->sflow = NULL;
 
     /* Initialize flow table. */
     classifier_init(&p->cls);
@@ -531,6 +532,30 @@ ofproto_set_netflow(struct ofproto *ofproto,
     }
 }
 
+/* Enables or reconfigures sFlow per 'oso'; a null 'oso' disables it. */
+void
+ofproto_set_sflow(struct ofproto *ofproto,
+                  const struct ofproto_sflow_options *oso)
+{
+    if (!oso) {
+        ofproto_sflow_destroy(ofproto->sflow);
+        ofproto->sflow = NULL;
+        return;
+    }
+    if (!ofproto->sflow) {
+        struct ofport *ofport;
+        unsigned int odp_port;
+        /* New instance: seed it with group sizes and the existing ports. */
+        ofproto->sflow = ofproto_sflow_create(ofproto->dpif);
+        refresh_port_groups(ofproto);
+        PORT_ARRAY_FOR_EACH (ofport, &ofproto->ports, odp_port) {
+            ofproto_sflow_add_port(ofproto->sflow, odp_port,
+                                   netdev_get_name(ofport->netdev));
+        }
+    }
+    ofproto_sflow_set_options(ofproto->sflow, oso);
+}
+
 void
 ofproto_set_failure(struct ofproto *ofproto, bool fail_open)
 {
@@ -676,6 +701,7 @@ ofproto_destroy(struct ofproto *p)
     pinsched_destroy(p->miss_sched);
     pinsched_destroy(p->action_sched);
     netflow_destroy(p->netflow);
+    ofproto_sflow_destroy(p->sflow);
 
     switch_status_unregister(p->ss_cat);
 
@@ -829,6 +855,9 @@ ofproto_run1(struct ofproto *p)
     if (p->netflow) {
         netflow_run(p->netflow);
     }
+    if (p->sflow) {
+        ofproto_sflow_run(p->sflow);
+    }
 
     return 0;
 }
@@ -882,6 +911,9 @@ ofproto_wait(struct ofproto *p)
     }
     pinsched_wait(p->miss_sched);
     pinsched_wait(p->action_sched);
+    if (p->sflow) {
+        ofproto_sflow_wait(p->sflow);
+    }
     if (!tag_set_is_empty(&p->revalidate_set)) {
         poll_immediate_wake();
     }
@@ -1022,7 +1054,7 @@ reinit_ports(struct ofproto *p)
     svec_destroy(&devnames);
 }
 
-static void
+static size_t
 refresh_port_group(struct ofproto *p, unsigned int group)
 {
     uint16_t *ports;
@@ -1041,13 +1073,18 @@ refresh_port_group(struct ofproto *p, unsigned int group)
     }
     dpif_port_group_set(p->dpif, group, ports, n_ports);
     free(ports);
+
+    return n_ports;
 }
 
 static void
 refresh_port_groups(struct ofproto *p)
 {
-    refresh_port_group(p, DP_GROUP_FLOOD);
-    refresh_port_group(p, DP_GROUP_ALL);
+    size_t n_flood = refresh_port_group(p, DP_GROUP_FLOOD);
+    size_t n_all = refresh_port_group(p, DP_GROUP_ALL);
+    if (p->sflow) {
+        ofproto_sflow_set_group_sizes(p->sflow, n_flood, n_all);
+    }
 }
 
 static struct ofport *
@@ -1152,19 +1189,29 @@ send_port_status(struct ofproto *p, const struct ofport *ofport,
 static void
 ofport_install(struct ofproto *p, struct ofport *ofport)
 {
+    uint16_t odp_port = ofp_port_to_odp_port(ofport->opp.port_no);
+    const char *netdev_name = (const char *) ofport->opp.name;
+
     netdev_monitor_add(p->netdev_monitor, ofport->netdev);
-    port_array_set(&p->ports, ofp_port_to_odp_port(ofport->opp.port_no),
-                   ofport);
-    shash_add(&p->port_by_name, (char *) ofport->opp.name, ofport);
+    port_array_set(&p->ports, odp_port, ofport);
+    shash_add(&p->port_by_name, netdev_name, ofport);
+    if (p->sflow) {
+        ofproto_sflow_add_port(p->sflow, odp_port, netdev_name);
+    }
 }
 
 static void
 ofport_remove(struct ofproto *p, struct ofport *ofport)
 {
+    uint16_t odp_port = ofp_port_to_odp_port(ofport->opp.port_no);
+
     netdev_monitor_remove(p->netdev_monitor, ofport->netdev);
-    port_array_set(&p->ports, ofp_port_to_odp_port(ofport->opp.port_no), NULL);
+    port_array_set(&p->ports, odp_port, NULL);
     shash_delete(&p->port_by_name,
                  shash_find(&p->port_by_name, (char *) ofport->opp.name));
+    if (p->sflow) {
+        ofproto_sflow_del_port(p->sflow, odp_port);
+    }
 }
 
 static void
@@ -2249,7 +2296,7 @@ update_port_config(struct ofproto *p, struct ofport *port,
 #undef REVALIDATE_BITS
     if (mask & OFPPC_NO_FLOOD) {
         port->opp.config ^= OFPPC_NO_FLOOD;
-        refresh_port_group(p, DP_GROUP_FLOOD);
+        refresh_port_groups(p);
     }
     if (mask & OFPPC_NO_PACKET_IN) {
         port->opp.config ^= OFPPC_NO_PACKET_IN;
@@ -2999,7 +3046,7 @@ handle_openflow(struct ofconn *ofconn, struct ofproto *p,
 }
 \f
 static void
-handle_odp_msg(struct ofproto *p, struct ofpbuf *packet)
+handle_odp_miss_msg(struct ofproto *p, struct ofpbuf *packet)
 {
     struct odp_msg *msg = packet->data;
     uint16_t in_port = odp_port_to_ofp_port(msg->port);
@@ -3007,14 +3054,6 @@ handle_odp_msg(struct ofproto *p, struct ofpbuf *packet)
     struct ofpbuf payload;
     flow_t flow;
 
-    /* Handle controller actions. */
-    if (msg->type == _ODPL_ACTION_NR) {
-        COVERAGE_INC(ofproto_ctlr_action);
-        pinsched_send(p->action_sched, in_port, packet,
-                      send_packet_in_action, p);
-        return;
-    }
-
     payload.data = msg + 1;
     payload.size = msg->length - sizeof *msg;
     flow_extract(&payload, msg->port, &flow);
@@ -3084,6 +3123,36 @@ handle_odp_msg(struct ofproto *p, struct ofpbuf *packet)
         ofpbuf_delete(packet);
     }
 }
+
+/* Dispatches ODP message 'packet' by type.  Each case disposes of 'packet',
+ * freeing it or passing it on, so the default case must free it too. */
+static void
+handle_odp_msg(struct ofproto *p, struct ofpbuf *packet)
+{
+    struct odp_msg *msg = packet->data;
+
+    switch (msg->type) {
+    case _ODPL_ACTION_NR:
+        COVERAGE_INC(ofproto_ctlr_action);
+        pinsched_send(p->action_sched, odp_port_to_ofp_port(msg->port), packet,
+                      send_packet_in_action, p);
+        break;
+    case _ODPL_SFLOW_NR:
+        if (p->sflow) {
+            ofproto_sflow_received(p->sflow, msg);
+        }
+        ofpbuf_delete(packet);
+        break;
+    case _ODPL_MISS_NR:
+        handle_odp_miss_msg(p, packet);
+        break;
+    default:
+        VLOG_WARN_RL(&rl, "received ODP message of unexpected type %"PRIu32,
+                     msg->type);
+        ofpbuf_delete(packet);
+        break;
+    }
+}
 \f
 static void
 revalidate_cb(struct cls_rule *sub_, void *cbdata_)
index ddc3448..a94c8b5 100644 (file)
@@ -29,6 +29,11 @@ struct ofhooks;
 struct ofproto;
 struct svec;
 
+enum {
+    DP_GROUP_FLOOD = 0,
+    DP_GROUP_ALL = 1
+};
+
 struct ofexpired {
     flow_t flow;
     uint64_t packet_count;      /* Packets from subrules. */
@@ -36,6 +41,16 @@ struct ofexpired {
     long long int used;         /* Last-used time (0 if never used). */
 };
 
+struct ofproto_sflow_options {  /* Passed to ofproto_set_sflow(). */
+    struct svec targets;        /* Collectors, each in the form "IP:PORT". */
+    uint32_t sampling_rate;     /* Sample one packet out of this many. */
+    uint32_t polling_interval;  /* Port statistics interval, in seconds. */
+    uint32_t header_len;        /* Bytes of each sampled packet to send. */
+    uint32_t sub_id;            /* Presumably sFlow sub-agent ID; confirm. */
+    char *agent_device;         /* Determines agent address; TODO confirm. */
+    char *control_ip;           /* Presumably fallback agent IP; confirm. */
+};
+
 int ofproto_create(const char *datapath, const char *datapath_type,
                    const struct ofhooks *, void *aux,
                    struct ofproto **ofprotop);
@@ -62,6 +77,7 @@ int ofproto_set_listeners(struct ofproto *, const struct svec *listeners);
 int ofproto_set_snoops(struct ofproto *, const struct svec *snoops);
 int ofproto_set_netflow(struct ofproto *,
                         const struct netflow_options *nf_options);
+void ofproto_set_sflow(struct ofproto *, const struct ofproto_sflow_options *);
 void ofproto_set_failure(struct ofproto *, bool fail_open);
 void ofproto_set_rate_limit(struct ofproto *, int rate_limit, int burst_limit);
 int ofproto_set_stp(struct ofproto *, bool enable_stp);
index 502e8db..f862c33 100644 (file)
@@ -238,6 +238,9 @@ noinst_PROGRAMS += tests/test-timeval
 tests_test_timeval_SOURCES = tests/test-timeval.c
 tests_test_timeval_LDADD = lib/libopenvswitch.a
 
+noinst_PROGRAMS += tests/test-strtok_r
+tests_test_strtok_r_SOURCES = tests/test-strtok_r.c
+
 noinst_PROGRAMS += tests/test-type-props
 tests_test_type_props_SOURCES = tests/test-type-props.c
 
index fa2c5f3..0e408f0 100644 (file)
@@ -34,3 +34,7 @@ AT_CLEANUP
 AT_SETUP([test type properties])
 AT_CHECK([test-type-props], [0], [ignore])
 AT_CLEANUP
+
+AT_SETUP([test strtok_r bug fix])
+AT_CHECK([test-strtok_r], [0], [ignore])
+AT_CLEANUP
diff --git a/tests/test-strtok_r.c b/tests/test-strtok_r.c
new file mode 100644 (file)
index 0000000..9f8d898
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2010 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Some versions of glibc 2.7 have a bug in strtok_r, when compiled with
+ * optimization, that can cause segfaults:
+ *      http://sources.redhat.com/bugzilla/show_bug.cgi?id=5614.
+ *
+ * Open vSwitch works around this problem by supplying a replacement string.h.
+ * This test program verifies that the workaround is in place.
+ */
+int
+main(void)
+{
+    char string[] = ":::";                  /* Delimiters only: no tokens. */
+    char *save_ptr = (char *) 0xc0ffee;     /* Poison: first call must set it. */
+    char *token1 = strtok_r(string, ":", &save_ptr);
+    char *token2 = strtok_r(NULL, ":", &save_ptr);
+    /* Both tokens are null; passing a null pointer for %s is undefined. */
+    printf("%s %s\n", token1 ? token1 : "(null)", token2 ? token2 : "(null)");
+    return 0;
+}
index b3d7841..5feb01c 100644 (file)
@@ -72,6 +72,7 @@ utilities_ovs_ofctl_LDADD = lib/libopenvswitch.a $(SSL_LIBS)
 utilities_ovs_openflowd_SOURCES = utilities/ovs-openflowd.c
 utilities_ovs_openflowd_LDADD = \
        ofproto/libofproto.a \
+       lib/libsflow.a \
        lib/libopenvswitch.a \
        $(SSL_LIBS)
 
index dbdd741..0b7d92d 100644 (file)
@@ -281,7 +281,8 @@ parse_options(int argc, char *argv[])
         OPT_ACCEPT_VCONN = UCHAR_MAX + 1,
         OPT_EXIT_WITHOUT_BIND,
         OPT_EXIT_AFTER_BIND,
-        OPT_NO_DETACH
+        OPT_NO_DETACH,
+        VLOG_OPTION_ENUMS
     };
     static struct option long_options[] = {
         {"accept-vconn", required_argument, 0, OPT_ACCEPT_VCONN},
@@ -291,9 +292,9 @@ parse_options(int argc, char *argv[])
         {"timeout",     required_argument, 0, 't'},
         {"pidfile",     optional_argument, 0, OPT_PIDFILE},
         {"overwrite-pidfile", no_argument, 0, OPT_OVERWRITE_PIDFILE},
-        {"verbose",     optional_argument, 0, 'v'},
         {"help",        no_argument, 0, 'h'},
         {"version",     no_argument, 0, 'V'},
+        VLOG_LONG_OPTIONS,
         {0, 0, 0, 0},
     };
     char *short_options = long_options_to_short_options(long_options);
@@ -353,9 +354,7 @@ parse_options(int argc, char *argv[])
             OVS_PRINT_VERSION(0, 0);
             exit(EXIT_SUCCESS);
 
-        case 'v':
-            vlog_set_verbosity(optarg);
-            break;
+        VLOG_OPTION_HANDLERS
 
         case '?':
             exit(EXIT_FAILURE);
index fceab4c..ebcf2e2 100644 (file)
@@ -63,11 +63,15 @@ main(int argc, char *argv[])
 static void
 parse_options(int argc, char *argv[])
 {
+    enum {
+        OPT_DUMMY = UCHAR_MAX + 1,
+        VLOG_OPTION_ENUMS
+    };
     static struct option long_options[] = {
         {"timeout", required_argument, 0, 't'},
-        {"verbose", optional_argument, 0, 'v'},
         {"help", no_argument, 0, 'h'},
         {"version", no_argument, 0, 'V'},
+        VLOG_LONG_OPTIONS,
         {0, 0, 0, 0},
     };
     char *short_options = long_options_to_short_options(long_options);
@@ -99,9 +103,7 @@ parse_options(int argc, char *argv[])
             OVS_PRINT_VERSION(0, 0);
             exit(EXIT_SUCCESS);
 
-        case 'v':
-            vlog_set_verbosity(optarg);
-            break;
+        VLOG_OPTION_HANDLERS
 
         case '?':
             exit(EXIT_FAILURE);
index 52ec0b6..2447ba2 100644 (file)
@@ -83,14 +83,15 @@ static void
 parse_options(int argc, char *argv[])
 {
     enum {
-        OPT_STRICT = UCHAR_MAX + 1
+        OPT_STRICT = UCHAR_MAX + 1,
+        VLOG_OPTION_ENUMS
     };
     static struct option long_options[] = {
         {"timeout", required_argument, 0, 't'},
-        {"verbose", optional_argument, 0, 'v'},
         {"strict", no_argument, 0, OPT_STRICT},
         {"help", no_argument, 0, 'h'},
         {"version", no_argument, 0, 'V'},
+        VLOG_LONG_OPTIONS,
         STREAM_SSL_LONG_OPTIONS
         {0, 0, 0, 0},
     };
@@ -123,14 +124,11 @@ parse_options(int argc, char *argv[])
             OVS_PRINT_VERSION(OFP_VERSION, OFP_VERSION);
             exit(EXIT_SUCCESS);
 
-        case 'v':
-            vlog_set_verbosity(optarg);
-            break;
-
         case OPT_STRICT:
             strict = true;
             break;
 
+        VLOG_OPTION_HANDLERS
         STREAM_SSL_OPTION_HANDLERS
 
         case '?':
index 9121169..c38add6 100644 (file)
@@ -18,6 +18,7 @@ vswitchd_ovs_vswitchd_SOURCES = \
        vswitchd/xenserver.h
 vswitchd_ovs_vswitchd_LDADD = \
        ofproto/libofproto.a \
+       lib/libsflow.a \
        lib/libopenvswitch.a \
        $(SSL_LIBS)
 
index 66f0884..88f8db1 100644 (file)
@@ -61,6 +61,7 @@
 #include "vswitchd/vswitch-idl.h"
 #include "xenserver.h"
 #include "xtoxll.h"
+#include "sflow_api.h"
 
 #define THIS_MODULE VLM_bridge
 #include "vlog.h"
@@ -536,6 +537,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
     struct shash_node *node;
     struct bridge *br, *next;
     size_t i;
+    int sflow_bridge_number;
 
     COVERAGE_INC(bridge_reconfigure);
 
@@ -669,6 +671,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
         shash_destroy(&cur_ifaces);
         shash_destroy(&want_ifaces);
     }
+    sflow_bridge_number = 0;
     LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
         uint8_t ea[8];
         uint64_t dpid;
@@ -750,6 +753,45 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
             ofproto_set_netflow(br->ofproto, NULL);
         }
 
+        /* Set sFlow configuration on this bridge. */
+        if (br->cfg->sflow) {
+            struct ovsrec_sflow *sflow_cfg = br->cfg->sflow;
+            struct ofproto_sflow_options oso;
+
+            memset(&oso, 0, sizeof oso);
+
+            oso.targets.n = sflow_cfg->n_targets;
+            oso.targets.names = sflow_cfg->targets;
+
+            oso.sampling_rate = SFL_DEFAULT_SAMPLING_RATE;
+            if (sflow_cfg->sampling) {
+                oso.sampling_rate = *sflow_cfg->sampling;
+            }
+
+            oso.polling_interval = SFL_DEFAULT_POLLING_INTERVAL;
+            if (sflow_cfg->polling) {
+                oso.polling_interval = *sflow_cfg->polling;
+            }
+
+            oso.header_len = SFL_DEFAULT_HEADER_SIZE;
+            if (sflow_cfg->header) {
+                oso.header_len = *sflow_cfg->header;
+            }
+
+            oso.sub_id = sflow_bridge_number++;
+            oso.agent_device = sflow_cfg->agent;
+
+#if 0       /* xxx foo */
+            ctrl = bridge_get_controller(ovs_cfg, br);
+            oso.control_ip = ctrl ? ctrl->local_ip : NULL;
+#endif
+            ofproto_set_sflow(br->ofproto, &oso);
+
+            svec_destroy(&oso.targets);
+        } else {
+            ofproto_set_sflow(br->ofproto, NULL);
+        }
+
         /* Update the controller and related settings.  It would be more
          * straightforward to call this from bridge_reconfigure_one(), but we
          * can't do it there for two reasons.  First, and most importantly, at
index 958265b..71115f9 100644 (file)
@@ -52,6 +52,9 @@ Port mirroring, with optional VLAN tagging.
 NetFlow v5 flow logging.
 .
 .IP \(bu
+sFlow(R) monitoring.
+.
+.IP \(bu
 Connectivity to an external OpenFlow controller, such as NOX.
 .
 .PP
index ff5766a..b8e457d 100644 (file)
@@ -13,6 +13,7 @@ s["tables"]["Open_vSwitch"]["columns"]["ssl"]["type"]["keyRefTable"] = "SSL"
 s["tables"]["Bridge"]["columns"]["ports"]["type"]["keyRefTable"] = "Port"
 s["tables"]["Bridge"]["columns"]["mirrors"]["type"]["keyRefTable"] = "Mirror"
 s["tables"]["Bridge"]["columns"]["netflow"]["type"]["keyRefTable"] = "NetFlow"
+s["tables"]["Bridge"]["columns"]["sflow"]["type"]["keyRefTable"] = "sFlow"
 s["tables"]["Bridge"]["columns"]["controller"]["type"]["keyRefTable"] = "Controller"
 s["tables"]["Port"]["columns"]["interfaces"]["type"]["keyRefTable"] = "Interface"
 s["tables"]["Mirror"]["columns"]["select_src_port"]["type"]["keyRefTable"] = "Port"
index 7b0a539..022d65f 100644 (file)
@@ -44,6 +44,9 @@
        "netflow": {
          "comment": "NetFlow configuration.",
          "type": {"key": "uuid", "min": 0, "max": 1}},
+       "sflow": {
+         "comment": "sFlow configuration.",
+         "type": {"key": "uuid", "min": 0, "max": 1}},
        "controller": {
          "comment": "OpenFlow controller.  If unset, defaults to that specified by the parent Open_vSwitch.",
          "type": {"key": "uuid", "min": 0, "max": 1}},
        "active_timeout": {
          "comment": "Active timeout interval, in seconds.  A value of 0 requests the default timeout; a negative value disables active timeouts.",
          "type": "integer"}}},
+   "sFlow": {
+     "comment": "A sFlow target.",
+     "columns": {
+       "targets": {
+         "comment": "sFlow targets in the form \"IP:PORT\".",
+         "type": {"key": "string", "min": 1, "max": "unlimited"}},
+       "sampling": {
+         "comment": "Rate at which packets should be sampled and sent to the collector.  If not specified, defaults to 400, which means one out of 400, on average, will be sent to the collector.",
+         "type": {"key": "integer", "min": 0, "max": 1}},
+       "polling": {
+         "comment": "Polling rate in seconds to send port statistics to the collector.  If not specified, defaults to 30 seconds.",
+         "type": {"key": "integer", "min": 0, "max": 1}},
+       "header": {
+         "comment": "Number of bytes of a sampled packet to send to the collector.  If not specified, defaults to 128 bytes.",
+         "type": {"key": "integer", "min": 0, "max": 1}},
+       "agent": {
+         "comment": "IP address to report as \"agent address\" to collectors.  If not specified, defaults to the controller's \"local_ip\" value.  If neither is specified, sFlow is disabled.",
+         "type": {"key": "string", "min": 0, "max": 1}}}},
    "Controller": {
      "comment": "An OpenFlow controller.",
      "columns": {
index 221925c..1b93045 100644 (file)
@@ -1,4 +1,4 @@
-# Copyright (C) 2009 Nicira Networks, Inc.
+# Copyright (C) 2009, 2010 Nicira Networks, Inc.
 #
 # Copying and distribution of this file, with or without modification,
 # are permitted in any medium without royalty provided the copyright
@@ -6,18 +6,23 @@
 # without warranty of any kind.
 
 EXTRA_DIST += \
+       xenserver/LICENSE \
        xenserver/README \
+       xenserver/automake.mk \
        xenserver/etc_init.d_vswitch \
        xenserver/etc_init.d_vswitch-xapi-update \
        xenserver/etc_logrotate.d_vswitch \
        xenserver/etc_profile.d_vswitch.sh \
        xenserver/etc_xapi.d_plugins_vswitch-cfg-update \
        xenserver/etc_xensource_scripts_vif \
+       xenserver/opt_xensource_libexec_InterfaceReconfigure.py \
+       xenserver/opt_xensource_libexec_InterfaceReconfigureBridge.py \
+       xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py \
        xenserver/opt_xensource_libexec_interface-reconfigure \
        xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py \
        xenserver/usr_sbin_brctl \
        xenserver/usr_sbin_xen-bugtool \
-       xenserver/usr_share_vswitch_scripts_sysconfig.template \
        xenserver/usr_share_vswitch_scripts_dump-vif-details \
        xenserver/usr_share_vswitch_scripts_refresh-xs-network-uuids \
+       xenserver/usr_share_vswitch_scripts_sysconfig.template \
        xenserver/vswitch-xen.spec
index 7bc826a..95c6b80 100755 (executable)
@@ -386,9 +386,9 @@ case "$1" in
         strace -p $(cat "$BRCOMPATD_PIDFILE") "$@"
         ;;
     status)
-        status -p ovsdb-server.pid ovsdb-server
-        status -p ovs-vswitchd.pid ovs-vswitchd
-        status -p ovs-brcompatd.pid ovs-brcompatd
+        status -p "$OVSDB_SERVER_PIDFILE" ovsdb-server
+        status -p "$VSWITCHD_PIDFILE" ovs-vswitchd
+        status -p "$BRCOMPATD_PIDFILE" ovs-brcompatd
         ;;
     version)
         /usr/sbin/ovsdb-server -V
index 667d7fa..1c5f079 100755 (executable)
@@ -27,7 +27,7 @@ function do_host_call {
 
 function start {
     if [ ! -f /etc/xensource-inventory ]; then
-        printf "vxwitch-xapi-update ERROR: XenSource inventory not present in /etc/xensource-inventory\n"
+        printf "vswitch-xapi-update ERROR: XenSource inventory not present in /etc/xensource-inventory\n"
         exit 1
     fi
     source /etc/xensource-inventory
index 6905448..4e24d83 100755 (executable)
@@ -1,7 +1,6 @@
 #!/bin/sh
 
 # Copyright (C) 2008,2009 Citrix Systems, Inc.
-# Copyright (C) 2009 Nicira Networks, Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published
 
 # Keep other-config/ keys in sync with device.ml:vif_udev_keys
 
+BRCTL="/usr/sbin/brctl"
+IP="/sbin/ip"
+
 vsctl="/usr/bin/ovs-vsctl"
 dump_vif_details="/usr/share/vswitch/scripts/dump-vif-details"
-service="/sbin/service"
-
-TYPE=`echo ${XENBUS_PATH} | cut -f 2 -d '/'`
-DOMID=`echo ${XENBUS_PATH} | cut -f 3 -d '/'`
-DEVID=`echo ${XENBUS_PATH} | cut -f 4 -d '/'`
-
-XAPI=/xapi/${DOMID}/hotplug/${TYPE}/${DEVID}
-HOTPLUG=/xapi/${DOMID}/hotplug/${TYPE}/${DEVID}
-PRIVATE=/xapi/${DOMID}/private/${TYPE}/${DEVID}
-BRCTL=/usr/sbin/brctl
-IP=/sbin/ip
-
 
 handle_promiscuous()
 {
-    local arg=$(xenstore-read "${PRIVATE}/other-config/promiscuous")
+    local arg=$(xenstore-read "${PRIVATE}/other-config/promiscuous" 2>/dev/null)
     if [ $? -eq 0 -a -n "${arg}" ] ; then
-        case "${arg}" in 
-            true|on) logger -t script-vif "${vif}: Promiscuous ports are not supported via vSwitch." ;;
-            *) ;;
-        esac
+       case $NETWORK_MODE in
+           bridge)
+               case "${arg}" in 
+                   true|on) echo 1 > /sys/class/net/${dev}/brport/promisc ;;
+                   *) echo 0 > /sys/class/net/${dev}/brport/promisc ;;
+               esac
+               ;;
+           vswitch)
+               logger -t script-vif "${dev}: Promiscuous ports are not supported via vSwitch."
+               ;;
+       esac
     fi
 }
 
 handle_ethtool()
 {
     local opt=$1
-    local arg=$(xenstore-read "${PRIVATE}/other-config/ethtool-${opt}")
+    local arg=$(xenstore-read "${PRIVATE}/other-config/ethtool-${opt}" 2>/dev/null)
     if [ $? -eq 0 -a -n "${arg}" ] ; then
         case "${arg}" in
-            true|on)   /sbin/ethtool -K "${vif}" "${opt}" on ;;
-            false|off) /sbin/ethtool -K "${vif}" "${opt}" off ;;
-            *) logger -t scripts-vif "Unknown ethtool argument ${opt}=${arg} on ${vif}/${VIFUUID}" ;;
+            true|on)   /sbin/ethtool -K "${dev}" "${opt}" on ;;
+            false|off) /sbin/ethtool -K "${dev}" "${opt}" off ;;
+            *) logger -t scripts-vif "Unknown ethtool argument ${opt}=${arg} on ${dev}/${VIFUUID}" ;;
         esac
     fi
 }
 
 handle_mtu()
 {
-    local mtu=$(xenstore-read "${PRIVATE}/MTU")
+    local mtu=$(xenstore-read "${PRIVATE}/MTU" 2>/dev/null)
     if [ $? -eq 0 -a -n "${mtu}" ]; then
-       echo "${mtu}" > /sys/class/net/${vif}/mtu
+       echo "${mtu}" > /sys/class/net/${dev}/mtu
     fi
 }
 
@@ -73,54 +70,118 @@ add_to_bridge()
     local address=$(xenstore-read "${PRIVATE}/bridge-MAC")
     if [ $? -ne 0 -o -z "${address}" ]; then
        logger -t scripts-vif "Failed to read ${PRIVATE}/bridge-MAC from xenstore"
+       exit 1
     fi
     local bridge=$(xenstore-read "${PRIVATE}/bridge")
     if [ $? -ne 0 -o -z "${bridge}" ]; then
        logger -t scripts-vif "Failed to read ${PRIVATE}/bridge from xenstore"
+       exit 1
     fi
-    logger -t scripts-vif "Adding ${vif} to ${bridge} with address ${address}"
-
-    ${IP} link set "${vif}" down                        || logger -t scripts-vif "Failed to ip link set ${vif} down"
-    ${IP} link set "${vif}" arp off                     || logger -t scripts-vif "Failed to ip link set ${vif} arp off"
-    ${IP} link set "${vif}" multicast off               || logger -t scripts-vif "Failed to ip link set ${vif} multicast off"
-    ${IP} link set "${vif}" address "${address}"        || logger -t scripts-vif "Failed to ip link set ${vif} address ${address}"
-    ${IP} addr flush "${vif}"                           || logger -t scripts-vif "Failed to ip addr flush ${vif}"
-
-    local vif_details=$($dump_vif_details $DOMID $DEVID)
-    if [ $? -ne 0 -o -z "${vif_details}" ]; then
-           logger -t scripts-vif "Failed to retrieve vif details for vswitch"
-    fi
-
-    $vsctl add-port $bridge $vif $vif_details
+    logger -t scripts-vif "Adding ${dev} to ${bridge} with address ${address}"
+
+    ${IP} link set "${dev}" down                        || logger -t scripts-vif "Failed to ip link set ${dev} down"
+    ${IP} link set "${dev}" arp off                     || logger -t scripts-vif "Failed to ip link set ${dev} arp off"
+    ${IP} link set "${dev}" multicast off               || logger -t scripts-vif "Failed to ip link set ${dev} multicast off"
+    ${IP} link set "${dev}" address "${address}"        || logger -t scripts-vif "Failed to ip link set ${dev} address ${address}"
+    ${IP} addr flush "${dev}"                           || logger -t scripts-vif "Failed to ip addr flush ${dev}"
+
+    case $NETWORK_MODE in
+       bridge)
+           ${BRCTL} setfd "${bridge}" 0                        || logger -t scripts-vif "Failed to brctl setfd ${bridge} 0"
+           ${BRCTL} addif "${bridge}" "${dev}"                 || logger -t scripts-vif "Failed to brctl addif ${bridge} ${dev}"
+           ;;
+       vswitch)
+               local vif_details=$($dump_vif_details $DOMID $DEVID)
+               if [ $? -ne 0 -o -z "${vif_details}" ]; then
+                       logger -t scripts-vif "Failed to retrieve vif details for vswitch"
+               fi
+
+               $vsctl add-port $bridge $dev $vif_details
+           ;;
+    esac
+           
+    ${IP} link set "${dev}" up                          || logger -t scripts-vif "Failed to ip link set ${dev} up"
+}
 
-    ${IP} link set "${vif}" up                          || logger -t scripts-vif "Failed to ip link set ${vif} up"
+remove_from_bridge()
+{
+    case $NETWORK_MODE in
+       bridge)
+           # Nothing to do
+           ;;
+       vswitch)
+        $vsctl del-port $bridge $dev
+           ;;
+    esac
 }
 
-echo Called as "$@" "$TYPE" "$DOMID" "$DEVID" | logger -t scripts-vif
-case "$1" in
-online)
-       handle_ethtool rx
-       handle_ethtool tx
-       handle_ethtool sg
-       handle_ethtool tso
-       handle_ethtool ufo
-       handle_ethtool gso
+NETWORK_MODE=$(cat /etc/xensource/network.conf)
+ACTION=$1
+TYPE=$2
 
-       handle_mtu
-       add_to_bridge
-       handle_promiscuous
+case $NETWORK_MODE in
+    bridge|vswitch) ;;
+    *)
+       logger -t scripts-vif "Unknown network mode $NETWORK_MODE"
+       exit 1
+       ;;
+esac
 
-       xenstore-write "${HOTPLUG}/vif" "${vif}"
-       xenstore-write "${HOTPLUG}/hotplug" "online"
+case ${TYPE} in
+    vif)
+       DOMID=`echo ${XENBUS_PATH} | cut -f 3 -d '/'`
+       DEVID=`echo ${XENBUS_PATH} | cut -f 4 -d '/'`
+       dev=vif${DOMID}.${DEVID}
+       ;;
+    tap)
+       dev=$INTERFACE
+       DOMID=`echo ${dev#tap} | cut -f 1 -d '.'`
+       DEVID=`echo ${dev#tap} | cut -f 2 -d '.'`
+       ;;
+    *)  
+       logger -t scripts-vif "unknown interface type ${TYPE}"
+       exit 1
+       ;;
+esac
 
-       # xs-xen.pq.hq:91e986b8e49f netback-wait-for-hotplug
-       xenstore-write "/local/domain/0/backend/vif/${DOMID}/${DEVID}/hotplug-status" "connected"
+XAPI=/xapi/${DOMID}/hotplug/vif/${DEVID}
+HOTPLUG=/xapi/${DOMID}/hotplug/vif/${DEVID}
+PRIVATE=/xapi/${DOMID}/private/vif/${DEVID}
 
+logger -t scripts-vif "Called as \"$@\" domid:$DOMID devid:$DEVID mode:$NETWORK_MODE"
+case "${ACTION}" in
+online)
+       if [ "${TYPE}" = "vif" ] ; then
+           handle_ethtool rx
+           handle_ethtool tx
+           handle_ethtool sg
+           handle_ethtool tso
+           handle_ethtool ufo
+           handle_ethtool gso
+
+           handle_mtu
+           add_to_bridge
+           handle_promiscuous
+
+           xenstore-write "${HOTPLUG}/vif" "${dev}"
+           xenstore-write "${HOTPLUG}/hotplug" "online"
+
+           # xs-xen.pq.hq:91e986b8e49f netback-wait-for-hotplug
+           xenstore-write "/local/domain/0/backend/vif/${DOMID}/${DEVID}/hotplug-status" "connected"
+       fi
        ;;
+
+add)
+       if [ "${TYPE}" = "tap" ] ; then
+           add_to_bridge
+       fi
+       ;;
+
 remove)
-       xenstore-rm "${HOTPLUG}/hotplug"
-       vif=vif${DOMID}.${DEVID}
-       logger -t scripts-vif "${vif} has been removed"
-       $vsctl del-port $bridge $vif
+       if [ "${TYPE}" = "vif" ] ;then
+           xenstore-rm "${HOTPLUG}/hotplug"
+       fi
+       logger -t scripts-vif "${dev} has been removed"
+       remove_from_bridge
        ;;
 esac
diff --git a/xenserver/opt_xensource_libexec_InterfaceReconfigure.py b/xenserver/opt_xensource_libexec_InterfaceReconfigure.py
new file mode 100644 (file)
index 0000000..229f3b9
--- /dev/null
@@ -0,0 +1,793 @@
+# Copyright (c) 2008,2009 Citrix Systems, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; version 2.1 only. with the special
+# exception on linking described in file LICENSE.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+import syslog
+import os
+
+from xml.dom.minidom import getDOMImplementation
+from xml.dom.minidom import parse as parseXML
+
+#
+# Logging.
+#
+
+def log(s):
+    """Send the message s to the system log via syslog.syslog()."""
+    syslog.syslog(s)
+
+#
+# Exceptions.
+#
+
+class Error(Exception):
+    """Exception raised by interface-reconfigure for reportable failures.
+
+    The human-readable description is kept in the `msg` attribute and is
+    also handed to the Exception base class, so that str(e) and
+    tracebacks show the description instead of an empty string.
+    """
+    def __init__(self, msg):
+        Exception.__init__(self, msg)
+        self.msg = msg
+
+#
+# Run external utilities
+#
+
+def run_command(command):
+    """Run an external program and wait for it to finish.
+
+    command is an argv-style list whose first element is the path of the
+    executable. Returns True when os.spawnl() reports 0; otherwise the
+    failure is logged and False is returned.
+    """
+    cmdline = ' '.join(command)
+    log("Running command: " + cmdline)
+    status = os.spawnl(os.P_WAIT, command[0], *command)
+    if status == 0:
+        return True
+    log("Command failed %d: " % status + cmdline)
+    return False
+
+#
+# Configuration File Handling.
+#
+
+class ConfigurationFile(object):
+    """Write a file, tracking old and new versions.
+
+    Supports writing a new version of a file and applying and
+    reverting those changes.
+
+    New content is written to ".<basename>.xapi-new" next to the target
+    path; apply() moves the current file aside to ".<basename>.xapi-old"
+    and installs the new one.
+
+    State transitions enforced by the methods below:
+        OPEN --close()--> NOT-APPLIED --apply()--> APPLIED
+        APPLIED --revert()--> REVERTED
+        APPLIED --commit()--> COMMITTED
+    Calling a method in any other state raises Error.
+    """
+
+    # Symbolic names for the states; each maps to itself.
+    __STATE = {"OPEN":"OPEN",
+               "NOT-APPLIED":"NOT-APPLIED", "APPLIED":"APPLIED",
+               "REVERTED":"REVERTED", "COMMITTED": "COMMITTED"}
+
+    def __init__(self, path):
+        """Open the ".xapi-new" sibling of path for writing.
+
+        The file at path itself is not touched until apply().
+        """
+        dirname,basename = os.path.split(path)
+
+        self.__state = self.__STATE['OPEN']
+        self.__children = []
+
+        self.__path    = os.path.join(dirname, basename)
+        self.__oldpath = os.path.join(dirname, "." + basename + ".xapi-old")
+        self.__newpath = os.path.join(dirname, "." + basename + ".xapi-new")
+
+        self.__f = open(self.__newpath, "w")
+
+    def attach_child(self, child):
+        """Register child so that apply(), revert() and commit() are
+        called on it before this file's own step."""
+        self.__children.append(child)
+
+    def path(self):
+        """Return the path of the target (live) configuration file."""
+        return self.__path
+
+    def readlines(self):
+        """Return the lines of the current file at path().
+
+        Any exception while opening or reading yields the empty string ""
+        (not an empty list).
+        """
+        try:
+            return open(self.path()).readlines()
+        except:
+            return ""
+
+    def write(self, args):
+        """Append args to the new version; only valid in state OPEN."""
+        if self.__state != self.__STATE['OPEN']:
+            raise Error("Attempt to write to file in state %s" % self.__state)
+        self.__f.write(args)
+
+    def close(self):
+        """Finish writing the new version and move to state NOT-APPLIED."""
+        if self.__state != self.__STATE['OPEN']:
+            raise Error("Attempt to close file in state %s" % self.__state)
+
+        self.__f.close()
+        self.__state = self.__STATE['NOT-APPLIED']
+
+    def changed(self):
+        """Report whether the new version differs from the current one.
+
+        No comparison is performed: after the state check this always
+        returns True.
+        """
+        if self.__state != self.__STATE['NOT-APPLIED']:
+            raise Error("Attempt to compare file in state %s" % self.__state)
+
+        return True
+
+    def apply(self):
+        """Install the new version at path(), keeping the previous file
+        as the ".xapi-old" backup. Children are applied first."""
+        if self.__state != self.__STATE['NOT-APPLIED']:
+            raise Error("Attempt to apply configuration from state %s" % self.__state)
+
+        for child in self.__children:
+            child.apply()
+
+        log("Applying changes to %s configuration" % self.__path)
+
+        # Remove previous backup.
+        if os.access(self.__oldpath, os.F_OK):
+            os.unlink(self.__oldpath)
+
+        # Save current configuration.
+        if os.access(self.__path, os.F_OK):
+            os.link(self.__path, self.__oldpath)
+            os.unlink(self.__path)
+
+        # Apply new configuration.
+        assert(os.path.exists(self.__newpath))
+        os.link(self.__newpath, self.__path)
+
+        # Remove temporary file.
+        os.unlink(self.__newpath)
+
+        self.__state = self.__STATE['APPLIED']
+
+    def revert(self):
+        """Undo apply(): move the applied file back to ".xapi-new" and
+        restore the ".xapi-old" backup to path(). Children are reverted
+        first."""
+        if self.__state != self.__STATE['APPLIED']:
+            raise Error("Attempt to revert configuration from state %s" % self.__state)
+
+        for child in self.__children:
+            child.revert()
+
+        log("Reverting changes to %s configuration" % self.__path)
+
+        # Remove existing new configuration
+        if os.access(self.__newpath, os.F_OK):
+            os.unlink(self.__newpath)
+
+        # Revert new configuration.
+        if os.access(self.__path, os.F_OK):
+            os.link(self.__path, self.__newpath)
+            os.unlink(self.__path)
+
+        # Revert to old configuration.
+        if os.access(self.__oldpath, os.F_OK):
+            os.link(self.__oldpath, self.__path)
+            os.unlink(self.__oldpath)
+
+        # Leave .*.xapi-new as an aid to debugging.
+
+        self.__state = self.__STATE['REVERTED']
+
+    def commit(self):
+        """Make an applied change permanent by deleting the ".xapi-old"
+        and ".xapi-new" files if present. Children are committed first."""
+        if self.__state != self.__STATE['APPLIED']:
+            raise Error("Attempt to commit configuration from state %s" % self.__state)
+
+        for child in self.__children:
+            child.commit()
+
+        log("Committing changes to %s configuration" % self.__path)
+
+        if os.access(self.__oldpath, os.F_OK):
+            os.unlink(self.__oldpath)
+        if os.access(self.__newpath, os.F_OK):
+            os.unlink(self.__newpath)
+
+        self.__state = self.__STATE['COMMITTED']
+
+#
+# Helper functions for encoding/decoding database attributes to/from XML.
+#
+
+def _str_to_xml(xml, parent, tag, val):
+    """Append <tag>val</tag> to parent, using document xml to create nodes."""
+    element = xml.createElement(tag)
+    parent.appendChild(element)
+    element.appendChild(xml.createTextNode(val))
+def _str_from_xml(n):
+    """Return the text directly under node n, concatenated and stripped.
+
+    Only immediate child text nodes contribute; text nested inside child
+    elements is ignored.
+    """
+    pieces = [child.data for child in n.childNodes
+              if child.nodeType == child.TEXT_NODE]
+    return "".join(pieces).strip()
+
+def _bool_to_xml(xml, parent, tag, val):
+    """Append <tag>True</tag> or <tag>False</tag> to parent, by truthiness of val."""
+    text = "False"
+    if val:
+        text = "True"
+    _str_to_xml(xml, parent, tag, text)
+def _bool_from_xml(n):
+    """Decode the text of node n as a boolean.
+
+    Accepts exactly "True" or "False"; any other text raises Error.
+    """
+    text = _str_from_xml(n)
+    known = {"True": True, "False": False}
+    if text not in known:
+        raise Error("Unknown boolean value %s" % text)
+    return known[text]
+
+def _strlist_to_xml(xml, parent, ltag, itag, val):
+    """Append an <ltag> element to parent holding one <itag> per string in val."""
+    container = xml.createElement(ltag)
+    parent.appendChild(container)
+    for text in val:
+        item = xml.createElement(itag)
+        container.appendChild(item)
+        item.appendChild(xml.createTextNode(text))
+def _strlist_from_xml(n, ltag, itag):
+    """Return the text of every <itag> child of node n, in document order.
+
+    ltag is accepted for symmetry with _strlist_to_xml but is not used.
+    """
+    return [_str_from_xml(child) for child in n.childNodes
+            if child.nodeName == itag]
+
+def _otherconfig_to_xml(xml, parent, val, attrs):
+    """Append an <other_config> element to parent with one child per key of val.
+
+    Every key of val must be listed in attrs; an unlisted key raises Error.
+    """
+    container = xml.createElement("other_config")
+    parent.appendChild(container)
+    for key, text in val.items():
+        if key not in attrs:
+            raise Error("Unknown other-config attribute: %s" % key)
+        _str_to_xml(xml, container, key, text)
+def _otherconfig_from_xml(n, attrs):
+    """Return a dict built from the children of node n whose names are in attrs.
+
+    Children with any other name are silently ignored.
+    """
+    return dict((child.nodeName, _str_from_xml(child))
+                for child in n.childNodes if child.nodeName in attrs)
+
+#
+# Definitions of the database objects (and their attributes) used by interface-reconfigure.
+#
+# Each object is defined by a dictionary mapping an attribute name in
+# the xapi database to a tuple containing two items:
+#  - a function which takes this attribute and encodes it as XML.
+#  - a function which takes XML and decodes it into a value.
+#
+# other-config attributes are specified as a simple array of strings
+
+# XML element names used for each kind of database object in the cache file.
+_PIF_XML_TAG = "pif"
+_VLAN_XML_TAG = "vlan"
+_BOND_XML_TAG = "bond"
+_NETWORK_XML_TAG = "network"
+
+# other-config keys of the form ethtool-<option>.
+# The tuple is parenthesized: a bare tuple after "in" inside a list
+# comprehension is only accepted by Python 2.
+_ETHTOOL_OTHERCONFIG_ATTRS = ['ethtool-%s' % x for x in ('autoneg', 'speed', 'duplex', 'rx', 'tx', 'sg', 'tso', 'ufo', 'gso')]
+
+# other-config keys recognised on a PIF record.
+_PIF_OTHERCONFIG_ATTRS = [ 'domain', 'peerdns', 'defaultroute', 'mtu', 'static-routes' ] + \
+                        [ 'bond-%s' % x for x in ('mode', 'miimon', 'downdelay', 'updelay', 'use_carrier') ] + \
+                        _ETHTOOL_OTHERCONFIG_ATTRS
+
+# PIF attributes stored in the cache: name -> (encoder to XML, decoder from XML).
+_PIF_ATTRS = { 'uuid': (_str_to_xml,_str_from_xml),
+               'management': (_bool_to_xml,_bool_from_xml),
+               'network': (_str_to_xml,_str_from_xml),
+               'device': (_str_to_xml,_str_from_xml),
+               'bond_master_of': (lambda x, p, t, v: _strlist_to_xml(x, p, 'bond_master_of', 'slave', v),
+                                  lambda n: _strlist_from_xml(n, 'bond_master_of', 'slave')),
+               'bond_slave_of': (_str_to_xml,_str_from_xml),
+               'VLAN': (_str_to_xml,_str_from_xml),
+               'VLAN_master_of': (_str_to_xml,_str_from_xml),
+               # The list tag passed to the decoder matches the encoder's
+               # spelling ('VLAN_slave_of').
+               'VLAN_slave_of': (lambda x, p, t, v: _strlist_to_xml(x, p, 'VLAN_slave_of', 'master', v),
+                                 lambda n: _strlist_from_xml(n, 'VLAN_slave_of', 'master')),
+               'ip_configuration_mode': (_str_to_xml,_str_from_xml),
+               'IP': (_str_to_xml,_str_from_xml),
+               'netmask': (_str_to_xml,_str_from_xml),
+               'gateway': (_str_to_xml,_str_from_xml),
+               'DNS': (_str_to_xml,_str_from_xml),
+               'MAC': (_str_to_xml,_str_from_xml),
+               'other_config': (lambda x, p, t, v: _otherconfig_to_xml(x, p, v, _PIF_OTHERCONFIG_ATTRS),
+                                lambda n: _otherconfig_from_xml(n, _PIF_OTHERCONFIG_ATTRS)),
+
+               # Special case: We write the current value
+               # PIF.currently-attached to the cache but since it will
+               # not be valid when we come to use the cache later
+               # (i.e. after a reboot) we always read it as False.
+               'currently_attached': (_bool_to_xml, lambda n: False),
+             }
+
+# VLAN attributes stored in the cache: name -> (encoder to XML, decoder from XML).
+_VLAN_ATTRS = { 'uuid': (_str_to_xml,_str_from_xml),
+                'tagged_PIF': (_str_to_xml,_str_from_xml),
+                'untagged_PIF': (_str_to_xml,_str_from_xml),
+              }
+
+# Bond attributes stored in the cache; 'slaves' is a list of <slave> items.
+_BOND_ATTRS = { 'uuid': (_str_to_xml,_str_from_xml),
+               'master': (_str_to_xml,_str_from_xml),
+               'slaves': (lambda x, p, t, v: _strlist_to_xml(x, p, 'slaves', 'slave', v),
+                          lambda n: _strlist_from_xml(n, 'slaves', 'slave')),
+              }
+
+# other-config keys recognised on a network record.
+_NETWORK_OTHERCONFIG_ATTRS = [ 'mtu', 'static-routes' ] + _ETHTOOL_OTHERCONFIG_ATTRS
+
+# Network attributes stored in the cache; 'PIFs' is a list of <PIF> items.
+_NETWORK_ATTRS = { 'uuid': (_str_to_xml,_str_from_xml),
+                   'bridge': (_str_to_xml,_str_from_xml),
+                   'PIFs': (lambda x, p, t, v: _strlist_to_xml(x, p, 'PIFs', 'PIF', v),
+                            lambda n: _strlist_from_xml(n, 'PIFs', 'PIF')),
+                   'other_config': (lambda x, p, t, v: _otherconfig_to_xml(x, p, v, _NETWORK_OTHERCONFIG_ATTRS),
+                                    lambda n: _otherconfig_from_xml(n, _NETWORK_OTHERCONFIG_ATTRS)),
+                 }
+
+#
+# Database Cache object
+#
+
+# The module-wide DatabaseCache instance; None until a db_init_* call.
+_db = None
+
+def db():
+    """Return the DatabaseCache; asserts that it has been initialised."""
+    assert(_db is not None)
+    return _db
+
+def db_init_from_cache(cache):
+    """Create the DatabaseCache from the XML cache file named by cache.
+
+    Asserts that no cache has been created yet.
+    """
+    global _db
+    assert(_db is None)
+    _db = DatabaseCache(cache_file=cache)
+    
+def db_init_from_xenapi(session):
+    """Create the DatabaseCache from xapi, passing session as session_ref.
+
+    Asserts that no cache has been created yet.
+    """
+    global _db 
+    assert(_db is None)
+    _db  = DatabaseCache(session_ref=session)
+    
+class DatabaseCache(object):
+    """In-memory copy of the PIF, VLAN, bond and network records used by
+    interface-reconfigure.
+
+    The records are loaded either from xapi or from an XML cache file
+    (see __init__) and can be written back out as XML with save().
+    Each kind of record is held in a dict keyed by object reference.
+    """
+
+    def __read_xensource_inventory(self):
+        """Parse /etc/xensource-inventory into a dict of KEY -> value.
+
+        Each line is split at its first "=" and single quotes are
+        stripped from both ends of the value.
+        """
+        filename = "/etc/xensource-inventory"
+        f = open(filename, "r")
+        lines = [x.strip("\n") for x in f.readlines()]
+        f.close()
+
+        # NOTE(review): for a line without "=", find() returns -1, so the
+        # key becomes the line minus its last character and the value the
+        # whole line.
+        defs = [ (l[:l.find("=")], l[(l.find("=") + 1):]) for l in lines ]
+        defs = [ (a, b.strip("'")) for (a,b) in defs ]
+
+        return dict(defs)
+    def __pif_on_host(self,pif):
+        """Return True if pif is one of the PIF references loaded so far."""
+        return self.__pifs.has_key(pif)
+
+    def __get_pif_records_from_xapi(self, session, host):
+        """Load the PIF records whose 'host' field equals host.
+
+        Only the fields named in _PIF_ATTRS are kept, and other_config is
+        reduced to the keys listed in _PIF_OTHERCONFIG_ATTRS.
+        """
+        self.__pifs = {}
+        for (p,rec) in session.xenapi.PIF.get_all_records().items():
+            if rec['host'] != host:
+                continue
+            self.__pifs[p] = {}
+            for f in _PIF_ATTRS:
+                self.__pifs[p][f] = rec[f]
+            # Replace the full other_config copied above with a filtered one.
+            self.__pifs[p]['other_config'] = {}
+            for f in _PIF_OTHERCONFIG_ATTRS:
+                if not rec['other_config'].has_key(f): continue
+                self.__pifs[p]['other_config'][f] = rec['other_config'][f]
+
+    def __get_vlan_records_from_xapi(self, session):
+        """Load the VLAN records whose untagged_PIF is a loaded PIF.
+
+        Must run after __get_pif_records_from_xapi, which fills the PIF
+        table consulted here.
+        """
+        self.__vlans = {}
+        for v in session.xenapi.VLAN.get_all():
+            rec = session.xenapi.VLAN.get_record(v)
+            if not self.__pif_on_host(rec['untagged_PIF']):
+                continue
+            self.__vlans[v] = {}
+            for f in _VLAN_ATTRS:
+                self.__vlans[v][f] = rec[f]
+
+    def __get_bond_records_from_xapi(self, session):
+        """Load the bond records whose master is a loaded PIF."""
+        self.__bonds = {}
+        for b in session.xenapi.Bond.get_all():
+            rec = session.xenapi.Bond.get_record(b)
+            if not self.__pif_on_host(rec['master']):
+                continue
+            self.__bonds[b] = {}
+            for f in _BOND_ATTRS:
+                self.__bonds[b][f] = rec[f]
+
+    def __get_network_records_from_xapi(self, session):
+        """Load every network record, keeping only the fields in
+        _NETWORK_ATTRS, the PIFs that are loaded PIFs, and the
+        other_config keys listed in _NETWORK_OTHERCONFIG_ATTRS."""
+        self.__networks = {}
+        for n in session.xenapi.network.get_all():
+            rec = session.xenapi.network.get_record(n)
+            self.__networks[n] = {}
+            for f in _NETWORK_ATTRS:
+                if f == "PIFs":
+                    # drop PIFs on other hosts
+                    self.__networks[n][f] = [p for p in rec[f] if self.__pif_on_host(p)]
+                else:
+                    self.__networks[n][f] = rec[f]
+            # Replace the full other_config copied above with a filtered one.
+            self.__networks[n]['other_config'] = {}
+            for f in _NETWORK_OTHERCONFIG_ATTRS:
+                if not rec['other_config'].has_key(f): continue
+                self.__networks[n]['other_config'][f] = rec['other_config'][f]
+
+    def __to_xml(self, xml, parent, key, ref, rec, attrs):
+        """Encode a database object as XML
+
+        Creates a <key> element under parent, with a 'ref' attribute when
+        ref is non-empty, and encodes each field of rec with the encoder
+        registered for it in attrs. A field missing from attrs raises
+        Error.
+        """
+        e = xml.createElement(key)
+        parent.appendChild(e)
+        if ref:
+            e.setAttribute('ref', ref)
+
+        for n,v in rec.items():
+            if attrs.has_key(n):
+                h,_ = attrs[n]
+                h(xml, e, n, v)
+            else:
+                raise Error("Unknown attribute %s" % n)
+    def __from_xml(self, e, attrs):
+        """Decode a database object from XML
+
+        Returns (ref, rec): ref is the element's 'ref' attribute and rec
+        holds the decoded value of each child whose name is in attrs.
+        Other children are ignored.
+        """
+        ref = e.attributes['ref'].value
+        rec = {}
+        for n in e.childNodes:
+            if n.nodeName in attrs:
+                _,h = attrs[n.nodeName]
+                rec[n.nodeName] = h(n)
+        return (ref,rec)
+
+    def __init__(self, session_ref=None, cache_file=None):
+        """Populate the cache from xapi or from an XML cache file.
+
+        With cache_file unset the records are fetched from xapi, reusing
+        session_ref if given and otherwise logging in (and logging out
+        again afterwards). With cache_file set the records are parsed
+        from that file. Supplying both raises Error.
+        """
+        if session_ref and cache_file:
+            raise Error("can't specify session reference and cache file")
+        if cache_file == None:
+            import XenAPI
+            session = XenAPI.xapi_local()
+
+            if not session_ref:
+                log("No session ref given on command line, logging in.")
+                session.xenapi.login_with_password("root", "")
+            else:
+                session._session = session_ref
+
+            try:
+
+                inventory = self.__read_xensource_inventory()
+                assert(inventory.has_key('INSTALLATION_UUID'))
+                log("host uuid is %s" % inventory['INSTALLATION_UUID'])
+
+                host = session.xenapi.host.get_by_uuid(inventory['INSTALLATION_UUID'])
+
+                # PIFs must be loaded first: the VLAN, bond and network
+                # loaders filter on the PIF table.
+                self.__get_pif_records_from_xapi(session, host)
+
+                self.__get_vlan_records_from_xapi(session)
+                self.__get_bond_records_from_xapi(session)
+                self.__get_network_records_from_xapi(session)
+            finally:
+                # Only log out of a session that was opened here.
+                if not session_ref:
+                    session.xenapi.session.logout()
+        else:
+            log("Loading xapi database cache from %s" % cache_file)
+
+            xml = parseXML(cache_file)
+
+            self.__pifs = {}
+            self.__bonds = {}
+            self.__vlans = {}
+            self.__networks = {}
+
+            assert(len(xml.childNodes) == 1)
+            toplevel = xml.childNodes[0]
+
+            assert(toplevel.nodeName == "xenserver-network-configuration")
+
+            for n in toplevel.childNodes:
+                if n.nodeName == "#text":
+                    # Text nodes between elements carry no data.
+                    pass
+                elif n.nodeName == _PIF_XML_TAG:
+                    (ref,rec) = self.__from_xml(n, _PIF_ATTRS)
+                    self.__pifs[ref] = rec
+                elif n.nodeName == _BOND_XML_TAG:
+                    (ref,rec) = self.__from_xml(n, _BOND_ATTRS)
+                    self.__bonds[ref] = rec
+                elif n.nodeName == _VLAN_XML_TAG:
+                    (ref,rec) = self.__from_xml(n, _VLAN_ATTRS)
+                    self.__vlans[ref] = rec
+                elif n.nodeName == _NETWORK_XML_TAG:
+                    (ref,rec) = self.__from_xml(n, _NETWORK_ATTRS)
+                    self.__networks[ref] = rec
+                else:
+                    raise Error("Unknown XML element %s" % n.nodeName)
+
+    def save(self, cache_file):
+        """Write all cached records to cache_file as pretty-printed XML
+        under a <xenserver-network-configuration> root element."""
+
+        xml = getDOMImplementation().createDocument(
+            None, "xenserver-network-configuration", None)
+        for (ref,rec) in self.__pifs.items():
+            self.__to_xml(xml, xml.documentElement, _PIF_XML_TAG, ref, rec, _PIF_ATTRS)
+        for (ref,rec) in self.__bonds.items():
+            self.__to_xml(xml, xml.documentElement, _BOND_XML_TAG, ref, rec, _BOND_ATTRS)
+        for (ref,rec) in self.__vlans.items():
+            self.__to_xml(xml, xml.documentElement, _VLAN_XML_TAG, ref, rec, _VLAN_ATTRS)
+        for (ref,rec) in self.__networks.items():
+            self.__to_xml(xml, xml.documentElement, _NETWORK_XML_TAG, ref, rec,
+                          _NETWORK_ATTRS)
+
+        f = open(cache_file, 'w')
+        f.write(xml.toprettyxml())
+        f.close()
+
+    def get_pif_by_uuid(self, uuid):
+        """Return the reference of the single PIF whose uuid is uuid.
+
+        Raises Error if no PIF, or more than one PIF, has that uuid.
+        """
+        pifs = map(lambda (ref,rec): ref,
+                  filter(lambda (ref,rec): uuid == rec['uuid'],
+                         self.__pifs.items()))
+        if len(pifs) == 0:
+            raise Error("Unknown PIF \"%s\"" % uuid)
+        elif len(pifs) > 1:
+            raise Error("Non-unique PIF \"%s\"" % uuid)
+
+        return pifs[0]
+
+    def get_pifs_by_device(self, device):
+        """Return the references of all PIFs whose 'device' equals device."""
+        return map(lambda (ref,rec): ref,
+                   filter(lambda (ref,rec): rec['device'] == device,
+                          self.__pifs.items()))
+
+    def get_pif_by_bridge(self, bridge):
+        """Return the single PIF attached to the network(s) whose bridge
+        is bridge.
+
+        Raises Error if no network matches, if the matching networks
+        carry more than one PIF, or if they carry none.
+        """
+        networks = map(lambda (ref,rec): ref,
+                       filter(lambda (ref,rec): rec['bridge'] == bridge,
+                              self.__networks.items()))
+        if len(networks) == 0:
+            raise Error("No matching network \"%s\"" % bridge)
+
+        answer = None
+        for network in networks:
+            nwrec = self.get_network_record(network)
+            for pif in nwrec['PIFs']:
+                # The record itself is unused; the lookup raises Error if
+                # the PIF is unknown.
+                pifrec = self.get_pif_record(pif)
+                if answer:
+                    raise Error("Multiple PIFs on host for network %s" % (bridge))
+                answer = pif
+        if not answer:
+            raise Error("No PIF on host for network %s" % (bridge))
+        return answer
+
+    def get_pif_record(self, pif):
+        """Return the record for PIF reference pif; raises Error if unknown."""
+        if self.__pifs.has_key(pif):
+            return self.__pifs[pif]
+        raise Error("Unknown PIF \"%s\"" % pif)
+    def get_all_pifs(self):
+        """Return the dict of all PIF records, keyed by reference
+        (the internal dict itself, not a copy)."""
+        return self.__pifs
+    def pif_exists(self, pif):
+        """Return True if pif is a known PIF reference."""
+        return self.__pifs.has_key(pif)
+
+    def get_management_pif(self):
+        """ Returns the management pif on host
+
+        This is the first PIF found whose 'management' field is true, or
+        None if there is none.
+        """
+        all = self.get_all_pifs()
+        for pif in all:
+            pifrec = self.get_pif_record(pif)
+            if pifrec['management']: return pif
+        return None
+
+    def get_network_record(self, network):
+        """Return the record for network reference network; raises Error
+        if unknown."""
+        if self.__networks.has_key(network):
+            return self.__networks[network]
+        raise Error("Unknown network \"%s\"" % network)
+
+    def get_bond_record(self, bond):
+        """Return the record for bond reference bond, or None if unknown."""
+        if self.__bonds.has_key(bond):
+            return self.__bonds[bond]
+        else:
+            return None
+
+    def get_vlan_record(self, vlan):
+        """Return the record for VLAN reference vlan, or None if unknown."""
+        if self.__vlans.has_key(vlan):
+            return self.__vlans[vlan]
+        else:
+            return None
+
+#
+#
+#
+
+def ethtool_settings(oc):
+    """Translate the ethtool-* keys of an other-config dict into arguments.
+
+    Returns a pair (settings, offload). settings is a flat list of
+    name/value strings for speed, duplex and autoneg. offload is a flat
+    list of name/'on'|'off' strings for the rx, tx, sg, tso, ufo and gso
+    options. Keys with an invalid value are logged and left out.
+    """
+    settings = []
+    if 'ethtool-speed' in oc:
+        val = oc['ethtool-speed']
+        if val in ["10", "100", "1000"]:
+            settings += ['speed', val]
+        else:
+            log("Invalid value for ethtool-speed = %s. Must be 10|100|1000." % val)
+    if 'ethtool-duplex' in oc:
+        val = oc['ethtool-duplex']
+        # Duplex is validated against half|full (as the log message below
+        # states) and the configured value itself is passed on.
+        if val in ["half", "full"]:
+            settings += ['duplex', val]
+        else:
+            log("Invalid value for ethtool-duplex = %s. Must be half|full." % val)
+    if 'ethtool-autoneg' in oc:
+        val = oc['ethtool-autoneg']
+        if val in ["true", "on"]:
+            settings += ['autoneg', 'on']
+        elif val in ["false", "off"]:
+            settings += ['autoneg', 'off']
+        else:
+            log("Invalid value for ethtool-autoneg = %s. Must be on|true|off|false." % val)
+    offload = []
+    for opt in ("rx", "tx", "sg", "tso", "ufo", "gso"):
+        if ("ethtool-" + opt) in oc:
+            val = oc["ethtool-" + opt]
+            # Normalise true/false spellings to on/off.
+            if val in ["true", "on"]:
+                offload += [opt, 'on']
+            elif val in ["false", "off"]:
+                offload += [opt, 'off']
+            else:
+                log("Invalid value for ethtool-%s = %s. Must be on|true|off|false." % (opt, val))
+    return settings,offload
+
+def mtu_setting(oc):
+    """Return oc['mtu'] unchanged if it parses as an integer, else None.
+
+    A missing 'mtu' key yields None silently; a value that int() rejects
+    with ValueError is logged before None is returned.
+    """
+    if 'mtu' not in oc:
+        return None
+    mtu = oc['mtu']
+    try:
+        int(mtu)      # validation only; the original value is what is returned
+    except ValueError:
+        log("Invalid value for mtu = %s" % mtu)
+        return None
+    return mtu
+
+#
+# IP Network Devices -- network devices with IP configuration
+#
+def pif_ipdev_name(pif):
+    """Return the ipdev name associated with pif.
+
+    This is the bridge name of the PIF's network when that is non-empty,
+    otherwise the PIF's own netdev name.
+    """
+    network = db().get_pif_record(pif)['network']
+    bridge = db().get_network_record(network)['bridge']
+
+    if not bridge:
+        # TODO: sanity check that nwrec['bridgeless'] == 'true'
+        return pif_netdev_name(pif)
+    # TODO: sanity check that nwrec['bridgeless'] != 'true'
+    return bridge
+
+#
+# Bare Network Devices -- network devices without IP configuration
+#
+
+def netdev_exists(netdev):
+    """Return True if /sys/class/net has an entry named netdev."""
+    sysfs_entry = "/sys/class/net/" + netdev
+    return os.path.exists(sysfs_entry)
+
+def pif_netdev_name(pif):
+    """Get the netdev name for a PIF.
+
+    VLAN PIFs are named "<device>.<VLAN>"; all others use the bare device
+    name from the PIF record.
+    """
+    pifrec = db().get_pif_record(pif)
+
+    if not pif_is_vlan(pif):
+        return pifrec['device']
+    return "%(device)s.%(VLAN)s" % pifrec
+
+#
+# Bonded PIFs
+#
+def pif_is_bond(pif):
+    """Return True if the PIF record's bond_master_of list is non-empty."""
+    masters_of = db().get_pif_record(pif)['bond_master_of']
+    return len(masters_of) > 0
+
+def pif_get_bond_masters(pif):
+    """Returns a list of PIFs which are bond masters of this PIF"""
+
+    bonds = db().get_pif_record(pif)['bond_slave_of']
+
+    # bond-slave-of is currently a single reference but in principle a
+    # PIF could be a member of several bonds which are not
+    # concurrently attached. Be robust to this possibility.
+    if not bonds or bonds == "OpaqueRef:NULL":
+        bonds = []
+    elif type(bonds) != list:
+        bonds = [bonds]
+
+    # Bonds with no record in the cache are skipped.
+    masters = []
+    for bond in bonds:
+        bondrec = db().get_bond_record(bond)
+        if bondrec:
+            masters.append(bondrec['master'])
+    return masters
+
+def pif_get_bond_slaves(pif):
+    """Returns a list of PIFs which make up the given bonded pif.
+
+    An empty list is returned when pif is not a bond master. Raises Error
+    if pif is master of more than one bond or the bond has no record.
+    """
+
+    bmo = db().get_pif_record(pif)['bond_master_of']
+    count = len(bmo)
+
+    if count > 1:
+        raise Error("Bond-master-of contains too many elements")
+    if count == 0:
+        return []
+
+    bondrec = db().get_bond_record(bmo[0])
+    if bondrec:
+        return bondrec['slaves']
+    raise Error("No bond record for bond master PIF")
+
+#
+# VLAN PIFs
+#
+
+def pif_is_vlan(pif):
+    """Return True unless the PIF record's VLAN field is the string '-1'."""
+    vlan_tag = db().get_pif_record(pif)['VLAN']
+    return vlan_tag != '-1'
+
+def pif_get_vlan_slave(pif):
+    """Find the PIF which is the VLAN slave of pif.
+
+Returns the 'physical' PIF underneath the VLAN PIF @pif, i.e. the
+tagged_PIF of the VLAN record named by the PIF's VLAN_master_of field.
+Raises Error if that field is unset or the VLAN record is missing."""
+
+    vlan = db().get_pif_record(pif)['VLAN_master_of']
+    if vlan == "OpaqueRef:NULL" or not vlan:
+        raise Error("PIF is not a VLAN master")
+
+    vlanrec = db().get_vlan_record(vlan)
+    if vlanrec:
+        return vlanrec['tagged_PIF']
+    raise Error("No VLAN record found for PIF")
+
+def pif_get_vlan_masters(pif):
+    """Returns a list of PIFs which are VLANs on top of the given pif.
+
+    VLANs with no record in the cache, or whose untagged_PIF is not a
+    known PIF, are left out.
+    """
+
+    masters = []
+    for vlan in db().get_pif_record(pif)['VLAN_slave_of']:
+        vlanrec = db().get_vlan_record(vlan)
+        if vlanrec and db().pif_exists(vlanrec['untagged_PIF']):
+            masters.append(vlanrec['untagged_PIF'])
+    return masters
+
+#
+# Datapath base class
+#
+
+class Datapath(object):
+    """Object encapsulating the actions necessary to (de)configure the
+       datapath for a given PIF. Does not include configuration of the
+       IP address on the ipdev.
+
+       This base class only stores the PIF; every hook below raises
+       NotImplementedError and is meant to be overridden.
+    """
+    
+    def __init__(self, pif):
+        # The PIF this datapath object acts on.
+        self._pif = pif
+
+    def configure_ipdev(self, cfg):
+        """Write ifcfg TYPE field for an IPdev, plus any type specific
+           fields to cfg
+        """
+        raise NotImplementedError        
+
+    def preconfigure(self, parent):
+        """Prepare datapath configuration for PIF, but do not actually
+           apply any changes.
+
+           Any configuration files should be attached to parent.
+        """
+        raise NotImplementedError
+    
+    def bring_down_existing(self):
+        """Tear down any existing network device configuration which
+           needs to be undone in order to bring this PIF up.
+        """
+        raise NotImplementedError
+
+    def configure(self):
+        """Apply the configuration prepared in the preconfigure stage.
+
+           Should assume any configuration files attached in the
+           preconfigure stage have been applied, and bring up the
+           necessary devices to provide the datapath for the
+           PIF.
+
+           Should not bring up the IPdev.
+        """
+        raise NotImplementedError
+    
+    def post(self):
+        """Called after the IPdev has been brought up.
+
+           Should do any final setup, including reinstating any
+           devices which were taken down in the bring_down_existing
+           hook.
+        """
+        raise NotImplementedError
+
+    def bring_down(self):
+        """Tear down and deconfigure the datapath. Should assume the
+           IPdev has already been brought down.
+        """
+        raise NotImplementedError
+        
+def DatapathFactory(pif):
+    """Return the Datapath implementation for pif selected by network.conf.
+
+    The first line of /etc/xensource/network.conf names the backend:
+    "bridge" selects DatapathBridge and "vswitch" selects DatapathVswitch.
+    Raises Error if the file cannot be read or names any other backend.
+    """
+    # XXX Need a datapath object for bridgeless PIFs
+
+    try:
+        network_conf = open("/etc/xensource/network.conf", 'r')
+        try:
+            network_backend = network_conf.readline().strip()
+        finally:
+            # Close the file even if the read fails.
+            network_conf.close()
+    except Exception, e:
+        # str() is required: concatenating the exception object itself
+        # to a string raises TypeError and hides the real failure.
+        raise Error("failed to determine network backend:" + str(e))
+    
+    # The backend modules are imported only when selected.
+    if network_backend == "bridge":
+        from InterfaceReconfigureBridge import DatapathBridge
+        return DatapathBridge(pif)
+    elif network_backend == "vswitch":
+        from InterfaceReconfigureVswitch import DatapathVswitch
+        return DatapathVswitch(pif)
+    else:
+        raise Error("unknown network backend %s" % network_backend)
diff --git a/xenserver/opt_xensource_libexec_InterfaceReconfigureBridge.py b/xenserver/opt_xensource_libexec_InterfaceReconfigureBridge.py
new file mode 100644 (file)
index 0000000..0fa9322
--- /dev/null
@@ -0,0 +1,497 @@
+# Copyright (c) 2008,2009 Citrix Systems, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; version 2.1 only. with the special
+# exception on linking described in file LICENSE.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+from InterfaceReconfigure import *
+
+import sys
+import time
+
+sysfs_bonding_masters = "/sys/class/net/bonding_masters"
+
+def open_pif_ifcfg(pif):
+    """Begin the ifcfg file for the network device of PIF.
+
+    Logs the device name and MAC, creates a ConfigurationFile for
+    /etc/sysconfig/network-scripts/ifcfg-<device> and writes the common
+    header lines to it.  The ConfigurationFile is returned without being
+    closed so that the caller can append further settings.
+    """
+    record = db().get_pif_record(pif)
+
+    netdev = pif_netdev_name(pif)
+    log("Configuring %s (%s)" % (netdev, record['MAC']))
+
+    cfg = ConfigurationFile("/etc/sysconfig/network-scripts/ifcfg-%s" % netdev)
+
+    # Header lines common to every generated ifcfg file, in output order.
+    header = [
+        "# DO NOT EDIT: This file (%s) was autogenerated by %s\n" % \
+            (os.path.basename(cfg.path()), os.path.basename(sys.argv[0])),
+        "XEMANAGED=yes\n",
+        "DEVICE=%s\n" % netdev,
+        "ONBOOT=no\n",
+        ]
+    for line in header:
+        cfg.write(line)
+
+    return cfg
+
+#
+# Bare Network Devices -- network devices without IP configuration
+#
+
+def netdev_down(netdev):
+    """Run /sbin/ifdown on a bare network device.
+
+    A device that does not exist is logged and otherwise ignored.
+    """
+    if netdev_exists(netdev):
+        run_command(["/sbin/ifdown", netdev])
+    else:
+        log("netdev: down: device %s does not exist, ignoring" % netdev)
+
+def netdev_up(netdev, mtu=None):
+    """Run /sbin/ifup on a bare network device.
+
+    The mtu argument is accepted but not used by this implementation.
+    """
+    #if not netdev_exists(netdev):
+    #    raise Error("netdev: up: device %s does not exist" % netdev)
+
+    ifup_argv = ["/sbin/ifup", netdev]
+    run_command(ifup_argv)
+
+#
+# Bonding driver
+#
+
+def load_bonding_driver():
+    """modprobe the bonding driver, then ask it to delete "bond0".
+
+    An IOError while writing to the bonding_masters file is logged and
+    not propagated.
+    """
+    log("Loading bonding driver")
+    run_command(["/sbin/modprobe", "bonding"])
+
+    # bond_device_exists() relies on the contents of sysfs_bonding_masters
+    # to tell which bond devices have been created.  The driver creates
+    # "bond0" by itself at modprobe init, so remove it straight away to keep
+    # that accounting correct.
+    try:
+        masters = open(sysfs_bonding_masters, "w")
+        masters.write("-bond0")
+        masters.close()
+    except IOError, err:
+        log("Failed to load bonding driver: %s" % err)
+
+def bonding_driver_loaded():
+    """Return True if /proc/modules has a line whose first field is "bonding"."""
+    for entry in open("/proc/modules").read().split("\n"):
+        if entry.split(" ")[0] == "bonding":
+            return True
+    return False
+
+def bond_device_exists(name):
+    """Return True if NAME is listed on the first line of sysfs_bonding_masters."""
+    masters = open(sysfs_bonding_masters, "r")
+    listed = masters.readline().split()
+    masters.close()
+    if name in listed:
+        return True
+    return False
+
+def __create_bond_device(name):
+    """Create the bond master NAME unless it is already listed.
+
+    Loads the bonding driver first if it is not loaded.  An IOError while
+    writing to the bonding_masters file is logged and not propagated.
+    """
+    if not bonding_driver_loaded():
+        load_bonding_driver()
+
+    if bond_device_exists(name):
+        log("bond master %s already exists, not creating" % name)
+        return
+
+    log("Creating bond master %s" % name)
+    try:
+        masters = open(sysfs_bonding_masters, "w")
+        masters.write("+" + name)
+        masters.close()
+    except IOError, err:
+        log("Failed to create %s: %s" % (name, err))
+
+def create_bond_device(pif):
+    """Make sure the bond master device for PIF exists in the kernel.
+
+    Does nothing when PIF is not a bond.
+    """
+    if pif_is_bond(pif):
+        __create_bond_device(pif_netdev_name(pif))
+
+def __destroy_bond_device(name):
+    """Delete the bond master NAME, retrying while the write fails.
+
+    Makes up to 10 attempts, sleeping 0.5 seconds after each IOError.
+    If every attempt fails the function returns without raising.
+    """
+    if not bond_device_exists(name):
+        log("bond master %s does not exist, not destroying" % name)
+        return
+
+    # Count down 9, 8, ... 0: the number of attempts left after this one.
+    for remaining in range(9, -1, -1):
+        log("Destroying bond master %s (%d attempts remain)" % (name, remaining))
+        try:
+            masters = open(sysfs_bonding_masters, "w")
+            masters.write("-" + name)
+            masters.close()
+            return
+        except IOError, err:
+            time.sleep(0.5)
+
+def destroy_bond_device(pif):
+    """No, Mr. Bond, I expect you to die.
+
+    Destroys the bond master device of PIF.  Nothing happens when PIF is
+    not a bond or when the bonding_masters sysfs file is not present.
+    """
+    # The record is not used below; the lookup itself is retained.
+    db().get_pif_record(pif)
+
+    # If the bonding module isn't loaded (no sysfs file) then do nothing.
+    if pif_is_bond(pif) and os.access(sysfs_bonding_masters, os.F_OK):
+        __destroy_bond_device(pif_netdev_name(pif))
+
+#
+# Bridges
+#
+
+def pif_is_bridged(pif):
+    """Return True if the network record of PIF has a non-empty 'bridge'."""
+    record = db().get_pif_record(pif)
+    network = db().get_network_record(record['network'])
+
+    # TODO: sanity check that network['bridgeless'] != 'true' when a bridge
+    # is set, and == 'true' when it is not.
+    return bool(network['bridge'])
+
+def pif_bridge_name(pif):
+    """Return the bridge name from the network record of PIF.
+
+    PIF must be a bridged PIF; Error is raised when its network has no
+    bridge name."""
+    record = db().get_pif_record(pif)
+
+    bridge = db().get_network_record(record['network'])['bridge']
+
+    if not bridge:
+        raise Error("PIF %(uuid)s does not have a bridge name" % record)
+    return bridge
+
+#
+# Bring Interface up/down.
+#
+
+def bring_down_interface(pif, destroy=False):
+    """Bring down the interface associated with PIF.
+
+    Brings down the given interface as well as any physical interfaces
+    which are bond slaves of this one. This is because they will be
+    required when the bond is brought up.
+
+    For a VLAN PIF with destroy false, only the VLAN device is brought
+    down.  With destroy true the VLAN device and its bridge are removed,
+    and the VLAN slave is then brought down too unless it, or another
+    VLAN master on it, is currently attached.
+
+    For a non-VLAN PIF nothing is done while any of its VLAN masters is
+    currently attached.  With destroy true the bond master device and
+    the bridge are removed after the interface has been brought down."""
+
+    def destroy_bridge(pif):
+        """Bring down the bridge associated with a PIF."""
+        #if not pif_is_bridged(pif):
+        #    return
+        bridge = pif_bridge_name(pif)
+        if not netdev_exists(bridge):
+            log("destroy_bridge: bridge %s does not exist, ignoring" % bridge)
+            return
+        log("Destroy bridge %s" % bridge)
+        netdev_down(bridge)
+        run_command(["/usr/sbin/brctl", "delbr", bridge])
+
+    def destroy_vlan(pif):
+        """Remove the VLAN device of a PIF with vconfig, if it exists."""
+        vlan = pif_netdev_name(pif)
+        if not netdev_exists(vlan):
+            log("vconfig del: vlan %s does not exist, ignoring" % vlan)
+            return
+        log("Destroy vlan device %s" % vlan)
+        run_command(["/sbin/vconfig", "rem", vlan])
+
+    if pif_is_vlan(pif):
+        interface = pif_netdev_name(pif)
+        log("bring_down_interface: %s is a VLAN" % interface)
+        netdev_down(interface)
+
+        if destroy:
+            destroy_vlan(pif)
+            destroy_bridge(pif)
+        else:
+            # Without destroy, a VLAN PIF is only brought down.
+            return
+
+        # Leave the underlying device alone while it is attached itself ...
+        slave = pif_get_vlan_slave(pif)
+        if db().get_pif_record(slave)['currently_attached']:
+            log("bring_down_interface: vlan slave is currently attached")
+            return
+
+        # ... or while another attached VLAN still sits on top of it.
+        masters = pif_get_vlan_masters(slave)
+        masters = [m for m in masters if m != pif and db().get_pif_record(m)['currently_attached']]
+        if len(masters) > 0:
+            log("bring_down_interface: vlan slave has other masters")
+            return
+
+        log("bring_down_interface: no more masters, bring down vlan slave %s" % pif_netdev_name(slave))
+        # From here on operate on the VLAN slave instead of the VLAN PIF.
+        pif = slave
+    else:
+        vlan_masters = pif_get_vlan_masters(pif)
+        log("vlan masters of %s - %s" % (db().get_pif_record(pif)['device'], [pif_netdev_name(m) for m in vlan_masters]))
+        if len([m for m in vlan_masters if db().get_pif_record(m)['currently_attached']]) > 0:
+            log("Leaving %s up due to currently attached VLAN masters" % pif_netdev_name(pif))
+            return
+
+    # pif is now either a bond or a physical device which needs to be brought down
+
+    # Need to bring down bond slaves first since the bond device
+    # must be up to enslave/unenslave.
+    bond_slaves = pif_get_bond_slaves_sorted(pif)
+    log("bond slaves of %s - %s" % (db().get_pif_record(pif)['device'], [pif_netdev_name(s) for s in bond_slaves]))
+    for slave in bond_slaves:
+        slave_interface = pif_netdev_name(slave)
+        if db().get_pif_record(slave)['currently_attached']:
+            log("leave bond slave %s up (currently attached)" % slave_interface)
+            continue
+        log("bring down bond slave %s" % slave_interface)
+        netdev_down(slave_interface)
+        # Also destroy the bridge associated with the slave, since
+        # it will carry the MAC address and possibly an IP address
+        # leading to confusion.
+        destroy_bridge(slave)
+
+    interface = pif_netdev_name(pif)
+    log("Bring interface %s down" % interface)
+    netdev_down(interface)
+
+    if destroy:
+        # destroy_bond_device() does nothing for a PIF that is not a bond.
+        destroy_bond_device(pif)
+        destroy_bridge(pif)
+
+def interface_is_up(pif):
+    """Return True if the operstate of PIF's network device reads "up".
+
+    Any failure while looking up or reading the state yields False.
+    """
+    try:
+        operstate = "/sys/class/net/%s/operstate" % pif_netdev_name(pif)
+        return open(operstate).read().strip() == "up"
+    except:
+        # the interface probably doesn't exist
+        return False
+
+def bring_up_interface(pif):
+    """Bring up the interface associated with a PIF.
+
+    For a VLAN PIF the VLAN slave is brought up first (recursively) when
+    it is not already up.  The bond master device is created when PIF is
+    a bond.
+    """
+
+    # A VLAN on a bond seems to need the bond brought up explicitly, while a
+    # VLAN on a normal device does not.  Might as well always bring it up.
+    if pif_is_vlan(pif):
+        vlan_slave = pif_get_vlan_slave(pif)
+        if not interface_is_up(vlan_slave):
+            bring_up_interface(vlan_slave)
+
+    netdev = pif_netdev_name(pif)
+
+    create_bond_device(pif)
+
+    log("Bring interface %s up" % netdev)
+    netdev_up(netdev)
+
+
+#
+# Datapath topology configuration.
+#
+
+def _configure_physical_interface(pif):
+    """Write the ifcfg configuration for a physical interface.
+
+    Emits TYPE and HWADDR, followed by the ethtool and MTU settings
+    derived from the PIF's other_config.
+
+    Returns the configuration file object, not yet closed.
+    """
+
+    record = db().get_pif_record(pif)
+
+    cfg = open_pif_ifcfg(pif)
+
+    cfg.write("TYPE=Ethernet\n")
+    cfg.write("HWADDR=%(MAC)s\n" % record)
+
+    other_config = record['other_config']
+
+    ethtool_opts, offload_opts = ethtool_settings(other_config)
+    if len(ethtool_opts) > 0:
+        cfg.write("ETHTOOL_OPTS=\"%s\"\n" % " ".join(ethtool_opts))
+    if len(offload_opts) > 0:
+        cfg.write("ETHTOOL_OFFLOAD_OPTS=\"%s\"\n" % " ".join(offload_opts))
+
+    mtu = mtu_setting(other_config)
+    if mtu:
+        cfg.write("MTU=%s\n" % mtu)
+
+    return cfg
+
+def pif_get_bond_slaves_sorted(pif):
+    """Return the bond slave PIFs of PIF, currently enslaved devices first.
+
+    Starts from pif_get_bond_slaves(pif) and, for every device named on
+    the first line of /sys/class/net/<device>/bonding/slaves, moves the
+    matching non-VLAN PIF to the front of the list.  If that sysfs file
+    cannot be read, the list is returned in the order that
+    pif_get_bond_slaves() produced.
+    """
+    pifrec = db().get_pif_record(pif)
+
+    # build a list of slave's pifs
+    slave_pifs = pif_get_bond_slaves(pif)
+
+    # Ensure any currently attached slaves are listed in the opposite order to the order in
+    # which they were attached.  The first slave attached must be the last detached since
+    # the bond is using its MAC address.
+    try:
+        attached_slaves = open("/sys/class/net/%s/bonding/slaves" % pifrec['device']).readline().split()
+        for slave in attached_slaves:
+            # PIFs using this device name, ignoring VLAN PIFs; the first one is taken.
+            pifs = [p for p in db().get_pifs_by_device(slave) if not pif_is_vlan(p)]
+            slave_pif = pifs[0]
+            # Move to the front, so devices later in the sysfs list end up
+            # ahead of earlier ones.
+            slave_pifs.remove(slave_pif)
+            slave_pifs.insert(0, slave_pif)
+    except IOError:
+        # No readable slaves file: keep the list unsorted.
+        pass
+
+    return slave_pifs
+
+def _configure_bond_interface(pif):
+    """Write the ifcfg configuration for a bond interface.
+
+    Also writes, closes and attaches as children the configuration
+    files of the bond's slave interfaces.
+
+    Returns the bond's configuration file object, not yet closed.
+    """
+
+    bondrec = db().get_pif_record(pif)
+
+    cfg = open_pif_ifcfg(pif)
+
+    mac = bondrec['MAC']
+    if mac != "":
+        cfg.write("MACADDR=%s\n" % mac)
+
+    # Every slave gets a physical-interface file pointing at this bond.
+    for slave_pif in pif_get_bond_slaves(pif):
+        slave_cfg = _configure_physical_interface(slave_pif)
+        slave_cfg.write("MASTER=%(device)s\n" % bondrec)
+        slave_cfg.write("SLAVE=yes\n")
+        slave_cfg.close()
+        cfg.attach_child(slave_cfg)
+
+    other_config = bondrec['other_config']
+
+    ethtool_opts, offload_opts = ethtool_settings(other_config)
+    if len(ethtool_opts) > 0:
+        cfg.write("ETHTOOL_OPTS=\"%s\"\n" % " ".join(ethtool_opts))
+    if len(offload_opts) > 0:
+        cfg.write("ETHTOOL_OFFLOAD_OPTS=\"%s\"\n" % " ".join(offload_opts))
+
+    mtu = mtu_setting(other_config)
+    if mtu:
+        cfg.write("MTU=%s\n" % mtu)
+
+    # The bond option defaults
+    bond_options = {
+        "mode":   "balance-slb",
+        "miimon": "100",
+        "downdelay": "200",
+        "updelay": "31000",
+        "use_carrier": "1",
+        }
+
+    # Override the defaults with values from other-config whose keys begin
+    # with "bond-"; the prefix is stripped to obtain the option name.
+    for (key, val) in other_config.items():
+        if key.startswith("bond-"):
+            bond_options[key[5:]] = val
+
+    # write the bond options to ifcfg-bondX
+    cfg.write('BONDING_OPTS="')
+    for (name, val) in bond_options.items():
+        cfg.write("%s=%s " % (name, val))
+    cfg.write('"\n')
+    return cfg
+
+def _configure_vlan_interface(pif):
+    """Write the ifcfg configuration for a VLAN interface.
+
+    The VLAN slave is configured first through _configure_pif() and its
+    configuration file is attached as a child of the VLAN's file.
+
+    Returns the VLAN's configuration file object, not yet closed.
+    """
+
+    slave_cfg = _configure_pif(pif_get_vlan_slave(pif))
+
+    other_config = db().get_pif_record(pif)['other_config']
+
+    cfg = open_pif_ifcfg(pif)
+    cfg.write("VLAN=yes\n")
+
+    ethtool_opts, offload_opts = ethtool_settings(other_config)
+    if len(ethtool_opts) > 0:
+        cfg.write("ETHTOOL_OPTS=\"%s\"\n" % " ".join(ethtool_opts))
+    if len(offload_opts) > 0:
+        cfg.write("ETHTOOL_OFFLOAD_OPTS=\"%s\"\n" % " ".join(offload_opts))
+
+    mtu = mtu_setting(other_config)
+    if mtu:
+        cfg.write("MTU=%s\n" % mtu)
+
+    cfg.attach_child(slave_cfg)
+
+    return cfg
+
+def _configure_pif(pif):
+    """Write the configuration for a PIF object.
+
+    Dispatches to the VLAN, bond or physical writer (which also handle
+    dependent interfaces such as bond slaves and the VLAN slave),
+    appends the BRIDGE line and closes the file.
+
+    Returns the (closed) configuration file object.
+    """
+
+    if pif_is_vlan(pif):
+        writer = _configure_vlan_interface
+    elif pif_is_bond(pif):
+        writer = _configure_bond_interface
+    else:
+        writer = _configure_physical_interface
+
+    cfg = writer(pif)
+
+    cfg.write("BRIDGE=%s\n" % pif_bridge_name(pif))
+    cfg.close()
+
+    return cfg
+
+#
+#
+#
+
+class DatapathBridge(Datapath):
+    """Datapath implementation based on ifcfg files, ifup/ifdown and brctl."""
+
+    def __init__(self, pif):
+        Datapath.__init__(self, pif)
+        log("Configured for Bridge datapath")
+
+    def configure_ipdev(self, cfg):
+        """Write the TYPE-related lines of the IP device configuration to cfg."""
+        if not pif_is_bridged(self._pif):
+            cfg.write("TYPE=Ethernet\n")
+            return
+
+        cfg.write("TYPE=Bridge\n")
+        cfg.write("DELAY=0\n")
+        cfg.write("STP=off\n")
+        cfg.write("PIFDEV=%s\n" % pif_netdev_name(self._pif))
+
+    def preconfigure(self, parent):
+        """Write the ifcfg files for the PIF and attach them to parent."""
+        parent.attach_child(_configure_pif(self._pif))
+
+    def bring_down_existing(self):
+        """Bring down VLAN masters, bond masters and finally the PIF itself."""
+        # Bring down any VLAN masters so that we can reconfigure the slave.
+        for vlan_master in pif_get_vlan_masters(self._pif):
+            vlan_dev = pif_netdev_name(vlan_master)
+            log("action_up: bring down vlan master %s" % (vlan_dev))
+            netdev_down(vlan_dev)
+
+        # interface-reconfigure is never explicitly called to down a bond
+        # master.  However, being called to up a slave implies that the
+        # master is to be destroyed.
+        for bond_master in pif_get_bond_masters(self._pif):
+            log("action_up: bring down bond master %s" % (pif_netdev_name(bond_master)))
+            bring_down_interface(bond_master, destroy=True)
+
+        # No masters left - now it is safe to reconfigure the slave.
+        bring_down_interface(self._pif)
+
+    def configure(self):
+        """Bring up the interface of the PIF."""
+        bring_up_interface(self._pif)
+
+    def post(self):
+        """Bring back up the VLAN masters that are currently attached."""
+        # Decide which masters are attached before bringing any of them up.
+        attached = []
+        for vlan_master in pif_get_vlan_masters(self._pif):
+            if db().get_pif_record(vlan_master)['currently_attached']:
+                attached.append(vlan_master)
+
+        for vlan_master in attached:
+            vlan_dev = pif_netdev_name(vlan_master)
+            log("action_up: bring up %s" % (vlan_dev))
+            netdev_up(vlan_dev)
+
+    def bring_down(self):
+        """Bring down the PIF and destroy its devices."""
+        bring_down_interface(self._pif, destroy=True)
diff --git a/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py b/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py
new file mode 100644 (file)
index 0000000..c083859
--- /dev/null
@@ -0,0 +1,447 @@
+# Copyright (c) 2008,2009 Citrix Systems, Inc.
+# Copyright (c) 2009 Nicira Networks.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; version 2.1 only. with the special
+# exception on linking described in file LICENSE.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+from InterfaceReconfigure import *
+
+#
+# Bare Network Devices -- network devices without IP configuration
+#
+
+def netdev_down(netdev):
+    """Run "ifconfig <netdev> down" on a bare network device.
+
+    A device that does not exist is logged and otherwise ignored.
+    """
+    if netdev_exists(netdev):
+        run_command(["/sbin/ifconfig", netdev, 'down'])
+    else:
+        log("netdev: down: device %s does not exist, ignoring" % netdev)
+
+def netdev_up(netdev, mtu=None):
+    """Run "ifconfig <netdev> up" on a bare network device.
+
+    When mtu is given (and true), "mtu <mtu>" is appended to the command.
+    Raises Error if the device does not exist.
+    """
+    if not netdev_exists(netdev):
+        raise Error("netdev: up: device %s does not exist" % netdev)
+
+    ifconfig_argv = ["/sbin/ifconfig", netdev, 'up']
+    if mtu:
+        ifconfig_argv += ["mtu", mtu]
+
+    run_command(ifconfig_argv)
+
+#
+# Bridges
+#
+
+def pif_bridge_name(pif):
+    """Return the bridge name from the network record of PIF.
+
+    PIF must not be a VLAN and must be a bridged PIF; Error is raised
+    otherwise."""
+
+    record = db().get_pif_record(pif)
+
+    if pif_is_vlan(pif):
+        raise Error("PIF %(uuid)s cannot be a bridge, VLAN is %(VLAN)s" % record)
+
+    bridge = db().get_network_record(record['network'])['bridge']
+
+    if not bridge:
+        raise Error("PIF %(uuid)s does not have a bridge name" % record)
+    return bridge
+
+#
+# PIF miscellanea
+#
+
+def pif_currently_in_use(pif):
+    """Determine if a PIF is currently in use.
+
+    A PIF counts as in use when any of these holds:
+    - PIF.currently-attached is true
+    - one of its bond masters is (recursively) in use
+    - one of its VLAN masters is (recursively) in use
+    """
+    if db().get_pif_record(pif)['currently_attached']:
+        log("configure_datapath: %s is currently attached" % (pif_netdev_name(pif)))
+        return True
+
+    for bond_master in pif_get_bond_masters(pif):
+        if not pif_currently_in_use(bond_master):
+            continue
+        log("configure_datapath: %s is in use by BOND master %s" % (pif_netdev_name(pif),pif_netdev_name(bond_master)))
+        return True
+
+    for vlan_master in pif_get_vlan_masters(pif):
+        if not pif_currently_in_use(vlan_master):
+            continue
+        log("configure_datapath: %s is in use by VLAN master %s" % (pif_netdev_name(pif),pif_netdev_name(vlan_master)))
+        return True
+
+    return False
+
+#
+# Datapath Configuration
+#
+
+def pif_datapath(pif):
+    """Return the datapath PIF associated with PIF.
+
+    For a VLAN PIF this is the datapath PIF of its VLAN slave.
+    For a non-VLAN PIF it is the PIF itself when its network has a
+    bridge name, and None otherwise.
+    """
+    if pif_is_vlan(pif):
+        return pif_datapath(pif_get_vlan_slave(pif))
+
+    record = db().get_pif_record(pif)
+    network = db().get_network_record(record['network'])
+    if network['bridge']:
+        return pif
+    return None
+
+def datapath_get_physical_pifs(pif):
+    """Return the PIFs of the physical network device(s) behind a datapath PIF.
+
+    For a bond master PIF these are the bond slave PIFs.
+    For any other non-VLAN PIF the result is a list holding the PIF itself.
+    A VLAN PIF cannot be a datapath PIF and raises Error.
+    """
+    if pif_is_vlan(pif):
+        # Seems like overkill...
+        raise Error("get-physical-pifs should not get passed a VLAN")
+
+    if pif_is_bond(pif):
+        return pif_get_bond_slaves(pif)
+
+    return [pif]
+
+def datapath_deconfigure_physical(netdev):
+    """Return the ovs-vsctl arguments that delete the port NETDEV if it exists."""
+    argv = ['--', '--if-exists', 'del-port']
+    argv.append(netdev)
+    return argv
+
+def datapath_configure_bond(pif,slaves):
+    """Return the ovs-vsctl arguments that add the bond PIF to its bridge.
+
+    pif    -- the bond master PIF
+    slaves -- the PIFs whose network devices become the bond's interfaces
+
+    The MAC address and the bonding options are computed but not yet
+    emitted (see the XXX notes below).
+    """
+    bridge = pif_bridge_name(pif)
+    # db is an accessor function; it must be called to obtain the database.
+    pifrec = db().get_pif_record(pif)
+    interface = pif_netdev_name(pif)
+
+    argv = ['--', '--fake-iface', 'add-bond', bridge, interface]
+    for slave in slaves:
+        argv += [pif_netdev_name(slave)]
+
+    # XXX need ovs-vsctl support
+    #if pifrec['MAC'] != "":
+    #    argv += ['--add=port.%s.mac=%s' % (interface, pifrec['MAC'])]
+
+    # Bonding options.
+    bond_options = {
+        "mode":   "balance-slb",
+        "miimon": "100",
+        "downdelay": "200",
+        "updelay": "31000",
+        "use_carrier": "1",
+        }
+    # override defaults with values from other-config whose keys
+    # begin with "bond-"
+    oc = pifrec['other_config']
+    overrides = filter(lambda (key,val):
+                           key.startswith("bond-"), oc.items())
+    overrides = map(lambda (key,val): (key[5:], val), overrides)
+    bond_options.update(overrides)
+    for (name,val) in bond_options.items():
+        # XXX need ovs-vsctl support for bond options
+        #argv += ["--add=bonding.%s.%s=%s" % (interface, name, val)]
+        pass
+    return argv
+
+def datapath_deconfigure_bond(netdev):
+    """Return the ovs-vsctl arguments that delete the bond port NETDEV if it exists."""
+    argv = ['--', '--if-exists', 'del-port']
+    argv.append(netdev)
+    return argv
+
+def datapath_deconfigure_ipdev(interface):
+    """Return the ovs-vsctl arguments that delete the port INTERFACE if it exists."""
+    argv = ['--', '--if-exists', 'del-port']
+    argv.append(interface)
+    return argv
+
+def datapath_modify_config(commands):
+    """Run ovs-vsctl with the given argument list.
+
+    Entries starting with '#' are annotations: they are logged in debug
+    mode but not passed to ovs-vsctl.  Returns True on success and
+    raises Error when run_command() reports failure.
+    """
+    if debug_mode():
+        log("modifying configuration:")
+        for entry in commands:
+            log("  %s" % entry)
+
+    vsctl = ['/usr/bin/ovs-vsctl', '--timeout=20']
+    for entry in commands:
+        if not entry.startswith('#'):
+            vsctl.append(entry)
+
+    if run_command(vsctl):
+        return True
+    raise Error("Failed to modify vswitch configuration")
+
+#
+# Toplevel Datapath Configuration.
+#
+
+def configure_datapath(pif):
+    """Bring up the datapath configuration for PIF.
+
+    Should be careful not to glitch existing users of the datapath, e.g. other VLANs etc.
+
+    Should take care of tearing down other PIFs which encompass common physical devices.
+
+    Besides building the argument list, this function immediately brings
+    down (netdev_down) the bridges and devices it is going to deconfigure.
+
+    Returns a tuple containing
+    - A list containing the necessary vsctl command line arguments
+      (entries starting with '#' are annotations)
+    - A list of additional devices which should be brought up after
+      the configuration is applied.
+    """
+
+    vsctl_argv = []
+    extra_up_ports = []
+
+    bridge = pif_bridge_name(pif)
+
+    physical_devices = datapath_get_physical_pifs(pif)
+
+    # Determine additional devices to deconfigure.
+    #
+    # Given all physical devices which are part of this PIF we need to
+    # consider:
+    # - any additional bond which a physical device is part of.
+    # - any additional physical devices which are part of an additional bond.
+    #
+    # Any of these which are not currently in use should be brought
+    # down and deconfigured.
+    extra_down_bonds = []
+    extra_down_ports = []
+    for p in physical_devices:
+        for bond in pif_get_bond_masters(p):
+            if bond == pif:
+                log("configure_datapath: leaving bond %s up" % pif_netdev_name(bond))
+                continue
+            if bond in extra_down_bonds:
+                continue
+            if db().get_pif_record(bond)['currently_attached']:
+                log("configure_datapath: implicitly tearing down currently-attached bond %s" % pif_netdev_name(bond))
+
+            extra_down_bonds += [bond]
+
+            for s in pif_get_bond_slaves(bond):
+                if s in physical_devices:
+                    continue
+                if s in extra_down_ports:
+                    continue
+                if pif_currently_in_use(s):
+                    continue
+                extra_down_ports += [s]
+
+    log("configure_datapath: bridge      - %s" % bridge)
+    log("configure_datapath: physical    - %s" % [pif_netdev_name(p) for p in physical_devices])
+    log("configure_datapath: extra ports - %s" % [pif_netdev_name(p) for p in extra_down_ports])
+    log("configure_datapath: extra bonds - %s" % [pif_netdev_name(p) for p in extra_down_bonds])
+
+    # Need to fully deconfigure any bridge which any of the:
+    # - physical devices
+    # - bond devices
+    # - sibling devices
+    # refers to
+    for brpif in physical_devices + extra_down_ports + extra_down_bonds:
+        if brpif == pif:
+            continue
+        b = pif_bridge_name(brpif)
+        #ifdown(b)
+        # XXX
+        netdev_down(b)
+        vsctl_argv += ['# remove bridge %s' % b]
+        vsctl_argv += ['--', '--if-exists', 'del-br', b]
+
+    for n in extra_down_ports:
+        dev = pif_netdev_name(n)
+        vsctl_argv += ['# deconfigure sibling physical device %s' % dev]
+        vsctl_argv += datapath_deconfigure_physical(dev)
+        netdev_down(dev)
+
+    for n in extra_down_bonds:
+        dev = pif_netdev_name(n)
+        vsctl_argv += ['# deconfigure bond device %s' % dev]
+        vsctl_argv += datapath_deconfigure_bond(dev)
+        netdev_down(dev)
+
+    # The PIF's own physical devices are removed from the configuration
+    # here but are not brought down.
+    for p in physical_devices:
+        dev = pif_netdev_name(p)
+        vsctl_argv += ['# deconfigure physical port %s' % dev]
+        vsctl_argv += datapath_deconfigure_physical(dev)
+
+    if len(physical_devices) > 1:
+        # More than one physical device: (re)create the bond.
+        vsctl_argv += ['# deconfigure bond %s' % pif_netdev_name(pif)]
+        vsctl_argv += datapath_deconfigure_bond(pif_netdev_name(pif))
+        vsctl_argv += ['# configure bond %s' % pif_netdev_name(pif)]
+        vsctl_argv += datapath_configure_bond(pif, physical_devices)
+        extra_up_ports += [pif_netdev_name(pif)]
+    else:
+        iface = pif_netdev_name(physical_devices[0])
+        vsctl_argv += ['# add physical device %s' % iface]
+        vsctl_argv += ['--', 'add-port', bridge, iface]
+
+    return vsctl_argv,extra_up_ports
+
+def deconfigure_datapath(pif):
+    """Return the ovs-vsctl arguments that tear down the datapath of PIF.
+
+    The physical devices of the datapath are brought down immediately;
+    the returned list deletes their ports, the bond port (when there is
+    more than one physical device) and finally the bridge.
+    """
+    bridge = pif_bridge_name(pif)
+
+    physical_devices = datapath_get_physical_pifs(pif)
+
+    log("deconfigure_datapath: bridge           - %s" % bridge)
+    log("deconfigure_datapath: physical devices - %s" % [pif_netdev_name(p) for p in physical_devices])
+
+    argv = []
+
+    for physical in physical_devices:
+        dev = pif_netdev_name(physical)
+        argv.append('# deconfigure physical port %s' % dev)
+        argv.extend(datapath_deconfigure_physical(dev))
+        netdev_down(dev)
+
+    if len(physical_devices) > 1:
+        bond_dev = pif_netdev_name(pif)
+        argv.append('# deconfigure bond %s' % bond_dev)
+        argv.extend(datapath_deconfigure_bond(bond_dev))
+
+    argv.append('# deconfigure bridge %s' % bridge)
+    argv.extend(['--', '--if-exists', 'del-br', bridge])
+
+    return argv
+
+#
+#
+#
+
+class DatapathVswitch(Datapath):
+    """Datapath implementation that configures Open vSwitch through ovs-vsctl.
+
+    preconfigure() builds the ovs-vsctl argument list, configure() brings
+    the physical devices up and applies it, post() brings up the extra
+    ports, and bring_down() removes the configuration again.
+    """
+
+    def __init__(self, pif):
+        Datapath.__init__(self, pif)
+        # PIF that owns the datapath (the VLAN slave's datapath for a VLAN PIF).
+        self._dp = pif_datapath(pif)
+        self._ipdev = pif_ipdev_name(pif)
+
+        if pif_is_vlan(pif) and not self._dp:
+            raise Error("Unbridged VLAN devices not implemented yet")
+
+        log("Configured for Vswitch datapath")
+
+    def configure_ipdev(self, cfg):
+        """Write the TYPE line of the IP device configuration to cfg."""
+        cfg.write("TYPE=Ethernet\n")
+
+    def preconfigure(self, parent):
+        """Build the ovs-vsctl arguments for bringing the PIF up.
+
+        The result is stored in self._vsctl_argv and the devices to bring
+        up afterwards in self._extra_ports.  The parent argument is not
+        used by this implementation.
+        """
+        vsctl_argv = []
+        extra_ports = []
+
+        pifrec = db().get_pif_record(self._pif)
+
+        ipdev = self._ipdev
+        bridge = pif_bridge_name(self._dp)
+        c,e = configure_datapath(self._dp)
+        vsctl_argv += c
+        extra_ports += e
+
+        # There is no local or global named "pif" here; the PIF being
+        # configured is self._pif.
+        if pif_is_vlan(self._pif):
+            # NOTE(review): pif_datapath() returns a PIF, which is passed to
+            # add-br as-is, and 'bridge' is the datapath's own bridge name --
+            # confirm these are the arguments ovs-vsctl add-br expects.
+            datapath = pif_datapath(self._pif)
+            vsctl_argv += ['--', 'add-br', bridge, datapath, pifrec['VLAN']]
+        else:
+            vsctl_argv += ['--', 'add-br', bridge]
+
+        # Collect the network UUIDs of every PIF that shares this PIF's device.
+        xs_network_uuids = []
+        for nwpif in db().get_pifs_by_device(db().get_pif_record(self._pif)['device']):
+            rec = db().get_pif_record(nwpif)
+
+            # When state is read from dbcache PIF.currently_attached
+            # is always assumed to be false... Err on the side of
+            # listing even detached networks for the time being.
+            #if nwpif != pif and not rec['currently_attached']:
+            #    log("Network PIF %s not currently attached (%s)" % (rec['uuid'],pifrec['uuid']))
+            #    continue
+            nwrec = db().get_network_record(rec['network'])
+            xs_network_uuids += [nwrec['uuid']]
+
+        vsctl_argv += ['# configure xs-network-uuids']
+        vsctl_argv += ['--', 'br-set-external-id', bridge,
+                'xs-network-uuids', ';'.join(xs_network_uuids)]
+
+        vsctl_argv += ["# deconfigure ipdev %s" % ipdev]
+        vsctl_argv += datapath_deconfigure_ipdev(ipdev)
+        vsctl_argv += ["# reconfigure ipdev %s" % ipdev]
+        vsctl_argv += ['--', 'add-port', bridge, ipdev]
+
+        # XXX Needs support in ovs-vsctl
+        #if bridge == ipdev:
+        #    vsctl_argv += ['--add=bridge.%s.mac=%s' % (bridge, pifrec['MAC'])]
+        #else:
+        #    vsctl_argv += ['--add=iface.%s.mac=%s' % (ipdev, pifrec['MAC'])]
+
+        self._vsctl_argv = vsctl_argv
+        self._extra_ports = extra_ports
+
+    def bring_down_existing(self):
+        """Nothing needs to be brought down beforehand for this datapath."""
+        pass
+
+    def configure(self):
+        """Bring up the physical devices and apply the prepared configuration."""
+        # Bring up physical devices. ovs-vswitchd initially enables or
+        # disables bond slaves based on whether carrier is detected
+        # when they are added, and a network device that is down
+        # always reports "no carrier".
+        physical_devices = datapath_get_physical_pifs(self._dp)
+
+        for p in physical_devices:
+            oc = db().get_pif_record(p)['other_config']
+
+            dev = pif_netdev_name(p)
+
+            mtu = mtu_setting(oc)
+
+            netdev_up(dev, mtu)
+
+            settings, offload = ethtool_settings(oc)
+            if len(settings):
+                run_command(['/sbin/ethtool', '-s', dev] + settings)
+            if len(offload):
+                run_command(['/sbin/ethtool', '-K', dev] + offload)
+
+        datapath_modify_config(self._vsctl_argv)
+
+    def post(self):
+        """Bring up the extra ports recorded by preconfigure()."""
+        for p in self._extra_ports:
+            log("action_up: bring up %s" % p)
+            netdev_up(p)
+
+    def bring_down(self):
+        """Remove the IP device and, when nothing else uses it, the datapath."""
+        vsctl_argv = []
+
+        dp = self._dp
+        ipdev = self._ipdev
+
+        bridge = pif_bridge_name(dp)
+
+        #nw = db().get_pif_record(self._pif)['network']
+        #nwrec = db().get_network_record(nw)
+        #vsctl_argv += ['# deconfigure xs-network-uuids']
+        #vsctl_argv += ['--del-entry=bridge.%s.xs-network-uuids=%s' % (bridge,nwrec['uuid'])]
+
+        log("deconfigure ipdev %s on %s" % (ipdev,bridge))
+        vsctl_argv += ["# deconfigure ipdev %s" % ipdev]
+        vsctl_argv += datapath_deconfigure_ipdev(ipdev)
+
+        # Below, dp is reset to None whenever the datapath must stay up.
+        if pif_is_vlan(self._pif):
+            # If the VLAN's slave is attached, leave datapath setup.
+            slave = pif_get_vlan_slave(self._pif)
+            if db().get_pif_record(slave)['currently_attached']:
+                log("action_down: vlan slave is currently attached")
+                dp = None
+
+            # If the VLAN's slave has other VLANs that are attached, leave datapath setup.
+            for master in pif_get_vlan_masters(slave):
+                if master != self._pif and db().get_pif_record(master)['currently_attached']:
+                    log("action_down: vlan slave has other master: %s" % pif_netdev_name(master))
+                    dp = None
+
+            # Otherwise, take down the datapath too (fall through)
+            if dp:
+                log("action_down: no more masters, bring down slave %s" % bridge)
+        else:
+            # Stop here if this PIF has attached VLAN masters.
+            masters = [db().get_pif_record(m)['VLAN'] for m in pif_get_vlan_masters(self._pif) if db().get_pif_record(m)['currently_attached']]
+            if len(masters) > 0:
+                log("Leaving datapath %s up due to currently attached VLAN masters %s" % (bridge, masters))
+                dp = None
+
+        # NOTE(review): when the datapath is left up (dp is None) nothing is
+        # run at all, so the ipdev del-port arguments collected above are
+        # discarded -- confirm whether that is intended.
+        if dp:
+            vsctl_argv += deconfigure_datapath(dp)
+            datapath_modify_config(vsctl_argv)
index f28ae74..a350952 100755 (executable)
@@ -1,7 +1,6 @@
 #!/usr/bin/python
 #
 # Copyright (c) 2008,2009 Citrix Systems, Inc.
-# Copyright (c) 2009 Nicira Networks.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published
 
     %(command-name)s <PIF> up
     %(command-name)s <PIF> down
-    %(command-name)s [<PIF>] rewrite
+    %(command-name)s rewrite
     %(command-name)s --force <BRIDGE> up
     %(command-name)s --force <BRIDGE> down
-    %(command-name)s --force <BRIDGE> rewrite --device=<INTERFACE> <CONFIG>
-    %(command-name)s --force all down
+    %(command-name)s --force <BRIDGE> rewrite --device=<INTERFACE> --mac=<MAC-ADDRESS> <CONFIG>
 
     where <PIF> is one of:
        --session <SESSION-REF> --pif <PIF-REF>
        --mode=static --ip=<IPADDR> --netmask=<NM> [--gateway=<GW>]
 
   Options:
-    --session          A session reference to use to access the xapi DB
+    --session           A session reference to use to access the xapi DB
     --pif               A PIF reference within the session.
     --pif-uuid          The UUID of a PIF.
     --force             An interface name.
 """
 
-#
-# Undocumented parameters for test & dev:
-#
-#  --output-directory=<DIR>    Write configuration to <DIR>. Also disables actually
-#                               raising/lowering the interfaces
-#
-#
-#
 # Notes:
 # 1. Every pif belongs to exactly one network
 # 2. Every network has zero or one pifs
 # 3. A network may have an associated bridge, allowing vifs to be attached
 # 4. A network may be bridgeless (there's no point having a bridge over a storage pif)
 
-import XenAPI
-import os, sys, getopt, time, signal
+from InterfaceReconfigure import *
+
+import os, sys, getopt
 import syslog
 import traceback
 import re
 import random
-from xml.dom.minidom import getDOMImplementation
-from xml.dom.minidom import parse as parseXML
 
-output_directory = None
-
-db = None
 management_pif = None
 
-vswitch_state_dir = "/var/lib/openvswitch/"
-dbcache_file = vswitch_state_dir + "dbcache"
+dbcache_file = "/var/xapi/network.dbcache"
 
 #
-# Debugging and Logging.
+# Logging.
 #
 
-def debug_mode():
-    return output_directory is not None
-
-def log(s):
-    if debug_mode():
-        print >>sys.stderr, s
-    else:
-        syslog.syslog(s)
-
 def log_pif_action(action, pif):
-    pifrec = db.get_pif_record(pif)
+    pifrec = db().get_pif_record(pif)
     rec = {}
     rec['uuid'] = pifrec['uuid']
     rec['ip_configuration_mode'] = pifrec['ip_configuration_mode']
@@ -91,15 +67,6 @@ def log_pif_action(action, pif):
     rec['message'] = "Bring %(action)s PIF %(uuid)s" % rec
     log("%(message)s: %(pif_netdev_name)s configured as %(ip_configuration_mode)s" % rec)
 
-
-def run_command(command):
-    log("Running command: " + ' '.join(command))
-    rc = os.spawnl(os.P_WAIT, command[0], *command)
-    if rc != 0:
-        log("Command failed %d: " % rc + ' '.join(command))
-        return False
-    return True
-
 #
 # Exceptions.
 #
@@ -109,521 +76,6 @@ class Usage(Exception):
         Exception.__init__(self)
         self.msg = msg
 
-class Error(Exception):
-    def __init__(self, msg):
-        Exception.__init__(self)
-        self.msg = msg
-
-#
-# Configuration File Handling.
-#
-
-class ConfigurationFile(object):
-    """Write a file, tracking old and new versions.
-
-    Supports writing a new version of a file and applying and
-    reverting those changes.
-    """
-
-    __STATE = {"OPEN":"OPEN",
-               "NOT-APPLIED":"NOT-APPLIED", "APPLIED":"APPLIED",
-               "REVERTED":"REVERTED", "COMMITTED": "COMMITTED"}
-
-    def __init__(self, fname, path="/etc/sysconfig/network-scripts"):
-
-        self.__state = self.__STATE['OPEN']
-        self.__fname = fname
-        self.__children = []
-
-        if debug_mode():
-            dirname = output_directory
-        else:
-            dirname = path
-
-        self.__path    = os.path.join(dirname, fname)
-        self.__oldpath = os.path.join(dirname, "." + fname + ".xapi-old")
-        self.__newpath = os.path.join(dirname, "." + fname + ".xapi-new")
-        self.__unlink = False
-
-        self.__f = open(self.__newpath, "w")
-
-    def attach_child(self, child):
-        self.__children.append(child)
-
-    def path(self):
-        return self.__path
-
-    def readlines(self):
-        try:
-            return open(self.path()).readlines()
-        except:
-            return ""
-
-    def write(self, args):
-        if self.__state != self.__STATE['OPEN']:
-            raise Error("Attempt to write to file in state %s" % self.__state)
-        self.__f.write(args)
-
-    def unlink(self):
-        if self.__state != self.__STATE['OPEN']:
-            raise Error("Attempt to unlink file in state %s" % self.__state)
-        self.__unlink = True
-        self.__f.close()
-        self.__state = self.__STATE['NOT-APPLIED']
-
-    def close(self):
-        if self.__state != self.__STATE['OPEN']:
-            raise Error("Attempt to close file in state %s" % self.__state)
-
-        self.__f.close()
-        self.__state = self.__STATE['NOT-APPLIED']
-
-    def changed(self):
-        if self.__state != self.__STATE['NOT-APPLIED']:
-            raise Error("Attempt to compare file in state %s" % self.__state)
-
-        return True
-
-    def apply(self):
-        if self.__state != self.__STATE['NOT-APPLIED']:
-            raise Error("Attempt to apply configuration from state %s" % self.__state)
-
-        for child in self.__children:
-            child.apply()
-
-        log("Applying changes to %s configuration" % self.__fname)
-
-        # Remove previous backup.
-        if os.access(self.__oldpath, os.F_OK):
-            os.unlink(self.__oldpath)
-
-        # Save current configuration.
-        if os.access(self.__path, os.F_OK):
-            os.link(self.__path, self.__oldpath)
-            os.unlink(self.__path)
-
-        # Apply new configuration.
-        assert(os.path.exists(self.__newpath))
-        if not self.__unlink:
-            os.link(self.__newpath, self.__path)
-        else:
-            pass # implicit unlink of original file
-
-        # Remove temporary file.
-        os.unlink(self.__newpath)
-
-        self.__state = self.__STATE['APPLIED']
-
-    def revert(self):
-        if self.__state != self.__STATE['APPLIED']:
-            raise Error("Attempt to revert configuration from state %s" % self.__state)
-
-        for child in self.__children:
-            child.revert()
-
-        log("Reverting changes to %s configuration" % self.__fname)
-
-        # Remove existing new configuration
-        if os.access(self.__newpath, os.F_OK):
-            os.unlink(self.__newpath)
-
-        # Revert new configuration.
-        if os.access(self.__path, os.F_OK):
-            os.link(self.__path, self.__newpath)
-            os.unlink(self.__path)
-
-        # Revert to old configuration.
-        if os.access(self.__oldpath, os.F_OK):
-            os.link(self.__oldpath, self.__path)
-            os.unlink(self.__oldpath)
-
-        # Leave .*.xapi-new as an aid to debugging.
-
-        self.__state = self.__STATE['REVERTED']
-
-    def commit(self):
-        if self.__state != self.__STATE['APPLIED']:
-            raise Error("Attempt to commit configuration from state %s" % self.__state)
-
-        for child in self.__children:
-            child.commit()
-
-        log("Committing changes to %s configuration" % self.__fname)
-
-        if os.access(self.__oldpath, os.F_OK):
-            os.unlink(self.__oldpath)
-        if os.access(self.__newpath, os.F_OK):
-            os.unlink(self.__newpath)
-
-        self.__state = self.__STATE['COMMITTED']
-
-#
-# Helper functions for encoding/decoding database attributes to/from XML.
-#
-
-def str_to_xml(xml, parent, tag, val):
-    e = xml.createElement(tag)
-    parent.appendChild(e)
-    v = xml.createTextNode(val)
-    e.appendChild(v)
-def str_from_xml(n):
-    def getText(nodelist):
-        rc = ""
-        for node in nodelist:
-            if node.nodeType == node.TEXT_NODE:
-                rc = rc + node.data
-        return rc
-    return getText(n.childNodes).strip()
-
-def bool_to_xml(xml, parent, tag, val):
-    if val:
-        str_to_xml(xml, parent, tag, "True")
-    else:
-        str_to_xml(xml, parent, tag, "False")
-def bool_from_xml(n):
-    s = str_from_xml(n)
-    if s == "True":
-        return True
-    elif s == "False":
-        return False
-    else:
-        raise Error("Unknown boolean value %s" % s)
-
-def strlist_to_xml(xml, parent, ltag, itag, val):
-    e = xml.createElement(ltag)
-    parent.appendChild(e)
-    for v in val:
-        c = xml.createElement(itag)
-        e.appendChild(c)
-        cv = xml.createTextNode(v)
-        c.appendChild(cv)
-def strlist_from_xml(n, ltag, itag):
-    ret = []
-    for n in n.childNodes:
-        if n.nodeName == itag:
-            ret.append(str_from_xml(n))
-    return ret
-
-def otherconfig_to_xml(xml, parent, val, attrs):
-    otherconfig = xml.createElement("other_config")
-    parent.appendChild(otherconfig)
-    for n,v in val.items():
-        if not n in attrs:
-            raise Error("Unknown other-config attribute: %s" % n)
-        str_to_xml(xml, otherconfig, n, v)
-def otherconfig_from_xml(n, attrs):
-    ret = {}
-    for n in n.childNodes:
-        if n.nodeName in attrs:
-            ret[n.nodeName] = str_from_xml(n)
-    return ret
-
-#
-# Definitions of the database objects (and their attributes) used by interface-reconfigure.
-#
-# Each object is defined by a dictionary mapping an attribute name in
-# the xapi database to a tuple containing two items:
-#  - a function which takes this attribute and encodes it as XML.
-#  - a function which takes XML and decocdes it into a value.
-#
-# other-config attributes are specified as a simple array of strings
-
-PIF_XML_TAG = "pif"
-VLAN_XML_TAG = "vlan"
-BOND_XML_TAG = "bond"
-NETWORK_XML_TAG = "network"
-
-ETHTOOL_OTHERCONFIG_ATTRS = ['ethtool-%s' % x for x in 'autoneg', 'speed', 'duplex', 'rx', 'tx', 'sg', 'tso', 'ufo', 'gso' ]
-
-PIF_OTHERCONFIG_ATTRS = [ 'domain', 'peerdns', 'defaultroute', 'mtu', 'static-routes' ] + \
-                        [ 'bond-%s' % x for x in 'mode', 'miimon', 'downdelay', 'updelay', 'use_carrier' ] + \
-                        ETHTOOL_OTHERCONFIG_ATTRS
-
-PIF_ATTRS = { 'uuid': (str_to_xml,str_from_xml),
-              'management': (bool_to_xml,bool_from_xml),
-              'network': (str_to_xml,str_from_xml),
-              'device': (str_to_xml,str_from_xml),
-              'bond_master_of': (lambda x, p, t, v: strlist_to_xml(x, p, 'bond_master_of', 'slave', v),
-                                 lambda n: strlist_from_xml(n, 'bond_master_of', 'slave')),
-              'bond_slave_of': (str_to_xml,str_from_xml),
-              'VLAN': (str_to_xml,str_from_xml),
-              'VLAN_master_of': (str_to_xml,str_from_xml),
-              'VLAN_slave_of': (lambda x, p, t, v: strlist_to_xml(x, p, 'VLAN_slave_of', 'master', v),
-                                lambda n: strlist_from_xml(n, 'VLAN_slave_Of', 'master')),
-              'ip_configuration_mode': (str_to_xml,str_from_xml),
-              'IP': (str_to_xml,str_from_xml),
-              'netmask': (str_to_xml,str_from_xml),
-              'gateway': (str_to_xml,str_from_xml),
-              'DNS': (str_to_xml,str_from_xml),
-              'MAC': (str_to_xml,str_from_xml),
-              'other_config': (lambda x, p, t, v: otherconfig_to_xml(x, p, v, PIF_OTHERCONFIG_ATTRS),
-                               lambda n: otherconfig_from_xml(n, PIF_OTHERCONFIG_ATTRS)),
-
-              # Special case: We write the current value
-              # PIF.currently-attached to the cache but since it will
-              # not be valid when we come to use the cache later
-              # (i.e. after a reboot) we always read it as False.
-              'currently_attached': (bool_to_xml, lambda n: False),
-            }
-
-VLAN_ATTRS = { 'uuid': (str_to_xml,str_from_xml),
-               'tagged_PIF': (str_to_xml,str_from_xml),
-               'untagged_PIF': (str_to_xml,str_from_xml),
-             }
-
-BOND_ATTRS = { 'uuid': (str_to_xml,str_from_xml),
-               'master': (str_to_xml,str_from_xml),
-               'slaves': (lambda x, p, t, v: strlist_to_xml(x, p, 'slaves', 'slave', v),
-                          lambda n: strlist_from_xml(n, 'slaves', 'slave')),
-             }
-
-NETWORK_OTHERCONFIG_ATTRS = [ 'mtu', 'static-routes' ] + ETHTOOL_OTHERCONFIG_ATTRS
-
-NETWORK_ATTRS = { 'uuid': (str_to_xml,str_from_xml),
-                  'bridge': (str_to_xml,str_from_xml),
-                  'PIFs': (lambda x, p, t, v: strlist_to_xml(x, p, 'PIFs', 'PIF', v),
-                           lambda n: strlist_from_xml(n, 'PIFs', 'PIF')),
-                  'other_config': (lambda x, p, t, v: otherconfig_to_xml(x, p, v, NETWORK_OTHERCONFIG_ATTRS),
-                                   lambda n: otherconfig_from_xml(n, NETWORK_OTHERCONFIG_ATTRS)),
-                }
-
-class DatabaseCache(object):
-    def __read_xensource_inventory(self):
-        filename = "/etc/xensource-inventory"
-        f = open(filename, "r")
-        lines = [x.strip("\n") for x in f.readlines()]
-        f.close()
-
-        defs = [ (l[:l.find("=")], l[(l.find("=") + 1):]) for l in lines ]
-        defs = [ (a, b.strip("'")) for (a,b) in defs ]
-
-        return dict(defs)
-    def __pif_on_host(self,pif):
-        return self.__pifs.has_key(pif)
-
-    def __get_pif_records_from_xapi(self, session, host):
-        self.__pifs = {}
-        for (p,rec) in session.xenapi.PIF.get_all_records().items():
-            if rec['host'] != host:
-                continue
-            self.__pifs[p] = {}
-            for f in PIF_ATTRS:
-                self.__pifs[p][f] = rec[f]
-            self.__pifs[p]['other_config'] = {}
-            for f in PIF_OTHERCONFIG_ATTRS:
-                if not rec['other_config'].has_key(f): continue
-                self.__pifs[p]['other_config'][f] = rec['other_config'][f]
-
-    def __get_vlan_records_from_xapi(self, session):
-        self.__vlans = {}
-        for v in session.xenapi.VLAN.get_all():
-            rec = session.xenapi.VLAN.get_record(v)
-            if not self.__pif_on_host(rec['untagged_PIF']):
-                continue
-            self.__vlans[v] = {}
-            for f in VLAN_ATTRS:
-                self.__vlans[v][f] = rec[f]
-
-    def __get_bond_records_from_xapi(self, session):
-        self.__bonds = {}
-        for b in session.xenapi.Bond.get_all():
-            rec = session.xenapi.Bond.get_record(b)
-            if not self.__pif_on_host(rec['master']):
-                continue
-            self.__bonds[b] = {}
-            for f in BOND_ATTRS:
-                self.__bonds[b][f] = rec[f]
-
-    def __get_network_records_from_xapi(self, session):
-        self.__networks = {}
-        for n in session.xenapi.network.get_all():
-            rec = session.xenapi.network.get_record(n)
-            self.__networks[n] = {}
-            for f in NETWORK_ATTRS:
-                if f == "PIFs":
-                    # drop PIFs on other hosts
-                    self.__networks[n][f] = [p for p in rec[f] if self.__pif_on_host(p)]
-                else:
-                    self.__networks[n][f] = rec[f]
-            self.__networks[n]['other_config'] = {}
-            for f in NETWORK_OTHERCONFIG_ATTRS:
-                if not rec['other_config'].has_key(f): continue
-                self.__networks[n]['other_config'][f] = rec['other_config'][f]
-
-    def __to_xml(self, xml, parent, key, ref, rec, attrs):
-        """Encode a database object as XML"""
-        e = xml.createElement(key)
-        parent.appendChild(e)
-        if ref:
-            e.setAttribute('ref', ref)
-
-        for n,v in rec.items():
-            if attrs.has_key(n):
-                h,_ = attrs[n]
-                h(xml, e, n, v)
-            else:
-                raise Error("Unknown attribute %s" % n)
-    def __from_xml(self, e, attrs):
-        """Decode a database object from XML"""
-        ref = e.attributes['ref'].value
-        rec = {}
-        for n in e.childNodes:
-            if n.nodeName in attrs:
-                _,h = attrs[n.nodeName]
-                rec[n.nodeName] = h(n)
-        return (ref,rec)
-
-    def __init__(self, session_ref=None, cache_file=None):
-        if session_ref and cache_file:
-            raise Error("can't specify session reference and cache file")
-        if cache_file == None:
-            session = XenAPI.xapi_local()
-
-            if not session_ref:
-                log("No session ref given on command line, logging in.")
-                session.xenapi.login_with_password("root", "")
-            else:
-                session._session = session_ref
-
-            try:
-
-                inventory = self.__read_xensource_inventory()
-                assert(inventory.has_key('INSTALLATION_UUID'))
-                log("host uuid is %s" % inventory['INSTALLATION_UUID'])
-
-                host = session.xenapi.host.get_by_uuid(inventory['INSTALLATION_UUID'])
-
-                self.__get_pif_records_from_xapi(session, host)
-
-                self.__get_vlan_records_from_xapi(session)
-                self.__get_bond_records_from_xapi(session)
-                self.__get_network_records_from_xapi(session)
-            finally:
-                if not session_ref:
-                    session.xenapi.session.logout()
-        else:
-            log("Loading xapi database cache from %s" % cache_file)
-
-            xml = parseXML(cache_file)
-
-            self.__pifs = {}
-            self.__bonds = {}
-            self.__vlans = {}
-            self.__networks = {}
-
-            assert(len(xml.childNodes) == 1)
-            toplevel = xml.childNodes[0]
-
-            assert(toplevel.nodeName == "xenserver-network-configuration")
-
-            for n in toplevel.childNodes:
-                if n.nodeName == "#text":
-                    pass
-                elif n.nodeName == PIF_XML_TAG:
-                    (ref,rec) = self.__from_xml(n, PIF_ATTRS)
-                    self.__pifs[ref] = rec
-                elif n.nodeName == BOND_XML_TAG:
-                    (ref,rec) = self.__from_xml(n, BOND_ATTRS)
-                    self.__bonds[ref] = rec
-                elif n.nodeName == VLAN_XML_TAG:
-                    (ref,rec) = self.__from_xml(n, VLAN_ATTRS)
-                    self.__vlans[ref] = rec
-                elif n.nodeName == NETWORK_XML_TAG:
-                    (ref,rec) = self.__from_xml(n, NETWORK_ATTRS)
-                    self.__networks[ref] = rec
-                else:
-                    raise Error("Unknown XML element %s" % n.nodeName)
-
-    def save(self, cache_file):
-
-        xml = getDOMImplementation().createDocument(
-            None, "xenserver-network-configuration", None)
-        for (ref,rec) in self.__pifs.items():
-            self.__to_xml(xml, xml.documentElement, PIF_XML_TAG, ref, rec, PIF_ATTRS)
-        for (ref,rec) in self.__bonds.items():
-            self.__to_xml(xml, xml.documentElement, BOND_XML_TAG, ref, rec, BOND_ATTRS)
-        for (ref,rec) in self.__vlans.items():
-            self.__to_xml(xml, xml.documentElement, VLAN_XML_TAG, ref, rec, VLAN_ATTRS)
-        for (ref,rec) in self.__networks.items():
-            self.__to_xml(xml, xml.documentElement, NETWORK_XML_TAG, ref, rec,
-                          NETWORK_ATTRS)
-
-        f = open(cache_file, 'w')
-        f.write(xml.toprettyxml())
-        f.close()
-
-    def get_pif_by_uuid(self, uuid):
-        pifs = map(lambda (ref,rec): ref,
-                  filter(lambda (ref,rec): uuid == rec['uuid'],
-                         self.__pifs.items()))
-        if len(pifs) == 0:
-            raise Error("Unknown PIF \"%s\"" % uuid)
-        elif len(pifs) > 1:
-            raise Error("Non-unique PIF \"%s\"" % uuid)
-
-        return pifs[0]
-
-    def get_pifs_by_device(self, device):
-        return map(lambda (ref,rec): ref,
-                   filter(lambda (ref,rec): rec['device'] == device,
-                          self.__pifs.items()))
-
-    def get_pif_by_bridge(self, bridge):
-        networks = map(lambda (ref,rec): ref,
-                       filter(lambda (ref,rec): rec['bridge'] == bridge,
-                              self.__networks.items()))
-        if len(networks) == 0:
-            raise Error("No matching network \"%s\"" % bridge)
-
-        answer = None
-        for network in networks:
-            nwrec = self.get_network_record(network)
-            for pif in nwrec['PIFs']:
-                pifrec = self.get_pif_record(pif)
-                if answer:
-                    raise Error("Multiple PIFs on host for network %s" % (bridge))
-                answer = pif
-        if not answer:
-            raise Error("No PIF on host for network %s" % (bridge))
-        return answer
-
-    def get_pif_record(self, pif):
-        if self.__pifs.has_key(pif):
-            return self.__pifs[pif]
-        raise Error("Unknown PIF \"%s\" (get_pif_record)" % pif)
-    def get_all_pifs(self):
-        return self.__pifs
-    def pif_exists(self, pif):
-        return self.__pifs.has_key(pif)
-
-    def get_management_pif(self):
-        """ Returns the management pif on host
-        """
-        all = self.get_all_pifs()
-        for pif in all:
-            pifrec = self.get_pif_record(pif)
-            if pifrec['management']: return pif
-        return None
-
-    def get_network_record(self, network):
-        if self.__networks.has_key(network):
-            return self.__networks[network]
-        raise Error("Unknown network \"%s\"" % network)
-    def get_all_networks(self):
-        return self.__networks
-
-    def get_bond_record(self, bond):
-        if self.__bonds.has_key(bond):
-            return self.__bonds[bond]
-        else:
-            return None
-
-    def get_vlan_record(self, vlan):
-        if self.__vlans.has_key(vlan):
-            return self.__vlans[vlan]
-        else:
-            return None
-
 #
 # Boot from Network filesystem or device.
 #
@@ -634,7 +86,7 @@ def check_allowed(pif):
     Used to prevent system PIFs (such as network root disk) from being interfered with.
     """
 
-    pifrec = db.get_pif_record(pif)
+    pifrec = db().get_pif_record(pif)
     try:
         f = open("/proc/ardence")
         macline = filter(lambda x: x.startswith("HWaddr:"), f.readlines())
@@ -652,48 +104,12 @@ def check_allowed(pif):
 # Bare Network Devices -- network devices without IP configuration
 #
 
-def netdev_exists(netdev):
-    return os.path.exists("/sys/class/net/" + netdev)
-
-def pif_netdev_name(pif):
-    """Get the netdev name for a PIF."""
-
-    pifrec = db.get_pif_record(pif)
-
-    if pif_is_vlan(pif):
-        return "%(device)s.%(VLAN)s" % pifrec
-    else:
-        return pifrec['device']
-
-def netdev_down(netdev):
-    """Bring down a bare network device"""
-    if debug_mode():
-        return
-    if not netdev_exists(netdev):
-        log("netdev: down: device %s does not exist, ignoring" % netdev)
-        return
-    run_command(["/sbin/ifconfig", netdev, 'down'])
-
-def netdev_up(netdev, mtu=None):
-    """Bring up a bare network device"""
-    if debug_mode():
-        return
-    if not netdev_exists(netdev):
-        raise Error("netdev: up: device %s does not exist" % netdev)
-
-    if mtu:
-        mtu = ["mtu", mtu]
-    else:
-        mtu = []
-        
-    run_command(["/sbin/ifconfig", netdev, 'up'] + mtu)
-
 def netdev_remap_name(pif, already_renamed=[]):
     """Check whether 'pif' exists and has the correct MAC.
     If not, try to find a device with the correct MAC and rename it.
     'already_renamed' is used to avoid infinite recursion.
     """
-    
+
     def read1(name):
         file = None
         try:
@@ -731,7 +147,7 @@ def netdev_remap_name(pif, already_renamed=[]):
         if not run_command(['/sbin/ip', 'link', 'set', old_name, 'name', new_name]):
             raise Error("Could not rename %s to %s" % (old_name, new_name))
 
-    pifrec = db.get_pif_record(pif)
+    pifrec = db().get_pif_record(pif)
     device = pifrec['device']
     mac = pifrec['MAC']
 
@@ -764,205 +180,39 @@ def netdev_remap_name(pif, already_renamed=[]):
 # IP Network Devices -- network devices with IP configuration
 #
 
-def pif_ipdev_name(pif):
-    """Return the ipdev name associated with pif"""
-    pifrec = db.get_pif_record(pif)
-    nwrec = db.get_network_record(pifrec['network'])
-
-    if nwrec['bridge']:
-        # TODO: sanity check that nwrec['bridgeless'] != 'true'
-        return nwrec['bridge']
-    else:
-        # TODO: sanity check that nwrec['bridgeless'] == 'true'
-        return pif_netdev_name(pif)
-
 def ifdown(netdev):
     """Bring down a network interface"""
-    if debug_mode():
-        return
+    # Nothing to do when netdev_exists() reports no such device: log and return.
     if not netdev_exists(netdev):
         log("ifdown: device %s does not exist, ignoring" % netdev)
         return
+    # No ifcfg-<netdev> file: take the device down directly with ifconfig
+    # and return without invoking /sbin/ifdown.
     if not os.path.exists("/etc/sysconfig/network-scripts/ifcfg-%s" % netdev):
-        log("ifdown: device %s exists but ifcfg %s does not" % (netdev,netdev))
-        netdev_down(netdev)
+        log("ifdown: device %s exists but ifcfg-%s does not" % (netdev,netdev))
+        run_command(["/sbin/ifconfig", netdev, 'down'])
+        return
     run_command(["/sbin/ifdown", netdev])
 
 def ifup(netdev):
     """Bring up a network interface"""
-    if debug_mode():
-        return
-    if not netdev_exists(netdev):
-        raise Error("ifup: device %s does not exist, ignoring" % netdev)
+    # Raises Error instead of running /sbin/ifup when no ifcfg-<netdev> file exists.
+    # NOTE(review): the message below still says the device "exists", but the
+    # netdev_exists() check is removed by this change -- confirm the wording.
     if not os.path.exists("/etc/sysconfig/network-scripts/ifcfg-%s" % netdev):
         raise Error("ifup: device %s exists but ifcfg-%s does not" % (netdev,netdev))
     run_command(["/sbin/ifup", netdev])
 
 #
-# Bridges
+#
 #
 
-def pif_bridge_name(pif):
-    """Return the bridge name of a pif.
-
-    PIF must not be a VLAN and must be a bridged PIF."""
-
-    pifrec = db.get_pif_record(pif)
+def pif_rename_physical_devices(pif):
 
     if pif_is_vlan(pif):
-        raise Error("PIF %(uuid)s cannot be a bridge, VLAN is %(VLAN)s" % pifrec)
-        
-    nwrec = db.get_network_record(pifrec['network'])
+        pif = pif_get_vlan_slave(pif)
 
-    if nwrec['bridge']:
-        return nwrec['bridge']
+    if pif_is_bond(pif):
+        pifs = pif_get_bond_slaves(pif)
     else:
-        raise Error("PIF %(uuid)s does not have a bridge name" % pifrec)
-
-#
-# PIF miscellanea
-#
+        pifs = [pif]
 
-def pif_currently_in_use(pif):
-    """Determine if a PIF is currently in use.
-
-    A PIF is determined to be currently in use if
-    - PIF.currently-attached is true
-    - Any bond master is currently attached
-    - Any VLAN master is currently attached
-    """
-    rec = db.get_pif_record(pif)
-    if rec['currently_attached']:
-        log("configure_datapath: %s is currently attached" % (pif_netdev_name(pif)))
-        return True
-    for b in pif_get_bond_masters(pif):
-        if pif_currently_in_use(b):
-            log("configure_datapath: %s is in use by BOND master %s" % (pif_netdev_name(pif),pif_netdev_name(b)))
-            return True
-    for v in pif_get_vlan_masters(pif):
-        if pif_currently_in_use(v):
-            log("configure_datapath: %s is in use by VLAN master %s" % (pif_netdev_name(pif),pif_netdev_name(v)))
-            return True
-    return False
-
-#
-#
-#
-
-def ethtool_settings(oc):
-    settings = []
-    if oc.has_key('ethtool-speed'):
-        val = oc['ethtool-speed']
-        if val in ["10", "100", "1000"]:
-            settings += ['speed', val]
-        else:
-            log("Invalid value for ethtool-speed = %s. Must be 10|100|1000." % val)
-    if oc.has_key('ethtool-duplex'):
-        val = oc['ethtool-duplex']
-        if val in ["10", "100", "1000"]:
-            settings += ['duplex', 'val']
-        else:
-            log("Invalid value for ethtool-duplex = %s. Must be half|full." % val)
-    if oc.has_key('ethtool-autoneg'):
-        val = oc['ethtool-autoneg']
-        if val in ["true", "on"]:
-            settings += ['autoneg', 'on']
-        elif val in ["false", "off"]:
-            settings += ['autoneg', 'off']
-        else:
-            log("Invalid value for ethtool-autoneg = %s. Must be on|true|off|false." % val)
-    offload = []
-    for opt in ("rx", "tx", "sg", "tso", "ufo", "gso"):
-        if oc.has_key("ethtool-" + opt):
-            val = oc["ethtool-" + opt]
-            if val in ["true", "on"]:
-                offload += [opt, 'on']
-            elif val in ["false", "off"]:
-                offload += [opt, 'off']
-            else:
-                log("Invalid value for ethtool-%s = %s. Must be on|true|off|false." % (opt, val))
-    return settings,offload
-
-def mtu_setting(oc):
-    if oc.has_key('mtu'):
-        try:
-            int(oc['mtu'])      # Check that the value is an integer
-            return oc['mtu']
-        except ValueError, x:
-            log("Invalid value for mtu = %s" % oc['mtu'])
-    return None
-
-#
-# Bonded PIFs
-#
-def pif_get_bond_masters(pif):
-    """Returns a list of PIFs which are bond masters of this PIF"""
-
-    pifrec = db.get_pif_record(pif)
-
-    bso = pifrec['bond_slave_of']
-
-    # bond-slave-of is currently a single reference but in principle a
-    # PIF could be a member of several bonds which are not
-    # concurrently attached. Be robust to this possibility.
-    if not bso or bso == "OpaqueRef:NULL":
-        bso = []
-    elif not type(bso) == list:
-        bso = [bso]
-
-    bondrecs = [db.get_bond_record(bond) for bond in bso]
-    bondrecs = [rec for rec in bondrecs if rec]
-
-    return [bond['master'] for bond in bondrecs]
-
-def pif_get_bond_slaves(pif):
-    """Returns a list of PIFs which make up the given bonded pif."""
-
-    pifrec = db.get_pif_record(pif)
-
-    bmo = pifrec['bond_master_of']
-    if len(bmo) > 1:
-        raise Error("Bond-master-of contains too many elements")
-
-    if len(bmo) == 0:
-        return []
-
-    bondrec = db.get_bond_record(bmo[0])
-    if not bondrec:
-        raise Error("No bond record for bond master PIF")
-
-    return bondrec['slaves']
-
-#
-# VLAN PIFs
-#
-
-def pif_is_vlan(pif):
-    return db.get_pif_record(pif)['VLAN'] != '-1'
-
-def pif_get_vlan_slave(pif):
-    """Find the PIF which is the VLAN slave of pif.
-
-Returns the 'physical' PIF underneath the a VLAN PIF @pif."""
-
-    pifrec = db.get_pif_record(pif)
-
-    vlan = pifrec['VLAN_master_of']
-    if not vlan or vlan == "OpaqueRef:NULL":
-        raise Error("PIF is not a VLAN master")
-
-    vlanrec = db.get_vlan_record(vlan)
-    if not vlanrec:
-        raise Error("No VLAN record found for PIF")
-
-    return vlanrec['tagged_PIF']
-
-def pif_get_vlan_masters(pif):
-    """Returns a list of PIFs which are VLANs on top of the given pif."""
-
-    pifrec = db.get_pif_record(pif)
-    vlans = [db.get_vlan_record(v) for v in pifrec['VLAN_slave_of']]
-    return [v['untagged_PIF'] for v in vlans if v and db.pif_exists(v['untagged_PIF'])]
+    for pif in pifs:
+        netdev_remap_name(pif)
 
 #
 # IP device configuration
@@ -981,7 +231,6 @@ def ipdev_configure_static_routes(interface, oc, f):
           172.16.0.0/15 via 192.168.0.3 dev xenbr1
           172.18.0.0/16 via 192.168.0.4 dev xenbr1
     """
-    fname = "route-%s" % interface
     if oc.has_key('static-routes'):
         # The key is present - extract comma seperates entries
         lines = oc['static-routes'].split(',')
@@ -989,7 +238,7 @@ def ipdev_configure_static_routes(interface, oc, f):
         # The key is not present, i.e. there are no static routes
         lines = []
 
-    child = ConfigurationFile(fname)
+    child = ConfigurationFile("/etc/sysconfig/network-scripts/route-%s" % interface)
     child.write("# DO NOT EDIT: This file (%s) was autogenerated by %s\n" % \
             (os.path.basename(child.path()), os.path.basename(sys.argv[0])))
 
@@ -1009,7 +258,7 @@ def ipdev_open_ifcfg(pif):
 
     log("Writing network configuration for %s" % ipdev)
 
-    f = ConfigurationFile("ifcfg-%s" % ipdev)
+    f = ConfigurationFile("/etc/sysconfig/network-scripts/ifcfg-%s" % ipdev)
 
     f.write("# DO NOT EDIT: This file (%s) was autogenerated by %s\n" % \
             (os.path.basename(f.path()), os.path.basename(sys.argv[0])))
@@ -1019,7 +268,7 @@ def ipdev_open_ifcfg(pif):
 
     return f
 
-def ipdev_configure_network(pif):
+def ipdev_configure_network(pif, dp):
     """Write the configuration file for a network.
 
     Writes configuration derived from the network object into the relevant
@@ -1031,11 +280,11 @@ def ipdev_configure_network(pif):
 
     params:
         pif:  Opaque_ref of pif
-        f :   ConfigurationFile(/path/to/ifcfg) to which we append network configuration
+        dp:   Datapath object
     """
 
-    pifrec = db.get_pif_record(pif)
-    nwrec = db.get_network_record(pifrec['network'])
+    pifrec = db().get_pif_record(pif)
+    nwrec = db().get_network_record(pifrec['network'])
 
     ipdev = pif_ipdev_name(pif)
 
@@ -1048,7 +297,8 @@ def ipdev_configure_network(pif):
     if pifrec.has_key('other_config'):
         oc = pifrec['other_config']
 
-    f.write("TYPE=Ethernet\n")
+    dp.configure_ipdev(f)
+
     if pifrec['ip_configuration_mode'] == "DHCP":
         f.write("BOOTPROTO=dhcp\n")
         f.write("PERSISTENT_DHCLIENT=yes\n")
@@ -1081,8 +331,7 @@ def ipdev_configure_network(pif):
     if oc and oc.has_key('domain'):
         f.write("DOMAIN='%s'\n" % oc['domain'].replace(',', ' '))
 
-    # We only allow one ifcfg-* to have PEERDNS=yes and there can be
-    # only one GATEWAYDEV in /etc/sysconfig/network.
+    # There can be only one DNSDEV and one GATEWAYDEV in /etc/sysconfig/network.
     #
     # The peerdns pif will be the one with
     # pif::other-config:peerdns=true, or the mgmt pif if none have
@@ -1092,37 +341,35 @@ def ipdev_configure_network(pif):
     # pif::other-config:defaultroute=true, or the mgmt pif if none
     # have this set.
 
-    # Work out which pif on this host should be the one with
-    # PEERDNS=yes and which should be the GATEWAYDEV
+    # Work out which pif on this host should be the DNSDEV and which
+    # should be the GATEWAYDEV
     #
-    # Note: we prune out the bond master pif (if it exists).  This is
+    # Note: we prune out the bond master pif (if it exists). This is
     # because when we are called to bring up an interface with a bond
     # master, it is implicit that we should bring down that master.
-    pifs_on_host = [ __pif for __pif in db.get_all_pifs() if
-                     not __pif in pif_get_bond_masters(pif) ]
-    other_pifs_on_host = [ __pif for __pif in pifs_on_host if __pif != pif ]
 
-    peerdns_pif = None
-    defaultroute_pif = None
+    pifs_on_host = [p for p in db().get_all_pifs() if not p in pif_get_bond_masters(pif)]
 
     # loop through all the pifs on this host looking for one with
     #   other-config:peerdns = true, and one with
     #   other-config:default-route=true
+    peerdns_pif = None
+    defaultroute_pif = None
     for __pif in pifs_on_host:
-        __pifrec = db.get_pif_record(__pif)
+        __pifrec = db().get_pif_record(__pif)
         __oc = __pifrec['other_config']
         if __oc.has_key('peerdns') and __oc['peerdns'] == 'true':
             if peerdns_pif == None:
                 peerdns_pif = __pif
             else:
                 log('Warning: multiple pifs with "peerdns=true" - choosing %s and ignoring %s' % \
-                        (db.get_pif_record(peerdns_pif)['device'], __pifrec['device']))
+                        (db().get_pif_record(peerdns_pif)['device'], __pifrec['device']))
         if __oc.has_key('defaultroute') and __oc['defaultroute'] == 'true':
             if defaultroute_pif == None:
                 defaultroute_pif = __pif
             else:
                 log('Warning: multiple pifs with "defaultroute=true" - choosing %s and ignoring %s' % \
-                        (db.get_pif_record(defaultroute_pif)['device'], __pifrec['device']))
+                        (db().get_pif_record(defaultroute_pif)['device'], __pifrec['device']))
 
     # If no pif is explicitly specified then use the mgmt pif for
     # peerdns/defaultroute.
@@ -1131,356 +378,67 @@ def ipdev_configure_network(pif):
     if defaultroute_pif == None:
         defaultroute_pif = management_pif
 
-    # Update all the other network's ifcfg files and ensure
-    # consistency.
-    for __pif in other_pifs_on_host:
-        __f = ipdev_open_ifcfg(__pif)
-        peerdns_line_wanted = 'PEERDNS=%s\n' % ((__pif == peerdns_pif) and 'yes' or 'no')
-        lines =  __f.readlines()
-
-        if not peerdns_line_wanted in lines:
-            # the PIF selected for DNS has changed and as a result this ifcfg file needs rewriting
-            for line in lines:
-                if not line.lstrip().startswith('PEERDNS'):
-                    __f.write(line)
-            log("Setting %s in %s" % (peerdns_line_wanted.strip(), __f.path()))
-            __f.write(peerdns_line_wanted)
-            __f.close()
-            f.attach_child(__f)
-
-        else:
-            # There is no need to change this ifcfg file.  So don't attach_child.
-            pass
-
-    # ... and for this pif too
-    f.write('PEERDNS=%s\n' % ((pif == peerdns_pif) and 'yes' or 'no'))
-
-    # Update gatewaydev
-    fnetwork = ConfigurationFile("network", "/etc/sysconfig")
-    for line in fnetwork.readlines():
-        if line.lstrip().startswith('GATEWAY') :
-            continue
-        fnetwork.write(line)
-    if defaultroute_pif:
-        gatewaydev = pif_ipdev_name(defaultroute_pif)
-        if not gatewaydev:
-            gatewaydev = pif_netdev_name(defaultroute_pif)
-        fnetwork.write('GATEWAYDEV=%s\n' % gatewaydev)
-    fnetwork.close()
-    f.attach_child(fnetwork)
-
-    return f
-
-#
-# Datapath Configuration
-#
-
-def pif_datapath(pif):
-    """Return the OpenFlow datapath name associated with pif.
-For a non-VLAN PIF, the datapath name is the bridge name.
-For a VLAN PIF, the datapath name is the bridge name for the PIF's VLAN slave.
-"""
-    if pif_is_vlan(pif):
-        return pif_datapath(pif_get_vlan_slave(pif))
-    
-    pifrec = db.get_pif_record(pif)
-    nwrec = db.get_network_record(pifrec['network'])
-    if not nwrec['bridge']:
-        raise Error("datapath PIF cannot be bridgeless")
-    else:
-        return pif
-
-def datapath_get_physical_pifs(pif):
-    """Return the PIFs for the physical network device(s) associated with a datapath PIF.
-For a bond master PIF, these are the bond slave PIFs.
-For a non-VLAN, non-bond master PIF, the PIF is its own physical device PIF.
-
-A VLAN PIF cannot be a datapath PIF.
-"""
-    pifrec = db.get_pif_record(pif)
-
-    if pif_is_vlan(pif):
-        raise Error("get-physical-pifs should not get passed a VLAN")
-    elif len(pifrec['bond_master_of']) != 0:
-        return pif_get_bond_slaves(pif)
-    else:
-        return [pif]
-
-def datapath_deconfigure_physical(netdev):
-    return ['--', '--if-exists', 'del-port', netdev]
-
-def datapath_configure_bond(pif,slaves):
-    bridge = pif_bridge_name(pif)
-    pifrec = db.get_pif_record(pif)
-    interface = pif_netdev_name(pif)
-
-    argv = ['--', '--fake-iface', 'add-bond', bridge, interface]
-    for slave in slaves:
-        argv += [pif_netdev_name(slave)]
-
-    # XXX need ovs-vsctl support
-    #if pifrec['MAC'] != "":
-    #    argv += ['--add=port.%s.mac=%s' % (interface, pifrec['MAC'])]
-
-    # Bonding options.
-    bond_options = {
-        "mode":   "balance-slb",
-        "miimon": "100",
-        "downdelay": "200",
-        "updelay": "31000",
-        "use_carrier": "1",
-        }
-    # override defaults with values from other-config whose keys
-    # being with "bond-"
-    oc = pifrec['other_config']
-    overrides = filter(lambda (key,val):
-                           key.startswith("bond-"), oc.items())
-    overrides = map(lambda (key,val): (key[5:], val), overrides)
-    bond_options.update(overrides)
-    for (name,val) in bond_options.items():
-        # XXX need ovs-vsctl support for bond options
-        #argv += ["--add=bonding.%s.%s=%s" % (interface, name, val)]
-        pass
-    return argv
-
-def datapath_deconfigure_bond(netdev):
-    return ['--', '--if-exists', 'del-port', netdev]
+    is_dnsdev = peerdns_pif == pif
+    is_gatewaydev = defaultroute_pif == pif
+
+    if is_dnsdev or is_gatewaydev:
+        fnetwork = ConfigurationFile("/etc/sysconfig/network")
+        for line in fnetwork.readlines():
+            if is_dnsdev and line.lstrip().startswith('DNSDEV='):
+                fnetwork.write('DNSDEV=%s\n' % ipdev)
+                is_dnsdev = False
+            elif is_gatewaydev and line.lstrip().startswith('GATEWAYDEV='):
+                fnetwork.write('GATEWAYDEV=%s\n' % ipdev)
+                is_gatewaydev = False
+            else:
+                fnetwork.write(line)
 
-def datapath_deconfigure_ipdev(interface):
-    return ['--', '--if-exists', 'del-port', interface]
+        if is_dnsdev:
+            fnetwork.write('DNSDEV=%s\n' % ipdev)
+        if is_gatewaydev:
+            fnetwork.write('GATEWAYDEV=%s\n' % ipdev)
 
-def datapath_modify_config(commands):
-    if debug_mode():
-        log("modifying configuration:")
-        for c in commands:
-            log("  %s" % c)
+        fnetwork.close()
+        f.attach_child(fnetwork)
 
-    rc = run_command(['/usr/bin/ovs-vsctl'] + ['--timeout=20']
-                     + [c for c in commands if not c.startswith('#')])
-    if not rc:
-        raise Error("Failed to modify vswitch configuration")
-    return True
+    return f
 
 #
-# Toplevel Datapath Configuration.
+# Toplevel actions
 #
 
-def configure_datapath(pif):
-    """Bring up the datapath configuration for PIF.
-
-    Should be careful not to glitch existing users of the datapath, e.g. other VLANs etc.
-
-    Should take care of tearing down other PIFs which encompass common physical devices.
-
-    Returns a tuple containing
-    - A list containing the necessary cfgmod command line arguments
-    - A list of additional devices which should be brought up after
-      the configuration is applied.    
-    """
-
-    cfgmod_argv = []
-    extra_up_ports = []
-
-    bridge = pif_bridge_name(pif)
+def action_up(pif, force):
+    pifrec = db().get_pif_record(pif)
 
-    physical_devices = datapath_get_physical_pifs(pif)
-
-    # Determine additional devices to deconfigure.
-    #
-    # Given all physical devices which are part of this PIF we need to
-    # consider:
-    # - any additional bond which a physical device is part of.
-    # - any additional physical devices which are part of an additional bond.
-    #
-    # Any of these which are not currently in use should be brought
-    # down and deconfigured.
-    extra_down_bonds = []
-    extra_down_ports = []
-    for p in physical_devices:
-        for bond in pif_get_bond_masters(p):
-            if bond == pif:
-                log("configure_datapath: leaving bond %s up" % pif_netdev_name(bond))
-                continue
-            if bond in extra_down_bonds:
-                continue
-            if db.get_pif_record(bond)['currently_attached']:
-                log("configure_datapath: implicitly tearing down currently-attached bond %s" % pif_netdev_name(bond))
-
-            extra_down_bonds += [bond]
-
-            for s in pif_get_bond_slaves(bond):
-                if s in physical_devices:
-                    continue
-                if s in extra_down_ports:
-                    continue
-                if pif_currently_in_use(s):
-                    continue
-                extra_down_ports += [s]
-
-    log("configure_datapath: bridge      - %s" % bridge)
-    log("configure_datapath: physical    - %s" % [pif_netdev_name(p) for p in physical_devices])
-    log("configure_datapath: extra ports - %s" % [pif_netdev_name(p) for p in extra_down_ports])
-    log("configure_datapath: extra bonds - %s" % [pif_netdev_name(p) for p in extra_down_bonds])
-
-    # Need to fully deconfigure any bridge which any of the:
-    # - physical devices
-    # - bond devices
-    # - sibling devices
-    # refers to
-    for brpif in physical_devices + extra_down_ports + extra_down_bonds:
-        if brpif == pif:
-            continue
-        b = pif_bridge_name(brpif)
-        ifdown(b)
-        cfgmod_argv += ['# remove bridge %s' % b]
-        cfgmod_argv += ['--', '--if-exists', 'del-br', b]
-
-    for n in extra_down_ports:
-        dev = pif_netdev_name(n)
-        cfgmod_argv += ['# deconfigure sibling physical device %s' % dev]
-        cfgmod_argv += datapath_deconfigure_physical(dev)
-        netdev_down(dev)
-
-    for n in extra_down_bonds:
-        dev = pif_netdev_name(n)
-        cfgmod_argv += ['# deconfigure bond device %s' % dev]
-        cfgmod_argv += datapath_deconfigure_bond(dev)
-        netdev_down(dev)
-
-    for p in physical_devices:
-        dev = pif_netdev_name(p)
-        cfgmod_argv += ['# deconfigure physical port %s' % dev]
-        cfgmod_argv += datapath_deconfigure_physical(dev)
-
-    # Check the MAC address of each network device and remap if
-    # necessary to make names match our expectations.
-    for p in physical_devices:
-        netdev_remap_name(p)
-
-    # Bring up physical devices early, because ovs-vswitchd initially
-    # enables or disables bond slaves based on whether carrier is
-    # detected when they are added, and a network device that is down
-    # always reports "no carrier".
-    for p in physical_devices:
-        oc = db.get_pif_record(p)['other_config']
-
-        dev = pif_netdev_name(p)
-
-        mtu = mtu_setting(oc)
-
-        netdev_up(dev, mtu)
-        
-        settings, offload = ethtool_settings(oc)
-        if len(settings):
-            run_command(['/sbin/ethtool', '-s', dev] + settings)
-        if len(offload):
-            run_command(['/sbin/ethtool', '-K', dev] + offload)
-
-    # XXX It seems like the following should not be necessary...
-    cfgmod_argv += ['--', '--if-exists', 'del-br', bridge]
-                    
-    if pif_is_vlan(pif):
-        datapath = pif_datapath(pif)
-        vlan = db.get_pif_record(pif)['VLAN']
-        cfgmod_argv += ['--', 'add-br', bridge, datapath, vlan]
-    else:
-        cfgmod_argv += ['--', 'add-br', bridge]
-
-    if len(physical_devices) > 1:
-        cfgmod_argv += ['# deconfigure bond %s' % pif_netdev_name(pif)]
-        cfgmod_argv += datapath_deconfigure_bond(pif_netdev_name(pif))
-        cfgmod_argv += ['# configure bond %s' % pif_netdev_name(pif)]
-        cfgmod_argv += datapath_configure_bond(pif, physical_devices)
-        extra_up_ports += [pif_netdev_name(pif)]
-    else:
-        iface = pif_netdev_name(physical_devices[0])
-        cfgmod_argv += ['# add physical device %s' % iface]
-        cfgmod_argv += ['--', 'add-port', bridge, iface]
-
-    return cfgmod_argv,extra_up_ports
-
-def deconfigure_datapath(pif):
-    cfgmod_argv = []
-
-    bridge = pif_bridge_name(pif)
-
-    physical_devices = datapath_get_physical_pifs(pif)
-
-    log("deconfigure_datapath: bridge           - %s" % bridge)
-    log("deconfigure_datapath: physical devices - %s" % [pif_netdev_name(p) for p in physical_devices])
+    ipdev = pif_ipdev_name(pif)
+    dp = DatapathFactory(pif)
 
-    for p in physical_devices:
-        dev = pif_netdev_name(p)
-        cfgmod_argv += ['# deconfigure physical port %s' % dev]
-        cfgmod_argv += datapath_deconfigure_physical(dev)
-        netdev_down(dev)
+    log("action_up: %s" % ipdev)
 
-    if len(physical_devices) > 1:
-        cfgmod_argv += ['# deconfigure bond %s' % pif_netdev_name(pif)]
-        cfgmod_argv += datapath_deconfigure_bond(pif_netdev_name(pif))
+    f = ipdev_configure_network(pif, dp)
 
-    cfgmod_argv += ['# deconfigure bridge %s' % bridge]
-    cfgmod_argv += ['--', '--if-exists', 'del-br', bridge]
-    
-    return cfgmod_argv
+    dp.preconfigure(f)
 
-#
-# Toplevel actions
-#
-
-def action_up(pif):
-    pifrec = db.get_pif_record(pif)
-    cfgmod_argv = []
-    extra_ports = []
+    f.close()
 
-    ipdev = pif_ipdev_name(pif)
-    dp = pif_datapath(pif)
-    bridge = pif_bridge_name(dp)
+    pif_rename_physical_devices(pif)
 
-    log("action_up: %s on bridge %s" % (ipdev, bridge))
-    
-    ifdown(ipdev)
+    # if we are not forcing the interface up then attempt to tear down
+    # any existing devices which might interfere with bringing this one
+    # up.
+    if not force:
+        ifdown(ipdev)
 
-    if dp:
-        c,e = configure_datapath(dp)
-        cfgmod_argv += c
-        extra_ports += e
-
-        xs_network_uuids = []
-        for nwpif in db.get_pifs_by_device(db.get_pif_record(pif)['device']):
-            rec = db.get_pif_record(nwpif)
-            
-            # When state is read from dbcache PIF.currently_attached
-            # is always assumed to be false... Err on the side of
-            # listing even detached networks for the time being.
-            #if nwpif != pif and not rec['currently_attached']:
-            #    log("Network PIF %s not currently attached (%s)" % (rec['uuid'],pifrec['uuid']))
-            #    continue
-            nwrec = db.get_network_record(rec['network'])
-            xs_network_uuids += [nwrec['uuid']]
-        cfgmod_argv += ['# configure xs-network-uuids']
-        cfgmod_argv += ['--', 'br-set-external-id', bridge,
-                        'xs-network-uuids', ';'.join(xs_network_uuids)]
-
-        if ipdev != bridge:
-            cfgmod_argv += ["# deconfigure ipdev %s" % ipdev]
-            cfgmod_argv += datapath_deconfigure_ipdev(ipdev)
-            cfgmod_argv += ["# reconfigure ipdev %s" % ipdev]
-            cfgmod_argv += ['--', 'add-port', bridge, ipdev]
-
-    f = ipdev_configure_network(pif)
-    f.close()
+        dp.bring_down_existing()
 
-    # Apply updated configuration.
     try:
         f.apply()
 
-        datapath_modify_config(cfgmod_argv)
+        dp.configure()
 
         ifup(ipdev)
 
-        for p in extra_ports:
-            netdev_up(p)
+        dp.post()
 
         # Update /etc/issue (which contains the IP address of the management interface)
         os.system("/sbin/update-issue")
@@ -1492,72 +450,106 @@ def action_up(pif):
         raise
 
 def action_down(pif):
-    pifrec = db.get_pif_record(pif)
-    cfgmod_argv = []
-
     ipdev = pif_ipdev_name(pif)
-    dp = pif_datapath(pif)
-    bridge = pif_bridge_name(dp)
-    
-    log("action_down: %s on bridge %s" % (ipdev, bridge))
+    dp = DatapathFactory(pif)
 
-    ifdown(ipdev)
-
-    if dp:
-        nw = db.get_pif_record(pif)['network']
-        nwrec = db.get_network_record(nw)
+    log("action_down: %s" % ipdev)
 
-        log("deconfigure ipdev %s on %s" % (ipdev,bridge))
-        cfgmod_argv += ["# deconfigure ipdev %s" % ipdev]
-        cfgmod_argv += datapath_deconfigure_ipdev(ipdev)
+    ifdown(ipdev)
 
-    f = ipdev_open_ifcfg(pif)
-    f.unlink()
+    dp.bring_down()
 
-    if pif_is_vlan(pif):
-        br = ConfigurationFile("br-%s" % bridge, vswitch_state_dir)
-        br.unlink()
-        f.attach_child(br)
-
-        # If the VLAN's slave is attached, leave datapath setup.
-        slave = pif_get_vlan_slave(pif)
-        if db.get_pif_record(slave)['currently_attached']:
-            log("action_down: vlan slave is currently attached")
-            dp = None
-
-        # If the VLAN's slave has other VLANs that are attached, leave datapath setup.
-        for master in pif_get_vlan_masters(slave):
-            if master != pif and db.get_pif_record(master)['currently_attached']:
-                log("action_down: vlan slave has other master: %s" % pif_netdev_name(master))
-                dp = None
-
-        # Otherwise, take down the datapath too (fall through)
-        if dp:
-            log("action_down: no more masters, bring down slave %s" % bridge)
-    else:
-        # Stop here if this PIF has attached VLAN masters.
-        masters = [db.get_pif_record(m)['VLAN'] for m in pif_get_vlan_masters(pif) if db.get_pif_record(m)['currently_attached']]
-        if len(masters) > 0:
-            log("Leaving datapath %s up due to currently attached VLAN masters %s" % (bridge, masters))
-            dp = None
+# This is useful for reconfiguring the mgmt interface after having lost connectivity to the pool master
+def action_force_rewrite(bridge, config):
+    def getUUID():
+        import subprocess
+        uuid,_ = subprocess.Popen(['uuidgen'], stdout = subprocess.PIPE).communicate()
+        return uuid.strip()
 
-    if dp:
-        cfgmod_argv += deconfigure_datapath(dp)
+    # Notes:
+    # 1. This assumes the interface is bridged
+    # 2. If --gateway is given it will make that the default gateway for the host
 
+    # extract the configuration
     try:
-        f.apply()
+        mode = config['mode']
+        mac = config['mac']
+        interface = config['device']
+    except:
+        raise Usage("Please supply --mode, --mac and --device")
 
-        datapath_modify_config(cfgmod_argv)
+    if mode == 'static':
+        try:
+            netmask = config['netmask']
+            ip = config['ip']
+        except:
+            raise Usage("Please supply --netmask and --ip")
+        try:
+            gateway = config['gateway']
+        except:
+            gateway = None
+    elif mode != 'dhcp':
+        raise Usage("--mode must be either static or dhcp")
 
-        f.commit()
-    except Error, e:
-        log("action_down failed to apply changes: %s" % e.msg)
-        f.revert()
-        raise
+    if config.has_key('vlan'):
+        is_vlan = True
+        vlan_slave, vlan_vid = config['vlan'].split('.')
+    else:
+        is_vlan = False
+
+    if is_vlan:
+        raise Error("Force rewrite of VLAN not implemented")
+
+    log("Configuring %s using %s configuration" % (bridge, mode))
+
+    f = ConfigurationFile(dbcache_file)
+
+    pif_uuid = getUUID()
+    network_uuid = getUUID()
+
+    f.write('<?xml version="1.0" ?>\n')
+    f.write('<xenserver-network-configuration>\n')
+    f.write('\t<pif ref="OpaqueRef:%s">\n' % pif_uuid)
+    f.write('\t\t<network>OpaqueRef:%s</network>\n' % network_uuid)
+    f.write('\t\t<management>True</management>\n')
+    f.write('\t\t<uuid>%sPif</uuid>\n' % interface)
+    f.write('\t\t<bond_slave_of>OpaqueRef:NULL</bond_slave_of>\n')
+    f.write('\t\t<bond_master_of/>\n')
+    f.write('\t\t<VLAN_slave_of/>\n')
+    f.write('\t\t<VLAN_master_of>OpaqueRef:NULL</VLAN_master_of>\n')
+    f.write('\t\t<VLAN>-1</VLAN>\n')
+    f.write('\t\t<device>%s</device>\n' % interface)
+    f.write('\t\t<MAC>%s</MAC>\n' % mac)
+    f.write('\t\t<other_config/>\n')
+    if mode == 'dhcp':
+        f.write('\t\t<ip_configuration_mode>DHCP</ip_configuration_mode>\n')
+        f.write('\t\t<IP></IP>\n')
+        f.write('\t\t<netmask></netmask>\n')
+        f.write('\t\t<gateway></gateway>\n')
+        f.write('\t\t<DNS></DNS>\n')
+    elif mode == 'static':
+        f.write('\t\t<ip_configuration_mode>Static</ip_configuration_mode>\n')
+        f.write('\t\t<IP>%s</IP>\n' % ip)
+        f.write('\t\t<netmask>%s</netmask>\n' % netmask)
+        if gateway is not None:
+            f.write('\t\t<gateway>%s</gateway>\n' % gateway)
+        f.write('\t\t<DNS></DNS>\n')
+    else:
+        raise Error("Unknown mode %s" % mode)
+    f.write('\t</pif>\n')
+
+    f.write('\t<network ref="OpaqueRef:%s">\n' % network_uuid)
+    f.write('\t\t<uuid>InitialManagementNetwork</uuid>\n')
+    f.write('\t\t<PIFs>\n')
+    f.write('\t\t\t<PIF>OpaqueRef:%s</PIF>\n' % pif_uuid)
+    f.write('\t\t</PIFs>\n')
+    f.write('\t\t<bridge>%s</bridge>\n' % bridge)
+    f.write('\t\t<other_config/>\n')
+    f.write('\t</network>\n')
+    f.write('</xenserver-network-configuration>\n')
 
-def action_rewrite(pif):
-    f = ipdev_configure_network(pif)
     f.close()
+
     try:
         f.apply()
         f.commit()
@@ -1566,11 +558,8 @@ def action_rewrite(pif):
         f.revert()
         raise
 
-def action_force_rewrite(bridge, config):
-    raise Error("Force rewrite is not implemented yet.")
-
 def main(argv=None):
-    global output_directory, management_pif
+    global management_pif
 
     session = None
     pif_uuid = None
@@ -1585,13 +574,12 @@ def main(argv=None):
     try:
         try:
             shortops = "h"
-            longops = [ "output-directory=",
-                        "pif=", "pif-uuid=",
+            longops = [ "pif=", "pif-uuid=",
                         "session=",
                         "force=",
                         "force-interface=",
                         "management",
-                        "device=", "mode=", "ip=", "netmask=", "gateway=",
+                        "mac=", "device=", "mode=", "ip=", "netmask=", "gateway=",
                         "help" ]
             arglist, args = getopt.gnu_getopt(argv[1:], shortops, longops)
         except getopt.GetoptError, msg:
@@ -1600,9 +588,7 @@ def main(argv=None):
         force_rewrite_config = {}
 
         for o,a in arglist:
-            if o == "--output-directory":
-                output_directory = a
-            elif o == "--pif":
+            if o == "--pif":
                 pif = a
             elif o == "--pif-uuid":
                 pif_uuid = a
@@ -1612,15 +598,15 @@ def main(argv=None):
                 force_interface = a
             elif o == "--management":
                 force_management = True
-            elif o in ["--device", "--mode", "--ip", "--netmask", "--gateway"]:
+            elif o in ["--mac", "--device", "--mode", "--ip", "--netmask", "--gateway"]:
                 force_rewrite_config[o[2:]] = a
             elif o == "-h" or o == "--help":
                 print __doc__ % {'command-name': os.path.basename(argv[0])}
                 return 0
 
-        if not debug_mode():
-            syslog.openlog(os.path.basename(argv[0]))
-            log("Called as " + str.join(" ", argv))
+        syslog.openlog(os.path.basename(argv[0]))
+        log("Called as " + str.join(" ", argv))
+
         if len(args) < 1:
             raise Usage("Required option <action> not present")
         if len(args) > 1:
@@ -1634,19 +620,17 @@ def main(argv=None):
         # backwards compatibility
         if action == "rewrite-configuration": action = "rewrite"
 
-        if output_directory and ( session or pif ):
-            raise Usage("--session/--pif cannot be used with --output-directory")
         if ( session or pif ) and pif_uuid:
             raise Usage("--session/--pif and --pif-uuid are mutually exclusive.")
         if ( session and not pif ) or ( not session and pif ):
             raise Usage("--session and --pif must be used together.")
         if force_interface and ( session or pif or pif_uuid ):
             raise Usage("--force is mutually exclusive with --session, --pif and --pif-uuid")
-        if force_interface == "all" and action != "down":
-            raise Usage("\"--force all\" only valid for down action")
         if len(force_rewrite_config) and not (force_interface and action == "rewrite"):
             raise Usage("\"--force rewrite\" needed for --device, --mode, --ip, --netmask, and --gateway")
-
+        if (action == "rewrite") and (pif or pif_uuid ):
+            raise Usage("rewrite action does not take --pif or --pif-uuid")
+        
         global db
         if force_interface:
             log("Force interface %s %s" % (force_interface, action))
@@ -1654,26 +638,23 @@ def main(argv=None):
             if action == "rewrite":
                 action_force_rewrite(force_interface, force_rewrite_config)
             elif action in ["up", "down"]:
-                if action == "down" and force_interface == "all":
-                    raise Error("Force all interfaces down not implemented yet")
-
-                db = DatabaseCache(cache_file=dbcache_file)
-                pif = db.get_pif_by_bridge(force_interface)
-                management_pif = db.get_management_pif()
+                db_init_from_cache(dbcache_file)
+                pif = db().get_pif_by_bridge(force_interface)
+                management_pif = db().get_management_pif()
 
                 if action == "up":
-                    action_up(pif)
+                    action_up(pif, True)
                 elif action == "down":
                     action_down(pif)
             else:
                 raise Error("Unknown action %s"  % action)
         else:
-            db = DatabaseCache(session_ref=session)
+            db_init_from_xenapi(session)
 
             if pif_uuid:
-                pif = db.get_pif_by_uuid(pif_uuid)
+                pif = db().get_pif_by_uuid(pif_uuid)
 
-            if action == "rewrite" and not pif:
+            if action == "rewrite":
                 pass
             else:
                 if not pif:
@@ -1685,8 +666,8 @@ def main(argv=None):
                 else:
                     # pif is not going to be the management pif.
                     # Search DB cache for pif on same host with management=true
-                    pifrec = db.get_pif_record(pif)
-                    management_pif = db.get_management_pif()
+                    pifrec = db().get_pif_record(pif)
+                    management_pif = db().get_management_pif()
 
                 log_pif_action(action, pif)
 
@@ -1694,16 +675,14 @@ def main(argv=None):
                     return 0
 
                 if action == "up":
-                    action_up(pif)
+                    action_up(pif, False)
                 elif action == "down":
                     action_down(pif)
-                elif action == "rewrite":
-                    action_rewrite(pif)
                 else:
                     raise Error("Unknown action %s"  % action)
 
             # Save cache.
-            db.save(dbcache_file)
+            db().save(dbcache_file)
 
     except Usage, err:
         print >>sys.stderr, err.msg
@@ -1725,7 +704,6 @@ if __name__ == "__main__":
         for exline in err:
             log(exline)
 
-    if not debug_mode():
-        syslog.closelog()
+    syslog.closelog()
 
     sys.exit(rc)
index 786ff2f..ae96c2a 100755 (executable)
@@ -121,7 +121,7 @@ VAR_LOG_DIR = '/var/log/'
 VNCTERM_CORE_DIR = '/var/xen/vncterm'
 VSWITCH_CORE_DIR = '/var/xen/vswitch'
 OVS_VSWITCH_CONF = '/etc/ovs-vswitchd.conf'
-OVS_VSWITCH_DBCACHE = '/var/lib/openvswitch/dbcache'
+OVS_VSWITCH_DBCACHE = '/var/xapi/network.dbcache'
 XENSOURCE_INVENTORY = '/etc/xensource-inventory'
 OEM_CONFIG_DIR = '/var/xsconfig'
 OEM_CONFIG_FILES_RE = re.compile(r'^.*xensource-inventory$')
index f67a1bf..41f709d 100644 (file)
@@ -71,6 +71,12 @@ install -m 644 vswitchd/vswitch.ovsschema \
          $RPM_BUILD_ROOT/usr/share/vswitch/vswitch.ovsschema
 install -m 755 xenserver/opt_xensource_libexec_interface-reconfigure \
              $RPM_BUILD_ROOT/usr/share/vswitch/scripts/interface-reconfigure
+install -m 755 xenserver/opt_xensource_libexec_InterfaceReconfigure.py \
+             $RPM_BUILD_ROOT/usr/share/vswitch/scripts/InterfaceReconfigure.py
+install -m 755 xenserver/opt_xensource_libexec_InterfaceReconfigureBridge.py \
+             $RPM_BUILD_ROOT/usr/share/vswitch/scripts/InterfaceReconfigureBridge.py
+install -m 755 xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py \
+             $RPM_BUILD_ROOT/usr/share/vswitch/scripts/InterfaceReconfigureVswitch.py
 install -m 755 xenserver/etc_xensource_scripts_vif \
              $RPM_BUILD_ROOT/usr/share/vswitch/scripts/vif
 install -m 755 xenserver/usr_share_vswitch_scripts_dump-vif-details \
@@ -157,7 +163,7 @@ EOF
     fi
 fi
 
-if test ! -e /var/lib/openvswitch/dbcache; then
+if test ! -e /var/xapi/network.dbcache; then
     if test "$1" = 1; then
         printf "Creating xapi database cache...  "
     else
@@ -226,13 +232,16 @@ mkdir -p /usr/lib/vswitch/xs-original \
     || printf "Could not create script backup directory.\n"
 for f in \
     /opt/xensource/libexec/interface-reconfigure \
+    /opt/xensource/libexec/InterfaceReconfigure.py \
+    /opt/xensource/libexec/InterfaceReconfigureBridge.py \
+    /opt/xensource/libexec/InterfaceReconfigureVswitch.py \
     /etc/xensource/scripts/vif \
     /usr/sbin/xen-bugtool \
     /usr/sbin/brctl
 do
     s=$(basename "$f")
     t=$(readlink "$f")
-    if [ "$t" != "/usr/share/vswitch/scripts/$s" ]; then
+    if [ -f "$f" ] && [ "$t" != "/usr/share/vswitch/scripts/$s" ]; then
         mv "$f" /usr/lib/vswitch/xs-original/ \
             || printf "Could not save original XenServer $s script\n"
         ln -s "/usr/share/vswitch/scripts/$s" "$f" \
@@ -256,6 +265,9 @@ for s in vswitch vswitch-xapi-update; do
     chkconfig $s on || printf "Could not enable $s init script."
 done
 
+# Configure system to use vswitch
+echo vswitch > /etc/xensource/network.conf
+
 if [ "$1" = "1" ]; then    # $1 = 2 for upgrade
     printf "\nYou MUST reboot the server NOW to complete the change to the\n"
     printf "the vSwitch.  Attempts to modify networking on the server\n"
@@ -304,7 +316,10 @@ if [ "$1" = "0" ]; then     # $1 = 1 for upgrade
     rm -f /etc/ovs-vswitchd.conf.db
     rm -f /etc/sysconfig/vswitch
     rm -f /etc/ovs-vswitchd.cacert
-    rm -f /var/lib/openvswitch/dbcache
+    rm -f /var/xapi/network.dbcache
+
+    # Configure system to use bridge
+    echo bridge > /etc/xensource/network.conf
 
     printf "\nYou MUST reboot the server now to complete the change to\n"
     printf "standard Xen networking.  Attempts to modify networking on the\n"
@@ -328,6 +343,15 @@ fi
 /usr/share/vswitch/scripts/dump-vif-details
 /usr/share/vswitch/scripts/refresh-xs-network-uuids
 /usr/share/vswitch/scripts/interface-reconfigure
+/usr/share/vswitch/scripts/InterfaceReconfigure.py
+/usr/share/vswitch/scripts/InterfaceReconfigure.pyc
+/usr/share/vswitch/scripts/InterfaceReconfigure.pyo
+/usr/share/vswitch/scripts/InterfaceReconfigureBridge.py
+/usr/share/vswitch/scripts/InterfaceReconfigureBridge.pyc
+/usr/share/vswitch/scripts/InterfaceReconfigureBridge.pyo
+/usr/share/vswitch/scripts/InterfaceReconfigureVswitch.py
+/usr/share/vswitch/scripts/InterfaceReconfigureVswitch.pyc
+/usr/share/vswitch/scripts/InterfaceReconfigureVswitch.pyo
 /usr/share/vswitch/scripts/vif
 /usr/share/vswitch/scripts/xen-bugtool
 /usr/share/vswitch/scripts/XSFeatureVSwitch.py