Merge citrix branch into master.

author Ben Pfaff <blp@nicira.com>

Tue, 22 Sep 2009 17:17:44 +0000 (10:17 -0700)

committer Ben Pfaff <blp@nicira.com>

Tue, 22 Sep 2009 17:17:44 +0000 (10:17 -0700)
author Ben Pfaff <blp@nicira.com>
Tue, 22 Sep 2009 17:17:44 +0000 (10:17 -0700)
committer Ben Pfaff <blp@nicira.com>
Tue, 22 Sep 2009 17:17:44 +0000 (10:17 -0700)
diff --git a/ChangeLog b/ChangeLog

new file mode 100644 (file)

index 0000000..f2f56a3
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,4 @@
+v0.90.5 - 21 Sep 2009
+---------------------
+    - Generalize in-band control to more diverse network setups
+    - Bug fixes
diff --git a/configure.ac b/configure.ac

index d5b5938..400ea78 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -13,7 +13,7 @@
  # limitations under the License.
  
  AC_PREREQ(2.63)
-AC_INIT(openvswitch, 0.90.3, bugs@openvswitch.org)
+AC_INIT(openvswitch, 0.90.5, ovs-bugs@openvswitch.org)
  NX_BUILDNR
  AC_CONFIG_SRCDIR([datapath/datapath.c])
  AC_CONFIG_MACRO_DIR([m4])
diff --git a/datapath/datapath.h b/datapath/datapath.h

index ab9359e..d28250a 100644 (file)
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -25,7 +25,7 @@
   * then this should go into include/linux/if_vlan.h. */
  #define VLAN_PCP_MASK 0xe000
  
-#define DP_MAX_PORTS 256
+#define DP_MAX_PORTS 1024
  #define DP_MAX_GROUPS 16
  
  #define DP_L2_BITS (PAGE_SHIFT - ilog2(sizeof(struct dp_bucket*)))
diff --git a/lib/automake.mk b/lib/automake.mk

index e5dbfba..9ba513a 100644 (file)
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -67,6 +67,7 @@ lib_libopenvswitch_a_SOURCES = \
         lib/ofp-print.h \
         lib/ofpbuf.c \
         lib/ofpbuf.h \
+       lib/packets.c \
         lib/packets.h \
         lib/pcap.c \
         lib/pcap.h \
diff --git a/lib/mac-learning.h b/lib/mac-learning.h

index 6a2d30b..e2ee74b 100644 (file)
--- a/lib/mac-learning.h
+++ b/lib/mac-learning.h
@@ -26,7 +26,7 @@
  #define MAC_HASH_MASK (MAC_HASH_SIZE - 1)
  #define MAC_HASH_SIZE (1u << MAC_HASH_BITS)
  
-#define MAC_MAX 1024
+#define MAC_MAX 2048
  
  /* Time, in seconds, before expiring a mac_entry due to inactivity. */
  #define MAC_ENTRY_IDLE_TIME 60
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c

index 2faffa3..5abf6e1 100644 (file)
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1496,6 +1496,7 @@ get_stats_via_netlink(int ifindex, struct netdev_stats *stats)
  
      if (!attrs[IFLA_STATS]) {
          VLOG_WARN_RL(&rl, "RTM_GETLINK reply lacks stats");
+        ofpbuf_delete(reply);
          return EPROTO;
      }
  
@@ -1522,6 +1523,8 @@ get_stats_via_netlink(int ifindex, struct netdev_stats *stats)
      stats->tx_heartbeat_errors = rtnl_stats->tx_heartbeat_errors;
      stats->tx_window_errors = rtnl_stats->tx_window_errors;
  
+    ofpbuf_delete(reply);
+
      return 0;
  }
  
diff --git a/lib/packets.c b/lib/packets.c

new file mode 100644 (file)

index 0000000..0547791
--- /dev/null
+++ b/lib/packets.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2009 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "packets.h"
+#include <netinet/in.h>
+#include "ofpbuf.h"
+
+/* Fills 'b' with an 802.2 SNAP packet with Ethernet source address 'eth_src',
+ * the Nicira OUI as SNAP organization and 'snap_type' as SNAP type.  The text
+ * string in 'tag' is enclosed as the packet payload.
+ *
+ * This function is used by Open vSwitch to compose packets in cases where
+ * context is important but content doesn't (or shouldn't) matter.  For this
+ * purpose, 'snap_type' should be a random number and 'tag' should be an
+ * English phrase that explains the purpose of the packet.  (The English phrase
+ * gives hapless admins running Wireshark the opportunity to figure out what's
+ * going on.) */
+void
+compose_benign_packet(struct ofpbuf *b, const char *tag, uint16_t snap_type,
+                      const uint8_t eth_src[ETH_ADDR_LEN])
+{
+    struct eth_header *eth;
+    struct llc_snap_header *llc_snap;
+
+    /* Compose basic packet structure.  (We need the payload size to stick into
+     * the 802.2 header.) */
+    ofpbuf_clear(b);
+    eth = ofpbuf_put_zeros(b, ETH_HEADER_LEN);
+    llc_snap = ofpbuf_put_zeros(b, LLC_SNAP_HEADER_LEN);
+    ofpbuf_put(b, tag, strlen(tag) + 1); /* Includes null byte. */
+    ofpbuf_put(b, eth_src, ETH_ADDR_LEN);
+
+    /* Compose 802.2 header. */
+    memcpy(eth->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN);
+    memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN);
+    eth->eth_type = htons(b->size - ETH_HEADER_LEN);
+
+    /* Compose LLC, SNAP headers. */
+    llc_snap->llc.llc_dsap = LLC_DSAP_SNAP;
+    llc_snap->llc.llc_ssap = LLC_SSAP_SNAP;
+    llc_snap->llc.llc_cntl = LLC_CNTL_SNAP;
+    memcpy(llc_snap->snap.snap_org, "\x00\x23\x20", 3);
+    llc_snap->snap.snap_type = htons(snap_type);
+}
diff --git a/lib/packets.h b/lib/packets.h

index d12cc04..4595c12 100644 (file)
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -13,6 +13,7 @@
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
+
  #ifndef PACKETS_H
  #define PACKETS_H 1
  
@@ -23,6 +24,8 @@
  #include "random.h"
  #include "util.h"
  
+struct ofpbuf;
+
  #define ETH_ADDR_LEN           6
  
  static const uint8_t eth_addr_broadcast[ETH_ADDR_LEN] UNUSED
@@ -98,6 +101,10 @@ static inline bool eth_addr_is_reserved(const uint8_t ea[ETH_ADDR_LEN])
              && (ea[5] & 0xf0) == 0x00);
  }
  
+void compose_benign_packet(struct ofpbuf *, const char *tag,
+                           uint16_t snap_type,
+                           const uint8_t eth_src[ETH_ADDR_LEN]);
+
  /* Example:
   *
   * uint8_t mac[ETH_ADDR_LEN];
diff --git a/lib/rconn.c b/lib/rconn.c

index a27e432..2cbe43e 100644 (file)
--- a/lib/rconn.c
+++ b/lib/rconn.c
@@ -499,7 +499,7 @@ rconn_recv(struct rconn *rc)
          int error = vconn_recv(rc->vconn, &buffer);
          if (!error) {
              copy_to_monitor(rc, buffer);
-            if (is_admitted_msg(buffer)
+            if (rc->probably_admitted || is_admitted_msg(buffer)
                  || time_now() - rc->last_connected >= 30) {
                  rc->probably_admitted = true;
                  rc->last_admitted = time_now();
@@ -637,15 +637,22 @@ rconn_is_connected(const struct rconn *rconn)
      return is_connected_state(rconn->state);
  }
  
-/* Returns 0 if 'rconn' is connected.  Otherwise, if 'rconn' is in a "failure
- * mode" (that is, it is not connected), returns the number of seconds that it
- * has been in failure mode, ignoring any times that it connected but the
- * controller's admission control policy caused it to be quickly
- * disconnected. */
+/* Returns true if 'rconn' is connected and thought to have been accepted by
+ * the peer's admission-control policy. */
+bool
+rconn_is_admitted(const struct rconn *rconn)
+{
+    return (rconn_is_connected(rconn)
+            && rconn->last_admitted >= rconn->last_connected);
+}
+
+/* Returns 0 if 'rconn' is currently connected and considered to have been
+ * accepted by the peer's admission-control policy, otherwise the number of
+ * seconds since 'rconn' was last in such a state. */
  int
  rconn_failure_duration(const struct rconn *rconn)
  {
-    return rconn_is_connected(rconn) ? 0 : time_now() - rconn->last_admitted;
+    return rconn_is_admitted(rconn) ? 0 : time_now() - rconn->last_admitted;
  }
  
  /* Returns the IP address of the peer, or 0 if the peer's IP address is not
diff --git a/lib/rconn.h b/lib/rconn.h

index ed0780a..ef4e16c 100644 (file)
--- a/lib/rconn.h
+++ b/lib/rconn.h
@@ -69,6 +69,7 @@ void rconn_add_monitor(struct rconn *, struct vconn *);
  const char *rconn_get_name(const struct rconn *);
  bool rconn_is_alive(const struct rconn *);
  bool rconn_is_connected(const struct rconn *);
+bool rconn_is_admitted(const struct rconn *);
  int rconn_failure_duration(const struct rconn *);
  bool rconn_is_connectivity_questionable(struct rconn *);
  
diff --git a/lib/vconn.c b/lib/vconn.c

index 66a56bc..3cd2948 100644 (file)
--- a/lib/vconn.c
+++ b/lib/vconn.c
@@ -911,6 +911,28 @@ make_add_simple_flow(const flow_t *flow,
      return buffer;
  }
  
+struct ofpbuf *
+make_packet_in(uint32_t buffer_id, uint16_t in_port, uint8_t reason,
+               const struct ofpbuf *payload, int max_send_len)
+{
+    struct ofp_packet_in *opi;
+    struct ofpbuf *buf;
+    int send_len;
+
+    send_len = MIN(max_send_len, payload->size);
+    buf = ofpbuf_new(sizeof *opi + send_len);
+    opi = put_openflow_xid(offsetof(struct ofp_packet_in, data),
+                           OFPT_PACKET_IN, 0, buf);
+    opi->buffer_id = htonl(buffer_id);
+    opi->total_len = htons(payload->size);
+    opi->in_port = htons(in_port);
+    opi->reason = reason;
+    ofpbuf_put(buf, payload->data, send_len);
+    update_openflow_length(buf);
+
+    return buf;
+}
+
  struct ofpbuf *
  make_packet_out(const struct ofpbuf *packet, uint32_t buffer_id,
                  uint16_t in_port,
diff --git a/lib/vconn.h b/lib/vconn.h

index 9e012bc..0c13744 100644 (file)
--- a/lib/vconn.h
+++ b/lib/vconn.h
@@ -85,6 +85,9 @@ struct ofpbuf *make_del_flow(const flow_t *);
  struct ofpbuf *make_add_simple_flow(const flow_t *,
                                      uint32_t buffer_id, uint16_t out_port,
                                      uint16_t max_idle);
+struct ofpbuf *make_packet_in(uint32_t buffer_id, uint16_t in_port,
+                              uint8_t reason,
+                              const struct ofpbuf *payload, int max_send_len);
  struct ofpbuf *make_packet_out(const struct ofpbuf *packet, uint32_t buffer_id,
                                 uint16_t in_port,
                                 const struct ofp_action_header *,
diff --git a/ofproto/fail-open.c b/ofproto/fail-open.c

index 60890d4..48f7069 100644 (file)
--- a/ofproto/fail-open.c
+++ b/ofproto/fail-open.c
@@ -21,38 +21,102 @@
  #include "flow.h"
  #include "mac-learning.h"
  #include "odp-util.h"
+#include "ofpbuf.h"
  #include "ofproto.h"
+#include "pktbuf.h"
+#include "poll-loop.h"
  #include "rconn.h"
  #include "status.h"
  #include "timeval.h"
+#include "vconn.h"
  
  #define THIS_MODULE VLM_fail_open
  #include "vlog.h"
  
+/*
+ * Fail-open mode.
+ *
+ * In fail-open mode, the switch detects when the controller cannot be
+ * contacted or when the controller is dropping switch connections because the
+ * switch does not pass its admission control policy.  In those situations the
+ * switch sets up flows itself using the "normal" action.
+ *
+ * There is a little subtlety to implementation, to properly handle the case
+ * where the controller allows switch connections but drops them a few seconds
+ * later for admission control reasons.  Because of this case, we don't want to
+ * just stop setting up flows when we connect to the controller: if we did,
+ * then new flow setup and existing flows would stop during the duration of
+ * connection to the controller, and thus the whole network would go down for
+ * that period of time.
+ *
+ * So, instead, we add some special cases when we are connected to a
+ * controller, but not yet sure that it has admitted us:
+ *
+ *     - We set up flows immediately ourselves, but simultaneously send out an
+ *       OFPT_PACKET_IN to the controller.  We put a special bogus buffer-id in
+ *       these OFPT_PACKET_IN messages so that duplicate packets don't get sent
+ *       out to the network when the controller replies.
+ *
+ *     - We also send out OFPT_PACKET_IN messages for totally bogus packets
+ *       every so often, in case no real new flows are arriving in the network.
+ *
+ *     - We don't flush the flow table at the time we connect, because this
+ *       could cause network stuttering in a switch with lots of flows or very
+ *       high-bandwidth flows by suddenly throwing lots of packets down to
+ *       userspace.
+ */
+
  struct fail_open {
      struct ofproto *ofproto;
      struct rconn *controller;
      int trigger_duration;
      int last_disconn_secs;
      struct status_category *ss_cat;
+    long long int next_bogus_packet_in;
+    struct rconn_packet_counter *bogus_packet_counter;
  };
  
-/* Causes the switch to enter or leave fail-open mode, if appropriate. */
-void
-fail_open_run(struct fail_open *fo)
+/* Returns true if 'fo' should be in fail-open mode, otherwise false. */
+static inline bool
+should_fail_open(const struct fail_open *fo)
  {
-    int disconn_secs = rconn_failure_duration(fo->controller);
-    bool open = disconn_secs >= fo->trigger_duration;
-    if (open != (fo->last_disconn_secs != 0)) {
-        if (!open) {
-            flow_t flow;
+    return rconn_failure_duration(fo->controller) >= fo->trigger_duration;
+}
+
+/* Returns true if 'fo' is currently in fail-open mode, otherwise false. */
+bool
+fail_open_is_active(const struct fail_open *fo)
+{
+    return fo->last_disconn_secs != 0;
+}
  
-            VLOG_WARN("No longer in fail-open mode");
-            fo->last_disconn_secs = 0;
+static void
+send_bogus_packet_in(struct fail_open *fo)
+{
+    uint8_t mac[ETH_ADDR_LEN];
+    struct ofpbuf *opi;
+    struct ofpbuf b;
  
-            memset(&flow, 0, sizeof flow);
-            ofproto_delete_flow(fo->ofproto, &flow, OFPFW_ALL, 70000);
-        } else {
+    /* Compose ofp_packet_in. */
+    ofpbuf_init(&b, 128);
+    eth_addr_random(mac);
+    compose_benign_packet(&b, "Open vSwitch Controller Probe", 0xa033, mac);
+    opi = make_packet_in(pktbuf_get_null(), OFPP_LOCAL, OFPR_NO_MATCH, &b, 64);
+    ofpbuf_uninit(&b);
+
+    /* Send. */
+    rconn_send_with_limit(fo->controller, opi, fo->bogus_packet_counter, 1);
+}
+
+/* Enter fail-open mode if we should be in it.  Handle reconnecting to a
+ * controller from fail-open mode. */
+void
+fail_open_run(struct fail_open *fo)
+{
+    /* Enter fail-open mode if 'fo' is not in it but should be.  */
+    if (should_fail_open(fo)) {
+        int disconn_secs = rconn_failure_duration(fo->controller);
+        if (!fail_open_is_active(fo)) {
              VLOG_WARN("Could not connect to controller (or switch failed "
                        "controller's post-connection admission control "
                        "policy) for %d seconds, failing open", disconn_secs);
@@ -62,18 +126,53 @@ fail_open_run(struct fail_open *fo)
               * fail-open rule from fail_open_flushed() when
               * ofproto_flush_flows() calls back to us. */
              ofproto_flush_flows(fo->ofproto);
+        } else if (disconn_secs > fo->last_disconn_secs + 60) {
+            VLOG_INFO("Still in fail-open mode after %d seconds disconnected "
+                      "from controller", disconn_secs);
+            fo->last_disconn_secs = disconn_secs;
          }
-    } else if (open && disconn_secs > fo->last_disconn_secs + 60) {
-        VLOG_INFO("Still in fail-open mode after %d seconds disconnected "
-                  "from controller", disconn_secs);
-        fo->last_disconn_secs = disconn_secs;
      }
+
+    /* Schedule a bogus packet-in if we're connected and in fail-open. */
+    if (fail_open_is_active(fo)) {
+        if (rconn_is_connected(fo->controller)) {
+            bool expired = time_msec() >= fo->next_bogus_packet_in;
+            if (expired) {
+                send_bogus_packet_in(fo);
+            }
+            if (expired || fo->next_bogus_packet_in == LLONG_MAX) {
+                fo->next_bogus_packet_in = time_msec() + 2000;
+            }
+        } else {
+            fo->next_bogus_packet_in = LLONG_MAX;
+        }
+    }
+
  }
  
+/* If 'fo' is currently in fail-open mode and its rconn has been admitted by
+ * the controller, exits fail-open mode. */
  void
-fail_open_wait(struct fail_open *fo UNUSED)
+fail_open_maybe_recover(struct fail_open *fo)
  {
-    /* Nothing to do. */
+    if (fail_open_is_active(fo) && rconn_is_admitted(fo->controller)) {
+        flow_t flow;
+
+        VLOG_WARN("No longer in fail-open mode");
+        fo->last_disconn_secs = 0;
+        fo->next_bogus_packet_in = LLONG_MAX;
+
+        memset(&flow, 0, sizeof flow);
+        ofproto_delete_flow(fo->ofproto, &flow, OFPFW_ALL, FAIL_OPEN_PRIORITY);
+    }
+}
+
+void
+fail_open_wait(struct fail_open *fo)
+{
+    if (fo->next_bogus_packet_in != LLONG_MAX) {
+        poll_timer_wait(fo->next_bogus_packet_in - time_msec());
+    }
  }
  
  void
@@ -92,7 +191,7 @@ fail_open_flushed(struct fail_open *fo)
          action.output.len = htons(sizeof action);
          action.output.port = htons(OFPP_NORMAL);
          memset(&flow, 0, sizeof flow);
-        ofproto_add_flow(fo->ofproto, &flow, OFPFW_ALL, 70000,
+        ofproto_add_flow(fo->ofproto, &flow, OFPFW_ALL, FAIL_OPEN_PRIORITY,
                           &action, 1, 0);
      }
  }
@@ -121,6 +220,8 @@ fail_open_create(struct ofproto *ofproto,
      fo->last_disconn_secs = 0;
      fo->ss_cat = switch_status_register(switch_status, "fail-open",
                                          fail_open_status_cb, fo);
+    fo->next_bogus_packet_in = LLONG_MAX;
+    fo->bogus_packet_counter = rconn_packet_counter_create();
      return fo;
  }
  
@@ -136,6 +237,7 @@ fail_open_destroy(struct fail_open *fo)
      if (fo) {
          /* We don't own fo->controller. */
          switch_status_unregister(fo->ss_cat);
+        rconn_packet_counter_destroy(fo->bogus_packet_counter);
          free(fo);
      }
  }
diff --git a/ofproto/fail-open.h b/ofproto/fail-open.h

index c0ada2e..900d587 100644 (file)
--- a/ofproto/fail-open.h
+++ b/ofproto/fail-open.h
@@ -26,13 +26,21 @@ struct ofproto;
  struct rconn;
  struct switch_status;
  
+/* Priority of the rule added by the fail-open subsystem when a switch enters
+ * fail-open mode.  This priority value uniquely identifies a fail-open flow
+ * (OpenFlow priorities max out at 65535 and nothing else in Open vSwitch
+ * creates flows with this priority). */
+#define FAIL_OPEN_PRIORITY 70000
+
  struct fail_open *fail_open_create(struct ofproto *, int trigger_duration,
                                     struct switch_status *,
                                     struct rconn *controller);
  void fail_open_set_trigger_duration(struct fail_open *, int trigger_duration);
  void fail_open_destroy(struct fail_open *);
  void fail_open_wait(struct fail_open *);
+bool fail_open_is_active(const struct fail_open *);
  void fail_open_run(struct fail_open *);
+void fail_open_maybe_recover(struct fail_open *);
  void fail_open_flushed(struct fail_open *);
  
  #endif /* fail-open.h */
diff --git a/ofproto/in-band.c b/ofproto/in-band.c

index 35ea534..2b362bc 100644 (file)
--- a/ofproto/in-band.c
+++ b/ofproto/in-band.c
@@ -43,18 +43,168 @@
  #define THIS_MODULE VLM_in_band
  #include "vlog.h"
  
+/* In-band control allows a single network to be used for OpenFlow
+ * traffic and other data traffic.  Refer to ovs-vswitchd.conf(5) and 
+ * secchan(8) for a description of configuring in-band control.
+ *
+ * This comment is an attempt to describe how in-band control works at a
+ * wire- and implementation-level.  Correctly implementing in-band
+ * control has proven difficult due to its many subtleties, and has thus
+ * gone through many iterations.  Please read through and understand the
+ * reasoning behind the chosen rules before making modifications.
+ *
+ * In Open vSwitch, in-band control is implemented as "hidden" flows (in
+ * that they are not visible through OpenFlow) and at a higher priority
+ * than wildcarded flows can be set up by the controller.  This is done
+ * so that the controller cannot interfere with them and possibly break 
+ * connectivity with its switches.  It is possible to see all flows, 
+ * including in-band ones, with the ovs-appctl "bridge/dump-flows" 
+ * command.
+ *
+ * The following rules are always enabled with the "normal" action by a 
+ * switch with in-band control:
+ *
+ *    a. DHCP requests sent from the local port.
+ *    b. ARP replies to the local port's MAC address.
+ *    c. ARP requests from the local port's MAC address.
+ *    d. ARP replies to the remote side's MAC address.  Note that the 
+ *       remote side is either the controller or the gateway to reach 
+ *       the controller.
+ *    e. ARP requests from the remote side's MAC address.  Note that
+ *       like (d), the MAC is either for the controller or gateway.
+ *    f. ARP replies containing the controller's IP address as a target.
+ *    g. ARP requests containing the controller's IP address as a source.
+ *    h. OpenFlow (6633/tcp) traffic to the controller's IP.
+ *    i. OpenFlow (6633/tcp) traffic from the controller's IP.
+ *
+ * The goal of these rules is to be as narrow as possible to allow a
+ * switch to join a network and be able to communicate with a
+ * controller.  As mentioned earlier, these rules have higher priority
+ * than the controller's rules, so if they are too broad, they may 
+ * prevent the controller from implementing its policy.  As such,
+ * in-band actively monitors some aspects of flow and packet processing
+ * so that the rules can be made more precise.
+ *
+ * In-band control monitors attempts to add flows into the datapath that
+ * could interfere with its duties.  The datapath only allows exact
+ * match entries, so in-band control is able to be very precise about
+ * the flows it prevents.  Flows that miss in the datapath are sent to
+ * userspace to be processed, so preventing these flows from being
+ * cached in the "fast path" does not affect correctness.  The only type 
+ * of flow that is currently prevented is one that would prevent DHCP 
+ * replies from being seen by the local port.  For example, a rule that 
+ * forwarded all DHCP traffic to the controller would not be allowed, 
+ * but one that forwarded to all ports (including the local port) would.
+ *
+ * As mentioned earlier, packets that miss in the datapath are sent to
+ * the userspace for processing.  The userspace has its own flow table,
+ * the "classifier", so in-band checks whether any special processing 
+ * is needed before the classifier is consulted.  If a packet is a DHCP 
+ * response to a request from the local port, the packet is forwarded to 
+ * the local port, regardless of the flow table.  Note that this requires 
+ * L7 processing of DHCP replies to determine whether the 'chaddr' field 
+ * matches the MAC address of the local port.
+ *
+ * It is interesting to note that for an L3-based in-band control
+ * mechanism, the majority of rules are devoted to ARP traffic.  At first 
+ * glance, some of these rules appear redundant.  However, each serves an 
+ * important role.  First, in order to determine the MAC address of the 
+ * remote side (controller or gateway) for other ARP rules, we must allow 
+ * ARP traffic for our local port with rules (b) and (c).  If we are 
+ * between a switch and its connection to the controller, we have to 
+ * allow the other switch's ARP traffic through.  This is done with
+ * rules (d) and (e), since we do not know the addresses of the other
+ * switches a priori, but do know the controller's or gateway's.  Finally, 
+ * if the controller is running in a local guest VM that is not reached 
+ * through the local port, the switch that is connected to the VM must 
+ * allow ARP traffic based on the controller's IP address, since it will 
+ * not know the MAC address of the local port that is sending the traffic 
+ * or the MAC address of the controller in the guest VM.
+ *
+ * With a few notable exceptions below, in-band should work in most
+ * network setups.  The following are considered "supported" in the
+ * current implementation: 
+ *
+ *    - Locally Connected.  The switch and controller are on the same
+ *      subnet.  This uses rules (a), (b), (c), (h), and (i).
+ *
+ *    - Reached through Gateway.  The switch and controller are on
+ *      different subnets and must go through a gateway.  This uses
+ *      rules (a), (b), (c), (h), and (i).
+ *
+ *    - Between Switch and Controller.  This switch is between another
+ *      switch and the controller, and we want to allow the other
+ *      switch's traffic through.  This uses rules (d), (e), (h), and
+ *      (i).  It uses (b) and (c) indirectly in order to know the MAC
+ *      address for rules (d) and (e).  Note that DHCP for the other
+ *      switch will not work unless the controller explicitly lets this 
+ *      switch pass the traffic.
+ *
+ *    - Between Switch and Gateway.  This switch is between another
+ *      switch and the gateway, and we want to allow the other switch's
+ *      traffic through.  This uses the same rules and logic as the
+ *      "Between Switch and Controller" configuration described earlier.
+ *
+ *    - Controller on Local VM.  The controller is a guest VM on the
+ *      system running in-band control.  This uses rules (a), (b), (c), 
+ *      (h), and (i).
+ *
+ *    - Controller on Local VM with Different Networks.  The controller
+ *      is a guest VM on the system running in-band control, but the
+ *      local port is not used to connect to the controller.  For
+ *      example, an IP address is configured on eth0 of the switch.  The
+ *      controller's VM is connected through eth1 of the switch, but an
+ *      IP address has not been configured for that port on the switch.
+ *      As such, the switch will use eth0 to connect to the controller,
+ *      and eth1's rules about the local port will not work.  In the
+ *      example, the switch attached to eth0 would use rules (a), (b), 
+ *      (c), (h), and (i) on eth0.  The switch attached to eth1 would use 
+ *      rules (f), (g), (h), and (i).
+ *
+ * The following are explicitly *not* supported by in-band control:
+ *
+ *    - Specify Controller by Name.  Currently, the controller must be 
+ *      identified by IP address.  A naive approach would be to permit
+ *      all DNS traffic.  Unfortunately, this would prevent the
+ *      controller from defining any policy over DNS.  Since switches
+ *      that are located behind us need to connect to the controller, 
+ *      in-band cannot simply add a rule that allows DNS traffic from
+ *      the local port.  The "correct" way to support this is to parse
+ *      DNS requests to allow all traffic related to a request for the
+ *      controller's name through.  Due to the potential security
+ *      problems and amount of processing, we decided to hold off for
+ *      the time being.
+ *
+ *    - Multiple Controllers.  There is nothing intrinsic in the high-
+ *      level design that prevents using multiple (known) controllers, 
+ *      however, the current implementation's data structures assume
+ *      only one.
+ *
+ *    - Differing Controllers for Switches.  All switches must know
+ *      the L3 addresses for all the controllers that other switches 
+ *      may use, since rules need to be set up to allow traffic related
+ *      to those controllers through.  See rules (f), (g), (h), and (i).
+ *
+ *    - Differing Routes for Switches.  In order for the switch to 
+ *      allow other switches to connect to a controller through a 
+ *      gateway, it allows the gateway's traffic through with rules (d)
+ *      and (e).  If the routes to the controller differ for the two
+ *      switches, we will not know the MAC address of the alternate 
+ *      gateway.
+ */
+
  #define IB_BASE_PRIORITY 18181800
  
  enum {
-    IBR_FROM_LOCAL_DHCP,          /* From local port, DHCP. */
-    IBR_TO_LOCAL_ARP,             /* To local port, ARP. */
-    IBR_FROM_LOCAL_ARP,           /* From local port, ARP. */
-    IBR_TO_REMOTE_ARP,            /* To remote MAC, ARP. */
-    IBR_FROM_REMOTE_ARP,          /* From remote MAC, ARP. */
-    IBR_TO_CTL_ARP,               /* To controller IP, ARP. */
-    IBR_FROM_CTL_ARP,             /* From controller IP, ARP. */
-    IBR_TO_CTL_OFP,               /* To controller, OpenFlow port. */
-    IBR_FROM_CTL_OFP,             /* From controller, OpenFlow port. */
+    IBR_FROM_LOCAL_DHCP,          /* (a) From local port, DHCP. */
+    IBR_TO_LOCAL_ARP,             /* (b) To local port, ARP. */
+    IBR_FROM_LOCAL_ARP,           /* (c) From local port, ARP. */
+    IBR_TO_REMOTE_ARP,            /* (d) To remote MAC, ARP. */
+    IBR_FROM_REMOTE_ARP,          /* (e) From remote MAC, ARP. */
+    IBR_TO_CTL_ARP,               /* (f) To controller IP, ARP. */
+    IBR_FROM_CTL_ARP,             /* (g) From controller IP, ARP. */
+    IBR_TO_CTL_OFP,               /* (h) To controller, OpenFlow port. */
+    IBR_FROM_CTL_OFP,             /* (i) From controller, OpenFlow port. */
  #if OFP_TCP_PORT != OFP_SSL_PORT
  #error Need to support separate TCP and SSL flows.
  #endif
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c

index 7650068..3703366 100644 (file)
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -811,9 +811,6 @@ ofproto_run1(struct ofproto *p)
              }
          }
      }
-    if (p->fail_open) {
-        fail_open_run(p->fail_open);
-    }
      pinsched_run(p->miss_sched, send_packet_in_miss, p);
      pinsched_run(p->action_sched, send_packet_in_action, p);
      if (p->executer) {
@@ -825,6 +822,12 @@ ofproto_run1(struct ofproto *p)
          ofconn_run(ofconn, p);
      }
  
+    /* Fail-open maintenance.  Do this after processing the ofconns since
+     * fail-open checks the status of the controller rconn. */
+    if (p->fail_open) {
+        fail_open_run(p->fail_open);
+    }
+
      for (i = 0; i < p->n_listeners; i++) {
          struct vconn *vconn;
          int retval;
@@ -1355,6 +1358,9 @@ ofconn_run(struct ofconn *ofconn, struct ofproto *p)
              if (!of_msg) {
                  break;
              }
+            if (p->fail_open) {
+                fail_open_maybe_recover(p->fail_open);
+            }
              handle_openflow(ofconn, p, of_msg);
              ofpbuf_delete(of_msg);
          }
@@ -2165,7 +2171,7 @@ handle_packet_out(struct ofproto *p, struct ofconn *ofconn,
      if (opo->buffer_id != htonl(UINT32_MAX)) {
          error = pktbuf_retrieve(ofconn->pktbuf, ntohl(opo->buffer_id),
                                  &buffer, &in_port);
-        if (error) {
+        if (error || !buffer) {
              return error;
          }
          payload = *buffer;
@@ -3081,7 +3087,23 @@ handle_odp_msg(struct ofproto *p, struct ofpbuf *packet)
  
      rule_execute(p, rule, &payload, &flow);
      rule_reinstall(p, rule);
-    ofpbuf_delete(packet);
+
+    if (rule->super && rule->super->cr.priority == FAIL_OPEN_PRIORITY
+        && rconn_is_connected(p->controller->rconn)) {
+        /*
+         * Extra-special case for fail-open mode.
+         *
+         * We are in fail-open mode and the packet matched the fail-open rule,
+         * but we are connected to a controller too.  We should send the packet
+         * up to the controller in the hope that it will try to set up a flow
+         * and thereby allow us to exit fail-open.
+         *
+         * See the top-level comment in fail-open.c for more information.
+         */
+        pinsched_send(p->miss_sched, in_port, packet, send_packet_in_miss, p);
+    } else {
+        ofpbuf_delete(packet);
+    }
  }
  \f
  static void
@@ -3160,7 +3182,7 @@ send_flow_exp(struct ofproto *p, struct rule *rule,
      LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
          if (ofconn->send_flow_exp && rconn_is_connected(ofconn->rconn)) {
              if (prev) {
-                queue_tx(ofpbuf_clone(buf), prev, ofconn->reply_counter);
+                queue_tx(ofpbuf_clone(buf), prev, prev->reply_counter);
              } else {
                  buf = compose_flow_exp(rule, now, reason);
              }
@@ -3168,7 +3190,7 @@ send_flow_exp(struct ofproto *p, struct rule *rule,
          }
      }
      if (prev) {
-        queue_tx(buf, prev, ofconn->reply_counter);
+        queue_tx(buf, prev, prev->reply_counter);
      }
  }
  
@@ -3267,25 +3289,22 @@ static void
  do_send_packet_in(struct ofconn *ofconn, uint32_t buffer_id,
                    const struct ofpbuf *packet, int send_len)
  {
-    struct ofp_packet_in *opi;
-    struct ofpbuf payload, *buf;
-    struct odp_msg *msg;
+    struct odp_msg *msg = packet->data;
+    struct ofpbuf payload;
+    struct ofpbuf *opi;
+    uint8_t reason;
  
-    msg = packet->data;
+    /* Extract packet payload from 'msg'. */
      payload.data = msg + 1;
      payload.size = msg->length - sizeof *msg;
  
-    send_len = MIN(send_len, payload.size);
-    buf = ofpbuf_new(sizeof *opi + send_len);
-    opi = put_openflow_xid(offsetof(struct ofp_packet_in, data),
-                           OFPT_PACKET_IN, 0, buf);
-    opi->buffer_id = htonl(buffer_id);
-    opi->total_len = htons(payload.size);
-    opi->in_port = htons(odp_port_to_ofp_port(msg->port));
-    opi->reason = msg->type == _ODPL_ACTION_NR ? OFPR_ACTION : OFPR_NO_MATCH;
-    ofpbuf_put(buf, payload.data, MIN(send_len, payload.size));
-    update_openflow_length(buf);
-    rconn_send_with_limit(ofconn->rconn, buf, ofconn->packet_in_counter, 100);
+    /* Construct ofp_packet_in message. */
+    reason = msg->type == _ODPL_ACTION_NR ? OFPR_ACTION : OFPR_NO_MATCH;
+    opi = make_packet_in(buffer_id, odp_port_to_ofp_port(msg->port), reason,
+                         &payload, send_len);
+
+    /* Send. */
+    rconn_send_with_limit(ofconn->rconn, opi, ofconn->packet_in_counter, 100);
  }
  
  static void
@@ -3308,6 +3327,7 @@ static void
  send_packet_in_miss(struct ofpbuf *packet, void *p_)
  {
      struct ofproto *p = p_;
+    bool in_fail_open = p->fail_open && fail_open_is_active(p->fail_open);
      struct ofconn *ofconn;
      struct ofpbuf payload;
      struct odp_msg *msg;
@@ -3317,8 +3337,10 @@ send_packet_in_miss(struct ofpbuf *packet, void *p_)
      payload.size = msg->length - sizeof *msg;
      LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
          if (ofconn->miss_send_len) {
-            uint32_t buffer_id = pktbuf_save(ofconn->pktbuf, &payload,
-                                             msg->port);
+            struct pktbuf *pb = ofconn->pktbuf;
+            uint32_t buffer_id = (in_fail_open
+                                  ? pktbuf_get_null()
+                                  : pktbuf_save(pb, &payload, msg->port));
              int send_len = (buffer_id != UINT32_MAX ? ofconn->miss_send_len
                              : UINT32_MAX);
              do_send_packet_in(ofconn, buffer_id, packet, send_len);
diff --git a/ofproto/pktbuf.c b/ofproto/pktbuf.c

index b4198a8..450cc3b 100644 (file)
--- a/ofproto/pktbuf.c
+++ b/ofproto/pktbuf.c
@@ -51,6 +51,7 @@ struct packet {
  struct pktbuf {
      struct packet packets[PKTBUF_CNT];
      unsigned int buffer_idx;
+    unsigned int null_idx;
  };
  
  int
@@ -78,6 +79,22 @@ pktbuf_destroy(struct pktbuf *pb)
      }
  }
  
+static unsigned int
+make_id(unsigned int buffer_idx, unsigned int cookie)
+{
+    return buffer_idx | (cookie << PKTBUF_BITS);
+}
+
+/* Attempts to allocate an OpenFlow packet buffer id within 'pb'.  The packet
+ * buffer will store a copy of 'buffer' and the port number 'in_port', which
+ * should be the datapath port number on which 'buffer' was received.
+ *
+ * If successful, returns the packet buffer id (a number other than
+ * UINT32_MAX).  pktbuf_retrieve() can later be used to retrieve the buffer and
+ * its input port number (buffers do expire after a time, so this is not
+ * guaranteed to be true forever).  On failure, returns UINT32_MAX.
+ *
+ * The caller retains ownership of 'buffer'. */
  uint32_t
  pktbuf_save(struct pktbuf *pb, struct ofpbuf *buffer, uint16_t in_port)
  {
@@ -97,9 +114,46 @@ pktbuf_save(struct pktbuf *pb, struct ofpbuf *buffer, uint16_t in_port)
      p->buffer = ofpbuf_clone(buffer);
      p->timeout = time_msec() + OVERWRITE_MSECS;
      p->in_port = in_port;
-    return (p - pb->packets) | (p->cookie << PKTBUF_BITS);
+    return make_id(p - pb->packets, p->cookie);
+}
+
+/*
+ * Allocates and returns a "null" packet buffer id.  The returned packet buffer
+ * id is considered valid by pktbuf_retrieve(), but it is not associated with
+ * actual buffered data.
+ *
+ * This function is always successful.
+ *
+ * This is useful in one special case: with the current OpenFlow design, the
+ * "fail-open" code cannot always know whether a connection to a controller is
+ * actually valid until it receives a OFPT_PACKET_OUT or OFPT_FLOW_MOD request,
+ * but at that point the packet in question has already been forwarded (since
+ * we are still in "fail-open" mode).  If the packet was buffered in the usual
+ * way, then the OFPT_PACKET_OUT or OFPT_FLOW_MOD would cause a duplicate
+ * packet in the network.  Null packet buffer ids identify such a packet that
+ * has already been forwarded, so that Open vSwitch can quietly ignore the
+ * request to re-send it.  (After that happens, the switch exits fail-open
+ * mode.)
+ *
+ * See the top-level comment in fail-open.c for an overview.
+ */
+uint32_t
+pktbuf_get_null(void)
+{
+    return make_id(0, COOKIE_MAX);
  }
  
+/* Attempts to retrieve a saved packet with the given 'id' from 'pb'.  Returns
+ * 0 if successful, otherwise an OpenFlow error code constructed with
+ * ofp_mkerr().
+ *
+ * On success, ordinarily stores the buffered packet in '*bufferp' and the
+ * datapath port number on which the packet was received in '*in_port'.  The
+ * caller becomes responsible for freeing the buffer.  However, if 'id'
+ * identifies a "null" packet buffer (created with pktbuf_get_null()), stores
+ * NULL in '*bufferp' and -1 in '*in_port'.
+ *
+ * On failure, stores NULL in '*bufferp' and -1 in '*in_port'. */
  int
  pktbuf_retrieve(struct pktbuf *pb, uint32_t id, struct ofpbuf **bufferp,
                  uint16_t *in_port)
@@ -128,11 +182,16 @@ pktbuf_retrieve(struct pktbuf *pb, uint32_t id, struct ofpbuf **bufferp,
              VLOG_WARN_RL(&rl, "attempt to reuse buffer %08"PRIx32, id);
              error = ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BUFFER_EMPTY);
          }
-    } else {
+    } else if (id >> PKTBUF_BITS != COOKIE_MAX) {
          COVERAGE_INC(pktbuf_bad_cookie);
          VLOG_WARN_RL(&rl, "cookie mismatch: %08"PRIx32" != %08"PRIx32,
                       id, (id & PKTBUF_MASK) | (p->cookie << PKTBUF_BITS));
          error = ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_COOKIE);
+    } else {
+        COVERAGE_INC(pktbuf_null_cookie);
+        VLOG_INFO_RL(&rl, "Received null cookie %08"PRIx32" (this is normal "
+                     "if the switch was recently in fail-open mode)", id);
+        error = 0;
      }
      *bufferp = NULL;
      *in_port = -1;
diff --git a/ofproto/pktbuf.h b/ofproto/pktbuf.h

index b27b749..67f4973 100644 (file)
--- a/ofproto/pktbuf.h
+++ b/ofproto/pktbuf.h
@@ -27,6 +27,7 @@ int pktbuf_capacity(void);
  struct pktbuf *pktbuf_create(void);
  void pktbuf_destroy(struct pktbuf *);
  uint32_t pktbuf_save(struct pktbuf *, struct ofpbuf *buffer, uint16_t in_port);
+uint32_t pktbuf_get_null(void);
  int pktbuf_retrieve(struct pktbuf *, uint32_t id, struct ofpbuf **bufferp,
                      uint16_t *in_port);
  void pktbuf_discard(struct pktbuf *, uint32_t id);
diff --git a/utilities/ovs-appctl.8.in b/utilities/ovs-appctl.8.in

index d5e6b82..61ce4d4 100644 (file)
--- a/utilities/ovs-appctl.8.in
+++ b/utilities/ovs-appctl.8.in
@@ -15,7 +15,7 @@ ovs\-appctl \- utility for configuring running Open vSwitch daemons
  .sp 1
  The available \fItarget\fR options are:
  .br
-[\fB-t\fR \fIpid\fR | \fB--target=\fIpid\fR]
+[\fB-t\fR \fIsocket\fR | \fB--target=\fIsocket\fR]
  .sp 1
  The available \fIaction\fR options are:
  .br
diff --git a/utilities/ovs-controller.8.in b/utilities/ovs-controller.8.in

index 658cf13..07ea684 100644 (file)
--- a/utilities/ovs-controller.8.in
+++ b/utilities/ovs-controller.8.in
@@ -119,6 +119,13 @@ through the controller and every packet is flooded.
  This option is most useful for debugging.  It reduces switching
  performance, so it should not be used in production.
  
+.IP "\fB--mute\fR"
+Prevents ovs\-controller from replying to any OpenFlow messages sent
+to it by switches.
+.IP
+This option is only for debugging the Open vSwitch implementation of
+``fail open'' mode.  It must not be used in production.
+
  .so lib/daemon.man
  .so lib/vlog.man
  .so lib/common.man
diff --git a/utilities/ovs-controller.c b/utilities/ovs-controller.c

index 010cad7..314da18 100644 (file)
--- a/utilities/ovs-controller.c
+++ b/utilities/ovs-controller.c
@@ -58,6 +58,10 @@ static bool setup_flows = true;
  /* --max-idle: Maximum idle time, in seconds, before flows expire. */
  static int max_idle = 60;
  
+/* --mute: If true, accept connections from switches but do not reply to any
+ * of their messages (for debugging fail-open mode). */
+static bool mute = false;
+
  static int do_switching(struct switch_ *);
  static void new_switch(struct switch_ *, struct vconn *, const char *name);
  static void parse_options(int argc, char *argv[]);
@@ -211,7 +215,9 @@ do_switching(struct switch_ *sw)
  
      msg = rconn_recv(sw->rconn);
      if (msg) {
-        lswitch_process_packet(sw->lswitch, sw->rconn, msg);
+        if (!mute) {
+            lswitch_process_packet(sw->lswitch, sw->rconn, msg);
+        }
          ofpbuf_delete(msg);
      }
      rconn_run(sw->rconn);
@@ -227,12 +233,14 @@ parse_options(int argc, char *argv[])
      enum {
          OPT_MAX_IDLE = UCHAR_MAX + 1,
          OPT_PEER_CA_CERT,
+        OPT_MUTE,
          VLOG_OPTION_ENUMS
      };
      static struct option long_options[] = {
          {"hub",         no_argument, 0, 'H'},
          {"noflow",      no_argument, 0, 'n'},
          {"max-idle",    required_argument, 0, OPT_MAX_IDLE},
+        {"mute",        no_argument, 0, OPT_MUTE},
          {"help",        no_argument, 0, 'h'},
          {"version",     no_argument, 0, 'V'},
          DAEMON_LONG_OPTIONS,
@@ -263,6 +271,10 @@ parse_options(int argc, char *argv[])
              setup_flows = false;
              break;
  
+        case OPT_MUTE:
+            mute = true;
+            break;
+
          case OPT_MAX_IDLE:
              if (!strcmp(optarg, "permanent")) {
                  max_idle = OFP_FLOW_PERMANENT;
diff --git a/utilities/ovs-pki.in b/utilities/ovs-pki.in

index 22b5f2a..39d5782 100755 (executable)
--- a/utilities/ovs-pki.in
+++ b/utilities/ovs-pki.in
@@ -271,7 +271,7 @@ EOF
              -newkey $newkey -keyout private/cakey.pem -out careq.pem \
              1>&3 2>&3
          openssl ca -config ca.cnf -create_serial -out cacert.pem \
-            -days 1095 -batch -keyfile private/cakey.pem -selfsign \
+            -days 2191 -batch -keyfile private/cakey.pem -selfsign \
              -infiles careq.pem 1>&3 2>&3
          chmod 0700 private/cakey.pem
  
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c

index ac993bf..933f4af 100644 (file)
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -621,6 +621,7 @@ bridge_reconfigure(void)
              VLOG_ERR("bridge %s: problem setting netflow collectors", 
                      br->name);
          }
+        svec_destroy(&nf_hosts);
  
          /* Update the controller and related settings.  It would be more
           * straightforward to call this from bridge_reconfigure_one(), but we
@@ -1590,6 +1591,7 @@ bond_enable_slave(struct iface *iface, bool enable)
          }
          iface->tag = tag_create_random();
      }
+    port_update_bond_compat(port);
  }
  
  static void
@@ -2414,10 +2416,7 @@ bond_send_learning_packets(struct port *port)
      ofpbuf_init(&packet, 128);
      error = n_packets = n_errors = 0;
      LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) {
-        static const char s[] = "Open vSwitch Bond Failover";
          union ofp_action actions[2], *a;
-        struct eth_header *eth;
-        struct llc_snap_header *llc_snap;
          uint16_t dp_ifidx;
          tag_type tags = 0;
          flow_t flow;
@@ -2428,23 +2427,6 @@ bond_send_learning_packets(struct port *port)
              continue;
          }
  
-        /* Compose packet to send. */
-        ofpbuf_clear(&packet);
-        eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN);
-        llc_snap = ofpbuf_put_zeros(&packet, LLC_SNAP_HEADER_LEN);
-        ofpbuf_put(&packet, s, sizeof s); /* Includes null byte. */
-        ofpbuf_put(&packet, e->mac, ETH_ADDR_LEN);
-
-        memcpy(eth->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN);
-        memcpy(eth->eth_src, e->mac, ETH_ADDR_LEN);
-        eth->eth_type = htons(packet.size - ETH_HEADER_LEN);
-
-        llc_snap->llc.llc_dsap = LLC_DSAP_SNAP;
-        llc_snap->llc.llc_ssap = LLC_SSAP_SNAP;
-        llc_snap->llc.llc_cntl = LLC_CNTL_SNAP;
-        memcpy(llc_snap->snap.snap_org, "\x00\x23\x20", 3);
-        llc_snap->snap.snap_type = htons(0xf177); /* Random number. */
-
          /* Compose actions. */
          memset(actions, 0, sizeof actions);
          a = actions;
@@ -2461,6 +2443,8 @@ bond_send_learning_packets(struct port *port)
  
          /* Send packet. */
          n_packets++;
+        compose_benign_packet(&packet, "Open vSwitch Bond Failover", 0xf177,
+                              e->mac);
          flow_extract(&packet, ODPP_NONE, &flow);
          retval = ofproto_send_packet(br->ofproto, &flow, actions, a - actions,
                                       &packet);
@@ -3061,8 +3045,21 @@ port_update_bond_compat(struct port *port)
          struct iface *iface = port->ifaces[i];
          struct compat_bond_slave *slave = &bond.slaves[i];
          slave->name = iface->name;
-        slave->up = ((iface->enabled && iface->delay_expires == LLONG_MAX) ||
-                     (!iface->enabled && iface->delay_expires != LLONG_MAX));
+
+        /* We need to make the same determination as the Linux bonding
+         * code to determine whether a slave should be considered "up".
+         * The Linux function bond_miimon_inspect() supports four 
+         * BOND_LINK_* states:
+         *      
+         *    - BOND_LINK_UP: carrier detected, updelay has passed.
+         *    - BOND_LINK_FAIL: carrier lost, downdelay in progress.
+         *    - BOND_LINK_DOWN: carrier lost, downdelay has passed.
+         *    - BOND_LINK_BACK: carrier detected, updelay in progress.
+         *
+         * The function bond_info_show_slave() only considers BOND_LINK_UP 
+         * to be "up" and anything else to be "down".
+         */
+        slave->up = iface->enabled && iface->delay_expires == LLONG_MAX;
          if (slave->up) {
              bond.up = true;
          }
diff --git a/vswitchd/ovs-brcompatd.c b/vswitchd/ovs-brcompatd.c

index d351c05..d4a59c3 100644 (file)
--- a/vswitchd/ovs-brcompatd.c
+++ b/vswitchd/ovs-brcompatd.c
@@ -495,7 +495,7 @@ del_port(const char *br_name, const char *port_name)
  {
      cfg_del_entry("bridge.%s.port=%s", br_name, port_name);
      cfg_del_match("bonding.*.slave=%s", port_name);
-    cfg_del_match("vlan.%s.*", port_name);
+    cfg_del_match("vlan.%s.[!0-9]*", port_name);
  }
  
  static int
diff --git a/xenserver/usr_sbin_xen-bugtool b/xenserver/usr_sbin_xen-bugtool

index 13a1c2f..1605b22 100755 (executable)
--- a/xenserver/usr_sbin_xen-bugtool
+++ b/xenserver/usr_sbin_xen-bugtool
@@ -278,6 +278,7 @@ CAP_SYSTEM_SERVICES      = 'system-services'
  CAP_TAPDISK_LOGS         = 'tapdisk-logs'
  CAP_VNCTERM              = 'vncterm'
  CAP_VSWITCH_CONFIG       = 'vswitch-config'
+CAP_VSWITCH_LOGS         = 'vswitch-logs'
  CAP_VSWITCH_STATUS       = 'vswitch-status'
  CAP_WLB                  = 'wlb'
  CAP_X11_LOGS             = 'X11'
@@ -345,6 +346,7 @@ cap(CAP_TAPDISK_LOGS,        PII_NO,                    max_size=64*KB)
  cap(CAP_VNCTERM,             PII_MAYBE, checked = False)
  cap(CAP_VSWITCH_CONFIG,      PII_YES,
                                          min_size=0,     max_size=20*MB)
+cap(CAP_VSWITCH_LOGS,        PII_YES,                   max_size=20*MB)
  cap(CAP_VSWITCH_STATUS,      PII_YES,                   max_size=19*KB,
      max_time=30)
  cap(CAP_WLB,                 PII_NO,                    max_size=3*MB,
@@ -494,7 +496,7 @@ def main(argv = None):
                     CAP_NETWORK_CONFIG, CAP_NETWORK_STATUS, CAP_PROCESS_LIST, CAP_HIGH_AVAILABILITY,
                     CAP_PAM, CAP_PERSISTENT_STATS, CAP_MULTIPATH,
                     CAP_SYSTEM_LOGS, CAP_SYSTEM_SERVICES, CAP_TAPDISK_LOGS,
-                   CAP_VNCTERM, CAP_VSWITCH_CONFIG, CAP_VSWITCH_STATUS, CAP_WLB, 
+                   CAP_VNCTERM, CAP_VSWITCH_CONFIG, CAP_VSWITCH_LOGS, CAP_VSWITCH_STATUS, CAP_WLB, 
                     CAP_X11_LOGS, CAP_X11_AUTH, CAP_XAPI_DEBUG, CAP_XAPI_SUBPROCESS, 
                     CAP_XENSERVER_CONFIG, CAP_XENSERVER_DOMAINS, CAP_XENSERVER_DATABASES, 
                     CAP_XENSERVER_INSTALL, CAP_XENSERVER_LOGS, CAP_XEN_INFO, CAP_XHA_LIVESET, CAP_YUM]
@@ -709,6 +711,13 @@ exclude those logs from the archive.
  
      file_output(CAP_VSWITCH_CONFIG, [OVS_VSWITCH_CONF])
  
+    file_output(CAP_VSWITCH_LOGS, 
+         [ VAR_LOG_DIR + x for x in
+           [ 'ovs-brcompatd.log', 'ovs-vswitchd.log', 'vswitch-cfg-update.log', 'vswitch-xsplugin.log' ] +
+           [ f % n for n in range(1, 20) \
+                 for f in ['ovs-brcompatd.log.%d', 'ovs-brcompatd.log.%d.gz', 
+                           'ovs-vswitchd.log.%d', 'ovs-vswitchd.log.%d.gz']]])
+
      cmd_output(CAP_VSWITCH_STATUS, [OVS_DPCTL, 'show'])
      tree_output(CAP_VSWITCH_STATUS, VSWITCH_CORE_DIR)
      for d in dp_list():
author	Ben Pfaff <blp@nicira.com>
	Tue, 22 Sep 2009 17:17:44 +0000 (10:17 -0700)
committer	Ben Pfaff <blp@nicira.com>
	Tue, 22 Sep 2009 17:17:44 +0000 (10:17 -0700)
ChangeLog	[new file with mode: 0644]	patch \| blob
configure.ac		patch \| blob \| history
datapath/datapath.h		patch \| blob \| history
lib/automake.mk		patch \| blob \| history
lib/mac-learning.h		patch \| blob \| history
lib/netdev-linux.c		patch \| blob \| history
lib/packets.c	[new file with mode: 0644]	patch \| blob
lib/packets.h		patch \| blob \| history
lib/rconn.c		patch \| blob \| history
lib/rconn.h		patch \| blob \| history
lib/vconn.c		patch \| blob \| history
lib/vconn.h		patch \| blob \| history
ofproto/fail-open.c		patch \| blob \| history
ofproto/fail-open.h		patch \| blob \| history
ofproto/in-band.c		patch \| blob \| history
ofproto/ofproto.c		patch \| blob \| history
ofproto/pktbuf.c		patch \| blob \| history
ofproto/pktbuf.h		patch \| blob \| history
utilities/ovs-appctl.8.in		patch \| blob \| history
utilities/ovs-controller.8.in		patch \| blob \| history
utilities/ovs-controller.c		patch \| blob \| history
utilities/ovs-pki.in		patch \| blob \| history
vswitchd/bridge.c		patch \| blob \| history
vswitchd/ovs-brcompatd.c		patch \| blob \| history
xenserver/usr_sbin_xen-bugtool		patch \| blob \| history