--- /dev/null
+v0.90.5 - 21 Sep 2009
+---------------------
+ - Generalize in-band control to more diverse network setups
+ - Bug fixes
# limitations under the License.
AC_PREREQ(2.63)
-AC_INIT(openvswitch, 0.90.3, bugs@openvswitch.org)
+AC_INIT(openvswitch, 0.90.5, ovs-bugs@openvswitch.org)
NX_BUILDNR
AC_CONFIG_SRCDIR([datapath/datapath.c])
AC_CONFIG_MACRO_DIR([m4])
* then this should go into include/linux/if_vlan.h. */
#define VLAN_PCP_MASK 0xe000
-#define DP_MAX_PORTS 256
+#define DP_MAX_PORTS 1024
#define DP_MAX_GROUPS 16
#define DP_L2_BITS (PAGE_SHIFT - ilog2(sizeof(struct dp_bucket*)))
lib/ofp-print.h \
lib/ofpbuf.c \
lib/ofpbuf.h \
+ lib/packets.c \
lib/packets.h \
lib/pcap.c \
lib/pcap.h \
#define MAC_HASH_MASK (MAC_HASH_SIZE - 1)
#define MAC_HASH_SIZE (1u << MAC_HASH_BITS)
-#define MAC_MAX 1024
+#define MAC_MAX 2048
/* Time, in seconds, before expiring a mac_entry due to inactivity. */
#define MAC_ENTRY_IDLE_TIME 60
if (!attrs[IFLA_STATS]) {
VLOG_WARN_RL(&rl, "RTM_GETLINK reply lacks stats");
+ ofpbuf_delete(reply);
return EPROTO;
}
stats->tx_heartbeat_errors = rtnl_stats->tx_heartbeat_errors;
stats->tx_window_errors = rtnl_stats->tx_window_errors;
+ ofpbuf_delete(reply);
+
return 0;
}
--- /dev/null
+/*
+ * Copyright (c) 2009 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "packets.h"
+#include <netinet/in.h>
+#include "ofpbuf.h"
+
+/* Fills 'b' with an 802.2 SNAP packet with Ethernet source address 'eth_src',
+ * the Nicira OUI as SNAP organization and 'snap_type' as SNAP type. The text
+ * string in 'tag' is enclosed as the packet payload.
+ *
+ * This function is used by Open vSwitch to compose packets in cases where
+ * context is important but content doesn't (or shouldn't) matter. For this
+ * purpose, 'snap_type' should be a random number and 'tag' should be an
+ * English phrase that explains the purpose of the packet. (The English phrase
+ * gives hapless admins running Wireshark the opportunity to figure out what's
+ * going on.) */
+void
+compose_benign_packet(struct ofpbuf *b, const char *tag, uint16_t snap_type,
+ const uint8_t eth_src[ETH_ADDR_LEN])
+{
+ struct eth_header *eth;
+ struct llc_snap_header *llc_snap;
+
+ /* Compose basic packet structure. (We need the payload size to stick into
+ * the 802.2 header.) */
+ ofpbuf_clear(b);
+ eth = ofpbuf_put_zeros(b, ETH_HEADER_LEN);
+ llc_snap = ofpbuf_put_zeros(b, LLC_SNAP_HEADER_LEN);
+ ofpbuf_put(b, tag, strlen(tag) + 1); /* Includes null byte. */
+ ofpbuf_put(b, eth_src, ETH_ADDR_LEN);
+
+ /* Compose Ethernet header (the type field carries the payload length). */
+ memcpy(eth->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN);
+ memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN);
+ eth->eth_type = htons(b->size - ETH_HEADER_LEN);
+
+ /* Compose LLC, SNAP headers. */
+ llc_snap->llc.llc_dsap = LLC_DSAP_SNAP;
+ llc_snap->llc.llc_ssap = LLC_SSAP_SNAP;
+ llc_snap->llc.llc_cntl = LLC_CNTL_SNAP;
+ memcpy(llc_snap->snap.snap_org, "\x00\x23\x20", 3);
+ llc_snap->snap.snap_type = htons(snap_type);
+}
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
#ifndef PACKETS_H
#define PACKETS_H 1
#include "random.h"
#include "util.h"
+struct ofpbuf;
+
#define ETH_ADDR_LEN 6
static const uint8_t eth_addr_broadcast[ETH_ADDR_LEN] UNUSED
&& (ea[5] & 0xf0) == 0x00);
}
+void compose_benign_packet(struct ofpbuf *, const char *tag,
+ uint16_t snap_type,
+ const uint8_t eth_src[ETH_ADDR_LEN]);
+
/* Example:
*
* uint8_t mac[ETH_ADDR_LEN];
int error = vconn_recv(rc->vconn, &buffer);
if (!error) {
copy_to_monitor(rc, buffer);
- if (is_admitted_msg(buffer)
+ if (rc->probably_admitted || is_admitted_msg(buffer)
|| time_now() - rc->last_connected >= 30) {
rc->probably_admitted = true;
rc->last_admitted = time_now();
return is_connected_state(rconn->state);
}
-/* Returns 0 if 'rconn' is connected. Otherwise, if 'rconn' is in a "failure
- * mode" (that is, it is not connected), returns the number of seconds that it
- * has been in failure mode, ignoring any times that it connected but the
- * controller's admission control policy caused it to be quickly
- * disconnected. */
+/* Returns true if 'rconn' is connected and thought to have been accepted by
+ * the peer's admission-control policy. */
+bool
+rconn_is_admitted(const struct rconn *rconn)
+{
+ return (rconn_is_connected(rconn)
+ && rconn->last_admitted >= rconn->last_connected);
+}
+
+/* Returns 0 if 'rconn' is currently connected and considered to have been
+ * accepted by the peer's admission-control policy, otherwise the number of
+ * seconds since 'rconn' was last in such a state. */
int
rconn_failure_duration(const struct rconn *rconn)
{
- return rconn_is_connected(rconn) ? 0 : time_now() - rconn->last_admitted;
+ return rconn_is_admitted(rconn) ? 0 : time_now() - rconn->last_admitted;
}
/* Returns the IP address of the peer, or 0 if the peer's IP address is not
const char *rconn_get_name(const struct rconn *);
bool rconn_is_alive(const struct rconn *);
bool rconn_is_connected(const struct rconn *);
+bool rconn_is_admitted(const struct rconn *);
int rconn_failure_duration(const struct rconn *);
bool rconn_is_connectivity_questionable(struct rconn *);
return buffer;
}
+struct ofpbuf *
+make_packet_in(uint32_t buffer_id, uint16_t in_port, uint8_t reason,
+ const struct ofpbuf *payload, int max_send_len)
+{
+ struct ofp_packet_in *opi;
+ struct ofpbuf *buf;
+ int send_len;
+
+ send_len = MIN(max_send_len, payload->size);
+ buf = ofpbuf_new(sizeof *opi + send_len);
+ opi = put_openflow_xid(offsetof(struct ofp_packet_in, data),
+ OFPT_PACKET_IN, 0, buf);
+ opi->buffer_id = htonl(buffer_id);
+ opi->total_len = htons(payload->size);
+ opi->in_port = htons(in_port);
+ opi->reason = reason;
+ ofpbuf_put(buf, payload->data, send_len);
+ update_openflow_length(buf);
+
+ return buf;
+}
+
struct ofpbuf *
make_packet_out(const struct ofpbuf *packet, uint32_t buffer_id,
uint16_t in_port,
struct ofpbuf *make_add_simple_flow(const flow_t *,
uint32_t buffer_id, uint16_t out_port,
uint16_t max_idle);
+struct ofpbuf *make_packet_in(uint32_t buffer_id, uint16_t in_port,
+ uint8_t reason,
+ const struct ofpbuf *payload, int max_send_len);
struct ofpbuf *make_packet_out(const struct ofpbuf *packet, uint32_t buffer_id,
uint16_t in_port,
const struct ofp_action_header *,
#include "flow.h"
#include "mac-learning.h"
#include "odp-util.h"
+#include "ofpbuf.h"
#include "ofproto.h"
+#include "pktbuf.h"
+#include "poll-loop.h"
#include "rconn.h"
#include "status.h"
#include "timeval.h"
+#include "vconn.h"
#define THIS_MODULE VLM_fail_open
#include "vlog.h"
+/*
+ * Fail-open mode.
+ *
+ * In fail-open mode, the switch detects when the controller cannot be
+ * contacted or when the controller is dropping switch connections because the
+ * switch does not pass its admission control policy. In those situations the
+ * switch sets up flows itself using the "normal" action.
+ *
+ * There is a little subtlety to implementation, to properly handle the case
+ * where the controller allows switch connections but drops them a few seconds
+ * later for admission control reasons. Because of this case, we don't want to
+ * just stop setting up flows when we connect to the controller: if we did,
+ * then new flow setup and existing flows would stop during the duration of
+ * connection to the controller, and thus the whole network would go down for
+ * that period of time.
+ *
+ * So, instead, we add some special cases when we are connected to a controller,
+ * but not yet sure that it has admitted us:
+ *
+ * - We set up flows immediately ourselves, but simultaneously send out an
+ * OFPT_PACKET_IN to the controller. We put a special bogus buffer-id in
+ * these OFPT_PACKET_IN messages so that duplicate packets don't get sent
+ * out to the network when the controller replies.
+ *
+ * - We also send out OFPT_PACKET_IN messages for totally bogus packets
+ * every so often, in case no real new flows are arriving in the network.
+ *
+ * - We don't flush the flow table at the time we connect, because this
+ * could cause network stuttering in a switch with lots of flows or very
+ * high-bandwidth flows by suddenly throwing lots of packets down to
+ * userspace.
+ */
+
struct fail_open {
struct ofproto *ofproto;
struct rconn *controller;
int trigger_duration;
int last_disconn_secs;
struct status_category *ss_cat;
+ long long int next_bogus_packet_in;
+ struct rconn_packet_counter *bogus_packet_counter;
};
-/* Causes the switch to enter or leave fail-open mode, if appropriate. */
-void
-fail_open_run(struct fail_open *fo)
+/* Returns true if 'fo' should be in fail-open mode, otherwise false. */
+static inline bool
+should_fail_open(const struct fail_open *fo)
{
- int disconn_secs = rconn_failure_duration(fo->controller);
- bool open = disconn_secs >= fo->trigger_duration;
- if (open != (fo->last_disconn_secs != 0)) {
- if (!open) {
- flow_t flow;
+ return rconn_failure_duration(fo->controller) >= fo->trigger_duration;
+}
+
+/* Returns true if 'fo' is currently in fail-open mode, otherwise false. */
+bool
+fail_open_is_active(const struct fail_open *fo)
+{
+ return fo->last_disconn_secs != 0;
+}
- VLOG_WARN("No longer in fail-open mode");
- fo->last_disconn_secs = 0;
+static void
+send_bogus_packet_in(struct fail_open *fo)
+{
+ uint8_t mac[ETH_ADDR_LEN];
+ struct ofpbuf *opi;
+ struct ofpbuf b;
- memset(&flow, 0, sizeof flow);
- ofproto_delete_flow(fo->ofproto, &flow, OFPFW_ALL, 70000);
- } else {
+ /* Compose ofp_packet_in. */
+ ofpbuf_init(&b, 128);
+ eth_addr_random(mac);
+ compose_benign_packet(&b, "Open vSwitch Controller Probe", 0xa033, mac);
+ opi = make_packet_in(pktbuf_get_null(), OFPP_LOCAL, OFPR_NO_MATCH, &b, 64);
+ ofpbuf_uninit(&b);
+
+ /* Send. */
+ rconn_send_with_limit(fo->controller, opi, fo->bogus_packet_counter, 1);
+}
+
+/* Enter fail-open mode if we should be in it. Handle reconnecting to a
+ * controller from fail-open mode. */
+void
+fail_open_run(struct fail_open *fo)
+{
+ /* Enter fail-open mode if 'fo' is not in it but should be. */
+ if (should_fail_open(fo)) {
+ int disconn_secs = rconn_failure_duration(fo->controller);
+ if (!fail_open_is_active(fo)) {
VLOG_WARN("Could not connect to controller (or switch failed "
"controller's post-connection admission control "
"policy) for %d seconds, failing open", disconn_secs);
* fail-open rule from fail_open_flushed() when
* ofproto_flush_flows() calls back to us. */
ofproto_flush_flows(fo->ofproto);
+ } else if (disconn_secs > fo->last_disconn_secs + 60) {
+ VLOG_INFO("Still in fail-open mode after %d seconds disconnected "
+ "from controller", disconn_secs);
+ fo->last_disconn_secs = disconn_secs;
}
- } else if (open && disconn_secs > fo->last_disconn_secs + 60) {
- VLOG_INFO("Still in fail-open mode after %d seconds disconnected "
- "from controller", disconn_secs);
- fo->last_disconn_secs = disconn_secs;
}
+
+ /* Schedule a bogus packet-in if we're connected and in fail-open. */
+ if (fail_open_is_active(fo)) {
+ if (rconn_is_connected(fo->controller)) {
+ bool expired = time_msec() >= fo->next_bogus_packet_in;
+ if (expired) {
+ send_bogus_packet_in(fo);
+ }
+ if (expired || fo->next_bogus_packet_in == LLONG_MAX) {
+ fo->next_bogus_packet_in = time_msec() + 2000;
+ }
+ } else {
+ fo->next_bogus_packet_in = LLONG_MAX;
+ }
+ }
+
}
+/* If 'fo' is currently in fail-open mode and its rconn is connected to the
+ * controller and appears to have been admitted by it, exits fail-open mode. */
void
-fail_open_wait(struct fail_open *fo UNUSED)
+fail_open_maybe_recover(struct fail_open *fo)
{
- /* Nothing to do. */
+ if (fail_open_is_active(fo) && rconn_is_admitted(fo->controller)) {
+ flow_t flow;
+
+ VLOG_WARN("No longer in fail-open mode");
+ fo->last_disconn_secs = 0;
+ fo->next_bogus_packet_in = LLONG_MAX;
+
+ memset(&flow, 0, sizeof flow);
+ ofproto_delete_flow(fo->ofproto, &flow, OFPFW_ALL, FAIL_OPEN_PRIORITY);
+ }
+}
+
+void
+fail_open_wait(struct fail_open *fo)
+{
+ if (fo->next_bogus_packet_in != LLONG_MAX) {
+ poll_timer_wait(fo->next_bogus_packet_in - time_msec());
+ }
}
void
action.output.len = htons(sizeof action);
action.output.port = htons(OFPP_NORMAL);
memset(&flow, 0, sizeof flow);
- ofproto_add_flow(fo->ofproto, &flow, OFPFW_ALL, 70000,
+ ofproto_add_flow(fo->ofproto, &flow, OFPFW_ALL, FAIL_OPEN_PRIORITY,
&action, 1, 0);
}
}
fo->last_disconn_secs = 0;
fo->ss_cat = switch_status_register(switch_status, "fail-open",
fail_open_status_cb, fo);
+ fo->next_bogus_packet_in = LLONG_MAX;
+ fo->bogus_packet_counter = rconn_packet_counter_create();
return fo;
}
if (fo) {
/* We don't own fo->controller. */
switch_status_unregister(fo->ss_cat);
+ rconn_packet_counter_destroy(fo->bogus_packet_counter);
free(fo);
}
}
struct rconn;
struct switch_status;
+/* Priority of the rule added by the fail-open subsystem when a switch enters
+ * fail-open mode. This priority value uniquely identifies a fail-open flow
+ * (OpenFlow priorities max out at 65535 and nothing else in Open vSwitch
+ * creates flows with this priority). */
+#define FAIL_OPEN_PRIORITY 70000
+
struct fail_open *fail_open_create(struct ofproto *, int trigger_duration,
struct switch_status *,
struct rconn *controller);
void fail_open_set_trigger_duration(struct fail_open *, int trigger_duration);
void fail_open_destroy(struct fail_open *);
void fail_open_wait(struct fail_open *);
+bool fail_open_is_active(const struct fail_open *);
void fail_open_run(struct fail_open *);
+void fail_open_maybe_recover(struct fail_open *);
void fail_open_flushed(struct fail_open *);
#endif /* fail-open.h */
#define THIS_MODULE VLM_in_band
#include "vlog.h"
+/* In-band control allows a single network to be used for OpenFlow
+ * traffic and other data traffic. Refer to ovs-vswitchd.conf(5) and
+ * secchan(8) for a description of configuring in-band control.
+ *
+ * This comment is an attempt to describe how in-band control works at a
+ * wire- and implementation-level. Correctly implementing in-band
+ * control has proven difficult due to its many subtleties, and has thus
+ * gone through many iterations. Please read through and understand the
+ * reasoning behind the chosen rules before making modifications.
+ *
+ * In Open vSwitch, in-band control is implemented as "hidden" flows (in
+ * that they are not visible through OpenFlow) and at a higher priority
+ * than wildcarded flows can be setup by the controller. This is done
+ * so that the controller cannot interfere with them and possibly break
+ * connectivity with its switches. It is possible to see all flows,
+ * including in-band ones, with the ovs-appctl "bridge/dump-flows"
+ * command.
+ *
+ * The following rules are always enabled with the "normal" action by a
+ * switch with in-band control:
+ *
+ * a. DHCP requests sent from the local port.
+ * b. ARP replies to the local port's MAC address.
+ * c. ARP requests from the local port's MAC address.
+ * d. ARP replies to the remote side's MAC address. Note that the
+ * remote side is either the controller or the gateway to reach
+ * the controller.
+ * e. ARP requests from the remote side's MAC address. Note that
+ * like (d), the MAC is either for the controller or gateway.
+ * f. ARP replies containing the controller's IP address as a target.
+ * g. ARP requests containing the controller's IP address as a source.
+ * h. OpenFlow (6633/tcp) traffic to the controller's IP.
+ * i. OpenFlow (6633/tcp) traffic from the controller's IP.
+ *
+ * The goal of these rules is to be as narrow as possible to allow a
+ * switch to join a network and be able to communicate with a
+ * controller. As mentioned earlier, these rules have higher priority
+ * than the controller's rules, so if they are too broad, they may
+ * prevent the controller from implementing its policy. As such,
+ * in-band actively monitors some aspects of flow and packet processing
+ * so that the rules can be made more precise.
+ *
+ * In-band control monitors attempts to add flows into the datapath that
+ * could interfere with its duties. The datapath only allows exact
+ * match entries, so in-band control is able to be very precise about
+ * the flows it prevents. Flows that miss in the datapath are sent to
+ * userspace to be processed, so preventing these flows from being
+ * cached in the "fast path" does not affect correctness. The only type
+ * of flow that is currently prevented is one that would prevent DHCP
+ * replies from being seen by the local port. For example, a rule that
+ * forwarded all DHCP traffic to the controller would not be allowed,
+ * but one that forwarded to all ports (including the local port) would.
+ *
+ * As mentioned earlier, packets that miss in the datapath are sent to
+ * the userspace for processing. The userspace has its own flow table,
+ * the "classifier", so in-band checks whether any special processing
+ * is needed before the classifier is consulted. If a packet is a DHCP
+ * response to a request from the local port, the packet is forwarded to
+ * the local port, regardless of the flow table. Note that this requires
+ * L7 processing of DHCP replies to determine whether the 'chaddr' field
+ * matches the MAC address of the local port.
+ *
+ * It is interesting to note that for an L3-based in-band control
+ * mechanism, the majority of rules are devoted to ARP traffic. At first
+ * glance, some of these rules appear redundant. However, each serves an
+ * important role. First, in order to determine the MAC address of the
+ * remote side (controller or gateway) for other ARP rules, we must allow
+ * ARP traffic for our local port with rules (b) and (c). If we are
+ * between a switch and its connection to the controller, we have to
+ * allow the other switch's ARP traffic through. This is done with
+ * rules (d) and (e), since we do not know the addresses of the other
+ * switches a priori, but do know the controller's or gateway's. Finally,
+ * if the controller is running in a local guest VM that is not reached
+ * through the local port, the switch that is connected to the VM must
+ * allow ARP traffic based on the controller's IP address, since it will
+ * not know the MAC address of the local port that is sending the traffic
+ * or the MAC address of the controller in the guest VM.
+ *
+ * With a few notable exceptions below, in-band should work in most
+ * network setups. The following are considered "supported" in the
+ * current implementation:
+ *
+ * - Locally Connected. The switch and controller are on the same
+ * subnet. This uses rules (a), (b), (c), (h), and (i).
+ *
+ * - Reached through Gateway. The switch and controller are on
+ * different subnets and must go through a gateway. This uses
+ * rules (a), (b), (c), (h), and (i).
+ *
+ * - Between Switch and Controller. This switch is between another
+ * switch and the controller, and we want to allow the other
+ * switch's traffic through. This uses rules (d), (e), (h), and
+ * (i). It uses (b) and (c) indirectly in order to know the MAC
+ * address for rules (d) and (e). Note that DHCP for the other
+ * switch will not work unless the controller explicitly lets this
+ * switch pass the traffic.
+ *
+ * - Between Switch and Gateway. This switch is between another
+ * switch and the gateway, and we want to allow the other switch's
+ * traffic through. This uses the same rules and logic as the
+ * "Between Switch and Controller" configuration described earlier.
+ *
+ * - Controller on Local VM. The controller is a guest VM on the
+ * system running in-band control. This uses rules (a), (b), (c),
+ * (h), and (i).
+ *
+ * - Controller on Local VM with Different Networks. The controller
+ * is a guest VM on the system running in-band control, but the
+ * local port is not used to connect to the controller. For
+ * example, an IP address is configured on eth0 of the switch. The
+ * controller's VM is connected through eth1 of the switch, but an
+ * IP address has not been configured for that port on the switch.
+ * As such, the switch will use eth0 to connect to the controller,
+ * and eth1's rules about the local port will not work. In the
+ * example, the switch attached to eth0 would use rules (a), (b),
+ * (c), (h), and (i) on eth0. The switch attached to eth1 would use
+ * rules (f), (g), (h), and (i).
+ *
+ * The following are explicitly *not* supported by in-band control:
+ *
+ * - Specify Controller by Name. Currently, the controller must be
+ * identified by IP address. A naive approach would be to permit
+ * all DNS traffic. Unfortunately, this would prevent the
+ * controller from defining any policy over DNS. Since switches
+ * that are located behind us need to connect to the controller,
+ * in-band cannot simply add a rule that allows DNS traffic from
+ * the local port. The "correct" way to support this is to parse
+ * DNS requests to allow all traffic related to a request for the
+ * controller's name through. Due to the potential security
+ * problems and amount of processing, we decided to hold off for
+ * the time-being.
+ *
+ * - Multiple Controllers. There is nothing intrinsic in the high-
+ * level design that prevents using multiple (known) controllers,
+ * however, the current implementation's data structures assume
+ * only one.
+ *
+ * - Differing Controllers for Switches. All switches must know
+ * the L3 addresses for all the controllers that other switches
+ * may use, since rules need to be setup to allow traffic related
+ * to those controllers through. See rules (f), (g), (h), and (i).
+ *
+ * - Differing Routes for Switches. In order for the switch to
+ * allow other switches to connect to a controller through a
+ * gateway, it allows the gateway's traffic through with rules (d)
+ * and (e). If the routes to the controller differ for the two
+ * switches, we will not know the MAC address of the alternate
+ * gateway.
+ */
+
#define IB_BASE_PRIORITY 18181800
enum {
- IBR_FROM_LOCAL_DHCP, /* From local port, DHCP. */
- IBR_TO_LOCAL_ARP, /* To local port, ARP. */
- IBR_FROM_LOCAL_ARP, /* From local port, ARP. */
- IBR_TO_REMOTE_ARP, /* To remote MAC, ARP. */
- IBR_FROM_REMOTE_ARP, /* From remote MAC, ARP. */
- IBR_TO_CTL_ARP, /* To controller IP, ARP. */
- IBR_FROM_CTL_ARP, /* From controller IP, ARP. */
- IBR_TO_CTL_OFP, /* To controller, OpenFlow port. */
- IBR_FROM_CTL_OFP, /* From controller, OpenFlow port. */
+ IBR_FROM_LOCAL_DHCP, /* (a) From local port, DHCP. */
+ IBR_TO_LOCAL_ARP, /* (b) To local port, ARP. */
+ IBR_FROM_LOCAL_ARP, /* (c) From local port, ARP. */
+ IBR_TO_REMOTE_ARP, /* (d) To remote MAC, ARP. */
+ IBR_FROM_REMOTE_ARP, /* (e) From remote MAC, ARP. */
+ IBR_TO_CTL_ARP, /* (f) To controller IP, ARP. */
+ IBR_FROM_CTL_ARP, /* (g) From controller IP, ARP. */
+ IBR_TO_CTL_OFP, /* (h) To controller, OpenFlow port. */
+ IBR_FROM_CTL_OFP, /* (i) From controller, OpenFlow port. */
#if OFP_TCP_PORT != OFP_SSL_PORT
#error Need to support separate TCP and SSL flows.
#endif
}
}
}
- if (p->fail_open) {
- fail_open_run(p->fail_open);
- }
pinsched_run(p->miss_sched, send_packet_in_miss, p);
pinsched_run(p->action_sched, send_packet_in_action, p);
if (p->executer) {
ofconn_run(ofconn, p);
}
+ /* Fail-open maintenance. Do this after processing the ofconns since
+ * fail-open checks the status of the controller rconn. */
+ if (p->fail_open) {
+ fail_open_run(p->fail_open);
+ }
+
for (i = 0; i < p->n_listeners; i++) {
struct vconn *vconn;
int retval;
if (!of_msg) {
break;
}
+ if (p->fail_open) {
+ fail_open_maybe_recover(p->fail_open);
+ }
handle_openflow(ofconn, p, of_msg);
ofpbuf_delete(of_msg);
}
if (opo->buffer_id != htonl(UINT32_MAX)) {
error = pktbuf_retrieve(ofconn->pktbuf, ntohl(opo->buffer_id),
&buffer, &in_port);
- if (error) {
+ if (error || !buffer) {
return error;
}
payload = *buffer;
rule_execute(p, rule, &payload, &flow);
rule_reinstall(p, rule);
- ofpbuf_delete(packet);
+
+ if (rule->super && rule->super->cr.priority == FAIL_OPEN_PRIORITY
+ && rconn_is_connected(p->controller->rconn)) {
+ /*
+ * Extra-special case for fail-open mode.
+ *
+ * We are in fail-open mode and the packet matched the fail-open rule,
+ * but we are connected to a controller too. We should send the packet
+ * up to the controller in the hope that it will try to set up a flow
+ * and thereby allow us to exit fail-open.
+ *
+ * See the top-level comment in fail-open.c for more information.
+ */
+ pinsched_send(p->miss_sched, in_port, packet, send_packet_in_miss, p);
+ } else {
+ ofpbuf_delete(packet);
+ }
}
\f
static void
LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
if (ofconn->send_flow_exp && rconn_is_connected(ofconn->rconn)) {
if (prev) {
- queue_tx(ofpbuf_clone(buf), prev, ofconn->reply_counter);
+ queue_tx(ofpbuf_clone(buf), prev, prev->reply_counter);
} else {
buf = compose_flow_exp(rule, now, reason);
}
}
}
if (prev) {
- queue_tx(buf, prev, ofconn->reply_counter);
+ queue_tx(buf, prev, prev->reply_counter);
}
}
do_send_packet_in(struct ofconn *ofconn, uint32_t buffer_id,
const struct ofpbuf *packet, int send_len)
{
- struct ofp_packet_in *opi;
- struct ofpbuf payload, *buf;
- struct odp_msg *msg;
+ struct odp_msg *msg = packet->data;
+ struct ofpbuf payload;
+ struct ofpbuf *opi;
+ uint8_t reason;
- msg = packet->data;
+ /* Extract packet payload from 'msg'. */
payload.data = msg + 1;
payload.size = msg->length - sizeof *msg;
- send_len = MIN(send_len, payload.size);
- buf = ofpbuf_new(sizeof *opi + send_len);
- opi = put_openflow_xid(offsetof(struct ofp_packet_in, data),
- OFPT_PACKET_IN, 0, buf);
- opi->buffer_id = htonl(buffer_id);
- opi->total_len = htons(payload.size);
- opi->in_port = htons(odp_port_to_ofp_port(msg->port));
- opi->reason = msg->type == _ODPL_ACTION_NR ? OFPR_ACTION : OFPR_NO_MATCH;
- ofpbuf_put(buf, payload.data, MIN(send_len, payload.size));
- update_openflow_length(buf);
- rconn_send_with_limit(ofconn->rconn, buf, ofconn->packet_in_counter, 100);
+ /* Construct ofp_packet_in message. */
+ reason = msg->type == _ODPL_ACTION_NR ? OFPR_ACTION : OFPR_NO_MATCH;
+ opi = make_packet_in(buffer_id, odp_port_to_ofp_port(msg->port), reason,
+ &payload, send_len);
+
+ /* Send. */
+ rconn_send_with_limit(ofconn->rconn, opi, ofconn->packet_in_counter, 100);
}
static void
send_packet_in_miss(struct ofpbuf *packet, void *p_)
{
struct ofproto *p = p_;
+ bool in_fail_open = p->fail_open && fail_open_is_active(p->fail_open);
struct ofconn *ofconn;
struct ofpbuf payload;
struct odp_msg *msg;
payload.size = msg->length - sizeof *msg;
LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
if (ofconn->miss_send_len) {
- uint32_t buffer_id = pktbuf_save(ofconn->pktbuf, &payload,
- msg->port);
+ struct pktbuf *pb = ofconn->pktbuf;
+ uint32_t buffer_id = (in_fail_open
+ ? pktbuf_get_null()
+ : pktbuf_save(pb, &payload, msg->port));
int send_len = (buffer_id != UINT32_MAX ? ofconn->miss_send_len
: UINT32_MAX);
do_send_packet_in(ofconn, buffer_id, packet, send_len);
struct pktbuf {
struct packet packets[PKTBUF_CNT];
unsigned int buffer_idx;
+ unsigned int null_idx;
};
int
}
}
+static unsigned int
+make_id(unsigned int buffer_idx, unsigned int cookie)
+{
+ return buffer_idx | (cookie << PKTBUF_BITS);
+}
+
+/* Attempts to allocate an OpenFlow packet buffer id within 'pb'. The packet
+ * buffer will store a copy of 'buffer' and the port number 'in_port', which
+ * should be the datapath port number on which 'buffer' was received.
+ *
+ * If successful, returns the packet buffer id (a number other than
+ * UINT32_MAX). pktbuf_retrieve() can later be used to retrieve the buffer and
+ * its input port number (buffers do expire after a time, so this is not
+ * guaranteed to be true forever). On failure, returns UINT32_MAX.
+ *
+ * The caller retains ownership of 'buffer'. */
uint32_t
pktbuf_save(struct pktbuf *pb, struct ofpbuf *buffer, uint16_t in_port)
{
p->buffer = ofpbuf_clone(buffer);
p->timeout = time_msec() + OVERWRITE_MSECS;
p->in_port = in_port;
- return (p - pb->packets) | (p->cookie << PKTBUF_BITS);
+ return make_id(p - pb->packets, p->cookie);
+}
+
+/*
+ * Allocates and returns a "null" packet buffer id. The returned packet buffer
+ * id is considered valid by pktbuf_retrieve(), but it is not associated with
+ * actual buffered data.
+ *
+ * This function is always successful.
+ *
+ * This is useful in one special case: with the current OpenFlow design, the
+ * "fail-open" code cannot always know whether a connection to a controller is
+ * actually valid until it receives an OFPT_PACKET_OUT or OFPT_FLOW_MOD request,
+ * but at that point the packet in question has already been forwarded (since
+ * we are still in "fail-open" mode). If the packet was buffered in the usual
+ * way, then the OFPT_PACKET_OUT or OFPT_FLOW_MOD would cause a duplicate
+ * packet in the network. Null packet buffer ids identify such a packet that
+ * has already been forwarded, so that Open vSwitch can quietly ignore the
+ * request to re-send it. (After that happens, the switch exits fail-open
+ * mode.)
+ *
+ * See the top-level comment in fail-open.c for an overview.
+ */
+uint32_t
+pktbuf_get_null(void)
+{
+ return make_id(0, COOKIE_MAX);
}
+/* Attempts to retrieve a saved packet with the given 'id' from 'pb'. Returns
+ * 0 if successful, otherwise an OpenFlow error code constructed with
+ * ofp_mkerr().
+ *
+ * On success, ordinarily stores the buffered packet in '*bufferp' and the
+ * datapath port number on which the packet was received in '*in_port'. The
+ * caller becomes responsible for freeing the buffer. However, if 'id'
+ * identifies a "null" packet buffer (created with pktbuf_get_null()), stores
+ * NULL in '*bufferp' and -1 in '*in_port'.
+ *
+ * On failure, stores NULL in '*bufferp' and -1 in '*in_port'. */
int
pktbuf_retrieve(struct pktbuf *pb, uint32_t id, struct ofpbuf **bufferp,
uint16_t *in_port)
VLOG_WARN_RL(&rl, "attempt to reuse buffer %08"PRIx32, id);
error = ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BUFFER_EMPTY);
}
- } else {
+ } else if (id >> PKTBUF_BITS != COOKIE_MAX) {
COVERAGE_INC(pktbuf_bad_cookie);
VLOG_WARN_RL(&rl, "cookie mismatch: %08"PRIx32" != %08"PRIx32,
id, (id & PKTBUF_MASK) | (p->cookie << PKTBUF_BITS));
error = ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_COOKIE);
+ } else {
+ COVERAGE_INC(pktbuf_null_cookie);
+ VLOG_INFO_RL(&rl, "Received null cookie %08"PRIx32" (this is normal "
+ "if the switch was recently in fail-open mode)", id);
+ error = 0;
}
*bufferp = NULL;
*in_port = -1;
struct pktbuf *pktbuf_create(void);
void pktbuf_destroy(struct pktbuf *);
uint32_t pktbuf_save(struct pktbuf *, struct ofpbuf *buffer, uint16_t in_port);
+uint32_t pktbuf_get_null(void);
int pktbuf_retrieve(struct pktbuf *, uint32_t id, struct ofpbuf **bufferp,
uint16_t *in_port);
void pktbuf_discard(struct pktbuf *, uint32_t id);
.sp 1
The available \fItarget\fR options are:
.br
-[\fB-t\fR \fIpid\fR | \fB--target=\fIpid\fR]
+[\fB-t\fR \fIsocket\fR | \fB--target=\fIsocket\fR]
.sp 1
The available \fIaction\fR options are:
.br
This option is most useful for debugging. It reduces switching
performance, so it should not be used in production.
+.IP "\fB--mute\fR"
+Prevents ovs\-controller from replying to any OpenFlow messages sent
+to it by switches.
+.IP
+This option is only for debugging the Open vSwitch implementation of
+``fail open'' mode. It must not be used in production.
+
.so lib/daemon.man
.so lib/vlog.man
.so lib/common.man
/* --max-idle: Maximum idle time, in seconds, before flows expire. */
static int max_idle = 60;
+/* --mute: If true, accept connections from switches but do not reply to any
+ * of their messages (for debugging fail-open mode). */
+static bool mute = false;
+
static int do_switching(struct switch_ *);
static void new_switch(struct switch_ *, struct vconn *, const char *name);
static void parse_options(int argc, char *argv[]);
msg = rconn_recv(sw->rconn);
if (msg) {
- lswitch_process_packet(sw->lswitch, sw->rconn, msg);
+ if (!mute) {
+ lswitch_process_packet(sw->lswitch, sw->rconn, msg);
+ }
ofpbuf_delete(msg);
}
rconn_run(sw->rconn);
enum {
OPT_MAX_IDLE = UCHAR_MAX + 1,
OPT_PEER_CA_CERT,
+ OPT_MUTE,
VLOG_OPTION_ENUMS
};
static struct option long_options[] = {
{"hub", no_argument, 0, 'H'},
{"noflow", no_argument, 0, 'n'},
{"max-idle", required_argument, 0, OPT_MAX_IDLE},
+ {"mute", no_argument, 0, OPT_MUTE},
{"help", no_argument, 0, 'h'},
{"version", no_argument, 0, 'V'},
DAEMON_LONG_OPTIONS,
setup_flows = false;
break;
+ case OPT_MUTE:
+ mute = true;
+ break;
+
case OPT_MAX_IDLE:
if (!strcmp(optarg, "permanent")) {
max_idle = OFP_FLOW_PERMANENT;
-newkey $newkey -keyout private/cakey.pem -out careq.pem \
1>&3 2>&3
openssl ca -config ca.cnf -create_serial -out cacert.pem \
- -days 1095 -batch -keyfile private/cakey.pem -selfsign \
+ -days 2191 -batch -keyfile private/cakey.pem -selfsign \
-infiles careq.pem 1>&3 2>&3
chmod 0700 private/cakey.pem
VLOG_ERR("bridge %s: problem setting netflow collectors",
br->name);
}
+ svec_destroy(&nf_hosts);
/* Update the controller and related settings. It would be more
* straightforward to call this from bridge_reconfigure_one(), but we
}
iface->tag = tag_create_random();
}
+ port_update_bond_compat(port);
}
static void
ofpbuf_init(&packet, 128);
error = n_packets = n_errors = 0;
LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) {
- static const char s[] = "Open vSwitch Bond Failover";
union ofp_action actions[2], *a;
- struct eth_header *eth;
- struct llc_snap_header *llc_snap;
uint16_t dp_ifidx;
tag_type tags = 0;
flow_t flow;
continue;
}
- /* Compose packet to send. */
- ofpbuf_clear(&packet);
- eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN);
- llc_snap = ofpbuf_put_zeros(&packet, LLC_SNAP_HEADER_LEN);
- ofpbuf_put(&packet, s, sizeof s); /* Includes null byte. */
- ofpbuf_put(&packet, e->mac, ETH_ADDR_LEN);
-
- memcpy(eth->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN);
- memcpy(eth->eth_src, e->mac, ETH_ADDR_LEN);
- eth->eth_type = htons(packet.size - ETH_HEADER_LEN);
-
- llc_snap->llc.llc_dsap = LLC_DSAP_SNAP;
- llc_snap->llc.llc_ssap = LLC_SSAP_SNAP;
- llc_snap->llc.llc_cntl = LLC_CNTL_SNAP;
- memcpy(llc_snap->snap.snap_org, "\x00\x23\x20", 3);
- llc_snap->snap.snap_type = htons(0xf177); /* Random number. */
-
/* Compose actions. */
memset(actions, 0, sizeof actions);
a = actions;
/* Send packet. */
n_packets++;
+ compose_benign_packet(&packet, "Open vSwitch Bond Failover", 0xf177,
+ e->mac);
flow_extract(&packet, ODPP_NONE, &flow);
retval = ofproto_send_packet(br->ofproto, &flow, actions, a - actions,
&packet);
struct iface *iface = port->ifaces[i];
struct compat_bond_slave *slave = &bond.slaves[i];
slave->name = iface->name;
- slave->up = ((iface->enabled && iface->delay_expires == LLONG_MAX) ||
- (!iface->enabled && iface->delay_expires != LLONG_MAX));
+
+ /* We need to make the same determination as the Linux bonding
+ * code about whether a slave should be considered "up".
+ * The Linux function bond_miimon_inspect() supports four
+ * BOND_LINK_* states:
+ *
+ * - BOND_LINK_UP: carrier detected, updelay has passed.
+ * - BOND_LINK_FAIL: carrier lost, downdelay in progress.
+ * - BOND_LINK_DOWN: carrier lost, downdelay has passed.
+ * - BOND_LINK_BACK: carrier detected, updelay in progress.
+ *
+ * The function bond_info_show_slave() only considers BOND_LINK_UP
+ * to be "up" and anything else to be "down".
+ */
+ slave->up = iface->enabled && iface->delay_expires == LLONG_MAX;
if (slave->up) {
bond.up = true;
}
{
cfg_del_entry("bridge.%s.port=%s", br_name, port_name);
cfg_del_match("bonding.*.slave=%s", port_name);
- cfg_del_match("vlan.%s.*", port_name);
+ cfg_del_match("vlan.%s.[!0-9]*", port_name);
}
static int
CAP_TAPDISK_LOGS = 'tapdisk-logs'
CAP_VNCTERM = 'vncterm'
CAP_VSWITCH_CONFIG = 'vswitch-config'
+CAP_VSWITCH_LOGS = 'vswitch-logs'
CAP_VSWITCH_STATUS = 'vswitch-status'
CAP_WLB = 'wlb'
CAP_X11_LOGS = 'X11'
cap(CAP_VNCTERM, PII_MAYBE, checked = False)
cap(CAP_VSWITCH_CONFIG, PII_YES,
min_size=0, max_size=20*MB)
+cap(CAP_VSWITCH_LOGS, PII_YES, max_size=20*MB)
cap(CAP_VSWITCH_STATUS, PII_YES, max_size=19*KB,
max_time=30)
cap(CAP_WLB, PII_NO, max_size=3*MB,
CAP_NETWORK_CONFIG, CAP_NETWORK_STATUS, CAP_PROCESS_LIST, CAP_HIGH_AVAILABILITY,
CAP_PAM, CAP_PERSISTENT_STATS, CAP_MULTIPATH,
CAP_SYSTEM_LOGS, CAP_SYSTEM_SERVICES, CAP_TAPDISK_LOGS,
- CAP_VNCTERM, CAP_VSWITCH_CONFIG, CAP_VSWITCH_STATUS, CAP_WLB,
+ CAP_VNCTERM, CAP_VSWITCH_CONFIG, CAP_VSWITCH_LOGS, CAP_VSWITCH_STATUS, CAP_WLB,
CAP_X11_LOGS, CAP_X11_AUTH, CAP_XAPI_DEBUG, CAP_XAPI_SUBPROCESS,
CAP_XENSERVER_CONFIG, CAP_XENSERVER_DOMAINS, CAP_XENSERVER_DATABASES,
CAP_XENSERVER_INSTALL, CAP_XENSERVER_LOGS, CAP_XEN_INFO, CAP_XHA_LIVESET, CAP_YUM]
file_output(CAP_VSWITCH_CONFIG, [OVS_VSWITCH_CONF])
+ file_output(CAP_VSWITCH_LOGS,
+ [ VAR_LOG_DIR + x for x in
+ [ 'ovs-brcompatd.log', 'ovs-vswitchd.log', 'vswitch-cfg-update.log', 'vswitch-xsplugin.log' ] +
+ [ f % n for n in range(1, 20) \
+ for f in ['ovs-brcompatd.log.%d', 'ovs-brcompatd.log.%d.gz',
+ 'ovs-vswitchd.log.%d', 'ovs-vswitchd.log.%d.gz']]])
+
cmd_output(CAP_VSWITCH_STATUS, [OVS_DPCTL, 'show'])
tree_output(CAP_VSWITCH_STATUS, VSWITCH_CORE_DIR)
for d in dp_list():