-/* Copyright (c) 2008, 2009, 2010 Nicira Networks
+/* Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira Networks
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
+#include <sys/wait.h>
#include <sys/stat.h>
#include <time.h>
#include <fcntl.h>
#include "dirs.h"
#include "dynamic-string.h"
#include "fatal-signal.h"
+#include "json.h"
#include "leak-checker.h"
#include "netdev.h"
#include "netlink.h"
+#include "netlink-notifier.h"
+#include "netlink-socket.h"
#include "ofpbuf.h"
#include "openvswitch/brcompat-netlink.h"
-#include "ovsdb-idl.h"
#include "packets.h"
#include "poll-loop.h"
#include "process.h"
+#include "rtnetlink-link.h"
#include "signals.h"
+#include "sset.h"
#include "svec.h"
#include "timeval.h"
#include "unixctl.h"
#include "util.h"
-#include "vswitchd/vswitch-idl.h"
-
#include "vlog.h"
-#define THIS_MODULE VLM_brcompatd
+VLOG_DEFINE_THIS_MODULE(brcompatd);
/* xxx Just hangs if datapath is rmmod/insmod. Learn to reconnect? */
-/* Actions to modify bridge compatibility configuration. */
-enum bmc_action {
- BMC_ADD_DP,
- BMC_DEL_DP,
- BMC_ADD_PORT,
- BMC_DEL_PORT
-};
-
-static const char *parse_options(int argc, char *argv[]);
+static void parse_options(int argc, char *argv[]);
static void usage(void) NO_RETURN;
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 60);
-/* Maximum number of milliseconds to wait before pruning port entries that
- * no longer exist. If set to zero, ports are never pruned. */
-static int prune_timeout = 5000;
+/* --appctl: Absolute path to ovs-appctl. */
+static char *appctl_program;
-/* Shell command to execute (via popen()) to send a control command to the
- * running ovs-vswitchd process. The string must contain one instance of %s,
- * which is replaced by the control command. */
-static char *appctl_command;
+/* --vsctl: Absolute path to ovs-vsctl. */
+static char *vsctl_program;
-/* Netlink socket to listen for interface changes. */
-static struct nl_sock *rtnl_sock;
+/* Options that we should generally pass to ovs-vsctl. */
+#define VSCTL_OPTIONS "--timeout=5", "-vANY:console:WARN"
/* Netlink socket to bridge compatibility kernel module. */
static struct nl_sock *brc_sock;
[BRC_GENL_A_MC_GROUP] = {.type = NL_A_U32 }
};
-static const struct nl_policy rtnlgrp_link_policy[] = {
- [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false },
- [IFLA_MASTER] = { .type = NL_A_U32, .optional = true },
-};
+/* Runs the program named by 'arg0', passing it 'arg0' followed by the
+ * null-pointer-terminated list of string arguments in 'args', and captures
+ * what it writes to stdout and stderr.
+ *
+ * Returns the captured stdout as a string that the caller must free() if the
+ * program exited normally with status 0.  Returns NULL if the program could
+ * not be run, did not exit normally, or exited with a nonzero status.
+ *
+ * The outcome is logged under the name of the program that was actually run
+ * ('arg0'), because this helper is used for ovs-appctl as well as ovs-vsctl.
+ * The log level is DBG for exit code 0, WARN for exit code 1, and ERR for
+ * anything else. */
+static char *
+capture_vsctl_valist(const char *arg0, va_list args)
+{
+    char *stdout_log, *stderr_log;
+    enum vlog_level log_level;
+    struct svec argv;
+    int status;
+    int error;
+    char *msg;
+    bool ok;
+
+    /* Compose arguments. */
+    svec_init(&argv);
+    svec_add(&argv, arg0);
+    for (;;) {
+        const char *arg = va_arg(args, const char *);
+        if (!arg) {
+            break;
+        }
+        svec_add(&argv, arg);
+    }
+    svec_terminate(&argv);
+
+    /* Run process. */
+    error = process_run_capture(argv.names, &stdout_log, &stderr_log, SIZE_MAX,
+                                &status);
+    if (error) {
+        /* Do not fail silently: without this message the caller only sees a
+         * NULL return and nothing in the log explains why.
+         * NOTE(review): 'error' is presumed to be a positive errno value --
+         * confirm against process_run_capture()'s contract. */
+        VLOG(VLL_ERR, "failed to execute %s: %s", arg0, strerror(error));
+        svec_destroy(&argv);
+        return NULL;
+    }
+
+    /* Log results. */
+    ok = WIFEXITED(status) && !WEXITSTATUS(status);
+    if (WIFEXITED(status)) {
+        int code = WEXITSTATUS(status);
+        log_level = code == 0 ? VLL_DBG : code == 1 ? VLL_WARN : VLL_ERR;
+    } else {
+        log_level = VLL_ERR;
+    }
+    msg = process_status_msg(status);
+    VLOG(log_level, "%s exited (%s)", arg0, msg);
+    if (stdout_log && *stdout_log) {
+        VLOG(log_level, "%s wrote to stdout:\n%s\n", arg0, stdout_log);
+    }
+    if (stderr_log && *stderr_log) {
+        VLOG(log_level, "%s wrote to stderr:\n%s\n", arg0, stderr_log);
+    }
+    free(msg);
+
+    svec_destroy(&argv);
+
+    /* stderr is only ever logged; stdout is handed to the caller on success. */
+    free(stderr_log);
+    if (!ok) {
+        free(stdout_log);
+        return NULL;
+    }
+    return stdout_log;
+}
+
+/* Variadic front end to capture_vsctl_valist(): runs 'arg0' with the string
+ * arguments that follow it, which must end with a null pointer, and returns
+ * whatever capture_vsctl_valist() returns (the captured stdout on success,
+ * otherwise NULL).  The caller must free() a non-NULL result. */
+static char * SENTINEL(0)
+capture_vsctl(const char *arg0, ...)
+{
+    va_list ap;
+    char *captured;
+
+    va_start(ap, arg0);
+    captured = capture_vsctl_valist(arg0, ap);
+    va_end(ap);
+    return captured;
+}
+
+/* Runs 'arg0' with the string arguments that follow it, which must end with a
+ * null pointer, and discards its output.  Returns true if
+ * capture_vsctl_valist() reported success (a non-NULL result), false
+ * otherwise. */
+static bool SENTINEL(0)
+run_vsctl(const char *arg0, ...)
+{
+    va_list ap;
+    char *captured;
+
+    va_start(ap, arg0);
+    captured = capture_vsctl_valist(arg0, ap);
+    va_end(ap);
+
+    if (!captured) {
+        return false;
+    }
+
+    /* Only the success indication matters here, not the output itself. */
+    free(captured);
+    return true;
+}
static int
lookup_brc_multicast_group(int *multicast_group)
struct nlattr *attrs[ARRAY_SIZE(brc_multicast_policy)];
int retval;
- retval = nl_sock_create(NETLINK_GENERIC, 0, 0, 0, &sock);
+ retval = nl_sock_create(NETLINK_GENERIC, &sock);
if (retval) {
return retval;
}
ofpbuf_init(&request, 0);
- nl_msg_put_genlmsghdr(&request, sock, 0, brc_family,
+ nl_msg_put_genlmsghdr(&request, 0, brc_family,
NLM_F_REQUEST, BRC_GENL_C_QUERY_MC, 1);
retval = nl_sock_transact(sock, &request, &reply);
ofpbuf_uninit(&request);
return retval;
}
- retval = nl_sock_create(NETLINK_GENERIC, multicast_group, 0, 0, sock);
+ retval = nl_sock_create(NETLINK_GENERIC, sock);
if (retval) {
return retval;
}
- return 0;
+ retval = nl_sock_join_mcgroup(*sock, multicast_group);
+ if (retval) {
+ nl_sock_destroy(*sock);
+ *sock = NULL;
+ }
+ return retval;
}
static const struct nl_policy brc_dp_policy[] = {
[BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING },
};
-static struct ovsrec_bridge *
-find_bridge(const struct ovsrec_open_vswitch *ovs, const char *br_name)
-{
- size_t i;
-
- for (i = 0; i < ovs->n_bridges; i++) {
- if (!strcmp(br_name, ovs->bridges[i]->name)) {
- return ovs->bridges[i];
- }
- }
-
- return NULL;
-}
-
-static int
-execute_appctl_command(const char *unixctl_command, char **output)
-{
- char *stdout_log, *stderr_log;
- int error, status;
- char *argv[5];
-
- argv[0] = "/bin/sh";
- argv[1] = "-c";
- argv[2] = xasprintf(appctl_command, unixctl_command);
- argv[3] = NULL;
-
- /* Run process and log status. */
- error = process_run_capture(argv, &stdout_log, &stderr_log, &status);
- if (error) {
- VLOG_ERR("failed to execute %s command via ovs-appctl: %s",
- unixctl_command, strerror(error));
- } else if (status) {
- char *msg = process_status_msg(status);
- VLOG_ERR("ovs-appctl exited with error (%s)", msg);
- free(msg);
- error = ECHILD;
- }
-
- /* Deal with stdout_log. */
- if (output) {
- *output = stdout_log;
- } else {
- free(stdout_log);
- }
-
- /* Deal with stderr_log */
- if (stderr_log && *stderr_log) {
- VLOG_INFO("ovs-appctl wrote to stderr:\n%s", stderr_log);
- }
- free(stderr_log);
-
- free(argv[2]);
-
- return error;
-}
-
-static void
-do_get_bridge_parts(const struct ovsrec_bridge *br, struct svec *parts,
- int vlan, bool break_down_bonds)
-{
- struct svec ports;
- size_t i, j;
-
- svec_init(&ports);
- for (i = 0; i < br->n_ports; i++) {
- const struct ovsrec_port *port = br->ports[i];
-
- svec_add(&ports, port->name);
- if (vlan >= 0) {
- int port_vlan = port->n_tag ? *port->tag : 0;
- if (vlan != port_vlan) {
- continue;
- }
- }
- if (break_down_bonds) {
- for (j = 0; j < port->n_interfaces; j++) {
- const struct ovsrec_interface *iface = port->interfaces[j];
- svec_add(parts, iface->name);
- }
- } else {
- svec_add(parts, port->name);
- }
- }
- svec_destroy(&ports);
-}
-
-/* Add all the interfaces for 'bridge' to 'ifaces', breaking bonded interfaces
- * down into their constituent parts.
- *
- * If 'vlan' < 0, all interfaces on 'bridge' are reported. If 'vlan' == 0,
- * then only interfaces for trunk ports or ports with implicit VLAN 0 are
- * reported. If 'vlan' > 0, only interfaces with implicit VLAN 'vlan' are
- * reported. */
-static void
-get_bridge_ifaces(const struct ovsrec_bridge *br, struct svec *ifaces,
- int vlan)
-{
- do_get_bridge_parts(br, ifaces, vlan, true);
-}
-
-/* Add all the ports for 'bridge' to 'ports'. Bonded ports are reported under
- * the bond name, not broken down into their constituent interfaces.
- *
- * If 'vlan' < 0, all ports on 'bridge' are reported. If 'vlan' == 0, then
- * only trunk ports or ports with implicit VLAN 0 are reported. If 'vlan' > 0,
- * only port with implicit VLAN 'vlan' are reported. */
-static void
-get_bridge_ports(const struct ovsrec_bridge *br, struct svec *ports,
- int vlan)
-{
- do_get_bridge_parts(br, ports, vlan, false);
-}
-
-#if 0
-/* Go through the configuration file and remove any ports that no longer
- * exist associated with a bridge. */
-static void
-prune_ports(void)
-{
- int i, j;
- struct svec bridges, delete;
-
- if (cfg_lock(NULL, 0)) {
- /* Couldn't lock config file. */
- return;
- }
-
- svec_init(&bridges);
- svec_init(&delete);
- cfg_get_subsections(&bridges, "bridge");
- for (i=0; i<bridges.n; i++) {
- const char *br_name = bridges.names[i];
- struct svec ifaces;
-
- /* Check that each bridge interface exists. */
- svec_init(&ifaces);
- get_bridge_ifaces(br_name, &ifaces, -1);
- for (j = 0; j < ifaces.n; j++) {
- const char *iface_name = ifaces.names[j];
-
- /* The local port and internal ports are created and destroyed by
- * ovs-vswitchd itself, so don't bother checking for them at all.
- * In practice, they might not exist if ovs-vswitchd hasn't
- * finished reloading since the configuration file was updated. */
- if (!strcmp(iface_name, br_name)
- || cfg_get_bool(0, "iface.%s.internal", iface_name)) {
- continue;
- }
-
- if (!netdev_exists(iface_name)) {
- VLOG_INFO_RL(&rl, "removing dead interface %s from %s",
- iface_name, br_name);
- svec_add(&delete, iface_name);
- }
- }
- svec_destroy(&ifaces);
- }
- svec_destroy(&bridges);
-
- if (delete.n) {
- size_t i;
-
- for (i = 0; i < delete.n; i++) {
- cfg_del_match("bridge.*.port=%s", delete.names[i]);
- cfg_del_match("bonding.*.slave=%s", delete.names[i]);
- }
- reload_config();
- cfg_unlock();
- } else {
- cfg_unlock();
- }
- svec_destroy(&delete);
-}
-#endif
-
-static struct ovsdb_idl_txn *
-txn_from_openvswitch(const struct ovsrec_open_vswitch *ovs)
-{
- return ovsdb_idl_txn_get(&ovs->header_);
-}
-
-static bool
-port_is_fake_bridge(const struct ovsrec_port *port)
-{
- return (port->fake_bridge
- && port->tag
- && *port->tag >= 1 && *port->tag <= 4095);
-}
-
-static void
-ovs_insert_bridge(const struct ovsrec_open_vswitch *ovs,
- struct ovsrec_bridge *bridge)
-{
- struct ovsrec_bridge **bridges;
- size_t i;
-
- bridges = xmalloc(sizeof *ovs->bridges * (ovs->n_bridges + 1));
- for (i = 0; i < ovs->n_bridges; i++) {
- bridges[i] = ovs->bridges[i];
- }
- bridges[ovs->n_bridges] = bridge;
- ovsrec_open_vswitch_set_bridges(ovs, bridges, ovs->n_bridges + 1);
- free(bridges);
-}
-
-static int
-add_bridge(const struct ovsrec_open_vswitch *ovs, const char *br_name)
-{
- struct ovsrec_bridge *br;
- struct ovsrec_port *port;
- struct ovsrec_interface *iface;
-
- if (find_bridge(ovs, br_name)) {
- VLOG_WARN("addbr %s: bridge %s exists", br_name, br_name);
- return EEXIST;
- } else if (netdev_exists(br_name)) {
- size_t i;
-
- for (i = 0; i < ovs->n_bridges; i++) {
- size_t j;
- struct ovsrec_bridge *br_cfg = ovs->bridges[i];
-
- for (j = 0; j < br_cfg->n_ports; j++) {
- if (port_is_fake_bridge(br_cfg->ports[j])) {
- VLOG_WARN("addbr %s: %s exists as a fake bridge",
- br_name, br_name);
- return 0;
- }
- }
- }
-
- VLOG_WARN("addbr %s: cannot create bridge %s because a network "
- "device named %s already exists",
- br_name, br_name, br_name);
- return EEXIST;
- }
-
- iface = ovsrec_interface_insert(txn_from_openvswitch(ovs));
- ovsrec_interface_set_name(iface, br_name);
-
- port = ovsrec_port_insert(txn_from_openvswitch(ovs));
- ovsrec_port_set_name(port, br_name);
- ovsrec_port_set_interfaces(port, &iface, 1);
-
- br = ovsrec_bridge_insert(txn_from_openvswitch(ovs));
- ovsrec_bridge_set_name(br, br_name);
- ovsrec_bridge_set_ports(br, &port, 1);
-
- ovs_insert_bridge(ovs, br);
-
- VLOG_INFO("addbr %s: success", br_name);
-
- return 0;
-}
-
-static void
-add_port(const struct ovsrec_open_vswitch *ovs,
- const struct ovsrec_bridge *br, const char *port_name)
-{
- struct ovsrec_interface *iface;
- struct ovsrec_port *port;
- struct ovsrec_port **ports;
- size_t i;
-
- /* xxx Check conflicts? */
- iface = ovsrec_interface_insert(txn_from_openvswitch(ovs));
- ovsrec_interface_set_name(iface, port_name);
-
- port = ovsrec_port_insert(txn_from_openvswitch(ovs));
- ovsrec_port_set_name(port, port_name);
- ovsrec_port_set_interfaces(port, &iface, 1);
-
- ports = xmalloc(sizeof *br->ports * (br->n_ports + 1));
- for (i = 0; i < br->n_ports; i++) {
- ports[i] = br->ports[i];
- }
- ports[br->n_ports] = port;
- ovsrec_bridge_set_ports(br, ports, br->n_ports + 1);
- free(ports);
-}
-
-static void
-del_port(const struct ovsrec_bridge *br, const char *port_name)
-{
- size_t i, j;
- struct ovsrec_port *port_rec = NULL;
-
- for (i = 0; i < br->n_ports; i++) {
- struct ovsrec_port *port = br->ports[i];
- if (!strcmp(port_name, port->name)) {
- port_rec = port;
- }
- for (j = 0; j < port->n_interfaces; j++) {
- struct ovsrec_interface *iface = port->interfaces[j];
- if (!strcmp(port_name, iface->name)) {
- ovsrec_interface_delete(iface);
- }
- }
- }
-
- /* xxx Probably can move this into the "for" loop. */
- if (port_rec) {
- struct ovsrec_port **ports;
- size_t n;
-
- ports = xmalloc(sizeof *br->ports * br->n_ports);
- for (i = n = 0; i < br->n_ports; i++) {
- if (br->ports[i] != port_rec) {
- ports[n++] = br->ports[i];
- }
- }
- ovsrec_bridge_set_ports(br, ports, n);
- free(ports);
- }
-}
-
-static int
-del_bridge(const struct ovsrec_open_vswitch *ovs, const char *br_name)
-{
- struct ovsrec_bridge *br = find_bridge(ovs, br_name);
- struct ovsrec_bridge **bridges;
- size_t i, n;
-
- if (!br) {
- VLOG_WARN("delbr %s: no bridge named %s", br_name, br_name);
- return ENXIO;
- }
-
- del_port(br, br_name);
-
- ovsrec_bridge_delete(br);
-
- bridges = xmalloc(sizeof *ovs->bridges * ovs->n_bridges);
- for (i = n = 0; i < ovs->n_bridges; i++) {
- if (ovs->bridges[i] != br) {
- bridges[n++] = ovs->bridges[i];
- }
- }
- ovsrec_open_vswitch_set_bridges(ovs, bridges, n);
- free(bridges);
-
- VLOG_INFO("delbr %s: success", br_name);
-
- return 0;
-}
-
static int
parse_command(struct ofpbuf *buffer, uint32_t *seq, const char **br_name,
const char **port_name, uint64_t *count, uint64_t *skip)
compose_reply(uint32_t seq, int error)
{
struct ofpbuf *reply = ofpbuf_new(4096);
- nl_msg_put_genlmsghdr(reply, brc_sock, 32, brc_family, NLM_F_REQUEST,
+ nl_msg_put_genlmsghdr(reply, 32, brc_family, NLM_F_REQUEST,
BRC_GENL_C_DP_RESULT, 1);
((struct nlmsghdr *) reply->data)->nlmsg_seq = seq;
nl_msg_put_u32(reply, BRC_GENL_A_ERR_CODE, error);
}
static int
-handle_bridge_cmd(const struct ovsrec_open_vswitch *ovs,
- struct ofpbuf *buffer, bool add)
+handle_bridge_cmd(struct ofpbuf *buffer, bool add)
{
const char *br_name;
uint32_t seq;
error = parse_command(buffer, &seq, &br_name, NULL, NULL, NULL);
if (!error) {
- error = add ? add_bridge(ovs, br_name) : del_bridge(ovs, br_name);
+ const char *vsctl_cmd = add ? "add-br" : "del-br";
+ const char *brctl_cmd = add ? "addbr" : "delbr";
+ if (!run_vsctl(vsctl_program, VSCTL_OPTIONS,
+ "--", vsctl_cmd, br_name,
+ "--", "comment", "ovs-brcompatd:", brctl_cmd, br_name,
+ (char *) NULL)) {
+ error = add ? EEXIST : ENXIO;
+ }
send_simple_reply(seq, error);
}
return error;
};
static int
-handle_port_cmd(const struct ovsrec_open_vswitch *ovs,
- struct ofpbuf *buffer, bool add)
+handle_port_cmd(struct ofpbuf *buffer, bool add)
{
- const char *cmd_name = add ? "add-if" : "del-if";
const char *br_name, *port_name;
uint32_t seq;
int error;
error = parse_command(buffer, &seq, &br_name, &port_name, NULL, NULL);
if (!error) {
- struct ovsrec_bridge *br = find_bridge(ovs, br_name);
-
- if (!br) {
- VLOG_WARN("%s %s %s: no bridge named %s",
- cmd_name, br_name, port_name, br_name);
+ const char *vsctl_cmd = add ? "add-port" : "del-port";
+ const char *brctl_cmd = add ? "addif" : "delif";
+ if (!run_vsctl(vsctl_program, VSCTL_OPTIONS,
+ "--", vsctl_cmd, br_name, port_name,
+ "--", "comment", "ovs-brcompatd:", brctl_cmd,
+ br_name, port_name, (char *) NULL)) {
error = EINVAL;
- } else if (!netdev_exists(port_name)) {
- VLOG_WARN("%s %s %s: no network device named %s",
- cmd_name, br_name, port_name, port_name);
- error = EINVAL;
- } else {
- if (add) {
- add_port(ovs, br, port_name);
- } else {
- del_port(br, port_name);
- }
- VLOG_INFO("%s %s %s: success", cmd_name, br_name, port_name);
}
send_simple_reply(seq, error);
}
-
return error;
}
-/* The caller is responsible for freeing '*ovs_name' if the call is
- * successful. */
-static int
-linux_bridge_to_ovs_bridge(const struct ovsrec_open_vswitch *ovs,
- const char *linux_name,
- const struct ovsrec_bridge **ovs_bridge,
- int *br_vlan)
+static char *
+linux_bridge_to_ovs_bridge(const char *linux_name, int *br_vlanp)
{
- *ovs_bridge = find_bridge(ovs, linux_name);
- if (*ovs_bridge) {
- /* Bridge name is the same. We are interested in VLAN 0. */
- *br_vlan = 0;
- return 0;
- } else {
- /* No such Open vSwitch bridge 'linux_name', but there might be an
- * internal port named 'linux_name' on some other bridge
- * 'ovs_bridge'. If so then we are interested in the VLAN assigned to
- * port 'linux_name' on the bridge named 'ovs_bridge'. */
- size_t i, j;
-
- for (i = 0; i < ovs->n_bridges; i++) {
- const struct ovsrec_bridge *br = ovs->bridges[i];
-
- for (j = 0; j < br->n_ports; j++) {
- const struct ovsrec_port *port = br->ports[j];
-
- if (!strcmp(port->name, linux_name)) {
- *ovs_bridge = br;
- *br_vlan = port->n_tag ? *port->tag : -1;
- return 0;
- }
- }
+ char *save_ptr = NULL;
+ const char *br_name, *br_vlan;
+ char *br_name_copy;
+ char *output;
- }
- return ENODEV;
+ output = capture_vsctl(vsctl_program, VSCTL_OPTIONS,
+ "--", "br-to-parent", linux_name,
+ "--", "br-to-vlan", linux_name,
+ (char *) NULL);
+ if (!output) {
+ return NULL;
+ }
+
+ br_name = strtok_r(output, " \t\r\n", &save_ptr);
+ br_vlan = strtok_r(NULL, " \t\r\n", &save_ptr);
+ if (!br_name || !br_vlan) {
+ free(output);
+ return NULL;
+ }
+ br_name_copy = xstrdup(br_name);
+ *br_vlanp = atoi(br_vlan);
+
+ free(output);
+
+ return br_name_copy;
+}
+
+/* Runs "list-ifaces 'br_name'" through ovs-vsctl and adds each
+ * whitespace-separated name in its output to 'ifaces'.  Leaves 'ifaces'
+ * untouched if the command fails. */
+static void
+get_bridge_ifaces(const char *br_name, struct sset *ifaces)
+{
+    static const char delimiters[] = " \t\r\n";
+    char *cursor = NULL;
+    char *listing;
+    char *name;
+
+    listing = capture_vsctl(vsctl_program, VSCTL_OPTIONS, "list-ifaces",
+                            br_name, (char *) NULL);
+    if (!listing) {
+        return;
+    }
+
+    /* NOTE(review): 'listing' is freed below, so this relies on sset_add()
+     * keeping its own copy of each name -- confirm. */
+    name = strtok_r(listing, delimiters, &cursor);
+    while (name) {
+        sset_add(ifaces, name);
+        name = strtok_r(NULL, delimiters, &cursor);
 }
+    free(listing);
}
static int
-handle_fdb_query_cmd(const struct ovsrec_open_vswitch *ovs,
- struct ofpbuf *buffer)
+handle_fdb_query_cmd(struct ofpbuf *buffer)
{
/* This structure is copied directly from the Linux 2.6.30 header files.
* It would be more straightforward to #include <linux/if_bridge.h>, but
* pretend that the former is the case even though the latter is the
* implementation. */
const char *linux_name; /* Name used by brctl. */
- const struct ovsrec_bridge *ovs_bridge; /* Bridge used by ovs-vswitchd. */
int br_vlan; /* VLAN tag. */
- struct svec ifaces;
+ struct sset ifaces;
struct ofpbuf query_data;
+ const char *iface_name;
struct ofpbuf *reply;
- char *unixctl_command;
uint64_t count, skip;
+ char *br_name;
char *output;
char *save_ptr;
uint32_t seq;
int error;
- /* Parse the command received from brcompat_mod. */
+ /* Parse the command received from brcompat. */
error = parse_command(buffer, &seq, &linux_name, NULL, &count, &skip);
if (error) {
return error;
}
/* Figure out vswitchd bridge and VLAN. */
- error = linux_bridge_to_ovs_bridge(ovs, linux_name,
- &ovs_bridge, &br_vlan);
- if (error) {
+ br_name = linux_bridge_to_ovs_bridge(linux_name, &br_vlan);
+ if (!br_name) {
+ error = EINVAL;
send_simple_reply(seq, error);
return error;
}
/* Fetch the forwarding database using ovs-appctl. */
- unixctl_command = xasprintf("fdb/show %s", ovs_bridge->name);
- error = execute_appctl_command(unixctl_command, &output);
- free(unixctl_command);
- if (error) {
+ output = capture_vsctl(appctl_program, "fdb/show", br_name,
+ (char *) NULL);
+ if (!output) {
+ error = ECHILD;
send_simple_reply(seq, error);
return error;
}
/* Fetch the MAC address for each interface on the bridge, so that we can
* fill in the is_local field in the response. */
- svec_init(&ifaces);
- get_bridge_ifaces(ovs_bridge, &ifaces, br_vlan);
- local_macs = xmalloc(ifaces.n * sizeof *local_macs);
+ sset_init(&ifaces);
+ get_bridge_ifaces(linux_name, &ifaces);
+ local_macs = xmalloc(sset_count(&ifaces) * sizeof *local_macs);
n_local_macs = 0;
- for (i = 0; i < ifaces.n; i++) {
- const char *iface_name = ifaces.names[i];
+ SSET_FOR_EACH (iface_name, &ifaces) {
struct mac *mac = &local_macs[n_local_macs];
struct netdev *netdev;
- error = netdev_open_default(iface_name, &netdev);
- if (netdev) {
+ error = netdev_open(iface_name, "system", &netdev);
+ if (!error) {
if (!netdev_get_etheraddr(netdev, mac->addr)) {
n_local_macs++;
}
netdev_close(netdev);
}
}
- svec_destroy(&ifaces);
+ sset_destroy(&ifaces);
/* Parse the response from ovs-appctl and convert it to binary format to
* pass back to the kernel. */
if (sscanf(line, "%d %d "ETH_ADDR_SCAN_FMT" %d",
&port, &vlan, ETH_ADDR_SCAN_ARGS(mac), &age)
!= 2 + ETH_ADDR_SCAN_COUNT + 1) {
- struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
VLOG_INFO_RL(&rl, "fdb/show output has invalid format: %s", line);
continue;
}
/* Free memory. */
ofpbuf_uninit(&query_data);
+ free(local_macs);
return 0;
}
static void
-send_ifindex_reply(uint32_t seq, struct svec *ifaces)
+send_ifindex_reply(uint32_t seq, char *output)
{
+ size_t allocated_indices;
+ char *save_ptr = NULL;
struct ofpbuf *reply;
const char *iface;
size_t n_indices;
int *indices;
- size_t i;
- /* Make sure that any given interface only occurs once. This shouldn't
- * happen, but who knows what people put into their configuration files. */
- svec_sort_unique(ifaces);
+ indices = NULL;
+ n_indices = allocated_indices = 0;
+ for (iface = strtok_r(output, " \t\r\n", &save_ptr); iface;
+ iface = strtok_r(NULL, " \t\r\n", &save_ptr)) {
+ int ifindex;
+
+ if (n_indices >= allocated_indices) {
+ indices = x2nrealloc(indices, &allocated_indices, sizeof *indices);
+ }
- /* Convert 'ifaces' into ifindexes. */
- n_indices = 0;
- indices = xmalloc(ifaces->n * sizeof *indices);
- SVEC_FOR_EACH (i, iface, ifaces) {
- int ifindex = if_nametoindex(iface);
+ ifindex = if_nametoindex(iface);
if (ifindex) {
indices[n_indices++] = ifindex;
}
}
static int
-handle_get_bridges_cmd(const struct ovsrec_open_vswitch *ovs,
- struct ofpbuf *buffer)
+handle_get_bridges_cmd(struct ofpbuf *buffer)
{
- struct svec bridges;
- size_t i, j;
-
+ char *output;
uint32_t seq;
-
int error;
/* Parse Netlink command.
return error;
}
- /* Get all the real bridges and all the fake ones. */
- svec_init(&bridges);
- for (i = 0; i < ovs->n_bridges; i++) {
- const struct ovsrec_bridge *br = ovs->bridges[i];
-
- svec_add(&bridges, br->name);
- for (j = 0; j < br->n_ports; j++) {
- const struct ovsrec_port *port = br->ports[j];
-
- if (port->fake_bridge) {
- svec_add(&bridges, port->name);
- }
- }
+ output = capture_vsctl(vsctl_program, VSCTL_OPTIONS, "list-br", (char *) NULL);
+ if (!output) {
+ return ENODEV;
}
- send_ifindex_reply(seq, &bridges);
- svec_destroy(&bridges);
-
+ send_ifindex_reply(seq, output);
+ free(output);
return 0;
}
static int
-handle_get_ports_cmd(const struct ovsrec_open_vswitch *ovs,
- struct ofpbuf *buffer)
+handle_get_ports_cmd(struct ofpbuf *buffer)
{
- uint32_t seq;
-
const char *linux_name;
- const struct ovsrec_bridge *ovs_bridge;
- int br_vlan;
-
- struct svec ports;
-
+ uint32_t seq;
+ char *output;
int error;
/* Parse Netlink command. */
return error;
}
- error = linux_bridge_to_ovs_bridge(ovs, linux_name,
- &ovs_bridge, &br_vlan);
- if (error) {
- send_simple_reply(seq, error);
- return error;
+ output = capture_vsctl(vsctl_program, VSCTL_OPTIONS, "list-ports", linux_name,
+ (char *) NULL);
+ if (!output) {
+ return ENODEV;
}
- svec_init(&ports);
- get_bridge_ports(ovs_bridge, &ports, br_vlan);
- svec_sort(&ports);
- svec_del(&ports, linux_name);
- send_ifindex_reply(seq, &ports); /* XXX bonds won't show up */
- svec_destroy(&ports);
-
+ send_ifindex_reply(seq, output);
+ free(output);
return 0;
}
-static void
-brc_recv_update(const struct ovsrec_open_vswitch *ovs)
+static bool
+brc_recv_update__(struct ofpbuf *buffer)
{
- int retval;
- struct ofpbuf *buffer;
- struct genlmsghdr *genlmsghdr;
+ for (;;) {
+ int retval = nl_sock_recv(brc_sock, buffer, false);
+ switch (retval) {
+ case 0:
+ if (nl_msg_nlmsgerr(buffer, NULL)
+ || nl_msg_nlmsghdr(buffer)->nlmsg_type == NLMSG_DONE) {
+ break;
+ }
+ return true;
+ case ENOBUFS:
+ break;
- buffer = NULL;
- do {
- ofpbuf_delete(buffer);
- retval = nl_sock_recv(brc_sock, &buffer, false);
- } while (retval == ENOBUFS
- || (!retval
- && (nl_msg_nlmsgerr(buffer, NULL)
- || nl_msg_nlmsghdr(buffer)->nlmsg_type == NLMSG_DONE)));
- if (retval) {
- if (retval != EAGAIN) {
+ case EAGAIN:
+ return false;
+
+ default:
VLOG_WARN_RL(&rl, "brc_recv_update: %s", strerror(retval));
+ return false;
}
- return;
}
+}
- genlmsghdr = nl_msg_genlmsghdr(buffer);
+static void
+brc_recv_update(void)
+{
+ struct genlmsghdr *genlmsghdr;
+ uint64_t buffer_stub[1024 / 8];
+ struct ofpbuf buffer;
+
+ ofpbuf_use_stub(&buffer, buffer_stub, sizeof buffer_stub);
+ if (!brc_recv_update__(&buffer)) {
+ goto error;
+ }
+
+ genlmsghdr = nl_msg_genlmsghdr(&buffer);
if (!genlmsghdr) {
VLOG_WARN_RL(&rl, "received packet too short for generic NetLink");
goto error;
}
- if (nl_msg_nlmsghdr(buffer)->nlmsg_type != brc_family) {
+ if (nl_msg_nlmsghdr(&buffer)->nlmsg_type != brc_family) {
VLOG_DBG_RL(&rl, "received type (%"PRIu16") != brcompat family (%d)",
- nl_msg_nlmsghdr(buffer)->nlmsg_type, brc_family);
+ nl_msg_nlmsghdr(&buffer)->nlmsg_type, brc_family);
goto error;
}
- /* Just drop the request on the floor if a valid configuration
- * doesn't exist. We don't immediately do this check, because we
- * want to drain pending netlink messages. */
- if (!ovs) {
- VLOG_WARN_RL(&rl, "could not find valid configuration to update");
- goto error;
- }
+ /* Service all pending network device notifications before executing the
+ * command. This is very important to avoid a race in a scenario like the
+ * following, which is what happens with XenServer Tools version 5.0.0
+ * during boot of a Windows VM:
+ *
+ * 1. Create tap1.0 and vif1.0.
+ * 2. Delete tap1.0.
+ * 3. Delete vif1.0.
+ * 4. Re-create vif1.0.
+ *
+ * We must process the network device notification from step 3 before we
+ * process the brctl command from step 4. If we process them in the
+ * reverse order, then step 4 completes as a no-op but step 3 then deletes
+ * the port that was just added.
+ *
+ * (XenServer Tools 5.5.0 does not exhibit this behavior, and neither does
+ * a VM without Tools installed at all.)
+ */
+ rtnetlink_link_run();
switch (genlmsghdr->cmd) {
case BRC_GENL_C_DP_ADD:
- handle_bridge_cmd(ovs, buffer, true);
+ handle_bridge_cmd(&buffer, true);
break;
case BRC_GENL_C_DP_DEL:
- handle_bridge_cmd(ovs, buffer, false);
+ handle_bridge_cmd(&buffer, false);
break;
case BRC_GENL_C_PORT_ADD:
- handle_port_cmd(ovs, buffer, true);
+ handle_port_cmd(&buffer, true);
break;
case BRC_GENL_C_PORT_DEL:
- handle_port_cmd(ovs, buffer, false);
+ handle_port_cmd(&buffer, false);
break;
case BRC_GENL_C_FDB_QUERY:
- handle_fdb_query_cmd(ovs, buffer);
+ handle_fdb_query_cmd(&buffer);
break;
case BRC_GENL_C_GET_BRIDGES:
- handle_get_bridges_cmd(ovs, buffer);
+ handle_get_bridges_cmd(&buffer);
break;
case BRC_GENL_C_GET_PORTS:
- handle_get_ports_cmd(ovs, buffer);
+ handle_get_ports_cmd(&buffer);
break;
default:
VLOG_WARN_RL(&rl, "received unknown brc netlink command: %d\n",
- genlmsghdr->cmd);
+ genlmsghdr->cmd);
break;
}
error:
- ofpbuf_delete(buffer);
- return;
+ ofpbuf_uninit(&buffer);
}
-/* Check for interface configuration changes announced through RTNL. */
static void
-rtnl_recv_update(const struct ovsrec_open_vswitch *ovs)
+netdev_changed_cb(const struct rtnetlink_link_change *change,
+ void *aux OVS_UNUSED)
{
- struct ofpbuf *buf;
+ char br_name[IFNAMSIZ];
+ const char *port_name;
- int error = nl_sock_recv(rtnl_sock, &buf, false);
- if (error == EAGAIN) {
- /* Nothing to do. */
- } else if (error == ENOBUFS) {
+ if (!change) {
VLOG_WARN_RL(&rl, "network monitor socket overflowed");
- } else if (error) {
- VLOG_WARN_RL(&rl, "error on network monitor socket: %s",
- strerror(error));
- } else {
- struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)];
- struct nlmsghdr *nlh;
- struct ifinfomsg *iim;
-
- nlh = ofpbuf_at(buf, 0, NLMSG_HDRLEN);
- iim = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *iim);
- if (!iim) {
- VLOG_WARN_RL(&rl, "received bad rtnl message (no ifinfomsg)");
- ofpbuf_delete(buf);
- return;
- }
-
- if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg),
- rtnlgrp_link_policy,
- attrs, ARRAY_SIZE(rtnlgrp_link_policy))) {
- VLOG_WARN_RL(&rl,"received bad rtnl message (policy)");
- ofpbuf_delete(buf);
- return;
- }
- if (nlh->nlmsg_type == RTM_DELLINK && attrs[IFLA_MASTER]) {
- const char *port_name = nl_attr_get_string(attrs[IFLA_IFNAME]);
- char br_name[IFNAMSIZ];
- uint32_t br_idx = nl_attr_get_u32(attrs[IFLA_MASTER]);
-
- if (!if_indextoname(br_idx, br_name)) {
- ofpbuf_delete(buf);
- return;
- }
+ return;
+ }
- if (!netdev_exists(port_name)) {
- /* Network device is really gone. */
- struct ovsrec_bridge *br = find_bridge(ovs, br_name);
-
- VLOG_INFO("network device %s destroyed, "
- "removing from bridge %s", port_name, br_name);
-
- if (!br) {
- VLOG_WARN("no bridge named %s from which to remove %s",
- br_name, port_name);
- ofpbuf_delete(buf);
- return;
- }
-
- del_port(br, port_name);
- } else {
- /* A network device by that name exists even though the kernel
- * told us it had disappeared. Probably, what happened was
- * this:
- *
- * 1. Device destroyed.
- * 2. Notification sent to us.
- * 3. New device created with same name as old one.
- * 4. ovs-brcompatd notified, removes device from bridge.
- *
- * There's no a priori reason that in this situation that the
- * new device with the same name should remain in the bridge;
- * on the contrary, that would be unexpected. *But* there is
- * one important situation where, if we do this, bad things
- * happen. This is the case of XenServer Tools version 5.0.0,
- * which on boot of a Windows VM cause something like this to
- * happen on the Xen host:
- *
- * i. Create tap1.0 and vif1.0.
- * ii. Delete tap1.0.
- * iii. Delete vif1.0.
- * iv. Re-create vif1.0.
- *
- * (XenServer Tools 5.5.0 does not exhibit this behavior, and
- * neither does a VM without Tools installed at all.@.)
- *
- * Steps iii and iv happen within a few seconds of each other.
- * Step iv causes /etc/xensource/scripts/vif to run, which in
- * turn calls ovs-cfg-mod to add the new device to the bridge.
- * If step iv happens after step 4 (in our first list of
- * steps), then all is well, but if it happens between 3 and 4
- * (which can easily happen if ovs-brcompatd has to wait to
- * lock the configuration file), then we will remove the new
- * incarnation from the bridge instead of the old one!
- *
- * So, to avoid this problem, we do nothing here. This is
- * strictly incorrect except for this one particular case, and
- * perhaps that will bite us someday. If that happens, then we
- * will have to somehow track network devices by ifindex, since
- * a new device will have a new ifindex even if it has the same
- * name as an old device.
- */
- VLOG_INFO("kernel reported network device %s removed but "
- "a device by that name exists (XS Tools 5.0.0?)",
- port_name);
- }
- }
- ofpbuf_delete(buf);
+ if (change->nlmsg_type != RTM_DELLINK || !change->master_ifindex) {
+ return;
+ }
+
+ port_name = change->ifname;
+ if (!if_indextoname(change->master_ifindex, br_name)) {
+ return;
}
+
+ VLOG_INFO("network device %s destroyed, removing from bridge %s",
+ port_name, br_name);
+
+ run_vsctl(vsctl_program, VSCTL_OPTIONS,
+ "--", "--if-exists", "del-port", port_name,
+ "--", "comment", "ovs-brcompatd:", port_name, "disappeared",
+ (char *) NULL);
}
int
main(int argc, char *argv[])
{
+ extern struct vlog_module VLM_reconnect; /* Only its address is used, in the vlog_set_levels() call below. */
+ struct nln_notifier *link_notifier; /* Set by rtnetlink_link_notifier_create(); handed to the matching _destroy() at the end. */
struct unixctl_server *unixctl;
- const char *remote;
- struct ovsdb_idl *idl;
int retval;
proctitle_init(argc, argv);
set_program_name(argv[0]);
- time_init();
- vlog_init();
- vlog_set_levels(VLM_ANY_MODULE, VLF_CONSOLE, VLL_WARN);
- vlog_set_levels(VLM_reconnect, VLF_ANY_FACILITY, VLL_WARN);
+ vlog_set_levels(&VLM_reconnect, VLF_ANY_FACILITY, VLL_WARN); /* Module is now passed by address rather than by value. */
- remote = parse_options(argc, argv);
+ parse_options(argc, argv); /* Now returns nothing; it fills in appctl_program and vsctl_program instead of yielding a database remote. */
signal(SIGPIPE, SIG_IGN);
process_init();
- ovsrec_init();
- die_if_already_running();
daemonize_start();
retval = unixctl_server_create(NULL, &unixctl);
}
if (brc_open(&brc_sock)) {
- ovs_fatal(0, "could not open brcompat socket. Check "
- "\"brcompat\" kernel module.");
+ VLOG_FATAL("could not open brcompat socket. Check "
+ "\"brcompat\" kernel module."); /* Presumably does not return, like the ovs_fatal() it replaces -- TODO confirm. */
}
- if (prune_timeout) {
- if (nl_sock_create(NETLINK_ROUTE, RTNLGRP_LINK, 0, 0, &rtnl_sock)) {
- ovs_fatal(0, "could not create rtnetlink socket");
- }
- }
+ link_notifier = rtnetlink_link_notifier_create(netdev_changed_cb, NULL); /* Registers netdev_changed_cb with a NULL second argument. NOTE(review): the result is not checked for failure here. */
daemonize_complete();
- idl = ovsdb_idl_create(remote, &ovsrec_idl_class);
-
for (;;) {
- const struct ovsrec_open_vswitch *ovs;
- struct ovsdb_idl_txn *txn;
- enum ovsdb_idl_txn_status status;
-
- ovsdb_idl_run(idl);
-
- txn = ovsdb_idl_txn_create(idl);
-
unixctl_server_run(unixctl);
- ovs = ovsrec_open_vswitch_first(idl);
- brc_recv_update(ovs);
+ rtnetlink_link_run(); /* Presumably delivers pending link notifications to netdev_changed_cb -- TODO confirm. */
+ brc_recv_update(); /* No longer takes the Open vSwitch database row as an argument. */
- if (!ovs && ovsdb_idl_has_ever_connected(idl)) {
- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
- VLOG_WARN_RL(&rl, "%s: database does not contain any Open vSwitch "
- "configuration", remote);
- }
netdev_run();
- /* If 'prune_timeout' is non-zero, we actively prune from the
- * configuration of port entries that are no longer valid. We
- * use two methods:
- *
- * 1) The kernel explicitly notifies us of removed ports
- * through the RTNL messages.
- *
- * 2) We periodically check all ports associated with bridges
- * to see if they no longer exist.
- */
- if (ovs && prune_timeout) {
- rtnl_recv_update(ovs);
-#if 0
- prune_ports();
-#endif
-
- nl_sock_wait(rtnl_sock, POLLIN);
- poll_timer_wait(prune_timeout);
- }
-
- while ((status = ovsdb_idl_txn_commit(txn)) == TXN_INCOMPLETE) {
- ovsdb_idl_run(idl);
- ovsdb_idl_wait(idl);
- ovsdb_idl_txn_wait(txn);
- poll_block();
- }
-
- switch (status) {
- case TXN_INCOMPLETE:
- NOT_REACHED();
-
- case TXN_ABORTED:
- /* Should not happen--we never call ovsdb_idl_txn_abort(). */
- ovs_fatal(0, "transaction aborted");
-
- case TXN_SUCCESS:
- case TXN_UNCHANGED:
- break;
-
- case TXN_TRY_AGAIN:
- /* xxx Handle this better! */
- VLOG_ERR("OVSDB transaction needs retry");
- break;
-
- case TXN_ERROR:
- /* xxx Handle this better! */
- VLOG_ERR("OVSDB transaction failed: %s",
- ovsdb_idl_txn_get_error(txn));
- break;
-
- default:
- NOT_REACHED();
- }
- ovsdb_idl_txn_destroy(txn);
-
nl_sock_wait(brc_sock, POLLIN);
- ovsdb_idl_wait(idl);
unixctl_server_wait(unixctl);
+ rtnetlink_link_wait(); /* Wait-side counterpart of rtnetlink_link_run() above; presumably wakes poll_block() on link events -- TODO confirm. */
netdev_wait();
poll_block();
}
- ovsdb_idl_destroy(idl);
+ rtnetlink_link_notifier_destroy(link_notifier); /* NOTE(review): follows a for (;;) loop with no visible break, so this looks unreachable. */
return 0;
}
static void
-validate_appctl_command(void)
-{
- const char *p;
- int n;
-
- n = 0;
- for (p = strchr(appctl_command, '%'); p; p = strchr(p + 2, '%')) {
- if (p[1] == '%') {
- /* Nothing to do. */
- } else if (p[1] == 's') {
- n++;
- } else {
- ovs_fatal(0, "only '%%s' and '%%%%' allowed in --appctl-command");
- }
- }
- if (n != 1) {
- ovs_fatal(0, "'%%s' must appear exactly once in --appctl-command");
- }
-}
-
-static const char *
parse_options(int argc, char *argv[])
{
enum {
- OPT_PRUNE_TIMEOUT,
- OPT_APPCTL_COMMAND,
+ OPT_APPCTL, /* Value returned for --appctl. */
+ OPT_VSCTL, /* Value returned for --vsctl. */
VLOG_OPTION_ENUMS,
- LEAK_CHECKER_OPTION_ENUMS
+ LEAK_CHECKER_OPTION_ENUMS,
+ DAEMON_OPTION_ENUMS
};
static struct option long_options[] = {
- {"help", no_argument, 0, 'h'},
- {"version", no_argument, 0, 'V'},
- {"prune-timeout", required_argument, 0, OPT_PRUNE_TIMEOUT},
- {"appctl-command", required_argument, 0, OPT_APPCTL_COMMAND},
+ {"help", no_argument, NULL, 'h'},
+ {"version", no_argument, NULL, 'V'},
+ {"appctl", required_argument, NULL, OPT_APPCTL},
+ {"vsctl", required_argument, NULL, OPT_VSCTL},
DAEMON_LONG_OPTIONS,
VLOG_LONG_OPTIONS,
LEAK_CHECKER_LONG_OPTIONS,
- {0, 0, 0, 0},
+ {NULL, 0, NULL, 0},
};
char *short_options = long_options_to_short_options(long_options);
+ const char *appctl = "ovs-appctl"; /* Default program name; replaced by the --appctl argument if given. */
+ const char *vsctl = "ovs-vsctl"; /* Default program name; replaced by the --vsctl argument if given. */
- appctl_command = xasprintf("%s/ovs-appctl %%s", ovs_bindir);
for (;;) {
int c;
}
switch (c) {
- case 'H':
case 'h':
usage();
case 'V':
- OVS_PRINT_VERSION(0, 0);
+ ovs_print_version(0, 0);
exit(EXIT_SUCCESS);
- case OPT_PRUNE_TIMEOUT:
- prune_timeout = atoi(optarg) * 1000;
+ case OPT_APPCTL:
+ appctl = optarg; /* Only recorded here; resolved by process_search_path() after option parsing. */
break;
- case OPT_APPCTL_COMMAND:
- appctl_command = optarg;
+ case OPT_VSCTL:
+ vsctl = optarg; /* Only recorded here; resolved by process_search_path() after option parsing. */
break;
VLOG_OPTION_HANDLERS
}
free(short_options);
- validate_appctl_command();
-
- argc -= optind;
- argv += optind;
+ appctl_program = process_search_path(appctl); /* Presumably looks the name up via $PATH (per the message below); a null result is treated as fatal. */
+ if (!appctl_program) {
+ VLOG_FATAL("%s: not found in $PATH (use --appctl to specify an "
+ "alternate location)", appctl);
+ }
- if (argc != 1) {
- ovs_fatal(0, "database socket is non-option argument; "
- "use --help for usage");
+ vsctl_program = process_search_path(vsctl); /* Same lookup and failure handling as for appctl_program above. */
+ if (!vsctl_program) {
+ VLOG_FATAL("%s: not found in $PATH (use --vsctl to specify an "
+ "alternate location)", vsctl);
}
- return argv[0];
+ if (argc != optind) { /* Any argv entry left after the options is rejected; the old database-socket argument is gone. */
+ VLOG_FATAL("no non-option arguments are supported; "
+ "use --help for usage");
+ }
}
static void
usage(void)
{
printf("%s: bridge compatibility front-end for ovs-vswitchd\n"
- "usage: %s [OPTIONS] CONFIG\n"
- "CONFIG is the configuration file used by ovs-vswitchd.\n",
+ "usage: %s [OPTIONS]\n", /* Both %s receive program_name; the CONFIG positional argument is no longer advertised. */
program_name, program_name);
printf("\nConfiguration options:\n"
- " --appctl-command=COMMAND shell command to run ovs-appctl\n"
- " --prune-timeout=SECS wait at most SECS before pruning ports\n"
+ " --appctl=PROGRAM overrides $PATH for finding ovs-appctl\n" /* Handled as OPT_APPCTL in parse_options(). */
+ " --vsctl=PROGRAM overrides $PATH for finding ovs-vsctl\n" /* Handled as OPT_VSCTL in parse_options(). */
);
daemon_usage();
vlog_usage();
" -h, --help display this help message\n"
" -V, --version display version information\n");
leak_checker_usage();
- printf("\nThe default appctl command is:\n%s\n", appctl_command);
exit(EXIT_SUCCESS);
}