X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=vswitchd%2Fovs-brcompatd.c;h=41aabe48205c260f9d62ce60f3942cf3f5d23629;hb=refs%2Fheads%2Fbranch-1.5;hp=973b098b2f1e43303964d28f146e5be0f18f9450;hpb=e3c17733704fd54fa937ed5e1e8c4b51e00961f1;p=sliver-openvswitch.git diff --git a/vswitchd/ovs-brcompatd.c b/vswitchd/ovs-brcompatd.c index 973b098b2..41aabe482 100644 --- a/vswitchd/ovs-brcompatd.c +++ b/vswitchd/ovs-brcompatd.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -43,50 +44,39 @@ #include "leak-checker.h" #include "netdev.h" #include "netlink.h" +#include "netlink-notifier.h" #include "netlink-socket.h" #include "ofpbuf.h" #include "openvswitch/brcompat-netlink.h" -#include "ovsdb-idl.h" #include "packets.h" #include "poll-loop.h" #include "process.h" +#include "rtnetlink-link.h" #include "signals.h" #include "sset.h" +#include "svec.h" #include "timeval.h" #include "unixctl.h" #include "util.h" #include "vlog.h" -#include "vswitchd/vswitch-idl.h" VLOG_DEFINE_THIS_MODULE(brcompatd); - /* xxx Just hangs if datapath is rmmod/insmod. Learn to reconnect? */ -/* Actions to modify bridge compatibility configuration. */ -enum bmc_action { - BMC_ADD_DP, - BMC_DEL_DP, - BMC_ADD_PORT, - BMC_DEL_PORT -}; - -static const char *parse_options(int argc, char *argv[]); +static void parse_options(int argc, char *argv[]); static void usage(void) NO_RETURN; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 60); -/* Maximum number of milliseconds to wait before pruning port entries that - * no longer exist. If set to zero, ports are never pruned. */ -static int prune_timeout = 5000; +/* --appctl: Absolute path to ovs-appctl. */ +static char *appctl_program; -/* Shell command to execute (via popen()) to send a control command to the - * running ovs-vswitchd process. The string must contain one instance of %s, - * which is replaced by the control command. */ -static char *appctl_command; +/* --vsctl: Absolute path to ovs-vsctl. */ +static char *vsctl_program; -/* Netlink socket to listen for interface changes. */ -static struct nl_sock *rtnl_sock; +/* Options that we should generally pass to ovs-vsctl. */ +#define VSCTL_OPTIONS "--timeout=5", "-vANY:console:WARN" /* Netlink socket to bridge compatibility kernel module. */ static struct nl_sock *brc_sock; @@ -98,10 +88,90 @@ static const struct nl_policy brc_multicast_policy[] = { [BRC_GENL_A_MC_GROUP] = {.type = NL_A_U32 } }; -static const struct nl_policy rtnlgrp_link_policy[] = { - [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false }, - [IFLA_MASTER] = { .type = NL_A_U32, .optional = true }, -}; +static char * +capture_vsctl_valist(const char *arg0, va_list args) +{ + char *stdout_log, *stderr_log; + enum vlog_level log_level; + struct svec argv; + int status; + char *msg; + + /* Compose arguments. */ + svec_init(&argv); + svec_add(&argv, arg0); + for (;;) { + const char *arg = va_arg(args, const char *); + if (!arg) { + break; + } + svec_add(&argv, arg); + } + svec_terminate(&argv); + + /* Run process. */ + if (process_run_capture(argv.names, &stdout_log, &stderr_log, SIZE_MAX, + &status)) { + svec_destroy(&argv); + return NULL; + } + + /* Log results. */ + if (WIFEXITED(status)) { + int code = WEXITSTATUS(status); + log_level = code == 0 ? VLL_DBG : code == 1 ? VLL_WARN : VLL_ERR; + } else { + log_level = VLL_ERR; + } + msg = process_status_msg(status); + VLOG(log_level, "ovs-vsctl exited (%s)", msg); + if (stdout_log && *stdout_log) { + VLOG(log_level, "ovs-vsctl wrote to stdout:\n%s\n", stdout_log); + } + if (stderr_log && *stderr_log) { + VLOG(log_level, "ovs-vsctl wrote to stderr:\n%s\n", stderr_log); + } + free(msg); + + svec_destroy(&argv); + + free(stderr_log); + if (WIFEXITED(status) && !WEXITSTATUS(status)) { + return stdout_log; + } else { + free(stdout_log); + return NULL; + } +} + +static char * SENTINEL(0) +capture_vsctl(const char *arg0, ...) +{ + char *stdout_log; + va_list args; + + va_start(args, arg0); + stdout_log = capture_vsctl_valist(arg0, args); + va_end(args); + + return stdout_log; +} + +static bool SENTINEL(0) +run_vsctl(const char *arg0, ...) +{ + char *stdout_log; + va_list args; + bool ok; + + va_start(args, arg0); + stdout_log = capture_vsctl_valist(arg0, args); + va_end(args); + + ok = stdout_log != NULL; + free(stdout_log); + return ok; +} static int lookup_brc_multicast_group(int *multicast_group) @@ -173,424 +243,6 @@ static const struct nl_policy brc_dp_policy[] = { [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING }, }; -static struct ovsrec_bridge * -find_bridge(const struct ovsrec_open_vswitch *ovs, const char *br_name) -{ - size_t i; - - for (i = 0; i < ovs->n_bridges; i++) { - if (!strcmp(br_name, ovs->bridges[i]->name)) { - return ovs->bridges[i]; - } - } - - return NULL; -} - -static int -execute_appctl_command(const char *unixctl_command, char **output) -{ - char *stdout_log, *stderr_log; - int error, status; - char *argv[5]; - - argv[0] = "/bin/sh"; - argv[1] = "-c"; - argv[2] = xasprintf(appctl_command, unixctl_command); - argv[3] = NULL; - - /* Run process and log status. */ - error = process_run_capture(argv, &stdout_log, &stderr_log, &status); - if (error) { - VLOG_ERR("failed to execute %s command via ovs-appctl: %s", - unixctl_command, strerror(error)); - } else if (status) { - char *msg = process_status_msg(status); - VLOG_ERR("ovs-appctl exited with error (%s)", msg); - free(msg); - error = ECHILD; - } - - /* Deal with stdout_log. */ - if (output) { - *output = stdout_log; - } else { - free(stdout_log); - } - - /* Deal with stderr_log */ - if (stderr_log && *stderr_log) { - VLOG_INFO("ovs-appctl wrote to stderr:\n%s", stderr_log); - } - free(stderr_log); - - free(argv[2]); - - return error; -} - -static void -do_get_bridge_parts(const struct ovsrec_bridge *br, struct sset *parts, - int vlan, bool break_down_bonds) -{ - size_t i, j; - - for (i = 0; i < br->n_ports; i++) { - const struct ovsrec_port *port = br->ports[i]; - - if (vlan >= 0) { - int port_vlan = port->n_tag ? *port->tag : 0; - if (vlan != port_vlan) { - continue; - } - } - if (break_down_bonds) { - for (j = 0; j < port->n_interfaces; j++) { - const struct ovsrec_interface *iface = port->interfaces[j]; - sset_add(parts, iface->name); - } - } else { - sset_add(parts, port->name); - } - } -} - -/* Add all the interfaces for 'bridge' to 'ifaces', breaking bonded interfaces - * down into their constituent parts. - * - * If 'vlan' < 0, all interfaces on 'bridge' are reported. If 'vlan' == 0, - * then only interfaces for trunk ports or ports with implicit VLAN 0 are - * reported. If 'vlan' > 0, only interfaces with implicit VLAN 'vlan' are - * reported. */ -static void -get_bridge_ifaces(const struct ovsrec_bridge *br, struct sset *ifaces, - int vlan) -{ - do_get_bridge_parts(br, ifaces, vlan, true); -} - -/* Add all the ports for 'bridge' to 'ports'. Bonded ports are reported under - * the bond name, not broken down into their constituent interfaces. - * - * If 'vlan' < 0, all ports on 'bridge' are reported. If 'vlan' == 0, then - * only trunk ports or ports with implicit VLAN 0 are reported. If 'vlan' > 0, - * only port with implicit VLAN 'vlan' are reported. */ -static void -get_bridge_ports(const struct ovsrec_bridge *br, struct sset *ports, - int vlan) -{ - do_get_bridge_parts(br, ports, vlan, false); -} - -static struct ovsdb_idl_txn * -txn_from_openvswitch(const struct ovsrec_open_vswitch *ovs) -{ - return ovsdb_idl_txn_get(&ovs->header_); -} - -static bool -port_is_fake_bridge(const struct ovsrec_port *port) -{ - return (port->fake_bridge - && port->tag - && *port->tag >= 1 && *port->tag <= 4095); -} - -static void -ovs_insert_bridge(const struct ovsrec_open_vswitch *ovs, - struct ovsrec_bridge *bridge) -{ - struct ovsrec_bridge **bridges; - size_t i; - - bridges = xmalloc(sizeof *ovs->bridges * (ovs->n_bridges + 1)); - for (i = 0; i < ovs->n_bridges; i++) { - bridges[i] = ovs->bridges[i]; - } - bridges[ovs->n_bridges] = bridge; - ovsrec_open_vswitch_set_bridges(ovs, bridges, ovs->n_bridges + 1); - free(bridges); -} - -static struct json * -where_uuid_equals(const struct uuid *uuid) -{ - return - json_array_create_1( - json_array_create_3( - json_string_create("_uuid"), - json_string_create("=="), - json_array_create_2( - json_string_create("uuid"), - json_string_create_nocopy( - xasprintf(UUID_FMT, UUID_ARGS(uuid)))))); -} - -/* Commits 'txn'. If 'wait_for_reload' is true, also waits for Open vSwitch to - reload the configuration before returning. - - Returns EAGAIN if the caller should try the operation again, 0 on success, - otherwise a positive errno value. */ -static int -commit_txn(struct ovsdb_idl_txn *txn, bool wait_for_reload) -{ - struct ovsdb_idl *idl = ovsdb_idl_txn_get_idl (txn); - enum ovsdb_idl_txn_status status; - int64_t next_cfg = 0; - - if (wait_for_reload) { - const struct ovsrec_open_vswitch *ovs = ovsrec_open_vswitch_first(idl); - struct json *where = where_uuid_equals(&ovs->header_.uuid); - ovsdb_idl_txn_increment(txn, "Open_vSwitch", "next_cfg", where); - json_destroy(where); - } - status = ovsdb_idl_txn_commit_block(txn); - if (wait_for_reload && status == TXN_SUCCESS) { - next_cfg = ovsdb_idl_txn_get_increment_new_value(txn); - } - ovsdb_idl_txn_destroy(txn); - - switch (status) { - case TXN_INCOMPLETE: - NOT_REACHED(); - - case TXN_ABORTED: - VLOG_ERR_RL(&rl, "OVSDB transaction unexpectedly aborted"); - return ECONNABORTED; - - case TXN_UNCHANGED: - return 0; - - case TXN_SUCCESS: - if (wait_for_reload) { - for (;;) { - /* We can't use 'ovs' any longer because ovsdb_idl_run() can - * destroy it. */ - const struct ovsrec_open_vswitch *ovs2; - - ovsdb_idl_run(idl); - OVSREC_OPEN_VSWITCH_FOR_EACH (ovs2, idl) { - if (ovs2->cur_cfg >= next_cfg) { - goto done; - } - } - ovsdb_idl_wait(idl); - poll_block(); - } - done: ; - } - return 0; - - case TXN_TRY_AGAIN: - VLOG_ERR_RL(&rl, "OVSDB transaction needs retry"); - return EAGAIN; - - case TXN_ERROR: - VLOG_ERR_RL(&rl, "OVSDB transaction failed: %s", - ovsdb_idl_txn_get_error(txn)); - return EBUSY; - - default: - NOT_REACHED(); - } -} - -static int -add_bridge(struct ovsdb_idl *idl, const struct ovsrec_open_vswitch *ovs, - const char *br_name) -{ - struct ovsrec_bridge *br; - struct ovsrec_port *port; - struct ovsrec_interface *iface; - struct ovsdb_idl_txn *txn; - - if (find_bridge(ovs, br_name)) { - VLOG_WARN("addbr %s: bridge %s exists", br_name, br_name); - return EEXIST; - } else if (netdev_exists(br_name)) { - size_t i; - - for (i = 0; i < ovs->n_bridges; i++) { - size_t j; - struct ovsrec_bridge *br_cfg = ovs->bridges[i]; - - for (j = 0; j < br_cfg->n_ports; j++) { - if (port_is_fake_bridge(br_cfg->ports[j])) { - VLOG_WARN("addbr %s: %s exists as a fake bridge", - br_name, br_name); - return 0; - } - } - } - - VLOG_WARN("addbr %s: cannot create bridge %s because a network " - "device named %s already exists", - br_name, br_name, br_name); - return EEXIST; - } - - txn = ovsdb_idl_txn_create(idl); - - ovsdb_idl_txn_add_comment(txn, "ovs-brcompatd: addbr %s", br_name); - - iface = ovsrec_interface_insert(txn_from_openvswitch(ovs)); - ovsrec_interface_set_name(iface, br_name); - - port = ovsrec_port_insert(txn_from_openvswitch(ovs)); - ovsrec_port_set_name(port, br_name); - ovsrec_port_set_interfaces(port, &iface, 1); - - br = ovsrec_bridge_insert(txn_from_openvswitch(ovs)); - ovsrec_bridge_set_name(br, br_name); - ovsrec_bridge_set_ports(br, &port, 1); - - ovs_insert_bridge(ovs, br); - - return commit_txn(txn, true); -} - -static void -add_port(const struct ovsrec_open_vswitch *ovs, - const struct ovsrec_bridge *br, const char *port_name) -{ - struct ovsrec_interface *iface; - struct ovsrec_port *port; - struct ovsrec_port **ports; - size_t i; - - /* xxx Check conflicts? */ - iface = ovsrec_interface_insert(txn_from_openvswitch(ovs)); - ovsrec_interface_set_name(iface, port_name); - - port = ovsrec_port_insert(txn_from_openvswitch(ovs)); - ovsrec_port_set_name(port, port_name); - ovsrec_port_set_interfaces(port, &iface, 1); - - ports = xmalloc(sizeof *br->ports * (br->n_ports + 1)); - for (i = 0; i < br->n_ports; i++) { - ports[i] = br->ports[i]; - } - ports[br->n_ports] = port; - ovsrec_bridge_set_ports(br, ports, br->n_ports + 1); - free(ports); -} - -/* Deletes 'port' from 'br'. - * - * After calling this function, 'port' must not be referenced again. */ -static void -del_port(const struct ovsrec_bridge *br, const struct ovsrec_port *port) -{ - struct ovsrec_port **ports; - size_t i, n; - - /* Remove 'port' from the bridge's list of ports. */ - ports = xmalloc(sizeof *br->ports * br->n_ports); - for (i = n = 0; i < br->n_ports; i++) { - if (br->ports[i] != port) { - ports[n++] = br->ports[i]; - } - } - ovsrec_bridge_set_ports(br, ports, n); - free(ports); -} - -/* Delete 'iface' from 'port' (which must be within 'br'). If 'iface' was - * 'port''s only interface, delete 'port' from 'br' also. - * - * After calling this function, 'iface' must not be referenced again. */ -static void -del_interface(const struct ovsrec_bridge *br, - const struct ovsrec_port *port, - const struct ovsrec_interface *iface) -{ - if (port->n_interfaces == 1) { - del_port(br, port); - } else { - struct ovsrec_interface **ifaces; - size_t i, n; - - ifaces = xmalloc(sizeof *port->interfaces * port->n_interfaces); - for (i = n = 0; i < port->n_interfaces; i++) { - if (port->interfaces[i] != iface) { - ifaces[n++] = port->interfaces[i]; - } - } - ovsrec_port_set_interfaces(port, ifaces, n); - free(ifaces); - } -} - -/* Find and return a port within 'br' named 'port_name'. */ -static const struct ovsrec_port * -find_port(const struct ovsrec_bridge *br, const char *port_name) -{ - size_t i; - - for (i = 0; i < br->n_ports; i++) { - struct ovsrec_port *port = br->ports[i]; - if (!strcmp(port_name, port->name)) { - return port; - } - } - return NULL; -} - -/* Find and return an interface within 'br' named 'iface_name'. */ -static const struct ovsrec_interface * -find_interface(const struct ovsrec_bridge *br, const char *iface_name, - struct ovsrec_port **portp) -{ - size_t i; - - for (i = 0; i < br->n_ports; i++) { - struct ovsrec_port *port = br->ports[i]; - size_t j; - - for (j = 0; j < port->n_interfaces; j++) { - struct ovsrec_interface *iface = port->interfaces[j]; - if (!strcmp(iface->name, iface_name)) { - *portp = port; - return iface; - } - } - } - - *portp = NULL; - return NULL; -} - -static int -del_bridge(struct ovsdb_idl *idl, - const struct ovsrec_open_vswitch *ovs, const char *br_name) -{ - struct ovsrec_bridge *br = find_bridge(ovs, br_name); - struct ovsrec_bridge **bridges; - struct ovsdb_idl_txn *txn; - size_t i, n; - - if (!br) { - VLOG_WARN("delbr %s: no bridge named %s", br_name, br_name); - return ENXIO; - } - - txn = ovsdb_idl_txn_create(idl); - - ovsdb_idl_txn_add_comment(txn, "ovs-brcompatd: delbr %s", br_name); - - /* Remove 'br' from the vswitch's list of bridges. */ - bridges = xmalloc(sizeof *ovs->bridges * ovs->n_bridges); - for (i = n = 0; i < ovs->n_bridges; i++) { - if (ovs->bridges[i] != br) { - bridges[n++] = ovs->bridges[i]; - } - } - ovsrec_open_vswitch_set_bridges(ovs, bridges, n); - free(bridges); - - return commit_txn(txn, true); -} - static int parse_command(struct ofpbuf *buffer, uint32_t *seq, const char **br_name, const char **port_name, uint64_t *count, uint64_t *skip) @@ -663,9 +315,7 @@ send_simple_reply(uint32_t seq, int error) } static int -handle_bridge_cmd(struct ovsdb_idl *idl, - const struct ovsrec_open_vswitch *ovs, - struct ofpbuf *buffer, bool add) +handle_bridge_cmd(struct ofpbuf *buffer, bool add) { const char *br_name; uint32_t seq; @@ -673,14 +323,14 @@ handle_bridge_cmd(struct ovsdb_idl *idl, error = parse_command(buffer, &seq, &br_name, NULL, NULL, NULL); if (!error) { - int retval; - - do { - retval = (add ? add_bridge : del_bridge)(idl, ovs, br_name); - VLOG_INFO_RL(&rl, "%sbr %s: %s", - add ? "add" : "del", br_name, strerror(retval)); - } while (retval == EAGAIN); - + const char *vsctl_cmd = add ? "add-br" : "del-br"; + const char *brctl_cmd = add ? "addbr" : "delbr"; + if (!run_vsctl(vsctl_program, VSCTL_OPTIONS, + "--", vsctl_cmd, br_name, + "--", "comment", "ovs-brcompatd:", brctl_cmd, br_name, + (char *) NULL)) { + error = add ? EEXIST : ENXIO; + } send_simple_reply(seq, error); } return error; @@ -692,97 +342,79 @@ static const struct nl_policy brc_port_policy[] = { }; static int -handle_port_cmd(struct ovsdb_idl *idl, - const struct ovsrec_open_vswitch *ovs, - struct ofpbuf *buffer, bool add) +handle_port_cmd(struct ofpbuf *buffer, bool add) { - const char *cmd_name = add ? "add-if" : "del-if"; const char *br_name, *port_name; uint32_t seq; int error; error = parse_command(buffer, &seq, &br_name, &port_name, NULL, NULL); if (!error) { - struct ovsrec_bridge *br = find_bridge(ovs, br_name); - - if (!br) { - VLOG_WARN("%s %s %s: no bridge named %s", - cmd_name, br_name, port_name, br_name); - error = EINVAL; - } else if (!netdev_exists(port_name)) { - VLOG_WARN("%s %s %s: no network device named %s", - cmd_name, br_name, port_name, port_name); + const char *vsctl_cmd = add ? "add-port" : "del-port"; + const char *brctl_cmd = add ? "addif" : "delif"; + if (!run_vsctl(vsctl_program, VSCTL_OPTIONS, + "--", vsctl_cmd, br_name, port_name, + "--", "comment", "ovs-brcompatd:", brctl_cmd, + br_name, port_name, (char *) NULL)) { error = EINVAL; - } else { - do { - struct ovsdb_idl_txn *txn = ovsdb_idl_txn_create(idl); - - if (add) { - ovsdb_idl_txn_add_comment(txn, "ovs-brcompatd: add-if %s", - port_name); - add_port(ovs, br, port_name); - } else { - const struct ovsrec_port *port = find_port(br, port_name); - if (port) { - ovsdb_idl_txn_add_comment(txn, - "ovs-brcompatd: del-if %s", - port_name); - del_port(br, port); - } - } - - error = commit_txn(txn, true); - VLOG_INFO_RL(&rl, "%s %s %s: %s", - cmd_name, br_name, port_name, strerror(error)); - } while (error == EAGAIN); } send_simple_reply(seq, error); } - return error; } -/* The caller is responsible for freeing '*ovs_name' if the call is - * successful. */ -static int -linux_bridge_to_ovs_bridge(const struct ovsrec_open_vswitch *ovs, - const char *linux_name, - const struct ovsrec_bridge **ovs_bridge, - int *br_vlan) +static char * +linux_bridge_to_ovs_bridge(const char *linux_name, int *br_vlanp) { - *ovs_bridge = find_bridge(ovs, linux_name); - if (*ovs_bridge) { - /* Bridge name is the same. We are interested in VLAN 0. */ - *br_vlan = 0; - return 0; - } else { - /* No such Open vSwitch bridge 'linux_name', but there might be an - * internal port named 'linux_name' on some other bridge - * 'ovs_bridge'. If so then we are interested in the VLAN assigned to - * port 'linux_name' on the bridge named 'ovs_bridge'. */ - size_t i, j; - - for (i = 0; i < ovs->n_bridges; i++) { - const struct ovsrec_bridge *br = ovs->bridges[i]; - - for (j = 0; j < br->n_ports; j++) { - const struct ovsrec_port *port = br->ports[j]; - - if (!strcmp(port->name, linux_name)) { - *ovs_bridge = br; - *br_vlan = port->n_tag ? *port->tag : -1; - return 0; - } - } + char *save_ptr = NULL; + const char *br_name, *br_vlan; + char *br_name_copy; + char *output; - } - return ENODEV; + output = capture_vsctl(vsctl_program, VSCTL_OPTIONS, + "--", "br-to-parent", linux_name, + "--", "br-to-vlan", linux_name, + (char *) NULL); + if (!output) { + return NULL; + } + + br_name = strtok_r(output, " \t\r\n", &save_ptr); + br_vlan = strtok_r(NULL, " \t\r\n", &save_ptr); + if (!br_name || !br_vlan) { + free(output); + return NULL; } + br_name_copy = xstrdup(br_name); + *br_vlanp = atoi(br_vlan); + + free(output); + + return br_name_copy; +} + +static void +get_bridge_ifaces(const char *br_name, struct sset *ifaces) +{ + char *save_ptr = NULL; + char *output; + char *iface; + + output = capture_vsctl(vsctl_program, VSCTL_OPTIONS, "list-ifaces", + br_name, (char *) NULL); + if (!output) { + return; + } + + for (iface = strtok_r(output, " \t\r\n", &save_ptr); iface; + iface = strtok_r(NULL, " \t\r\n", &save_ptr)) { + sset_add(ifaces, iface); + } + free(output); } static int -handle_fdb_query_cmd(const struct ovsrec_open_vswitch *ovs, - struct ofpbuf *buffer) +handle_fdb_query_cmd(struct ofpbuf *buffer) { /* This structure is copied directly from the Linux 2.6.30 header files. * It would be more straightforward to #include , but @@ -811,15 +443,14 @@ handle_fdb_query_cmd(const struct ovsrec_open_vswitch *ovs, * pretend that the former is the case even though the latter is the * implementation. */ const char *linux_name; /* Name used by brctl. */ - const struct ovsrec_bridge *ovs_bridge; /* Bridge used by ovs-vswitchd. */ int br_vlan; /* VLAN tag. */ struct sset ifaces; struct ofpbuf query_data; const char *iface_name; struct ofpbuf *reply; - char *unixctl_command; uint64_t count, skip; + char *br_name; char *output; char *save_ptr; uint32_t seq; @@ -832,18 +463,18 @@ handle_fdb_query_cmd(const struct ovsrec_open_vswitch *ovs, } /* Figure out vswitchd bridge and VLAN. */ - error = linux_bridge_to_ovs_bridge(ovs, linux_name, - &ovs_bridge, &br_vlan); - if (error) { + br_name = linux_bridge_to_ovs_bridge(linux_name, &br_vlan); + if (!br_name) { + error = EINVAL; send_simple_reply(seq, error); return error; } /* Fetch the forwarding database using ovs-appctl. */ - unixctl_command = xasprintf("fdb/show %s", ovs_bridge->name); - error = execute_appctl_command(unixctl_command, &output); - free(unixctl_command); - if (error) { + output = capture_vsctl(appctl_program, "fdb/show", br_name, + (char *) NULL); + if (!output) { + error = ECHILD; send_simple_reply(seq, error); return error; } @@ -851,14 +482,14 @@ handle_fdb_query_cmd(const struct ovsrec_open_vswitch *ovs, /* Fetch the MAC address for each interface on the bridge, so that we can * fill in the is_local field in the response. */ sset_init(&ifaces); - get_bridge_ifaces(ovs_bridge, &ifaces, br_vlan); + get_bridge_ifaces(linux_name, &ifaces); local_macs = xmalloc(sset_count(&ifaces) * sizeof *local_macs); n_local_macs = 0; SSET_FOR_EACH (iface_name, &ifaces) { struct mac *mac = &local_macs[n_local_macs]; struct netdev *netdev; - error = netdev_open_default(iface_name, &netdev); + error = netdev_open(iface_name, "system", &netdev); if (!error) { if (!netdev_get_etheraddr(netdev, mac->addr)) { n_local_macs++; @@ -937,18 +568,26 @@ handle_fdb_query_cmd(const struct ovsrec_open_vswitch *ovs, } static void -send_ifindex_reply(uint32_t seq, struct sset *ifaces) +send_ifindex_reply(uint32_t seq, char *output) { + size_t allocated_indices; + char *save_ptr = NULL; struct ofpbuf *reply; const char *iface; size_t n_indices; int *indices; - /* Convert 'ifaces' into ifindexes. */ - n_indices = 0; - indices = xmalloc(sset_count(ifaces) * sizeof *indices); - SSET_FOR_EACH (iface, ifaces) { - int ifindex = if_nametoindex(iface); + indices = NULL; + n_indices = allocated_indices = 0; + for (iface = strtok_r(output, " \t\r\n", &save_ptr); iface; + iface = strtok_r(NULL, " \t\r\n", &save_ptr)) { + int ifindex; + + if (n_indices >= allocated_indices) { + indices = x2nrealloc(indices, &allocated_indices, sizeof *indices); + } + + ifindex = if_nametoindex(iface); if (ifindex) { indices[n_indices++] = ifindex; } @@ -965,14 +604,10 @@ send_ifindex_reply(uint32_t seq, struct sset *ifaces) } static int -handle_get_bridges_cmd(const struct ovsrec_open_vswitch *ovs, - struct ofpbuf *buffer) +handle_get_bridges_cmd(struct ofpbuf *buffer) { - struct sset bridges; - size_t i, j; - + char *output; uint32_t seq; - int error; /* Parse Netlink command. @@ -984,39 +619,22 @@ handle_get_bridges_cmd(const struct ovsrec_open_vswitch *ovs, return error; } - /* Get all the real bridges and all the fake ones. */ - sset_init(&bridges); - for (i = 0; i < ovs->n_bridges; i++) { - const struct ovsrec_bridge *br = ovs->bridges[i]; - - sset_add(&bridges, br->name); - for (j = 0; j < br->n_ports; j++) { - const struct ovsrec_port *port = br->ports[j]; - - if (port->fake_bridge) { - sset_add(&bridges, port->name); - } - } + output = capture_vsctl(vsctl_program, VSCTL_OPTIONS, "list-br", (char *) NULL); + if (!output) { + return ENODEV; } - send_ifindex_reply(seq, &bridges); - sset_destroy(&bridges); - + send_ifindex_reply(seq, output); + free(output); return 0; } static int -handle_get_ports_cmd(const struct ovsrec_open_vswitch *ovs, - struct ofpbuf *buffer) +handle_get_ports_cmd(struct ofpbuf *buffer) { - uint32_t seq; - const char *linux_name; - const struct ovsrec_bridge *ovs_bridge; - int br_vlan; - - struct sset ports; - + uint32_t seq; + char *output; int error; /* Parse Netlink command. */ @@ -1025,19 +643,14 @@ handle_get_ports_cmd(const struct ovsrec_open_vswitch *ovs, return error; } - error = linux_bridge_to_ovs_bridge(ovs, linux_name, - &ovs_bridge, &br_vlan); - if (error) { - send_simple_reply(seq, error); - return error; + output = capture_vsctl(vsctl_program, VSCTL_OPTIONS, "list-ports", linux_name, + (char *) NULL); + if (!output) { + return ENODEV; } - sset_init(&ports); - get_bridge_ports(ovs_bridge, &ports, br_vlan); - sset_find_and_delete(&ports, linux_name); - send_ifindex_reply(seq, &ports); /* XXX bonds won't show up */ - sset_destroy(&ports); - + send_ifindex_reply(seq, output); + free(output); return 0; } @@ -1072,11 +685,10 @@ brc_recv_update__(void) } static void -brc_recv_update(struct ovsdb_idl *idl) +brc_recv_update(void) { struct ofpbuf *buffer; struct genlmsghdr *genlmsghdr; - const struct ovsrec_open_vswitch *ovs; buffer = brc_recv_update__(); if (!buffer) { @@ -1095,43 +707,53 @@ brc_recv_update(struct ovsdb_idl *idl) goto error; } - /* Get the Open vSwitch configuration. Just drop the request on the floor - * if a valid configuration doesn't exist. (We could check this earlier, - * but we want to drain pending Netlink messages even when there is no Open - * vSwitch configuration.) */ - ovs = ovsrec_open_vswitch_first(idl); - if (!ovs) { - VLOG_WARN_RL(&rl, "could not find valid configuration to update"); - goto error; - } + /* Service all pending network device notifications before executing the + * command. This is very important to avoid a race in a scenario like the + * following, which is what happens with XenServer Tools version 5.0.0 + * during boot of a Windows VM: + * + * 1. Create tap1.0 and vif1.0. + * 2. Delete tap1.0. + * 3. Delete vif1.0. + * 4. Re-create vif1.0. + * + * We must process the network device notification from step 3 before we + * process the brctl command from step 4. If we process them in the + * reverse order, then step 4 completes as a no-op but step 3 then deletes + * the port that was just added. + * + * (XenServer Tools 5.5.0 does not exhibit this behavior, and neither does + * a VM without Tools installed at all.) + */ + rtnetlink_link_run(); switch (genlmsghdr->cmd) { case BRC_GENL_C_DP_ADD: - handle_bridge_cmd(idl, ovs, buffer, true); + handle_bridge_cmd(buffer, true); break; case BRC_GENL_C_DP_DEL: - handle_bridge_cmd(idl, ovs, buffer, false); + handle_bridge_cmd(buffer, false); break; case BRC_GENL_C_PORT_ADD: - handle_port_cmd(idl, ovs, buffer, true); + handle_port_cmd(buffer, true); break; case BRC_GENL_C_PORT_DEL: - handle_port_cmd(idl, ovs, buffer, false); + handle_port_cmd(buffer, false); break; case BRC_GENL_C_FDB_QUERY: - handle_fdb_query_cmd(ovs, buffer); + handle_fdb_query_cmd(buffer); break; case BRC_GENL_C_GET_BRIDGES: - handle_get_bridges_cmd(ovs, buffer); + handle_get_bridges_cmd(buffer); break; case BRC_GENL_C_GET_PORTS: - handle_get_ports_cmd(ovs, buffer); + handle_get_ports_cmd(buffer); break; default: @@ -1142,152 +764,53 @@ brc_recv_update(struct ovsdb_idl *idl) error: ofpbuf_delete(buffer); - return; } -/* Check for interface configuration changes announced through RTNL. */ static void -rtnl_recv_update(struct ovsdb_idl *idl, - const struct ovsrec_open_vswitch *ovs) +netdev_changed_cb(const struct rtnetlink_link_change *change, + void *aux OVS_UNUSED) { - struct ofpbuf *buf; + char br_name[IFNAMSIZ]; + const char *port_name; - int error = nl_sock_recv(rtnl_sock, &buf, false); - if (error == EAGAIN) { - /* Nothing to do. */ - } else if (error == ENOBUFS) { + if (!change) { VLOG_WARN_RL(&rl, "network monitor socket overflowed"); - } else if (error) { - VLOG_WARN_RL(&rl, "error on network monitor socket: %s", - strerror(error)); - } else { - struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)]; - struct nlmsghdr *nlh; - struct ifinfomsg *iim; - - nlh = ofpbuf_at(buf, 0, NLMSG_HDRLEN); - iim = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *iim); - if (!iim) { - VLOG_WARN_RL(&rl, "received bad rtnl message (no ifinfomsg)"); - ofpbuf_delete(buf); - return; - } + return; + } - if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg), - rtnlgrp_link_policy, - attrs, ARRAY_SIZE(rtnlgrp_link_policy))) { - VLOG_WARN_RL(&rl,"received bad rtnl message (policy)"); - ofpbuf_delete(buf); - return; - } - if (nlh->nlmsg_type == RTM_DELLINK && attrs[IFLA_MASTER]) { - const char *port_name = nl_attr_get_string(attrs[IFLA_IFNAME]); - char br_name[IFNAMSIZ]; - uint32_t br_idx = nl_attr_get_u32(attrs[IFLA_MASTER]); - - if (!if_indextoname(br_idx, br_name)) { - ofpbuf_delete(buf); - return; - } + if (change->nlmsg_type != RTM_DELLINK || !change->master_ifindex) { + return; + } - if (!netdev_exists(port_name)) { - /* Network device is really gone. */ - struct ovsdb_idl_txn *txn; - const struct ovsrec_interface *iface; - struct ovsrec_port *port; - struct ovsrec_bridge *br; - - VLOG_INFO("network device %s destroyed, " - "removing from bridge %s", port_name, br_name); - - br = find_bridge(ovs, br_name); - if (!br) { - VLOG_WARN("no bridge named %s from which to remove %s", - br_name, port_name); - ofpbuf_delete(buf); - return; - } - - txn = ovsdb_idl_txn_create(idl); - - iface = find_interface(br, port_name, &port); - if (iface) { - del_interface(br, port, iface); - ovsdb_idl_txn_add_comment(txn, - "ovs-brcompatd: destroy port %s", - port_name); - } - - commit_txn(txn, false); - } else { - /* A network device by that name exists even though the kernel - * told us it had disappeared. Probably, what happened was - * this: - * - * 1. Device destroyed. - * 2. Notification sent to us. - * 3. New device created with same name as old one. - * 4. ovs-brcompatd notified, removes device from bridge. - * - * There's no a priori reason that in this situation that the - * new device with the same name should remain in the bridge; - * on the contrary, that would be unexpected. *But* there is - * one important situation where, if we do this, bad things - * happen. This is the case of XenServer Tools version 5.0.0, - * which on boot of a Windows VM cause something like this to - * happen on the Xen host: - * - * i. Create tap1.0 and vif1.0. - * ii. Delete tap1.0. - * iii. Delete vif1.0. - * iv. Re-create vif1.0. - * - * (XenServer Tools 5.5.0 does not exhibit this behavior, and - * neither does a VM without Tools installed at all.@.) - * - * Steps iii and iv happen within a few seconds of each other. - * Step iv causes /etc/xensource/scripts/vif to run, which in - * turn calls ovs-cfg-mod to add the new device to the bridge. - * If step iv happens after step 4 (in our first list of - * steps), then all is well, but if it happens between 3 and 4 - * (which can easily happen if ovs-brcompatd has to wait to - * lock the configuration file), then we will remove the new - * incarnation from the bridge instead of the old one! - * - * So, to avoid this problem, we do nothing here. This is - * strictly incorrect except for this one particular case, and - * perhaps that will bite us someday. If that happens, then we - * will have to somehow track network devices by ifindex, since - * a new device will have a new ifindex even if it has the same - * name as an old device. - */ - VLOG_INFO("kernel reported network device %s removed but " - "a device by that name exists (XS Tools 5.0.0?)", - port_name); - } - } - ofpbuf_delete(buf); + port_name = change->ifname; + if (!if_indextoname(change->master_ifindex, br_name)) { + return; } + + VLOG_INFO("network device %s destroyed, removing from bridge %s", + port_name, br_name); + + run_vsctl(vsctl_program, VSCTL_OPTIONS, + "--", "--if-exists", "del-port", port_name, + "--", "comment", "ovs-brcompatd:", port_name, "disappeared", + (char *) NULL); } int main(int argc, char *argv[]) { extern struct vlog_module VLM_reconnect; + struct nln_notifier *link_notifier; struct unixctl_server *unixctl; - const char *remote; - struct ovsdb_idl *idl; int retval; proctitle_init(argc, argv); set_program_name(argv[0]); - vlog_set_levels(NULL, VLF_CONSOLE, VLL_WARN); vlog_set_levels(&VLM_reconnect, VLF_ANY_FACILITY, VLL_WARN); - remote = parse_options(argc, argv); + parse_options(argc, argv); signal(SIGPIPE, SIG_IGN); process_init(); - ovsrec_init(); daemonize_start(); @@ -1301,98 +824,35 @@ main(int argc, char *argv[]) "\"brcompat\" kernel module."); } - if (prune_timeout) { - int error; - - error = nl_sock_create(NETLINK_ROUTE, &rtnl_sock); - if (error) { - VLOG_FATAL("could not create rtnetlink socket (%s)", - strerror(error)); - } - - error = nl_sock_join_mcgroup(rtnl_sock, RTNLGRP_LINK); - if (error) { - VLOG_FATAL("could not join RTNLGRP_LINK multicast group (%s)", - strerror(error)); - } - } + link_notifier = rtnetlink_link_notifier_create(netdev_changed_cb, NULL); daemonize_complete(); - idl = ovsdb_idl_create(remote, &ovsrec_idl_class, true); - for (;;) { - const struct ovsrec_open_vswitch *ovs; - - ovsdb_idl_run(idl); - unixctl_server_run(unixctl); - brc_recv_update(idl); + rtnetlink_link_run(); + brc_recv_update(); - ovs = ovsrec_open_vswitch_first(idl); - if (!ovs && ovsdb_idl_has_ever_connected(idl)) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); - VLOG_WARN_RL(&rl, "%s: database does not contain any Open vSwitch " - "configuration", remote); - } netdev_run(); - /* If 'prune_timeout' is non-zero, we actively prune from the - * configuration of port entries that are no longer valid. We - * use two methods: - * - * 1) The kernel explicitly notifies us of removed ports - * through the RTNL messages. - * - * 2) We periodically check all ports associated with bridges - * to see if they no longer exist. - */ - if (ovs && prune_timeout) { - rtnl_recv_update(idl, ovs); - nl_sock_wait(rtnl_sock, POLLIN); - poll_timer_wait(prune_timeout); - } - - nl_sock_wait(brc_sock, POLLIN); - ovsdb_idl_wait(idl); unixctl_server_wait(unixctl); + rtnetlink_link_wait(); netdev_wait(); poll_block(); } - ovsdb_idl_destroy(idl); + rtnetlink_link_notifier_destroy(link_notifier); return 0; } static void -validate_appctl_command(void) -{ - const char *p; - int n; - - n = 0; - for (p = strchr(appctl_command, '%'); p; p = strchr(p + 2, '%')) { - if (p[1] == '%') { - /* Nothing to do. */ - } else if (p[1] == 's') { - n++; - } else { - VLOG_FATAL("only '%%s' and '%%%%' allowed in --appctl-command"); - } - } - if (n != 1) { - VLOG_FATAL("'%%s' must appear exactly once in --appctl-command"); - } -} - -static const char * parse_options(int argc, char *argv[]) { enum { - OPT_PRUNE_TIMEOUT, - OPT_APPCTL_COMMAND, + OPT_APPCTL, + OPT_VSCTL, VLOG_OPTION_ENUMS, LEAK_CHECKER_OPTION_ENUMS, DAEMON_OPTION_ENUMS @@ -1400,16 +860,17 @@ parse_options(int argc, char *argv[]) static struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'V'}, - {"prune-timeout", required_argument, NULL, OPT_PRUNE_TIMEOUT}, - {"appctl-command", required_argument, NULL, OPT_APPCTL_COMMAND}, + {"appctl", required_argument, NULL, OPT_APPCTL}, + {"vsctl", required_argument, NULL, OPT_VSCTL}, DAEMON_LONG_OPTIONS, VLOG_LONG_OPTIONS, LEAK_CHECKER_LONG_OPTIONS, {NULL, 0, NULL, 0}, }; char *short_options = long_options_to_short_options(long_options); + const char *appctl = "ovs-appctl"; + const char *vsctl = "ovs-vsctl"; - appctl_command = xasprintf("%s/ovs-appctl %%s", ovs_bindir()); for (;;) { int c; @@ -1419,20 +880,19 @@ parse_options(int argc, char *argv[]) } switch (c) { - case 'H': case 'h': usage(); case 'V': - OVS_PRINT_VERSION(0, 0); + ovs_print_version(0, 0); exit(EXIT_SUCCESS); - case OPT_PRUNE_TIMEOUT: - prune_timeout = atoi(optarg) * 1000; + case OPT_APPCTL: + appctl = optarg; break; - case OPT_APPCTL_COMMAND: - appctl_command = optarg; + case OPT_VSCTL: + vsctl = optarg; break; VLOG_OPTION_HANDLERS @@ -1448,29 +908,33 @@ parse_options(int argc, char *argv[]) } free(short_options); - validate_appctl_command(); + appctl_program = process_search_path(appctl); + if (!appctl_program) { + VLOG_FATAL("%s: not found in $PATH (use --appctl to specify an " + "alternate location)", appctl); + } - argc -= optind; - argv += optind; + vsctl_program = process_search_path(vsctl); + if (!vsctl_program) { + VLOG_FATAL("%s: not found in $PATH (use --vsctl to specify an " + "alternate location)", vsctl); + } - if (argc != 1) { - VLOG_FATAL("database socket is non-option argument; " + if (argc != optind) { + VLOG_FATAL("no non-option arguments are supported; " "use --help for usage"); } - - return argv[0]; } static void usage(void) { printf("%s: bridge compatibility front-end for ovs-vswitchd\n" - "usage: %s [OPTIONS] CONFIG\n" - "CONFIG is the configuration file used by ovs-vswitchd.\n", + "usage: %s [OPTIONS]\n", program_name, program_name); printf("\nConfiguration options:\n" - " --appctl-command=COMMAND shell command to run ovs-appctl\n" - " --prune-timeout=SECS wait at most SECS before pruning ports\n" + " --appctl=PROGRAM overrides $PATH for finding ovs-appctl\n" + " --vsctl=PROGRAM overrides $PATH for finding ovs-vsctl\n" ); daemon_usage(); vlog_usage(); @@ -1478,6 +942,5 @@ usage(void) " -h, --help display this help message\n" " -V, --version display version information\n"); leak_checker_usage(); - printf("\nThe default appctl command is:\n%s\n", appctl_command); exit(EXIT_SUCCESS); }