X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=vswitchd%2Fovs-brcompatd.c;h=adabe89492538242ec6665bc7e8da0a0edec8ca4;hb=ea523221d4b11e9e067ec8b25a3955d1b64eb537;hp=0b0c31ba3005a4a8ce12bc86dcf73eda5913d760;hpb=e569fae67bbbd7f4261a2a8c6d714d0fd166d2cf;p=sliver-openvswitch.git diff --git a/vswitchd/ovs-brcompatd.c b/vswitchd/ovs-brcompatd.c index 0b0c31ba3..adabe8949 100644 --- a/vswitchd/ovs-brcompatd.c +++ b/vswitchd/ovs-brcompatd.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2008, 2009, 2010 Nicira Networks +/* Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -43,49 +44,39 @@ #include "leak-checker.h" #include "netdev.h" #include "netlink.h" +#include "netlink-notifier.h" +#include "netlink-socket.h" #include "ofpbuf.h" #include "openvswitch/brcompat-netlink.h" -#include "ovsdb-idl.h" #include "packets.h" #include "poll-loop.h" #include "process.h" +#include "rtnetlink-link.h" #include "signals.h" +#include "sset.h" #include "svec.h" #include "timeval.h" #include "unixctl.h" #include "util.h" -#include "vswitchd/vswitch-idl.h" - #include "vlog.h" -#define THIS_MODULE VLM_brcompatd +VLOG_DEFINE_THIS_MODULE(brcompatd); /* xxx Just hangs if datapath is rmmod/insmod. Learn to reconnect? */ -/* Actions to modify bridge compatibility configuration. */ -enum bmc_action { - BMC_ADD_DP, - BMC_DEL_DP, - BMC_ADD_PORT, - BMC_DEL_PORT -}; - -static const char *parse_options(int argc, char *argv[]); +static void parse_options(int argc, char *argv[]); static void usage(void) NO_RETURN; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 60); -/* Maximum number of milliseconds to wait before pruning port entries that - * no longer exist. If set to zero, ports are never pruned. */ -static int prune_timeout = 5000; +/* --appctl: Absolute path to ovs-appctl. */ +static char *appctl_program; -/* Shell command to execute (via popen()) to send a control command to the - * running ovs-vswitchd process. The string must contain one instance of %s, - * which is replaced by the control command. */ -static char *appctl_command; +/* --vsctl: Absolute path to ovs-vsctl. */ +static char *vsctl_program; -/* Netlink socket to listen for interface changes. */ -static struct nl_sock *rtnl_sock; +/* Options that we should generally pass to ovs-vsctl. */ +#define VSCTL_OPTIONS "--timeout=5", "-vconsole:warn" /* Netlink socket to bridge compatibility kernel module. */ static struct nl_sock *brc_sock; @@ -97,10 +88,90 @@ static const struct nl_policy brc_multicast_policy[] = { [BRC_GENL_A_MC_GROUP] = {.type = NL_A_U32 } }; -static const struct nl_policy rtnlgrp_link_policy[] = { - [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false }, - [IFLA_MASTER] = { .type = NL_A_U32, .optional = true }, -}; +static char * +capture_vsctl_valist(const char *arg0, va_list args) +{ + char *stdout_log, *stderr_log; + enum vlog_level log_level; + struct svec argv; + int status; + char *msg; + + /* Compose arguments. */ + svec_init(&argv); + svec_add(&argv, arg0); + for (;;) { + const char *arg = va_arg(args, const char *); + if (!arg) { + break; + } + svec_add(&argv, arg); + } + svec_terminate(&argv); + + /* Run process. */ + if (process_run_capture(argv.names, &stdout_log, &stderr_log, SIZE_MAX, + &status)) { + svec_destroy(&argv); + return NULL; + } + + /* Log results. */ + if (WIFEXITED(status)) { + int code = WEXITSTATUS(status); + log_level = code == 0 ? VLL_DBG : code == 1 ? VLL_WARN : VLL_ERR; + } else { + log_level = VLL_ERR; + } + msg = process_status_msg(status); + VLOG(log_level, "ovs-vsctl exited (%s)", msg); + if (stdout_log && *stdout_log) { + VLOG(log_level, "ovs-vsctl wrote to stdout:\n%s\n", stdout_log); + } + if (stderr_log && *stderr_log) { + VLOG(log_level, "ovs-vsctl wrote to stderr:\n%s\n", stderr_log); + } + free(msg); + + svec_destroy(&argv); + + free(stderr_log); + if (WIFEXITED(status) && !WEXITSTATUS(status)) { + return stdout_log; + } else { + free(stdout_log); + return NULL; + } +} + +static char * SENTINEL(0) +capture_vsctl(const char *arg0, ...) +{ + char *stdout_log; + va_list args; + + va_start(args, arg0); + stdout_log = capture_vsctl_valist(arg0, args); + va_end(args); + + return stdout_log; +} + +static bool SENTINEL(0) +run_vsctl(const char *arg0, ...) +{ + char *stdout_log; + va_list args; + bool ok; + + va_start(args, arg0); + stdout_log = capture_vsctl_valist(arg0, args); + va_end(args); + + ok = stdout_log != NULL; + free(stdout_log); + return ok; +} static int lookup_brc_multicast_group(int *multicast_group) @@ -110,12 +181,12 @@ lookup_brc_multicast_group(int *multicast_group) struct nlattr *attrs[ARRAY_SIZE(brc_multicast_policy)]; int retval; - retval = nl_sock_create(NETLINK_GENERIC, 0, 0, 0, &sock); + retval = nl_sock_create(NETLINK_GENERIC, &sock); if (retval) { return retval; } ofpbuf_init(&request, 0); - nl_msg_put_genlmsghdr(&request, sock, 0, brc_family, + nl_msg_put_genlmsghdr(&request, 0, brc_family, NLM_F_REQUEST, BRC_GENL_C_QUERY_MC, 1); retval = nl_sock_transact(sock, &request, &reply); ofpbuf_uninit(&request); @@ -155,454 +226,23 @@ brc_open(struct nl_sock **sock) return retval; } - retval = nl_sock_create(NETLINK_GENERIC, multicast_group, 0, 0, sock); + retval = nl_sock_create(NETLINK_GENERIC, sock); if (retval) { return retval; } - return 0; + retval = nl_sock_join_mcgroup(*sock, multicast_group); + if (retval) { + nl_sock_destroy(*sock); + *sock = NULL; + } + return retval; } static const struct nl_policy brc_dp_policy[] = { [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING }, }; -static struct ovsrec_bridge * -find_bridge(const struct ovsrec_open_vswitch *ovs, const char *br_name) -{ - size_t i; - - for (i = 0; i < ovs->n_bridges; i++) { - if (!strcmp(br_name, ovs->bridges[i]->name)) { - return ovs->bridges[i]; - } - } - - return NULL; -} - -static int -execute_appctl_command(const char *unixctl_command, char **output) -{ - char *stdout_log, *stderr_log; - int error, status; - char *argv[5]; - - argv[0] = "/bin/sh"; - argv[1] = "-c"; - argv[2] = xasprintf(appctl_command, unixctl_command); - argv[3] = NULL; - - /* Run process and log status. */ - error = process_run_capture(argv, &stdout_log, &stderr_log, &status); - if (error) { - VLOG_ERR("failed to execute %s command via ovs-appctl: %s", - unixctl_command, strerror(error)); - } else if (status) { - char *msg = process_status_msg(status); - VLOG_ERR("ovs-appctl exited with error (%s)", msg); - free(msg); - error = ECHILD; - } - - /* Deal with stdout_log. */ - if (output) { - *output = stdout_log; - } else { - free(stdout_log); - } - - /* Deal with stderr_log */ - if (stderr_log && *stderr_log) { - VLOG_INFO("ovs-appctl wrote to stderr:\n%s", stderr_log); - } - free(stderr_log); - - free(argv[2]); - - return error; -} - -static void -do_get_bridge_parts(const struct ovsrec_bridge *br, struct svec *parts, - int vlan, bool break_down_bonds) -{ - struct svec ports; - size_t i, j; - - svec_init(&ports); - for (i = 0; i < br->n_ports; i++) { - const struct ovsrec_port *port = br->ports[i]; - - svec_add(&ports, port->name); - if (vlan >= 0) { - int port_vlan = port->n_tag ? *port->tag : 0; - if (vlan != port_vlan) { - continue; - } - } - if (break_down_bonds) { - for (j = 0; j < port->n_interfaces; j++) { - const struct ovsrec_interface *iface = port->interfaces[j]; - svec_add(parts, iface->name); - } - } else { - svec_add(parts, port->name); - } - } - svec_destroy(&ports); -} - -/* Add all the interfaces for 'bridge' to 'ifaces', breaking bonded interfaces - * down into their constituent parts. - * - * If 'vlan' < 0, all interfaces on 'bridge' are reported. If 'vlan' == 0, - * then only interfaces for trunk ports or ports with implicit VLAN 0 are - * reported. If 'vlan' > 0, only interfaces with implicit VLAN 'vlan' are - * reported. */ -static void -get_bridge_ifaces(const struct ovsrec_bridge *br, struct svec *ifaces, - int vlan) -{ - do_get_bridge_parts(br, ifaces, vlan, true); -} - -/* Add all the ports for 'bridge' to 'ports'. Bonded ports are reported under - * the bond name, not broken down into their constituent interfaces. - * - * If 'vlan' < 0, all ports on 'bridge' are reported. If 'vlan' == 0, then - * only trunk ports or ports with implicit VLAN 0 are reported. If 'vlan' > 0, - * only port with implicit VLAN 'vlan' are reported. */ -static void -get_bridge_ports(const struct ovsrec_bridge *br, struct svec *ports, - int vlan) -{ - do_get_bridge_parts(br, ports, vlan, false); -} - -#if 0 -/* Go through the configuration file and remove any ports that no longer - * exist associated with a bridge. */ -static void -prune_ports(void) -{ - int i, j; - struct svec bridges, delete; - - if (cfg_lock(NULL, 0)) { - /* Couldn't lock config file. */ - return; - } - - svec_init(&bridges); - svec_init(&delete); - cfg_get_subsections(&bridges, "bridge"); - for (i=0; iheader_); -} - -static bool -port_is_fake_bridge(const struct ovsrec_port *port) -{ - return (port->fake_bridge - && port->tag - && *port->tag >= 1 && *port->tag <= 4095); -} - -static void -ovs_insert_bridge(const struct ovsrec_open_vswitch *ovs, - struct ovsrec_bridge *bridge) -{ - struct ovsrec_bridge **bridges; - size_t i; - - bridges = xmalloc(sizeof *ovs->bridges * (ovs->n_bridges + 1)); - for (i = 0; i < ovs->n_bridges; i++) { - bridges[i] = ovs->bridges[i]; - } - bridges[ovs->n_bridges] = bridge; - ovsrec_open_vswitch_set_bridges(ovs, bridges, ovs->n_bridges + 1); - free(bridges); -} - -static struct json * -where_uuid_equals(const struct uuid *uuid) -{ - return - json_array_create_1( - json_array_create_3( - json_string_create("_uuid"), - json_string_create("=="), - json_array_create_2( - json_string_create("uuid"), - json_string_create_nocopy( - xasprintf(UUID_FMT, UUID_ARGS(uuid)))))); -} - -/* Commits 'txn'. If 'wait_for_reload' is true, also waits for Open vSwitch to - reload the configuration before returning. - - Returns EAGAIN if the caller should try the operation again, 0 on success, - otherwise a positive errno value. */ -static int -commit_txn(struct ovsdb_idl_txn *txn, bool wait_for_reload) -{ - struct ovsdb_idl *idl = ovsdb_idl_txn_get_idl (txn); - enum ovsdb_idl_txn_status status; - int64_t next_cfg = 0; - - if (wait_for_reload) { - const struct ovsrec_open_vswitch *ovs = ovsrec_open_vswitch_first(idl); - struct json *where = where_uuid_equals(&ovs->header_.uuid); - ovsdb_idl_txn_increment(txn, "Open_vSwitch", "next_cfg", where); - json_destroy(where); - } - status = ovsdb_idl_txn_commit_block(txn); - if (wait_for_reload && status == TXN_SUCCESS) { - next_cfg = ovsdb_idl_txn_get_increment_new_value(txn); - } - ovsdb_idl_txn_destroy(txn); - - switch (status) { - case TXN_INCOMPLETE: - NOT_REACHED(); - - case TXN_ABORTED: - VLOG_ERR_RL(&rl, "OVSDB transaction unexpectedly aborted"); - return ECONNABORTED; - - case TXN_UNCHANGED: - return 0; - - case TXN_SUCCESS: - if (wait_for_reload) { - for (;;) { - /* We can't use 'ovs' any longer because ovsdb_idl_run() can - * destroy it. */ - const struct ovsrec_open_vswitch *ovs2; - - ovsdb_idl_run(idl); - OVSREC_OPEN_VSWITCH_FOR_EACH (ovs2, idl) { - if (ovs2->cur_cfg >= next_cfg) { - goto done; - } - } - ovsdb_idl_wait(idl); - poll_block(); - } - done: ; - } - return 0; - - case TXN_TRY_AGAIN: - VLOG_ERR_RL(&rl, "OVSDB transaction needs retry"); - return EAGAIN; - - case TXN_ERROR: - VLOG_ERR_RL(&rl, "OVSDB transaction failed: %s", - ovsdb_idl_txn_get_error(txn)); - return EBUSY; - - default: - NOT_REACHED(); - } -} - -static int -add_bridge(struct ovsdb_idl *idl, const struct ovsrec_open_vswitch *ovs, - const char *br_name) -{ - struct ovsrec_bridge *br; - struct ovsrec_port *port; - struct ovsrec_interface *iface; - struct ovsdb_idl_txn *txn; - - if (find_bridge(ovs, br_name)) { - VLOG_WARN("addbr %s: bridge %s exists", br_name, br_name); - return EEXIST; - } else if (netdev_exists(br_name)) { - size_t i; - - for (i = 0; i < ovs->n_bridges; i++) { - size_t j; - struct ovsrec_bridge *br_cfg = ovs->bridges[i]; - - for (j = 0; j < br_cfg->n_ports; j++) { - if (port_is_fake_bridge(br_cfg->ports[j])) { - VLOG_WARN("addbr %s: %s exists as a fake bridge", - br_name, br_name); - return 0; - } - } - } - - VLOG_WARN("addbr %s: cannot create bridge %s because a network " - "device named %s already exists", - br_name, br_name, br_name); - return EEXIST; - } - - txn = ovsdb_idl_txn_create(idl); - - iface = ovsrec_interface_insert(txn_from_openvswitch(ovs)); - ovsrec_interface_set_name(iface, br_name); - - port = ovsrec_port_insert(txn_from_openvswitch(ovs)); - ovsrec_port_set_name(port, br_name); - ovsrec_port_set_interfaces(port, &iface, 1); - - br = ovsrec_bridge_insert(txn_from_openvswitch(ovs)); - ovsrec_bridge_set_name(br, br_name); - ovsrec_bridge_set_ports(br, &port, 1); - - ovs_insert_bridge(ovs, br); - - return commit_txn(txn, true); -} - -static void -add_port(const struct ovsrec_open_vswitch *ovs, - const struct ovsrec_bridge *br, const char *port_name) -{ - struct ovsrec_interface *iface; - struct ovsrec_port *port; - struct ovsrec_port **ports; - size_t i; - - /* xxx Check conflicts? */ - iface = ovsrec_interface_insert(txn_from_openvswitch(ovs)); - ovsrec_interface_set_name(iface, port_name); - - port = ovsrec_port_insert(txn_from_openvswitch(ovs)); - ovsrec_port_set_name(port, port_name); - ovsrec_port_set_interfaces(port, &iface, 1); - - ports = xmalloc(sizeof *br->ports * (br->n_ports + 1)); - for (i = 0; i < br->n_ports; i++) { - ports[i] = br->ports[i]; - } - ports[br->n_ports] = port; - ovsrec_bridge_set_ports(br, ports, br->n_ports + 1); - free(ports); -} - -static void -del_port(const struct ovsrec_bridge *br, const char *port_name) -{ - size_t i, j; - struct ovsrec_port *port_rec = NULL; - - for (i = 0; i < br->n_ports; i++) { - struct ovsrec_port *port = br->ports[i]; - if (!strcmp(port_name, port->name)) { - port_rec = port; - } - for (j = 0; j < port->n_interfaces; j++) { - struct ovsrec_interface *iface = port->interfaces[j]; - if (!strcmp(port_name, iface->name)) { - ovsrec_interface_delete(iface); - } - } - } - - /* xxx Probably can move this into the "for" loop. */ - if (port_rec) { - struct ovsrec_port **ports; - size_t n; - - ports = xmalloc(sizeof *br->ports * br->n_ports); - for (i = n = 0; i < br->n_ports; i++) { - if (br->ports[i] != port_rec) { - ports[n++] = br->ports[i]; - } - } - ovsrec_bridge_set_ports(br, ports, n); - free(ports); - - ovsrec_port_delete(port_rec); - } -} - -static int -del_bridge(struct ovsdb_idl *idl, - const struct ovsrec_open_vswitch *ovs, const char *br_name) -{ - struct ovsrec_bridge *br = find_bridge(ovs, br_name); - struct ovsrec_bridge **bridges; - struct ovsdb_idl_txn *txn; - size_t i, n; - - if (!br) { - VLOG_WARN("delbr %s: no bridge named %s", br_name, br_name); - return ENXIO; - } - - txn = ovsdb_idl_txn_create(idl); - - del_port(br, br_name); - - bridges = xmalloc(sizeof *ovs->bridges * ovs->n_bridges); - for (i = n = 0; i < ovs->n_bridges; i++) { - if (ovs->bridges[i] != br) { - bridges[n++] = ovs->bridges[i]; - } - } - ovsrec_open_vswitch_set_bridges(ovs, bridges, n); - free(bridges); - - /* Delete the bridge itself. */ - ovsrec_bridge_delete(br); - - return commit_txn(txn, true); -} - static int parse_command(struct ofpbuf *buffer, uint32_t *seq, const char **br_name, const char **port_name, uint64_t *count, uint64_t *skip) @@ -647,7 +287,7 @@ static struct ofpbuf * compose_reply(uint32_t seq, int error) { struct ofpbuf *reply = ofpbuf_new(4096); - nl_msg_put_genlmsghdr(reply, brc_sock, 32, brc_family, NLM_F_REQUEST, + nl_msg_put_genlmsghdr(reply, 32, brc_family, NLM_F_REQUEST, BRC_GENL_C_DP_RESULT, 1); ((struct nlmsghdr *) reply->data)->nlmsg_seq = seq; nl_msg_put_u32(reply, BRC_GENL_A_ERR_CODE, error); @@ -675,9 +315,7 @@ send_simple_reply(uint32_t seq, int error) } static int -handle_bridge_cmd(struct ovsdb_idl *idl, - const struct ovsrec_open_vswitch *ovs, - struct ofpbuf *buffer, bool add) +handle_bridge_cmd(struct ofpbuf *buffer, bool add) { const char *br_name; uint32_t seq; @@ -685,14 +323,14 @@ handle_bridge_cmd(struct ovsdb_idl *idl, error = parse_command(buffer, &seq, &br_name, NULL, NULL, NULL); if (!error) { - int retval; - - do { - retval = (add ? add_bridge : del_bridge)(idl, ovs, br_name); - VLOG_INFO_RL(&rl, "%sbr %s: %s", - add ? "add" : "del", br_name, strerror(retval)); - } while (retval == EAGAIN); - + const char *vsctl_cmd = add ? "add-br" : "del-br"; + const char *brctl_cmd = add ? "addbr" : "delbr"; + if (!run_vsctl(vsctl_program, VSCTL_OPTIONS, + "--", vsctl_cmd, br_name, + "--", "comment", "ovs-brcompatd:", brctl_cmd, br_name, + (char *) NULL)) { + error = add ? EEXIST : ENXIO; + } send_simple_reply(seq, error); } return error; @@ -704,87 +342,79 @@ static const struct nl_policy brc_port_policy[] = { }; static int -handle_port_cmd(struct ovsdb_idl *idl, - const struct ovsrec_open_vswitch *ovs, - struct ofpbuf *buffer, bool add) +handle_port_cmd(struct ofpbuf *buffer, bool add) { - const char *cmd_name = add ? "add-if" : "del-if"; const char *br_name, *port_name; uint32_t seq; int error; error = parse_command(buffer, &seq, &br_name, &port_name, NULL, NULL); if (!error) { - struct ovsrec_bridge *br = find_bridge(ovs, br_name); - - if (!br) { - VLOG_WARN("%s %s %s: no bridge named %s", - cmd_name, br_name, port_name, br_name); - error = EINVAL; - } else if (!netdev_exists(port_name)) { - VLOG_WARN("%s %s %s: no network device named %s", - cmd_name, br_name, port_name, port_name); + const char *vsctl_cmd = add ? "add-port" : "del-port"; + const char *brctl_cmd = add ? "addif" : "delif"; + if (!run_vsctl(vsctl_program, VSCTL_OPTIONS, + "--", vsctl_cmd, br_name, port_name, + "--", "comment", "ovs-brcompatd:", brctl_cmd, + br_name, port_name, (char *) NULL)) { error = EINVAL; - } else { - do { - struct ovsdb_idl_txn *txn = ovsdb_idl_txn_create(idl); - if (add) { - add_port(ovs, br, port_name); - } else { - del_port(br, port_name); - } - error = commit_txn(txn, true); - VLOG_INFO_RL(&rl, "%s %s %s: %s", - cmd_name, br_name, port_name, strerror(error)); - } while (error == EAGAIN); } send_simple_reply(seq, error); } - return error; } -/* The caller is responsible for freeing '*ovs_name' if the call is - * successful. */ -static int -linux_bridge_to_ovs_bridge(const struct ovsrec_open_vswitch *ovs, - const char *linux_name, - const struct ovsrec_bridge **ovs_bridge, - int *br_vlan) +static char * +linux_bridge_to_ovs_bridge(const char *linux_name, int *br_vlanp) { - *ovs_bridge = find_bridge(ovs, linux_name); - if (*ovs_bridge) { - /* Bridge name is the same. We are interested in VLAN 0. */ - *br_vlan = 0; - return 0; - } else { - /* No such Open vSwitch bridge 'linux_name', but there might be an - * internal port named 'linux_name' on some other bridge - * 'ovs_bridge'. If so then we are interested in the VLAN assigned to - * port 'linux_name' on the bridge named 'ovs_bridge'. */ - size_t i, j; - - for (i = 0; i < ovs->n_bridges; i++) { - const struct ovsrec_bridge *br = ovs->bridges[i]; - - for (j = 0; j < br->n_ports; j++) { - const struct ovsrec_port *port = br->ports[j]; - - if (!strcmp(port->name, linux_name)) { - *ovs_bridge = br; - *br_vlan = port->n_tag ? *port->tag : -1; - return 0; - } - } + char *save_ptr = NULL; + const char *br_name, *br_vlan; + char *br_name_copy; + char *output; - } - return ENODEV; + output = capture_vsctl(vsctl_program, VSCTL_OPTIONS, + "--", "br-to-parent", linux_name, + "--", "br-to-vlan", linux_name, + (char *) NULL); + if (!output) { + return NULL; + } + + br_name = strtok_r(output, " \t\r\n", &save_ptr); + br_vlan = strtok_r(NULL, " \t\r\n", &save_ptr); + if (!br_name || !br_vlan) { + free(output); + return NULL; + } + br_name_copy = xstrdup(br_name); + *br_vlanp = atoi(br_vlan); + + free(output); + + return br_name_copy; +} + +static void +get_bridge_ifaces(const char *br_name, struct sset *ifaces) +{ + char *save_ptr = NULL; + char *output; + char *iface; + + output = capture_vsctl(vsctl_program, VSCTL_OPTIONS, "list-ifaces", + br_name, (char *) NULL); + if (!output) { + return; } + + for (iface = strtok_r(output, " \t\r\n", &save_ptr); iface; + iface = strtok_r(NULL, " \t\r\n", &save_ptr)) { + sset_add(ifaces, iface); + } + free(output); } static int -handle_fdb_query_cmd(const struct ovsrec_open_vswitch *ovs, - struct ofpbuf *buffer) +handle_fdb_query_cmd(struct ofpbuf *buffer) { /* This structure is copied directly from the Linux 2.6.30 header files. * It would be more straightforward to #include , but @@ -813,54 +443,53 @@ handle_fdb_query_cmd(const struct ovsrec_open_vswitch *ovs, * pretend that the former is the case even though the latter is the * implementation. */ const char *linux_name; /* Name used by brctl. */ - const struct ovsrec_bridge *ovs_bridge; /* Bridge used by ovs-vswitchd. */ int br_vlan; /* VLAN tag. */ - struct svec ifaces; + struct sset ifaces; struct ofpbuf query_data; + const char *iface_name; struct ofpbuf *reply; - char *unixctl_command; uint64_t count, skip; + char *br_name; char *output; char *save_ptr; uint32_t seq; int error; - /* Parse the command received from brcompat_mod. */ + /* Parse the command received from brcompat. */ error = parse_command(buffer, &seq, &linux_name, NULL, &count, &skip); if (error) { return error; } /* Figure out vswitchd bridge and VLAN. */ - error = linux_bridge_to_ovs_bridge(ovs, linux_name, - &ovs_bridge, &br_vlan); - if (error) { + br_name = linux_bridge_to_ovs_bridge(linux_name, &br_vlan); + if (!br_name) { + error = EINVAL; send_simple_reply(seq, error); return error; } /* Fetch the forwarding database using ovs-appctl. */ - unixctl_command = xasprintf("fdb/show %s", ovs_bridge->name); - error = execute_appctl_command(unixctl_command, &output); - free(unixctl_command); - if (error) { + output = capture_vsctl(appctl_program, "fdb/show", br_name, + (char *) NULL); + if (!output) { + error = ECHILD; send_simple_reply(seq, error); return error; } /* Fetch the MAC address for each interface on the bridge, so that we can * fill in the is_local field in the response. */ - svec_init(&ifaces); - get_bridge_ifaces(ovs_bridge, &ifaces, br_vlan); - local_macs = xmalloc(ifaces.n * sizeof *local_macs); + sset_init(&ifaces); + get_bridge_ifaces(linux_name, &ifaces); + local_macs = xmalloc(sset_count(&ifaces) * sizeof *local_macs); n_local_macs = 0; - for (i = 0; i < ifaces.n; i++) { - const char *iface_name = ifaces.names[i]; + SSET_FOR_EACH (iface_name, &ifaces) { struct mac *mac = &local_macs[n_local_macs]; struct netdev *netdev; - error = netdev_open_default(iface_name, &netdev); + error = netdev_open(iface_name, "system", &netdev); if (!error) { if (!netdev_get_etheraddr(netdev, mac->addr)) { n_local_macs++; @@ -868,7 +497,7 @@ handle_fdb_query_cmd(const struct ovsrec_open_vswitch *ovs, netdev_close(netdev); } } - svec_destroy(&ifaces); + sset_destroy(&ifaces); /* Parse the response from ovs-appctl and convert it to binary format to * pass back to the kernel. */ @@ -890,7 +519,7 @@ handle_fdb_query_cmd(const struct ovsrec_open_vswitch *ovs, if (sscanf(line, "%d %d "ETH_ADDR_SCAN_FMT" %d", &port, &vlan, ETH_ADDR_SCAN_ARGS(mac), &age) != 2 + ETH_ADDR_SCAN_COUNT + 1) { - struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); VLOG_INFO_RL(&rl, "fdb/show output has invalid format: %s", line); continue; } @@ -933,28 +562,32 @@ handle_fdb_query_cmd(const struct ovsrec_open_vswitch *ovs, /* Free memory. */ ofpbuf_uninit(&query_data); + free(local_macs); return 0; } static void -send_ifindex_reply(uint32_t seq, struct svec *ifaces) +send_ifindex_reply(uint32_t seq, char *output) { + size_t allocated_indices; + char *save_ptr = NULL; struct ofpbuf *reply; const char *iface; size_t n_indices; int *indices; - size_t i; - /* Make sure that any given interface only occurs once. This shouldn't - * happen, but who knows what people put into their configuration files. */ - svec_sort_unique(ifaces); + indices = NULL; + n_indices = allocated_indices = 0; + for (iface = strtok_r(output, " \t\r\n", &save_ptr); iface; + iface = strtok_r(NULL, " \t\r\n", &save_ptr)) { + int ifindex; + + if (n_indices >= allocated_indices) { + indices = x2nrealloc(indices, &allocated_indices, sizeof *indices); + } - /* Convert 'ifaces' into ifindexes. */ - n_indices = 0; - indices = xmalloc(ifaces->n * sizeof *indices); - SVEC_FOR_EACH (i, iface, ifaces) { - int ifindex = if_nametoindex(iface); + ifindex = if_nametoindex(iface); if (ifindex) { indices[n_indices++] = ifindex; } @@ -971,14 +604,10 @@ send_ifindex_reply(uint32_t seq, struct svec *ifaces) } static int -handle_get_bridges_cmd(const struct ovsrec_open_vswitch *ovs, - struct ofpbuf *buffer) +handle_get_bridges_cmd(struct ofpbuf *buffer) { - struct svec bridges; - size_t i, j; - + char *output; uint32_t seq; - int error; /* Parse Netlink command. @@ -990,39 +619,22 @@ handle_get_bridges_cmd(const struct ovsrec_open_vswitch *ovs, return error; } - /* Get all the real bridges and all the fake ones. */ - svec_init(&bridges); - for (i = 0; i < ovs->n_bridges; i++) { - const struct ovsrec_bridge *br = ovs->bridges[i]; - - svec_add(&bridges, br->name); - for (j = 0; j < br->n_ports; j++) { - const struct ovsrec_port *port = br->ports[j]; - - if (port->fake_bridge) { - svec_add(&bridges, port->name); - } - } + output = capture_vsctl(vsctl_program, VSCTL_OPTIONS, "list-br", (char *) NULL); + if (!output) { + return ENODEV; } - send_ifindex_reply(seq, &bridges); - svec_destroy(&bridges); - + send_ifindex_reply(seq, output); + free(output); return 0; } static int -handle_get_ports_cmd(const struct ovsrec_open_vswitch *ovs, - struct ofpbuf *buffer) +handle_get_ports_cmd(struct ofpbuf *buffer) { - uint32_t seq; - const char *linux_name; - const struct ovsrec_bridge *ovs_bridge; - int br_vlan; - - struct svec ports; - + uint32_t seq; + char *output; int error; /* Parse Netlink command. */ @@ -1031,95 +643,114 @@ handle_get_ports_cmd(const struct ovsrec_open_vswitch *ovs, return error; } - error = linux_bridge_to_ovs_bridge(ovs, linux_name, - &ovs_bridge, &br_vlan); - if (error) { - send_simple_reply(seq, error); - return error; + output = capture_vsctl(vsctl_program, VSCTL_OPTIONS, "list-ports", linux_name, + (char *) NULL); + if (!output) { + return ENODEV; } - svec_init(&ports); - get_bridge_ports(ovs_bridge, &ports, br_vlan); - svec_sort(&ports); - svec_del(&ports, linux_name); - send_ifindex_reply(seq, &ports); /* XXX bonds won't show up */ - svec_destroy(&ports); - + send_ifindex_reply(seq, output); + free(output); return 0; } -static void -brc_recv_update(struct ovsdb_idl *idl) +static bool +brc_recv_update__(struct ofpbuf *buffer) { - int retval; - struct ofpbuf *buffer; - struct genlmsghdr *genlmsghdr; - const struct ovsrec_open_vswitch *ovs; - - buffer = NULL; - do { - ofpbuf_delete(buffer); - retval = nl_sock_recv(brc_sock, &buffer, false); - } while (retval == ENOBUFS - || (!retval - && (nl_msg_nlmsgerr(buffer, NULL) - || nl_msg_nlmsghdr(buffer)->nlmsg_type == NLMSG_DONE))); - if (retval) { - if (retval != EAGAIN) { + for (;;) { + int retval = nl_sock_recv(brc_sock, buffer, false); + switch (retval) { + case 0: + if (nl_msg_nlmsgerr(buffer, NULL) + || nl_msg_nlmsghdr(buffer)->nlmsg_type == NLMSG_DONE) { + break; + } + return true; + + case ENOBUFS: + break; + + case EAGAIN: + return false; + + default: VLOG_WARN_RL(&rl, "brc_recv_update: %s", strerror(retval)); + return false; } - return; } +} + +static void +brc_recv_update(void) +{ + struct genlmsghdr *genlmsghdr; + uint64_t buffer_stub[1024 / 8]; + struct ofpbuf buffer; - genlmsghdr = nl_msg_genlmsghdr(buffer); + ofpbuf_use_stub(&buffer, buffer_stub, sizeof buffer_stub); + if (!brc_recv_update__(&buffer)) { + goto error; + } + + genlmsghdr = nl_msg_genlmsghdr(&buffer); if (!genlmsghdr) { VLOG_WARN_RL(&rl, "received packet too short for generic NetLink"); goto error; } - if (nl_msg_nlmsghdr(buffer)->nlmsg_type != brc_family) { + if (nl_msg_nlmsghdr(&buffer)->nlmsg_type != brc_family) { VLOG_DBG_RL(&rl, "received type (%"PRIu16") != brcompat family (%d)", - nl_msg_nlmsghdr(buffer)->nlmsg_type, brc_family); + nl_msg_nlmsghdr(&buffer)->nlmsg_type, brc_family); goto error; } - /* Get the Open vSwitch configuration. Just drop the request on the floor - * if a valid configuration doesn't exist. (We could check this earlier, - * but we want to drain pending Netlink messages even when there is no Open - * vSwitch configuration.) */ - ovs = ovsrec_open_vswitch_first(idl); - if (!ovs) { - VLOG_WARN_RL(&rl, "could not find valid configuration to update"); - goto error; - } + /* Service all pending network device notifications before executing the + * command. This is very important to avoid a race in a scenario like the + * following, which is what happens with XenServer Tools version 5.0.0 + * during boot of a Windows VM: + * + * 1. Create tap1.0 and vif1.0. + * 2. Delete tap1.0. + * 3. Delete vif1.0. + * 4. Re-create vif1.0. + * + * We must process the network device notification from step 3 before we + * process the brctl command from step 4. If we process them in the + * reverse order, then step 4 completes as a no-op but step 3 then deletes + * the port that was just added. + * + * (XenServer Tools 5.5.0 does not exhibit this behavior, and neither does + * a VM without Tools installed at all.) + */ + rtnetlink_link_run(); switch (genlmsghdr->cmd) { case BRC_GENL_C_DP_ADD: - handle_bridge_cmd(idl, ovs, buffer, true); + handle_bridge_cmd(&buffer, true); break; case BRC_GENL_C_DP_DEL: - handle_bridge_cmd(idl, ovs, buffer, false); + handle_bridge_cmd(&buffer, false); break; case BRC_GENL_C_PORT_ADD: - handle_port_cmd(idl, ovs, buffer, true); + handle_port_cmd(&buffer, true); break; case BRC_GENL_C_PORT_DEL: - handle_port_cmd(idl, ovs, buffer, false); + handle_port_cmd(&buffer, false); break; case BRC_GENL_C_FDB_QUERY: - handle_fdb_query_cmd(ovs, buffer); + handle_fdb_query_cmd(&buffer); break; case BRC_GENL_C_GET_BRIDGES: - handle_get_bridges_cmd(ovs, buffer); + handle_get_bridges_cmd(&buffer); break; case BRC_GENL_C_GET_PORTS: - handle_get_ports_cmd(ovs, buffer); + handle_get_ports_cmd(&buffer); break; default: @@ -1129,146 +760,55 @@ brc_recv_update(struct ovsdb_idl *idl) } error: - ofpbuf_delete(buffer); - return; + ofpbuf_uninit(&buffer); } -/* Check for interface configuration changes announced through RTNL. */ static void -rtnl_recv_update(struct ovsdb_idl *idl, - const struct ovsrec_open_vswitch *ovs) +netdev_changed_cb(const struct rtnetlink_link_change *change, + void *aux OVS_UNUSED) { - struct ofpbuf *buf; + char br_name[IFNAMSIZ]; + const char *port_name; - int error = nl_sock_recv(rtnl_sock, &buf, false); - if (error == EAGAIN) { - /* Nothing to do. */ - } else if (error == ENOBUFS) { + if (!change) { VLOG_WARN_RL(&rl, "network monitor socket overflowed"); - } else if (error) { - VLOG_WARN_RL(&rl, "error on network monitor socket: %s", - strerror(error)); - } else { - struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)]; - struct nlmsghdr *nlh; - struct ifinfomsg *iim; - - nlh = ofpbuf_at(buf, 0, NLMSG_HDRLEN); - iim = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *iim); - if (!iim) { - VLOG_WARN_RL(&rl, "received bad rtnl message (no ifinfomsg)"); - ofpbuf_delete(buf); - return; - } - - if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg), - rtnlgrp_link_policy, - attrs, ARRAY_SIZE(rtnlgrp_link_policy))) { - VLOG_WARN_RL(&rl,"received bad rtnl message (policy)"); - ofpbuf_delete(buf); - return; - } - if (nlh->nlmsg_type == RTM_DELLINK && attrs[IFLA_MASTER]) { - const char *port_name = nl_attr_get_string(attrs[IFLA_IFNAME]); - char br_name[IFNAMSIZ]; - uint32_t br_idx = nl_attr_get_u32(attrs[IFLA_MASTER]); - - if (!if_indextoname(br_idx, br_name)) { - ofpbuf_delete(buf); - return; - } + return; + } - if (!netdev_exists(port_name)) { - /* Network device is really gone. */ - struct ovsdb_idl_txn *txn; - struct ovsrec_bridge *br; - - VLOG_INFO("network device %s destroyed, " - "removing from bridge %s", port_name, br_name); - - br = find_bridge(ovs, br_name); - if (!br) { - VLOG_WARN("no bridge named %s from which to remove %s", - br_name, port_name); - ofpbuf_delete(buf); - return; - } - - txn = ovsdb_idl_txn_create(idl); - del_port(br, port_name); - commit_txn(txn, false); - } else { - /* A network device by that name exists even though the kernel - * told us it had disappeared. Probably, what happened was - * this: - * - * 1. Device destroyed. - * 2. Notification sent to us. - * 3. New device created with same name as old one. - * 4. ovs-brcompatd notified, removes device from bridge. - * - * There's no a priori reason that in this situation that the - * new device with the same name should remain in the bridge; - * on the contrary, that would be unexpected. *But* there is - * one important situation where, if we do this, bad things - * happen. This is the case of XenServer Tools version 5.0.0, - * which on boot of a Windows VM cause something like this to - * happen on the Xen host: - * - * i. Create tap1.0 and vif1.0. - * ii. Delete tap1.0. - * iii. Delete vif1.0. - * iv. Re-create vif1.0. - * - * (XenServer Tools 5.5.0 does not exhibit this behavior, and - * neither does a VM without Tools installed at all.@.) - * - * Steps iii and iv happen within a few seconds of each other. - * Step iv causes /etc/xensource/scripts/vif to run, which in - * turn calls ovs-cfg-mod to add the new device to the bridge. - * If step iv happens after step 4 (in our first list of - * steps), then all is well, but if it happens between 3 and 4 - * (which can easily happen if ovs-brcompatd has to wait to - * lock the configuration file), then we will remove the new - * incarnation from the bridge instead of the old one! - * - * So, to avoid this problem, we do nothing here. This is - * strictly incorrect except for this one particular case, and - * perhaps that will bite us someday. If that happens, then we - * will have to somehow track network devices by ifindex, since - * a new device will have a new ifindex even if it has the same - * name as an old device. - */ - VLOG_INFO("kernel reported network device %s removed but " - "a device by that name exists (XS Tools 5.0.0?)", - port_name); - } - } - ofpbuf_delete(buf); + if (change->nlmsg_type != RTM_DELLINK || !change->master_ifindex) { + return; } + + port_name = change->ifname; + if (!if_indextoname(change->master_ifindex, br_name)) { + return; + } + + VLOG_INFO("network device %s destroyed, removing from bridge %s", + port_name, br_name); + + run_vsctl(vsctl_program, VSCTL_OPTIONS, + "--", "--if-exists", "del-port", port_name, + "--", "comment", "ovs-brcompatd:", port_name, "disappeared", + (char *) NULL); } int main(int argc, char *argv[]) { + extern struct vlog_module VLM_reconnect; + struct nln_notifier *link_notifier; struct unixctl_server *unixctl; - const char *remote; - struct ovsdb_idl *idl; int retval; proctitle_init(argc, argv); set_program_name(argv[0]); - time_init(); - vlog_init(); - vlog_set_levels(VLM_ANY_MODULE, VLF_CONSOLE, VLL_WARN); - vlog_set_levels(VLM_reconnect, VLF_ANY_FACILITY, VLL_WARN); + vlog_set_levels(&VLM_reconnect, VLF_ANY_FACILITY, VLL_WARN); - remote = parse_options(argc, argv); + parse_options(argc, argv); signal(SIGPIPE, SIG_IGN); process_init(); - ovsrec_init(); - die_if_already_running(); daemonize_start(); retval = unixctl_server_create(NULL, &unixctl); @@ -1277,112 +817,57 @@ main(int argc, char *argv[]) } if (brc_open(&brc_sock)) { - ovs_fatal(0, "could not open brcompat socket. Check " - "\"brcompat\" kernel module."); + VLOG_FATAL("could not open brcompat socket. Check " + "\"brcompat\" kernel module."); } - if (prune_timeout) { - if (nl_sock_create(NETLINK_ROUTE, RTNLGRP_LINK, 0, 0, &rtnl_sock)) { - ovs_fatal(0, "could not create rtnetlink socket"); - } - } + link_notifier = rtnetlink_link_notifier_create(netdev_changed_cb, NULL); daemonize_complete(); - idl = ovsdb_idl_create(remote, &ovsrec_idl_class); - for (;;) { - const struct ovsrec_open_vswitch *ovs; - - ovsdb_idl_run(idl); - unixctl_server_run(unixctl); - brc_recv_update(idl); + rtnetlink_link_run(); + brc_recv_update(); - ovs = ovsrec_open_vswitch_first(idl); - if (!ovs && ovsdb_idl_has_ever_connected(idl)) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); - VLOG_WARN_RL(&rl, "%s: database does not contain any Open vSwitch " - "configuration", remote); - } netdev_run(); - /* If 'prune_timeout' is non-zero, we actively prune from the - * configuration of port entries that are no longer valid. We - * use two methods: - * - * 1) The kernel explicitly notifies us of removed ports - * through the RTNL messages. - * - * 2) We periodically check all ports associated with bridges - * to see if they no longer exist. - */ - if (ovs && prune_timeout) { - rtnl_recv_update(idl, ovs); -#if 0 - prune_ports(); -#endif - - nl_sock_wait(rtnl_sock, POLLIN); - poll_timer_wait(prune_timeout); - } - - nl_sock_wait(brc_sock, POLLIN); - ovsdb_idl_wait(idl); unixctl_server_wait(unixctl); + rtnetlink_link_wait(); netdev_wait(); poll_block(); } - ovsdb_idl_destroy(idl); + rtnetlink_link_notifier_destroy(link_notifier); return 0; } static void -validate_appctl_command(void) -{ - const char *p; - int n; - - n = 0; - for (p = strchr(appctl_command, '%'); p; p = strchr(p + 2, '%')) { - if (p[1] == '%') { - /* Nothing to do. */ - } else if (p[1] == 's') { - n++; - } else { - ovs_fatal(0, "only '%%s' and '%%%%' allowed in --appctl-command"); - } - } - if (n != 1) { - ovs_fatal(0, "'%%s' must appear exactly once in --appctl-command"); - } -} - -static const char * parse_options(int argc, char *argv[]) { enum { - OPT_PRUNE_TIMEOUT, - OPT_APPCTL_COMMAND, + OPT_APPCTL, + OPT_VSCTL, VLOG_OPTION_ENUMS, - LEAK_CHECKER_OPTION_ENUMS + LEAK_CHECKER_OPTION_ENUMS, + DAEMON_OPTION_ENUMS }; static struct option long_options[] = { - {"help", no_argument, 0, 'h'}, - {"version", no_argument, 0, 'V'}, - {"prune-timeout", required_argument, 0, OPT_PRUNE_TIMEOUT}, - {"appctl-command", required_argument, 0, OPT_APPCTL_COMMAND}, + {"help", no_argument, NULL, 'h'}, + {"version", no_argument, NULL, 'V'}, + {"appctl", required_argument, NULL, OPT_APPCTL}, + {"vsctl", required_argument, NULL, OPT_VSCTL}, DAEMON_LONG_OPTIONS, VLOG_LONG_OPTIONS, LEAK_CHECKER_LONG_OPTIONS, - {0, 0, 0, 0}, + {NULL, 0, NULL, 0}, }; char *short_options = long_options_to_short_options(long_options); + const char *appctl = "ovs-appctl"; + const char *vsctl = "ovs-vsctl"; - appctl_command = xasprintf("%s/ovs-appctl %%s", ovs_bindir); for (;;) { int c; @@ -1392,20 +877,19 @@ parse_options(int argc, char *argv[]) } switch (c) { - case 'H': case 'h': usage(); case 'V': - OVS_PRINT_VERSION(0, 0); + ovs_print_version(0, 0); exit(EXIT_SUCCESS); - case OPT_PRUNE_TIMEOUT: - prune_timeout = atoi(optarg) * 1000; + case OPT_APPCTL: + appctl = optarg; break; - case OPT_APPCTL_COMMAND: - appctl_command = optarg; + case OPT_VSCTL: + vsctl = optarg; break; VLOG_OPTION_HANDLERS @@ -1421,29 +905,33 @@ parse_options(int argc, char *argv[]) } free(short_options); - validate_appctl_command(); - - argc -= optind; - argv += optind; + appctl_program = process_search_path(appctl); + if (!appctl_program) { + VLOG_FATAL("%s: not found in $PATH (use --appctl to specify an " + "alternate location)", appctl); + } - if (argc != 1) { - ovs_fatal(0, "database socket is non-option argument; " - "use --help for usage"); + vsctl_program = process_search_path(vsctl); + if (!vsctl_program) { + VLOG_FATAL("%s: not found in $PATH (use --vsctl to specify an " + "alternate location)", vsctl); } - return argv[0]; + if (argc != optind) { + VLOG_FATAL("no non-option arguments are supported; " + "use --help for usage"); + } } static void usage(void) { printf("%s: bridge compatibility front-end for ovs-vswitchd\n" - "usage: %s [OPTIONS] CONFIG\n" - "CONFIG is the configuration file used by ovs-vswitchd.\n", + "usage: %s [OPTIONS]\n", program_name, program_name); printf("\nConfiguration options:\n" - " --appctl-command=COMMAND shell command to run ovs-appctl\n" - " --prune-timeout=SECS wait at most SECS before pruning ports\n" + " --appctl=PROGRAM overrides $PATH for finding ovs-appctl\n" + " --vsctl=PROGRAM overrides $PATH for finding ovs-vsctl\n" ); daemon_usage(); vlog_usage(); @@ -1451,6 +939,5 @@ usage(void) " -h, --help display this help message\n" " -V, --version display version information\n"); leak_checker_usage(); - printf("\nThe default appctl command is:\n%s\n", appctl_command); exit(EXIT_SUCCESS); }