X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=vswitchd%2Fovs-brcompatd.c;h=9d5cdfb13d2686cdd9cd2dcddd3eba7da3eaea8e;hb=e534fe1bcf37ca9333356d422d9996ca12845302;hp=9254c5888686fc091273e2c4479e73cc30005dc4;hpb=110d69af04a250cc2560644edd65a27158fa45e3;p=sliver-openvswitch.git diff --git a/vswitchd/ovs-brcompatd.c b/vswitchd/ovs-brcompatd.c index 9254c5888..9d5cdfb13 100644 --- a/vswitchd/ovs-brcompatd.c +++ b/vswitchd/ovs-brcompatd.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2008, 2009 Nicira Networks +/* Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,31 +33,32 @@ #include #include -#include "cfg.h" #include "command-line.h" #include "coverage.h" #include "daemon.h" #include "dirs.h" -#include "dpif.h" #include "dynamic-string.h" #include "fatal-signal.h" -#include "fault.h" +#include "json.h" #include "leak-checker.h" #include "netdev.h" #include "netlink.h" +#include "netlink-socket.h" #include "ofpbuf.h" #include "openvswitch/brcompat-netlink.h" +#include "ovsdb-idl.h" #include "packets.h" #include "poll-loop.h" #include "process.h" #include "signals.h" -#include "svec.h" +#include "sset.h" #include "timeval.h" #include "unixctl.h" #include "util.h" - #include "vlog.h" -#define THIS_MODULE VLM_brcompatd +#include "vswitchd/vswitch-idl.h" + +VLOG_DEFINE_THIS_MODULE(brcompatd); /* xxx Just hangs if datapath is rmmod/insmod. Learn to reconnect? */ @@ -70,22 +71,15 @@ enum bmc_action { BMC_DEL_PORT }; -static void parse_options(int argc, char *argv[]); +static const char *parse_options(int argc, char *argv[]); static void usage(void) NO_RETURN; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 60); -/* Maximum number of milliseconds to wait for the config file to be - * unlocked. If set to zero, no waiting will occur. */ -static int lock_timeout = 500; - -/* Maximum number of milliseconds to wait before pruning port entries that +/* Maximum number of milliseconds to wait before pruning port entries that * no longer exist. If set to zero, ports are never pruned. */ static int prune_timeout = 5000; -/* Config file shared with ovs-vswitchd (usually ovs-vswitchd.conf). */ -static char *config_file; - /* Shell command to execute (via popen()) to send a control command to the * running ovs-vswitchd process. The string must contain one instance of %s, * which is replaced by the control command. */ @@ -117,12 +111,12 @@ lookup_brc_multicast_group(int *multicast_group) struct nlattr *attrs[ARRAY_SIZE(brc_multicast_policy)]; int retval; - retval = nl_sock_create(NETLINK_GENERIC, 0, 0, 0, &sock); + retval = nl_sock_create(NETLINK_GENERIC, &sock); if (retval) { return retval; } ofpbuf_init(&request, 0); - nl_msg_put_genlmsghdr(&request, sock, 0, brc_family, + nl_msg_put_genlmsghdr(&request, 0, brc_family, NLM_F_REQUEST, BRC_GENL_C_QUERY_MC, 1); retval = nl_sock_transact(sock, &request, &reply); ofpbuf_uninit(&request); @@ -162,22 +156,35 @@ brc_open(struct nl_sock **sock) return retval; } - retval = nl_sock_create(NETLINK_GENERIC, multicast_group, 0, 0, sock); + retval = nl_sock_create(NETLINK_GENERIC, sock); if (retval) { return retval; } - return 0; + retval = nl_sock_join_mcgroup(*sock, multicast_group); + if (retval) { + nl_sock_destroy(*sock); + *sock = NULL; + } + return retval; } static const struct nl_policy brc_dp_policy[] = { [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING }, }; -static bool -bridge_exists(const char *name) +static struct ovsrec_bridge * +find_bridge(const struct ovsrec_open_vswitch *ovs, const char *br_name) { - return cfg_has_section("bridge.%s", name); + size_t i; + + for (i = 0; i < ovs->n_bridges; i++) { + if (!strcmp(br_name, ovs->bridges[i]->name)) { + return ovs->bridges[i]; + } + } + + return NULL; } static int @@ -222,190 +229,366 @@ execute_appctl_command(const char *unixctl_command, char **output) return error; } -static int -rewrite_and_reload_config(void) +static void +do_get_bridge_parts(const struct ovsrec_bridge *br, struct sset *parts, + int vlan, bool break_down_bonds) { - if (cfg_is_dirty()) { - int error1 = cfg_write(); - int error2 = cfg_read(); - long long int reload_start = time_msec(); - int error3 = execute_appctl_command("vswitchd/reload", NULL); - long long int elapsed = time_msec() - reload_start; - COVERAGE_INC(brcompatd_reload); - if (elapsed > 0) { - VLOG_INFO("reload command executed in %lld ms", elapsed); + size_t i, j; + + for (i = 0; i < br->n_ports; i++) { + const struct ovsrec_port *port = br->ports[i]; + + if (vlan >= 0) { + int port_vlan = port->n_tag ? *port->tag : 0; + if (vlan != port_vlan) { + continue; + } + } + if (break_down_bonds) { + for (j = 0; j < port->n_interfaces; j++) { + const struct ovsrec_interface *iface = port->interfaces[j]; + sset_add(parts, iface->name); + } + } else { + sset_add(parts, port->name); } - return error1 ? error1 : error2 ? error2 : error3; } - return 0; } -/* Get all the interfaces for 'bridge' as 'ifaces', breaking bonded interfaces +/* Add all the interfaces for 'bridge' to 'ifaces', breaking bonded interfaces * down into their constituent parts. * * If 'vlan' < 0, all interfaces on 'bridge' are reported. If 'vlan' == 0, * then only interfaces for trunk ports or ports with implicit VLAN 0 are - * reported. If 'vlan' > 0, only interfaces with implict VLAN 'vlan' are + * reported. If 'vlan' > 0, only interfaces with implicit VLAN 'vlan' are * reported. */ static void -get_bridge_ifaces(const char *bridge, struct svec *ifaces, int vlan) +get_bridge_ifaces(const struct ovsrec_bridge *br, struct sset *ifaces, + int vlan) { - struct svec ports; - int i; + do_get_bridge_parts(br, ifaces, vlan, true); +} - svec_init(&ports); - svec_init(ifaces); - cfg_get_all_keys(&ports, "bridge.%s.port", bridge); - for (i = 0; i < ports.n; i++) { - const char *port_name = ports.names[i]; - if (vlan >= 0) { - int port_vlan = cfg_get_vlan(0, "vlan.%s.tag", port_name); - if (port_vlan < 0) { - port_vlan = 0; - } - if (vlan != port_vlan) { - continue; - } - } - if (cfg_has_section("bonding.%s", port_name)) { - struct svec slaves; - svec_init(&slaves); - cfg_get_all_keys(&slaves, "bonding.%s.slave", port_name); - svec_append(ifaces, &slaves); - svec_destroy(&slaves); - } else { - svec_add(ifaces, port_name); - } - } - svec_destroy(&ports); +/* Add all the ports for 'bridge' to 'ports'. Bonded ports are reported under + * the bond name, not broken down into their constituent interfaces. + * + * If 'vlan' < 0, all ports on 'bridge' are reported. If 'vlan' == 0, then + * only trunk ports or ports with implicit VLAN 0 are reported. If 'vlan' > 0, + * only port with implicit VLAN 'vlan' are reported. */ +static void +get_bridge_ports(const struct ovsrec_bridge *br, struct sset *ports, + int vlan) +{ + do_get_bridge_parts(br, ports, vlan, false); +} + +static struct ovsdb_idl_txn * +txn_from_openvswitch(const struct ovsrec_open_vswitch *ovs) +{ + return ovsdb_idl_txn_get(&ovs->header_); +} + +static bool +port_is_fake_bridge(const struct ovsrec_port *port) +{ + return (port->fake_bridge + && port->tag + && *port->tag >= 1 && *port->tag <= 4095); } -/* Go through the configuration file and remove any ports that no longer - * exist associated with a bridge. */ static void -prune_ports(void) +ovs_insert_bridge(const struct ovsrec_open_vswitch *ovs, + struct ovsrec_bridge *bridge) { - int i, j; - int error; - struct svec bridges, delete; + struct ovsrec_bridge **bridges; + size_t i; - if (cfg_lock(NULL, 0)) { - /* Couldn't lock config file. */ - return; + bridges = xmalloc(sizeof *ovs->bridges * (ovs->n_bridges + 1)); + for (i = 0; i < ovs->n_bridges; i++) { + bridges[i] = ovs->bridges[i]; } + bridges[ovs->n_bridges] = bridge; + ovsrec_open_vswitch_set_bridges(ovs, bridges, ovs->n_bridges + 1); + free(bridges); +} - svec_init(&bridges); - svec_init(&delete); - cfg_get_subsections(&bridges, "bridge"); - for (i=0; iheader_.uuid); + ovsdb_idl_txn_increment(txn, "Open_vSwitch", "next_cfg", where); + json_destroy(where); + } + status = ovsdb_idl_txn_commit_block(txn); + if (wait_for_reload && status == TXN_SUCCESS) { + next_cfg = ovsdb_idl_txn_get_increment_new_value(txn); + } + ovsdb_idl_txn_destroy(txn); + + switch (status) { + case TXN_INCOMPLETE: + NOT_REACHED(); + + case TXN_ABORTED: + VLOG_ERR_RL(&rl, "OVSDB transaction unexpectedly aborted"); + return ECONNABORTED; + + case TXN_UNCHANGED: + return 0; + + case TXN_SUCCESS: + if (wait_for_reload) { + for (;;) { + /* We can't use 'ovs' any longer because ovsdb_idl_run() can + * destroy it. */ + const struct ovsrec_open_vswitch *ovs2; + + ovsdb_idl_run(idl); + OVSREC_OPEN_VSWITCH_FOR_EACH (ovs2, idl) { + if (ovs2->cur_cfg >= next_cfg) { + goto done; + } + } + ovsdb_idl_wait(idl); + poll_block(); } + done: ; } - svec_destroy(&ifaces); + return 0; + + case TXN_TRY_AGAIN: + VLOG_ERR_RL(&rl, "OVSDB transaction needs retry"); + return EAGAIN; + + case TXN_ERROR: + VLOG_ERR_RL(&rl, "OVSDB transaction failed: %s", + ovsdb_idl_txn_get_error(txn)); + return EBUSY; + + default: + NOT_REACHED(); } - svec_destroy(&bridges); +} + +static int +add_bridge(struct ovsdb_idl *idl, const struct ovsrec_open_vswitch *ovs, + const char *br_name) +{ + struct ovsrec_bridge *br; + struct ovsrec_port *port; + struct ovsrec_interface *iface; + struct ovsdb_idl_txn *txn; - if (delete.n) { + if (find_bridge(ovs, br_name)) { + VLOG_WARN("addbr %s: bridge %s exists", br_name, br_name); + return EEXIST; + } else if (netdev_exists(br_name)) { size_t i; - for (i = 0; i < delete.n; i++) { - cfg_del_match("bridge.*.port=%s", delete.names[i]); - cfg_del_match("bonding.*.slave=%s", delete.names[i]); + for (i = 0; i < ovs->n_bridges; i++) { + size_t j; + struct ovsrec_bridge *br_cfg = ovs->bridges[i]; + + for (j = 0; j < br_cfg->n_ports; j++) { + if (port_is_fake_bridge(br_cfg->ports[j])) { + VLOG_WARN("addbr %s: %s exists as a fake bridge", + br_name, br_name); + return 0; + } + } } - rewrite_and_reload_config(); - cfg_unlock(); - } else { - cfg_unlock(); + + VLOG_WARN("addbr %s: cannot create bridge %s because a network " + "device named %s already exists", + br_name, br_name, br_name); + return EEXIST; } - svec_destroy(&delete); + + txn = ovsdb_idl_txn_create(idl); + + ovsdb_idl_txn_add_comment(txn, "ovs-brcompatd: addbr %s", br_name); + + iface = ovsrec_interface_insert(txn_from_openvswitch(ovs)); + ovsrec_interface_set_name(iface, br_name); + + port = ovsrec_port_insert(txn_from_openvswitch(ovs)); + ovsrec_port_set_name(port, br_name); + ovsrec_port_set_interfaces(port, &iface, 1); + + br = ovsrec_bridge_insert(txn_from_openvswitch(ovs)); + ovsrec_bridge_set_name(br, br_name); + ovsrec_bridge_set_ports(br, &port, 1); + + ovs_insert_bridge(ovs, br); + + return commit_txn(txn, true); } +static void +add_port(const struct ovsrec_open_vswitch *ovs, + const struct ovsrec_bridge *br, const char *port_name) +{ + struct ovsrec_interface *iface; + struct ovsrec_port *port; + struct ovsrec_port **ports; + size_t i; + + /* xxx Check conflicts? */ + iface = ovsrec_interface_insert(txn_from_openvswitch(ovs)); + ovsrec_interface_set_name(iface, port_name); + + port = ovsrec_port_insert(txn_from_openvswitch(ovs)); + ovsrec_port_set_name(port, port_name); + ovsrec_port_set_interfaces(port, &iface, 1); + + ports = xmalloc(sizeof *br->ports * (br->n_ports + 1)); + for (i = 0; i < br->n_ports; i++) { + ports[i] = br->ports[i]; + } + ports[br->n_ports] = port; + ovsrec_bridge_set_ports(br, ports, br->n_ports + 1); + free(ports); +} -/* Checks whether a network device named 'name' exists and returns true if so, - * false otherwise. +/* Deletes 'port' from 'br'. * - * XXX it is possible that this doesn't entirely accomplish what we want in - * context, since ovs-vswitchd.conf may cause vswitchd to create or destroy - * network devices based on iface.*.internal settings. - * - * XXX may want to move this to lib/netdev. + * After calling this function, 'port' must not be referenced again. */ +static void +del_port(const struct ovsrec_bridge *br, const struct ovsrec_port *port) +{ + struct ovsrec_port **ports; + size_t i, n; + + /* Remove 'port' from the bridge's list of ports. */ + ports = xmalloc(sizeof *br->ports * br->n_ports); + for (i = n = 0; i < br->n_ports; i++) { + if (br->ports[i] != port) { + ports[n++] = br->ports[i]; + } + } + ovsrec_bridge_set_ports(br, ports, n); + free(ports); +} + +/* Delete 'iface' from 'port' (which must be within 'br'). If 'iface' was + * 'port''s only interface, delete 'port' from 'br' also. * - * XXX why not just use netdev_nodev_get_flags() or similar function? */ -static bool -netdev_exists(const char *name) + * After calling this function, 'iface' must not be referenced again. */ +static void +del_interface(const struct ovsrec_bridge *br, + const struct ovsrec_port *port, + const struct ovsrec_interface *iface) { - struct stat s; - char *filename; - int error; + if (port->n_interfaces == 1) { + del_port(br, port); + } else { + struct ovsrec_interface **ifaces; + size_t i, n; - filename = xasprintf("/sys/class/net/%s", name); - error = stat(filename, &s); - free(filename); - return !error; + ifaces = xmalloc(sizeof *port->interfaces * port->n_interfaces); + for (i = n = 0; i < port->n_interfaces; i++) { + if (port->interfaces[i] != iface) { + ifaces[n++] = port->interfaces[i]; + } + } + ovsrec_port_set_interfaces(port, ifaces, n); + free(ifaces); + } } -static int -add_bridge(const char *br_name) +/* Find and return a port within 'br' named 'port_name'. */ +static const struct ovsrec_port * +find_port(const struct ovsrec_bridge *br, const char *port_name) { - if (bridge_exists(br_name)) { - VLOG_WARN("addbr %s: bridge %s exists", br_name, br_name); - return EEXIST; - } else if (netdev_exists(br_name)) { - if (cfg_get_bool(0, "iface.%s.fake-bridge", br_name)) { - VLOG_WARN("addbr %s: %s exists as a fake bridge", - br_name, br_name); - return 0; - } else { - VLOG_WARN("addbr %s: cannot create bridge %s because a network " - "device named %s already exists", - br_name, br_name, br_name); - return EEXIST; + size_t i; + + for (i = 0; i < br->n_ports; i++) { + struct ovsrec_port *port = br->ports[i]; + if (!strcmp(port_name, port->name)) { + return port; } } + return NULL; +} + +/* Find and return an interface within 'br' named 'iface_name'. */ +static const struct ovsrec_interface * +find_interface(const struct ovsrec_bridge *br, const char *iface_name, + struct ovsrec_port **portp) +{ + size_t i; - cfg_add_entry("bridge.%s.port=%s", br_name, br_name); - VLOG_INFO("addbr %s: success", br_name); + for (i = 0; i < br->n_ports; i++) { + struct ovsrec_port *port = br->ports[i]; + size_t j; - return 0; + for (j = 0; j < port->n_interfaces; j++) { + struct ovsrec_interface *iface = port->interfaces[j]; + if (!strcmp(iface->name, iface_name)) { + *portp = port; + return iface; + } + } + } + + *portp = NULL; + return NULL; } -static int -del_bridge(const char *br_name) +static int +del_bridge(struct ovsdb_idl *idl, + const struct ovsrec_open_vswitch *ovs, const char *br_name) { - if (!bridge_exists(br_name)) { + struct ovsrec_bridge *br = find_bridge(ovs, br_name); + struct ovsrec_bridge **bridges; + struct ovsdb_idl_txn *txn; + size_t i, n; + + if (!br) { VLOG_WARN("delbr %s: no bridge named %s", br_name, br_name); return ENXIO; } - cfg_del_section("bridge.%s", br_name); - VLOG_INFO("delbr %s: success", br_name); + txn = ovsdb_idl_txn_create(idl); - return 0; + ovsdb_idl_txn_add_comment(txn, "ovs-brcompatd: delbr %s", br_name); + + /* Remove 'br' from the vswitch's list of bridges. */ + bridges = xmalloc(sizeof *ovs->bridges * ovs->n_bridges); + for (i = n = 0; i < ovs->n_bridges; i++) { + if (ovs->bridges[i] != br) { + bridges[n++] = ovs->bridges[i]; + } + } + ovsrec_open_vswitch_set_bridges(ovs, bridges, n); + free(bridges); + + return commit_txn(txn, true); } static int @@ -413,7 +596,7 @@ parse_command(struct ofpbuf *buffer, uint32_t *seq, const char **br_name, const char **port_name, uint64_t *count, uint64_t *skip) { static const struct nl_policy policy[] = { - [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING }, + [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING, .optional = true }, [BRC_GENL_A_PORT_NAME] = { .type = NL_A_STRING, .optional = true }, [BRC_GENL_A_FDB_COUNT] = { .type = NL_A_U64, .optional = true }, [BRC_GENL_A_FDB_SKIP] = { .type = NL_A_U64, .optional = true }, @@ -422,6 +605,7 @@ parse_command(struct ofpbuf *buffer, uint32_t *seq, const char **br_name, if (!nl_policy_parse(buffer, NLMSG_HDRLEN + GENL_HDRLEN, policy, attrs, ARRAY_SIZE(policy)) + || (br_name && !attrs[BRC_GENL_A_DP_NAME]) || (port_name && !attrs[BRC_GENL_A_PORT_NAME]) || (count && !attrs[BRC_GENL_A_FDB_COUNT]) || (skip && !attrs[BRC_GENL_A_FDB_SKIP])) { @@ -429,7 +613,9 @@ parse_command(struct ofpbuf *buffer, uint32_t *seq, const char **br_name, } *seq = ((struct nlmsghdr *) buffer->data)->nlmsg_seq; - *br_name = nl_attr_get_string(attrs[BRC_GENL_A_DP_NAME]); + if (br_name) { + *br_name = nl_attr_get_string(attrs[BRC_GENL_A_DP_NAME]); + } if (port_name) { *port_name = nl_attr_get_string(attrs[BRC_GENL_A_PORT_NAME]); } @@ -442,34 +628,44 @@ parse_command(struct ofpbuf *buffer, uint32_t *seq, const char **br_name, return 0; } -static void -send_reply(uint32_t seq, int error, struct ofpbuf *fdb_query_data) +/* Composes and returns a reply to a request made by the datapath with Netlink + * sequence number 'seq' and error code 'error'. The caller may add additional + * attributes to the message, then it may send it with send_reply(). */ +static struct ofpbuf * +compose_reply(uint32_t seq, int error) { - struct ofpbuf msg; - int retval; - - /* Compose reply. */ - ofpbuf_init(&msg, 0); - nl_msg_put_genlmsghdr(&msg, brc_sock, 32, brc_family, NLM_F_REQUEST, + struct ofpbuf *reply = ofpbuf_new(4096); + nl_msg_put_genlmsghdr(reply, 32, brc_family, NLM_F_REQUEST, BRC_GENL_C_DP_RESULT, 1); - ((struct nlmsghdr *) msg.data)->nlmsg_seq = seq; - nl_msg_put_u32(&msg, BRC_GENL_A_ERR_CODE, error); - if (fdb_query_data) { - nl_msg_put_unspec(&msg, BRC_GENL_A_FDB_DATA, - fdb_query_data->data, fdb_query_data->size); - } + ((struct nlmsghdr *) reply->data)->nlmsg_seq = seq; + nl_msg_put_u32(reply, BRC_GENL_A_ERR_CODE, error); + return reply; +} - /* Send reply. */ - retval = nl_sock_send(brc_sock, &msg, false); +/* Sends 'reply' to the datapath and frees it. */ +static void +send_reply(struct ofpbuf *reply) +{ + int retval = nl_sock_send(brc_sock, reply, false); if (retval) { VLOG_WARN_RL(&rl, "replying to brcompat request: %s", strerror(retval)); } - ofpbuf_uninit(&msg); + ofpbuf_delete(reply); +} + +/* Composes and sends a reply to a request made by the datapath with Netlink + * sequence number 'seq' and error code 'error'. */ +static void +send_simple_reply(uint32_t seq, int error) +{ + send_reply(compose_reply(seq, error)); } static int -handle_bridge_cmd(struct ofpbuf *buffer, bool add) +handle_bridge_cmd(struct ovsdb_idl *idl, + const struct ovsrec_open_vswitch *ovs, + struct ofpbuf *buffer, bool add) { const char *br_name; uint32_t seq; @@ -477,11 +673,15 @@ handle_bridge_cmd(struct ofpbuf *buffer, bool add) error = parse_command(buffer, &seq, &br_name, NULL, NULL, NULL); if (!error) { - error = add ? add_bridge(br_name) : del_bridge(br_name); - if (!error) { - error = rewrite_and_reload_config(); - } - send_reply(seq, error, NULL); + int retval; + + do { + retval = (add ? add_bridge : del_bridge)(idl, ovs, br_name); + VLOG_INFO_RL(&rl, "%sbr %s: %s", + add ? "add" : "del", br_name, strerror(retval)); + } while (retval == EAGAIN); + + send_simple_reply(seq, error); } return error; } @@ -491,16 +691,10 @@ static const struct nl_policy brc_port_policy[] = { [BRC_GENL_A_PORT_NAME] = { .type = NL_A_STRING }, }; -static void -del_port(const char *br_name, const char *port_name) -{ - cfg_del_entry("bridge.%s.port=%s", br_name, port_name); - cfg_del_match("bonding.*.slave=%s", port_name); - cfg_del_match("vlan.%s.*", port_name); -} - static int -handle_port_cmd(struct ofpbuf *buffer, bool add) +handle_port_cmd(struct ovsdb_idl *idl, + const struct ovsrec_open_vswitch *ovs, + struct ofpbuf *buffer, bool add) { const char *cmd_name = add ? "add-if" : "del-if"; const char *br_name, *port_name; @@ -509,7 +703,9 @@ handle_port_cmd(struct ofpbuf *buffer, bool add) error = parse_command(buffer, &seq, &br_name, &port_name, NULL, NULL); if (!error) { - if (!bridge_exists(br_name)) { + struct ovsrec_bridge *br = find_bridge(ovs, br_name); + + if (!br) { VLOG_WARN("%s %s %s: no bridge named %s", cmd_name, br_name, port_name, br_name); error = EINVAL; @@ -518,43 +714,75 @@ handle_port_cmd(struct ofpbuf *buffer, bool add) cmd_name, br_name, port_name, port_name); error = EINVAL; } else { - if (add) { - cfg_add_entry("bridge.%s.port=%s", br_name, port_name); - } else { - del_port(br_name, port_name); - } - VLOG_INFO("%s %s %s: success", cmd_name, br_name, port_name); - error = rewrite_and_reload_config(); + do { + struct ovsdb_idl_txn *txn = ovsdb_idl_txn_create(idl); + + if (add) { + ovsdb_idl_txn_add_comment(txn, "ovs-brcompatd: add-if %s", + port_name); + add_port(ovs, br, port_name); + } else { + const struct ovsrec_port *port = find_port(br, port_name); + if (port) { + ovsdb_idl_txn_add_comment(txn, + "ovs-brcompatd: del-if %s", + port_name); + del_port(br, port); + } + } + + error = commit_txn(txn, true); + VLOG_INFO_RL(&rl, "%s %s %s: %s", + cmd_name, br_name, port_name, strerror(error)); + } while (error == EAGAIN); } - send_reply(seq, error, NULL); + send_simple_reply(seq, error); } return error; } -/* Returns the name of the bridge that contains a port named 'port_name', as a - * malloc'd string that the caller must free, or a null pointer if no bridge - * contains a port named 'port_name'. */ -static char * -get_bridge_containing_port(const char *port_name) +/* The caller is responsible for freeing '*ovs_name' if the call is + * successful. */ +static int +linux_bridge_to_ovs_bridge(const struct ovsrec_open_vswitch *ovs, + const char *linux_name, + const struct ovsrec_bridge **ovs_bridge, + int *br_vlan) { - struct svec matches; - const char *start, *end; - - svec_init(&matches); - cfg_get_matches(&matches, "bridge.*.port=%s", port_name); - if (!matches.n) { + *ovs_bridge = find_bridge(ovs, linux_name); + if (*ovs_bridge) { + /* Bridge name is the same. We are interested in VLAN 0. */ + *br_vlan = 0; return 0; - } + } else { + /* No such Open vSwitch bridge 'linux_name', but there might be an + * internal port named 'linux_name' on some other bridge + * 'ovs_bridge'. If so then we are interested in the VLAN assigned to + * port 'linux_name' on the bridge named 'ovs_bridge'. */ + size_t i, j; + + for (i = 0; i < ovs->n_bridges; i++) { + const struct ovsrec_bridge *br = ovs->bridges[i]; + + for (j = 0; j < br->n_ports; j++) { + const struct ovsrec_port *port = br->ports[j]; - start = matches.names[0] + strlen("bridge."); - end = strstr(start, ".port="); - assert(end); - return xmemdup0(start, end - start); + if (!strcmp(port->name, linux_name)) { + *ovs_bridge = br; + *br_vlan = port->n_tag ? *port->tag : -1; + return 0; + } + } + + } + return ENODEV; + } } static int -handle_fdb_query_cmd(struct ofpbuf *buffer) +handle_fdb_query_cmd(const struct ovsrec_open_vswitch *ovs, + struct ofpbuf *buffer) { /* This structure is copied directly from the Linux 2.6.30 header files. * It would be more straightforward to #include , but @@ -582,12 +810,14 @@ handle_fdb_query_cmd(struct ofpbuf *buffer) * vswitchd can deal with all the VLANs on a single bridge. We have to * pretend that the former is the case even though the latter is the * implementation. */ - const char *linux_bridge; /* Name used by brctl. */ - char *ovs_bridge; /* Name used by ovs-vswitchd. */ + const char *linux_name; /* Name used by brctl. */ + const struct ovsrec_bridge *ovs_bridge; /* Bridge used by ovs-vswitchd. */ int br_vlan; /* VLAN tag. */ - struct svec ifaces; + struct sset ifaces; struct ofpbuf query_data; + const char *iface_name; + struct ofpbuf *reply; char *unixctl_command; uint64_t count, skip; char *output; @@ -596,56 +826,47 @@ handle_fdb_query_cmd(struct ofpbuf *buffer) int error; /* Parse the command received from brcompat_mod. */ - error = parse_command(buffer, &seq, &linux_bridge, NULL, &count, &skip); + error = parse_command(buffer, &seq, &linux_name, NULL, &count, &skip); if (error) { return error; } /* Figure out vswitchd bridge and VLAN. */ - cfg_read(); - if (bridge_exists(linux_bridge)) { - /* Bridge name is the same. We are interested in VLAN 0. */ - ovs_bridge = xstrdup(linux_bridge); - br_vlan = 0; - } else { - /* No such Open vSwitch bridge 'linux_bridge', but there might be an - * internal port named 'linux_bridge' on some other bridge - * 'ovs_bridge'. If so then we are interested in the VLAN assigned to - * port 'linux_bridge' on the bridge named 'ovs_bridge'. */ - const char *port_name = linux_bridge; - - ovs_bridge = get_bridge_containing_port(port_name); - br_vlan = cfg_get_vlan(0, "vlan.%s.tag", port_name); - if (!ovs_bridge || br_vlan < 0) { - free(ovs_bridge); - send_reply(seq, ENODEV, NULL); - return error; - } + error = linux_bridge_to_ovs_bridge(ovs, linux_name, + &ovs_bridge, &br_vlan); + if (error) { + send_simple_reply(seq, error); + return error; } /* Fetch the forwarding database using ovs-appctl. */ - unixctl_command = xasprintf("fdb/show %s", ovs_bridge); + unixctl_command = xasprintf("fdb/show %s", ovs_bridge->name); error = execute_appctl_command(unixctl_command, &output); free(unixctl_command); if (error) { - free(ovs_bridge); - send_reply(seq, error, NULL); + send_simple_reply(seq, error); return error; } /* Fetch the MAC address for each interface on the bridge, so that we can * fill in the is_local field in the response. */ + sset_init(&ifaces); get_bridge_ifaces(ovs_bridge, &ifaces, br_vlan); - local_macs = xmalloc(ifaces.n * sizeof *local_macs); + local_macs = xmalloc(sset_count(&ifaces) * sizeof *local_macs); n_local_macs = 0; - for (i = 0; i < ifaces.n; i++) { - const char *iface_name = ifaces.names[i]; + SSET_FOR_EACH (iface_name, &ifaces) { struct mac *mac = &local_macs[n_local_macs]; - if (!netdev_nodev_get_etheraddr(iface_name, mac->addr)) { - n_local_macs++; + struct netdev *netdev; + + error = netdev_open_default(iface_name, &netdev); + if (!error) { + if (!netdev_get_etheraddr(netdev, mac->addr)) { + n_local_macs++; + } + netdev_close(netdev); } } - svec_destroy(&ifaces); + sset_destroy(&ifaces); /* Parse the response from ovs-appctl and convert it to binary format to * pass back to the kernel. */ @@ -667,7 +888,7 @@ handle_fdb_query_cmd(struct ofpbuf *buffer) if (sscanf(line, "%d %d "ETH_ADDR_SCAN_FMT" %d", &port, &vlan, ETH_ADDR_SCAN_ARGS(mac), &age) != 2 + ETH_ADDR_SCAN_COUNT + 1) { - struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); VLOG_INFO_RL(&rl, "fdb/show output has invalid format: %s", line); continue; } @@ -702,34 +923,164 @@ handle_fdb_query_cmd(struct ofpbuf *buffer) } free(output); - send_reply(seq, 0, &query_data); + /* Compose and send reply to datapath. */ + reply = compose_reply(seq, 0); + nl_msg_put_unspec(reply, BRC_GENL_A_FDB_DATA, + query_data.data, query_data.size); + send_reply(reply); + + /* Free memory. */ ofpbuf_uninit(&query_data); - free(ovs_bridge); + free(local_macs); + + return 0; +} + +static void +send_ifindex_reply(uint32_t seq, struct sset *ifaces) +{ + struct ofpbuf *reply; + const char *iface; + size_t n_indices; + int *indices; + + /* Convert 'ifaces' into ifindexes. */ + n_indices = 0; + indices = xmalloc(sset_count(ifaces) * sizeof *indices); + SSET_FOR_EACH (iface, ifaces) { + int ifindex = if_nametoindex(iface); + if (ifindex) { + indices[n_indices++] = ifindex; + } + } + + /* Compose and send reply. */ + reply = compose_reply(seq, 0); + nl_msg_put_unspec(reply, BRC_GENL_A_IFINDEXES, + indices, n_indices * sizeof *indices); + send_reply(reply); + + /* Free memory. */ + free(indices); +} + +static int +handle_get_bridges_cmd(const struct ovsrec_open_vswitch *ovs, + struct ofpbuf *buffer) +{ + struct sset bridges; + size_t i, j; + + uint32_t seq; + + int error; + + /* Parse Netlink command. + * + * The command doesn't actually have any arguments, but we need the + * sequence number to send the reply. */ + error = parse_command(buffer, &seq, NULL, NULL, NULL, NULL); + if (error) { + return error; + } + + /* Get all the real bridges and all the fake ones. */ + sset_init(&bridges); + for (i = 0; i < ovs->n_bridges; i++) { + const struct ovsrec_bridge *br = ovs->bridges[i]; + + sset_add(&bridges, br->name); + for (j = 0; j < br->n_ports; j++) { + const struct ovsrec_port *port = br->ports[j]; + + if (port->fake_bridge) { + sset_add(&bridges, port->name); + } + } + } + + send_ifindex_reply(seq, &bridges); + sset_destroy(&bridges); return 0; } static int -brc_recv_update(void) +handle_get_ports_cmd(const struct ovsrec_open_vswitch *ovs, + struct ofpbuf *buffer) { - int retval; - struct ofpbuf *buffer; - struct genlmsghdr *genlmsghdr; + uint32_t seq; + const char *linux_name; + const struct ovsrec_bridge *ovs_bridge; + int br_vlan; + + struct sset ports; + + int error; + + /* Parse Netlink command. */ + error = parse_command(buffer, &seq, &linux_name, NULL, NULL, NULL); + if (error) { + return error; + } + + error = linux_bridge_to_ovs_bridge(ovs, linux_name, + &ovs_bridge, &br_vlan); + if (error) { + send_simple_reply(seq, error); + return error; + } + + sset_init(&ports); + get_bridge_ports(ovs_bridge, &ports, br_vlan); + sset_find_and_delete(&ports, linux_name); + send_ifindex_reply(seq, &ports); /* XXX bonds won't show up */ + sset_destroy(&ports); + + return 0; +} + +static struct ofpbuf * +brc_recv_update__(void) +{ + for (;;) { + struct ofpbuf *buffer; + int retval; - buffer = NULL; - do { - ofpbuf_delete(buffer); retval = nl_sock_recv(brc_sock, &buffer, false); - } while (retval == ENOBUFS - || (!retval - && (nl_msg_nlmsgerr(buffer, NULL) - || nl_msg_nlmsghdr(buffer)->nlmsg_type == NLMSG_DONE))); - if (retval) { - if (retval != EAGAIN) { + switch (retval) { + case 0: + if (nl_msg_nlmsgerr(buffer, NULL) + || nl_msg_nlmsghdr(buffer)->nlmsg_type == NLMSG_DONE) { + break; + } + return buffer; + + case ENOBUFS: + break; + + case EAGAIN: + return NULL; + + default: VLOG_WARN_RL(&rl, "brc_recv_update: %s", strerror(retval)); + return NULL; } - return retval; + ofpbuf_delete(buffer); + } +} + +static void +brc_recv_update(struct ovsdb_idl *idl) +{ + struct ofpbuf *buffer; + struct genlmsghdr *genlmsghdr; + const struct ovsrec_open_vswitch *ovs; + + buffer = brc_recv_update__(); + if (!buffer) { + return; } genlmsghdr = nl_msg_genlmsghdr(buffer); @@ -744,47 +1095,60 @@ brc_recv_update(void) goto error; } - if (cfg_lock(NULL, lock_timeout)) { - /* Couldn't lock config file. */ - retval = EAGAIN; + /* Get the Open vSwitch configuration. Just drop the request on the floor + * if a valid configuration doesn't exist. (We could check this earlier, + * but we want to drain pending Netlink messages even when there is no Open + * vSwitch configuration.) */ + ovs = ovsrec_open_vswitch_first(idl); + if (!ovs) { + VLOG_WARN_RL(&rl, "could not find valid configuration to update"); goto error; } switch (genlmsghdr->cmd) { case BRC_GENL_C_DP_ADD: - retval = handle_bridge_cmd(buffer, true); + handle_bridge_cmd(idl, ovs, buffer, true); break; case BRC_GENL_C_DP_DEL: - retval = handle_bridge_cmd(buffer, false); + handle_bridge_cmd(idl, ovs, buffer, false); break; case BRC_GENL_C_PORT_ADD: - retval = handle_port_cmd(buffer, true); + handle_port_cmd(idl, ovs, buffer, true); break; case BRC_GENL_C_PORT_DEL: - retval = handle_port_cmd(buffer, false); + handle_port_cmd(idl, ovs, buffer, false); break; case BRC_GENL_C_FDB_QUERY: - retval = handle_fdb_query_cmd(buffer); + handle_fdb_query_cmd(ovs, buffer); + break; + + case BRC_GENL_C_GET_BRIDGES: + handle_get_bridges_cmd(ovs, buffer); + break; + + case BRC_GENL_C_GET_PORTS: + handle_get_ports_cmd(ovs, buffer); break; default: - retval = EPROTO; + VLOG_WARN_RL(&rl, "received unknown brc netlink command: %d\n", + genlmsghdr->cmd); + break; } - cfg_unlock(); - error: ofpbuf_delete(buffer); - return retval; + return; } /* Check for interface configuration changes announced through RTNL. */ static void -rtnl_recv_update(void) +rtnl_recv_update(struct ovsdb_idl *idl, + const struct ovsrec_open_vswitch *ovs) { struct ofpbuf *buf; @@ -794,7 +1158,7 @@ rtnl_recv_update(void) } else if (error == ENOBUFS) { VLOG_WARN_RL(&rl, "network monitor socket overflowed"); } else if (error) { - VLOG_WARN_RL(&rl, "error on network monitor socket: %s", + VLOG_WARN_RL(&rl, "error on network monitor socket: %s", strerror(error)); } else { struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)]; @@ -807,8 +1171,8 @@ rtnl_recv_update(void) VLOG_WARN_RL(&rl, "received bad rtnl message (no ifinfomsg)"); ofpbuf_delete(buf); return; - } - + } + if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg), rtnlgrp_link_policy, attrs, ARRAY_SIZE(rtnlgrp_link_policy))) { @@ -820,32 +1184,41 @@ rtnl_recv_update(void) const char *port_name = nl_attr_get_string(attrs[IFLA_IFNAME]); char br_name[IFNAMSIZ]; uint32_t br_idx = nl_attr_get_u32(attrs[IFLA_MASTER]); - struct svec ports; - enum netdev_flags flags; if (!if_indextoname(br_idx, br_name)) { ofpbuf_delete(buf); return; } - if (cfg_lock(NULL, lock_timeout)) { - /* Couldn't lock config file. */ - /* xxx this should try again and print error msg. */ - ofpbuf_delete(buf); - return; - } - - if (netdev_nodev_get_flags(port_name, &flags) == ENODEV) { + if (!netdev_exists(port_name)) { /* Network device is really gone. */ + struct ovsdb_idl_txn *txn; + const struct ovsrec_interface *iface; + struct ovsrec_port *port; + struct ovsrec_bridge *br; + VLOG_INFO("network device %s destroyed, " "removing from bridge %s", port_name, br_name); - svec_init(&ports); - cfg_get_all_keys(&ports, "bridge.%s.port", br_name); - svec_sort(&ports); - if (svec_contains(&ports, port_name)) { - del_port(br_name, port_name); - rewrite_and_reload_config(); + + br = find_bridge(ovs, br_name); + if (!br) { + VLOG_WARN("no bridge named %s from which to remove %s", + br_name, port_name); + ofpbuf_delete(buf); + return; + } + + txn = ovsdb_idl_txn_create(idl); + + iface = find_interface(br, port_name, &port); + if (iface) { + del_interface(br, port, iface); + ovsdb_idl_txn_add_comment(txn, + "ovs-brcompatd: destroy port %s", + port_name); } + + commit_txn(txn, false); } else { /* A network device by that name exists even though the kernel * told us it had disappeared. Probably, what happened was @@ -892,7 +1265,6 @@ rtnl_recv_update(void) "a device by that name exists (XS Tools 5.0.0?)", port_name); } - cfg_unlock(); } ofpbuf_delete(buf); } @@ -901,48 +1273,73 @@ rtnl_recv_update(void) int main(int argc, char *argv[]) { + extern struct vlog_module VLM_reconnect; struct unixctl_server *unixctl; + const char *remote; + struct ovsdb_idl *idl; int retval; + proctitle_init(argc, argv); set_program_name(argv[0]); - register_fault_handlers(); - time_init(); - vlog_init(); - parse_options(argc, argv); + vlog_set_levels(NULL, VLF_CONSOLE, VLL_WARN); + vlog_set_levels(&VLM_reconnect, VLF_ANY_FACILITY, VLL_WARN); + + remote = parse_options(argc, argv); signal(SIGPIPE, SIG_IGN); process_init(); + ovsrec_init(); - die_if_already_running(); - daemonize(); + daemonize_start(); retval = unixctl_server_create(NULL, &unixctl); if (retval) { - ovs_fatal(retval, "could not listen for vlog connections"); + exit(EXIT_FAILURE); } if (brc_open(&brc_sock)) { - ovs_fatal(0, "could not open brcompat socket. Check " - "\"brcompat\" kernel module."); + VLOG_FATAL("could not open brcompat socket. Check " + "\"brcompat\" kernel module."); } if (prune_timeout) { - if (nl_sock_create(NETLINK_ROUTE, RTNLGRP_LINK, 0, 0, &rtnl_sock)) { - ovs_fatal(0, "could not create rtnetlink socket"); + int error; + + error = nl_sock_create(NETLINK_ROUTE, &rtnl_sock); + if (error) { + VLOG_FATAL("could not create rtnetlink socket (%s)", + strerror(error)); } - } - retval = cfg_read(); - if (retval) { - ovs_fatal(retval, "could not read config file"); + error = nl_sock_join_mcgroup(rtnl_sock, RTNLGRP_LINK); + if (error) { + VLOG_FATAL("could not join RTNLGRP_LINK multicast group (%s)", + strerror(error)); + } } + daemonize_complete(); + + idl = ovsdb_idl_create(remote, &ovsrec_idl_class, true); + for (;;) { + const struct ovsrec_open_vswitch *ovs; + + ovsdb_idl_run(idl); + unixctl_server_run(unixctl); - brc_recv_update(); + brc_recv_update(idl); + + ovs = ovsrec_open_vswitch_first(idl); + if (!ovs && ovsdb_idl_has_ever_connected(idl)) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + VLOG_WARN_RL(&rl, "%s: database does not contain any Open vSwitch " + "configuration", remote); + } + netdev_run(); /* If 'prune_timeout' is non-zero, we actively prune from the - * config file any 'bridge..port' entries that are no - * longer valid. We use two methods: + * configuration of port entries that are no longer valid. We + * use two methods: * * 1) The kernel explicitly notifies us of removed ports * through the RTNL messages. @@ -950,19 +1347,22 @@ main(int argc, char *argv[]) * 2) We periodically check all ports associated with bridges * to see if they no longer exist. */ - if (prune_timeout) { - rtnl_recv_update(); - prune_ports(); - + if (ovs && prune_timeout) { + rtnl_recv_update(idl, ovs); nl_sock_wait(rtnl_sock, POLLIN); poll_timer_wait(prune_timeout); } + nl_sock_wait(brc_sock, POLLIN); + ovsdb_idl_wait(idl); unixctl_server_wait(unixctl); + netdev_wait(); poll_block(); } + ovsdb_idl_destroy(idl); + return 0; } @@ -979,28 +1379,27 @@ validate_appctl_command(void) } else if (p[1] == 's') { n++; } else { - ovs_fatal(0, "only '%%s' and '%%%%' allowed in --appctl-command"); + VLOG_FATAL("only '%%s' and '%%%%' allowed in --appctl-command"); } } if (n != 1) { - ovs_fatal(0, "'%%s' must appear exactly once in --appctl-command"); + VLOG_FATAL("'%%s' must appear exactly once in --appctl-command"); } } -static void +static const char * parse_options(int argc, char *argv[]) { enum { - OPT_LOCK_TIMEOUT = UCHAR_MAX + 1, OPT_PRUNE_TIMEOUT, OPT_APPCTL_COMMAND, VLOG_OPTION_ENUMS, - LEAK_CHECKER_OPTION_ENUMS + LEAK_CHECKER_OPTION_ENUMS, + DAEMON_OPTION_ENUMS }; static struct option long_options[] = { {"help", no_argument, 0, 'h'}, {"version", no_argument, 0, 'V'}, - {"lock-timeout", required_argument, 0, OPT_LOCK_TIMEOUT}, {"prune-timeout", required_argument, 0, OPT_PRUNE_TIMEOUT}, {"appctl-command", required_argument, 0, OPT_APPCTL_COMMAND}, DAEMON_LONG_OPTIONS, @@ -1009,12 +1408,8 @@ parse_options(int argc, char *argv[]) {0, 0, 0, 0}, }; char *short_options = long_options_to_short_options(long_options); - int error; - appctl_command = xasprintf("%s/ovs-appctl -t " - "%s/ovs-vswitchd.`cat %s/ovs-vswitchd.pid`.ctl " - "-e '%%s'", - ovs_bindir, ovs_rundir, ovs_rundir); + appctl_command = xasprintf("%s/ovs-appctl %%s", ovs_bindir()); for (;;) { int c; @@ -1032,10 +1427,6 @@ parse_options(int argc, char *argv[]) OVS_PRINT_VERSION(0, 0); exit(EXIT_SUCCESS); - case OPT_LOCK_TIMEOUT: - lock_timeout = atoi(optarg); - break; - case OPT_PRUNE_TIMEOUT: prune_timeout = atoi(optarg) * 1000; break; @@ -1063,17 +1454,11 @@ parse_options(int argc, char *argv[]) argv += optind; if (argc != 1) { - ovs_fatal(0, "exactly one non-option argument required; " - "use --help for usage"); + VLOG_FATAL("database socket is non-option argument; " + "use --help for usage"); } - cfg_init(); - config_file = argv[0]; - error = cfg_set_file(config_file); - if (error) { - ovs_fatal(error, "failed to add configuration file \"%s\"", - config_file); - } + return argv[0]; } static void @@ -1086,7 +1471,6 @@ usage(void) printf("\nConfiguration options:\n" " --appctl-command=COMMAND shell command to run ovs-appctl\n" " --prune-timeout=SECS wait at most SECS before pruning ports\n" - " --lock-timeout=MSECS wait at most MSECS for CONFIG to unlock\n" ); daemon_usage(); vlog_usage();