X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=vswitchd%2Fovs-brcompatd.c;h=d351c059fbdfbef329abc251359e334f20ab1bc0;hb=7d9809efd81dd8d392288be5e3633493bf5a8e3d;hp=20569e7bde2bd0559c072fcff54f392915369c03;hpb=385533816c9d34286b6bd3f50ad4bceb8d34aa9a;p=sliver-openvswitch.git diff --git a/vswitchd/ovs-brcompatd.c b/vswitchd/ovs-brcompatd.c index 20569e7bd..d351c059f 100644 --- a/vswitchd/ovs-brcompatd.c +++ b/vswitchd/ovs-brcompatd.c @@ -15,6 +15,7 @@ #include +#include #include #include #include @@ -28,6 +29,7 @@ #include #include #include +#include #include #include @@ -36,7 +38,7 @@ #include "coverage.h" #include "daemon.h" #include "dirs.h" -#include "dpif.h" +#include "dynamic-string.h" #include "fatal-signal.h" #include "fault.h" #include "leak-checker.h" @@ -44,6 +46,7 @@ #include "netlink.h" #include "ofpbuf.h" #include "openvswitch/brcompat-netlink.h" +#include "packets.h" #include "poll-loop.h" #include "process.h" #include "signals.h" @@ -82,9 +85,10 @@ static int prune_timeout = 5000; /* Config file shared with ovs-vswitchd (usually ovs-vswitchd.conf). */ static char *config_file; -/* Command to run (via system()) to reload the ovs-vswitchd configuration - * file. */ -static char *reload_command; +/* Shell command to execute (via popen()) to send a control command to the + * running ovs-vswitchd process. The string must contain one instance of %s, + * which is replaced by the control command. */ +static char *appctl_command; /* Netlink socket to listen for interface changes. */ static struct nl_sock *rtnl_sock; @@ -175,6 +179,48 @@ bridge_exists(const char *name) return cfg_has_section("bridge.%s", name); } +static int +execute_appctl_command(const char *unixctl_command, char **output) +{ + char *stdout_log, *stderr_log; + int error, status; + char *argv[5]; + + argv[0] = "/bin/sh"; + argv[1] = "-c"; + argv[2] = xasprintf(appctl_command, unixctl_command); + argv[3] = NULL; + + /* Run process and log status. */ + error = process_run_capture(argv, &stdout_log, &stderr_log, &status); + if (error) { + VLOG_ERR("failed to execute %s command via ovs-appctl: %s", + unixctl_command, strerror(error)); + } else if (status) { + char *msg = process_status_msg(status); + VLOG_ERR("ovs-appctl exited with error (%s)", msg); + free(msg); + error = ECHILD; + } + + /* Deal with stdout_log. */ + if (output) { + *output = stdout_log; + } else { + free(stdout_log); + } + + /* Deal with stderr_log */ + if (stderr_log && *stderr_log) { + VLOG_INFO("ovs-appctl wrote to stderr:\n%s", stderr_log); + } + free(stderr_log); + + free(argv[2]); + + return error; +} + static int rewrite_and_reload_config(void) { @@ -182,31 +228,81 @@ rewrite_and_reload_config(void) int error1 = cfg_write(); int error2 = cfg_read(); long long int reload_start = time_msec(); - int error3 = system(reload_command); + int error3 = execute_appctl_command("vswitchd/reload", NULL); long long int elapsed = time_msec() - reload_start; COVERAGE_INC(brcompatd_reload); if (elapsed > 0) { VLOG_INFO("reload command executed in %lld ms", elapsed); } - if (error3 == -1) { - VLOG_ERR("failed to execute reload command: %s", strerror(errno)); - } else if (error3 != 0) { - char *msg = process_status_msg(error3); - VLOG_ERR("reload command exited with error (%s)", msg); - free(msg); - } - return error1 ? error1 : error2 ? error2 : error3 ? ECHILD : 0; + return error1 ? error1 : error2 ? error2 : error3; } return 0; } +static void +do_get_bridge_parts(const char *bridge, struct svec *parts, int vlan, + bool break_down_bonds) +{ + struct svec ports; + int i; + + svec_init(&ports); + cfg_get_all_keys(&ports, "bridge.%s.port", bridge); + for (i = 0; i < ports.n; i++) { + const char *port_name = ports.names[i]; + if (vlan >= 0) { + int port_vlan = cfg_get_vlan(0, "vlan.%s.tag", port_name); + if (port_vlan < 0) { + port_vlan = 0; + } + if (vlan != port_vlan) { + continue; + } + } + if (break_down_bonds && cfg_has_section("bonding.%s", port_name)) { + struct svec slaves; + svec_init(&slaves); + cfg_get_all_keys(&slaves, "bonding.%s.slave", port_name); + svec_append(parts, &slaves); + svec_destroy(&slaves); + } else { + svec_add(parts, port_name); + } + } + svec_destroy(&ports); +} + +/* Add all the interfaces for 'bridge' to 'ifaces', breaking bonded interfaces + * down into their constituent parts. + * + * If 'vlan' < 0, all interfaces on 'bridge' are reported. If 'vlan' == 0, + * then only interfaces for trunk ports or ports with implicit VLAN 0 are + * reported. If 'vlan' > 0, only interfaces with implicit VLAN 'vlan' are + * reported. */ +static void +get_bridge_ifaces(const char *bridge, struct svec *ifaces, int vlan) +{ + do_get_bridge_parts(bridge, ifaces, vlan, true); +} + +/* Add all the ports for 'bridge' to 'ports'. Bonded ports are reported under + * the bond name, not broken down into their constituent interfaces. + * + * If 'vlan' < 0, all ports on 'bridge' are reported. If 'vlan' == 0, then + * only trunk ports or ports with implicit VLAN 0 are reported. If 'vlan' > 0, + * only port with implicit VLAN 'vlan' are reported. */ +static void +get_bridge_ports(const char *bridge, struct svec *ports, int vlan) +{ + do_get_bridge_parts(bridge, ports, vlan, false); +} + /* Go through the configuration file and remove any ports that no longer * exist associated with a bridge. */ static void prune_ports(void) { int i, j; - int error; struct svec bridges, delete; if (cfg_lock(NULL, 0)) { @@ -219,32 +315,13 @@ prune_ports(void) cfg_get_subsections(&bridges, "bridge"); for (i=0; idata)->nlmsg_seq; - *br_name = nl_attr_get_string(attrs[BRC_GENL_A_DP_NAME]); + if (br_name) { + *br_name = nl_attr_get_string(attrs[BRC_GENL_A_DP_NAME]); + } if (port_name) { *port_name = nl_attr_get_string(attrs[BRC_GENL_A_PORT_NAME]); } + if (count) { + *count = nl_attr_get_u64(attrs[BRC_GENL_A_FDB_COUNT]); + } + if (skip) { + *skip = nl_attr_get_u64(attrs[BRC_GENL_A_FDB_SKIP]); + } return 0; } -static void -send_reply(uint32_t seq, int error) +/* Composes and returns a reply to a request made by the datapath with Netlink + * sequence number 'seq' and error code 'error'. The caller may add additional + * attributes to the message, then it may send it with send_reply(). */ +static struct ofpbuf * +compose_reply(uint32_t seq, int error) { - struct ofpbuf msg; - int retval; - - /* Compose reply. */ - ofpbuf_init(&msg, 0); - nl_msg_put_genlmsghdr(&msg, brc_sock, 32, brc_family, NLM_F_REQUEST, + struct ofpbuf *reply = ofpbuf_new(4096); + nl_msg_put_genlmsghdr(reply, brc_sock, 32, brc_family, NLM_F_REQUEST, BRC_GENL_C_DP_RESULT, 1); - ((struct nlmsghdr *) msg.data)->nlmsg_seq = seq; - nl_msg_put_u32(&msg, BRC_GENL_A_ERR_CODE, error); + ((struct nlmsghdr *) reply->data)->nlmsg_seq = seq; + nl_msg_put_u32(reply, BRC_GENL_A_ERR_CODE, error); + return reply; +} - /* Send reply. */ - retval = nl_sock_send(brc_sock, &msg, false); +/* Sends 'reply' to the datapath and frees it. */ +static void +send_reply(struct ofpbuf *reply) +{ + int retval = nl_sock_send(brc_sock, reply, false); if (retval) { VLOG_WARN_RL(&rl, "replying to brcompat request: %s", strerror(retval)); } - ofpbuf_uninit(&msg); + ofpbuf_delete(reply); +} + +/* Composes and sends a reply to a request made by the datapath with Netlink + * sequence number 'seq' and error code 'error'. */ +static void +send_simple_reply(uint32_t seq, int error) +{ + send_reply(compose_reply(seq, error)); } static int @@ -400,13 +474,13 @@ handle_bridge_cmd(struct ofpbuf *buffer, bool add) uint32_t seq; int error; - error = parse_command(buffer, &seq, &br_name, NULL); + error = parse_command(buffer, &seq, &br_name, NULL, NULL, NULL); if (!error) { error = add ? add_bridge(br_name) : del_bridge(br_name); if (!error) { error = rewrite_and_reload_config(); } - send_reply(seq, error); + send_simple_reply(seq, error); } return error; } @@ -432,7 +506,7 @@ handle_port_cmd(struct ofpbuf *buffer, bool add) uint32_t seq; int error; - error = parse_command(buffer, &seq, &br_name, &port_name); + error = parse_command(buffer, &seq, &br_name, &port_name, NULL, NULL); if (!error) { if (!bridge_exists(br_name)) { VLOG_WARN("%s %s %s: no bridge named %s", @@ -451,12 +525,332 @@ handle_port_cmd(struct ofpbuf *buffer, bool add) VLOG_INFO("%s %s %s: success", cmd_name, br_name, port_name); error = rewrite_and_reload_config(); } - send_reply(seq, error); + send_simple_reply(seq, error); } return error; } +/* Returns the name of the bridge that contains a port named 'port_name', as a + * malloc'd string that the caller must free, or a null pointer if no bridge + * contains a port named 'port_name'. */ +static char * +get_bridge_containing_port(const char *port_name) +{ + struct svec matches; + const char *start, *end; + + svec_init(&matches); + cfg_get_matches(&matches, "bridge.*.port=%s", port_name); + if (!matches.n) { + return 0; + } + + start = matches.names[0] + strlen("bridge."); + end = strstr(start, ".port="); + assert(end); + return xmemdup0(start, end - start); +} + +static int +linux_bridge_to_ovs_bridge(const char *linux_bridge, + char **ovs_bridge, int *br_vlan) +{ + if (bridge_exists(linux_bridge)) { + /* Bridge name is the same. We are interested in VLAN 0. */ + *ovs_bridge = xstrdup(linux_bridge); + *br_vlan = 0; + return 0; + } else { + /* No such Open vSwitch bridge 'linux_bridge', but there might be an + * internal port named 'linux_bridge' on some other bridge + * 'ovs_bridge'. If so then we are interested in the VLAN assigned to + * port 'linux_bridge' on the bridge named 'ovs_bridge'. */ + const char *port_name = linux_bridge; + + *ovs_bridge = get_bridge_containing_port(port_name); + *br_vlan = cfg_get_vlan(0, "vlan.%s.tag", port_name); + if (*ovs_bridge && *br_vlan >= 0) { + return 0; + } else { + free(*ovs_bridge); + return ENODEV; + } + } +} + +static int +handle_fdb_query_cmd(struct ofpbuf *buffer) +{ + /* This structure is copied directly from the Linux 2.6.30 header files. + * It would be more straightforward to #include , but + * the 'port_hi' member was only introduced in Linux 2.6.26 and so systems + * with old header files won't have it. */ + struct __fdb_entry { + __u8 mac_addr[6]; + __u8 port_no; + __u8 is_local; + __u32 ageing_timer_value; + __u8 port_hi; + __u8 pad0; + __u16 unused; + }; + + struct mac { + uint8_t addr[6]; + }; + struct mac *local_macs; + int n_local_macs; + int i; + + /* Impedance matching between the vswitchd and Linux kernel notions of what + * a bridge is. The kernel only handles a single VLAN per bridge, but + * vswitchd can deal with all the VLANs on a single bridge. We have to + * pretend that the former is the case even though the latter is the + * implementation. */ + const char *linux_bridge; /* Name used by brctl. */ + char *ovs_bridge; /* Name used by ovs-vswitchd. */ + int br_vlan; /* VLAN tag. */ + struct svec ifaces; + + struct ofpbuf query_data; + struct ofpbuf *reply; + char *unixctl_command; + uint64_t count, skip; + char *output; + char *save_ptr; + uint32_t seq; + int error; + + /* Parse the command received from brcompat_mod. */ + error = parse_command(buffer, &seq, &linux_bridge, NULL, &count, &skip); + if (error) { + return error; + } + + /* Figure out vswitchd bridge and VLAN. */ + cfg_read(); + error = linux_bridge_to_ovs_bridge(linux_bridge, &ovs_bridge, &br_vlan); + if (error) { + send_simple_reply(seq, error); + return error; + } + + /* Fetch the forwarding database using ovs-appctl. */ + unixctl_command = xasprintf("fdb/show %s", ovs_bridge); + error = execute_appctl_command(unixctl_command, &output); + free(unixctl_command); + if (error) { + free(ovs_bridge); + send_simple_reply(seq, error); + return error; + } + + /* Fetch the MAC address for each interface on the bridge, so that we can + * fill in the is_local field in the response. */ + svec_init(&ifaces); + get_bridge_ifaces(ovs_bridge, &ifaces, br_vlan); + local_macs = xmalloc(ifaces.n * sizeof *local_macs); + n_local_macs = 0; + for (i = 0; i < ifaces.n; i++) { + const char *iface_name = ifaces.names[i]; + struct mac *mac = &local_macs[n_local_macs]; + struct netdev *netdev; + + error = netdev_open(iface_name, NETDEV_ETH_TYPE_NONE, &netdev); + if (netdev) { + if (!netdev_get_etheraddr(netdev, mac->addr)) { + n_local_macs++; + } + netdev_close(netdev); + } + } + svec_destroy(&ifaces); + + /* Parse the response from ovs-appctl and convert it to binary format to + * pass back to the kernel. */ + ofpbuf_init(&query_data, sizeof(struct __fdb_entry) * 8); + save_ptr = NULL; + strtok_r(output, "\n", &save_ptr); /* Skip header line. */ + while (count > 0) { + struct __fdb_entry *entry; + int port, vlan, age; + uint8_t mac[ETH_ADDR_LEN]; + char *line; + bool is_local; + + line = strtok_r(NULL, "\n", &save_ptr); + if (!line) { + break; + } + + if (sscanf(line, "%d %d "ETH_ADDR_SCAN_FMT" %d", + &port, &vlan, ETH_ADDR_SCAN_ARGS(mac), &age) + != 2 + ETH_ADDR_SCAN_COUNT + 1) { + struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + VLOG_INFO_RL(&rl, "fdb/show output has invalid format: %s", line); + continue; + } + + if (vlan != br_vlan) { + continue; + } + + if (skip > 0) { + skip--; + continue; + } + + /* Is this the MAC address of an interface on the bridge? */ + is_local = false; + for (i = 0; i < n_local_macs; i++) { + if (eth_addr_equals(local_macs[i].addr, mac)) { + is_local = true; + break; + } + } + + entry = ofpbuf_put_uninit(&query_data, sizeof *entry); + memcpy(entry->mac_addr, mac, ETH_ADDR_LEN); + entry->port_no = port & 0xff; + entry->is_local = is_local; + entry->ageing_timer_value = age * HZ; + entry->port_hi = (port & 0xff00) >> 8; + entry->pad0 = 0; + entry->unused = 0; + count--; + } + free(output); + + /* Compose and send reply to datapath. */ + reply = compose_reply(seq, 0); + nl_msg_put_unspec(reply, BRC_GENL_A_FDB_DATA, + query_data.data, query_data.size); + send_reply(reply); + + /* Free memory. */ + ofpbuf_uninit(&query_data); + free(ovs_bridge); + + return 0; +} + +static void +send_ifindex_reply(uint32_t seq, struct svec *ifaces) +{ + struct ofpbuf *reply; + const char *iface; + size_t n_indices; + int *indices; + size_t i; + + /* Make sure that any given interface only occurs once. This shouldn't + * happen, but who knows what people put into their configuration files. */ + svec_sort_unique(ifaces); + + /* Convert 'ifaces' into ifindexes. */ + n_indices = 0; + indices = xmalloc(ifaces->n * sizeof *indices); + SVEC_FOR_EACH (i, iface, ifaces) { + int ifindex = if_nametoindex(iface); + if (ifindex) { + indices[n_indices++] = ifindex; + } + } + + /* Compose and send reply. */ + reply = compose_reply(seq, 0); + nl_msg_put_unspec(reply, BRC_GENL_A_IFINDEXES, + indices, n_indices * sizeof *indices); + send_reply(reply); + + /* Free memory. */ + free(indices); +} + +static int +handle_get_bridges_cmd(struct ofpbuf *buffer) +{ + struct svec bridges; + const char *br_name; + size_t i; + + uint32_t seq; + + int error; + + /* Parse Netlink command. + * + * The command doesn't actually have any arguments, but we need the + * sequence number to send the reply. */ + error = parse_command(buffer, &seq, NULL, NULL, NULL, NULL); + if (error) { + return error; + } + + /* Get all the real bridges and all the fake ones. */ + cfg_read(); + svec_init(&bridges); + cfg_get_subsections(&bridges, "bridge"); + SVEC_FOR_EACH (i, br_name, &bridges) { + const char *iface_name; + struct svec ifaces; + size_t j; + + svec_init(&ifaces); + get_bridge_ifaces(br_name, &ifaces, -1); + SVEC_FOR_EACH (j, iface_name, &ifaces) { + if (cfg_get_bool(0, "iface.%s.fake-bridge", iface_name)) { + svec_add(&bridges, iface_name); + } + } + svec_destroy(&ifaces); + } + + send_ifindex_reply(seq, &bridges); + svec_destroy(&bridges); + + return 0; +} + +static int +handle_get_ports_cmd(struct ofpbuf *buffer) +{ + uint32_t seq; + + const char *linux_bridge; + char *ovs_bridge; + int br_vlan; + + struct svec ports; + + int error; + + /* Parse Netlink command. */ + error = parse_command(buffer, &seq, &linux_bridge, NULL, NULL, NULL); + if (error) { + return error; + } + + cfg_read(); + error = linux_bridge_to_ovs_bridge(linux_bridge, &ovs_bridge, &br_vlan); + if (error) { + send_simple_reply(seq, error); + return error; + } + + svec_init(&ports); + get_bridge_ports(ovs_bridge, &ports, br_vlan); + svec_sort(&ports); + svec_del(&ports, linux_bridge); + send_ifindex_reply(seq, &ports); /* XXX bonds won't show up */ + svec_destroy(&ports); + + free(ovs_bridge); + + return 0; +} + static int brc_recv_update(void) { @@ -515,6 +909,18 @@ brc_recv_update(void) retval = handle_port_cmd(buffer, false); break; + case BRC_GENL_C_FDB_QUERY: + retval = handle_fdb_query_cmd(buffer); + break; + + case BRC_GENL_C_GET_BRIDGES: + retval = handle_get_bridges_cmd(buffer); + break; + + case BRC_GENL_C_GET_PORTS: + retval = handle_get_ports_cmd(buffer); + break; + default: retval = EPROTO; } @@ -564,8 +970,6 @@ rtnl_recv_update(void) const char *port_name = nl_attr_get_string(attrs[IFLA_IFNAME]); char br_name[IFNAMSIZ]; uint32_t br_idx = nl_attr_get_u32(attrs[IFLA_MASTER]); - struct svec ports; - enum netdev_flags flags; if (!if_indextoname(br_idx, br_name)) { ofpbuf_delete(buf); @@ -579,10 +983,13 @@ rtnl_recv_update(void) return; } - if (netdev_nodev_get_flags(port_name, &flags) == ENODEV) { + if (!netdev_exists(port_name)) { /* Network device is really gone. */ + struct svec ports; + VLOG_INFO("network device %s destroyed, " "removing from bridge %s", port_name, br_name); + svec_init(&ports); cfg_get_all_keys(&ports, "bridge.%s.port", br_name); svec_sort(&ports); @@ -590,6 +997,7 @@ rtnl_recv_update(void) del_port(br_name, port_name); rewrite_and_reload_config(); } + svec_destroy(&ports); } else { /* A network device by that name exists even though the kernel * told us it had disappeared. Probably, what happened was @@ -675,11 +1083,15 @@ main(int argc, char *argv[]) } } - cfg_read(); + retval = cfg_read(); + if (retval) { + ovs_fatal(retval, "could not read config file"); + } for (;;) { unixctl_server_run(unixctl); brc_recv_update(); + netdev_run(); /* If 'prune_timeout' is non-zero, we actively prune from the * config file any 'bridge..port' entries that are no @@ -701,19 +1113,41 @@ main(int argc, char *argv[]) nl_sock_wait(brc_sock, POLLIN); unixctl_server_wait(unixctl); + netdev_wait(); poll_block(); } return 0; } +static void +validate_appctl_command(void) +{ + const char *p; + int n; + + n = 0; + for (p = strchr(appctl_command, '%'); p; p = strchr(p + 2, '%')) { + if (p[1] == '%') { + /* Nothing to do. */ + } else if (p[1] == 's') { + n++; + } else { + ovs_fatal(0, "only '%%s' and '%%%%' allowed in --appctl-command"); + } + } + if (n != 1) { + ovs_fatal(0, "'%%s' must appear exactly once in --appctl-command"); + } +} + static void parse_options(int argc, char *argv[]) { enum { OPT_LOCK_TIMEOUT = UCHAR_MAX + 1, OPT_PRUNE_TIMEOUT, - OPT_RELOAD_COMMAND, + OPT_APPCTL_COMMAND, VLOG_OPTION_ENUMS, LEAK_CHECKER_OPTION_ENUMS }; @@ -722,7 +1156,7 @@ parse_options(int argc, char *argv[]) {"version", no_argument, 0, 'V'}, {"lock-timeout", required_argument, 0, OPT_LOCK_TIMEOUT}, {"prune-timeout", required_argument, 0, OPT_PRUNE_TIMEOUT}, - {"reload-command", required_argument, 0, OPT_RELOAD_COMMAND}, + {"appctl-command", required_argument, 0, OPT_APPCTL_COMMAND}, DAEMON_LONG_OPTIONS, VLOG_LONG_OPTIONS, LEAK_CHECKER_LONG_OPTIONS, @@ -731,10 +1165,9 @@ parse_options(int argc, char *argv[]) char *short_options = long_options_to_short_options(long_options); int error; - reload_command = xasprintf("%s/ovs-appctl -t " + appctl_command = xasprintf("%s/ovs-appctl -t " "%s/ovs-vswitchd.`cat %s/ovs-vswitchd.pid`.ctl " - "-e vswitchd/reload 2>&1 " - "| /usr/bin/logger -t brcompatd-reload", + "-e '%%s'", ovs_bindir, ovs_rundir, ovs_rundir); for (;;) { int c; @@ -761,8 +1194,8 @@ parse_options(int argc, char *argv[]) prune_timeout = atoi(optarg) * 1000; break; - case OPT_RELOAD_COMMAND: - reload_command = optarg; + case OPT_APPCTL_COMMAND: + appctl_command = optarg; break; VLOG_OPTION_HANDLERS @@ -778,6 +1211,8 @@ parse_options(int argc, char *argv[]) } free(short_options); + validate_appctl_command(); + argc -= optind; argv += optind; @@ -786,6 +1221,7 @@ parse_options(int argc, char *argv[]) "use --help for usage"); } + cfg_init(); config_file = argv[0]; error = cfg_set_file(config_file); if (error) { @@ -802,7 +1238,7 @@ usage(void) "CONFIG is the configuration file used by ovs-vswitchd.\n", program_name, program_name); printf("\nConfiguration options:\n" - " --reload-command=COMMAND shell command to reload ovs-vswitchd\n" + " --appctl-command=COMMAND shell command to run ovs-appctl\n" " --prune-timeout=SECS wait at most SECS before pruning ports\n" " --lock-timeout=MSECS wait at most MSECS for CONFIG to unlock\n" ); @@ -812,6 +1248,6 @@ usage(void) " -h, --help display this help message\n" " -V, --version display version information\n"); leak_checker_usage(); - printf("\nThe default reload command is:\n%s\n", reload_command); + printf("\nThe default appctl command is:\n%s\n", appctl_command); exit(EXIT_SUCCESS); }