X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=vswitchd%2Fbridge.c;h=b7f068bf0cd181407bc7cf74b8cca5b52c774bab;hb=efdd908807544de7bee8644fafc1400e6a32d936;hp=8314c5392bd2b73c8e332bc1aab336ac551c045d;hpb=9d82ec478d52edfddd215dff1b0659ed7508b365;p=sliver-openvswitch.git diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 8314c5392..b7f068bf0 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -82,12 +82,10 @@ struct iface { long long delay_expires; /* Time after which 'enabled' may change. */ /* These members are valid only after bridge_reconfigure() causes them to - * be initialized.*/ + * be initialized. */ int dp_ifidx; /* Index within kernel datapath. */ struct netdev *netdev; /* Network device. */ bool enabled; /* May be chosen for flows? */ - - /* This member is only valid *during* bridge_reconfigure(). */ const struct ovsrec_interface *cfg; }; @@ -125,6 +123,7 @@ struct port { int vlan; /* -1=trunk port, else a 12-bit VLAN ID. */ unsigned long *trunks; /* Bitmap of trunked VLANs, if 'vlan' == -1. * NULL if all VLANs are trunked. */ + const struct ovsrec_port *cfg; char *name; /* An ordinary bridge port has 1 interface. @@ -148,9 +147,6 @@ struct port { mirror_mask_t src_mirrors; /* Mirrors triggered when packet received. */ mirror_mask_t dst_mirrors; /* Mirrors triggered when packet sent. */ bool is_mirror_output_port; /* Does port mirroring send frames here? */ - - /* This member is only valid *during* bridge_reconfigure(). */ - const struct ovsrec_port *cfg; }; #define DP_MAX_PORTS 255 @@ -158,19 +154,12 @@ struct bridge { struct list node; /* Node in global list of bridges. */ char *name; /* User-specified arbitrary name. */ struct mac_learning *ml; /* MAC learning table. */ - bool sent_config_request; /* Successfully sent config request? */ uint8_t default_ea[ETH_ADDR_LEN]; /* Default MAC. */ + const struct ovsrec_bridge *cfg; /* OpenFlow switch processing. */ struct ofproto *ofproto; /* OpenFlow switch. */ - /* Description strings. */ - char *mfr_desc; /* Manufacturer. */ - char *hw_desc; /* Hardware. */ - char *sw_desc; /* Software version. */ - char *serial_desc; /* Serial number. */ - char *dp_desc; /* Datapath description. */ - /* Kernel datapath information. */ struct dpif *dpif; /* Datapath. */ struct port_array ifaces; /* Indexed by kernel datapath port number. */ @@ -187,26 +176,26 @@ struct bridge { /* Flow tracking. */ bool flush; - /* Flow statistics gathering. */ - time_t next_stats_request; - /* Port mirroring. */ struct mirror *mirrors[MAX_MIRRORS]; - - /* This member is only valid *during* bridge_reconfigure(). */ - const struct ovsrec_bridge *cfg; }; /* List of all bridges. */ static struct list all_bridges = LIST_INITIALIZER(&all_bridges); -/* Maximum number of datapaths. */ -enum { DP_MAX = 256 }; +/* OVSDB IDL used to obtain configuration. */ +static struct ovsdb_idl *idl; + +/* Each time this timer expires, the bridge fetches statistics for every + * interface and pushes them into the database. */ +#define IFACE_STATS_INTERVAL (5 * 1000) /* In milliseconds. */ +static long long int iface_stats_timer = LLONG_MIN; static struct bridge *bridge_create(const struct ovsrec_bridge *br_cfg); static void bridge_destroy(struct bridge *); static struct bridge *bridge_lookup(const char *name); static unixctl_cb_func bridge_unixctl_dump_flows; +static unixctl_cb_func bridge_unixctl_reconnect; static int bridge_run_one(struct bridge *); static size_t bridge_get_controllers(const struct ovsrec_open_vswitch *ovs_cfg, const struct bridge *br, @@ -264,54 +253,58 @@ static struct iface *iface_from_dp_ifidx(const struct bridge *, uint16_t dp_ifidx); static bool iface_is_internal(const struct bridge *, const char *name); static void iface_set_mac(struct iface *); +static void iface_update_qos(struct iface *, const struct ovsrec_qos *); /* Hooks into ofproto processing. */ static struct ofhooks bridge_ofhooks; /* Public functions. */ -/* Adds the name of each interface used by a bridge, including local and - * internal ports, to 'svec'. */ +/* Initializes the bridge module, configuring it to obtain its configuration + * from an OVSDB server accessed over 'remote', which should be a string in a + * form acceptable to ovsdb_idl_create(). */ void -bridge_get_ifaces(struct svec *svec) +bridge_init(const char *remote) { - struct bridge *br, *next; - size_t i, j; + /* Create connection to database. */ + idl = ovsdb_idl_create(remote, &ovsrec_idl_class); - LIST_FOR_EACH_SAFE (br, next, struct bridge, node, &all_bridges) { - for (i = 0; i < br->n_ports; i++) { - struct port *port = br->ports[i]; - - for (j = 0; j < port->n_ifaces; j++) { - struct iface *iface = port->ifaces[j]; - if (iface->dp_ifidx < 0) { - VLOG_ERR("%s interface not in datapath %s, ignoring", - iface->name, dpif_name(br->dpif)); - } else { - if (iface->dp_ifidx != ODPP_LOCAL) { - svec_add(svec, iface->name); - } - } - } - } - } + /* Register unixctl commands. */ + unixctl_command_register("fdb/show", bridge_unixctl_fdb_show, NULL); + unixctl_command_register("bridge/dump-flows", bridge_unixctl_dump_flows, + NULL); + unixctl_command_register("bridge/reconnect", bridge_unixctl_reconnect, + NULL); + bond_init(); } -void -bridge_init(const struct ovsrec_open_vswitch *cfg) +/* Performs configuration that is only necessary once at ovs-vswitchd startup, + * but for which the ovs-vswitchd configuration 'cfg' is required. */ +static void +bridge_configure_once(const struct ovsrec_open_vswitch *cfg) { + static bool already_configured_once; struct svec bridge_names; struct svec dpif_names, dpif_types; size_t i; - unixctl_command_register("fdb/show", bridge_unixctl_fdb_show, NULL); + /* Only do this once per ovs-vswitchd run. */ + if (already_configured_once) { + return; + } + already_configured_once = true; + + iface_stats_timer = time_msec() + IFACE_STATS_INTERVAL; + /* Get all the configured bridges' names from 'cfg' into 'bridge_names'. */ svec_init(&bridge_names); for (i = 0; i < cfg->n_bridges; i++) { svec_add(&bridge_names, cfg->bridges[i]->name); } svec_sort(&bridge_names); + /* Iterate over all system dpifs and delete any of them that do not appear + * in 'cfg'. */ svec_init(&dpif_names); svec_init(&dpif_types); dp_enumerate_types(&dpif_types); @@ -322,12 +315,14 @@ bridge_init(const struct ovsrec_open_vswitch *cfg) dp_enumerate_names(dpif_types.names[i], &dpif_names); + /* For each dpif... */ for (j = 0; j < dpif_names.n; j++) { retval = dpif_open(dpif_names.names[j], dpif_types.names[i], &dpif); if (!retval) { struct svec all_names; size_t k; + /* ...check whether any of its names is in 'bridge_names'. */ svec_init(&all_names); dpif_get_all_names(dpif, &all_names); for (k = 0; k < all_names.n; k++) { @@ -335,7 +330,10 @@ bridge_init(const struct ovsrec_open_vswitch *cfg) goto found; } } + + /* No. Delete the dpif. */ dpif_delete(dpif); + found: svec_destroy(&all_names); dpif_close(dpif); @@ -345,12 +343,6 @@ bridge_init(const struct ovsrec_open_vswitch *cfg) svec_destroy(&bridge_names); svec_destroy(&dpif_names); svec_destroy(&dpif_types); - - unixctl_command_register("bridge/dump-flows", bridge_unixctl_dump_flows, - NULL); - - bond_init(); - bridge_reconfigure(cfg); } #ifdef HAVE_OPENSSL @@ -372,7 +364,6 @@ static int set_up_iface(const struct ovsrec_interface *iface_cfg, struct iface *iface, bool create) { - struct shash_node *node; struct shash options; int error = 0; size_t i; @@ -396,10 +387,6 @@ set_up_iface(const struct ovsrec_interface *iface_cfg, struct iface *iface, } netdev_options.args = &options; netdev_options.ethertype = NETDEV_ETH_TYPE_NONE; - netdev_options.may_create = true; - if (iface_is_internal(iface->port->bridge, iface_cfg->name)) { - netdev_options.may_open = true; - } error = netdev_open(&netdev_options, &iface->netdev); @@ -424,11 +411,7 @@ set_up_iface(const struct ovsrec_interface *iface_cfg, struct iface *iface, error = EINVAL; } } - - SHASH_FOR_EACH (node, &options) { - free(node->data); - } - shash_destroy(&options); + shash_destroy_free_data(&options); return error; } @@ -559,10 +542,9 @@ collect_managers(const struct ovsrec_open_vswitch *ovs_cfg, *n_managersp = n_managers; } -void +static void bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) { - struct ovsdb_idl_txn *txn; struct shash old_br, new_br; struct shash_node *node; struct bridge *br, *next; @@ -573,8 +555,6 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) COVERAGE_INC(bridge_reconfigure); - txn = ovsdb_idl_txn_create(ovs_cfg->header_.table->idl); - collect_managers(ovs_cfg, &managers, &n_managers); /* Collect old and new bridges. */ @@ -665,9 +645,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) shash_init(&cur_ifaces); for (i = 0; i < n_dpif_ports; i++) { const char *name = dpif_ports[i].devname; - if (!shash_find(&cur_ifaces, name)) { - shash_add(&cur_ifaces, name, NULL); - } + shash_add_once(&cur_ifaces, name, NULL); } free(dpif_ports); @@ -734,7 +712,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) dpid = bridge_pick_datapath_id(br, ea, hw_addr_iface); ofproto_set_datapath_id(br->ofproto, dpid); - dpid_string = xasprintf("%012"PRIx64, dpid); + dpid_string = xasprintf("%016"PRIx64, dpid); ovsrec_bridge_set_datapath_id(br->cfg, dpid_string); free(dpid_string); @@ -847,20 +825,20 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { for (i = 0; i < br->n_ports; i++) { struct port *port = br->ports[i]; + int j; port_update_vlan_compat(port); port_update_bonding(port); + + for (j = 0; j < port->n_ifaces; j++) { + iface_update_qos(port->ifaces[j], port->cfg->qos); + } } } LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { iterate_and_prune_ifaces(br, set_iface_properties, NULL); } - ovsrec_open_vswitch_set_cur_cfg(ovs_cfg, ovs_cfg->next_cfg); - - ovsdb_idl_txn_commit(txn); - ovsdb_idl_txn_destroy(txn); /* XXX */ - free(managers); } @@ -1079,25 +1057,115 @@ dpid_from_hash(const void *data, size_t n) return eth_addr_to_uint64(hash); } -int +static void +iface_refresh_stats(struct iface *iface) +{ + struct iface_stat { + char *name; + int offset; + }; + static const struct iface_stat iface_stats[] = { + { "rx_packets", offsetof(struct netdev_stats, rx_packets) }, + { "tx_packets", offsetof(struct netdev_stats, tx_packets) }, + { "rx_bytes", offsetof(struct netdev_stats, rx_bytes) }, + { "tx_bytes", offsetof(struct netdev_stats, tx_bytes) }, + { "rx_dropped", offsetof(struct netdev_stats, rx_dropped) }, + { "tx_dropped", offsetof(struct netdev_stats, tx_dropped) }, + { "rx_errors", offsetof(struct netdev_stats, rx_errors) }, + { "tx_errors", offsetof(struct netdev_stats, tx_errors) }, + { "rx_frame_err", offsetof(struct netdev_stats, rx_frame_errors) }, + { "rx_over_err", offsetof(struct netdev_stats, rx_over_errors) }, + { "rx_crc_err", offsetof(struct netdev_stats, rx_crc_errors) }, + { "collisions", offsetof(struct netdev_stats, collisions) }, + }; + enum { N_STATS = ARRAY_SIZE(iface_stats) }; + const struct iface_stat *s; + + char *keys[N_STATS]; + int64_t values[N_STATS]; + int n; + + struct netdev_stats stats; + + /* Intentionally ignore return value, since errors will set 'stats' to + * all-1s, and we will deal with that correctly below. */ + netdev_get_stats(iface->netdev, &stats); + + n = 0; + for (s = iface_stats; s < &iface_stats[N_STATS]; s++) { + uint64_t value = *(uint64_t *) (((char *) &stats) + s->offset); + if (value != UINT64_MAX) { + keys[n] = s->name; + values[n] = value; + n++; + } + } + + ovsrec_interface_set_statistics(iface->cfg, keys, values, n); +} + +void bridge_run(void) { - struct bridge *br, *next; - int retval; + bool datapath_destroyed; + struct bridge *br; - retval = 0; - LIST_FOR_EACH_SAFE (br, next, struct bridge, node, &all_bridges) { + /* Let each bridge do the work that it needs to do. */ + datapath_destroyed = false; + LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { int error = bridge_run_one(br); if (error) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); VLOG_ERR_RL(&rl, "bridge %s: datapath was destroyed externally, " "forcing reconfiguration", br->name); - if (!retval) { - retval = error; + datapath_destroyed = true; + } + } + + /* (Re)configure if necessary. */ + if (ovsdb_idl_run(idl) || datapath_destroyed) { + const struct ovsrec_open_vswitch *cfg = ovsrec_open_vswitch_first(idl); + if (cfg) { + struct ovsdb_idl_txn *txn = ovsdb_idl_txn_create(idl); + + bridge_configure_once(cfg); + bridge_reconfigure(cfg); + + ovsrec_open_vswitch_set_cur_cfg(cfg, cfg->next_cfg); + ovsdb_idl_txn_commit(txn); + ovsdb_idl_txn_destroy(txn); /* XXX */ + } else { + /* We still need to reconfigure to avoid dangling pointers to + * now-destroyed ovsrec structures inside bridge data. */ + static const struct ovsrec_open_vswitch null_cfg; + + bridge_reconfigure(&null_cfg); + } + } + + /* Refresh interface stats if necessary. */ + if (time_msec() >= iface_stats_timer) { + struct ovsdb_idl_txn *txn; + + txn = ovsdb_idl_txn_create(idl); + LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { + size_t i; + + for (i = 0; i < br->n_ports; i++) { + struct port *port = br->ports[i]; + size_t j; + + for (j = 0; j < port->n_ifaces; j++) { + struct iface *iface = port->ifaces[j]; + iface_refresh_stats(iface); + } } } + ovsdb_idl_txn_commit(txn); + ovsdb_idl_txn_destroy(txn); /* XXX */ + + iface_stats_timer = time_msec() + IFACE_STATS_INTERVAL; } - return retval; } void @@ -1114,6 +1182,8 @@ bridge_wait(void) mac_learning_wait(br->ml); bond_wait(br); } + ovsdb_idl_wait(idl); + poll_timer_wait_until(iface_stats_timer); } /* Forces 'br' to revalidate all of its flows. This is appropriate when 'br''s @@ -1206,7 +1276,6 @@ bridge_create(const struct ovsrec_bridge *br_cfg) br->name = xstrdup(br_cfg->name); br->cfg = br_cfg; br->ml = mac_learning_create(); - br->sent_config_request = false; eth_addr_nicira_random(br->default_ea); port_array_init(&br->ifaces); @@ -1263,19 +1332,6 @@ bridge_lookup(const char *name) return NULL; } -bool -bridge_exists(const char *name) -{ - return bridge_lookup(name) ? true : false; -} - -uint64_t -bridge_get_datapathid(const char *name) -{ - struct bridge *br = bridge_lookup(name); - return br ? ofproto_get_datapath_id(br->ofproto) : 0; -} - /* Handle requests for a listing of all flows known by the OpenFlow * stack, including those normally hidden. */ static void @@ -1298,6 +1354,29 @@ bridge_unixctl_dump_flows(struct unixctl_conn *conn, ds_destroy(&results); } +/* "bridge/reconnect [BRIDGE]": makes BRIDGE drop all of its controller + * connections and reconnect. If BRIDGE is not specified, then all bridges + * drop their controller connections and reconnect. */ +static void +bridge_unixctl_reconnect(struct unixctl_conn *conn, + const char *args, void *aux OVS_UNUSED) +{ + struct bridge *br; + if (args[0] != '\0') { + br = bridge_lookup(args); + if (!br) { + unixctl_command_reply(conn, 501, "Unknown bridge"); + return; + } + ofproto_reconnect_controllers(br->ofproto); + } else { + LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { + ofproto_reconnect_controllers(br->ofproto); + } + } + unixctl_command_reply(conn, 200, NULL); +} + static int bridge_run_one(struct bridge *br) { @@ -1344,75 +1423,6 @@ bridge_get_controllers(const struct ovsrec_open_vswitch *ovs_cfg, return n_controllers; } -static void -bridge_update_desc(struct bridge *br OVS_UNUSED) -{ -#if 0 - bool changed = false; - const char *desc; - - desc = cfg_get_string(0, "bridge.%s.mfr-desc", br->name); - if (desc != br->mfr_desc) { - free(br->mfr_desc); - if (desc) { - br->mfr_desc = xstrdup(desc); - } else { - br->mfr_desc = xstrdup(DEFAULT_MFR_DESC); - } - changed = true; - } - - desc = cfg_get_string(0, "bridge.%s.hw-desc", br->name); - if (desc != br->hw_desc) { - free(br->hw_desc); - if (desc) { - br->hw_desc = xstrdup(desc); - } else { - br->hw_desc = xstrdup(DEFAULT_HW_DESC); - } - changed = true; - } - - desc = cfg_get_string(0, "bridge.%s.sw-desc", br->name); - if (desc != br->sw_desc) { - free(br->sw_desc); - if (desc) { - br->sw_desc = xstrdup(desc); - } else { - br->sw_desc = xstrdup(DEFAULT_SW_DESC); - } - changed = true; - } - - desc = cfg_get_string(0, "bridge.%s.serial-desc", br->name); - if (desc != br->serial_desc) { - free(br->serial_desc); - if (desc) { - br->serial_desc = xstrdup(desc); - } else { - br->serial_desc = xstrdup(DEFAULT_SERIAL_DESC); - } - changed = true; - } - - desc = cfg_get_string(0, "bridge.%s.dp-desc", br->name); - if (desc != br->dp_desc) { - free(br->dp_desc); - if (desc) { - br->dp_desc = xstrdup(desc); - } else { - br->dp_desc = xstrdup(DEFAULT_DP_DESC); - } - changed = true; - } - - if (changed) { - ofproto_set_desc(br->ofproto, br->mfr_desc, br->hw_desc, - br->sw_desc, br->serial_desc, br->dp_desc); - } -#endif -} - static void bridge_reconfigure_one(const struct ovsrec_open_vswitch *ovs_cfg, struct bridge *br) @@ -1493,50 +1503,7 @@ bridge_reconfigure_one(const struct ovsrec_open_vswitch *ovs_cfg, * versa. (XXX Should we delete all flows if we are switching from one * controller to another?) */ -#if 0 - /* Configure OpenFlow management listeners. */ - svec_init(&listeners); - cfg_get_all_strings(&listeners, "bridge.%s.openflow.listeners", br->name); - if (!listeners.n) { - svec_add_nocopy(&listeners, xasprintf("punix:%s/%s.mgmt", - ovs_rundir, br->name)); - } else if (listeners.n == 1 && !strcmp(listeners.names[0], "none")) { - svec_clear(&listeners); - } - svec_sort_unique(&listeners); - - svec_init(&old_listeners); - ofproto_get_listeners(br->ofproto, &old_listeners); - svec_sort_unique(&old_listeners); - - if (!svec_equal(&listeners, &old_listeners)) { - ofproto_set_listeners(br->ofproto, &listeners); - } - svec_destroy(&listeners); - svec_destroy(&old_listeners); - - /* Configure OpenFlow controller connection snooping. */ - svec_init(&snoops); - cfg_get_all_strings(&snoops, "bridge.%s.openflow.snoops", br->name); - if (!snoops.n) { - svec_add_nocopy(&snoops, xasprintf("punix:%s/%s.snoop", - ovs_rundir, br->name)); - } else if (snoops.n == 1 && !strcmp(snoops.names[0], "none")) { - svec_clear(&snoops); - } - svec_sort_unique(&snoops); - - svec_init(&old_snoops); - ofproto_get_snoops(br->ofproto, &old_snoops); - svec_sort_unique(&old_snoops); - - if (!svec_equal(&snoops, &old_snoops)) { - ofproto_set_snoops(br->ofproto, &snoops); - } - svec_destroy(&snoops); - svec_destroy(&old_snoops); -#else - /* Default listener. */ + /* Configure OpenFlow management listener. */ svec_init(&listeners); svec_add_nocopy(&listeners, xasprintf("punix:%s/%s.mgmt", ovs_rundir, br->name)); @@ -1548,7 +1515,7 @@ bridge_reconfigure_one(const struct ovsrec_open_vswitch *ovs_cfg, svec_destroy(&listeners); svec_destroy(&old_listeners); - /* Default snoop. */ + /* Configure OpenFlow controller connection snooping. */ svec_init(&snoops); svec_add_nocopy(&snoops, xasprintf("punix:%s/%s.snoop", ovs_rundir, br->name)); @@ -1559,11 +1526,8 @@ bridge_reconfigure_one(const struct ovsrec_open_vswitch *ovs_cfg, } svec_destroy(&snoops); svec_destroy(&old_snoops); -#endif mirror_reconfigure(br); - - bridge_update_desc(br); } static void @@ -1931,10 +1895,19 @@ bond_update_fake_iface_stats(struct port *port) struct netdev_stats slave_stats; if (!netdev_get_stats(port->ifaces[i]->netdev, &slave_stats)) { - bond_stats.rx_packets += slave_stats.rx_packets; - bond_stats.rx_bytes += slave_stats.rx_bytes; - bond_stats.tx_packets += slave_stats.tx_packets; - bond_stats.tx_bytes += slave_stats.tx_bytes; + /* XXX: We swap the stats here because they are swapped back when + * reported by the internal device. The reason for this is + * internal devices normally represent packets going into the system + * but when used as fake bond device they represent packets leaving + * the system. We really should do this in the internal device + * itself because changing it here reverses the counts from the + * perspective of the switch. However, the internal device doesn't + * know what type of device it represents so we have to do it here + * for now. */ + bond_stats.tx_packets += slave_stats.rx_packets; + bond_stats.tx_bytes += slave_stats.rx_bytes; + bond_stats.rx_packets += slave_stats.tx_packets; + bond_stats.rx_bytes += slave_stats.tx_bytes; } } @@ -2250,12 +2223,34 @@ static int flow_get_vlan(struct bridge *br, const flow_t *flow, return vlan; } +/* A VM broadcasts a gratuitous ARP to indicate that it has resumed after + * migration. Older Citrix-patched Linux DomU used gratuitous ARP replies to + * indicate this; newer upstream kernels use gratuitous ARP requests. */ +static bool +is_gratuitous_arp(const flow_t *flow) +{ + return (flow->dl_type == htons(ETH_TYPE_ARP) + && eth_addr_is_broadcast(flow->dl_dst) + && (flow->nw_proto == ARP_OP_REPLY + || (flow->nw_proto == ARP_OP_REQUEST + && flow->nw_src == flow->nw_dst))); +} + static void update_learning_table(struct bridge *br, const flow_t *flow, int vlan, struct port *in_port) { - tag_type rev_tag = mac_learning_learn(br->ml, flow->dl_src, - vlan, in_port->port_idx); + enum grat_arp_lock_type lock_type; + tag_type rev_tag; + + /* We don't want to learn from gratuitous ARP packets that are reflected + * back over bond slaves so we lock the learning table. */ + lock_type = !is_gratuitous_arp(flow) ? GRAT_ARP_LOCK_NONE : + (in_port->n_ifaces == 1) ? GRAT_ARP_LOCK_SET : + GRAT_ARP_LOCK_CHECK; + + rev_tag = mac_learning_learn(br->ml, flow->dl_src, vlan, in_port->port_idx, + lock_type); if (rev_tag) { /* The log messages here could actually be useful in debugging, * so keep the rate limit relatively high. */ @@ -2269,14 +2264,6 @@ update_learning_table(struct bridge *br, const flow_t *flow, int vlan, } } -static bool -is_bcast_arp_reply(const flow_t *flow) -{ - return (flow->dl_type == htons(ETH_TYPE_ARP) - && flow->nw_proto == ARP_OP_REPLY - && eth_addr_is_broadcast(flow->dl_dst)); -} - /* Determines whether packets in 'flow' within 'br' should be forwarded or * dropped. Returns true if they may be forwarded, false if they should be * dropped. @@ -2355,6 +2342,7 @@ is_admissible(struct bridge *br, const flow_t *flow, bool have_packet, /* Packets received on bonds need special attention to avoid duplicates. */ if (in_port->n_ifaces > 1) { int src_idx; + bool is_grat_arp_locked; if (eth_addr_is_multicast(flow->dl_dst)) { *tags |= in_port->active_iface_tag; @@ -2366,11 +2354,15 @@ is_admissible(struct bridge *br, const flow_t *flow, bool have_packet, /* Drop all packets for which we have learned a different input * port, because we probably sent the packet on one slave and got - * it back on the other. Broadcast ARP replies are an exception - * to this rule: the host has moved to another switch. */ - src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan); + * it back on the other. Gratuitous ARP packets are an exception + * to this rule: the host has moved to another switch. The exception + * to the exception is if we locked the learning table to avoid + * reflections on bond slaves. If this is the case, just drop the + * packet now. */ + src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan, + &is_grat_arp_locked); if (src_idx != -1 && src_idx != in_port->port_idx && - !is_bcast_arp_reply(flow)) { + (!is_gratuitous_arp(flow) || is_grat_arp_locked)) { return false; } } @@ -2403,7 +2395,8 @@ process_flow(struct bridge *br, const flow_t *flow, } /* Determine output port. */ - out_port_idx = mac_learning_lookup_tag(br->ml, flow->dl_dst, vlan, tags); + out_port_idx = mac_learning_lookup_tag(br->ml, flow->dl_dst, vlan, tags, + NULL); if (out_port_idx >= 0 && out_port_idx < br->n_ports) { out_port = br->ports[out_port_idx]; } else if (!packet && !eth_addr_is_multicast(flow->dl_dst)) { @@ -3746,6 +3739,90 @@ iface_set_mac(struct iface *iface) } } } + +static void +shash_from_ovs_idl_map(char **keys, char **values, size_t n, + struct shash *shash) +{ + size_t i; + + shash_init(shash); + for (i = 0; i < n; i++) { + shash_add(shash, keys[i], values[i]); + } +} + +struct iface_delete_queues_cbdata { + struct netdev *netdev; + const int64_t *queue_ids; + size_t n_queue_ids; +}; + +static bool +queue_ids_include(const int64_t *ids, size_t n, int64_t target) +{ + size_t low = 0; + size_t high = n; + + while (low < high) { + size_t mid = low + (high - low) / 2; + if (target > ids[mid]) { + high = mid; + } else if (target < ids[mid]) { + low = mid + 1; + } else { + return true; + } + } + return false; +} + +static void +iface_delete_queues(unsigned int queue_id, + const struct shash *details OVS_UNUSED, void *cbdata_) +{ + struct iface_delete_queues_cbdata *cbdata = cbdata_; + + if (!queue_ids_include(cbdata->queue_ids, cbdata->n_queue_ids, queue_id)) { + netdev_delete_queue(cbdata->netdev, queue_id); + } +} + +static void +iface_update_qos(struct iface *iface, const struct ovsrec_qos *qos) +{ + if (!qos || qos->type[0] == '\0') { + netdev_set_qos(iface->netdev, NULL, NULL); + } else { + struct iface_delete_queues_cbdata cbdata; + struct shash details; + size_t i; + + /* Configure top-level Qos for 'iface'. */ + shash_from_ovs_idl_map(qos->key_other_config, qos->value_other_config, + qos->n_other_config, &details); + netdev_set_qos(iface->netdev, qos->type, &details); + shash_destroy(&details); + + /* Deconfigure queues that were deleted. */ + cbdata.netdev = iface->netdev; + cbdata.queue_ids = qos->key_queues; + cbdata.n_queue_ids = qos->n_queues; + netdev_dump_queues(iface->netdev, iface_delete_queues, &cbdata); + + /* Configure queues for 'iface'. */ + for (i = 0; i < qos->n_queues; i++) { + const struct ovsrec_queue *queue = qos->value_queues[i]; + unsigned int queue_id = qos->key_queues[i]; + + shash_from_ovs_idl_map(queue->key_other_config, + queue->value_other_config, + queue->n_other_config, &details); + netdev_set_queue(iface->netdev, queue_id, &details); + shash_destroy(&details); + } + } +} /* Port mirroring. */