X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=vswitchd%2Fbridge.c;h=cf26e87610125593beaa1484476a3ad85c5adaed;hb=f40869bdf6feca4d3ff7c59a1fb1f7ac101bc967;hp=ab0ecd6dfd43ac17cbd7b02e8a7c118b13170f1d;hpb=f4f1ea7eaca92e2ca44c9624b3bb7d6426b2ddea;p=sliver-openvswitch.git diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index ab0ecd6df..cf26e8761 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -18,6 +18,7 @@ #include #include #include +#include "bfd.h" #include "bitmap.h" #include "bond.h" #include "cfm.h" @@ -146,6 +147,23 @@ static struct hmap all_bridges = HMAP_INITIALIZER(&all_bridges); /* OVSDB IDL used to obtain configuration. */ static struct ovsdb_idl *idl; +/* We want to complete daemonization, fully detaching from our parent process, + * only after we have completed our initial configuration, committed our state + * to the database, and received confirmation back from the database server + * that it applied the commit. This allows our parent process to know that, + * post-detach, ephemeral fields such as datapath-id and ofport are very likely + * to have already been filled in. (It is only "very likely" rather than + * certain because there is always a slim possibility that the transaction will + * fail or that some other client has added new bridges, ports, etc. while + * ovs-vswitchd was configuring using an old configuration.) + * + * We only need to do this once for our initial configuration at startup, so + * 'initial_config_done' tracks whether we've already done it. While we are + * waiting for a response to our commit, 'daemonize_txn' tracks the transaction + * itself and is otherwise NULL. */ +static bool initial_config_done; +static struct ovsdb_idl_txn *daemonize_txn; + /* Most recently processed IDL sequence number. */ static unsigned int idl_seqno; @@ -183,6 +201,7 @@ static void bridge_configure_netflow(struct bridge *); static void bridge_configure_forward_bpdu(struct bridge *); static void bridge_configure_mac_table(struct bridge *); static void bridge_configure_sflow(struct bridge *, int *sflow_bridge_number); +static void bridge_configure_ipfix(struct bridge *); static void bridge_configure_stp(struct bridge *); static void bridge_configure_tables(struct bridge *); static void bridge_configure_dp_desc(struct bridge *); @@ -318,7 +337,7 @@ void bridge_init(const char *remote) { /* Create connection to database. */ - idl = ovsdb_idl_create(remote, &ovsrec_idl_class, true); + idl = ovsdb_idl_create(remote, &ovsrec_idl_class, true, true); idl_seqno = ovsdb_idl_get_seqno(idl); ovsdb_idl_set_lock(idl, "ovs_vswitchd"); ovsdb_idl_verify_write_only(idl); @@ -345,6 +364,7 @@ bridge_init(const char *remote) ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_link_speed); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_link_state); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_link_resets); + ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_mac_in_use); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_mtu); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_ofport); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_statistics); @@ -354,6 +374,7 @@ bridge_init(const char *remote) ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_remote_mpids); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_health); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_remote_opstate); + ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_bfd_status); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_lacp_current); ovsdb_idl_omit(idl, &ovsrec_interface_col_external_ids); @@ -370,8 +391,9 @@ bridge_init(const char *remote) ovsdb_idl_omit_alert(idl, &ovsrec_mirror_col_statistics); ovsdb_idl_omit(idl, &ovsrec_netflow_col_external_ids); - ovsdb_idl_omit(idl, &ovsrec_sflow_col_external_ids); + ovsdb_idl_omit(idl, &ovsrec_ipfix_col_external_ids); + ovsdb_idl_omit(idl, &ovsrec_flow_sample_collector_set_col_external_ids); ovsdb_idl_omit(idl, &ovsrec_manager_col_external_ids); ovsdb_idl_omit(idl, &ovsrec_manager_col_inactivity_probe); @@ -585,6 +607,8 @@ bridge_reconfigure_continue(const struct ovsrec_open_vswitch *ovs_cfg) iface_configure_cfm(iface); iface_configure_qos(iface, port->cfg->qos); iface_set_mac(iface); + ofproto_port_set_bfd(br->ofproto, iface->ofp_port, + &iface->cfg->bfd); } } bridge_configure_mirrors(br); @@ -594,21 +618,13 @@ bridge_reconfigure_continue(const struct ovsrec_open_vswitch *ovs_cfg) bridge_configure_remotes(br, managers, n_managers); bridge_configure_netflow(br); bridge_configure_sflow(br, &sflow_bridge_number); + bridge_configure_ipfix(br); bridge_configure_stp(br); bridge_configure_tables(br); bridge_configure_dp_desc(br); } free(managers); - if (done) { - /* ovs-vswitchd has completed initialization, so allow the process that - * forked us to exit successfully. */ - daemonize_complete(); - reconfiguring = false; - - VLOG_INFO_ONCE("%s (Open vSwitch) %s", program_name, VERSION); - } - return done; } @@ -935,6 +951,79 @@ bridge_configure_sflow(struct bridge *br, int *sflow_bridge_number) sset_destroy(&oso.targets); } +/* Set IPFIX configuration on 'br'. */ +static void +bridge_configure_ipfix(struct bridge *br) +{ + const struct ovsrec_ipfix *be_cfg = br->cfg->ipfix; + const struct ovsrec_flow_sample_collector_set *fe_cfg; + struct ofproto_ipfix_bridge_exporter_options be_opts; + struct ofproto_ipfix_flow_exporter_options *fe_opts = NULL; + size_t n_fe_opts = 0; + + OVSREC_FLOW_SAMPLE_COLLECTOR_SET_FOR_EACH(fe_cfg, idl) { + if (fe_cfg->bridge == br->cfg) { + n_fe_opts++; + } + } + + if (!be_cfg && n_fe_opts == 0) { + ofproto_set_ipfix(br->ofproto, NULL, NULL, 0); + return; + } + + if (be_cfg) { + memset(&be_opts, 0, sizeof be_opts); + + sset_init(&be_opts.targets); + sset_add_array(&be_opts.targets, be_cfg->targets, be_cfg->n_targets); + + if (be_cfg->sampling) { + be_opts.sampling_rate = *be_cfg->sampling; + } else { + be_opts.sampling_rate = SFL_DEFAULT_SAMPLING_RATE; + } + if (be_cfg->obs_domain_id) { + be_opts.obs_domain_id = *be_cfg->obs_domain_id; + } + if (be_cfg->obs_point_id) { + be_opts.obs_point_id = *be_cfg->obs_point_id; + } + } + + if (n_fe_opts > 0) { + struct ofproto_ipfix_flow_exporter_options *opts; + fe_opts = xcalloc(n_fe_opts, sizeof *fe_opts); + opts = fe_opts; + OVSREC_FLOW_SAMPLE_COLLECTOR_SET_FOR_EACH(fe_cfg, idl) { + if (fe_cfg->bridge == br->cfg) { + opts->collector_set_id = fe_cfg->id; + sset_init(&opts->targets); + sset_add_array(&opts->targets, fe_cfg->ipfix->targets, + fe_cfg->ipfix->n_targets); + opts++; + } + } + } + + ofproto_set_ipfix(br->ofproto, be_cfg ? &be_opts : NULL, fe_opts, + n_fe_opts); + + if (be_cfg) { + sset_destroy(&be_opts.targets); + } + + if (n_fe_opts > 0) { + struct ofproto_ipfix_flow_exporter_options *opts = fe_opts; + size_t i; + for (i = 0; i < n_fe_opts; i++) { + sset_destroy(&opts->targets); + opts++; + } + free(fe_opts); + } +} + static void port_configure_stp(const struct ofproto *ofproto, struct port *port, struct ofproto_port_stp_settings *port_s, @@ -1360,7 +1449,7 @@ iface_do_create(const struct bridge *br, if ((port_cfg->vlan_mode && !strcmp(port_cfg->vlan_mode, "splinter")) || iface_is_internal(iface_cfg, br->cfg)) { - netdev_turn_flags_on(netdev, NETDEV_UP, true); + netdev_turn_flags_on(netdev, NETDEV_UP, NULL); } *netdevp = netdev; @@ -1698,6 +1787,7 @@ iface_refresh_status(struct iface *iface) int64_t bps; int mtu; int64_t mtu_64; + uint8_t mac[ETH_ADDR_LEN]; int error; if (iface_is_synthetic(iface)) { @@ -1721,8 +1811,7 @@ iface_refresh_status(struct iface *iface) netdev_features_is_full_duplex(current) ? "full" : "half"); ovsrec_interface_set_link_speed(iface->cfg, &bps, 1); - } - else { + } else { ovsrec_interface_set_duplex(iface->cfg, NULL); ovsrec_interface_set_link_speed(iface->cfg, NULL, 0); } @@ -1731,10 +1820,19 @@ iface_refresh_status(struct iface *iface) if (!error) { mtu_64 = mtu; ovsrec_interface_set_mtu(iface->cfg, &mtu_64, 1); - } - else { + } else { ovsrec_interface_set_mtu(iface->cfg, NULL, 0); } + + error = netdev_get_etheraddr(iface->netdev, mac); + if (!error) { + char mac_string[32]; + + sprintf(mac_string, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac)); + ovsrec_interface_set_mac_in_use(iface->cfg, mac_string); + } else { + ovsrec_interface_set_mac_in_use(iface->cfg, NULL); + } } /* Writes 'iface''s CFM statistics to the database. 'iface' must not be @@ -1743,57 +1841,47 @@ static void iface_refresh_cfm_stats(struct iface *iface) { const struct ovsrec_interface *cfg = iface->cfg; - int fault, opup, error; - const uint64_t *rmps; - size_t n_rmps; - int health; - - fault = ofproto_port_get_cfm_fault(iface->port->bridge->ofproto, - iface->ofp_port); - if (fault >= 0) { + struct ofproto_cfm_status status; + + if (!ofproto_port_get_cfm_status(iface->port->bridge->ofproto, + iface->ofp_port, &status)) { + ovsrec_interface_set_cfm_fault(cfg, NULL, 0); + ovsrec_interface_set_cfm_fault_status(cfg, NULL, 0); + ovsrec_interface_set_cfm_remote_opstate(cfg, NULL); + ovsrec_interface_set_cfm_health(cfg, NULL, 0); + ovsrec_interface_set_cfm_remote_mpids(cfg, NULL, 0); + } else { const char *reasons[CFM_FAULT_N_REASONS]; - bool fault_bool = fault; + int64_t cfm_health = status.health; + bool faulted = status.faults != 0; size_t i, j; + ovsrec_interface_set_cfm_fault(cfg, &faulted, 1); + j = 0; for (i = 0; i < CFM_FAULT_N_REASONS; i++) { int reason = 1 << i; - if (fault & reason) { + if (status.faults & reason) { reasons[j++] = cfm_fault_reason_to_str(reason); } } - - ovsrec_interface_set_cfm_fault(cfg, &fault_bool, 1); ovsrec_interface_set_cfm_fault_status(cfg, (char **) reasons, j); - } else { - ovsrec_interface_set_cfm_fault(cfg, NULL, 0); - ovsrec_interface_set_cfm_fault_status(cfg, NULL, 0); - } - opup = ofproto_port_get_cfm_opup(iface->port->bridge->ofproto, - iface->ofp_port); - if (opup >= 0) { - ovsrec_interface_set_cfm_remote_opstate(cfg, opup ? "up" : "down"); - } else { - ovsrec_interface_set_cfm_remote_opstate(cfg, NULL); - } - - error = ofproto_port_get_cfm_remote_mpids(iface->port->bridge->ofproto, - iface->ofp_port, &rmps, &n_rmps); - if (error >= 0) { - ovsrec_interface_set_cfm_remote_mpids(cfg, (const int64_t *)rmps, - n_rmps); - } else { - ovsrec_interface_set_cfm_remote_mpids(cfg, NULL, 0); - } + if (status.remote_opstate >= 0) { + const char *remote_opstate = status.remote_opstate ? "up" : "down"; + ovsrec_interface_set_cfm_remote_opstate(cfg, remote_opstate); + } else { + ovsrec_interface_set_cfm_remote_opstate(cfg, NULL); + } - health = ofproto_port_get_cfm_health(iface->port->bridge->ofproto, - iface->ofp_port); - if (health >= 0) { - int64_t cfm_health = health; - ovsrec_interface_set_cfm_health(cfg, &cfm_health, 1); - } else { - ovsrec_interface_set_cfm_health(cfg, NULL, 0); + ovsrec_interface_set_cfm_remote_mpids(cfg, + (const int64_t *)status.rmps, + status.n_rmps); + if (cfm_health >= 0) { + ovsrec_interface_set_cfm_health(cfg, &cfm_health, 1); + } else { + ovsrec_interface_set_cfm_health(cfg, NULL, 0); + } } } @@ -1814,11 +1902,12 @@ iface_refresh_stats(struct iface *iface) IFACE_STAT(rx_crc_errors, "rx_crc_err") \ IFACE_STAT(collisions, "collisions") -#define IFACE_STAT(MEMBER, NAME) NAME, - static char *keys[] = { IFACE_STATS }; +#define IFACE_STAT(MEMBER, NAME) + 1 + enum { N_IFACE_STATS = IFACE_STATS }; #undef IFACE_STAT - int64_t values[ARRAY_SIZE(keys)]; - int i; + int64_t values[N_IFACE_STATS]; + char *keys[N_IFACE_STATS]; + int n; struct netdev_stats stats; @@ -1830,15 +1919,19 @@ iface_refresh_stats(struct iface *iface) * all-1s, and we will deal with that correctly below. */ netdev_get_stats(iface->netdev, &stats); - /* Copy statistics into values[] array. */ - i = 0; -#define IFACE_STAT(MEMBER, NAME) values[i++] = stats.MEMBER; + /* Copy statistics into keys[] and values[]. */ + n = 0; +#define IFACE_STAT(MEMBER, NAME) \ + if (stats.MEMBER != UINT64_MAX) { \ + keys[n] = NAME; \ + values[n] = stats.MEMBER; \ + n++; \ + } IFACE_STATS; #undef IFACE_STAT - ovs_assert(i == ARRAY_SIZE(keys)); + ovs_assert(n <= N_IFACE_STATS); - ovsrec_interface_set_statistics(iface->cfg, keys, values, - ARRAY_SIZE(keys)); + ovsrec_interface_set_statistics(iface->cfg, keys, values, n); #undef IFACE_STATS } @@ -2019,17 +2112,61 @@ refresh_controller_status(void) ofproto_free_ofproto_controller_info(&info); } + +/* "Instant" stats. + * + * Some information in the database must be kept as up-to-date as possible to + * allow controllers to respond rapidly to network outages. We call these + * statistics "instant" stats. + * + * We wish to update these statistics every INSTANT_INTERVAL_MSEC milliseconds, + * assuming that they've changed. The only means we have to determine whether + * they have changed are: + * + * - Try to commit changes to the database. If nothing changed, then + * ovsdb_idl_txn_commit() returns TXN_UNCHANGED, otherwise some other + * value. + * + * - instant_stats_run() is called late in the run loop, after anything that + * might change any of the instant stats. + * + * We use these two facts together to avoid waking the process up every + * INSTANT_INTERVAL_MSEC whether there is any change or not. + */ + +/* Minimum interval between writing updates to the instant stats to the + * database. */ +#define INSTANT_INTERVAL_MSEC 100 + +/* Current instant stats database transaction, NULL if there is no ongoing + * transaction. */ +static struct ovsdb_idl_txn *instant_txn; + +/* Next time (in msec on monotonic clock) at which we will update the instant + * stats. */ +static long long int instant_next_txn = LLONG_MIN; + +/* True if the run loop has run since we last saw that the instant stats were + * unchanged, that is, this is true if we need to wake up at 'instant_next_txn' + * to refresh the instant stats. */ +static bool instant_stats_could_have_changed; static void -refresh_instant_stats(void) +instant_stats_run(void) { - static struct ovsdb_idl_txn *txn = NULL; + enum ovsdb_idl_txn_status status; + + instant_stats_could_have_changed = true; - if (!txn) { + if (!instant_txn) { struct bridge *br; - txn = ovsdb_idl_txn_create(idl); + if (time_msec() < instant_next_txn) { + return; + } + instant_next_txn = time_msec() + INSTANT_INTERVAL_MSEC; + instant_txn = ovsdb_idl_txn_create(idl); HMAP_FOR_EACH (br, node, &all_bridges) { struct iface *iface; struct port *port; @@ -2042,6 +2179,7 @@ refresh_instant_stats(void) HMAP_FOR_EACH (iface, name_node, &br->iface_by_name) { enum netdev_flags flags; + struct smap smap; const char *link_state; int64_t link_resets; int current, error; @@ -2074,16 +2212,37 @@ refresh_instant_stats(void) ovsrec_interface_set_link_resets(iface->cfg, &link_resets, 1); iface_refresh_cfm_stats(iface); + + smap_init(&smap); + if (!ofproto_port_get_bfd_status(br->ofproto, iface->ofp_port, + &smap)) { + ovsrec_interface_set_bfd_status(iface->cfg, &smap); + smap_destroy(&smap); + } } } } - if (ovsdb_idl_txn_commit(txn) != TXN_INCOMPLETE) { - ovsdb_idl_txn_destroy(txn); - txn = NULL; + status = ovsdb_idl_txn_commit(instant_txn); + if (status != TXN_INCOMPLETE) { + ovsdb_idl_txn_destroy(instant_txn); + instant_txn = NULL; + } + if (status == TXN_UNCHANGED) { + instant_stats_could_have_changed = false; } } +static void +instant_stats_wait(void) +{ + if (instant_txn) { + ovsdb_idl_txn_wait(instant_txn); + } else if (instant_stats_could_have_changed) { + poll_timer_wait_until(instant_next_txn); + } +} + /* Performs periodic activity required by bridges that needs to be done with * the least possible latency. * @@ -2132,11 +2291,16 @@ bridge_run(void) struct bridge *br, *next_br; VLOG_ERR_RL(&rl, "another ovs-vswitchd process is running, " - "disabling this process until it goes away"); + "disabling this process (pid %ld) until it goes away", + (long int) getpid()); HMAP_FOR_EACH_SAFE (br, next_br, node, &all_bridges) { bridge_destroy(br); } + /* Since we will not be running system_stats_run() in this process + * with the current situation of multiple ovs-vswitchd daemons, + * disable system stats collection. */ + system_stats_enable(false); return; } else if (!ovsdb_idl_has_lock(idl)) { return; @@ -2203,15 +2367,25 @@ bridge_run(void) } if (reconfiguring) { - if (cfg) { - if (!reconf_txn) { - reconf_txn = ovsdb_idl_txn_create(idl); - } - if (bridge_reconfigure_continue(cfg)) { + if (!reconf_txn) { + reconf_txn = ovsdb_idl_txn_create(idl); + } + + if (bridge_reconfigure_continue(cfg ? cfg : &null_cfg)) { + reconfiguring = false; + + if (cfg) { ovsrec_open_vswitch_set_cur_cfg(cfg, cfg->next_cfg); } - } else { - bridge_reconfigure_continue(&null_cfg); + + /* If we are completing our initial configuration for this run + * of ovs-vswitchd, then keep the transaction around to monitor + * it for completion. */ + if (!initial_config_done) { + initial_config_done = true; + daemonize_txn = reconf_txn; + reconf_txn = NULL; + } } } @@ -2221,6 +2395,20 @@ bridge_run(void) reconf_txn = NULL; } + if (daemonize_txn) { + enum ovsdb_idl_txn_status status = ovsdb_idl_txn_commit(daemonize_txn); + if (status != TXN_INCOMPLETE) { + ovsdb_idl_txn_destroy(daemonize_txn); + daemonize_txn = NULL; + + /* ovs-vswitchd has completed initialization, so allow the + * process that forked us to exit successfully. */ + daemonize_complete(); + + VLOG_INFO_ONCE("%s (Open vSwitch) %s", program_name, VERSION); + } + } + /* Refresh interface and mirror stats if necessary. */ if (time_msec() >= iface_stats_timer) { if (cfg) { @@ -2254,7 +2442,7 @@ bridge_run(void) } run_system_stats(); - refresh_instant_stats(); + instant_stats_run(); } void @@ -2264,6 +2452,9 @@ bridge_wait(void) const char *type; ovsdb_idl_wait(idl); + if (daemonize_txn) { + ovsdb_idl_txn_wait(daemonize_txn); + } if (reconfiguring) { poll_immediate_wake(); @@ -2286,6 +2477,7 @@ bridge_wait(void) } system_stats_wait(); + instant_stats_wait(); } /* Adds some memory usage statistics for bridges into 'usage', for use with @@ -2700,7 +2892,7 @@ bridge_configure_local_iface_netdev(struct bridge *br, /* Bring up the local interface. */ netdev = local_iface->netdev; - netdev_turn_flags_on(netdev, NETDEV_UP, true); + netdev_turn_flags_on(netdev, NETDEV_UP, NULL); /* Configure the IP address and netmask. */ if (!c->local_netmask @@ -3342,6 +3534,7 @@ iface_clear_db_record(const struct ovsrec_interface *if_cfg) ovsrec_interface_set_duplex(if_cfg, NULL); ovsrec_interface_set_link_speed(if_cfg, NULL, 0); ovsrec_interface_set_link_state(if_cfg, NULL); + ovsrec_interface_set_mac_in_use(if_cfg, NULL); ovsrec_interface_set_mtu(if_cfg, NULL, 0); ovsrec_interface_set_cfm_fault(if_cfg, NULL, 0); ovsrec_interface_set_cfm_fault_status(if_cfg, NULL, 0);