X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=ofproto%2Fofproto-dpif.c;h=4fc90c67b57e4bc8067928fc9a1edb9754051196;hb=4a1b8f30e5956655b59200d673964b9f3f9ad711;hp=6fa7894d41735a5ff540eabff9afc1c5035b4d66;hpb=4e022ec09e14ac89add74c1b4b8e3ff3873edbf0;p=sliver-openvswitch.git diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 6fa7894d4..4fc90c67b 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -50,6 +50,7 @@ #include "ofp-print.h" #include "ofproto-dpif-governor.h" #include "ofproto-dpif-ipfix.h" +#include "ofproto-dpif-mirror.h" #include "ofproto-dpif-sflow.h" #include "ofproto-dpif-xlate.h" #include "poll-loop.h" @@ -69,6 +70,12 @@ COVERAGE_DEFINE(facet_changed_rule); COVERAGE_DEFINE(facet_revalidate); COVERAGE_DEFINE(facet_unexpected); COVERAGE_DEFINE(facet_suppress); +COVERAGE_DEFINE(subfacet_install_fail); + +/* Number of implemented OpenFlow tables. */ +enum { N_TABLES = 255 }; +enum { TBL_INTERNAL = N_TABLES - 1 }; /* Used for internal hidden rules. */ +BUILD_ASSERT_DECL(N_TABLES >= 2 && N_TABLES <= 255); struct flow_miss; struct facet; @@ -79,11 +86,28 @@ static struct rule_dpif *rule_dpif_lookup(struct ofproto_dpif *, static void rule_get_stats(struct rule *, uint64_t *packets, uint64_t *bytes); static void rule_invalidate(const struct rule_dpif *); - -static void mirror_destroy(struct ofmirror *); -static void update_mirror_stats(struct ofproto_dpif *ofproto, - mirror_mask_t mirrors, - uint64_t packets, uint64_t bytes); +static tag_type rule_calculate_tag(const struct flow *, + const struct minimask *, uint32_t secret); + +struct ofbundle { + struct hmap_node hmap_node; /* In struct ofproto's "bundles" hmap. */ + struct ofproto_dpif *ofproto; /* Owning ofproto. */ + void *aux; /* Key supplied by ofproto's client. */ + char *name; /* Identifier for log messages. */ + + /* Configuration. */ + struct list ports; /* Contains "struct ofport"s. */ + enum port_vlan_mode vlan_mode; /* VLAN mode */ + int vlan; /* -1=trunk port, else a 12-bit VLAN ID. */ + unsigned long *trunks; /* Bitmap of trunked VLANs, if 'vlan' == -1. + * NULL if all VLANs are trunked. */ + struct lacp *lacp; /* LACP if LACP is enabled, otherwise NULL. */ + struct bond *bond; /* Nonnull iff more than one port. */ + bool use_priority_tags; /* Use 802.1p tag for frames in VLAN 0? */ + + /* Status. */ + bool floodable; /* True if no port has OFPUTIL_PC_NO_FLOOD set. */ +}; static void bundle_remove(struct ofport *); static void bundle_update(struct ofbundle *); @@ -188,8 +212,7 @@ static void subfacet_uninstall(struct subfacet *); struct facet { /* Owners. */ struct hmap_node hmap_node; /* In owning ofproto's 'facets' hmap. */ - struct list list_node; /* In owning rule's 'facets' list. */ - struct rule_dpif *rule; /* Owning rule. */ + struct ofproto_dpif *ofproto; /* Owned data. */ struct list subfacets; @@ -225,6 +248,7 @@ struct facet { uint8_t tcp_flags; /* TCP flags seen for this 'rule'. */ struct xlate_out xout; + bool fail_open; /* Facet matched the fail open rule. */ /* Storage for a single subfacet, to reduce malloc() time and space * overhead. (A facet always has at least one subfacet and in the common @@ -258,6 +282,38 @@ static void push_all_stats(void); static bool facet_is_controller_flow(struct facet *); +struct ofport_dpif { + struct hmap_node odp_port_node; /* In dpif_backer's "odp_to_ofport_map". */ + struct ofport up; + + odp_port_t odp_port; + struct ofbundle *bundle; /* Bundle that contains this port, if any. */ + struct list bundle_node; /* In struct ofbundle's "ports" list. */ + struct cfm *cfm; /* Connectivity Fault Management, if any. */ + struct bfd *bfd; /* BFD, if any. */ + tag_type tag; /* Tag associated with this port. */ + bool may_enable; /* May be enabled in bonds. */ + bool is_tunnel; /* This port is a tunnel. */ + long long int carrier_seq; /* Carrier status changes. */ + struct ofport_dpif *peer; /* Peer if patch port. */ + + /* Spanning tree. */ + struct stp_port *stp_port; /* Spanning Tree Protocol, if any. */ + enum stp_state stp_state; /* Always STP_DISABLED if STP not in use. */ + long long int stp_state_entered; + + struct hmap priorities; /* Map of attached 'priority_to_dscp's. */ + + /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.) + * + * This is deprecated. It is only for compatibility with broken device + * drivers in old versions of Linux that do not properly support VLANs when + * VLAN devices are not used. When broken device drivers are no longer in + * widespread use, we will delete these interfaces. */ + ofp_port_t realdev_ofp_port; + int vlandev_vid; +}; + /* Node in 'ofport_dpif''s 'priorities' map. Used to maintain a map from * 'priority' (the datapath's term for QoS queue) to the dscp bits which all * traffic egressing the 'ofport' with that priority should be marked with. */ @@ -286,8 +342,11 @@ static bool vsp_adjust_flow(const struct ofproto_dpif *, struct flow *); static void vsp_remove(struct ofport_dpif *); static void vsp_add(struct ofport_dpif *, ofp_port_t realdev_ofp_port, int vid); +static odp_port_t ofp_port_to_odp_port(const struct ofproto_dpif *, + ofp_port_t); + static ofp_port_t odp_port_to_ofp_port(const struct ofproto_dpif *, - odp_port_t odp_port); + odp_port_t); static struct ofport_dpif * ofport_dpif_cast(const struct ofport *ofport) @@ -309,6 +368,17 @@ struct dpif_completion { struct ofoperation *op; }; +/* Extra information about a classifier table. + * Currently used just for optimized flow revalidation. */ +struct table_dpif { + /* If either of these is nonnull, then this table has a form that allows + * flows to be tagged to avoid revalidating most flows for the most common + * kinds of flow table changes. */ + struct cls_table *catchall_table; /* Table that wildcards all fields. */ + struct cls_table *other_table; /* Table with any other wildcard set. */ + uint32_t basis; /* Keeps each table's tags separate. */ +}; + /* Reasons that we might need to revalidate every facet, and corresponding * coverage counters. * @@ -320,14 +390,18 @@ struct dpif_completion { enum revalidate_reason { REV_RECONFIGURE = 1, /* Switch configuration changed. */ REV_STP, /* Spanning tree protocol port status change. */ + REV_BOND, /* Bonding changed. */ REV_PORT_TOGGLED, /* Port enabled or disabled by CFM, LACP, ...*/ REV_FLOW_TABLE, /* Flow table changed. */ + REV_MAC_LEARNING, /* Mac learning changed. */ REV_INCONSISTENCY /* Facet self-check failed. */ }; COVERAGE_DEFINE(rev_reconfigure); COVERAGE_DEFINE(rev_stp); +COVERAGE_DEFINE(rev_bond); COVERAGE_DEFINE(rev_port_toggled); COVERAGE_DEFINE(rev_flow_table); +COVERAGE_DEFINE(rev_mac_learning); COVERAGE_DEFINE(rev_inconsistency); /* Drop keys are odp flow keys which have drop flows installed in the kernel. @@ -399,15 +473,80 @@ static struct ofport_dpif * odp_port_to_ofport(const struct dpif_backer *, odp_port_t odp_port); static void update_moving_averages(struct dpif_backer *backer); +struct ofproto_dpif { + struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */ + struct ofproto up; + struct dpif_backer *backer; + + /* Special OpenFlow rules. */ + struct rule_dpif *miss_rule; /* Sends flow table misses to controller. */ + struct rule_dpif *no_packet_in_rule; /* Drops flow table misses. */ + struct rule_dpif *drop_frags_rule; /* Used in OFPC_FRAG_DROP mode. */ + + /* Bridging. */ + struct netflow *netflow; + struct dpif_sflow *sflow; + struct dpif_ipfix *ipfix; + struct hmap bundles; /* Contains "struct ofbundle"s. */ + struct mac_learning *ml; + bool has_bonded_bundles; + struct mbridge *mbridge; + + /* Facets. */ + struct classifier facets; /* Contains 'struct facet's. */ + long long int consistency_rl; + + /* Revalidation. */ + struct table_dpif tables[N_TABLES]; + + /* Support for debugging async flow mods. */ + struct list completions; + + struct netdev_stats stats; /* To account packets generated and consumed in + * userspace. */ + + /* Spanning tree. */ + struct stp *stp; + long long int stp_last_tick; + + /* VLAN splinters. */ + struct hmap realdev_vid_map; /* (realdev,vid) -> vlandev. */ + struct hmap vlandev_map; /* vlandev -> (realdev,vid). */ + + /* Ports. */ + struct sset ports; /* Set of standard port names. */ + struct sset ghost_ports; /* Ports with no datapath port. */ + struct sset port_poll_set; /* Queued names for port_poll() reply. */ + int port_poll_errno; /* Last errno for port_poll() reply. */ + + /* Per ofproto's dpif stats. */ + uint64_t n_hit; + uint64_t n_missed; +}; + /* Defer flow mod completion until "ovs-appctl ofproto/unclog"? (Useful only * for debugging the asynchronous flow_mod implementation.) */ static bool clogged; +/* By default, flows in the datapath are wildcarded (megaflows). They + * may be disabled with the "ovs-appctl dpif/disable-megaflows" command. */ +static bool enable_megaflows = true; + /* All existing ofproto_dpif instances, indexed by ->up.name. */ static struct hmap all_ofproto_dpifs = HMAP_INITIALIZER(&all_ofproto_dpifs); static void ofproto_dpif_unixctl_init(void); +static inline struct ofproto_dpif * +ofproto_dpif_cast(const struct ofproto *ofproto) +{ + ovs_assert(ofproto->ofproto_class == &ofproto_dpif_class); + return CONTAINER_OF(ofproto, struct ofproto_dpif, up); +} + +static struct ofport_dpif *get_ofp_port(const struct ofproto_dpif *ofproto, + ofp_port_t ofp_port); + /* Upcalls. */ #define FLOW_MISS_MAX_BATCH 50 static int handle_upcalls(struct dpif_backer *, unsigned int max_batch); @@ -426,6 +565,20 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); /* Initial mappings of port to bridge mappings. */ static struct shash init_ofp_ports = SHASH_INITIALIZER(&init_ofp_ports); + +int +ofproto_dpif_flow_mod(struct ofproto_dpif *ofproto, + struct ofputil_flow_mod *fm) +{ + return ofproto_flow_mod(&ofproto->up, fm); +} + +void +ofproto_dpif_send_packet_in(struct ofproto_dpif *ofproto, + struct ofputil_packet_in *pin) +{ + connmgr_send_packet_in(ofproto->up.connmgr, pin); +} /* Factory functions. */ @@ -491,6 +644,12 @@ port_open_type(const char *datapath_type, const char *port_type) /* Type functions. */ +static void process_dpif_port_changes(struct dpif_backer *); +static void process_dpif_all_ports_changed(struct dpif_backer *); +static void process_dpif_port_change(struct dpif_backer *, + const char *devname); +static void process_dpif_port_error(struct dpif_backer *, int error); + static struct ofproto_dpif * lookup_ofproto_dpif_by_port_name(const char *name) { @@ -510,8 +669,6 @@ type_run(const char *type) { static long long int push_timer = LLONG_MIN; struct dpif_backer *backer; - char *devname; - int error; backer = shash_find_data(&all_dpif_backers, type); if (!backer) { @@ -536,6 +693,8 @@ type_run(const char *type) * and the configuration has now changed to "false", enable receiving * packets from the datapath. */ if (!backer->recv_set_enable && !ofproto_get_flow_restore_wait()) { + int error; + backer->recv_set_enable = true; error = dpif_recv_set(backer->dpif, backer->recv_set_enable); @@ -570,7 +729,7 @@ type_run(const char *type) char namebuf[NETDEV_VPORT_NAME_BUFSIZE]; const char *dp_port; - if (!iter->tnl_port) { + if (!iter->is_tunnel) { continue; } @@ -596,8 +755,8 @@ type_run(const char *type) } iter->odp_port = node ? u32_to_odp(node->data) : ODPP_NONE; - if (tnl_port_reconfigure(&iter->up, iter->odp_port, - &iter->tnl_port)) { + if (tnl_port_reconfigure(iter, iter->up.netdev, + iter->odp_port)) { backer->need_revalidate = REV_RECONFIGURE; } } @@ -611,8 +770,10 @@ type_run(const char *type) switch (backer->need_revalidate) { case REV_RECONFIGURE: COVERAGE_INC(rev_reconfigure); break; case REV_STP: COVERAGE_INC(rev_stp); break; + case REV_BOND: COVERAGE_INC(rev_bond); break; case REV_PORT_TOGGLED: COVERAGE_INC(rev_port_toggled); break; case REV_FLOW_TABLE: COVERAGE_INC(rev_flow_table); break; + case REV_MAC_LEARNING: COVERAGE_INC(rev_mac_learning); break; case REV_INCONSISTENCY: COVERAGE_INC(rev_inconsistency); break; } @@ -634,6 +795,36 @@ type_run(const char *type) continue; } + if (need_revalidate) { + struct ofport_dpif *ofport; + struct ofbundle *bundle; + + xlate_ofproto_set(ofproto, ofproto->up.name, ofproto->ml, + ofproto->mbridge, ofproto->sflow, + ofproto->ipfix, ofproto->up.frag_handling, + ofproto->up.forward_bpdu, + connmgr_has_in_band(ofproto->up.connmgr), + ofproto->netflow != NULL, + ofproto->stp != NULL); + + HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) { + xlate_bundle_set(ofproto, bundle, bundle->name, + bundle->vlan_mode, bundle->vlan, + bundle->trunks, bundle->use_priority_tags, + bundle->bond, bundle->lacp, + bundle->floodable); + } + + HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) { + xlate_ofport_set(ofproto, ofport->bundle, ofport, + ofport->up.ofp_port, ofport->odp_port, + ofport->up.netdev, ofport->cfm, + ofport->bfd, ofport->peer, + ofport->up.pp.config, ofport->stp_state, + ofport->is_tunnel, ofport->may_enable); + } + } + cls_cursor_init(&cursor, &ofproto->facets, NULL); CLS_CURSOR_FOR_EACH_SAFE (facet, next, cr, &cursor) { if (need_revalidate @@ -653,58 +844,7 @@ type_run(const char *type) timer_set_duration(&backer->next_expiration, delay); } - /* Check for port changes in the dpif. */ - while ((error = dpif_port_poll(backer->dpif, &devname)) == 0) { - struct ofproto_dpif *ofproto; - struct dpif_port port; - - /* Don't report on the datapath's device. */ - if (!strcmp(devname, dpif_base_name(backer->dpif))) { - goto next; - } - - HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, - &all_ofproto_dpifs) { - if (simap_contains(&ofproto->backer->tnl_backers, devname)) { - goto next; - } - } - - ofproto = lookup_ofproto_dpif_by_port_name(devname); - if (dpif_port_query_by_name(backer->dpif, devname, &port)) { - /* The port was removed. If we know the datapath, - * report it through poll_set(). If we don't, it may be - * notifying us of a removal we initiated, so ignore it. - * If there's a pending ENOBUFS, let it stand, since - * everything will be reevaluated. */ - if (ofproto && ofproto->port_poll_errno != ENOBUFS) { - sset_add(&ofproto->port_poll_set, devname); - ofproto->port_poll_errno = 0; - } - } else if (!ofproto) { - /* The port was added, but we don't know with which - * ofproto we should associate it. Delete it. */ - dpif_port_del(backer->dpif, port.port_no); - } - dpif_port_destroy(&port); - - next: - free(devname); - } - - if (error != EAGAIN) { - struct ofproto_dpif *ofproto; - - /* There was some sort of error, so propagate it to all - * ofprotos that use this backer. */ - HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, - &all_ofproto_dpifs) { - if (ofproto->backer == backer) { - sset_clear(&ofproto->port_poll_set); - ofproto->port_poll_errno = error; - } - } - } + process_dpif_port_changes(backer); if (backer->governor) { size_t n_subfacets; @@ -727,6 +867,133 @@ type_run(const char *type) return 0; } +/* Check for and handle port changes in 'backer''s dpif. */ +static void +process_dpif_port_changes(struct dpif_backer *backer) +{ + for (;;) { + char *devname; + int error; + + error = dpif_port_poll(backer->dpif, &devname); + switch (error) { + case EAGAIN: + return; + + case ENOBUFS: + process_dpif_all_ports_changed(backer); + break; + + case 0: + process_dpif_port_change(backer, devname); + free(devname); + break; + + default: + process_dpif_port_error(backer, error); + break; + } + } +} + +static void +process_dpif_all_ports_changed(struct dpif_backer *backer) +{ + struct ofproto_dpif *ofproto; + struct dpif_port dpif_port; + struct dpif_port_dump dump; + struct sset devnames; + const char *devname; + + sset_init(&devnames); + HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { + if (ofproto->backer == backer) { + struct ofport *ofport; + + HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) { + sset_add(&devnames, netdev_get_name(ofport->netdev)); + } + } + } + DPIF_PORT_FOR_EACH (&dpif_port, &dump, backer->dpif) { + sset_add(&devnames, dpif_port.name); + } + + SSET_FOR_EACH (devname, &devnames) { + process_dpif_port_change(backer, devname); + } + sset_destroy(&devnames); +} + +static void +process_dpif_port_change(struct dpif_backer *backer, const char *devname) +{ + struct ofproto_dpif *ofproto; + struct dpif_port port; + + /* Don't report on the datapath's device. */ + if (!strcmp(devname, dpif_base_name(backer->dpif))) { + return; + } + + HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, + &all_ofproto_dpifs) { + if (simap_contains(&ofproto->backer->tnl_backers, devname)) { + return; + } + } + + ofproto = lookup_ofproto_dpif_by_port_name(devname); + if (dpif_port_query_by_name(backer->dpif, devname, &port)) { + /* The port was removed. If we know the datapath, + * report it through poll_set(). If we don't, it may be + * notifying us of a removal we initiated, so ignore it. + * If there's a pending ENOBUFS, let it stand, since + * everything will be reevaluated. */ + if (ofproto && ofproto->port_poll_errno != ENOBUFS) { + sset_add(&ofproto->port_poll_set, devname); + ofproto->port_poll_errno = 0; + } + } else if (!ofproto) { + /* The port was added, but we don't know with which + * ofproto we should associate it. Delete it. */ + dpif_port_del(backer->dpif, port.port_no); + } else { + struct ofport_dpif *ofport; + + ofport = ofport_dpif_cast(shash_find_data( + &ofproto->up.port_by_name, devname)); + if (ofport + && ofport->odp_port != port.port_no + && !odp_port_to_ofport(backer, port.port_no)) + { + /* 'ofport''s datapath port number has changed from + * 'ofport->odp_port' to 'port.port_no'. Update our internal data + * structures to match. */ + hmap_remove(&backer->odp_to_ofport_map, &ofport->odp_port_node); + ofport->odp_port = port.port_no; + hmap_insert(&backer->odp_to_ofport_map, &ofport->odp_port_node, + hash_odp_port(port.port_no)); + backer->need_revalidate = REV_RECONFIGURE; + } + } + dpif_port_destroy(&port); +} + +/* Propagate 'error' to all ofprotos based on 'backer'. */ +static void +process_dpif_port_error(struct dpif_backer *backer, int error) +{ + struct ofproto_dpif *ofproto; + + HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { + if (ofproto->backer == backer) { + sset_clear(&ofproto->port_poll_set); + ofproto->port_poll_errno = error; + } + } +} + static int dpif_backer_run_fast(struct dpif_backer *backer, int max_batch) { @@ -931,7 +1198,7 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) free(backer_name); if (error) { VLOG_ERR("failed to open datapath of type %s: %s", type, - strerror(error)); + ovs_strerror(error)); free(backer); return error; } @@ -978,7 +1245,7 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) error = dpif_recv_set(backer->dpif, backer->recv_set_enable); if (error) { VLOG_ERR("failed to listen on datapath of type %s: %s", - type, strerror(error)); + type, ovs_strerror(error)); close_dpif_backer(backer); return error; } @@ -1023,9 +1290,7 @@ construct(struct ofproto *ofproto_) ofproto->stp = NULL; hmap_init(&ofproto->bundles); ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME); - for (i = 0; i < MAX_MIRRORS; i++) { - ofproto->mirrors[i] = NULL; - } + ofproto->mbridge = mbridge_create(); ofproto->has_bonded_bundles = false; classifier_init(&ofproto->facets); @@ -1043,9 +1308,6 @@ construct(struct ofproto *ofproto_) ofproto_dpif_unixctl_init(); - ofproto->has_mirrors = false; - ofproto->has_bundle_action = false; - hmap_init(&ofproto->vlandev_map); hmap_init(&ofproto->realdev_vid_map); @@ -1097,6 +1359,7 @@ add_internal_flow(struct ofproto_dpif *ofproto, int id, fm.new_cookie = htonll(0); fm.cookie = htonll(0); fm.cookie_mask = htonll(0); + fm.modify_cookie = false; fm.table_id = TBL_INTERNAL; fm.command = OFPFC_ADD; fm.idle_timeout = 0; @@ -1174,9 +1437,10 @@ destruct(struct ofproto *ofproto_) struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); struct rule_dpif *rule, *next_rule; struct oftable *table; - int i; ofproto->backer->need_revalidate = REV_RECONFIGURE; + xlate_remove_ofproto(ofproto); + hmap_remove(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node); complete_operations(ofproto); @@ -1189,14 +1453,12 @@ destruct(struct ofproto *ofproto_) } } - for (i = 0; i < MAX_MIRRORS; i++) { - mirror_destroy(ofproto->mirrors[i]); - } + mbridge_unref(ofproto->mbridge); netflow_destroy(ofproto->netflow); - dpif_sflow_destroy(ofproto->sflow); + dpif_sflow_unref(ofproto->sflow); hmap_destroy(&ofproto->bundles); - mac_learning_destroy(ofproto->ml); + mac_learning_unref(ofproto->ml); classifier_destroy(&ofproto->facets); @@ -1241,6 +1503,13 @@ run(struct ofproto *ofproto_) complete_operations(ofproto); } + if (mbridge_need_revalidate(ofproto->mbridge)) { + ofproto->backer->need_revalidate = REV_RECONFIGURE; + ovs_rwlock_wrlock(&ofproto->ml->rwlock); + mac_learning_flush(ofproto->ml); + ovs_rwlock_unlock(&ofproto->ml->rwlock); + } + /* Do not perform any periodic activity below required by 'ofproto' while * waiting for flow restore to complete. */ if (ofproto_get_flow_restore_wait()) { @@ -1269,7 +1538,11 @@ run(struct ofproto *ofproto_) } stp_run(ofproto); - mac_learning_run(ofproto->ml, &ofproto->backer->revalidate_set); + ovs_rwlock_wrlock(&ofproto->ml->rwlock); + if (mac_learning_run(ofproto->ml)) { + ofproto->backer->need_revalidate = REV_MAC_LEARNING; + } + ovs_rwlock_unlock(&ofproto->ml->rwlock); /* Check the consistency of a random facet, to aid debugging. */ if (time_msec() >= ofproto->consistency_rl @@ -1330,7 +1603,9 @@ wait(struct ofproto *ofproto_) if (ofproto->netflow) { netflow_wait(ofproto->netflow); } + ovs_rwlock_rdlock(&ofproto->ml->rwlock); mac_learning_wait(ofproto->ml); + ovs_rwlock_unlock(&ofproto->ml->rwlock); stp_wait(ofproto); if (ofproto->backer->need_revalidate) { /* Shouldn't happen, but if it does just go around again. */ @@ -1367,7 +1642,7 @@ flush(struct ofproto *ofproto_) n_batch = 0; HMAP_FOR_EACH_SAFE (subfacet, next_subfacet, hmap_node, &ofproto->backer->subfacets) { - if (ofproto_dpif_cast(subfacet->facet->rule->up.ofproto) != ofproto) { + if (subfacet->facet->ofproto != ofproto) { continue; } @@ -1458,7 +1733,7 @@ port_construct(struct ofport *port_) port->may_enable = true; port->stp_port = NULL; port->stp_state = STP_DISABLED; - port->tnl_port = NULL; + port->is_tunnel = false; port->peer = NULL; hmap_init(&port->priorities); port->realdev_ofp_port = 0; @@ -1487,7 +1762,8 @@ port_construct(struct ofport *port_) port->odp_port = dpif_port.port_no; if (netdev_get_tunnel_config(netdev)) { - port->tnl_port = tnl_port_add(&port->up, port->odp_port); + tnl_port_add(port, port->up.netdev, port->odp_port); + port->is_tunnel = true; } else { /* Sanity-check that a mapping doesn't already exist. This * shouldn't happen for non-tunnel ports. */ @@ -1499,7 +1775,7 @@ port_construct(struct ofport *port_) } hmap_insert(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node, - hash_int(odp_to_u32(port->odp_port), 0)); + hash_odp_port(port->odp_port)); } dpif_port_destroy(&dpif_port); @@ -1519,6 +1795,9 @@ port_destruct(struct ofport *port_) char namebuf[NETDEV_VPORT_NAME_BUFSIZE]; const char *dp_port_name; + ofproto->backer->need_revalidate = REV_RECONFIGURE; + xlate_ofport_remove(port); + dp_port_name = netdev_vport_get_dpif_port(port->up.netdev, namebuf, sizeof namebuf); if (dpif_port_exists(ofproto->backer->dpif, dp_port_name)) { @@ -1526,10 +1805,9 @@ port_destruct(struct ofport *port_) * happens when the ofproto is being destroyed, since the caller * assumes that removal of attached ports will happen as part of * destruction. */ - if (!port->tnl_port) { + if (!port->is_tunnel) { dpif_port_del(ofproto->backer->dpif, port->odp_port); } - ofproto->backer->need_revalidate = REV_RECONFIGURE; } if (port->peer) { @@ -1537,14 +1815,13 @@ port_destruct(struct ofport *port_) port->peer = NULL; } - if (port->odp_port != ODPP_NONE && !port->tnl_port) { + if (port->odp_port != ODPP_NONE && !port->is_tunnel) { hmap_remove(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node); } - tnl_port_del(port->tnl_port); + tnl_port_del(port); sset_find_and_delete(&ofproto->ports, devname); sset_find_and_delete(&ofproto->ghost_ports, devname); - ofproto->backer->need_revalidate = REV_RECONFIGURE; bundle_remove(port_); set_cfm(port_, NULL); set_bfd(port_, NULL); @@ -1569,9 +1846,10 @@ port_modified(struct ofport *port_) cfm_set_netdev(port->cfm, port->up.netdev); } - if (port->tnl_port && tnl_port_reconfigure(&port->up, port->odp_port, - &port->tnl_port)) { - ofproto_dpif_cast(port->up.ofproto)->backer->need_revalidate = true; + if (port->is_tunnel && tnl_port_reconfigure(port, port->up.netdev, + port->odp_port)) { + ofproto_dpif_cast(port->up.ofproto)->backer->need_revalidate = + REV_RECONFIGURE; } ofport_update_peer(port); @@ -1615,7 +1893,7 @@ set_sflow(struct ofproto *ofproto_, dpif_sflow_set_options(ds, sflow_options); } else { if (ds) { - dpif_sflow_destroy(ds); + dpif_sflow_unref(ds); ofproto->backer->need_revalidate = REV_RECONFIGURE; ofproto->sflow = NULL; } @@ -1642,7 +1920,7 @@ set_ipfix( n_flow_exporters_options); } else { if (di) { - dpif_ipfix_destroy(di); + dpif_ipfix_unref(di); ofproto->ipfix = NULL; } } @@ -1672,7 +1950,7 @@ set_cfm(struct ofport *ofport_, const struct cfm_settings *s) error = EINVAL; } - cfm_destroy(ofport->cfm); + cfm_unref(ofport->cfm); ofport->cfm = NULL; return error; } @@ -1780,7 +2058,7 @@ set_stp(struct ofproto *ofproto_, const struct ofproto_stp_settings *s) set_stp_port(ofport, NULL); } - stp_destroy(ofproto->stp); + stp_unref(ofproto->stp); ofproto->stp = NULL; } @@ -1826,8 +2104,9 @@ update_stp_port_state(struct ofport_dpif *ofport) if (stp_learn_in_state(ofport->stp_state) != stp_learn_in_state(state)) { /* xxx Learning action flows should also be flushed. */ - mac_learning_flush(ofproto->ml, - &ofproto->backer->revalidate_set); + ovs_rwlock_wrlock(&ofproto->ml->rwlock); + mac_learning_flush(ofproto->ml); + ovs_rwlock_unlock(&ofproto->ml->rwlock); } fwd_change = stp_forward_in_state(ofport->stp_state) != stp_forward_in_state(state); @@ -1932,7 +2211,9 @@ stp_run(struct ofproto_dpif *ofproto) } if (stp_check_and_reset_fdb_flush(ofproto->stp)) { - mac_learning_flush(ofproto->ml, &ofproto->backer->revalidate_set); + ovs_rwlock_wrlock(&ofproto->ml->rwlock); + mac_learning_flush(ofproto->ml); + ovs_rwlock_unlock(&ofproto->ml->rwlock); } } } @@ -2089,6 +2370,7 @@ bundle_flush_macs(struct ofbundle *bundle, bool all_ofprotos) struct mac_entry *mac, *next_mac; ofproto->backer->need_revalidate = REV_RECONFIGURE; + ovs_rwlock_wrlock(&ml->rwlock); LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) { if (mac->port.p == bundle) { if (all_ofprotos) { @@ -2098,11 +2380,12 @@ bundle_flush_macs(struct ofbundle *bundle, bool all_ofprotos) if (o != ofproto) { struct mac_entry *e; - e = mac_learning_lookup(o->ml, mac->mac, mac->vlan, - NULL); + ovs_rwlock_wrlock(&o->ml->rwlock); + e = mac_learning_lookup(o->ml, mac->mac, mac->vlan); if (e) { mac_learning_expire(o->ml, e); } + ovs_rwlock_unlock(&o->ml->rwlock); } } } @@ -2110,6 +2393,7 @@ bundle_flush_macs(struct ofbundle *bundle, bool all_ofprotos) mac_learning_expire(ml, mac); } } + ovs_rwlock_unlock(&ml->rwlock); } static struct ofbundle * @@ -2126,24 +2410,6 @@ bundle_lookup(const struct ofproto_dpif *ofproto, void *aux) return NULL; } -/* Looks up each of the 'n_auxes' pointers in 'auxes' as bundles and adds the - * ones that are found to 'bundles'. */ -static void -bundle_lookup_multiple(struct ofproto_dpif *ofproto, - void **auxes, size_t n_auxes, - struct hmapx *bundles) -{ - size_t i; - - hmapx_init(bundles); - for (i = 0; i < n_auxes; i++) { - struct ofbundle *bundle = bundle_lookup(ofproto, auxes[i]); - if (bundle) { - hmapx_add(bundles, bundle); - } - } -} - static void bundle_update(struct ofbundle *bundle) { @@ -2216,24 +2482,15 @@ bundle_destroy(struct ofbundle *bundle) { struct ofproto_dpif *ofproto; struct ofport_dpif *port, *next_port; - int i; if (!bundle) { return; } ofproto = bundle->ofproto; - for (i = 0; i < MAX_MIRRORS; i++) { - struct ofmirror *m = ofproto->mirrors[i]; - if (m) { - if (m->out == bundle) { - mirror_destroy(m); - } else if (hmapx_find_and_delete(&m->srcs, bundle) - || hmapx_find_and_delete(&m->dsts, bundle)) { - ofproto->backer->need_revalidate = REV_RECONFIGURE; - } - } - } + mbridge_unregister_bundle(ofproto->mbridge, bundle->aux); + + xlate_bundle_remove(bundle); LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) { bundle_del_port(port); @@ -2243,8 +2500,8 @@ bundle_destroy(struct ofbundle *bundle) hmap_remove(&ofproto->bundles, &bundle->hmap_node); free(bundle->name); free(bundle->trunks); - lacp_destroy(bundle->lacp); - bond_destroy(bundle->bond); + lacp_unref(bundle->lacp); + bond_unref(bundle->bond); free(bundle); } @@ -2288,10 +2545,7 @@ bundle_set(struct ofproto *ofproto_, void *aux, bundle->bond = NULL; bundle->floodable = true; - - bundle->src_mirrors = 0; - bundle->dst_mirrors = 0; - bundle->mirror_out = 0; + mbridge_register_bundle(ofproto->mbridge, bundle); } if (!bundle->name || strcmp(s->name, bundle->name)) { @@ -2307,7 +2561,7 @@ bundle_set(struct ofproto *ofproto_, void *aux, } lacp_configure(bundle->lacp, s->lacp); } else { - lacp_destroy(bundle->lacp); + lacp_unref(bundle->lacp); bundle->lacp = NULL; } @@ -2418,7 +2672,7 @@ bundle_set(struct ofproto *ofproto_, void *aux, bond_slave_register(bundle->bond, port, port->up.netdev); } } else { - bond_destroy(bundle->bond); + bond_unref(bundle->bond); bundle->bond = NULL; } @@ -2442,7 +2696,7 @@ bundle_remove(struct ofport *port_) if (list_is_empty(&bundle->ports)) { bundle_destroy(bundle); } else if (list_is_short(&bundle->ports)) { - bond_destroy(bundle->bond); + bond_unref(bundle->bond); bundle->bond = NULL; } } @@ -2471,7 +2725,7 @@ send_pdu_cb(void *port_, const void *pdu, size_t pdu_size) } else { VLOG_ERR_RL(&rl, "port %s: cannot obtain Ethernet address of iface " "%s (%s)", port->bundle->name, - netdev_get_name(port->up.netdev), strerror(error)); + netdev_get_name(port->up.netdev), ovs_strerror(error)); } } @@ -2483,6 +2737,7 @@ bundle_send_learning_packets(struct ofbundle *bundle) struct mac_entry *e; error = n_packets = n_errors = 0; + ovs_rwlock_rdlock(&ofproto->ml->rwlock); LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) { if (e->port.p != bundle) { struct ofpbuf *learning_packet; @@ -2505,12 +2760,13 @@ bundle_send_learning_packets(struct ofbundle *bundle) n_packets++; } } + ovs_rwlock_unlock(&ofproto->ml->rwlock); if (n_errors) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); VLOG_WARN_RL(&rl, "bond %s: %d errors sending %d gratuitous learning " "packets, last error was: %s", - bundle->name, n_errors, n_packets, strerror(error)); + bundle->name, n_errors, n_packets, ovs_strerror(error)); } else { VLOG_DBG("bond %s: sent %d gratuitous learning packets", bundle->name, n_packets); @@ -2530,8 +2786,10 @@ bundle_run(struct ofbundle *bundle) bond_slave_set_may_enable(bundle->bond, port, port->may_enable); } - bond_run(bundle->bond, &bundle->ofproto->backer->revalidate_set, - lacp_status(bundle->lacp)); + if (bond_run(bundle->bond, lacp_status(bundle->lacp))) { + bundle->ofproto->backer->need_revalidate = REV_BOND; + } + if (bond_should_send_learning_packets(bundle->bond)) { bundle_send_learning_packets(bundle); } @@ -2552,247 +2810,56 @@ bundle_wait(struct ofbundle *bundle) /* Mirrors. */ static int -mirror_scan(struct ofproto_dpif *ofproto) -{ - int idx; - - for (idx = 0; idx < MAX_MIRRORS; idx++) { - if (!ofproto->mirrors[idx]) { - return idx; - } - } - return -1; -} - -static struct ofmirror * -mirror_lookup(struct ofproto_dpif *ofproto, void *aux) -{ - int i; - - for (i = 0; i < MAX_MIRRORS; i++) { - struct ofmirror *mirror = ofproto->mirrors[i]; - if (mirror && mirror->aux == aux) { - return mirror; - } - } - - return NULL; -} - -/* Update the 'dup_mirrors' member of each of the ofmirrors in 'ofproto'. */ -static void -mirror_update_dups(struct ofproto_dpif *ofproto) -{ - int i; - - for (i = 0; i < MAX_MIRRORS; i++) { - struct ofmirror *m = ofproto->mirrors[i]; - - if (m) { - m->dup_mirrors = MIRROR_MASK_C(1) << i; - } - } - - for (i = 0; i < MAX_MIRRORS; i++) { - struct ofmirror *m1 = ofproto->mirrors[i]; - int j; - - if (!m1) { - continue; - } - - for (j = i + 1; j < MAX_MIRRORS; j++) { - struct ofmirror *m2 = ofproto->mirrors[j]; - - if (m2 && m1->out == m2->out && m1->out_vlan == m2->out_vlan) { - m1->dup_mirrors |= MIRROR_MASK_C(1) << j; - m2->dup_mirrors |= m1->dup_mirrors; - } - } - } -} - -static int -mirror_set(struct ofproto *ofproto_, void *aux, - const struct ofproto_mirror_settings *s) +mirror_set__(struct ofproto *ofproto_, void *aux, + const struct ofproto_mirror_settings *s) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); - mirror_mask_t mirror_bit; - struct ofbundle *bundle; - struct ofmirror *mirror; - struct ofbundle *out; - struct hmapx srcs; /* Contains "struct ofbundle *"s. */ - struct hmapx dsts; /* Contains "struct ofbundle *"s. */ - int out_vlan; + struct ofbundle **srcs, **dsts; + int error; + size_t i; - mirror = mirror_lookup(ofproto, aux); if (!s) { - mirror_destroy(mirror); + mirror_destroy(ofproto->mbridge, aux); return 0; } - if (!mirror) { - int idx; - - idx = mirror_scan(ofproto); - if (idx < 0) { - VLOG_WARN("bridge %s: maximum of %d port mirrors reached, " - "cannot create %s", - ofproto->up.name, MAX_MIRRORS, s->name); - return EFBIG; - } - mirror = ofproto->mirrors[idx] = xzalloc(sizeof *mirror); - mirror->ofproto = ofproto; - mirror->idx = idx; - mirror->aux = aux; - mirror->out_vlan = -1; - mirror->name = NULL; - } + srcs = xmalloc(s->n_srcs * sizeof *srcs); + dsts = xmalloc(s->n_dsts * sizeof *dsts); - if (!mirror->name || strcmp(s->name, mirror->name)) { - free(mirror->name); - mirror->name = xstrdup(s->name); + for (i = 0; i < s->n_srcs; i++) { + srcs[i] = bundle_lookup(ofproto, s->srcs[i]); } - /* Get the new configuration. */ - if (s->out_bundle) { - out = bundle_lookup(ofproto, s->out_bundle); - if (!out) { - mirror_destroy(mirror); - return EINVAL; - } - out_vlan = -1; - } else { - out = NULL; - out_vlan = s->out_vlan; - } - bundle_lookup_multiple(ofproto, s->srcs, s->n_srcs, &srcs); - bundle_lookup_multiple(ofproto, s->dsts, s->n_dsts, &dsts); - - /* If the configuration has not changed, do nothing. */ - if (hmapx_equals(&srcs, &mirror->srcs) - && hmapx_equals(&dsts, &mirror->dsts) - && vlan_bitmap_equal(mirror->vlans, s->src_vlans) - && mirror->out == out - && mirror->out_vlan == out_vlan) - { - hmapx_destroy(&srcs); - hmapx_destroy(&dsts); - return 0; + for (i = 0; i < s->n_dsts; i++) { + dsts[i] = bundle_lookup(ofproto, s->dsts[i]); } - hmapx_swap(&srcs, &mirror->srcs); - hmapx_destroy(&srcs); - - hmapx_swap(&dsts, &mirror->dsts); - hmapx_destroy(&dsts); - - free(mirror->vlans); - mirror->vlans = vlan_bitmap_clone(s->src_vlans); - - mirror->out = out; - mirror->out_vlan = out_vlan; - - /* Update bundles. */ - mirror_bit = MIRROR_MASK_C(1) << mirror->idx; - HMAP_FOR_EACH (bundle, hmap_node, &mirror->ofproto->bundles) { - if (hmapx_contains(&mirror->srcs, bundle)) { - bundle->src_mirrors |= mirror_bit; - } else { - bundle->src_mirrors &= ~mirror_bit; - } - - if (hmapx_contains(&mirror->dsts, bundle)) { - bundle->dst_mirrors |= mirror_bit; - } else { - bundle->dst_mirrors &= ~mirror_bit; - } - - if (mirror->out == bundle) { - bundle->mirror_out |= mirror_bit; - } else { - bundle->mirror_out &= ~mirror_bit; - } - } - - ofproto->backer->need_revalidate = REV_RECONFIGURE; - ofproto->has_mirrors = true; - mac_learning_flush(ofproto->ml, - &ofproto->backer->revalidate_set); - mirror_update_dups(ofproto); - - return 0; -} - -static void -mirror_destroy(struct ofmirror *mirror) -{ - struct ofproto_dpif *ofproto; - mirror_mask_t mirror_bit; - struct ofbundle *bundle; - int i; - - if (!mirror) { - return; - } - - ofproto = mirror->ofproto; - ofproto->backer->need_revalidate = REV_RECONFIGURE; - mac_learning_flush(ofproto->ml, &ofproto->backer->revalidate_set); - - mirror_bit = MIRROR_MASK_C(1) << mirror->idx; - HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) { - bundle->src_mirrors &= ~mirror_bit; - bundle->dst_mirrors &= ~mirror_bit; - bundle->mirror_out &= ~mirror_bit; - } - - hmapx_destroy(&mirror->srcs); - hmapx_destroy(&mirror->dsts); - free(mirror->vlans); - - ofproto->mirrors[mirror->idx] = NULL; - free(mirror->name); - free(mirror); - - mirror_update_dups(ofproto); - - ofproto->has_mirrors = false; - for (i = 0; i < MAX_MIRRORS; i++) { - if (ofproto->mirrors[i]) { - ofproto->has_mirrors = true; - break; - } - } + error = mirror_set(ofproto->mbridge, aux, s->name, srcs, s->n_srcs, dsts, + s->n_dsts, s->src_vlans, + bundle_lookup(ofproto, s->out_bundle), s->out_vlan); + free(srcs); + free(dsts); + return error; } static int -mirror_get_stats(struct ofproto *ofproto_, void *aux, - uint64_t *packets, uint64_t *bytes) +mirror_get_stats__(struct ofproto *ofproto, void *aux, + uint64_t *packets, uint64_t *bytes) { - struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); - struct ofmirror *mirror = mirror_lookup(ofproto, aux); - - if (!mirror) { - *packets = *bytes = UINT64_MAX; - return 0; - } - push_all_stats(); - - *packets = mirror->packet_count; - *bytes = mirror->byte_count; - - return 0; + return mirror_get_stats(ofproto_dpif_cast(ofproto)->mbridge, aux, packets, + bytes); } static int set_flood_vlans(struct ofproto *ofproto_, unsigned long *flood_vlans) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + ovs_rwlock_wrlock(&ofproto->ml->rwlock); if (mac_learning_set_flood_vlans(ofproto->ml, flood_vlans)) { - mac_learning_flush(ofproto->ml, &ofproto->backer->revalidate_set); + mac_learning_flush(ofproto->ml); } + ovs_rwlock_unlock(&ofproto->ml->rwlock); return 0; } @@ -2801,7 +2868,7 @@ is_mirror_output_bundle(const struct ofproto *ofproto_, void *aux) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); struct ofbundle *bundle = bundle_lookup(ofproto, aux); - return bundle && bundle->mirror_out != 0; + return bundle && mirror_bundle_out(ofproto->mbridge, bundle) != 0; } static void @@ -2816,20 +2883,22 @@ set_mac_table_config(struct ofproto *ofproto_, unsigned int idle_time, size_t max_entries) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + ovs_rwlock_wrlock(&ofproto->ml->rwlock); mac_learning_set_idle_time(ofproto->ml, idle_time); mac_learning_set_max_entries(ofproto->ml, max_entries); + ovs_rwlock_unlock(&ofproto->ml->rwlock); } /* Ports. */ -struct ofport_dpif * +static struct ofport_dpif * get_ofp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port) { struct ofport *ofport = ofproto_get_port(&ofproto->up, ofp_port); return ofport ? ofport_dpif_cast(ofport) : NULL; } -struct ofport_dpif * +static struct ofport_dpif * get_odp_port(const struct ofproto_dpif *ofproto, odp_port_t odp_port) { struct ofport_dpif *port = odp_port_to_ofport(ofproto->backer, odp_port); @@ -2858,7 +2927,7 @@ ofport_update_peer(struct ofport_dpif *ofport) } backer = ofproto_dpif_cast(ofport->up.ofproto)->backer; - backer->need_revalidate = true; + backer->need_revalidate = REV_RECONFIGURE; if (ofport->peer) { ofport->peer->peer = NULL; @@ -2954,10 +3023,7 @@ port_run(struct ofport_dpif *ofport) if (ofport->may_enable != enable) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); - - if (ofproto->has_bundle_action) { - ofproto->backer->need_revalidate = REV_PORT_TOGGLED; - } + ofproto->backer->need_revalidate = REV_PORT_TOGGLED; } ofport->may_enable = enable; @@ -3062,7 +3128,7 @@ port_del(struct ofproto *ofproto_, ofp_port_t ofp_port) sset_find_and_delete(&ofproto->ghost_ports, netdev_get_name(ofport->up.netdev)); ofproto->backer->need_revalidate = REV_RECONFIGURE; - if (!ofport->tnl_port) { + if (!ofport->is_tunnel) { error = dpif_port_del(ofproto->backer->dpif, ofport->odp_port); if (!error) { /* The caller is going to close ofport->up.netdev. If this is a @@ -3245,6 +3311,13 @@ struct flow_miss_op { uint64_t slow_stub[128 / 8]; /* Buffer for compose_slow_path() */ struct xlate_out xout; bool xout_garbage; /* 'xout' needs to be uninitialized? */ + + struct ofpbuf mask; /* Flow mask for "put" ops. */ + struct odputil_keybuf maskbuf; + + /* If this is a "put" op, then a pointer to the subfacet that should + * be marked as uninstalled if the operation fails. */ + struct subfacet *subfacet; }; /* Sends an OFPT_PACKET_IN message for 'packet' of type OFPR_NO_MATCH to each @@ -3307,22 +3380,22 @@ init_flow_miss_execute_op(struct flow_miss *miss, struct ofpbuf *packet, eth_pop_vlan(packet); } + op->subfacet = NULL; op->xout_garbage = false; op->dpif_op.type = DPIF_OP_EXECUTE; op->dpif_op.u.execute.key = miss->key; op->dpif_op.u.execute.key_len = miss->key_len; op->dpif_op.u.execute.packet = packet; + ofpbuf_use_stack(&op->mask, &op->maskbuf, sizeof op->maskbuf); } /* Helper for handle_flow_miss_without_facet() and * handle_flow_miss_with_facet(). */ static void -handle_flow_miss_common(struct rule_dpif *rule, - struct ofpbuf *packet, const struct flow *flow) +handle_flow_miss_common(struct ofproto_dpif *ofproto, struct ofpbuf *packet, + const struct flow *flow, bool fail_open) { - struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); - - if (rule->up.cr.priority == FAIL_OPEN_PRIORITY) { + if (fail_open) { /* * Extra-special case for fail-open mode. * @@ -3350,6 +3423,15 @@ flow_miss_should_make_facet(struct flow_miss *miss, struct flow_wildcards *wc) struct dpif_backer *backer = miss->ofproto->backer; uint32_t hash; + switch (flow_miss_model) { + case OFPROTO_HANDLE_MISS_AUTO: + break; + case OFPROTO_HANDLE_MISS_WITH_FACETS: + return true; + case OFPROTO_HANDLE_MISS_WITHOUT_FACETS: + return false; + } + if (!backer->governor) { size_t n_subfacets; @@ -3380,7 +3462,8 @@ handle_flow_miss_without_facet(struct rule_dpif *rule, struct xlate_out *xout, COVERAGE_INC(facet_suppress); - handle_flow_miss_common(rule, packet, &miss->flow); + handle_flow_miss_common(miss->ofproto, packet, &miss->flow, + rule->up.cr.priority == FAIL_OPEN_PRIORITY); if (xout->slow) { struct xlate_in xin; @@ -3420,26 +3503,24 @@ handle_flow_miss_with_facet(struct flow_miss *miss, struct facet *facet, long long int now, struct dpif_flow_stats *stats, struct flow_miss_op *ops, size_t *n_ops) { - struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto); enum subfacet_path want_path; struct subfacet *subfacet; struct ofpbuf *packet; - subfacet = subfacet_create(facet, miss, now); want_path = facet->xout.slow ? SF_SLOW_PATH : SF_FAST_PATH; - if (stats) { - subfacet_update_stats(subfacet, stats); - } LIST_FOR_EACH (packet, list_node, &miss->packets) { struct flow_miss_op *op = &ops[*n_ops]; - handle_flow_miss_common(facet->rule, packet, &miss->flow); + handle_flow_miss_common(miss->ofproto, packet, &miss->flow, + facet->fail_open); if (want_path != SF_FAST_PATH) { + struct rule_dpif *rule; struct xlate_in xin; - xlate_in_init(&xin, ofproto, &miss->flow, facet->rule, 0, packet); + rule = rule_dpif_lookup(facet->ofproto, &facet->flow, NULL); + xlate_in_init(&xin, facet->ofproto, &miss->flow, rule, 0, packet); xlate_actions_for_side_effects(&xin); } @@ -3453,24 +3534,53 @@ handle_flow_miss_with_facet(struct flow_miss *miss, struct facet *facet, } } + /* Don't install the flow if it's the result of the "userspace" + * action for an already installed facet. This can occur when a + * datapath flow with wildcards has a "userspace" action and flows + * sent to userspace result in a different subfacet, which will then + * be rejected as overlapping by the datapath. */ + if (miss->upcall_type == DPIF_UC_ACTION + && !list_is_empty(&facet->subfacets)) { + if (stats) { + facet->used = MAX(facet->used, stats->used); + facet->packet_count += stats->n_packets; + facet->byte_count += stats->n_bytes; + facet->tcp_flags |= stats->tcp_flags; + } + return; + } + + subfacet = subfacet_create(facet, miss, now); + if (stats) { + subfacet_update_stats(subfacet, stats); + } + if (miss->upcall_type == DPIF_UC_MISS || subfacet->path != want_path) { struct flow_miss_op *op = &ops[(*n_ops)++]; struct dpif_flow_put *put = &op->dpif_op.u.flow_put; subfacet->path = want_path; + ofpbuf_use_stack(&op->mask, &op->maskbuf, sizeof op->maskbuf); + if (enable_megaflows) { + odp_flow_key_from_mask(&op->mask, &facet->xout.wc.masks, + &miss->flow, UINT32_MAX); + } + op->xout_garbage = false; op->dpif_op.type = DPIF_OP_FLOW_PUT; - put->flags = DPIF_FP_CREATE | DPIF_FP_MODIFY; + op->subfacet = subfacet; + put->flags = DPIF_FP_CREATE; put->key = miss->key; put->key_len = miss->key_len; - put->mask = NULL; - put->mask_len = 0; + put->mask = op->mask.data; + put->mask_len = op->mask.size; + if (want_path == SF_FAST_PATH) { put->actions = facet->xout.odp_actions.data; put->actions_len = facet->xout.odp_actions.size; } else { - compose_slow_path(ofproto, &miss->flow, facet->xout.slow, + compose_slow_path(facet->ofproto, &miss->flow, facet->xout.slow, op->slow_stub, sizeof op->slow_stub, &put->actions, &put->actions_len); } @@ -3565,8 +3675,8 @@ drop_key_clear(struct dpif_backer *backer) if (error && !VLOG_DROP_WARN(&rl)) { struct ds ds = DS_EMPTY_INITIALIZER; odp_flow_key_format(drop_key->key, drop_key->key_len, &ds); - VLOG_WARN("Failed to delete drop key (%s) (%s)", strerror(error), - ds_cstr(&ds)); + VLOG_WARN("Failed to delete drop key (%s) (%s)", + ovs_strerror(error), ds_cstr(&ds)); ds_destroy(&ds); } @@ -3620,7 +3730,7 @@ ofproto_receive(const struct dpif_backer *backer, struct ofpbuf *packet, } port = (tnl_port_should_receive(flow) - ? ofport_dpif_cast(tnl_port_receive(flow)) + ? tnl_port_receive(flow) : odp_port_to_ofport(backer, flow->in_port.odp_port)); flow->in_port.ofp_port = port ? port->up.ofp_port : OFPP_NONE; if (!port) { @@ -3719,15 +3829,20 @@ handle_miss_upcalls(struct dpif_backer *backer, struct dpif_upcall *upcalls, drop_key = drop_key_lookup(backer, upcall->key, upcall->key_len); if (!drop_key) { - drop_key = xmalloc(sizeof *drop_key); - drop_key->key = xmemdup(upcall->key, upcall->key_len); - drop_key->key_len = upcall->key_len; - - hmap_insert(&backer->drop_keys, &drop_key->hmap_node, - hash_bytes(drop_key->key, drop_key->key_len, 0)); - dpif_flow_put(backer->dpif, DPIF_FP_CREATE | DPIF_FP_MODIFY, - drop_key->key, drop_key->key_len, - NULL, 0, NULL, 0, NULL); + int ret; + ret = dpif_flow_put(backer->dpif, + DPIF_FP_CREATE | DPIF_FP_MODIFY, + upcall->key, upcall->key_len, + NULL, 0, NULL, 0, NULL); + + if (!ret) { + drop_key = xmalloc(sizeof *drop_key); + drop_key->key = xmemdup(upcall->key, upcall->key_len); + drop_key->key_len = upcall->key_len; + + hmap_insert(&backer->drop_keys, &drop_key->hmap_node, + hash_bytes(drop_key->key, drop_key->key_len, 0)); + } } continue; } @@ -3771,8 +3886,30 @@ handle_miss_upcalls(struct dpif_backer *backer, struct dpif_upcall *upcalls, } dpif_operate(backer->dpif, dpif_ops, n_ops); - /* Free memory. */ for (i = 0; i < n_ops; i++) { + if (dpif_ops[i]->error != 0 + && flow_miss_ops[i].dpif_op.type == DPIF_OP_FLOW_PUT + && flow_miss_ops[i].subfacet) { + struct subfacet *subfacet = flow_miss_ops[i].subfacet; + + COVERAGE_INC(subfacet_install_fail); + + /* Zero-out subfacet counters when installation failed, but + * datapath reported hits. This should not happen and + * indicates a bug, since if the datapath flow exists, we + * should not be attempting to create a new subfacet. A + * buggy datapath could trigger this, so just zero out the + * counters and log an error. */ + if (subfacet->dp_packet_count || subfacet->dp_byte_count) { + VLOG_ERR_RL(&rl, "failed to install subfacet for which " + "datapath reported hits"); + subfacet->dp_packet_count = subfacet->dp_byte_count = 0; + } + + subfacet->path = SF_NOT_INSTALLED; + } + + /* Free memory. */ if (flow_miss_ops[i].xout_garbage) { xlate_out_uninit(&flow_miss_ops[i].xout); } @@ -4031,7 +4168,7 @@ expire(struct dpif_backer *backer) HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) { if (bundle->bond) { - bond_rebalance(bundle->bond, &backer->revalidate_set); + bond_rebalance(bundle->bond); } } } @@ -4047,7 +4184,6 @@ update_subfacet_stats(struct subfacet *subfacet, const struct dpif_flow_stats *stats) { struct facet *facet = subfacet->facet; - struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto); struct dpif_flow_stats diff; diff.tcp_flags = stats->tcp_flags; @@ -4067,7 +4203,7 @@ update_subfacet_stats(struct subfacet *subfacet, diff.n_bytes = 0; } - ofproto->n_hit += diff.n_packets; + facet->ofproto->n_hit += diff.n_packets; subfacet->dp_packet_count = stats->n_packets; subfacet->dp_byte_count = stats->n_bytes; subfacet_update_stats(subfacet, &diff); @@ -4119,12 +4255,12 @@ update_stats(struct dpif_backer *backer) { const struct dpif_flow_stats *stats; struct dpif_flow_dump dump; - const struct nlattr *key; - size_t key_len; + const struct nlattr *key, *mask; + size_t key_len, mask_len; dpif_flow_dump_start(&dump, backer->dpif); while (dpif_flow_dump_next(&dump, &key, &key_len, - NULL, NULL, NULL, NULL, &stats)) { + &mask, &mask_len, NULL, NULL, &stats)) { struct subfacet *subfacet; uint32_t key_hash; @@ -4285,7 +4421,6 @@ expire_subfacets(struct dpif_backer *backer, int dp_max_idle) static void rule_expire(struct rule_dpif *rule) { - struct facet *facet, *next_facet; long long int now; uint8_t reason; @@ -4308,12 +4443,6 @@ rule_expire(struct rule_dpif *rule) COVERAGE_INC(ofproto_dpif_expired); - /* Update stats. (This is a no-op if the rule expired due to an idle - * timeout, because that only happens when the rule has no facets left.) */ - LIST_FOR_EACH_SAFE (facet, next_facet, list_node, &rule->facets) { - facet_remove(facet); - } - /* Get rid of the rule. */ ofproto_rule_expire(&rule->up, reason); } @@ -4340,15 +4469,14 @@ facet_create(const struct flow_miss *miss, struct rule_dpif *rule, struct match match; facet = xzalloc(sizeof *facet); + facet->ofproto = miss->ofproto; facet->packet_count = facet->prev_packet_count = stats->n_packets; facet->byte_count = facet->prev_byte_count = stats->n_bytes; facet->tcp_flags = stats->tcp_flags; facet->used = stats->used; facet->flow = miss->flow; facet->learn_rl = time_msec() + 500; - facet->rule = rule; - list_push_back(&facet->rule->facets, &facet->list_node); list_init(&facet->subfacets); netflow_flow_init(&facet->nf_flow); netflow_flow_update_time(ofproto->netflow, &facet->nf_flow, facet->used); @@ -4360,6 +4488,7 @@ facet_create(const struct flow_miss *miss, struct rule_dpif *rule, classifier_insert(&ofproto->facets, &facet->cr); facet->nf_flow.output_iface = facet->xout.nf_output_iface; + facet->fail_open = rule->up.cr.priority == FAIL_OPEN_PRIORITY; return facet; } @@ -4403,7 +4532,6 @@ execute_odp_actions(struct ofproto_dpif *ofproto, const struct flow *flow, static void facet_remove(struct facet *facet) { - struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto); struct subfacet *subfacet, *next_subfacet; ovs_assert(!list_is_empty(&facet->subfacets)); @@ -4425,9 +4553,8 @@ facet_remove(struct facet *facet) &facet->subfacets) { subfacet_destroy__(subfacet); } - classifier_remove(&ofproto->facets, &facet->cr); + classifier_remove(&facet->ofproto->facets, &facet->cr); cls_rule_destroy(&facet->cr); - list_remove(&facet->list_node); facet_free(facet); } @@ -4457,13 +4584,12 @@ facet_learn(struct facet *facet) static void facet_account(struct facet *facet) { - struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto); const struct nlattr *a; unsigned int left; ovs_be16 vlan_tci; uint64_t n_bytes; - if (!facet->xout.has_normal || !ofproto->has_bonded_bundles) { + if (!facet->xout.has_normal || !facet->ofproto->has_bonded_bundles) { return; } n_bytes = facet->byte_count - facet->accounted_bytes; @@ -4484,7 +4610,7 @@ facet_account(struct facet *facet) switch (nl_attr_type(a)) { case OVS_ACTION_ATTR_OUTPUT: - port = get_odp_port(ofproto, nl_attr_get_odp_port(a)); + port = get_odp_port(facet->ofproto, nl_attr_get_odp_port(a)); if (port && port->bundle && port->bundle->bond) { bond_account(port->bundle->bond, &facet->flow, vlan_tci_to_vid(vlan_tci), n_bytes); @@ -4510,9 +4636,11 @@ static bool facet_is_controller_flow(struct facet *facet) { if (facet) { - const struct rule *rule = &facet->rule->up; - const struct ofpact *ofpacts = rule->ofpacts; - size_t ofpacts_len = rule->ofpacts_len; + struct ofproto_dpif *ofproto = facet->ofproto; + const struct rule_dpif *rule = rule_dpif_lookup(ofproto, &facet->flow, + NULL); + const struct ofpact *ofpacts = rule->up.ofpacts; + size_t ofpacts_len = rule->up.ofpacts_len; if (ofpacts_len > 0 && ofpacts->type == OFPACT_CONTROLLER && @@ -4530,7 +4658,7 @@ facet_is_controller_flow(struct facet *facet) static void facet_flush_stats(struct facet *facet) { - struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto); + struct ofproto_dpif *ofproto = facet->ofproto; struct subfacet *subfacet; LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) { @@ -4599,41 +4727,21 @@ facet_check_consistency(struct facet *facet) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 15); - struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto); - struct xlate_out xout; struct xlate_in xin; struct rule_dpif *rule; - bool ok; - - /* Check the rule for consistency. */ - rule = rule_dpif_lookup(ofproto, &facet->flow, NULL); - if (rule != facet->rule) { - if (!VLOG_DROP_WARN(&rl)) { - struct ds s = DS_EMPTY_INITIALIZER; - - flow_format(&s, &facet->flow); - ds_put_format(&s, ": facet associated with wrong rule (was " - "table=%"PRIu8",", facet->rule->up.table_id); - cls_rule_format(&facet->rule->up.cr, &s); - ds_put_format(&s, ") (should have been table=%"PRIu8",", - rule->up.table_id); - cls_rule_format(&rule->up.cr, &s); - ds_put_char(&s, ')'); - - VLOG_WARN("%s", ds_cstr(&s)); - ds_destroy(&s); - } - return false; - } + bool ok, fail_open; /* Check the datapath actions for consistency. */ - xlate_in_init(&xin, ofproto, &facet->flow, rule, 0, NULL); + rule = rule_dpif_lookup(facet->ofproto, &facet->flow, NULL); + xlate_in_init(&xin, facet->ofproto, &facet->flow, rule, 0, NULL); xlate_actions(&xin, &xout); + fail_open = rule->up.cr.priority == FAIL_OPEN_PRIORITY; ok = ofpbuf_equal(&facet->xout.odp_actions, &xout.odp_actions) - && facet->xout.slow == xout.slow; + && facet->xout.slow == xout.slow + && facet->fail_open == fail_open; if (!ok && !VLOG_DROP_WARN(&rl)) { struct ds s = DS_EMPTY_INITIALIZER; @@ -4654,7 +4762,10 @@ facet_check_consistency(struct facet *facet) ds_put_format(&s, " slow path incorrect. should be %d", xout.slow); } - VLOG_WARN("%s", ds_cstr(&s)); + if (facet->fail_open != fail_open) { + ds_put_format(&s, " fail open incorrect. should be %s", + fail_open ? "true" : "false"); + } ds_destroy(&s); } xlate_out_uninit(&xout); @@ -4677,7 +4788,7 @@ facet_check_consistency(struct facet *facet) static bool facet_revalidate(struct facet *facet) { - struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto); + struct ofproto_dpif *ofproto = facet->ofproto; struct rule_dpif *new_rule; struct subfacet *subfacet; struct flow_wildcards wc; @@ -4758,15 +4869,8 @@ facet_revalidate(struct facet *facet) facet->xout.nf_output_iface = xout.nf_output_iface; facet->xout.mirrors = xout.mirrors; facet->nf_flow.output_iface = facet->xout.nf_output_iface; - - if (facet->rule != new_rule) { - COVERAGE_INC(facet_changed_rule); - list_remove(&facet->list_node); - list_push_back(&new_rule->facets, &facet->list_node); - facet->rule = new_rule; - facet->used = new_rule->up.created; - facet->prev_used = facet->used; - } + facet->used = MAX(facet->used, new_rule->up.created); + facet->fail_open = new_rule->up.cr.priority == FAIL_OPEN_PRIORITY; xlate_out_uninit(&xout); return true; @@ -4797,10 +4901,9 @@ facet_push_stats(struct facet *facet, bool may_learn) stats.tcp_flags = facet->tcp_flags; if (may_learn || stats.n_packets || facet->used > facet->prev_used) { - struct ofproto_dpif *ofproto = - ofproto_dpif_cast(facet->rule->up.ofproto); - + struct ofproto_dpif *ofproto = facet->ofproto; struct ofport_dpif *in_port; + struct rule_dpif *rule; struct xlate_in xin; facet->prev_packet_count = facet->packet_count; @@ -4808,19 +4911,20 @@ facet_push_stats(struct facet *facet, bool may_learn) facet->prev_used = facet->used; in_port = get_ofp_port(ofproto, facet->flow.in_port.ofp_port); - if (in_port && in_port->tnl_port) { + if (in_port && in_port->is_tunnel) { netdev_vport_inc_rx(in_port->up.netdev, &stats); } - rule_credit_stats(facet->rule, &stats); + rule = rule_dpif_lookup(ofproto, &facet->flow, NULL); + rule_credit_stats(rule, &stats); netflow_flow_update_time(ofproto->netflow, &facet->nf_flow, facet->used); netflow_flow_update_flags(&facet->nf_flow, facet->tcp_flags); - update_mirror_stats(ofproto, facet->xout.mirrors, stats.n_packets, - stats.n_bytes); + mirror_update_stats(ofproto->mbridge, facet->xout.mirrors, + stats.n_packets, stats.n_bytes); - xlate_in_init(&xin, ofproto, &facet->flow, facet->rule, - stats.tcp_flags, NULL); + xlate_in_init(&xin, ofproto, &facet->flow, rule, stats.tcp_flags, + NULL); xin.resubmit_stats = &stats; xin.may_learn = may_learn; xlate_actions_for_side_effects(&xin); @@ -4943,7 +5047,7 @@ static void subfacet_destroy__(struct subfacet *subfacet) { struct facet *facet = subfacet->facet; - struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto); + struct ofproto_dpif *ofproto = facet->ofproto; /* Update ofproto stats before uninstall the subfacet. */ ofproto->backer->subfacet_del_count++; @@ -5009,35 +5113,45 @@ subfacet_install(struct subfacet *subfacet, const struct ofpbuf *odp_actions, struct dpif_flow_stats *stats) { struct facet *facet = subfacet->facet; - struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto); enum subfacet_path path = facet->xout.slow ? SF_SLOW_PATH : SF_FAST_PATH; const struct nlattr *actions = odp_actions->data; size_t actions_len = odp_actions->size; + struct odputil_keybuf maskbuf; + struct ofpbuf mask; uint64_t slow_path_stub[128 / 8]; enum dpif_flow_put_flags flags; int ret; - flags = DPIF_FP_CREATE | DPIF_FP_MODIFY; + flags = subfacet->path == SF_NOT_INSTALLED ? DPIF_FP_CREATE + : DPIF_FP_MODIFY; if (stats) { flags |= DPIF_FP_ZERO_STATS; } if (path == SF_SLOW_PATH) { - compose_slow_path(ofproto, &facet->flow, facet->xout.slow, + compose_slow_path(facet->ofproto, &facet->flow, facet->xout.slow, slow_path_stub, sizeof slow_path_stub, &actions, &actions_len); } - ret = dpif_flow_put(ofproto->backer->dpif, flags, subfacet->key, - subfacet->key_len, NULL, 0, + ofpbuf_use_stack(&mask, &maskbuf, sizeof maskbuf); + if (enable_megaflows) { + odp_flow_key_from_mask(&mask, &facet->xout.wc.masks, + &facet->flow, UINT32_MAX); + } + + ret = dpif_flow_put(subfacet->backer->dpif, flags, subfacet->key, + subfacet->key_len, mask.data, mask.size, actions, actions_len, stats); if (stats) { subfacet_reset_dp_stats(subfacet, stats); } - if (!ret) { + if (ret) { + COVERAGE_INC(subfacet_install_fail); + } else { subfacet->path = path; } return ret; @@ -5048,8 +5162,7 @@ static void subfacet_uninstall(struct subfacet *subfacet) { if (subfacet->path != SF_NOT_INSTALLED) { - struct rule_dpif *rule = subfacet->facet->rule; - struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); + struct ofproto_dpif *ofproto = subfacet->facet->ofproto; struct dpif_flow_stats stats; int error; @@ -5139,6 +5252,7 @@ rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, } if (wc) { + memset(&wc->masks.dl_type, 0xff, sizeof wc->masks.dl_type); wc->masks.nw_frag |= FLOW_NW_FRAG_MASK; } @@ -5219,28 +5333,8 @@ rule_construct(struct rule *rule_) rule->packet_count = 0; rule->byte_count = 0; - victim = rule_dpif_cast(ofoperation_get_victim(rule->up.pending)); - if (victim && !list_is_empty(&victim->facets)) { - struct facet *facet; - - rule->facets = victim->facets; - list_moved(&rule->facets); - LIST_FOR_EACH (facet, list_node, &rule->facets) { - /* XXX: We're only clearing our local counters here. It's possible - * that quite a few packets are unaccounted for in the datapath - * statistics. These will be accounted to the new rule instead of - * cleared as required. This could be fixed by clearing out the - * datapath statistics for this facet, but currently it doesn't - * seem worth it. */ - facet_reset_counters(facet); - facet->rule = rule; - } - } else { - /* Must avoid list_moved() in this case. */ - list_init(&rule->facets); - } - table_id = rule->up.table_id; + victim = rule_dpif_cast(ofoperation_get_victim(rule->up.pending)); if (victim) { rule->tag = victim->tag; } else if (table_id == 0) { @@ -5258,16 +5352,9 @@ rule_construct(struct rule *rule_) } static void -rule_destruct(struct rule *rule_) +rule_destruct(struct rule *rule) { - struct rule_dpif *rule = rule_dpif_cast(rule_); - struct facet *facet, *next_facet; - - LIST_FOR_EACH_SAFE (facet, next_facet, list_node, &rule->facets) { - facet_revalidate(facet); - } - - complete_operation(rule); + complete_operation(rule_dpif_cast(rule)); } static void @@ -5373,7 +5460,7 @@ send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet) if (error) { VLOG_WARN_RL(&rl, "%s: failed to send packet on port %s (%s)", ofproto->up.name, netdev_get_name(ofport->up.netdev), - strerror(error)); + ovs_strerror(error)); } ofproto->stats.tx_packets++; @@ -5433,35 +5520,22 @@ put_userspace_action(const struct ofproto_dpif *ofproto, return odp_put_userspace_action(pid, cookie, cookie_size, odp_actions); } - -static void -update_mirror_stats(struct ofproto_dpif *ofproto, mirror_mask_t mirrors, - uint64_t packets, uint64_t bytes) +tag_type +calculate_flow_tag(struct ofproto_dpif *ofproto, const struct flow *flow, + uint8_t table_id, struct rule_dpif *rule) { - if (!mirrors) { - return; - } - - for (; mirrors; mirrors = zero_rightmost_1bit(mirrors)) { - struct ofmirror *m; - - m = ofproto->mirrors[mirror_mask_ffs(mirrors) - 1]; - - if (!m) { - /* In normal circumstances 'm' will not be NULL. However, - * if mirrors are reconfigured, we can temporarily get out - * of sync in facet_revalidate(). We could "correct" the - * mirror list before reaching here, but doing that would - * not properly account the traffic stats we've currently - * accumulated for previous mirror configuration. */ - continue; + if (table_id > 0 && table_id < N_TABLES) { + struct table_dpif *table = &ofproto->tables[table_id]; + if (table->other_table) { + return (rule && rule->tag + ? rule->tag + : rule_calculate_tag(flow, &table->other_table->mask, + table->basis)); } - - m->packet_count += packets; - m->byte_count += bytes; } -} + return 0; +} /* Optimized flow revalidation. * @@ -5486,7 +5560,7 @@ update_mirror_stats(struct ofproto_dpif *ofproto, mirror_mask_t mirrors, /* Calculates the tag to use for 'flow' and mask 'mask' when it is inserted * into an OpenFlow table with the given 'basis'. */ -tag_type +static tag_type rule_calculate_tag(const struct flow *flow, const struct minimask *mask, uint32_t secret) { @@ -5718,16 +5792,27 @@ ofproto_unixctl_fdb_flush(struct unixctl_conn *conn, int argc, unixctl_command_reply_error(conn, "no such bridge"); return; } - mac_learning_flush(ofproto->ml, &ofproto->backer->revalidate_set); + ovs_rwlock_wrlock(&ofproto->ml->rwlock); + mac_learning_flush(ofproto->ml); + ovs_rwlock_unlock(&ofproto->ml->rwlock); } else { HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { - mac_learning_flush(ofproto->ml, &ofproto->backer->revalidate_set); + ovs_rwlock_wrlock(&ofproto->ml->rwlock); + mac_learning_flush(ofproto->ml); + ovs_rwlock_unlock(&ofproto->ml->rwlock); } } unixctl_command_reply(conn, "table successfully flushed"); } +static struct ofport_dpif * +ofbundle_get_a_port(const struct ofbundle *bundle) +{ + return CONTAINER_OF(list_front(&bundle->ports), struct ofport_dpif, + bundle_node); +} + static void ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED) @@ -5743,13 +5828,18 @@ ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED, } ds_put_cstr(&ds, " port VLAN MAC Age\n"); + ovs_rwlock_rdlock(&ofproto->ml->rwlock); LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) { struct ofbundle *bundle = e->port.p; - ds_put_format(&ds, "%5d %4d "ETH_ADDR_FMT" %3d\n", - ofbundle_get_a_port(bundle)->odp_port, - e->vlan, ETH_ADDR_ARGS(e->mac), + char name[OFP_MAX_PORT_NAME_LEN]; + + ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port, + name, sizeof name); + ds_put_format(&ds, "%5s %4d "ETH_ADDR_FMT" %3d\n", + name, e->vlan, ETH_ADDR_ARGS(e->mac), mac_entry_age(ofproto->ml, e)); } + ovs_rwlock_unlock(&ofproto->ml->rwlock); unixctl_command_reply(conn, ds_cstr(&ds)); ds_destroy(&ds); } @@ -5852,7 +5942,7 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[], { const struct dpif_backer *backer; struct ofproto_dpif *ofproto; - struct ofpbuf odp_key; + struct ofpbuf odp_key, odp_mask; struct ofpbuf *packet; struct ds result; struct flow flow; @@ -5862,6 +5952,7 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[], backer = NULL; ds_init(&result); ofpbuf_init(&odp_key, 0); + ofpbuf_init(&odp_mask, 0); /* Handle "-generate" or a hex string as the last argument. */ if (!strcmp(argv[argc - 1], "-generate")) { @@ -5882,7 +5973,7 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[], * bridge is specified. If function odp_flow_key_from_string() * returns 0, the flow is a odp_flow. If function * parse_ofp_exact_flow() returns 0, the flow is a br_flow. */ - if (!odp_flow_from_string(argv[argc - 1], NULL, &odp_key, NULL)) { + if (!odp_flow_from_string(argv[argc - 1], NULL, &odp_key, &odp_mask)) { /* If the odp_flow is the second argument, * the datapath name is the first argument. */ if (argc == 3) { @@ -5962,6 +6053,7 @@ exit: ds_destroy(&result); ofpbuf_delete(packet); ofpbuf_uninit(&odp_key); + ofpbuf_uninit(&odp_mask); } void @@ -6326,6 +6418,48 @@ ofproto_unixctl_dpif_dump_megaflows(struct unixctl_conn *conn, ds_destroy(&ds); } +/* Disable using the megaflows. + * + * This command is only needed for advanced debugging, so it's not + * documented in the man page. */ +static void +ofproto_unixctl_dpif_disable_megaflows(struct unixctl_conn *conn, + int argc OVS_UNUSED, + const char *argv[] OVS_UNUSED, + void *aux OVS_UNUSED) +{ + struct ofproto_dpif *ofproto; + + enable_megaflows = false; + + HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { + flush(&ofproto->up); + } + + unixctl_command_reply(conn, "megaflows disabled"); +} + +/* Re-enable using megaflows. + * + * This command is only needed for advanced debugging, so it's not + * documented in the man page. */ +static void +ofproto_unixctl_dpif_enable_megaflows(struct unixctl_conn *conn, + int argc OVS_UNUSED, + const char *argv[] OVS_UNUSED, + void *aux OVS_UNUSED) +{ + struct ofproto_dpif *ofproto; + + enable_megaflows = true; + + HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { + flush(&ofproto->up); + } + + unixctl_command_reply(conn, "megaflows enabled"); +} + static void ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[], @@ -6345,12 +6479,21 @@ ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn, HMAP_FOR_EACH (subfacet, hmap_node, &ofproto->backer->subfacets) { struct facet *facet = subfacet->facet; + struct odputil_keybuf maskbuf; + struct ofpbuf mask; - if (ofproto_dpif_cast(facet->rule->up.ofproto) != ofproto) { + if (facet->ofproto != ofproto) { continue; } - odp_flow_key_format(subfacet->key, subfacet->key_len, &ds); + ofpbuf_use_stack(&mask, &maskbuf, sizeof maskbuf); + if (enable_megaflows) { + odp_flow_key_from_mask(&mask, &facet->xout.wc.masks, + &facet->flow, UINT32_MAX); + } + + odp_flow_format(subfacet->key, subfacet->key_len, + mask.data, mask.size, &ds); ds_put_format(&ds, ", packets:%"PRIu64", bytes:%"PRIu64", used:", subfacet->dp_packet_count, subfacet->dp_byte_count); @@ -6439,6 +6582,10 @@ ofproto_dpif_unixctl_init(void) ofproto_unixctl_dpif_del_flows, NULL); unixctl_command_register("dpif/dump-megaflows", "bridge", 1, 1, ofproto_unixctl_dpif_dump_megaflows, NULL); + unixctl_command_register("dpif/disable-megaflows", "", 0, 0, + ofproto_unixctl_dpif_disable_megaflows, NULL); + unixctl_command_register("dpif/enable-megaflows", "", 0, 0, + ofproto_unixctl_dpif_enable_megaflows, NULL); } /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.) @@ -6486,6 +6633,12 @@ hash_realdev_vid(ofp_port_t realdev_ofp_port, int vid) return hash_2words(ofp_to_u16(realdev_ofp_port), vid); } +bool +ofproto_has_vlan_splinters(const struct ofproto_dpif *ofproto) +{ + return !hmap_is_empty(&ofproto->realdev_vid_map); +} + /* Returns the OFP port number of the Linux VLAN device that corresponds to * 'vlan_tci' on the network device with port number 'realdev_ofp_port' in * 'struct ofport_dpif'. For example, given 'realdev_ofp_port' of eth0 and @@ -6519,7 +6672,7 @@ vlandev_find(const struct ofproto_dpif *ofproto, ofp_port_t vlandev_ofp_port) struct vlan_splinter *vsp; HMAP_FOR_EACH_WITH_HASH (vsp, vlandev_node, - hash_int(ofp_to_u16(vlandev_ofp_port), 0), + hash_ofp_port(vlandev_ofp_port), &ofproto->vlandev_map) { if (vsp->vlandev_ofp_port == vlandev_ofp_port) { return vsp; @@ -6610,7 +6763,7 @@ vsp_add(struct ofport_dpif *port, ofp_port_t realdev_ofp_port, int vid) vsp = xmalloc(sizeof *vsp); hmap_insert(&ofproto->vlandev_map, &vsp->vlandev_node, - hash_int(ofp_to_u16(port->up.ofp_port), 0)); + hash_ofp_port(port->up.ofp_port)); hmap_insert(&ofproto->realdev_vid_map, &vsp->realdev_vid_node, hash_realdev_vid(realdev_ofp_port, vid)); vsp->realdev_ofp_port = realdev_ofp_port; @@ -6623,7 +6776,7 @@ vsp_add(struct ofport_dpif *port, ofp_port_t realdev_ofp_port, int vid) } } -odp_port_t +static odp_port_t ofp_port_to_odp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port) { const struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port); @@ -6635,8 +6788,7 @@ odp_port_to_ofport(const struct dpif_backer *backer, odp_port_t odp_port) { struct ofport_dpif *port; - HMAP_FOR_EACH_IN_BUCKET (port, odp_port_node, - hash_int(odp_to_u32(odp_port), 0), + HMAP_FOR_EACH_IN_BUCKET (port, odp_port_node, hash_odp_port(odp_port), &backer->odp_to_ofport_map) { if (port->odp_port == odp_port) { return port; @@ -6765,11 +6917,15 @@ const struct ofproto_class ofproto_dpif_class = { set_queues, bundle_set, bundle_remove, - mirror_set, - mirror_get_stats, + mirror_set__, + mirror_get_stats__, set_flood_vlans, is_mirror_output_bundle, forward_bpdu_changed, set_mac_table_config, set_realdev, + NULL, /* meter_get_features */ + NULL, /* meter_set */ + NULL, /* meter_get */ + NULL, /* meter_del */ };