- * In addition, this function maintains per ofproto flow hit counts. The patch
- * port is not treated specially. e.g. A packet ingress from br0 patched into
- * br1 will increase the hit count of br0 by 1, however, does not affect
- * the hit or miss counts of br1.
- */
-static void
-update_stats(struct dpif_backer *backer)
-{
- const struct dpif_flow_stats *stats;
- struct dpif_flow_dump dump;
- const struct nlattr *key, *mask;
- size_t key_len, mask_len;
-
- dpif_flow_dump_start(&dump, backer->dpif);
- while (dpif_flow_dump_next(&dump, &key, &key_len,
- &mask, &mask_len, NULL, NULL, &stats)) {
- struct subfacet *subfacet;
- uint32_t key_hash;
-
- key_hash = odp_flow_key_hash(key, key_len);
- subfacet = subfacet_find(backer, key, key_len, key_hash);
- switch (subfacet ? subfacet->path : SF_NOT_INSTALLED) {
- case SF_FAST_PATH:
- update_subfacet_stats(subfacet, stats);
- break;
-
- case SF_SLOW_PATH:
- /* Stats are updated per-packet. */
- break;
-
- case SF_NOT_INSTALLED:
- default:
- delete_unexpected_flow(backer, key, key_len);
- break;
- }
- run_fast_rl();
- }
- dpif_flow_dump_done(&dump);
-}
-
-/* Calculates and returns the number of milliseconds of idle time after which
- * subfacets should expire from the datapath. When a subfacet expires, we fold
- * its statistics into its facet, and when a facet's last subfacet expires, we
- * fold its statistic into its rule. */
-static int
-subfacet_max_idle(const struct dpif_backer *backer)
-{
- /*
- * Idle time histogram.
- *
- * Most of the time a switch has a relatively small number of subfacets.
- * When this is the case we might as well keep statistics for all of them
- * in userspace and to cache them in the kernel datapath for performance as
- * well.
- *
- * As the number of subfacets increases, the memory required to maintain
- * statistics about them in userspace and in the kernel becomes
- * significant. However, with a large number of subfacets it is likely
- * that only a few of them are "heavy hitters" that consume a large amount
- * of bandwidth. At this point, only heavy hitters are worth caching in
- * the kernel and maintaining in userspaces; other subfacets we can
- * discard.
- *
- * The technique used to compute the idle time is to build a histogram with
- * N_BUCKETS buckets whose width is BUCKET_WIDTH msecs each. Each subfacet
- * that is installed in the kernel gets dropped in the appropriate bucket.
- * After the histogram has been built, we compute the cutoff so that only
- * the most-recently-used 1% of subfacets (but at least
- * flow_eviction_threshold flows) are kept cached. At least
- * the most-recently-used bucket of subfacets is kept, so actually an
- * arbitrary number of subfacets can be kept in any given expiration run
- * (though the next run will delete most of those unless they receive
- * additional data).
- *
- * This requires a second pass through the subfacets, in addition to the
- * pass made by update_stats(), because the former function never looks at
- * uninstallable subfacets.
- */
- enum { BUCKET_WIDTH = 100 };
- enum { N_BUCKETS = 5000 / BUCKET_WIDTH };
- int buckets[N_BUCKETS] = { 0 };
- int total, subtotal, bucket;
- struct subfacet *subfacet;
- long long int now;
- int i;
-
- total = hmap_count(&backer->subfacets);
- if (total <= flow_eviction_threshold) {
- return N_BUCKETS * BUCKET_WIDTH;
- }
-
- /* Build histogram. */
- now = time_msec();
- HMAP_FOR_EACH (subfacet, hmap_node, &backer->subfacets) {
- long long int idle = now - subfacet->used;
- int bucket = (idle <= 0 ? 0
- : idle >= BUCKET_WIDTH * N_BUCKETS ? N_BUCKETS - 1
- : (unsigned int) idle / BUCKET_WIDTH);
- buckets[bucket]++;
- }
-
- /* Find the first bucket whose flows should be expired. */
- subtotal = bucket = 0;
- do {
- subtotal += buckets[bucket++];
- } while (bucket < N_BUCKETS &&
- subtotal < MAX(flow_eviction_threshold, total / 100));
-
- if (VLOG_IS_DBG_ENABLED()) {
- struct ds s;
-
- ds_init(&s);
- ds_put_cstr(&s, "keep");
- for (i = 0; i < N_BUCKETS; i++) {
- if (i == bucket) {
- ds_put_cstr(&s, ", drop");
- }
- if (buckets[i]) {
- ds_put_format(&s, " %d:%d", i * BUCKET_WIDTH, buckets[i]);
- }
- }
- VLOG_INFO("%s (msec:count)", ds_cstr(&s));
- ds_destroy(&s);
- }
-
- return bucket * BUCKET_WIDTH;
-}
-
-static void
-expire_subfacets(struct dpif_backer *backer, int dp_max_idle)
-{
- /* Cutoff time for most flows. */
- long long int normal_cutoff = time_msec() - dp_max_idle;
-
- /* We really want to keep flows for special protocols around, so use a more
- * conservative cutoff. */
- long long int special_cutoff = time_msec() - 10000;
-
- struct subfacet *subfacet, *next_subfacet;
- struct subfacet *batch[SUBFACET_DESTROY_MAX_BATCH];
- int n_batch;
-
- n_batch = 0;
- HMAP_FOR_EACH_SAFE (subfacet, next_subfacet, hmap_node,
- &backer->subfacets) {
- long long int cutoff;
-
- cutoff = (subfacet->facet->xout.slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP
- | SLOW_STP)
- ? special_cutoff
- : normal_cutoff);
- if (subfacet->used < cutoff) {
- if (subfacet->path != SF_NOT_INSTALLED) {
- batch[n_batch++] = subfacet;
- if (n_batch >= SUBFACET_DESTROY_MAX_BATCH) {
- subfacet_destroy_batch(backer, batch, n_batch);
- n_batch = 0;
- }
- } else {
- subfacet_destroy(subfacet);
- }
- }
- }
-
- if (n_batch > 0) {
- subfacet_destroy_batch(backer, batch, n_batch);
- }
-}
-
-/* If 'rule' is an OpenFlow rule, that has expired according to OpenFlow rules,
- * then delete it entirely. */
-static void
-rule_expire(struct rule_dpif *rule)
- OVS_REQUIRES(ofproto_mutex)
-{
- uint16_t idle_timeout, hard_timeout;
- long long int now = time_msec();
- int reason;
-
- ovs_assert(!rule->up.pending);
-
- /* Has 'rule' expired? */
- ovs_mutex_lock(&rule->up.mutex);
- hard_timeout = rule->up.hard_timeout;
- idle_timeout = rule->up.idle_timeout;
- if (hard_timeout && now > rule->up.modified + hard_timeout * 1000) {
- reason = OFPRR_HARD_TIMEOUT;
- } else if (idle_timeout && now > rule->up.used + idle_timeout * 1000) {
- reason = OFPRR_IDLE_TIMEOUT;
- } else {
- reason = -1;
- }
- ovs_mutex_unlock(&rule->up.mutex);
-
- if (reason >= 0) {
- COVERAGE_INC(ofproto_dpif_expired);
- ofproto_rule_expire(&rule->up, reason);
- }
-}
-\f
-/* Facets. */
-
-/* Creates and returns a new facet based on 'miss'.
- *
- * The caller must already have determined that no facet with an identical
- * 'miss->flow' exists in 'miss->ofproto'.
- *
- * 'rule' and 'xout' must have been created based on 'miss'.
- *
- * 'facet'' statistics are initialized based on 'stats'.
- *
- * The facet will initially have no subfacets. The caller should create (at
- * least) one subfacet with subfacet_create(). */
-static struct facet *
-facet_create(const struct flow_miss *miss)
-{
- struct ofproto_dpif *ofproto = miss->ofproto;
- struct facet *facet;
- struct match match;
-
- COVERAGE_INC(facet_create);
- facet = xzalloc(sizeof *facet);
- facet->ofproto = miss->ofproto;
- facet->used = miss->stats.used;
- facet->flow = miss->flow;
- facet->learn_rl = time_msec() + 500;
-
- list_init(&facet->subfacets);
- netflow_flow_init(&facet->nf_flow);
- netflow_flow_update_time(ofproto->netflow, &facet->nf_flow, facet->used);
-
- xlate_out_copy(&facet->xout, &miss->xout);
-
- match_init(&match, &facet->flow, &facet->xout.wc);
- cls_rule_init(&facet->cr, &match, OFP_DEFAULT_PRIORITY);
- ovs_rwlock_wrlock(&ofproto->facets.rwlock);
- classifier_insert(&ofproto->facets, &facet->cr);
- ovs_rwlock_unlock(&ofproto->facets.rwlock);
-
- facet->nf_flow.output_iface = facet->xout.nf_output_iface;
- return facet;
-}
-
-static void
-facet_free(struct facet *facet)
-{
- if (facet) {
- xlate_out_uninit(&facet->xout);
- free(facet);
- }
-}
-
-/* Executes, within 'ofproto', the actions in 'rule' or 'ofpacts' on 'packet'.
- * 'flow' must reflect the data in 'packet'. */
-int
-ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto,
- const struct flow *flow,
- struct rule_dpif *rule,
- const struct ofpact *ofpacts, size_t ofpacts_len,
- struct ofpbuf *packet)
-{
- struct odputil_keybuf keybuf;
- struct dpif_flow_stats stats;
- struct xlate_out xout;
- struct xlate_in xin;
- ofp_port_t in_port;
- struct ofpbuf key;
- int error;
-
- ovs_assert((rule != NULL) != (ofpacts != NULL));
-
- dpif_flow_stats_extract(flow, packet, time_msec(), &stats);
- if (rule) {
- rule_dpif_credit_stats(rule, &stats);
- }
-
- xlate_in_init(&xin, ofproto, flow, rule, stats.tcp_flags, packet);
- xin.ofpacts = ofpacts;
- xin.ofpacts_len = ofpacts_len;
- xin.resubmit_stats = &stats;
- xlate_actions(&xin, &xout);
-
- ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
- in_port = flow->in_port.ofp_port;
- if (in_port == OFPP_NONE) {
- in_port = OFPP_LOCAL;
- }
- odp_flow_key_from_flow(&key, flow, ofp_port_to_odp_port(ofproto, in_port));
-
- error = dpif_execute(ofproto->backer->dpif, key.data, key.size,
- xout.odp_actions.data, xout.odp_actions.size, packet,
- (xout.slow & SLOW_ACTION) != 0);
- xlate_out_uninit(&xout);
-
- return error;
-}
-
-/* Remove 'facet' from its ofproto and free up the associated memory:
- *
- * - If 'facet' was installed in the datapath, uninstalls it and updates its
- * rule's statistics, via subfacet_uninstall().
- *
- * - Removes 'facet' from its rule and from ofproto->facets.
- */
-static void
-facet_remove(struct facet *facet)
-{
- struct subfacet *subfacet, *next_subfacet;
-
- COVERAGE_INC(facet_remove);
- ovs_assert(!list_is_empty(&facet->subfacets));
-
- /* First uninstall all of the subfacets to get final statistics. */
- LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
- subfacet_uninstall(subfacet);
- }
-
- /* Flush the final stats to the rule.
- *
- * This might require us to have at least one subfacet around so that we
- * can use its actions for accounting in facet_account(), which is why we
- * have uninstalled but not yet destroyed the subfacets. */
- facet_flush_stats(facet);
-
- /* Now we're really all done so destroy everything. */
- LIST_FOR_EACH_SAFE (subfacet, next_subfacet, list_node,
- &facet->subfacets) {
- subfacet_destroy__(subfacet);
- }
- ovs_rwlock_wrlock(&facet->ofproto->facets.rwlock);
- classifier_remove(&facet->ofproto->facets, &facet->cr);
- ovs_rwlock_unlock(&facet->ofproto->facets.rwlock);
- cls_rule_destroy(&facet->cr);
- facet_free(facet);
-}
-
-/* Feed information from 'facet' back into the learning table to keep it in
- * sync with what is actually flowing through the datapath. */
-static void
-facet_learn(struct facet *facet)
-{
- long long int now = time_msec();
-
- if (!facet->xout.has_fin_timeout && now < facet->learn_rl) {
- return;
- }
-
- facet->learn_rl = now + 500;
-
- if (!facet->xout.has_learn
- && !facet->xout.has_normal
- && (!facet->xout.has_fin_timeout
- || !(facet->tcp_flags & (TCP_FIN | TCP_RST)))) {
- return;
- }
-
- facet_push_stats(facet, true);
-}
-
-static void
-facet_account(struct facet *facet)
-{
- const struct nlattr *a;
- unsigned int left;
- ovs_be16 vlan_tci;
- uint64_t n_bytes;
-
- if (!facet->xout.has_normal || !facet->ofproto->has_bonded_bundles) {
- return;
- }
- n_bytes = facet->byte_count - facet->accounted_bytes;
-
- /* This loop feeds byte counters to bond_account() for rebalancing to use
- * as a basis. We also need to track the actual VLAN on which the packet
- * is going to be sent to ensure that it matches the one passed to
- * bond_choose_output_slave(). (Otherwise, we will account to the wrong
- * hash bucket.)
- *
- * We use the actions from an arbitrary subfacet because they should all
- * be equally valid for our purpose. */
- vlan_tci = facet->flow.vlan_tci;
- NL_ATTR_FOR_EACH_UNSAFE (a, left, facet->xout.odp_actions.data,
- facet->xout.odp_actions.size) {
- const struct ovs_action_push_vlan *vlan;
- struct ofport_dpif *port;
-
- switch (nl_attr_type(a)) {
- case OVS_ACTION_ATTR_OUTPUT:
- port = get_odp_port(facet->ofproto, nl_attr_get_odp_port(a));
- if (port && port->bundle && port->bundle->bond) {
- bond_account(port->bundle->bond, &facet->flow,
- vlan_tci_to_vid(vlan_tci), n_bytes);
- }
- break;
-
- case OVS_ACTION_ATTR_POP_VLAN:
- vlan_tci = htons(0);
- break;
-
- case OVS_ACTION_ATTR_PUSH_VLAN:
- vlan = nl_attr_get(a);
- vlan_tci = vlan->vlan_tci;
- break;
- }
- }
-}
-
-/* Returns true if the only action for 'facet' is to send to the controller.
- * (We don't report NetFlow expiration messages for such facets because they
- * are just part of the control logic for the network, not real traffic). */
-static bool
-facet_is_controller_flow(struct facet *facet)
-{
- if (facet) {
- struct ofproto_dpif *ofproto = facet->ofproto;
- const struct ofpact *ofpacts;
- struct rule_actions *actions;
- struct rule_dpif *rule;
- size_t ofpacts_len;
- bool is_controller;
-
- rule_dpif_lookup(ofproto, &facet->flow, NULL, &rule);
- actions = rule_dpif_get_actions(rule);
- rule_dpif_unref(rule);
-
- ofpacts_len = actions->ofpacts_len;
- ofpacts = actions->ofpacts;
- is_controller = ofpacts_len > 0
- && ofpacts->type == OFPACT_CONTROLLER
- && ofpact_next(ofpacts) >= ofpact_end(ofpacts, ofpacts_len);
- rule_actions_unref(actions);
-
- return is_controller;
- }
- return false;
-}
-
-/* Folds all of 'facet''s statistics into its rule. Also updates the
- * accounting ofhook and emits a NetFlow expiration if appropriate. All of
- * 'facet''s statistics in the datapath should have been zeroed and folded into
- * its packet and byte counts before this function is called. */
-static void
-facet_flush_stats(struct facet *facet)
-{
- struct ofproto_dpif *ofproto = facet->ofproto;
- struct subfacet *subfacet;
-
- LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
- ovs_assert(!subfacet->dp_byte_count);
- ovs_assert(!subfacet->dp_packet_count);
- }
-
- facet_push_stats(facet, false);
- if (facet->accounted_bytes < facet->byte_count) {
- facet_account(facet);
- facet->accounted_bytes = facet->byte_count;
- }
-
- if (ofproto->netflow && !facet_is_controller_flow(facet)) {
- struct ofexpired expired;
- expired.flow = facet->flow;
- expired.packet_count = facet->packet_count;
- expired.byte_count = facet->byte_count;
- expired.used = facet->used;
- netflow_expire(ofproto->netflow, &facet->nf_flow, &expired);
- }
-
- /* Reset counters to prevent double counting if 'facet' ever gets
- * reinstalled. */
- facet_reset_counters(facet);
-
- netflow_flow_clear(&facet->nf_flow);
- facet->tcp_flags = 0;
-}
-
-/* Searches 'ofproto''s table of facets for one which would be responsible for
- * 'flow'. Returns it if found, otherwise a null pointer.
- *
- * The returned facet might need revalidation; use facet_lookup_valid()
- * instead if that is important. */
-static struct facet *
-facet_find(struct ofproto_dpif *ofproto, const struct flow *flow)
-{
- struct cls_rule *cr;
-
- ovs_rwlock_rdlock(&ofproto->facets.rwlock);
- cr = classifier_lookup(&ofproto->facets, flow, NULL);
- ovs_rwlock_unlock(&ofproto->facets.rwlock);
- return cr ? CONTAINER_OF(cr, struct facet, cr) : NULL;
-}
-
-/* Searches 'ofproto''s table of facets for one capable that covers
- * 'flow'. Returns it if found, otherwise a null pointer.
- *
- * The returned facet is guaranteed to be valid. */
-static struct facet *
-facet_lookup_valid(struct ofproto_dpif *ofproto, const struct flow *flow)
-{
- struct facet *facet;
-
- facet = facet_find(ofproto, flow);
- if (facet
- && ofproto->backer->need_revalidate
- && !facet_revalidate(facet)) {
- return NULL;
- }
-
- return facet;
-}
-
-static bool
-facet_check_consistency(struct facet *facet)
-{
- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 15);
-
- struct xlate_out xout;
- struct xlate_in xin;
- bool ok;
-
- /* Check the datapath actions for consistency. */
- xlate_in_init(&xin, facet->ofproto, &facet->flow, NULL, 0, NULL);
- xlate_actions(&xin, &xout);
-
- ok = ofpbuf_equal(&facet->xout.odp_actions, &xout.odp_actions)
- && facet->xout.slow == xout.slow;
- if (!ok && !VLOG_DROP_WARN(&rl)) {
- struct ds s = DS_EMPTY_INITIALIZER;
-
- flow_format(&s, &facet->flow);
- ds_put_cstr(&s, ": inconsistency in facet");
-
- if (!ofpbuf_equal(&facet->xout.odp_actions, &xout.odp_actions)) {
- ds_put_cstr(&s, " (actions were: ");
- format_odp_actions(&s, facet->xout.odp_actions.data,
- facet->xout.odp_actions.size);
- ds_put_cstr(&s, ") (correct actions: ");
- format_odp_actions(&s, xout.odp_actions.data,
- xout.odp_actions.size);
- ds_put_char(&s, ')');
- }
-
- if (facet->xout.slow != xout.slow) {
- ds_put_format(&s, " slow path incorrect. should be %d", xout.slow);
- }
-
- ds_destroy(&s);
- }
- xlate_out_uninit(&xout);
-
- return ok;
-}
-
-/* Re-searches the classifier for 'facet':