- * Returns 0 if successful, ENODEV if the parsed flow has no associated ofport,
- * or some other positive errno if there are other problems. */
static int
ofproto_receive(const struct dpif_backer *backer, struct ofpbuf *packet,
                const struct nlattr *key, size_t key_len,
                struct flow *flow, enum odp_key_fitness *fitnessp,
                struct ofproto_dpif **ofproto, odp_port_t *odp_in_port)
{
    const struct ofport_dpif *port;
    enum odp_key_fitness fitness;
    int error = ENODEV;         /* Default result: no associated ofport. */

    /* Parse the datapath flow key into 'flow'; bail out if it is garbage. */
    fitness = odp_flow_key_to_flow(key, key_len, flow);
    if (fitness == ODP_FIT_ERROR) {
        error = EINVAL;
        goto exit;
    }

    /* Report the datapath in_port even on the ENODEV path below, so callers
     * can still log it or install a drop flow for it. */
    if (odp_in_port) {
        *odp_in_port = flow->in_port.odp_port;
    }

    /* Tunnel packets are resolved through the tunnel module; everything else
     * is mapped via the backer's odp_port -> ofport table. */
    port = (tnl_port_should_receive(flow)
            ? ofport_dpif_cast(tnl_port_receive(flow))
            : odp_port_to_ofport(backer, flow->in_port.odp_port));
    flow->in_port.ofp_port = port ? port->up.ofp_port : OFPP_NONE;
    if (!port) {
        goto exit;              /* 'error' is still ENODEV here. */
    }

    /* XXX: Since the tunnel module is not scoped per backer, for a tunnel port
     * it's theoretically possible that we'll receive an ofport belonging to an
     * entirely different datapath. In practice, this can't happen because no
     * platforms has two separate datapaths which each support tunneling. */
    ovs_assert(ofproto_dpif_cast(port->up.ofproto)->backer == backer);

    if (vsp_adjust_flow(ofproto_dpif_cast(port->up.ofproto), flow)) {
        if (packet) {
            /* Make the packet resemble the flow, so that it gets sent to
             * an OpenFlow controller properly, so that it looks correct
             * for sFlow, and so that flow_extract() will get the correct
             * vlan_tci if it is called on 'packet'.
             *
             * The allocated space inside 'packet' probably also contains
             * 'key', that is, both 'packet' and 'key' are probably part of
             * a struct dpif_upcall (see the large comment on that
             * structure definition), so pushing data on 'packet' is in
             * general not a good idea since it could overwrite 'key' or
             * free it as a side effect. However, it's OK in this special
             * case because we know that 'packet' is inside a Netlink
             * attribute: pushing 4 bytes will just overwrite the 4-byte
             * "struct nlattr", which is fine since we don't need that
             * header anymore. */
            eth_push_vlan(packet, flow->vlan_tci);
        }
        /* We can't reproduce 'key' from 'flow'. */
        fitness = fitness == ODP_FIT_PERFECT ? ODP_FIT_TOO_MUCH : fitness;
    }
    error = 0;

    if (ofproto) {
        *ofproto = ofproto_dpif_cast(port->up.ofproto);
    }

exit:
    /* Fitness is reported on both success and error paths, if requested. */
    if (fitnessp) {
        *fitnessp = fitness;
    }
    return error;
}
-
/* Handles a batch of 'n_upcalls' flow-miss upcalls in 'upcalls': groups
 * packets that share a flow into a single "flow_miss", translates each unique
 * flow once, and pushes the resulting flow-table and packet-execute
 * operations to the datapath in one batch. */
static void
handle_miss_upcalls(struct dpif_backer *backer, struct dpif_upcall *upcalls,
                    size_t n_upcalls)
{
    struct dpif_upcall *upcall;
    struct flow_miss *miss;
    struct flow_miss misses[FLOW_MISS_MAX_BATCH];
    struct flow_miss_op flow_miss_ops[FLOW_MISS_MAX_BATCH * 2];
    struct dpif_op *dpif_ops[FLOW_MISS_MAX_BATCH * 2];
    struct hmap todo;
    int n_misses;
    size_t n_ops;
    size_t i;

    if (!n_upcalls) {
        return;
    }

    /* Construct the to-do list.
     *
     * This just amounts to extracting the flow from each packet and sticking
     * the packets that have the same flow in the same "flow_miss" structure so
     * that we can process them together. */
    hmap_init(&todo);
    n_misses = 0;
    for (upcall = upcalls; upcall < &upcalls[n_upcalls]; upcall++) {
        /* Tentatively use the next free slot in 'misses'; it is only kept
         * (n_misses advanced) if this turns out to be a new flow. */
        struct flow_miss *miss = &misses[n_misses];
        struct flow_miss *existing_miss;
        struct ofproto_dpif *ofproto;
        odp_port_t odp_in_port;
        struct flow flow;
        uint32_t hash;
        int error;

        error = ofproto_receive(backer, upcall->packet, upcall->key,
                                upcall->key_len, &flow, &miss->key_fitness,
                                &ofproto, &odp_in_port);
        if (error == ENODEV) {
            struct drop_key *drop_key;

            /* Received packet on datapath port for which we couldn't
             * associate an ofproto. This can happen if a port is removed
             * while traffic is being received. Print a rate-limited message
             * in case it happens frequently. Install a drop flow so
             * that future packets of the flow are inexpensively dropped
             * in the kernel. */
            VLOG_INFO_RL(&rl, "received packet on unassociated datapath port "
                         "%"PRIu32, odp_in_port);

            drop_key = drop_key_lookup(backer, upcall->key, upcall->key_len);
            if (!drop_key) {
                /* First time we see this key: remember it and install a
                 * catch-all drop flow for it in the datapath. */
                drop_key = xmalloc(sizeof *drop_key);
                drop_key->key = xmemdup(upcall->key, upcall->key_len);
                drop_key->key_len = upcall->key_len;

                hmap_insert(&backer->drop_keys, &drop_key->hmap_node,
                            hash_bytes(drop_key->key, drop_key->key_len, 0));
                dpif_flow_put(backer->dpif, DPIF_FP_CREATE | DPIF_FP_MODIFY,
                              drop_key->key, drop_key->key_len,
                              NULL, 0, NULL, 0, NULL);
            }
            continue;
        }
        if (error) {
            continue;
        }

        ofproto->n_missed++;
        flow_extract(upcall->packet, flow.skb_priority, flow.skb_mark,
                     &flow.tunnel, &flow.in_port, &miss->flow);

        /* Add other packets to a to-do list. */
        hash = flow_hash(&miss->flow, 0);
        existing_miss = flow_miss_find(&todo, ofproto, &miss->flow, hash);
        if (!existing_miss) {
            /* New flow: claim this 'misses' slot and initialize it. */
            hmap_insert(&todo, &miss->hmap_node, hash);
            miss->ofproto = ofproto;
            miss->key = upcall->key;
            miss->key_len = upcall->key_len;
            miss->upcall_type = upcall->type;
            list_init(&miss->packets);

            n_misses++;
        } else {
            miss = existing_miss;
        }
        list_push_back(&miss->packets, &upcall->packet->list_node);
    }

    /* Process each element in the to-do list, constructing the set of
     * operations to batch. */
    n_ops = 0;
    HMAP_FOR_EACH (miss, hmap_node, &todo) {
        handle_flow_miss(miss, flow_miss_ops, &n_ops);
    }
    ovs_assert(n_ops <= ARRAY_SIZE(flow_miss_ops));

    /* Execute batch. */
    for (i = 0; i < n_ops; i++) {
        dpif_ops[i] = &flow_miss_ops[i].dpif_op;
    }
    dpif_operate(backer->dpif, dpif_ops, n_ops);

    for (i = 0; i < n_ops; i++) {
        /* A failed flow-put means the subfacet never made it into the
         * datapath, so record that it is not installed. */
        if (dpif_ops[i]->error != 0
            && flow_miss_ops[i].dpif_op.type == DPIF_OP_FLOW_PUT
            && flow_miss_ops[i].subfacet) {
            struct subfacet *subfacet = flow_miss_ops[i].subfacet;

            COVERAGE_INC(subfacet_install_fail);

            subfacet->path = SF_NOT_INSTALLED;
        }

        /* Free memory. */
        if (flow_miss_ops[i].xout_garbage) {
            xlate_out_uninit(&flow_miss_ops[i].xout);
        }
    }
    hmap_destroy(&todo);
}
-
-static enum { SFLOW_UPCALL, MISS_UPCALL, BAD_UPCALL, FLOW_SAMPLE_UPCALL,
- IPFIX_UPCALL }
-classify_upcall(const struct dpif_upcall *upcall)
-{
- size_t userdata_len;
- union user_action_cookie cookie;
-
- /* First look at the upcall type. */
- switch (upcall->type) {
- case DPIF_UC_ACTION:
- break;
-
- case DPIF_UC_MISS:
- return MISS_UPCALL;
-
- case DPIF_N_UC_TYPES:
- default:
- VLOG_WARN_RL(&rl, "upcall has unexpected type %"PRIu32, upcall->type);
- return BAD_UPCALL;
- }
-
- /* "action" upcalls need a closer look. */
- if (!upcall->userdata) {
- VLOG_WARN_RL(&rl, "action upcall missing cookie");
- return BAD_UPCALL;
- }
- userdata_len = nl_attr_get_size(upcall->userdata);
- if (userdata_len < sizeof cookie.type
- || userdata_len > sizeof cookie) {
- VLOG_WARN_RL(&rl, "action upcall cookie has unexpected size %zu",
- userdata_len);
- return BAD_UPCALL;
- }
- memset(&cookie, 0, sizeof cookie);
- memcpy(&cookie, nl_attr_get(upcall->userdata), userdata_len);
- if (userdata_len == sizeof cookie.sflow
- && cookie.type == USER_ACTION_COOKIE_SFLOW) {
- return SFLOW_UPCALL;
- } else if (userdata_len == sizeof cookie.slow_path
- && cookie.type == USER_ACTION_COOKIE_SLOW_PATH) {
- return MISS_UPCALL;
- } else if (userdata_len == sizeof cookie.flow_sample
- && cookie.type == USER_ACTION_COOKIE_FLOW_SAMPLE) {
- return FLOW_SAMPLE_UPCALL;
- } else if (userdata_len == sizeof cookie.ipfix
- && cookie.type == USER_ACTION_COOKIE_IPFIX) {
- return IPFIX_UPCALL;
- } else {
- VLOG_WARN_RL(&rl, "invalid user cookie of type %"PRIu16
- " and size %zu", cookie.type, userdata_len);
- return BAD_UPCALL;
- }
-}
-
-static void
-handle_sflow_upcall(struct dpif_backer *backer,
- const struct dpif_upcall *upcall)
-{
- struct ofproto_dpif *ofproto;
- union user_action_cookie cookie;
- struct flow flow;
- odp_port_t odp_in_port;
-
- if (ofproto_receive(backer, upcall->packet, upcall->key, upcall->key_len,
- &flow, NULL, &ofproto, &odp_in_port)
- || !ofproto->sflow) {
- return;
- }
-
- memset(&cookie, 0, sizeof cookie);
- memcpy(&cookie, nl_attr_get(upcall->userdata), sizeof cookie.sflow);
- dpif_sflow_received(ofproto->sflow, upcall->packet, &flow,
- odp_in_port, &cookie);
-}
-
-static void
-handle_flow_sample_upcall(struct dpif_backer *backer,
- const struct dpif_upcall *upcall)
-{
- struct ofproto_dpif *ofproto;
- union user_action_cookie cookie;
- struct flow flow;
-
- if (ofproto_receive(backer, upcall->packet, upcall->key, upcall->key_len,
- &flow, NULL, &ofproto, NULL)
- || !ofproto->ipfix) {
- return;
- }
-
- memset(&cookie, 0, sizeof cookie);
- memcpy(&cookie, nl_attr_get(upcall->userdata), sizeof cookie.flow_sample);
-
- /* The flow reflects exactly the contents of the packet. Sample
- * the packet using it. */
- dpif_ipfix_flow_sample(ofproto->ipfix, upcall->packet, &flow,
- cookie.flow_sample.collector_set_id,
- cookie.flow_sample.probability,
- cookie.flow_sample.obs_domain_id,
- cookie.flow_sample.obs_point_id);
-}
-
-static void
-handle_ipfix_upcall(struct dpif_backer *backer,
- const struct dpif_upcall *upcall)
-{
- struct ofproto_dpif *ofproto;
- struct flow flow;
-
- if (ofproto_receive(backer, upcall->packet, upcall->key, upcall->key_len,
- &flow, NULL, &ofproto, NULL)
- || !ofproto->ipfix) {
- return;
- }
-
- /* The flow reflects exactly the contents of the packet. Sample
- * the packet using it. */
- dpif_ipfix_bridge_sample(ofproto->ipfix, upcall->packet, &flow);
-}
-
/* Receives up to 'max_batch' upcalls from 'backer''s datapath and dispatches
 * each one by category.  Miss upcalls are deferred and processed together as
 * a batch; the others are handled immediately.
 *
 * Returns the number of upcalls actually received. */
static int
handle_upcalls(struct dpif_backer *backer, unsigned int max_batch)
{
    struct dpif_upcall misses[FLOW_MISS_MAX_BATCH];
    struct ofpbuf miss_bufs[FLOW_MISS_MAX_BATCH];
    /* Stack-backed stub storage for each receive buffer, to avoid heap
     * allocation for typically-sized upcalls. */
    uint64_t miss_buf_stubs[FLOW_MISS_MAX_BATCH][4096 / 8];
    int n_processed;
    int n_misses;
    int i;

    ovs_assert(max_batch <= FLOW_MISS_MAX_BATCH);

    n_misses = 0;
    for (n_processed = 0; n_processed < max_batch; n_processed++) {
        struct dpif_upcall *upcall = &misses[n_misses];
        struct ofpbuf *buf = &miss_bufs[n_misses];
        int error;

        ofpbuf_use_stub(buf, miss_buf_stubs[n_misses],
                        sizeof miss_buf_stubs[n_misses]);
        error = dpif_recv(backer->dpif, upcall, buf);
        if (error) {
            ofpbuf_uninit(buf);
            break;
        }

        switch (classify_upcall(upcall)) {
        case MISS_UPCALL:
            /* Handle it later.  Keeping 'buf' alive here is what lets the
             * deferred batch still reference the upcall's key and packet. */
            n_misses++;
            break;

        case SFLOW_UPCALL:
            handle_sflow_upcall(backer, upcall);
            ofpbuf_uninit(buf);
            break;

        case FLOW_SAMPLE_UPCALL:
            handle_flow_sample_upcall(backer, upcall);
            ofpbuf_uninit(buf);
            break;

        case IPFIX_UPCALL:
            handle_ipfix_upcall(backer, upcall);
            ofpbuf_uninit(buf);
            break;

        case BAD_UPCALL:
            ofpbuf_uninit(buf);
            break;
        }
    }

    /* Handle deferred MISS_UPCALL processing. */
    handle_miss_upcalls(backer, misses, n_misses);
    for (i = 0; i < n_misses; i++) {
        ofpbuf_uninit(&miss_bufs[i]);
    }

    return n_processed;
}
-\f
-/* Flow expiration. */
-
-static int subfacet_max_idle(const struct dpif_backer *);
-static void update_stats(struct dpif_backer *);
-static void rule_expire(struct rule_dpif *);
-static void expire_subfacets(struct dpif_backer *, int dp_max_idle);
-
-/* This function is called periodically by run(). Its job is to collect
- * updates for the flows that have been installed into the datapath, most
- * importantly when they last were used, and then use that information to
- * expire flows that have not been used recently.
- *
- * Returns the number of milliseconds after which it should be called again. */
static int
expire(struct dpif_backer *backer)
{
    struct ofproto_dpif *ofproto;
    size_t n_subfacets;
    int max_idle;

    /* Periodically clear out the drop keys in an effort to keep them
     * relatively few. */
    drop_key_clear(backer);

    /* Update stats for each flow in the backer. */
    update_stats(backer);

    n_subfacets = hmap_count(&backer->subfacets);
    if (n_subfacets) {
        struct subfacet *subfacet;
        long long int total, now;

        /* Accumulate the mean subfacet age into the running average. */
        total = 0;
        now = time_msec();
        HMAP_FOR_EACH (subfacet, hmap_node, &backer->subfacets) {
            total += now - subfacet->created;
        }
        backer->avg_subfacet_life += total / n_subfacets;
    }
    /* Exponential moving averages: halve on every pass. */
    backer->avg_subfacet_life /= 2;

    backer->avg_n_subfacet += n_subfacets;
    backer->avg_n_subfacet /= 2;

    backer->max_n_subfacet = MAX(backer->max_n_subfacet, n_subfacets);

    /* Compute the idle cutoff from the current population, then evict. */
    max_idle = subfacet_max_idle(backer);
    expire_subfacets(backer, max_idle);

    /* Walk every ofproto on this backer for OpenFlow-level expiration. */
    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
        struct rule *rule, *next_rule;

        if (ofproto->backer != backer) {
            continue;
        }

        /* Expire OpenFlow flows whose idle_timeout or hard_timeout
         * has passed. */
        LIST_FOR_EACH_SAFE (rule, next_rule, expirable,
                            &ofproto->up.expirable) {
            rule_expire(rule_dpif_cast(rule));
        }

        /* All outstanding data in existing flows has been accounted, so it's a
         * good time to do bond rebalancing. */
        if (ofproto->has_bonded_bundles) {
            struct ofbundle *bundle;

            HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
                if (bundle->bond) {
                    bond_rebalance(bundle->bond, &backer->revalidate_set);
                }
            }
        }
    }

    /* Re-run at the idle cutoff, but at least once per second. */
    return MIN(max_idle, 1000);
}
-
-/* Updates flow table statistics given that the datapath just reported 'stats'
- * as 'subfacet''s statistics. */
-static void
-update_subfacet_stats(struct subfacet *subfacet,
- const struct dpif_flow_stats *stats)
-{
- struct facet *facet = subfacet->facet;
- struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
- struct dpif_flow_stats diff;
-
- diff.tcp_flags = stats->tcp_flags;
- diff.used = stats->used;
-
- if (stats->n_packets >= subfacet->dp_packet_count) {
- diff.n_packets = stats->n_packets - subfacet->dp_packet_count;
- } else {
- VLOG_WARN_RL(&rl, "unexpected packet count from the datapath");
- diff.n_packets = 0;
- }
-
- if (stats->n_bytes >= subfacet->dp_byte_count) {
- diff.n_bytes = stats->n_bytes - subfacet->dp_byte_count;
- } else {
- VLOG_WARN_RL(&rl, "unexpected byte count from datapath");
- diff.n_bytes = 0;
- }
-
- ofproto->n_hit += diff.n_packets;
- subfacet->dp_packet_count = stats->n_packets;
- subfacet->dp_byte_count = stats->n_bytes;
- subfacet_update_stats(subfacet, &diff);
-
- if (facet->accounted_bytes < facet->byte_count) {
- facet_learn(facet);
- facet_account(facet);
- facet->accounted_bytes = facet->byte_count;
- }
-}
-
-/* 'key' with length 'key_len' bytes is a flow in 'dpif' that we know nothing
- * about, or a flow that shouldn't be installed but was anyway. Delete it. */
-static void
-delete_unexpected_flow(struct dpif_backer *backer,
- const struct nlattr *key, size_t key_len)
-{
- if (!VLOG_DROP_WARN(&rl)) {
- struct ds s;
-
- ds_init(&s);
- odp_flow_key_format(key, key_len, &s);
- VLOG_WARN("unexpected flow: %s", ds_cstr(&s));
- ds_destroy(&s);
- }
-
- COVERAGE_INC(facet_unexpected);
- dpif_flow_del(backer->dpif, key, key_len, NULL);
-}
-
-/* Update 'packet_count', 'byte_count', and 'used' members of installed facets.
- *
- * This function also pushes statistics updates to rules which each facet
- * resubmits into. Generally these statistics will be accurate. However, if a
- * facet changes the rule it resubmits into at some time in between
- * update_stats() runs, it is possible that statistics accrued to the
- * old rule will be incorrectly attributed to the new rule. This could be
- * avoided by calling update_stats() whenever rules are created or
- * deleted. However, the performance impact of making so many calls to the
- * datapath do not justify the benefit of having perfectly accurate statistics.
- *
- * In addition, this function maintains per ofproto flow hit counts. The patch
- * port is not treated specially. e.g. A packet ingress from br0 patched into
- * br1 will increase the hit count of br0 by 1, however, does not affect
- * the hit or miss counts of br1.
- */
static void
update_stats(struct dpif_backer *backer)
{
    const struct dpif_flow_stats *stats;
    struct dpif_flow_dump dump;
    const struct nlattr *key, *mask;
    size_t key_len, mask_len;

    /* Iterate over every flow currently installed in the datapath. */
    dpif_flow_dump_start(&dump, backer->dpif);
    while (dpif_flow_dump_next(&dump, &key, &key_len,
                               &mask, &mask_len, NULL, NULL, &stats)) {
        struct subfacet *subfacet;
        uint32_t key_hash;

        key_hash = odp_flow_key_hash(key, key_len);
        subfacet = subfacet_find(backer, key, key_len, key_hash);
        switch (subfacet ? subfacet->path : SF_NOT_INSTALLED) {
        case SF_FAST_PATH:
            update_subfacet_stats(subfacet, stats);
            break;

        case SF_SLOW_PATH:
            /* Stats are updated per-packet. */
            break;

        case SF_NOT_INSTALLED:
        default:
            /* Either no subfacet matches 'key', or one does but claims it is
             * not installed; in both cases the datapath flow is bogus. */
            delete_unexpected_flow(backer, key, key_len);
            break;
        }
        /* Keep latency-sensitive work (e.g. upcall handling) progressing
         * while this potentially long dump runs. */
        run_fast_rl();
    }
    dpif_flow_dump_done(&dump);

    update_moving_averages(backer);
}
-
-/* Calculates and returns the number of milliseconds of idle time after which
- * subfacets should expire from the datapath. When a subfacet expires, we fold
- * its statistics into its facet, and when a facet's last subfacet expires, we
- * fold its statistic into its rule. */
-static int
-subfacet_max_idle(const struct dpif_backer *backer)
-{
- /*
- * Idle time histogram.
- *
- * Most of the time a switch has a relatively small number of subfacets.
- * When this is the case we might as well keep statistics for all of them
- * in userspace and to cache them in the kernel datapath for performance as
- * well.
- *
- * As the number of subfacets increases, the memory required to maintain
- * statistics about them in userspace and in the kernel becomes
- * significant. However, with a large number of subfacets it is likely
- * that only a few of them are "heavy hitters" that consume a large amount
- * of bandwidth. At this point, only heavy hitters are worth caching in
- * the kernel and maintaining in userspaces; other subfacets we can
- * discard.
- *
- * The technique used to compute the idle time is to build a histogram with
- * N_BUCKETS buckets whose width is BUCKET_WIDTH msecs each. Each subfacet
- * that is installed in the kernel gets dropped in the appropriate bucket.
- * After the histogram has been built, we compute the cutoff so that only
- * the most-recently-used 1% of subfacets (but at least
- * flow_eviction_threshold flows) are kept cached. At least
- * the most-recently-used bucket of subfacets is kept, so actually an
- * arbitrary number of subfacets can be kept in any given expiration run
- * (though the next run will delete most of those unless they receive
- * additional data).
- *
- * This requires a second pass through the subfacets, in addition to the
- * pass made by update_stats(), because the former function never looks at
- * uninstallable subfacets.
- */
- enum { BUCKET_WIDTH = ROUND_UP(100, TIME_UPDATE_INTERVAL) };
- enum { N_BUCKETS = 5000 / BUCKET_WIDTH };
- int buckets[N_BUCKETS] = { 0 };
- int total, subtotal, bucket;
- struct subfacet *subfacet;
- long long int now;
- int i;
-
- total = hmap_count(&backer->subfacets);
- if (total <= flow_eviction_threshold) {
- return N_BUCKETS * BUCKET_WIDTH;
- }
-
- /* Build histogram. */
- now = time_msec();
- HMAP_FOR_EACH (subfacet, hmap_node, &backer->subfacets) {
- long long int idle = now - subfacet->used;
- int bucket = (idle <= 0 ? 0
- : idle >= BUCKET_WIDTH * N_BUCKETS ? N_BUCKETS - 1
- : (unsigned int) idle / BUCKET_WIDTH);
- buckets[bucket]++;
- }
-
- /* Find the first bucket whose flows should be expired. */
- subtotal = bucket = 0;
- do {
- subtotal += buckets[bucket++];
- } while (bucket < N_BUCKETS &&
- subtotal < MAX(flow_eviction_threshold, total / 100));
-
- if (VLOG_IS_DBG_ENABLED()) {
- struct ds s;
-
- ds_init(&s);
- ds_put_cstr(&s, "keep");
- for (i = 0; i < N_BUCKETS; i++) {
- if (i == bucket) {
- ds_put_cstr(&s, ", drop");
- }
- if (buckets[i]) {
- ds_put_format(&s, " %d:%d", i * BUCKET_WIDTH, buckets[i]);
- }
- }
- VLOG_INFO("%s (msec:count)", ds_cstr(&s));
- ds_destroy(&s);
- }
-
- return bucket * BUCKET_WIDTH;
-}
-
/* Expires from 'backer' all subfacets that have been idle longer than
 * 'dp_max_idle' milliseconds (or 10 seconds for flows that exercise the slow
 * path for special protocols), destroying installed ones in batches. */
static void
expire_subfacets(struct dpif_backer *backer, int dp_max_idle)
{
    /* Cutoff time for most flows. */
    long long int normal_cutoff = time_msec() - dp_max_idle;

    /* We really want to keep flows for special protocols around, so use a more
     * conservative cutoff. */
    long long int special_cutoff = time_msec() - 10000;

    struct subfacet *subfacet, *next_subfacet;
    struct subfacet *batch[SUBFACET_DESTROY_MAX_BATCH];
    int n_batch;

    n_batch = 0;
    /* _SAFE iteration: subfacet_destroy() removes entries as we go. */
    HMAP_FOR_EACH_SAFE (subfacet, next_subfacet, hmap_node,
                        &backer->subfacets) {
        long long int cutoff;

        /* CFM/BFD/LACP/STP flows get the longer, special cutoff. */
        cutoff = (subfacet->facet->xout.slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP
                                                | SLOW_STP)
                  ? special_cutoff
                  : normal_cutoff);
        if (subfacet->used < cutoff) {
            if (subfacet->path != SF_NOT_INSTALLED) {
                /* Installed subfacets are destroyed in batches so the
                 * datapath deletions can be issued together. */
                batch[n_batch++] = subfacet;
                if (n_batch >= SUBFACET_DESTROY_MAX_BATCH) {
                    subfacet_destroy_batch(backer, batch, n_batch);
                    n_batch = 0;
                }
            } else {
                subfacet_destroy(subfacet);
            }
        }
    }

    /* Flush any partial final batch. */
    if (n_batch > 0) {
        subfacet_destroy_batch(backer, batch, n_batch);
    }
}
-
-/* If 'rule' is an OpenFlow rule, that has expired according to OpenFlow rules,
- * then delete it entirely. */
-static void
-rule_expire(struct rule_dpif *rule)
-{
- struct facet *facet, *next_facet;
- long long int now;
- uint8_t reason;
-
- if (rule->up.pending) {
- /* We'll have to expire it later. */
- return;
- }
-
- /* Has 'rule' expired? */
- now = time_msec();
- if (rule->up.hard_timeout
- && now > rule->up.modified + rule->up.hard_timeout * 1000) {
- reason = OFPRR_HARD_TIMEOUT;
- } else if (rule->up.idle_timeout
- && now > rule->up.used + rule->up.idle_timeout * 1000) {
- reason = OFPRR_IDLE_TIMEOUT;
- } else {
- return;
- }
-
- COVERAGE_INC(ofproto_dpif_expired);
-
- /* Update stats. (This is a no-op if the rule expired due to an idle
- * timeout, because that only happens when the rule has no facets left.) */
- LIST_FOR_EACH_SAFE (facet, next_facet, list_node, &rule->facets) {
- facet_remove(facet);
- }
-
- /* Get rid of the rule. */
- ofproto_rule_expire(&rule->up, reason);
-}
-\f
-/* Facets. */
-
-/* Creates and returns a new facet based on 'miss'.
- *
- * The caller must already have determined that no facet with an identical
- * 'miss->flow' exists in 'miss->ofproto'.
- *
- * 'rule' and 'xout' must have been created based on 'miss'.
- *
 * 'facet''s statistics are initialized based on 'stats'.
- *
- * The facet will initially have no subfacets. The caller should create (at
- * least) one subfacet with subfacet_create(). */
static struct facet *
facet_create(const struct flow_miss *miss, struct rule_dpif *rule,
             struct xlate_out *xout, struct dpif_flow_stats *stats)
{
    struct ofproto_dpif *ofproto = miss->ofproto;
    struct facet *facet;
    struct match match;

    facet = xzalloc(sizeof *facet);
    /* Seed both current and previous counters from 'stats' so the first
     * stats push sees no spurious delta. */
    facet->packet_count = facet->prev_packet_count = stats->n_packets;
    facet->byte_count = facet->prev_byte_count = stats->n_bytes;
    facet->tcp_flags = stats->tcp_flags;
    facet->used = stats->used;
    facet->flow = miss->flow;
    facet->learn_rl = time_msec() + 500;    /* Rate-limit learning pushes. */
    facet->rule = rule;

    list_push_back(&facet->rule->facets, &facet->list_node);
    list_init(&facet->subfacets);
    netflow_flow_init(&facet->nf_flow);
    netflow_flow_update_time(ofproto->netflow, &facet->nf_flow, facet->used);

    /* The facet owns its own copy of the translation output. */
    xlate_out_copy(&facet->xout, xout);

    /* Index the facet in the classifier by its flow, masked by the
     * wildcards the translation produced. */
    match_init(&match, &facet->flow, &facet->xout.wc);
    cls_rule_init(&facet->cr, &match, OFP_DEFAULT_PRIORITY);
    classifier_insert(&ofproto->facets, &facet->cr);

    facet->nf_flow.output_iface = facet->xout.nf_output_iface;

    return facet;
}
-
-static void
-facet_free(struct facet *facet)
-{
- if (facet) {
- xlate_out_uninit(&facet->xout);
- free(facet);
- }
-}
-
-/* Executes, within 'ofproto', the 'n_actions' actions in 'actions' on
- * 'packet', which arrived on 'in_port'. */
-static bool
-execute_odp_actions(struct ofproto_dpif *ofproto, const struct flow *flow,
- const struct nlattr *odp_actions, size_t actions_len,
- struct ofpbuf *packet)
-{
- struct odputil_keybuf keybuf;
- struct ofpbuf key;
- int error;
-
- ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
- odp_flow_key_from_flow(&key, flow,
- ofp_port_to_odp_port(ofproto, flow->in_port.ofp_port));
-
- error = dpif_execute(ofproto->backer->dpif, key.data, key.size,
- odp_actions, actions_len, packet);
- return !error;
-}
-
-/* Remove 'facet' from its ofproto and free up the associated memory:
- *
- * - If 'facet' was installed in the datapath, uninstalls it and updates its
- * rule's statistics, via subfacet_uninstall().
- *
- * - Removes 'facet' from its rule and from ofproto->facets.
- */
static void
facet_remove(struct facet *facet)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
    struct subfacet *subfacet, *next_subfacet;

    /* A live facet always has at least one subfacet. */
    ovs_assert(!list_is_empty(&facet->subfacets));

    /* First uninstall all of the subfacets to get final statistics. */
    LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
        subfacet_uninstall(subfacet);
    }

    /* Flush the final stats to the rule.
     *
     * This might require us to have at least one subfacet around so that we
     * can use its actions for accounting in facet_account(), which is why we
     * have uninstalled but not yet destroyed the subfacets. */
    facet_flush_stats(facet);

    /* Now we're really all done so destroy everything. */
    LIST_FOR_EACH_SAFE (subfacet, next_subfacet, list_node,
                        &facet->subfacets) {
        subfacet_destroy__(subfacet);
    }
    /* Unindex the facet and release its classifier rule before freeing. */
    classifier_remove(&ofproto->facets, &facet->cr);
    cls_rule_destroy(&facet->cr);
    list_remove(&facet->list_node);
    facet_free(facet);
}
-
-/* Feed information from 'facet' back into the learning table to keep it in
- * sync with what is actually flowing through the datapath. */
-static void
-facet_learn(struct facet *facet)
-{
- long long int now = time_msec();
-
- if (!facet->xout.has_fin_timeout && now < facet->learn_rl) {
- return;
- }
-
- facet->learn_rl = now + 500;
-
- if (!facet->xout.has_learn
- && !facet->xout.has_normal
- && (!facet->xout.has_fin_timeout
- || !(facet->tcp_flags & (TCP_FIN | TCP_RST)))) {
- return;
- }
-
- facet_push_stats(facet, true);
-}
-
/* Feeds 'facet''s unaccounted bytes to bond_account() for each output action
 * in its translated datapath actions, tracking the VLAN in effect at each
 * output so accounting matches bond_choose_output_slave()'s hashing. */
static void
facet_account(struct facet *facet)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
    const struct nlattr *a;
    unsigned int left;
    ovs_be16 vlan_tci;
    uint64_t n_bytes;

    /* Only relevant when NORMAL processing chose bonded outputs. */
    if (!facet->xout.has_normal || !ofproto->has_bonded_bundles) {
        return;
    }
    n_bytes = facet->byte_count - facet->accounted_bytes;

    /* This loop feeds byte counters to bond_account() for rebalancing to use
     * as a basis.  We also need to track the actual VLAN on which the packet
     * is going to be sent to ensure that it matches the one passed to
     * bond_choose_output_slave().  (Otherwise, we will account to the wrong
     * hash bucket.)
     *
     * We use the actions from an arbitrary subfacet because they should all
     * be equally valid for our purpose. */
    vlan_tci = facet->flow.vlan_tci;
    NL_ATTR_FOR_EACH_UNSAFE (a, left, facet->xout.odp_actions.data,
                             facet->xout.odp_actions.size) {
        const struct ovs_action_push_vlan *vlan;
        struct ofport_dpif *port;

        switch (nl_attr_type(a)) {
        case OVS_ACTION_ATTR_OUTPUT:
            port = get_odp_port(ofproto, nl_attr_get_odp_port(a));
            if (port && port->bundle && port->bundle->bond) {
                bond_account(port->bundle->bond, &facet->flow,
                             vlan_tci_to_vid(vlan_tci), n_bytes);
            }
            break;

        case OVS_ACTION_ATTR_POP_VLAN:
            /* Subsequent outputs are untagged. */
            vlan_tci = htons(0);
            break;

        case OVS_ACTION_ATTR_PUSH_VLAN:
            /* Subsequent outputs carry the pushed tag. */
            vlan = nl_attr_get(a);
            vlan_tci = vlan->vlan_tci;
            break;
        }
    }
}
-
-/* Returns true if the only action for 'facet' is to send to the controller.
- * (We don't report NetFlow expiration messages for such facets because they
- * are just part of the control logic for the network, not real traffic). */
-static bool
-facet_is_controller_flow(struct facet *facet)
-{
- if (facet) {
- const struct rule *rule = &facet->rule->up;
- const struct ofpact *ofpacts = rule->ofpacts;
- size_t ofpacts_len = rule->ofpacts_len;
-
- if (ofpacts_len > 0 &&
- ofpacts->type == OFPACT_CONTROLLER &&
- ofpact_next(ofpacts) >= ofpact_end(ofpacts, ofpacts_len)) {
- return true;
- }
- }
- return false;
-}
-
-/* Folds all of 'facet''s statistics into its rule. Also updates the
- * accounting ofhook and emits a NetFlow expiration if appropriate. All of
- * 'facet''s statistics in the datapath should have been zeroed and folded into
- * its packet and byte counts before this function is called. */
static void
facet_flush_stats(struct facet *facet)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
    struct subfacet *subfacet;

    /* Callers must have folded all datapath stats into the facet already. */
    LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
        ovs_assert(!subfacet->dp_byte_count);
        ovs_assert(!subfacet->dp_packet_count);
    }

    facet_push_stats(facet, false);
    if (facet->accounted_bytes < facet->byte_count) {
        facet_account(facet);
        facet->accounted_bytes = facet->byte_count;
    }

    /* Emit a NetFlow expiration, except for pure controller flows. */
    if (ofproto->netflow && !facet_is_controller_flow(facet)) {
        struct ofexpired expired;
        expired.flow = facet->flow;
        expired.packet_count = facet->packet_count;
        expired.byte_count = facet->byte_count;
        expired.used = facet->used;
        netflow_expire(ofproto->netflow, &facet->nf_flow, &expired);
    }

    /* Reset counters to prevent double counting if 'facet' ever gets
     * reinstalled. */
    facet_reset_counters(facet);

    netflow_flow_clear(&facet->nf_flow);
    facet->tcp_flags = 0;
}
-
-/* Searches 'ofproto''s table of facets for one which would be responsible for
- * 'flow'. Returns it if found, otherwise a null pointer.
- *
- * The returned facet might need revalidation; use facet_lookup_valid()
- * instead if that is important. */
-static struct facet *
-facet_find(struct ofproto_dpif *ofproto, const struct flow *flow)
-{
- struct cls_rule *cr = classifier_lookup(&ofproto->facets, flow, NULL);
- return cr ? CONTAINER_OF(cr, struct facet, cr) : NULL;
-}
-
/* Searches 'ofproto''s table of facets for one that covers 'flow'.  Returns
 * it if found, otherwise a null pointer.
 *
 * The returned facet is guaranteed to be valid. */
static struct facet *
facet_lookup_valid(struct ofproto_dpif *ofproto, const struct flow *flow)
{
    struct facet *facet;

    facet = facet_find(ofproto, flow);
    /* If a revalidation is pending for this facet, perform it now.  A false
     * return from facet_revalidate() means the facet was removed, so a null
     * pointer must be returned rather than the now-dangling 'facet'. */
    if (facet
        && (ofproto->backer->need_revalidate
            || tag_set_intersects(&ofproto->backer->revalidate_set,
                                  facet->xout.tags))
        && !facet_revalidate(facet)) {
        return NULL;
    }

    return facet;
}
-
/* Checks that 'facet' is still associated with the correct rule and that its
 * cached datapath actions and slow-path flags match what a fresh translation
 * would produce.  Logs (rate-limited) details of any mismatch.  Returns true
 * if 'facet' is consistent, false otherwise. */
static bool
facet_check_consistency(struct facet *facet)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 15);

    struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);

    struct xlate_out xout;
    struct xlate_in xin;

    struct rule_dpif *rule;
    bool ok;

    /* Check the rule for consistency. */
    rule = rule_dpif_lookup(ofproto, &facet->flow, NULL);
    if (rule != facet->rule) {
        if (!VLOG_DROP_WARN(&rl)) {
            struct ds s = DS_EMPTY_INITIALIZER;

            flow_format(&s, &facet->flow);
            ds_put_format(&s, ": facet associated with wrong rule (was "
                          "table=%"PRIu8",", facet->rule->up.table_id);
            cls_rule_format(&facet->rule->up.cr, &s);
            ds_put_format(&s, ") (should have been table=%"PRIu8",",
                          rule->up.table_id);
            cls_rule_format(&rule->up.cr, &s);
            ds_put_char(&s, ')');

            VLOG_WARN("%s", ds_cstr(&s));
            ds_destroy(&s);
        }
        return false;
    }

    /* Check the datapath actions for consistency: re-translate and compare
     * against the cached translation output. */
    xlate_in_init(&xin, ofproto, &facet->flow, rule, 0, NULL);
    xlate_actions(&xin, &xout);

    ok = ofpbuf_equal(&facet->xout.odp_actions, &xout.odp_actions)
        && facet->xout.slow == xout.slow;
    if (!ok && !VLOG_DROP_WARN(&rl)) {
        struct ds s = DS_EMPTY_INITIALIZER;

        flow_format(&s, &facet->flow);
        ds_put_cstr(&s, ": inconsistency in facet");

        if (!ofpbuf_equal(&facet->xout.odp_actions, &xout.odp_actions)) {
            ds_put_cstr(&s, " (actions were: ");
            format_odp_actions(&s, facet->xout.odp_actions.data,
                               facet->xout.odp_actions.size);
            ds_put_cstr(&s, ") (correct actions: ");
            format_odp_actions(&s, xout.odp_actions.data,
                               xout.odp_actions.size);
            ds_put_char(&s, ')');
        }

        if (facet->xout.slow != xout.slow) {
            ds_put_format(&s, " slow path incorrect. should be %d", xout.slow);
        }

        VLOG_WARN("%s", ds_cstr(&s));
        ds_destroy(&s);
    }
    /* Release the fresh translation's resources regardless of outcome. */
    xlate_out_uninit(&xout);

    return ok;
}
-
-/* Re-searches the classifier for 'facet':