X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=ofproto%2Fofproto-dpif.c;h=8b65becfc28d2af13c57ee45830c5fbfe9ef806f;hb=299016266ed13376a7d671f66d4e0181b41098e3;hp=b7194930d0a0e17bbf98605706f7a1b363dec053;hpb=7df6a8bdb4abe4ac41c6d1a9043b69cbaea23bcc;p=sliver-openvswitch.git diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index b7194930d..8b65becfc 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -16,12 +16,13 @@ #include -#include "ofproto/private.h" +#include "ofproto/ofproto-provider.h" #include #include "autopath.h" #include "bond.h" +#include "bundle.h" #include "byte-order.h" #include "connmgr.h" #include "coverage.h" @@ -40,7 +41,7 @@ #include "ofp-util.h" #include "ofpbuf.h" #include "ofp-print.h" -#include "ofproto-sflow.h" +#include "ofproto-dpif-sflow.h" #include "poll-loop.h" #include "timer.h" #include "unaligned.h" @@ -59,7 +60,7 @@ COVERAGE_DEFINE(facet_invalidated); COVERAGE_DEFINE(facet_revalidate); COVERAGE_DEFINE(facet_unexpected); -/* Maximum depth of flow table recursion (due to NXAST_RESUBMIT actions) in a +/* Maximum depth of flow table recursion (due to resubmit actions) in a * flow translation. */ #define MAX_RESUBMIT_RECURSION 16 @@ -95,8 +96,8 @@ static struct rule_dpif *rule_dpif_cast(const struct rule *rule) return rule ? CONTAINER_OF(rule, struct rule_dpif, up) : NULL; } -static struct rule_dpif *rule_dpif_lookup(struct ofproto_dpif *ofproto, - const struct flow *flow); +static struct rule_dpif *rule_dpif_lookup(struct ofproto_dpif *, + const struct flow *, uint8_t table); #define MAX_MIRRORS 32 typedef uint32_t mirror_mask_t; @@ -184,8 +185,10 @@ struct action_xlate_ctx { * reason to look at them. */ int recurse; /* Recursion level, via xlate_table_action. */ - int last_pop_priority; /* Offset in 'odp_actions' just past most - * recent ODP_ACTION_ATTR_SET_PRIORITY. */ + uint32_t priority; /* Current flow priority. 0 if none. */ + struct flow base_flow; /* Flow at the last commit. */ + uint32_t base_priority; /* Priority at the last commit. */ + uint8_t table_id; /* OpenFlow table ID where flow was found. */ }; static void action_xlate_ctx_init(struct action_xlate_ctx *, @@ -266,6 +269,7 @@ static void facet_update_time(struct ofproto_dpif *, struct facet *, long long int used); static void facet_update_stats(struct ofproto_dpif *, struct facet *, const struct dpif_flow_stats *); +static void facet_reset_dp_stats(struct facet *, struct dpif_flow_stats *); static void facet_push_stats(struct facet *); static void facet_account(struct ofproto_dpif *, struct facet *, uint64_t extra_bytes); @@ -285,6 +289,7 @@ struct ofport_dpif { struct cfm *cfm; /* Connectivity Fault Management, if any. */ tag_type tag; /* Tag associated with this port. */ uint32_t bond_stable_id; /* stable_id to use as bond slave, or 0. */ + bool may_enable; /* May be enabled in bonds. */ }; static struct ofport_dpif * @@ -296,8 +301,12 @@ ofport_dpif_cast(const struct ofport *ofport) static void port_run(struct ofport_dpif *); static void port_wait(struct ofport_dpif *); -static int set_cfm(struct ofport *, const struct cfm *, - const uint16_t *remote_mps, size_t n_remote_mps); +static int set_cfm(struct ofport *, const struct cfm_settings *); + +struct dpif_completion { + struct list list_node; + struct ofoperation *op; +}; struct ofproto_dpif { struct ofproto up; @@ -309,7 +318,7 @@ struct ofproto_dpif { /* Bridging. */ struct netflow *netflow; - struct ofproto_sflow *sflow; + struct dpif_sflow *sflow; struct hmap bundles; /* Contains "struct ofbundle"s. */ struct mac_learning *ml; struct ofmirror *mirrors[MAX_MIRRORS]; @@ -322,8 +331,17 @@ struct ofproto_dpif { struct hmap facets; bool need_revalidate; struct tag_set revalidate_set; + + /* Support for debugging async flow mods. */ + struct list completions; + + bool has_bundle_action; /* True when the first bundle action appears. */ }; +/* Defer flow mod completion until "ovs-appctl ofproto/unclog"? (Useful only + * for debugging the asynchronous flow_mod implementation.) */ +static bool clogged; + static void ofproto_dpif_unixctl_init(void); static struct ofproto_dpif * @@ -351,8 +369,7 @@ static void handle_upcall(struct ofproto_dpif *, struct dpif_upcall *); static int expire(struct ofproto_dpif *); /* Utilities. */ -static int send_packet(struct ofproto_dpif *, - uint32_t odp_port, uint16_t vlan_tci, +static int send_packet(struct ofproto_dpif *, uint32_t odp_port, const struct ofpbuf *packet); /* Global variables. */ @@ -403,7 +420,7 @@ dealloc(struct ofproto *ofproto_) } static int -construct(struct ofproto *ofproto_) +construct(struct ofproto *ofproto_, int *n_tablesp) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); const char *name = ofproto->up.name; @@ -446,27 +463,53 @@ construct(struct ofproto *ofproto_) ofproto->need_revalidate = false; tag_set_init(&ofproto->revalidate_set); - ofproto->up.tables = xmalloc(sizeof *ofproto->up.tables); - classifier_init(&ofproto->up.tables[0]); - ofproto->up.n_tables = 1; + list_init(&ofproto->completions); ofproto_dpif_unixctl_init(); + ofproto->has_bundle_action = false; + + *n_tablesp = 255; return 0; } +static void +complete_operations(struct ofproto_dpif *ofproto) +{ + struct dpif_completion *c, *next; + + LIST_FOR_EACH_SAFE (c, next, list_node, &ofproto->completions) { + ofoperation_complete(c->op, 0); + list_remove(&c->list_node); + free(c); + } +} + static void destruct(struct ofproto *ofproto_) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + struct rule_dpif *rule, *next_rule; + struct classifier *table; int i; + complete_operations(ofproto); + + OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) { + struct cls_cursor cursor; + + cls_cursor_init(&cursor, table, NULL); + CLS_CURSOR_FOR_EACH_SAFE (rule, next_rule, up.cr, &cursor) { + ofproto_rule_destroy(&rule->up); + } + } + for (i = 0; i < MAX_MIRRORS; i++) { mirror_destroy(ofproto->mirrors[i]); } netflow_destroy(ofproto->netflow); - ofproto_sflow_destroy(ofproto->sflow); + dpif_sflow_destroy(ofproto->sflow); hmap_destroy(&ofproto->bundles); mac_learning_destroy(ofproto->ml); @@ -483,6 +526,9 @@ run(struct ofproto *ofproto_) struct ofbundle *bundle; int i; + if (!clogged) { + complete_operations(ofproto); + } dpif_run(ofproto->dpif); for (i = 0; i < 50; i++) { @@ -510,7 +556,7 @@ run(struct ofproto *ofproto_) netflow_run(ofproto->netflow); } if (ofproto->sflow) { - ofproto_sflow_run(ofproto->sflow); + dpif_sflow_run(ofproto->sflow); } HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) { @@ -549,10 +595,14 @@ wait(struct ofproto *ofproto_) struct ofport_dpif *ofport; struct ofbundle *bundle; + if (!clogged && !list_is_empty(&ofproto->completions)) { + poll_immediate_wake(); + } + dpif_wait(ofproto->dpif); dpif_recv_wait(ofproto->dpif); if (ofproto->sflow) { - ofproto_sflow_wait(ofproto->sflow); + dpif_sflow_wait(ofproto->sflow); } if (!tag_set_is_empty(&ofproto->revalidate_set)) { poll_immediate_wake(); @@ -666,10 +716,11 @@ port_construct(struct ofport *port_) port->bundle = NULL; port->cfm = NULL; port->tag = tag_create_random(); + port->may_enable = true; if (ofproto->sflow) { - ofproto_sflow_add_port(ofproto->sflow, port->odp_port, - netdev_get_name(port->up.netdev)); + dpif_sflow_add_port(ofproto->sflow, port->odp_port, + netdev_get_name(port->up.netdev)); } return 0; @@ -682,9 +733,9 @@ port_destruct(struct ofport *port_) struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto); bundle_remove(port_); - set_cfm(port_, NULL, NULL, 0); + set_cfm(port_, NULL); if (ofproto->sflow) { - ofproto_sflow_del_port(ofproto->sflow, port->odp_port); + dpif_sflow_del_port(ofproto->sflow, port->odp_port); } } @@ -716,46 +767,39 @@ set_sflow(struct ofproto *ofproto_, const struct ofproto_sflow_options *sflow_options) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); - struct ofproto_sflow *os = ofproto->sflow; + struct dpif_sflow *ds = ofproto->sflow; if (sflow_options) { - if (!os) { + if (!ds) { struct ofport_dpif *ofport; - os = ofproto->sflow = ofproto_sflow_create(ofproto->dpif); + ds = ofproto->sflow = dpif_sflow_create(ofproto->dpif); HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) { - ofproto_sflow_add_port(os, ofport->odp_port, - netdev_get_name(ofport->up.netdev)); + dpif_sflow_add_port(ds, ofport->odp_port, + netdev_get_name(ofport->up.netdev)); } } - ofproto_sflow_set_options(os, sflow_options); + dpif_sflow_set_options(ds, sflow_options); } else { - ofproto_sflow_destroy(os); + dpif_sflow_destroy(ds); ofproto->sflow = NULL; } return 0; } static int -set_cfm(struct ofport *ofport_, const struct cfm *cfm, - const uint16_t *remote_mps, size_t n_remote_mps) +set_cfm(struct ofport *ofport_, const struct cfm_settings *s) { struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); int error; - if (!cfm) { + if (!s) { error = 0; } else { if (!ofport->cfm) { - ofport->cfm = cfm_create(); + ofport->cfm = cfm_create(netdev_get_name(ofport->up.netdev)); } - ofport->cfm->mpid = cfm->mpid; - ofport->cfm->interval = cfm->interval; - memcpy(ofport->cfm->maid, cfm->maid, CCM_MAID_LEN); - - cfm_update_remote_mps(ofport->cfm, remote_mps, n_remote_mps); - - if (cfm_configure(ofport->cfm)) { + if (cfm_configure(ofport->cfm, s)) { return 0; } @@ -767,11 +811,11 @@ set_cfm(struct ofport *ofport_, const struct cfm *cfm, } static int -get_cfm(struct ofport *ofport_, const struct cfm **cfmp) +get_cfm_fault(const struct ofport *ofport_) { struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); - *cfmp = ofport->cfm; - return 0; + + return ofport->cfm ? cfm_get_fault(ofport->cfm) : -1; } /* Bundles. */ @@ -830,6 +874,8 @@ bundle_del_port(struct ofport_dpif *port) { struct ofbundle *bundle = port->bundle; + bundle->ofproto->need_revalidate = true; + list_remove(&port->bundle_node); port->bundle = NULL; @@ -861,6 +907,7 @@ bundle_add_port(struct ofbundle *bundle, uint32_t ofp_port, } if (port->bundle != bundle) { + bundle->ofproto->need_revalidate = true; if (port->bundle) { bundle_del_port(port); } @@ -990,7 +1037,7 @@ bundle_set(struct ofproto *ofproto_, void *aux, LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) { for (i = 0; i < s->n_slaves; i++) { - if (s->slaves[i] == odp_port_to_ofp_port(port->odp_port)) { + if (s->slaves[i] == port->up.ofp_port) { goto found; } } @@ -1029,6 +1076,7 @@ bundle_set(struct ofproto *ofproto_, void *aux, } } else { bundle->bond = bond_create(s->bond); + ofproto->need_revalidate = true; } LIST_FOR_EACH (port, bundle_node, &bundle->ports) { @@ -1137,8 +1185,7 @@ bundle_run(struct ofbundle *bundle) struct ofport_dpif *port; LIST_FOR_EACH (port, bundle_node, &bundle->ports) { - bool may_enable = lacp_slave_may_enable(bundle->lacp, port); - bond_slave_set_lacp_may_enable(bundle->bond, port, may_enable); + bond_slave_set_may_enable(bundle->bond, port, port->may_enable); } bond_run(bundle->bond, &bundle->ofproto->revalidate_set, @@ -1222,6 +1269,7 @@ mirror_set(struct ofproto *ofproto_, void *aux, mirror = ofproto->mirrors[idx] = xzalloc(sizeof *mirror); mirror->ofproto = ofproto; mirror->idx = idx; + mirror->aux = aux; mirror->out_vlan = -1; mirror->name = NULL; } @@ -1375,22 +1423,37 @@ ofproto_port_from_dpif_port(struct ofproto_port *ofproto_port, static void port_run(struct ofport_dpif *ofport) { + bool enable = netdev_get_carrier(ofport->up.netdev); + if (ofport->cfm) { cfm_run(ofport->cfm); if (cfm_should_send_ccm(ofport->cfm)) { struct ofpbuf packet; - struct ccm *ccm; ofpbuf_init(&packet, 0); - ccm = eth_compose(&packet, eth_addr_ccm, ofport->up.opp.hw_addr, - ETH_TYPE_CFM, sizeof *ccm); - cfm_compose_ccm(ofport->cfm, ccm); + cfm_compose_ccm(ofport->cfm, &packet, ofport->up.opp.hw_addr); send_packet(ofproto_dpif_cast(ofport->up.ofproto), - ofport->odp_port, 0, &packet); + ofport->odp_port, &packet); ofpbuf_uninit(&packet); } + + enable = enable && !cfm_get_fault(ofport->cfm); } + + if (ofport->bundle) { + enable = enable && lacp_slave_may_enable(ofport->bundle->lacp, ofport); + } + + if (ofport->may_enable != enable) { + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); + + if (ofproto->has_bundle_action) { + ofproto->need_revalidate = true; + } + } + + ofport->may_enable = enable; } static void @@ -1548,19 +1611,19 @@ process_special(struct ofproto_dpif *ofproto, const struct flow *flow, { if (cfm_should_process_flow(flow)) { struct ofport_dpif *ofport = get_ofp_port(ofproto, flow->in_port); - if (ofport && ofport->cfm) { + if (packet && ofport && ofport->cfm) { cfm_process_heartbeat(ofport->cfm, packet); } return true; } else if (flow->dl_type == htons(ETH_TYPE_LACP)) { struct ofport_dpif *port = get_ofp_port(ofproto, flow->in_port); - if (port && port->bundle && port->bundle->lacp) { + if (packet && port && port->bundle && port->bundle->lacp) { const struct lacp_pdu *pdu = parse_lacp_packet(packet); if (pdu) { lacp_process_pdu(port->bundle->lacp, port, pdu); } - return true; } + return true; } return false; } @@ -1587,12 +1650,12 @@ handle_miss_upcall(struct ofproto_dpif *ofproto, struct dpif_upcall *upcall) /* Check with in-band control to see if this packet should be sent * to the local port regardless of the flow table. */ if (connmgr_msg_in_hook(ofproto->up.connmgr, &flow, upcall->packet)) { - send_packet(ofproto, OFPP_LOCAL, 0, upcall->packet); + send_packet(ofproto, ODPP_LOCAL, upcall->packet); } facet = facet_lookup_valid(ofproto, &flow); if (!facet) { - struct rule_dpif *rule = rule_dpif_lookup(ofproto, &flow); + struct rule_dpif *rule = rule_dpif_lookup(ofproto, &flow, 0); if (!rule) { /* Don't send a packet-in if OFPPC_NO_PACKET_IN asserted. */ struct ofport_dpif *port = get_ofp_port(ofproto, flow.in_port); @@ -1653,7 +1716,7 @@ handle_upcall(struct ofproto_dpif *ofproto, struct dpif_upcall *upcall) case DPIF_UC_SAMPLE: if (ofproto->sflow) { odp_flow_key_to_flow(upcall->key, upcall->key_len, &flow); - ofproto_sflow_received(ofproto->sflow, upcall, &flow); + dpif_sflow_received(ofproto->sflow, upcall, &flow); } ofpbuf_delete(upcall->packet); break; @@ -1686,7 +1749,7 @@ static int expire(struct ofproto_dpif *ofproto) { struct rule_dpif *rule, *next_rule; - struct cls_cursor cursor; + struct classifier *table; int dp_max_idle; /* Update stats for each flow in the datapath. */ @@ -1697,9 +1760,13 @@ expire(struct ofproto_dpif *ofproto) expire_facets(ofproto, dp_max_idle); /* Expire OpenFlow flows whose idle_timeout or hard_timeout has passed. */ - cls_cursor_init(&cursor, &ofproto->up.tables[0], NULL); - CLS_CURSOR_FOR_EACH_SAFE (rule, next_rule, up.cr, &cursor) { - rule_expire(rule); + OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) { + struct cls_cursor cursor; + + cls_cursor_init(&cursor, table, NULL); + CLS_CURSOR_FOR_EACH_SAFE (rule, next_rule, up.cr, &cursor) { + rule_expire(rule); + } } /* All outstanding data in existing flows has been accounted, so it's a @@ -1810,11 +1877,12 @@ facet_max_idle(const struct ofproto_dpif *ofproto) * N_BUCKETS buckets whose width is BUCKET_WIDTH msecs each. Each facet * that is installed in the kernel gets dropped in the appropriate bucket. * After the histogram has been built, we compute the cutoff so that only - * the most-recently-used 1% of facets (but at least 1000 flows) are kept - * cached. At least the most-recently-used bucket of facets is kept, so - * actually an arbitrary number of facets can be kept in any given - * expiration run (though the next run will delete most of those unless - * they receive additional data). + * the most-recently-used 1% of facets (but at least + * ofproto->up.flow_eviction_threshold flows) are kept cached. At least + * the most-recently-used bucket of facets is kept, so actually an + * arbitrary number of facets can be kept in any given expiration run + * (though the next run will delete most of those unless they receive + * additional data). * * This requires a second pass through the facets, in addition to the pass * made by update_stats(), because the former function never looks @@ -1823,13 +1891,13 @@ facet_max_idle(const struct ofproto_dpif *ofproto) enum { BUCKET_WIDTH = ROUND_UP(100, TIME_UPDATE_INTERVAL) }; enum { N_BUCKETS = 5000 / BUCKET_WIDTH }; int buckets[N_BUCKETS] = { 0 }; + int total, subtotal, bucket; struct facet *facet; - int total, bucket; long long int now; int i; total = hmap_count(&ofproto->facets); - if (total <= 1000) { + if (total <= ofproto->up.flow_eviction_threshold) { return N_BUCKETS * BUCKET_WIDTH; } @@ -1844,15 +1912,11 @@ facet_max_idle(const struct ofproto_dpif *ofproto) } /* Find the first bucket whose flows should be expired. */ - for (bucket = 0; bucket < N_BUCKETS; bucket++) { - if (buckets[bucket]) { - int subtotal = 0; - do { - subtotal += buckets[bucket++]; - } while (bucket < N_BUCKETS && subtotal < MAX(1000, total / 100)); - break; - } - } + subtotal = bucket = 0; + do { + subtotal += buckets[bucket++]; + } while (bucket < N_BUCKETS && + subtotal < MAX(ofproto->up.flow_eviction_threshold, total / 100)); if (VLOG_IS_DBG_ENABLED()) { struct ds s; @@ -1991,7 +2055,7 @@ execute_odp_actions(struct ofproto_dpif *ofproto, const struct flow *flow, struct ofpbuf *packet) { if (actions_len == NLA_ALIGN(NLA_HDRLEN + sizeof(uint64_t)) - && odp_actions->nla_type == ODP_ACTION_ATTR_CONTROLLER) { + && odp_actions->nla_type == ODP_ACTION_ATTR_USERSPACE) { /* As an optimization, avoid a round-trip from userspace to kernel to * userspace. This also avoids possibly filling up kernel packet * buffers along the way. */ @@ -2010,9 +2074,16 @@ execute_odp_actions(struct ofproto_dpif *ofproto, const struct flow *flow, return true; } else { + struct odputil_keybuf keybuf; + struct ofpbuf key; int error; - error = dpif_execute(ofproto->dpif, odp_actions, actions_len, packet); + ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); + odp_flow_key_from_flow(&key, flow); + + error = dpif_execute(ofproto->dpif, key.data, key.size, + odp_actions, actions_len, packet); + ofpbuf_delete(packet); return !error; } @@ -2088,6 +2159,12 @@ facet_make_actions(struct ofproto_dpif *p, struct facet *facet, ofpbuf_delete(odp_actions); } +/* Updates 'facet''s flow in the datapath setting its actions to 'actions_len' + * bytes of actions in 'actions'. If 'stats' is non-null, statistics counters + * in the datapath will be zeroed and 'stats' will be updated with traffic new + * since 'facet' was last updated. + * + * Returns 0 if successful, otherwise a positive errno value.*/ static int facet_put__(struct ofproto_dpif *ofproto, struct facet *facet, const struct nlattr *actions, size_t actions_len, @@ -2096,19 +2173,24 @@ facet_put__(struct ofproto_dpif *ofproto, struct facet *facet, struct odputil_keybuf keybuf; enum dpif_flow_put_flags flags; struct ofpbuf key; + int ret; flags = DPIF_FP_CREATE | DPIF_FP_MODIFY; if (stats) { flags |= DPIF_FP_ZERO_STATS; - facet->dp_packet_count = 0; - facet->dp_byte_count = 0; } ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); odp_flow_key_from_flow(&key, &facet->flow); - return dpif_flow_put(ofproto->dpif, flags, key.data, key.size, - actions, actions_len, stats); + ret = dpif_flow_put(ofproto->dpif, flags, key.data, key.size, + actions, actions_len, stats); + + if (stats) { + facet_reset_dp_stats(facet, stats); + } + + return ret; } /* If 'facet' is installable, inserts or re-inserts it into 'p''s datapath. If @@ -2126,6 +2208,12 @@ facet_install(struct ofproto_dpif *p, struct facet *facet, bool zero_stats) } } +static int +vlan_tci_to_openflow_vlan(ovs_be16 vlan_tci) +{ + return vlan_tci != htons(0) ? vlan_tci_to_vid(vlan_tci) : OFP_VLAN_NONE; +} + static void facet_account(struct ofproto_dpif *ofproto, struct facet *facet, uint64_t extra_bytes) @@ -2135,6 +2223,7 @@ facet_account(struct ofproto_dpif *ofproto, const struct nlattr *a; tag_type dummy = 0; unsigned int left; + ovs_be16 vlan_tci; int vlan; total_bytes = facet->byte_count + extra_bytes; @@ -2162,14 +2251,32 @@ facet_account(struct ofproto_dpif *ofproto, if (!ofproto->has_bonded_bundles) { return; } + + /* This loop feeds byte counters to bond_account() for rebalancing to use + * as a basis. We also need to track the actual VLAN on which the packet + * is going to be sent to ensure that it matches the one passed to + * bond_choose_output_slave(). (Otherwise, we will account to the wrong + * hash bucket.) */ + vlan_tci = facet->flow.vlan_tci; NL_ATTR_FOR_EACH_UNSAFE (a, left, facet->actions, facet->actions_len) { - if (nl_attr_type(a) == ODP_ACTION_ATTR_OUTPUT) { - struct ofport_dpif *port; + struct ofport_dpif *port; + switch (nl_attr_type(a)) { + case ODP_ACTION_ATTR_OUTPUT: port = get_odp_port(ofproto, nl_attr_get_u32(a)); if (port && port->bundle && port->bundle->bond) { - bond_account(port->bundle->bond, &facet->flow, vlan, n_bytes); + bond_account(port->bundle->bond, &facet->flow, + vlan_tci_to_openflow_vlan(vlan_tci), n_bytes); } + break; + + case ODP_ACTION_ATTR_STRIP_VLAN: + vlan_tci = htons(0); + break; + + case ODP_ACTION_ATTR_SET_DL_TCI: + vlan_tci = nl_attr_get_be16(a); + break; } } } @@ -2182,16 +2289,17 @@ facet_uninstall(struct ofproto_dpif *p, struct facet *facet) struct odputil_keybuf keybuf; struct dpif_flow_stats stats; struct ofpbuf key; + int error; ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); odp_flow_key_from_flow(&key, &facet->flow); - if (!dpif_flow_del(p->dpif, key.data, key.size, &stats)) { + error = dpif_flow_del(p->dpif, key.data, key.size, &stats); + facet_reset_dp_stats(facet, &stats); + if (!error) { facet_update_stats(p, facet, &stats); } facet->installed = false; - facet->dp_packet_count = 0; - facet->dp_byte_count = 0; } else { assert(facet->dp_packet_count == 0); assert(facet->dp_byte_count == 0); @@ -2210,6 +2318,24 @@ facet_is_controller_flow(struct facet *facet) htons(OFPP_CONTROLLER))); } +/* Resets 'facet''s datapath statistics counters. This should be called when + * 'facet''s statistics are cleared in the datapath. If 'stats' is non-null, + * it should contain the statistics returned by dpif when 'facet' was reset in + * the datapath. 'stats' will be modified to only included statistics new + * since 'facet' was last updated. */ +static void +facet_reset_dp_stats(struct facet *facet, struct dpif_flow_stats *stats) +{ + if (stats && facet->dp_packet_count <= stats->n_packets + && facet->dp_byte_count <= stats->n_bytes) { + stats->n_packets -= facet->dp_packet_count; + stats->n_bytes -= facet->dp_byte_count; + } + + facet->dp_packet_count = 0; + facet->dp_byte_count = 0; +} + /* Folds all of 'facet''s statistics into its rule. Also updates the * accounting ofhook and emits a NetFlow expiration if appropriate. All of * 'facet''s statistics in the datapath should have been zeroed and folded into @@ -2309,7 +2435,7 @@ facet_revalidate(struct ofproto_dpif *ofproto, struct facet *facet) COVERAGE_INC(facet_revalidate); /* Determine the new rule. */ - new_rule = rule_dpif_lookup(ofproto, &facet->flow); + new_rule = rule_dpif_lookup(ofproto, &facet->flow, 0); if (!new_rule) { /* No new rule, so delete the facet. */ facet_remove(ofproto, facet); @@ -2467,13 +2593,29 @@ flow_push_stats(const struct rule_dpif *rule, /* Rules. */ static struct rule_dpif * -rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow) +rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow, + uint8_t table_id) { return rule_dpif_cast(rule_from_cls_rule( - classifier_lookup(&ofproto->up.tables[0], + classifier_lookup(&ofproto->up.tables[table_id], flow))); } +static void +complete_operation(struct rule_dpif *rule) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); + + ofproto->need_revalidate = true; + if (clogged) { + struct dpif_completion *c = xmalloc(sizeof *c); + c->op = rule->up.pending; + list_push_back(&ofproto->completions, &c->list_node); + } else { + ofoperation_complete(rule->up.pending, 0); + } +} + static struct rule * rule_alloc(void) { @@ -2493,7 +2635,7 @@ rule_construct(struct rule *rule_) { struct rule_dpif *rule = rule_dpif_cast(rule_); struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); - struct rule_dpif *old_rule; + struct rule_dpif *victim; int error; error = validate_actions(rule->up.actions, rule->up.n_actions, @@ -2502,21 +2644,25 @@ rule_construct(struct rule *rule_) return error; } - old_rule = rule_dpif_cast(rule_from_cls_rule(classifier_find_rule_exactly( - &ofproto->up.tables[0], - &rule->up.cr))); - if (old_rule) { - ofproto_rule_destroy(&old_rule->up); - } - rule->used = rule->up.created; rule->packet_count = 0; rule->byte_count = 0; - list_init(&rule->facets); - classifier_insert(&ofproto->up.tables[0], &rule->up.cr); - ofproto->need_revalidate = true; + victim = rule_dpif_cast(ofoperation_get_victim(rule->up.pending)); + if (victim && !list_is_empty(&victim->facets)) { + struct facet *facet; + rule->facets = victim->facets; + list_moved(&rule->facets); + LIST_FOR_EACH (facet, list_node, &rule->facets) { + facet->rule = rule; + } + } else { + /* Must avoid list_moved() in this case. */ + list_init(&rule->facets); + } + + complete_operation(rule); return 0; } @@ -2527,11 +2673,11 @@ rule_destruct(struct rule *rule_) struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); struct facet *facet, *next_facet; - classifier_remove(&ofproto->up.tables[0], &rule->up.cr); LIST_FOR_EACH_SAFE (facet, next_facet, list_node, &rule->facets) { facet_revalidate(ofproto, facet); } - ofproto->need_revalidate = true; + + complete_operation(rule); } static void @@ -2573,7 +2719,7 @@ rule_execute(struct rule *rule_, struct flow *flow, struct ofpbuf *packet) /* Otherwise, if 'rule' is in fact the correct rule for 'packet', then * create a new facet for it and use that. */ - if (rule_dpif_lookup(ofproto, flow) == rule) { + if (rule_dpif_lookup(ofproto, flow, 0) == rule) { facet = facet_create(rule, flow, packet); facet_execute(ofproto, facet, packet); facet_install(ofproto, facet, true); @@ -2597,41 +2743,43 @@ rule_execute(struct rule *rule_, struct flow *flow, struct ofpbuf *packet) return 0; } -static int -rule_modify_actions(struct rule *rule_, - const union ofp_action *actions, size_t n_actions) +static void +rule_modify_actions(struct rule *rule_) { struct rule_dpif *rule = rule_dpif_cast(rule_); struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); int error; - error = validate_actions(actions, n_actions, &rule->up.cr.flow, - ofproto->max_ports); - if (!error) { - ofproto->need_revalidate = true; + error = validate_actions(rule->up.actions, rule->up.n_actions, + &rule->up.cr.flow, ofproto->max_ports); + if (error) { + ofoperation_complete(rule->up.pending, error); + return; } - return error; + + complete_operation(rule); } -/* Sends 'packet' out of port 'odp_port' within 'ofproto'. If 'vlan_tci' is - * zero the packet will not have any 802.1Q hader; if it is nonzero, then the - * packet will be sent with the VLAN TCI specified by 'vlan_tci & ~VLAN_CFI'. - * +/* Sends 'packet' out of port 'odp_port' within 'p'. * Returns 0 if successful, otherwise a positive errno value. */ static int -send_packet(struct ofproto_dpif *ofproto, uint32_t odp_port, uint16_t vlan_tci, +send_packet(struct ofproto_dpif *ofproto, uint32_t odp_port, const struct ofpbuf *packet) { - struct ofpbuf odp_actions; + struct ofpbuf key, odp_actions; + struct odputil_keybuf keybuf; + struct flow flow; int error; + flow_extract((struct ofpbuf *) packet, 0, 0, &flow); + ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); + odp_flow_key_from_flow(&key, &flow); + ofpbuf_init(&odp_actions, 32); - if (vlan_tci != 0) { - nl_msg_put_u32(&odp_actions, ODP_ACTION_ATTR_SET_DL_TCI, - ntohs(vlan_tci & ~VLAN_CFI)); - } nl_msg_put_u32(&odp_actions, ODP_ACTION_ATTR_OUTPUT, odp_port); - error = dpif_execute(ofproto->dpif, odp_actions.data, odp_actions.size, + error = dpif_execute(ofproto->dpif, + key.data, key.size, + odp_actions.data, odp_actions.size, packet); ofpbuf_uninit(&odp_actions); @@ -2646,7 +2794,77 @@ send_packet(struct ofproto_dpif *ofproto, uint32_t odp_port, uint16_t vlan_tci, static void do_xlate_actions(const union ofp_action *in, size_t n_in, struct action_xlate_ctx *ctx); -static bool xlate_normal(struct action_xlate_ctx *); +static void xlate_normal(struct action_xlate_ctx *); + +static void +commit_odp_actions(struct action_xlate_ctx *ctx) +{ + const struct flow *flow = &ctx->flow; + struct flow *base = &ctx->base_flow; + struct ofpbuf *odp_actions = ctx->odp_actions; + + if (base->tun_id != flow->tun_id) { + nl_msg_put_be64(odp_actions, ODP_ACTION_ATTR_SET_TUNNEL, flow->tun_id); + base->tun_id = flow->tun_id; + } + + if (base->nw_src != flow->nw_src) { + nl_msg_put_be32(odp_actions, ODP_ACTION_ATTR_SET_NW_SRC, flow->nw_src); + base->nw_src = flow->nw_src; + } + + if (base->nw_dst != flow->nw_dst) { + nl_msg_put_be32(odp_actions, ODP_ACTION_ATTR_SET_NW_DST, flow->nw_dst); + base->nw_dst = flow->nw_dst; + } + + if (base->nw_tos != flow->nw_tos) { + nl_msg_put_u8(odp_actions, ODP_ACTION_ATTR_SET_NW_TOS, flow->nw_tos); + base->nw_tos = flow->nw_tos; + } + + if (base->vlan_tci != flow->vlan_tci) { + if (!(flow->vlan_tci & htons(VLAN_CFI))) { + nl_msg_put_flag(odp_actions, ODP_ACTION_ATTR_STRIP_VLAN); + } else { + nl_msg_put_be16(odp_actions, ODP_ACTION_ATTR_SET_DL_TCI, + flow->vlan_tci & ~htons(VLAN_CFI)); + } + base->vlan_tci = flow->vlan_tci; + } + + if (base->tp_src != flow->tp_src) { + nl_msg_put_be16(odp_actions, ODP_ACTION_ATTR_SET_TP_SRC, flow->tp_src); + base->tp_src = flow->tp_src; + } + + if (base->tp_dst != flow->tp_dst) { + nl_msg_put_be16(odp_actions, ODP_ACTION_ATTR_SET_TP_DST, flow->tp_dst); + base->tp_dst = flow->tp_dst; + } + + if (!eth_addr_equals(base->dl_src, flow->dl_src)) { + nl_msg_put_unspec(odp_actions, ODP_ACTION_ATTR_SET_DL_SRC, + flow->dl_src, ETH_ADDR_LEN); + memcpy(base->dl_src, flow->dl_src, ETH_ADDR_LEN); + } + + if (!eth_addr_equals(base->dl_dst, flow->dl_dst)) { + nl_msg_put_unspec(odp_actions, ODP_ACTION_ATTR_SET_DL_DST, + flow->dl_dst, ETH_ADDR_LEN); + memcpy(base->dl_dst, flow->dl_dst, ETH_ADDR_LEN); + } + + if (ctx->base_priority != ctx->priority) { + if (ctx->priority) { + nl_msg_put_u32(odp_actions, ODP_ACTION_ATTR_SET_PRIORITY, + ctx->priority); + } else { + nl_msg_put_flag(odp_actions, ODP_ACTION_ATTR_POP_PRIORITY); + } + ctx->base_priority = ctx->priority; + } +} static void add_output_action(struct action_xlate_ctx *ctx, uint16_t ofp_port) @@ -2667,23 +2885,29 @@ add_output_action(struct action_xlate_ctx *ctx, uint16_t ofp_port) */ } + commit_odp_actions(ctx); nl_msg_put_u32(ctx->odp_actions, ODP_ACTION_ATTR_OUTPUT, odp_port); ctx->nf_output_iface = ofp_port; } static void -xlate_table_action(struct action_xlate_ctx *ctx, uint16_t in_port) +xlate_table_action(struct action_xlate_ctx *ctx, + uint16_t in_port, uint8_t table_id) { if (ctx->recurse < MAX_RESUBMIT_RECURSION) { struct rule_dpif *rule; uint16_t old_in_port; + uint8_t old_table_id; + + old_table_id = ctx->table_id; + ctx->table_id = table_id; /* Look up a flow with 'in_port' as the input port. Then restore the * original input port (otherwise OFPP_NORMAL and OFPP_IN_PORT will * have surprising behavior). */ old_in_port = ctx->flow.in_port; ctx->flow.in_port = in_port; - rule = rule_dpif_lookup(ctx->ofproto, &ctx->flow); + rule = rule_dpif_lookup(ctx->ofproto, &ctx->flow, table_id); ctx->flow.in_port = old_in_port; if (ctx->resubmit_hook) { @@ -2695,29 +2919,46 @@ xlate_table_action(struct action_xlate_ctx *ctx, uint16_t in_port) do_xlate_actions(rule->up.actions, rule->up.n_actions, ctx); ctx->recurse--; } + + ctx->table_id = old_table_id; } else { static struct vlog_rate_limit recurse_rl = VLOG_RATE_LIMIT_INIT(1, 1); - VLOG_ERR_RL(&recurse_rl, "NXAST_RESUBMIT recursed over %d times", + VLOG_ERR_RL(&recurse_rl, "resubmit actions recursed over %d times", MAX_RESUBMIT_RECURSION); } } static void -flood_packets(struct ofproto_dpif *ofproto, - uint16_t ofp_in_port, ovs_be32 mask, - uint16_t *nf_output_iface, struct ofpbuf *odp_actions) +xlate_resubmit_table(struct action_xlate_ctx *ctx, + const struct nx_action_resubmit *nar) +{ + uint16_t in_port; + uint8_t table_id; + + in_port = (nar->in_port == htons(OFPP_IN_PORT) + ? ctx->flow.in_port + : ntohs(nar->in_port)); + table_id = nar->table == 255 ? ctx->table_id : nar->table; + + xlate_table_action(ctx, in_port, table_id); +} + +static void +flood_packets(struct action_xlate_ctx *ctx, ovs_be32 mask) { struct ofport_dpif *ofport; - HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) { + commit_odp_actions(ctx); + HMAP_FOR_EACH (ofport, up.hmap_node, &ctx->ofproto->up.ports) { uint16_t ofp_port = ofport->up.ofp_port; - if (ofp_port != ofp_in_port && !(ofport->up.opp.config & mask)) { - nl_msg_put_u32(odp_actions, ODP_ACTION_ATTR_OUTPUT, + if (ofp_port != ctx->flow.in_port && !(ofport->up.opp.config & mask)) { + nl_msg_put_u32(ctx->odp_actions, ODP_ACTION_ATTR_OUTPUT, ofport->odp_port); } } - *nf_output_iface = NF_OUT_FLOOD; + + ctx->nf_output_iface = NF_OUT_FLOOD; } static void @@ -2733,25 +2974,26 @@ xlate_output_action__(struct action_xlate_ctx *ctx, add_output_action(ctx, ctx->flow.in_port); break; case OFPP_TABLE: - xlate_table_action(ctx, ctx->flow.in_port); + xlate_table_action(ctx, ctx->flow.in_port, ctx->table_id); break; case OFPP_NORMAL: xlate_normal(ctx); break; case OFPP_FLOOD: - flood_packets(ctx->ofproto, ctx->flow.in_port, htonl(OFPPC_NO_FLOOD), - &ctx->nf_output_iface, ctx->odp_actions); + flood_packets(ctx, htonl(OFPPC_NO_FLOOD)); break; case OFPP_ALL: - flood_packets(ctx->ofproto, ctx->flow.in_port, htonl(0), - &ctx->nf_output_iface, ctx->odp_actions); + flood_packets(ctx, htonl(0)); break; case OFPP_CONTROLLER: - nl_msg_put_u64(ctx->odp_actions, ODP_ACTION_ATTR_CONTROLLER, max_len); + commit_odp_actions(ctx); + nl_msg_put_u64(ctx->odp_actions, ODP_ACTION_ATTR_USERSPACE, max_len); break; case OFPP_LOCAL: add_output_action(ctx, OFPP_LOCAL); break; + case OFPP_NONE: + break; default: if (port != ctx->flow.in_port) { add_output_action(ctx, port); @@ -2776,34 +3018,12 @@ xlate_output_action(struct action_xlate_ctx *ctx, xlate_output_action__(ctx, ntohs(oao->port), ntohs(oao->max_len)); } -/* If the final ODP action in 'ctx' is "pop priority", drop it, as an - * optimization, because we're going to add another action that sets the - * priority immediately after, or because there are no actions following the - * pop. */ -static void -remove_pop_action(struct action_xlate_ctx *ctx) -{ - if (ctx->odp_actions->size == ctx->last_pop_priority) { - ctx->odp_actions->size -= NLA_ALIGN(NLA_HDRLEN); - ctx->last_pop_priority = -1; - } -} - -static void -add_pop_action(struct action_xlate_ctx *ctx) -{ - if (ctx->odp_actions->size != ctx->last_pop_priority) { - nl_msg_put_flag(ctx->odp_actions, ODP_ACTION_ATTR_POP_PRIORITY); - ctx->last_pop_priority = ctx->odp_actions->size; - } -} - static void xlate_enqueue_action(struct action_xlate_ctx *ctx, const struct ofp_action_enqueue *oae) { uint16_t ofp_port, odp_port; - uint32_t priority; + uint32_t ctx_priority, priority; int error; error = dpif_queue_to_priority(ctx->ofproto->dpif, ntohl(oae->queue_id), @@ -2822,10 +3042,10 @@ xlate_enqueue_action(struct action_xlate_ctx *ctx, odp_port = ofp_port_to_odp_port(ofp_port); /* Add ODP actions. */ - remove_pop_action(ctx); - nl_msg_put_u32(ctx->odp_actions, ODP_ACTION_ATTR_SET_PRIORITY, priority); + ctx_priority = ctx->priority; + ctx->priority = priority; add_output_action(ctx, odp_port); - add_pop_action(ctx); + ctx->priority = ctx_priority; /* Update NetFlow output port. */ if (ctx->nf_output_iface == NF_OUT_DROP) { @@ -2850,20 +3070,7 @@ xlate_set_queue_action(struct action_xlate_ctx *ctx, return; } - remove_pop_action(ctx); - nl_msg_put_u32(ctx->odp_actions, ODP_ACTION_ATTR_SET_PRIORITY, priority); -} - -static void -xlate_set_dl_tci(struct action_xlate_ctx *ctx) -{ - ovs_be16 tci = ctx->flow.vlan_tci; - if (!(tci & htons(VLAN_CFI))) { - nl_msg_put_flag(ctx->odp_actions, ODP_ACTION_ATTR_STRIP_VLAN); - } else { - nl_msg_put_be16(ctx->odp_actions, ODP_ACTION_ATTR_SET_DL_TCI, - tci & ~htons(VLAN_CFI)); - } + ctx->priority = priority; } struct xlate_reg_state { @@ -2871,27 +3078,6 @@ struct xlate_reg_state { ovs_be64 tun_id; }; -static void -save_reg_state(const struct action_xlate_ctx *ctx, - struct xlate_reg_state *state) -{ - state->vlan_tci = ctx->flow.vlan_tci; - state->tun_id = ctx->flow.tun_id; -} - -static void -update_reg_state(struct action_xlate_ctx *ctx, - const struct xlate_reg_state *state) -{ - if (ctx->flow.vlan_tci != state->vlan_tci) { - xlate_set_dl_tci(ctx); - } - if (ctx->flow.tun_id != state->tun_id) { - nl_msg_put_be64(ctx->odp_actions, - ODP_ACTION_ATTR_SET_TUNNEL, ctx->flow.tun_id); - } -} - static void xlate_autopath(struct action_xlate_ctx *ctx, const struct nx_action_autopath *naa) @@ -2912,90 +3098,25 @@ xlate_autopath(struct action_xlate_ctx *ctx, autopath_execute(naa, &ctx->flow, ofp_port); } -static void -xlate_nicira_action(struct action_xlate_ctx *ctx, - const struct nx_action_header *nah) +static bool +slave_enabled_cb(uint16_t ofp_port, void *ofproto_) { - const struct nx_action_resubmit *nar; - const struct nx_action_set_tunnel *nast; - const struct nx_action_set_queue *nasq; - const struct nx_action_multipath *nam; - const struct nx_action_autopath *naa; - enum nx_action_subtype subtype = ntohs(nah->subtype); - struct xlate_reg_state state; - ovs_be64 tun_id; - - assert(nah->vendor == htonl(NX_VENDOR_ID)); - switch (subtype) { - case NXAST_RESUBMIT: - nar = (const struct nx_action_resubmit *) nah; - xlate_table_action(ctx, ntohs(nar->in_port)); - break; - - case NXAST_SET_TUNNEL: - nast = (const struct nx_action_set_tunnel *) nah; - tun_id = htonll(ntohl(nast->tun_id)); - nl_msg_put_be64(ctx->odp_actions, ODP_ACTION_ATTR_SET_TUNNEL, tun_id); - ctx->flow.tun_id = tun_id; - break; - - case NXAST_DROP_SPOOFED_ARP: - if (ctx->flow.dl_type == htons(ETH_TYPE_ARP)) { - nl_msg_put_flag(ctx->odp_actions, - ODP_ACTION_ATTR_DROP_SPOOFED_ARP); - } - break; - - case NXAST_SET_QUEUE: - nasq = (const struct nx_action_set_queue *) nah; - xlate_set_queue_action(ctx, nasq); - break; - - case NXAST_POP_QUEUE: - add_pop_action(ctx); - break; - - case NXAST_REG_MOVE: - save_reg_state(ctx, &state); - nxm_execute_reg_move((const struct nx_action_reg_move *) nah, - &ctx->flow); - update_reg_state(ctx, &state); - break; - - case NXAST_REG_LOAD: - save_reg_state(ctx, &state); - nxm_execute_reg_load((const struct nx_action_reg_load *) nah, - &ctx->flow); - update_reg_state(ctx, &state); - break; - - case NXAST_NOTE: - /* Nothing to do. */ - break; - - case NXAST_SET_TUNNEL64: - tun_id = ((const struct nx_action_set_tunnel64 *) nah)->tun_id; - nl_msg_put_be64(ctx->odp_actions, ODP_ACTION_ATTR_SET_TUNNEL, tun_id); - ctx->flow.tun_id = tun_id; - break; - - case NXAST_MULTIPATH: - nam = (const struct nx_action_multipath *) nah; - multipath_execute(nam, &ctx->flow); - break; - - case NXAST_AUTOPATH: - naa = (const struct nx_action_autopath *) nah; - xlate_autopath(ctx, naa); - break; - - /* If you add a new action here that modifies flow data, don't forget to - * update the flow key in ctx->flow at the same time. */ + struct ofproto_dpif *ofproto = ofproto_; + struct ofport_dpif *port; - case NXAST_SNAT__OBSOLETE: + switch (ofp_port) { + case OFPP_IN_PORT: + case OFPP_TABLE: + case OFPP_NORMAL: + case OFPP_FLOOD: + case OFPP_ALL: + case OFPP_LOCAL: + return true; + case OFPP_CONTROLLER: /* Not supported by the bundle action. */ + return false; default: - VLOG_DBG_RL(&rl, "unknown Nicira action type %d", (int) subtype); - break; + port = get_ofp_port(ofproto, ofp_port); + return port ? port->may_enable : false; } } @@ -3004,8 +3125,8 @@ do_xlate_actions(const union ofp_action *in, size_t n_in, struct action_xlate_ctx *ctx) { const struct ofport_dpif *port; - struct actions_iterator iter; const union ofp_action *ia; + size_t left; port = get_ofp_port(ctx->ofproto, ctx->flow.in_port); if (port @@ -3017,87 +3138,138 @@ do_xlate_actions(const union ofp_action *in, size_t n_in, return; } - for (ia = actions_first(&iter, in, n_in); ia; ia = actions_next(&iter)) { - enum ofp_action_type type = ntohs(ia->type); + OFPUTIL_ACTION_FOR_EACH_UNSAFE (ia, left, in, n_in) { const struct ofp_action_dl_addr *oada; - - switch (type) { - case OFPAT_OUTPUT: + const struct nx_action_resubmit *nar; + const struct nx_action_set_tunnel *nast; + const struct nx_action_set_queue *nasq; + const struct nx_action_multipath *nam; + const struct nx_action_autopath *naa; + const struct nx_action_bundle *nab; + enum ofputil_action_code code; + ovs_be64 tun_id; + + code = ofputil_decode_action_unsafe(ia); + switch (code) { + case OFPUTIL_OFPAT_OUTPUT: xlate_output_action(ctx, &ia->output); break; - case OFPAT_SET_VLAN_VID: + case OFPUTIL_OFPAT_SET_VLAN_VID: ctx->flow.vlan_tci &= ~htons(VLAN_VID_MASK); ctx->flow.vlan_tci |= ia->vlan_vid.vlan_vid | htons(VLAN_CFI); - xlate_set_dl_tci(ctx); break; - case OFPAT_SET_VLAN_PCP: + case OFPUTIL_OFPAT_SET_VLAN_PCP: ctx->flow.vlan_tci &= ~htons(VLAN_PCP_MASK); ctx->flow.vlan_tci |= htons( (ia->vlan_pcp.vlan_pcp << VLAN_PCP_SHIFT) | VLAN_CFI); - xlate_set_dl_tci(ctx); break; - case OFPAT_STRIP_VLAN: + case OFPUTIL_OFPAT_STRIP_VLAN: ctx->flow.vlan_tci = htons(0); - xlate_set_dl_tci(ctx); break; - case OFPAT_SET_DL_SRC: + case OFPUTIL_OFPAT_SET_DL_SRC: oada = ((struct ofp_action_dl_addr *) ia); - nl_msg_put_unspec(ctx->odp_actions, ODP_ACTION_ATTR_SET_DL_SRC, - oada->dl_addr, ETH_ADDR_LEN); memcpy(ctx->flow.dl_src, oada->dl_addr, ETH_ADDR_LEN); break; - case OFPAT_SET_DL_DST: + case OFPUTIL_OFPAT_SET_DL_DST: oada = ((struct ofp_action_dl_addr *) ia); - nl_msg_put_unspec(ctx->odp_actions, ODP_ACTION_ATTR_SET_DL_DST, - oada->dl_addr, ETH_ADDR_LEN); memcpy(ctx->flow.dl_dst, oada->dl_addr, ETH_ADDR_LEN); break; - case OFPAT_SET_NW_SRC: - nl_msg_put_be32(ctx->odp_actions, ODP_ACTION_ATTR_SET_NW_SRC, - ia->nw_addr.nw_addr); + case OFPUTIL_OFPAT_SET_NW_SRC: ctx->flow.nw_src = ia->nw_addr.nw_addr; break; - case OFPAT_SET_NW_DST: - nl_msg_put_be32(ctx->odp_actions, ODP_ACTION_ATTR_SET_NW_DST, - ia->nw_addr.nw_addr); + case OFPUTIL_OFPAT_SET_NW_DST: ctx->flow.nw_dst = ia->nw_addr.nw_addr; break; - case OFPAT_SET_NW_TOS: - nl_msg_put_u8(ctx->odp_actions, ODP_ACTION_ATTR_SET_NW_TOS, - ia->nw_tos.nw_tos); - ctx->flow.nw_tos = ia->nw_tos.nw_tos; + case OFPUTIL_OFPAT_SET_NW_TOS: + ctx->flow.nw_tos = ia->nw_tos.nw_tos & IP_DSCP_MASK; break; - case OFPAT_SET_TP_SRC: - nl_msg_put_be16(ctx->odp_actions, ODP_ACTION_ATTR_SET_TP_SRC, - ia->tp_port.tp_port); + case OFPUTIL_OFPAT_SET_TP_SRC: ctx->flow.tp_src = ia->tp_port.tp_port; break; - case OFPAT_SET_TP_DST: - nl_msg_put_be16(ctx->odp_actions, ODP_ACTION_ATTR_SET_TP_DST, - ia->tp_port.tp_port); + case OFPUTIL_OFPAT_SET_TP_DST: ctx->flow.tp_dst = ia->tp_port.tp_port; break; - case OFPAT_VENDOR: - xlate_nicira_action(ctx, (const struct nx_action_header *) ia); + case OFPUTIL_OFPAT_ENQUEUE: + xlate_enqueue_action(ctx, (const struct ofp_action_enqueue *) ia); break; - case OFPAT_ENQUEUE: - xlate_enqueue_action(ctx, (const struct ofp_action_enqueue *) ia); + case OFPUTIL_NXAST_RESUBMIT: + nar = (const struct nx_action_resubmit *) ia; + xlate_table_action(ctx, ntohs(nar->in_port), ctx->table_id); + break; + + case OFPUTIL_NXAST_RESUBMIT_TABLE: + xlate_resubmit_table(ctx, (const struct nx_action_resubmit *) ia); + break; + + case OFPUTIL_NXAST_SET_TUNNEL: + nast = (const struct nx_action_set_tunnel *) ia; + tun_id = htonll(ntohl(nast->tun_id)); + ctx->flow.tun_id = tun_id; + break; + + case OFPUTIL_NXAST_SET_QUEUE: + nasq = (const struct nx_action_set_queue *) ia; + xlate_set_queue_action(ctx, nasq); + break; + + case OFPUTIL_NXAST_POP_QUEUE: + ctx->priority = 0; + break; + + case OFPUTIL_NXAST_REG_MOVE: + nxm_execute_reg_move((const struct nx_action_reg_move *) ia, + &ctx->flow); + break; + + case OFPUTIL_NXAST_REG_LOAD: + nxm_execute_reg_load((const struct nx_action_reg_load *) ia, + &ctx->flow); + break; + + case OFPUTIL_NXAST_NOTE: + /* Nothing to do. */ break; - default: - VLOG_DBG_RL(&rl, "unknown action type %d", (int) type); + case OFPUTIL_NXAST_SET_TUNNEL64: + tun_id = ((const struct nx_action_set_tunnel64 *) ia)->tun_id; + ctx->flow.tun_id = tun_id; + break; + + case OFPUTIL_NXAST_MULTIPATH: + nam = (const struct nx_action_multipath *) ia; + multipath_execute(nam, &ctx->flow); + break; + + case OFPUTIL_NXAST_AUTOPATH: + naa = (const struct nx_action_autopath *) ia; + xlate_autopath(ctx, naa); + break; + + case OFPUTIL_NXAST_BUNDLE: + ctx->ofproto->has_bundle_action = true; + nab = (const struct nx_action_bundle *) ia; + xlate_output_action__(ctx, bundle_execute(nab, &ctx->flow, + slave_enabled_cb, + ctx->ofproto), 0); + break; + + case OFPUTIL_NXAST_BUNDLE_LOAD: + ctx->ofproto->has_bundle_action = true; + nab = (const struct nx_action_bundle *) ia; + bundle_execute_load(nab, &ctx->flow, slave_enabled_cb, + ctx->ofproto); break; } } @@ -3125,7 +3297,10 @@ xlate_actions(struct action_xlate_ctx *ctx, ctx->may_set_up_flow = true; ctx->nf_output_iface = NF_OUT_DROP; ctx->recurse = 0; - ctx->last_pop_priority = -1; + ctx->priority = 0; + ctx->base_priority = 0; + ctx->base_flow = ctx->flow; + ctx->table_id = 0; if (process_special(ctx->ofproto, &ctx->flow, ctx->packet)) { ctx->may_set_up_flow = false; @@ -3133,8 +3308,6 @@ xlate_actions(struct action_xlate_ctx *ctx, do_xlate_actions(in, n_in, ctx); } - remove_pop_action(ctx); - /* Check with in-band control to see if we're allowed to set up this * flow. */ if (!connmgr_may_set_up_flow(ctx->ofproto->up.connmgr, &ctx->flow, @@ -3240,7 +3413,8 @@ dst_is_duplicate(const struct dst_set *set, const struct dst *test) static bool ofbundle_trunks_vlan(const struct ofbundle *bundle, uint16_t vlan) { - return bundle->vlan < 0 && vlan_bitmap_contains(bundle->trunks, vlan); + return (bundle->vlan < 0 + && (!bundle->trunks || bitmap_is_set(bundle->trunks, vlan))); } static bool @@ -3286,7 +3460,48 @@ compose_dsts(struct action_xlate_ctx *ctx, uint16_t vlan, static bool vlan_is_mirrored(const struct ofmirror *m, int vlan) { - return vlan_bitmap_contains(m->vlans, vlan); + return !m->vlans || bitmap_is_set(m->vlans, vlan); +} + +/* Returns true if a packet with Ethernet destination MAC 'dst' may be mirrored + * to a VLAN. In general most packets may be mirrored but we want to drop + * protocols that may confuse switches. */ +static bool +eth_dst_may_rspan(const uint8_t dst[ETH_ADDR_LEN]) +{ + /* If you change this function's behavior, please update corresponding + * documentation in vswitch.xml at the same time. */ + if (dst[0] != 0x01) { + /* All the currently banned MACs happen to start with 01 currently, so + * this is a quick way to eliminate most of the good ones. */ + } else { + if (eth_addr_is_reserved(dst)) { + /* Drop STP, IEEE pause frames, and other reserved protocols + * (01-80-c2-00-00-0x). */ + return false; + } + + if (dst[0] == 0x01 && dst[1] == 0x00 && dst[2] == 0x0c) { + /* Cisco OUI. */ + if ((dst[3] & 0xfe) == 0xcc && + (dst[4] & 0xfe) == 0xcc && + (dst[5] & 0xfe) == 0xcc) { + /* Drop the following protocols plus others following the same + pattern: + + CDP, VTP, DTP, PAgP (01-00-0c-cc-cc-cc) + Spanning Tree PVSTP+ (01-00-0c-cc-cc-cd) + STP Uplink Fast (01-00-0c-cd-cd-cd) */ + return false; + } + + if (!(dst[3] | dst[4] | dst[5])) { + /* Drop Inter Switch Link packets (01-00-0c-00-00-00). */ + return false; + } + } + } + return true; } static void @@ -3323,7 +3538,7 @@ compose_mirror_dsts(struct action_xlate_ctx *ctx, && !dst_is_duplicate(set, &dst)) { dst_set_add(set, &dst); } - } else { + } else if (eth_dst_may_rspan(ctx->flow.dl_dst)) { struct ofbundle *bundle; HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) { @@ -3526,7 +3741,7 @@ is_admissible(struct ofproto_dpif *ofproto, const struct flow *flow, /* Find the port and bundle for the received packet. */ in_port = get_ofp_port(ofproto, flow->in_port); - *in_bundlep = in_bundle = in_port->bundle; + *in_bundlep = in_bundle = in_port ? in_port->bundle : NULL; if (!in_port || !in_bundle) { /* No interface? Something fishy... */ if (have_packet) { @@ -3595,10 +3810,7 @@ is_admissible(struct ofproto_dpif *ofproto, const struct flow *flow, return true; } -/* If the composed actions may be applied to any packet in the given 'flow', - * returns true. Otherwise, the actions should only be applied to 'packet', or - * not at all, if 'packet' was NULL. */ -static bool +static void xlate_normal(struct action_xlate_ctx *ctx) { struct ofbundle *in_bundle; @@ -3629,7 +3841,8 @@ xlate_normal(struct action_xlate_ctx *ctx) * of time where we could learn from a packet reflected on a bond and * blackhole packets before the learning table is updated to reflect * the correct port. */ - return false; + ctx->may_set_up_flow = false; + return; } else { out_bundle = OFBUNDLE_FLOOD; } @@ -3643,8 +3856,6 @@ done: if (in_bundle) { compose_actions(ctx, vlan, in_bundle, out_bundle); } - - return true; } static bool @@ -3676,13 +3887,18 @@ packet_out(struct ofproto *ofproto_, struct ofpbuf *packet, error = validate_actions(ofp_actions, n_ofp_actions, flow, ofproto->max_ports); if (!error) { + struct odputil_keybuf keybuf; struct action_xlate_ctx ctx; struct ofpbuf *odp_actions; + struct ofpbuf key; + + ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); + odp_flow_key_from_flow(&key, flow); action_xlate_ctx_init(&ctx, ofproto, flow, packet); odp_actions = xlate_actions(&ctx, ofp_actions, n_ofp_actions); - dpif_execute(ofproto->dpif, odp_actions->data, odp_actions->size, - packet); + dpif_execute(ofproto->dpif, key.data, key.size, + odp_actions->data, odp_actions->size, packet); ofpbuf_delete(odp_actions); } return error; @@ -3738,7 +3954,8 @@ struct ofproto_trace { }; static void -trace_format_rule(struct ds *result, int level, const struct rule *rule) +trace_format_rule(struct ds *result, uint8_t table_id, int level, + const struct rule_dpif *rule) { ds_put_char_multiple(result, '\t', level); if (!rule) { @@ -3746,15 +3963,14 @@ trace_format_rule(struct ds *result, int level, const struct rule *rule) return; } - ds_put_format(result, "Rule: cookie=%#"PRIx64" ", - ntohll(rule->flow_cookie)); - cls_rule_format(&rule->cr, result); + ds_put_format(result, "Rule: table=%"PRIu8" cookie=%#"PRIx64" ", + table_id, ntohll(rule->up.flow_cookie)); + cls_rule_format(&rule->up.cr, result); ds_put_char(result, '\n'); ds_put_char_multiple(result, '\t', level); ds_put_cstr(result, "OpenFlow "); - ofp_print_actions(result, (const struct ofp_action_header *) rule->actions, - rule->n_actions * sizeof *rule->actions); + ofp_print_actions(result, rule->up.actions, rule->up.n_actions); ds_put_char(result, '\n'); } @@ -3781,33 +3997,78 @@ trace_resubmit(struct action_xlate_ctx *ctx, struct rule_dpif *rule) ds_put_char(result, '\n'); trace_format_flow(result, ctx->recurse + 1, "Resubmitted flow", trace); - trace_format_rule(result, ctx->recurse + 1, &rule->up); + trace_format_rule(result, ctx->table_id, ctx->recurse + 1, rule); } static void ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_, void *aux OVS_UNUSED) { - char *dpname, *in_port_s, *tun_id_s, *packet_s; + char *dpname, *arg1, *arg2, *arg3; char *args = xstrdup(args_); char *save_ptr = NULL; struct ofproto_dpif *ofproto; - struct ofpbuf packet; + struct ofpbuf odp_key; + struct ofpbuf *packet; struct rule_dpif *rule; struct ds result; struct flow flow; - uint16_t in_port; - ovs_be64 tun_id; char *s; - ofpbuf_init(&packet, strlen(args) / 2); + packet = NULL; + ofpbuf_init(&odp_key, 0); ds_init(&result); dpname = strtok_r(args, " ", &save_ptr); - tun_id_s = strtok_r(NULL, " ", &save_ptr); - in_port_s = strtok_r(NULL, " ", &save_ptr); - packet_s = strtok_r(NULL, "", &save_ptr); /* Get entire rest of line. */ - if (!dpname || !in_port_s || !packet_s) { + arg1 = strtok_r(NULL, " ", &save_ptr); + arg2 = strtok_r(NULL, " ", &save_ptr); + arg3 = strtok_r(NULL, "", &save_ptr); /* Get entire rest of line. */ + if (dpname && arg1 && !arg2 && !arg3) { + /* ofproto/trace dpname flow */ + int error; + + /* Convert string to ODP key. */ + ofpbuf_init(&odp_key, 0); + error = odp_flow_key_from_string(arg1, &odp_key); + if (error) { + unixctl_command_reply(conn, 501, "Bad flow syntax"); + goto exit; + } + + /* Convert odp_key to flow. */ + error = odp_flow_key_to_flow(odp_key.data, odp_key.size, &flow); + if (error) { + unixctl_command_reply(conn, 501, "Invalid flow"); + goto exit; + } + } else if (dpname && arg1 && arg2 && arg3) { + /* ofproto/trace dpname tun_id in_port packet */ + uint16_t in_port; + ovs_be64 tun_id; + + tun_id = htonll(strtoull(arg1, NULL, 0)); + in_port = ofp_port_to_odp_port(atoi(arg2)); + + packet = ofpbuf_new(strlen(args) / 2); + arg3 = ofpbuf_put_hex(packet, arg3, NULL); + arg3 += strspn(arg3, " "); + if (*arg3 != '\0') { + unixctl_command_reply(conn, 501, "Trailing garbage in command"); + goto exit; + } + if (packet->size < ETH_HEADER_LEN) { + unixctl_command_reply(conn, 501, + "Packet data too short for Ethernet"); + goto exit; + } + + ds_put_cstr(&result, "Packet: "); + s = ofp_packet_to_string(packet->data, packet->size, packet->size); + ds_put_cstr(&result, s); + free(s); + + flow_extract(packet, tun_id, in_port, &flow); + } else { unixctl_command_reply(conn, 501, "Bad command syntax"); goto exit; } @@ -3819,39 +4080,19 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_, goto exit; } - tun_id = htonll(strtoull(tun_id_s, NULL, 0)); - in_port = ofp_port_to_odp_port(atoi(in_port_s)); - - packet_s = ofpbuf_put_hex(&packet, packet_s, NULL); - packet_s += strspn(packet_s, " "); - if (*packet_s != '\0') { - unixctl_command_reply(conn, 501, "Trailing garbage in command"); - goto exit; - } - if (packet.size < ETH_HEADER_LEN) { - unixctl_command_reply(conn, 501, "Packet data too short for Ethernet"); - goto exit; - } - - ds_put_cstr(&result, "Packet: "); - s = ofp_packet_to_string(packet.data, packet.size, packet.size); - ds_put_cstr(&result, s); - free(s); - - flow_extract(&packet, tun_id, in_port, &flow); ds_put_cstr(&result, "Flow: "); flow_format(&result, &flow); ds_put_char(&result, '\n'); - rule = rule_dpif_lookup(ofproto, &flow); - trace_format_rule(&result, 0, &rule->up); + rule = rule_dpif_lookup(ofproto, &flow, 0); + trace_format_rule(&result, 0, 0, rule); if (rule) { struct ofproto_trace trace; struct ofpbuf *odp_actions; trace.result = &result; trace.flow = flow; - action_xlate_ctx_init(&trace.ctx, ofproto, &flow, &packet); + action_xlate_ctx_init(&trace.ctx, ofproto, &flow, packet); trace.ctx.resubmit_hook = trace_resubmit; odp_actions = xlate_actions(&trace.ctx, rule->up.actions, rule->up.n_actions); @@ -3861,16 +4102,42 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_, ds_put_cstr(&result, "Datapath actions: "); format_odp_actions(&result, odp_actions->data, odp_actions->size); ofpbuf_delete(odp_actions); + + if (!trace.ctx.may_set_up_flow) { + if (packet) { + ds_put_cstr(&result, "\nThis flow is not cachable."); + } else { + ds_put_cstr(&result, "\nThe datapath actions are incomplete--" + "for complete actions, please supply a packet."); + } + } } unixctl_command_reply(conn, 200, ds_cstr(&result)); exit: ds_destroy(&result); - ofpbuf_uninit(&packet); + ofpbuf_delete(packet); + ofpbuf_uninit(&odp_key); free(args); } +static void +ofproto_dpif_clog(struct unixctl_conn *conn OVS_UNUSED, + const char *args_ OVS_UNUSED, void *aux OVS_UNUSED) +{ + clogged = true; + unixctl_command_reply(conn, 200, NULL); +} + +static void +ofproto_dpif_unclog(struct unixctl_conn *conn OVS_UNUSED, + const char *args_ OVS_UNUSED, void *aux OVS_UNUSED) +{ + clogged = false; + unixctl_command_reply(conn, 200, NULL); +} + static void ofproto_dpif_unixctl_init(void) { @@ -3882,6 +4149,9 @@ ofproto_dpif_unixctl_init(void) unixctl_command_register("ofproto/trace", ofproto_unixctl_trace, NULL); unixctl_command_register("fdb/show", ofproto_unixctl_fdb_show, NULL); + + unixctl_command_register("ofproto/clog", ofproto_dpif_clog, NULL); + unixctl_command_register("ofproto/unclog", ofproto_dpif_unclog, NULL); } const struct ofproto_class ofproto_dpif_class = { @@ -3912,6 +4182,7 @@ const struct ofproto_class ofproto_dpif_class = { port_poll, port_poll_wait, port_is_lacp_current, + NULL, /* rule_choose_table */ rule_alloc, rule_construct, rule_destruct, @@ -3926,7 +4197,7 @@ const struct ofproto_class ofproto_dpif_class = { get_netflow_ids, set_sflow, set_cfm, - get_cfm, + get_cfm_fault, bundle_set, bundle_remove, mirror_set,