X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=ofproto%2Fofproto-dpif.c;h=58c50643696de7f3b33cfd5229f0da72ef951b2e;hb=60cda7d69b0bfd242045d346f2cd169836a3d78e;hp=c597114765ab41570042591b39c746a846ae66a7;hpb=3f142f59f2859ef5c2bc124405ab4d683d3f416b;p=sliver-openvswitch.git diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index c59711476..58c506436 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -53,6 +53,7 @@ #include "ofproto-dpif-ipfix.h" #include "ofproto-dpif-mirror.h" #include "ofproto-dpif-monitor.h" +#include "ofproto-dpif-rid.h" #include "ofproto-dpif-sflow.h" #include "ofproto-dpif-upcall.h" #include "ofproto-dpif-xlate.h" @@ -77,6 +78,9 @@ enum { N_TABLES = 255 }; enum { TBL_INTERNAL = N_TABLES - 1 }; /* Used for internal hidden rules. */ BUILD_ASSERT_DECL(N_TABLES >= 2 && N_TABLES <= 255); +/* No bfd/cfm status change. */ +#define NO_STATUS_CHANGE -1 + struct flow_miss; struct rule_dpif { @@ -87,9 +91,7 @@ struct rule_dpif { * - Do include packets and bytes from datapath flows which have not * recently been processed by a revalidator. */ struct ovs_mutex stats_mutex; - uint64_t packet_count OVS_GUARDED; /* Number of packets received. */ - uint64_t byte_count OVS_GUARDED; /* Number of bytes received. */ - long long int used; /* Last used time (msec). */ + struct dpif_flow_stats stats OVS_GUARDED; }; static void rule_get_stats(struct rule *, uint64_t *packets, uint64_t *bytes, @@ -254,6 +256,10 @@ struct dpif_backer { bool recv_set_enable; /* Enables or disables receiving packets. */ + /* Recirculation. */ + struct recirc_id_pool *rid_pool; /* Recirculation ID pool. */ + bool enable_recirc; /* True if the datapath supports recirculation */ + /* True if the datapath supports variable-length * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions. * False if the datapath supports only 8-byte (or shorter) userdata. */ @@ -311,6 +317,8 @@ struct ofproto_dpif { /* Work queues. 
*/ struct guarded_list pins; /* Contains "struct ofputil_packet_in"s. */ + struct seq *pins_seq; /* For notifying 'pins' reception. */ + uint64_t pins_seqno; }; /* All existing ofproto_dpif instances, indexed by ->up.name. */ @@ -331,9 +339,15 @@ ofproto_dpif_get_max_mpls_depth(const struct ofproto_dpif *ofproto) return ofproto->backer->max_mpls_depth; } +bool +ofproto_dpif_get_enable_recirc(const struct ofproto_dpif *ofproto) +{ + return ofproto->backer->enable_recirc; +} + static struct ofport_dpif *get_ofp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port); -static void ofproto_trace(struct ofproto_dpif *, const struct flow *, +static void ofproto_trace(struct ofproto_dpif *, struct flow *, const struct ofpbuf *packet, const struct ofpact[], size_t ofpacts_len, struct ds *); @@ -353,6 +367,18 @@ ofproto_dpif_flow_mod(struct ofproto_dpif *ofproto, ofproto_flow_mod(&ofproto->up, fm); } +/* Resets the modified time for 'rule' or an equivalent rule. If 'rule' is not + * in the classifier, but an equivalent rule is, unref 'rule' and ref the new + * rule. Otherwise if 'rule' is no longer installed in the classifier, + * reinstall it. + * + * Returns the rule whose modified time has been reset. */ +struct rule_dpif * +ofproto_dpif_refresh_rule(struct rule_dpif *rule) +{ + return rule_dpif_cast(ofproto_refresh_rule(&rule->up)); +} + /* Appends 'pin' to the queue of "packet ins" to be sent to the controller. * Takes ownership of 'pin' and pin->packet. */ void @@ -364,6 +390,21 @@ ofproto_dpif_send_packet_in(struct ofproto_dpif *ofproto, free(CONST_CAST(void *, pin->up.packet)); free(pin); } + + /* Wakes up main thread for packet-in I/O. */ + seq_change(ofproto->pins_seq); +} + +/* The default "table-miss" behaviour for OpenFlow1.3+ is to drop the + * packet rather than to send the packet to the controller. + * + * This function returns true to indicate that a packet_in message + * for a "table-miss" should be sent to at least one controller. 
+ * False otherwise. */ +bool +ofproto_dpif_wants_packet_in_on_miss(struct ofproto_dpif *ofproto) +{ + return connmgr_wants_packet_in_on_miss(ofproto->up.connmgr); } /* Factory functions. */ @@ -570,6 +611,7 @@ type_run(const char *type) ofproto->netflow, ofproto->up.frag_handling, ofproto->up.forward_bpdu, connmgr_has_in_band(ofproto->up.connmgr), + ofproto->backer->enable_recirc, ofproto->backer->variable_length_userdata, ofproto->backer->max_mpls_depth); @@ -781,9 +823,9 @@ close_dpif_backer(struct dpif_backer *backer) ovs_rwlock_destroy(&backer->odp_to_ofport_lock); hmap_destroy(&backer->odp_to_ofport_map); shash_find_and_delete(&all_dpif_backers, backer->type); + recirc_id_pool_destroy(backer->rid_pool); free(backer->type); dpif_close(backer->dpif); - free(backer); } @@ -795,6 +837,7 @@ struct odp_garbage { static bool check_variable_length_userdata(struct dpif_backer *backer); static size_t check_max_mpls_depth(struct dpif_backer *backer); +static bool check_recirc(struct dpif_backer *backer); static int open_dpif_backer(const char *type, struct dpif_backer **backerp) @@ -805,6 +848,7 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) struct shash_node *node; struct list garbage_list; struct odp_garbage *garbage, *next; + struct sset names; char *backer_name; const char *name; @@ -894,8 +938,10 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) close_dpif_backer(backer); return error; } + backer->enable_recirc = check_recirc(backer); backer->variable_length_userdata = check_variable_length_userdata(backer); backer->max_mpls_depth = check_max_mpls_depth(backer); + backer->rid_pool = recirc_id_pool_create(); if (backer->recv_set_enable) { udpif_set_threads(backer->udpif, n_handlers, n_revalidators); @@ -904,6 +950,61 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) return error; } +/* Tests whether 'backer''s datapath supports recirculation. Only newer datapath + * supports OVS_KEY_ATTR in 
OVS_ACTION_ATTR_USERSPACE actions. We need to + * disable some features on older datapaths that don't support this feature. + * + * Returns false if 'backer' definitely does not support recirculation, true if + * it seems to support recirculation or if at least the error we get is + * ambiguous. */ +static bool +check_recirc(struct dpif_backer *backer) +{ + struct flow flow; + struct odputil_keybuf keybuf; + struct ofpbuf key; + int error; + bool enable_recirc = false; + + memset(&flow, 0, sizeof flow); + flow.recirc_id = 1; + flow.dp_hash = 1; + + ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); + odp_flow_key_from_flow(&key, &flow, NULL, 0); + + error = dpif_flow_put(backer->dpif, DPIF_FP_CREATE | DPIF_FP_MODIFY, + ofpbuf_data(&key), ofpbuf_size(&key), NULL, 0, NULL, + 0, NULL); + if (error && error != EEXIST) { + if (error != EINVAL) { + VLOG_WARN("%s: Reciculation flow probe failed (%s)", + dpif_name(backer->dpif), ovs_strerror(error)); + } + goto done; + } + + error = dpif_flow_del(backer->dpif, ofpbuf_data(&key), ofpbuf_size(&key), + NULL); + if (error) { + VLOG_WARN("%s: failed to delete recirculation feature probe flow", + dpif_name(backer->dpif)); + } + + enable_recirc = true; + +done: + if (enable_recirc) { + VLOG_INFO("%s: Datapath supports recirculation", + dpif_name(backer->dpif)); + } else { + VLOG_INFO("%s: Datapath does not support recirculation", + dpif_name(backer->dpif)); + } + + return enable_recirc; +} + /* Tests whether 'backer''s datapath supports variable-length * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions. 
We need * to disable some features on older datapaths that don't support this @@ -932,7 +1033,7 @@ check_variable_length_userdata(struct dpif_backer *backer) ofpbuf_init(&actions, 64); start = nl_msg_start_nested(&actions, OVS_ACTION_ATTR_USERSPACE); nl_msg_put_u32(&actions, OVS_USERSPACE_ATTR_PID, - dpif_port_get_pid(backer->dpif, ODPP_NONE)); + dpif_port_get_pid(backer->dpif, ODPP_NONE, 0)); nl_msg_put_unspec_zero(&actions, OVS_USERSPACE_ATTR_USERDATA, 4); nl_msg_end_nested(&actions, start); @@ -943,8 +1044,8 @@ check_variable_length_userdata(struct dpif_backer *backer) /* Execute the actions. On older datapaths this fails with ERANGE, on * newer datapaths it succeeds. */ - execute.actions = actions.data; - execute.actions_len = actions.size; + execute.actions = ofpbuf_data(&actions); + execute.actions_len = ofpbuf_size(&actions); execute.packet = &packet; execute.md = PKT_METADATA_INITIALIZER(0); execute.needs_help = false; @@ -1003,10 +1104,10 @@ check_max_mpls_depth(struct dpif_backer *backer) flow_set_mpls_bos(&flow, n, 1); ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); - odp_flow_key_from_flow(&key, &flow, 0); + odp_flow_key_from_flow(&key, &flow, NULL, 0); error = dpif_flow_put(backer->dpif, DPIF_FP_CREATE | DPIF_FP_MODIFY, - key.data, key.size, NULL, 0, NULL, 0, NULL); + ofpbuf_data(&key), ofpbuf_size(&key), NULL, 0, NULL, 0, NULL); if (error && error != EEXIST) { if (error != EINVAL) { VLOG_WARN("%s: MPLS stack length feature probe failed (%s)", @@ -1015,7 +1116,7 @@ check_max_mpls_depth(struct dpif_backer *backer) break; } - error = dpif_flow_del(backer->dpif, key.data, key.size, NULL); + error = dpif_flow_del(backer->dpif, ofpbuf_data(&key), ofpbuf_size(&key), NULL); if (error) { VLOG_WARN("%s: failed to delete MPLS feature probe flow", dpif_name(backer->dpif)); @@ -1064,6 +1165,9 @@ construct(struct ofproto *ofproto_) sset_init(&ofproto->port_poll_set); ofproto->port_poll_errno = 0; ofproto->change_seq = 0; + ofproto->pins_seq = seq_create(); + 
ofproto->pins_seqno = seq_read(ofproto->pins_seq); + SHASH_FOR_EACH_SAFE (node, next, &init_ofp_ports) { struct iface_hint *iface_hint = node->data; @@ -1087,50 +1191,27 @@ construct(struct ofproto *ofproto_) ofproto_init_tables(ofproto_, N_TABLES); error = add_internal_flows(ofproto); + ofproto->up.tables[TBL_INTERNAL].flags = OFTABLE_HIDDEN | OFTABLE_READONLY; return error; } static int -add_internal_flow(struct ofproto_dpif *ofproto, int id, +add_internal_miss_flow(struct ofproto_dpif *ofproto, int id, const struct ofpbuf *ofpacts, struct rule_dpif **rulep) { - struct ofputil_flow_mod fm; + struct match match; int error; + struct rule *rule; - match_init_catchall(&fm.match); - fm.priority = 0; - match_set_reg(&fm.match, 0, id); - fm.new_cookie = htonll(0); - fm.cookie = htonll(0); - fm.cookie_mask = htonll(0); - fm.modify_cookie = false; - fm.table_id = TBL_INTERNAL; - fm.command = OFPFC_ADD; - fm.idle_timeout = 0; - fm.hard_timeout = 0; - fm.buffer_id = 0; - fm.out_port = 0; - fm.flags = 0; - fm.ofpacts = ofpacts->data; - fm.ofpacts_len = ofpacts->size; - - error = ofproto_flow_mod(&ofproto->up, &fm); - if (error) { - VLOG_ERR_RL(&rl, "failed to add internal flow %d (%s)", - id, ofperr_to_string(error)); - return error; - } + match_init_catchall(&match); + match_set_reg(&match, 0, id); - if (rule_dpif_lookup_in_table(ofproto, &fm.match.flow, NULL, TBL_INTERNAL, - rulep)) { - rule_dpif_unref(*rulep); - } else { - OVS_NOT_REACHED(); - } + error = ofproto_dpif_add_internal_flow(ofproto, &match, 0, ofpacts, &rule); + *rulep = error ? 
NULL : rule_dpif_cast(rule); - return 0; + return error; } static int @@ -1139,6 +1220,9 @@ add_internal_flows(struct ofproto_dpif *ofproto) struct ofpact_controller *controller; uint64_t ofpacts_stub[128 / 8]; struct ofpbuf ofpacts; + struct rule *unused_rulep OVS_UNUSED; + struct ofpact_resubmit *resubmit; + struct match match; int error; int id; @@ -1151,20 +1235,53 @@ add_internal_flows(struct ofproto_dpif *ofproto) controller->reason = OFPR_NO_MATCH; ofpact_pad(&ofpacts); - error = add_internal_flow(ofproto, id++, &ofpacts, &ofproto->miss_rule); + error = add_internal_miss_flow(ofproto, id++, &ofpacts, + &ofproto->miss_rule); if (error) { return error; } ofpbuf_clear(&ofpacts); - error = add_internal_flow(ofproto, id++, &ofpacts, + error = add_internal_miss_flow(ofproto, id++, &ofpacts, &ofproto->no_packet_in_rule); if (error) { return error; } - error = add_internal_flow(ofproto, id++, &ofpacts, + error = add_internal_miss_flow(ofproto, id++, &ofpacts, &ofproto->drop_frags_rule); + if (error) { + return error; + } + + /* Continue non-recirculation rule lookups from table 0. + * + * (priority=2), recirc=0, actions=resubmit(, 0) + */ + resubmit = ofpact_put_RESUBMIT(&ofpacts); + resubmit->ofpact.compat = 0; + resubmit->in_port = OFPP_IN_PORT; + resubmit->table_id = 0; + + match_init_catchall(&match); + match_set_recirc_id(&match, 0); + + error = ofproto_dpif_add_internal_flow(ofproto, &match, 2, &ofpacts, + &unused_rulep); + if (error) { + return error; + } + + /* Drop any run away recirc rule lookups. Recirc_id has to be + * non-zero when reaching this rule. 
+ * + * (priority=1), *, actions=drop + */ + ofpbuf_clear(&ofpacts); + match_init_catchall(&match); + error = ofproto_dpif_add_internal_flow(ofproto, &match, 1, &ofpacts, + &unused_rulep); + return error; } @@ -1224,6 +1341,8 @@ destruct(struct ofproto *ofproto_) ovs_mutex_destroy(&ofproto->stats_mutex); ovs_mutex_destroy(&ofproto->vsp_mutex); + seq_destroy(ofproto->pins_seq); + close_dpif_backer(ofproto->backer); } @@ -1255,6 +1374,12 @@ run(struct ofproto *ofproto_) } } + /* Always updates the ofproto->pins_seqno to avoid frequent wakeup during + * flow restore. Even though nothing is processed during flow restore, + * all queued 'pins' will be handled immediately when flow restore + * completes. */ + ofproto->pins_seqno = seq_read(ofproto->pins_seq); + if (ofproto->netflow) { netflow_run(ofproto->netflow); } @@ -1359,6 +1484,7 @@ wait(struct ofproto *ofproto_) } seq_wait(udpif_dump_seq(ofproto->backer->udpif), ofproto->dump_seq); + seq_wait(ofproto->pins_seq, ofproto->pins_seqno); } static void @@ -1373,9 +1499,14 @@ type_get_memory_usage(const char *type, struct simap *usage) } static void -flush(struct ofproto *ofproto OVS_UNUSED) +flush(struct ofproto *ofproto_) { - udpif_flush(); + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + struct dpif_backer *backer = ofproto->backer; + + if (backer) { + udpif_flush(backer->udpif); + } } static void @@ -1696,22 +1827,28 @@ out: return error; } -static bool +static int get_cfm_status(const struct ofport *ofport_, struct ofproto_cfm_status *status) { struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); + int ret = 0; if (ofport->cfm) { - status->faults = cfm_get_fault(ofport->cfm); - status->flap_count = cfm_get_flap_count(ofport->cfm); - status->remote_opstate = cfm_get_opup(ofport->cfm); - status->health = cfm_get_health(ofport->cfm); - cfm_get_remote_mpids(ofport->cfm, &status->rmps, &status->n_rmps); - return true; + if (cfm_check_status_change(ofport->cfm)) { + status->faults = 
cfm_get_fault(ofport->cfm); + status->flap_count = cfm_get_flap_count(ofport->cfm); + status->remote_opstate = cfm_get_opup(ofport->cfm); + status->health = cfm_get_health(ofport->cfm); + cfm_get_remote_mpids(ofport->cfm, &status->rmps, &status->n_rmps); + } else { + ret = NO_STATUS_CHANGE; + } } else { - return false; + ret = ENOENT; } + + return ret; } static int @@ -1736,13 +1873,19 @@ static int get_bfd_status(struct ofport *ofport_, struct smap *smap) { struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); + int ret = 0; if (ofport->bfd) { - bfd_get_status(ofport->bfd, smap); - return 0; + if (bfd_check_status_change(ofport->bfd)) { + bfd_get_status(ofport->bfd, smap); + } else { + ret = NO_STATUS_CHANGE; + } } else { - return ENOENT; + ret = ENOENT; } + + return ret; } /* Spanning Tree. */ @@ -1759,7 +1902,7 @@ send_bpdu_cb(struct ofpbuf *pkt, int port_num, void *ofproto_) VLOG_WARN_RL(&rl, "%s: cannot send BPDU on unknown port %d", ofproto->up.name, port_num); } else { - struct eth_header *eth = pkt->l2; + struct eth_header *eth = ofpbuf_l2(pkt); netdev_get_etheraddr(ofport->up.netdev, eth->eth_src); if (eth_addr_is_zero(eth->eth_src)) { @@ -2327,12 +2470,13 @@ bundle_set(struct ofproto *ofproto_, void *aux, ofproto->backer->need_revalidate = REV_RECONFIGURE; } } else { - bundle->bond = bond_create(s->bond); + bundle->bond = bond_create(s->bond, ofproto); ofproto->backer->need_revalidate = REV_RECONFIGURE; } LIST_FOR_EACH (port, bundle_node, &bundle->ports) { - bond_slave_register(bundle->bond, port, port->up.netdev); + bond_slave_register(bundle->bond, port, + port->up.ofp_port, port->up.netdev); } } else { bond_unref(bundle->bond); @@ -2410,7 +2554,9 @@ bundle_send_learning_packets(struct ofbundle *bundle) learning_packet = bond_compose_learning_packet(bundle->bond, e->mac, e->vlan, &port_void); - learning_packet->private_p = port_void; + /* Temporarily use 'frame' as a private pointer (see below). 
*/ + ovs_assert(learning_packet->frame == ofpbuf_data(learning_packet)); + learning_packet->frame = port_void; list_push_back(&packets, &learning_packet->list_node); } } @@ -2419,8 +2565,11 @@ bundle_send_learning_packets(struct ofbundle *bundle) error = n_packets = n_errors = 0; LIST_FOR_EACH (learning_packet, list_node, &packets) { int ret; + void *port_void = learning_packet->frame; - ret = ofproto_dpif_send_packet(learning_packet->private_p, learning_packet); + /* Restore 'frame'. */ + learning_packet->frame = ofpbuf_data(learning_packet); + ret = ofproto_dpif_send_packet(port_void, learning_packet); if (ret) { error = ret; n_errors++; @@ -2944,7 +3093,7 @@ rule_expire(struct rule_dpif *rule) long long int used; ovs_mutex_lock(&rule->stats_mutex); - used = rule->used; + used = rule->stats.used; ovs_mutex_unlock(&rule->stats_mutex); if (now > used + idle_timeout * 1000) { @@ -2977,6 +3126,7 @@ ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto, ovs_assert((rule != NULL) != (ofpacts != NULL)); dpif_flow_stats_extract(flow, packet, time_msec(), &stats); + if (rule) { rule_dpif_credit_stats(rule, &stats); } @@ -2991,13 +3141,13 @@ ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto, if (in_port == OFPP_NONE) { in_port = OFPP_LOCAL; } - execute.actions = xout.odp_actions.data; - execute.actions_len = xout.odp_actions.size; + execute.actions = ofpbuf_data(&xout.odp_actions); + execute.actions_len = ofpbuf_size(&xout.odp_actions); execute.packet = packet; execute.md.tunnel = flow->tunnel; execute.md.skb_priority = flow->skb_priority; execute.md.pkt_mark = flow->pkt_mark; - execute.md.in_port = ofp_port_to_odp_port(ofproto, in_port); + execute.md.in_port.odp_port = ofp_port_to_odp_port(ofproto, in_port); execute.needs_help = (xout.slow & SLOW_ACTION) != 0; error = dpif_execute(ofproto->backer->dpif, &execute); @@ -3012,9 +3162,9 @@ rule_dpif_credit_stats(struct rule_dpif *rule, const struct dpif_flow_stats *stats) { ovs_mutex_lock(&rule->stats_mutex); - 
rule->packet_count += stats->n_packets; - rule->byte_count += stats->n_bytes; - rule->used = MAX(rule->used, stats->used); + rule->stats.n_packets += stats->n_packets; + rule->stats.n_bytes += stats->n_bytes; + rule->stats.used = MAX(rule->stats.used, stats->used); ovs_mutex_unlock(&rule->stats_mutex); } @@ -3030,6 +3180,12 @@ rule_dpif_is_table_miss(const struct rule_dpif *rule) return rule_is_table_miss(&rule->up); } +bool +rule_dpif_is_internal(const struct rule_dpif *rule) +{ + return rule_is_internal(&rule->up); +} + ovs_be64 rule_dpif_get_flow_cookie(const struct rule_dpif *rule) OVS_REQUIRES(rule->up.mutex) @@ -3053,80 +3209,222 @@ rule_dpif_get_actions(const struct rule_dpif *rule) return rule_get_actions(&rule->up); } -/* Lookup 'flow' in 'ofproto''s classifier. If 'wc' is non-null, sets - * the fields that were relevant as part of the lookup. */ -void -rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow, - struct flow_wildcards *wc, struct rule_dpif **rule) -{ - struct ofport_dpif *port; +/* Lookup 'flow' in table 0 of 'ofproto''s classifier. + * If 'wc' is non-null, sets the fields that were relevant as part of + * the lookup. Returns the table_id where a match or miss occurred. + * + * The return value will be zero unless there was a miss and + * OFPTC11_TABLE_MISS_CONTINUE is in effect for the sequence of tables + * where misses occur. + * + * The rule is returned in '*rule', which is valid at least until the next + * RCU quiescent period. If the '*rule' needs to stay around longer, + * a non-zero 'take_ref' must be passed in to cause a reference to be taken + * on it before this returns. 
*/ +uint8_t +rule_dpif_lookup(struct ofproto_dpif *ofproto, struct flow *flow, + struct flow_wildcards *wc, struct rule_dpif **rule, + bool take_ref) +{ + enum rule_dpif_lookup_verdict verdict; + enum ofputil_port_config config = 0; + uint8_t table_id; + + if (ofproto_dpif_get_enable_recirc(ofproto)) { + /* Always exactly match recirc_id since datapath supports + * recirculation. */ + if (wc) { + wc->masks.recirc_id = UINT32_MAX; + } - if (rule_dpif_lookup_in_table(ofproto, flow, wc, 0, rule)) { - return; + /* Start looking up from internal table for post recirculation flows + * or packets. We can also simply send all, including normal flows + * or packets to the internal table. They will not match any post + * recirculation rules except the 'catch all' rule that resubmit + * them to table 0. + * + * As an optimization, we send normal flows and packets to table 0 + * directly, saving one table lookup. */ + table_id = flow->recirc_id ? TBL_INTERNAL : 0; + } else { + table_id = 0; } - port = get_ofp_port(ofproto, flow->in_port.ofp_port); - if (!port) { - VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16, - flow->in_port.ofp_port); + + verdict = rule_dpif_lookup_from_table(ofproto, flow, wc, true, + &table_id, rule, take_ref); + + switch (verdict) { + case RULE_DPIF_LOOKUP_VERDICT_MATCH: + return table_id; + case RULE_DPIF_LOOKUP_VERDICT_CONTROLLER: { + struct ofport_dpif *port; + + port = get_ofp_port(ofproto, flow->in_port.ofp_port); + if (!port) { + VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16, + flow->in_port.ofp_port); + } + config = port ? port->up.pp.config : 0; + break; + } + case RULE_DPIF_LOOKUP_VERDICT_DROP: + config = OFPUTIL_PC_NO_PACKET_IN; + break; + case RULE_DPIF_LOOKUP_VERDICT_DEFAULT: + if (!connmgr_wants_packet_in_on_miss(ofproto->up.connmgr)) { + config = OFPUTIL_PC_NO_PACKET_IN; + } + break; + default: + OVS_NOT_REACHED(); } - choose_miss_rule(port ? 
port->up.pp.config : 0, ofproto->miss_rule, - ofproto->no_packet_in_rule, rule); + choose_miss_rule(config, ofproto->miss_rule, + ofproto->no_packet_in_rule, rule, take_ref); + return table_id; } -bool -rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, +/* The returned rule is valid at least until the next RCU quiescent period. + * If the '*rule' needs to stay around longer, a non-zero 'take_ref' must be + * passed in to cause a reference to be taken on it before this returns. */ +static struct rule_dpif * +rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, uint8_t table_id, const struct flow *flow, struct flow_wildcards *wc, - uint8_t table_id, struct rule_dpif **rule) + bool take_ref) { + struct classifier *cls = &ofproto->up.tables[table_id].cls; const struct cls_rule *cls_rule; - struct classifier *cls; - bool frag; - - *rule = NULL; - if (table_id >= N_TABLES) { - return false; - } + struct rule_dpif *rule; - if (wc) { - memset(&wc->masks.dl_type, 0xff, sizeof wc->masks.dl_type); - if (is_ip_any(flow)) { - wc->masks.nw_frag |= FLOW_NW_FRAG_MASK; + fat_rwlock_rdlock(&cls->rwlock); + if (ofproto->up.frag_handling != OFPC_FRAG_NX_MATCH) { + if (wc) { + memset(&wc->masks.dl_type, 0xff, sizeof wc->masks.dl_type); + if (is_ip_any(flow)) { + wc->masks.nw_frag |= FLOW_NW_FRAG_MASK; + } } - } - cls = &ofproto->up.tables[table_id].cls; - fat_rwlock_rdlock(&cls->rwlock); - frag = (flow->nw_frag & FLOW_NW_FRAG_ANY) != 0; - if (frag && ofproto->up.frag_handling == OFPC_FRAG_NORMAL) { - /* We must pretend that transport ports are unavailable. */ - struct flow ofpc_normal_flow = *flow; - ofpc_normal_flow.tp_src = htons(0); - ofpc_normal_flow.tp_dst = htons(0); - cls_rule = classifier_lookup(cls, &ofpc_normal_flow, wc); - } else if (frag && ofproto->up.frag_handling == OFPC_FRAG_DROP) { - cls_rule = &ofproto->drop_frags_rule->up.cr; - /* Frag mask in wc already set above. 
*/ + if (flow->nw_frag & FLOW_NW_FRAG_ANY) { + if (ofproto->up.frag_handling == OFPC_FRAG_NORMAL) { + /* We must pretend that transport ports are unavailable. */ + struct flow ofpc_normal_flow = *flow; + ofpc_normal_flow.tp_src = htons(0); + ofpc_normal_flow.tp_dst = htons(0); + cls_rule = classifier_lookup(cls, &ofpc_normal_flow, wc); + } else { + /* Must be OFPC_FRAG_DROP (we don't have OFPC_FRAG_REASM). */ + cls_rule = &ofproto->drop_frags_rule->up.cr; + } + } else { + cls_rule = classifier_lookup(cls, flow, wc); + } } else { cls_rule = classifier_lookup(cls, flow, wc); } - *rule = rule_dpif_cast(rule_from_cls_rule(cls_rule)); - rule_dpif_ref(*rule); + rule = rule_dpif_cast(rule_from_cls_rule(cls_rule)); + if (take_ref) { + rule_dpif_ref(rule); + } fat_rwlock_unlock(&cls->rwlock); - return *rule != NULL; + return rule; +} + +/* Look up 'flow' in 'ofproto''s classifier starting from table '*table_id'. + * Stores the rule that was found in '*rule', or NULL if none was found. + * Updates 'wc', if nonnull, to reflect the fields that were used during the + * lookup. + * + * If 'honor_table_miss' is true, the first lookup occurs in '*table_id', but + * if none is found then the table miss configuration for that table is + * honored, which can result in additional lookups in other OpenFlow tables. + * In this case the function updates '*table_id' to reflect the final OpenFlow + * table that was searched. + * + * If 'honor_table_miss' is false, then only one table lookup occurs, in + * '*table_id'. + * + * Returns: + * + * - RULE_DPIF_LOOKUP_VERDICT_MATCH if a rule (in '*rule') was found. + * + * - RULE_DPIF_LOOKUP_VERDICT_CONTROLLER if no rule was found and either: + * + 'honor_table_miss' is false + * + a table miss configuration specified that the packet should be + * sent to the controller in this case. 
+ * + * - RULE_DPIF_LOOKUP_VERDICT_DROP if no rule was found, 'honor_table_miss' + * is true and a table miss configuration specified that the packet + * should be dropped in this case. + * + * - RULE_DPIF_LOOKUP_VERDICT_DEFAULT if no rule was found, + * 'honor_table_miss' is true and a table miss configuration has + * not been specified in this case. + * + * The rule is returned in '*rule', which is valid at least until the next + * RCU quiescent period. If the '*rule' needs to stay around longer, + * a non-zero 'take_ref' must be passed in to cause a reference to be taken + * on it before this returns. */ +enum rule_dpif_lookup_verdict +rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, + const struct flow *flow, + struct flow_wildcards *wc, + bool honor_table_miss, + uint8_t *table_id, struct rule_dpif **rule, + bool take_ref) +{ + uint8_t next_id; + + for (next_id = *table_id; + next_id < ofproto->up.n_tables; + next_id++, next_id += (next_id == TBL_INTERNAL)) + { + *table_id = next_id; + *rule = rule_dpif_lookup_in_table(ofproto, *table_id, flow, wc, + take_ref); + if (*rule) { + return RULE_DPIF_LOOKUP_VERDICT_MATCH; + } else if (!honor_table_miss) { + return RULE_DPIF_LOOKUP_VERDICT_CONTROLLER; + } else { + switch (ofproto_table_get_config(&ofproto->up, *table_id)) { + case OFPROTO_TABLE_MISS_CONTINUE: + break; + + case OFPROTO_TABLE_MISS_CONTROLLER: + return RULE_DPIF_LOOKUP_VERDICT_CONTROLLER; + + case OFPROTO_TABLE_MISS_DROP: + return RULE_DPIF_LOOKUP_VERDICT_DROP; + + case OFPROTO_TABLE_MISS_DEFAULT: + return RULE_DPIF_LOOKUP_VERDICT_DEFAULT; + } + } + } + + return RULE_DPIF_LOOKUP_VERDICT_CONTROLLER; } /* Given a port configuration (specified as zero if there's no port), chooses * which of 'miss_rule' and 'no_packet_in_rule' should be used in case of a - * flow table miss. */ + * flow table miss. + * + * The rule is returned in '*rule', which is valid at least until the next + * RCU quiescent period. 
If the '*rule' needs to stay around longer, + * a reference must be taken on it (rule_dpif_ref()). + */ void choose_miss_rule(enum ofputil_port_config config, struct rule_dpif *miss_rule, - struct rule_dpif *no_packet_in_rule, struct rule_dpif **rule) + struct rule_dpif *no_packet_in_rule, struct rule_dpif **rule, + bool take_ref) { *rule = config & OFPUTIL_PC_NO_PACKET_IN ? no_packet_in_rule : miss_rule; - rule_dpif_ref(*rule); + if (take_ref) { + rule_dpif_ref(*rule); + } } void @@ -3180,9 +3478,9 @@ rule_construct(struct rule *rule_) { struct rule_dpif *rule = rule_dpif_cast(rule_); ovs_mutex_init_adaptive(&rule->stats_mutex); - rule->packet_count = 0; - rule->byte_count = 0; - rule->used = rule->up.modified; + rule->stats.n_packets = 0; + rule->stats.n_bytes = 0; + rule->stats.used = rule->up.modified; return 0; } @@ -3216,9 +3514,9 @@ rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes, struct rule_dpif *rule = rule_dpif_cast(rule_); ovs_mutex_lock(&rule->stats_mutex); - *packets = rule->packet_count; - *bytes = rule->byte_count; - *used = rule->used; + *packets = rule->stats.n_packets; + *bytes = rule->stats.n_bytes; + *used = rule->stats.used; ovs_mutex_unlock(&rule->stats_mutex); } @@ -3248,8 +3546,8 @@ rule_modify_actions(struct rule *rule_, bool reset_counters) if (reset_counters) { ovs_mutex_lock(&rule->stats_mutex); - rule->packet_count = 0; - rule->byte_count = 0; + rule->stats.n_packets = 0; + rule->stats.n_bytes = 0; ovs_mutex_unlock(&rule->stats_mutex); } @@ -3336,6 +3634,7 @@ group_destruct(struct ofgroup *group_) static enum ofperr group_modify(struct ofgroup *group_, struct ofgroup *victim_) { + struct ofproto_dpif *ofproto = ofproto_dpif_cast(group_->ofproto); struct group_dpif *group = group_dpif_cast(group_); struct group_dpif *victim = group_dpif_cast(victim_); @@ -3346,6 +3645,8 @@ group_modify(struct ofgroup *group_, struct ofgroup *victim_) group_construct_stats(group); ovs_mutex_unlock(&group->stats_mutex); + 
ofproto->backer->need_revalidate = REV_FLOW_TABLE; + return 0; } @@ -3412,7 +3713,7 @@ ofproto_dpif_send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet ovs_mutex_lock(&ofproto->stats_mutex); ofproto->stats.tx_packets++; - ofproto->stats.tx_bytes += packet->size; + ofproto->stats.tx_bytes += ofpbuf_size(packet); ovs_mutex_unlock(&ofproto->stats_mutex); return error; } @@ -3555,6 +3856,7 @@ ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED, struct trace_ctx { struct xlate_out xout; struct xlate_in xin; + const struct flow *key; struct flow flow; struct flow_wildcards wc; struct ds *result; @@ -3587,8 +3889,6 @@ trace_format_rule(struct ds *result, int level, const struct rule_dpif *rule) ds_put_cstr(result, "OpenFlow actions="); ofpacts_format(actions->ofpacts, actions->ofpacts_len, result); ds_put_char(result, '\n'); - - rule_actions_unref(actions); } static void @@ -3597,7 +3897,9 @@ trace_format_flow(struct ds *result, int level, const char *title, { ds_put_char_multiple(result, '\t', level); ds_put_format(result, "%s: ", title); - if (flow_equal(&trace->xin.flow, &trace->flow)) { + /* Do not report unchanged flows for resubmits. 
*/ + if ((level > 0 && flow_equal(&trace->xin.flow, &trace->flow)) + || (level == 0 && flow_equal(&trace->xin.flow, trace->key))) { ds_put_cstr(result, "unchanged"); } else { flow_format(result, &trace->xin.flow); @@ -3628,7 +3930,8 @@ trace_format_odp(struct ds *result, int level, const char *title, ds_put_char_multiple(result, '\t', level); ds_put_format(result, "%s: ", title); - format_odp_actions(result, odp_actions->data, odp_actions->size); + format_odp_actions(result, ofpbuf_data(odp_actions), + ofpbuf_size(odp_actions)); ds_put_char(result, '\n'); } @@ -3641,7 +3944,7 @@ trace_format_megaflow(struct ds *result, int level, const char *title, ds_put_char_multiple(result, '\t', level); ds_put_format(result, "%s: ", title); flow_wildcards_or(&trace->wc, &trace->xout.wc, &trace->wc); - match_init(&match, &trace->flow, &trace->wc); + match_init(&match, trace->key, &trace->wc); match_format(&match, result, OFP_DEFAULT_PRIORITY); ds_put_char(result, '\n'); } @@ -3752,7 +4055,8 @@ parse_flow_and_packet(int argc, const char *argv[], goto exit; } - if (xlate_receive(backer, NULL, odp_key.data, odp_key.size, flow, + if (xlate_receive(backer, NULL, ofpbuf_data(&odp_key), + ofpbuf_size(&odp_key), flow, ofprotop, NULL, NULL, NULL, NULL)) { error = "Invalid datapath flow"; goto exit; @@ -3780,15 +4084,14 @@ parse_flow_and_packet(int argc, const char *argv[], /* Generate a packet, if requested. */ if (packet) { - if (!packet->size) { + if (!ofpbuf_size(packet)) { flow_compose(packet, flow); } else { - union flow_in_port in_port = flow->in_port; + struct pkt_metadata md = pkt_metadata_from_flow(flow); /* Use the metadata from the flow and the packet argument * to reconstruct the flow. 
*/ - flow_extract(packet, flow->skb_priority, flow->pkt_mark, NULL, - &in_port, flow); + flow_extract(packet, &md, flow); } } @@ -3894,11 +4197,11 @@ ofproto_unixctl_trace_actions(struct unixctl_conn *conn, int argc, goto exit; } if (enforce_consistency) { - retval = ofpacts_check_consistency(ofpacts.data, ofpacts.size, &flow, - u16_to_ofp(ofproto->up.max_ports), + retval = ofpacts_check_consistency(ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts), + &flow, u16_to_ofp(ofproto->up.max_ports), 0, 0, usable_protocols); } else { - retval = ofpacts_check(ofpacts.data, ofpacts.size, &flow, + retval = ofpacts_check(ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts), &flow, u16_to_ofp(ofproto->up.max_ports), 0, 0, &usable_protocols); } @@ -3910,7 +4213,8 @@ ofproto_unixctl_trace_actions(struct unixctl_conn *conn, int argc, goto exit; } - ofproto_trace(ofproto, &flow, packet, ofpacts.data, ofpacts.size, &result); + ofproto_trace(ofproto, &flow, packet, + ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts), &result); unixctl_command_reply(conn, ds_cstr(&result)); exit: @@ -3929,7 +4233,7 @@ exit: * If 'ofpacts' is nonnull then its 'ofpacts_len' bytes specify the actions to * trace, otherwise the actions are determined by a flow table lookup. */ static void -ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow, +ofproto_trace(struct ofproto_dpif *ofproto, struct flow *flow, const struct ofpbuf *packet, const struct ofpact ofpacts[], size_t ofpacts_len, struct ds *ds) @@ -3946,7 +4250,7 @@ ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow, if (ofpacts) { rule = NULL; } else { - rule_dpif_lookup(ofproto, flow, &trace.wc, &rule); + rule_dpif_lookup(ofproto, flow, &trace.wc, &rule, false); trace_format_rule(ds, 0, rule); if (rule == ofproto->miss_rule) { @@ -3961,12 +4265,11 @@ ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow, } if (rule || ofpacts) { - uint16_t tcp_flags; - - tcp_flags = packet ? 
packet_get_tcp_flags(packet, flow) : 0; trace.result = ds; - trace.flow = *flow; - xlate_in_init(&trace.xin, ofproto, flow, rule, tcp_flags, packet); + trace.key = flow; /* Original flow key, used for megaflow. */ + trace.flow = *flow; /* May be modified by actions. */ + xlate_in_init(&trace.xin, ofproto, flow, rule, ntohs(flow->tcp_flags), + packet); if (ofpacts) { trace.xin.ofpacts = ofpacts; trace.xin.ofpacts_len = ofpacts_len; @@ -3981,8 +4284,8 @@ ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow, trace_format_megaflow(ds, 0, "Megaflow", &trace); ds_put_cstr(ds, "Datapath actions: "); - format_odp_actions(ds, trace.xout.odp_actions.data, - trace.xout.odp_actions.size); + format_odp_actions(ds, ofpbuf_data(&trace.xout.odp_actions), + ofpbuf_size(&trace.xout.odp_actions)); if (trace.xout.slow) { enum slow_path_reason slow; @@ -4003,8 +4306,6 @@ ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow, xlate_out_uninit(&trace.xout); } - - rule_dpif_unref(rule); } /* Store the current ofprotos in 'ofproto_shash'. 
Returns a sorted list @@ -4165,6 +4466,8 @@ ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn, struct dpif_port dpif_port; struct dpif_port_dump port_dump; struct hmap portno_names; + void *state = NULL; + int error; ofproto = ofproto_dpif_lookup(argv[argc - 1]); if (!ofproto) { @@ -4182,9 +4485,14 @@ ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn, } ds_init(&ds); - dpif_flow_dump_start(&flow_dump, ofproto->backer->dpif); - while (dpif_flow_dump_next(&flow_dump, &key, &key_len, &mask, &mask_len, - &actions, &actions_len, &stats)) { + error = dpif_flow_dump_start(&flow_dump, ofproto->backer->dpif); + if (error) { + goto exit; + } + dpif_flow_dump_state_init(ofproto->backer->dpif, &state); + while (dpif_flow_dump_next(&flow_dump, state, &key, &key_len, + &mask, &mask_len, &actions, &actions_len, + &stats)) { if (!ofproto_dpif_contains_flow(ofproto, key, key_len)) { continue; } @@ -4197,8 +4505,11 @@ ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn, format_odp_actions(&ds, actions, actions_len); ds_put_char(&ds, '\n'); } + dpif_flow_dump_state_uninit(ofproto->backer->dpif, state); + error = dpif_flow_dump_done(&flow_dump); - if (dpif_flow_dump_done(&flow_dump)) { +exit: + if (error) { ds_clear(&ds); ds_put_format(&ds, "dpif/dump_flows failed: %s", ovs_strerror(errno)); unixctl_command_reply_error(conn, ds_cstr(&ds)); @@ -4238,6 +4549,14 @@ ofproto_dpif_unixctl_init(void) unixctl_command_register("dpif/dump-flows", "[-m] bridge", 1, 2, ofproto_unixctl_dpif_dump_flows, NULL); } + + +/* Returns true if 'rule' is an internal rule, false otherwise. */ +bool +rule_is_internal(const struct rule *rule) +{ + return rule->table_id == TBL_INTERNAL; +} /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.) * @@ -4265,7 +4584,7 @@ set_realdev(struct ofport *ofport_, ofp_port_t realdev_ofp_port, int vid) if (realdev_ofp_port && ofport->bundle) { /* vlandevs are enslaved to their realdevs, so they are not allowed to * themselves be part of a bundle. 
*/ - bundle_set(ofport->up.ofproto, ofport->bundle, NULL); + bundle_set(ofport_->ofproto, ofport->bundle, NULL); } ofport->realdev_ofp_port = realdev_ofp_port; @@ -4292,6 +4611,7 @@ ofproto_has_vlan_splinters(const struct ofproto_dpif *ofproto) return !hmap_is_empty(&ofproto->realdev_vid_map); } + static ofp_port_t vsp_realdev_to_vlandev__(const struct ofproto_dpif *ofproto, ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci) @@ -4499,6 +4819,93 @@ odp_port_to_ofp_port(const struct ofproto_dpif *ofproto, odp_port_t odp_port) } } +uint32_t +ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto) +{ + struct dpif_backer *backer = ofproto->backer; + + return recirc_id_alloc(backer->rid_pool); +} + +void +ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id) +{ + struct dpif_backer *backer = ofproto->backer; + + recirc_id_free(backer->rid_pool, recirc_id); +} + +int +ofproto_dpif_add_internal_flow(struct ofproto_dpif *ofproto, + struct match *match, int priority, + const struct ofpbuf *ofpacts, + struct rule **rulep) +{ + struct ofputil_flow_mod fm; + struct rule_dpif *rule; + int error; + + fm.match = *match; + fm.priority = priority; + fm.new_cookie = htonll(0); + fm.cookie = htonll(0); + fm.cookie_mask = htonll(0); + fm.modify_cookie = false; + fm.table_id = TBL_INTERNAL; + fm.command = OFPFC_ADD; + fm.idle_timeout = 0; + fm.hard_timeout = 0; + fm.buffer_id = 0; + fm.out_port = 0; + fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY; + fm.ofpacts = ofpbuf_data(ofpacts); + fm.ofpacts_len = ofpbuf_size(ofpacts); + + error = ofproto_flow_mod(&ofproto->up, &fm); + if (error) { + VLOG_ERR_RL(&rl, "failed to add internal flow (%s)", + ofperr_to_string(error)); + *rulep = NULL; + return error; + } + + rule = rule_dpif_lookup_in_table(ofproto, TBL_INTERNAL, &match->flow, + &match->wc, false); + if (rule) { + *rulep = &rule->up; + } else { + OVS_NOT_REACHED(); + } + return 0; +} + +int +ofproto_dpif_delete_internal_flow(struct ofproto_dpif 
*ofproto, + struct match *match, int priority) +{ + struct ofputil_flow_mod fm; + int error; + + fm.match = *match; + fm.priority = priority; + fm.new_cookie = htonll(0); + fm.cookie = htonll(0); + fm.cookie_mask = htonll(0); + fm.modify_cookie = false; + fm.table_id = TBL_INTERNAL; + fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY; + fm.command = OFPFC_DELETE_STRICT; + + error = ofproto_flow_mod(&ofproto->up, &fm); + if (error) { + VLOG_ERR_RL(&rl, "failed to delete internal flow (%s)", + ofperr_to_string(error)); + return error; + } + + return 0; +} + const struct ofproto_class ofproto_dpif_class = { init, enumerate_types,