X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=ofproto%2Fofproto-dpif.c;h=88ec2c0a577cfb009e9494be72ba7312dc45e4ca;hb=6d324ae5dbe13066aa60d71c4e5d2beaaab6af21;hp=f09c230d3c7b574149d287ddd014d363b5c0a4c5;hpb=df2c07f4338faac04f4969f243fe4e8083b309ac;p=sliver-openvswitch.git diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index f09c230d3..88ec2c0a5 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -32,6 +32,7 @@ #include "fail-open.h" #include "hmapx.h" #include "lacp.h" +#include "learn.h" #include "mac-learning.h" #include "multipath.h" #include "netdev.h" @@ -64,6 +65,10 @@ COVERAGE_DEFINE(facet_unexpected); * flow translation. */ #define MAX_RESUBMIT_RECURSION 16 +/* Number of implemented OpenFlow tables. */ +enum { N_TABLES = 255 }; +BUILD_ASSERT_DECL(N_TABLES >= 1 && N_TABLES <= 255); + struct ofport_dpif; struct ofproto_dpif; @@ -88,6 +93,8 @@ struct rule_dpif { uint64_t packet_count; /* Number of packets received. */ uint64_t byte_count; /* Number of bytes received. */ + tag_type tag; /* Caches rule_calculate_tag() result. */ + struct list facets; /* List of "struct facet"s. */ }; @@ -166,6 +173,12 @@ struct action_xlate_ctx { * revalidating without a packet to refer to. */ const struct ofpbuf *packet; + /* Should OFPP_NORMAL MAC learning and NXAST_LEARN actions execute? We + * want to execute them if we are actually processing a packet, or if we + * are accounting for packets that the datapath has processed, but not if + * we are just revalidating. */ + bool may_learn; + /* If nonnull, called just before executing a resubmit action. * * This is normally null so the client has to set it manually after @@ -176,9 +189,11 @@ struct action_xlate_ctx { * to look at them after it returns. */ struct ofpbuf *odp_actions; /* Datapath actions. */ - tag_type tags; /* Tags associated with OFPP_NORMAL actions. */ + tag_type tags; /* Tags associated with actions. */ bool may_set_up_flow; /* True ordinarily; false if the actions must * be reassessed for every packet. */ + bool has_learn; /* Actions include NXAST_LEARN? */ + bool has_normal; /* Actions output to OFPP_NORMAL? */ uint16_t nf_output_iface; /* Output interface index for NetFlow. */ /* xlate_actions() initializes and uses these members, but the client has no @@ -229,6 +244,8 @@ struct facet { bool installed; /* Installed in datapath? */ bool may_install; /* True ordinarily; false if actions must * be reassessed for every packet. */ + bool has_learn; /* Actions include NXAST_LEARN? */ + bool has_normal; /* Actions output to OFPP_NORMAL? */ size_t actions_len; /* Number of bytes in actions[]. */ struct nlattr *actions; /* Datapath actions. */ tag_type tags; /* Tags. */ @@ -273,6 +290,11 @@ static void flow_push_stats(const struct rule_dpif *, struct flow *, uint64_t packets, uint64_t bytes, long long int used); +static uint32_t rule_calculate_tag(const struct flow *, + const struct flow_wildcards *, + uint32_t basis); +static void rule_invalidate(const struct rule_dpif *); + struct ofport_dpif { struct ofport up; @@ -301,6 +323,17 @@ struct dpif_completion { struct ofoperation *op; }; +/* Extra information about a classifier table. + * Currently used just for optimized flow revalidation. */ +struct table_dpif { + /* If either of these is nonnull, then this table has a form that allows + * flows to be tagged to avoid revalidating most flows for the most common + * kinds of flow table changes. */ + struct cls_table *catchall_table; /* Table that wildcards all fields. */ + struct cls_table *other_table; /* Table with any other wildcard set. */ + uint32_t basis; /* Keeps each table's tags separate. */ +}; + struct ofproto_dpif { struct ofproto up; struct dpif *dpif; @@ -322,6 +355,9 @@ struct ofproto_dpif { /* Facets. */ struct hmap facets; + + /* Revalidation. */ + struct table_dpif tables[N_TABLES]; bool need_revalidate; struct tag_set revalidate_set; @@ -453,6 +489,14 @@ construct(struct ofproto *ofproto_, int *n_tablesp) timer_set_duration(&ofproto->next_expiration, 1000); hmap_init(&ofproto->facets); + + for (i = 0; i < N_TABLES; i++) { + struct table_dpif *table = &ofproto->tables[i]; + + table->catchall_table = NULL; + table->other_table = NULL; + table->basis = random_uint32(); + } ofproto->need_revalidate = false; tag_set_init(&ofproto->revalidate_set); @@ -462,7 +506,7 @@ construct(struct ofproto *ofproto_, int *n_tablesp) ofproto->has_bundle_action = false; - *n_tablesp = 255; + *n_tablesp = N_TABLES; return 0; } @@ -813,6 +857,20 @@ get_cfm_fault(const struct ofport *ofport_) return ofport->cfm ? cfm_get_fault(ofport->cfm) : -1; } + +static int +get_cfm_remote_mpids(const struct ofport *ofport_, const uint64_t **rmps, + size_t *n_rmps) +{ + struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); + + if (ofport->cfm) { + cfm_get_remote_mpids(ofport->cfm, rmps, n_rmps); + return 0; + } else { + return -1; + } +} /* Bundles. */ @@ -1111,7 +1169,7 @@ bundle_remove(struct ofport *port_) } static void -send_pdu_cb(void *port_, const struct lacp_pdu *pdu) +send_pdu_cb(void *port_, const void *pdu, size_t pdu_size) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10); struct ofport_dpif *port = port_; @@ -1120,13 +1178,14 @@ send_pdu_cb(void *port_, const struct lacp_pdu *pdu) error = netdev_get_etheraddr(port->up.netdev, ea); if (!error) { - struct lacp_pdu *packet_pdu; struct ofpbuf packet; + void *packet_pdu; ofpbuf_init(&packet, 0); packet_pdu = eth_compose(&packet, eth_addr_lacp, ea, ETH_TYPE_LACP, - sizeof *packet_pdu); - *packet_pdu = *pdu; + pdu_size); + memcpy(packet_pdu, pdu, pdu_size); + error = netdev_send(port->up.netdev, &packet); if (error) { VLOG_WARN_RL(&rl, "port %s: sending LACP PDU on iface %s failed " @@ -1613,19 +1672,21 @@ static bool process_special(struct ofproto_dpif *ofproto, const struct flow *flow, const struct ofpbuf *packet) { - if (cfm_should_process_flow(flow)) { - struct ofport_dpif *ofport = get_ofp_port(ofproto, flow->in_port); - if (packet && ofport && ofport->cfm) { + struct ofport_dpif *ofport = get_ofp_port(ofproto, flow->in_port); + + if (!ofport) { + return false; + } + + if (ofport->cfm && cfm_should_process_flow(ofport->cfm, flow)) { + if (packet) { cfm_process_heartbeat(ofport->cfm, packet); } return true; - } else if (flow->dl_type == htons(ETH_TYPE_LACP)) { - struct ofport_dpif *port = get_ofp_port(ofproto, flow->in_port); - if (packet && port && port->bundle && port->bundle->lacp) { - const struct lacp_pdu *pdu = parse_lacp_packet(packet); - if (pdu) { - lacp_process_pdu(port->bundle->lacp, port, pdu); - } + } else if (ofport->bundle && ofport->bundle->lacp + && flow->dl_type == htons(ETH_TYPE_LACP)) { + if (packet) { + lacp_process_packet(ofport->bundle->lacp, ofport, packet); } return true; } @@ -1992,7 +2053,7 @@ rule_expire(struct rule_dpif *rule) /* Has 'rule' expired? */ now = time_msec(); if (rule->up.hard_timeout - && now > rule->up.created + rule->up.hard_timeout * 1000) { + && now > rule->up.modified + rule->up.hard_timeout * 1000) { reason = OFPRR_HARD_TIMEOUT; } else if (rule->up.idle_timeout && list_is_empty(&rule->facets) && now > rule->used + rule->up.idle_timeout * 1000) { @@ -2151,6 +2212,8 @@ facet_make_actions(struct ofproto_dpif *p, struct facet *facet, odp_actions = xlate_actions(&ctx, rule->up.actions, rule->up.n_actions); facet->tags = ctx.tags; facet->may_install = ctx.may_set_up_flow; + facet->has_learn = ctx.has_learn; + facet->has_normal = ctx.has_normal; facet->nf_flow.output_iface = ctx.nf_output_iface; if (facet->actions_len != odp_actions->size @@ -2222,12 +2285,9 @@ static void facet_account(struct ofproto_dpif *ofproto, struct facet *facet) { uint64_t n_bytes; - struct ofbundle *in_bundle; const struct nlattr *a; - tag_type dummy = 0; unsigned int left; ovs_be16 vlan_tci; - int vlan; if (facet->byte_count <= facet->accounted_bytes) { return; @@ -2235,22 +2295,19 @@ facet_account(struct ofproto_dpif *ofproto, struct facet *facet) n_bytes = facet->byte_count - facet->accounted_bytes; facet->accounted_bytes = facet->byte_count; - /* Test that 'tags' is nonzero to ensure that only flows that include an - * OFPP_NORMAL action are used for learning and bond slave rebalancing. - * This works because OFPP_NORMAL always sets a nonzero tag value. - * - * Feed information from the active flows back into the learning table to + /* Feed information from the active flows back into the learning table to * ensure that table is always in sync with what is actually flowing * through the datapath. */ - if (!facet->tags - || !is_admissible(ofproto, &facet->flow, false, &dummy, - &vlan, &in_bundle)) { - return; - } + if (facet->has_learn || facet->has_normal) { + struct action_xlate_ctx ctx; - update_learning_table(ofproto, &facet->flow, vlan, in_bundle); + action_xlate_ctx_init(&ctx, ofproto, &facet->flow, NULL); + ctx.may_learn = true; + ofpbuf_delete(xlate_actions(&ctx, facet->rule->up.actions, + facet->rule->up.n_actions)); + } - if (!ofproto->has_bonded_bundles) { + if (!facet->has_normal || !ofproto->has_bonded_bundles) { return; } @@ -2272,11 +2329,11 @@ facet_account(struct ofproto_dpif *ofproto, struct facet *facet) } break; - case OVS_ACTION_ATTR_STRIP_VLAN: + case OVS_ACTION_ATTR_POP_VLAN: vlan_tci = htons(0); break; - case OVS_ACTION_ATTR_SET_DL_TCI: + case OVS_ACTION_ATTR_PUSH_VLAN: vlan_tci = nl_attr_get_be16(a); break; } @@ -2474,6 +2531,8 @@ facet_revalidate(struct ofproto_dpif *ofproto, struct facet *facet) facet->tags = ctx.tags; facet->nf_flow.output_iface = ctx.nf_output_iface; facet->may_install = ctx.may_set_up_flow; + facet->has_learn = ctx.has_learn; + facet->has_normal = ctx.has_normal; if (actions_changed) { free(facet->actions); facet->actions_len = odp_actions->size; @@ -2604,6 +2663,10 @@ static struct rule_dpif * rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow, uint8_t table_id) { + if (table_id >= N_TABLES) { + return NULL; + } + return rule_dpif_cast(rule_from_cls_rule( classifier_lookup(&ofproto->up.tables[table_id], flow))); @@ -2614,7 +2677,7 @@ complete_operation(struct rule_dpif *rule) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); - ofproto->need_revalidate = true; + rule_invalidate(rule); if (clogged) { struct dpif_completion *c = xmalloc(sizeof *c); c->op = rule->up.pending; @@ -2644,6 +2707,7 @@ rule_construct(struct rule *rule_) struct rule_dpif *rule = rule_dpif_cast(rule_); struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); struct rule_dpif *victim; + uint8_t table_id; int error; error = validate_actions(rule->up.actions, rule->up.n_actions, @@ -2677,6 +2741,12 @@ rule_construct(struct rule *rule_) list_init(&rule->facets); } + table_id = rule->up.table_id; + rule->tag = (victim ? victim->tag + : table_id == 0 ? 0 + : rule_calculate_tag(&rule->up.cr.flow, &rule->up.cr.wc, + ofproto->tables[table_id].basis)); + complete_operation(rule); return 0; } @@ -2840,9 +2910,12 @@ commit_odp_actions(struct action_xlate_ctx *ctx) if (base->vlan_tci != flow->vlan_tci) { if (!(flow->vlan_tci & htons(VLAN_CFI))) { - nl_msg_put_flag(odp_actions, OVS_ACTION_ATTR_STRIP_VLAN); + nl_msg_put_flag(odp_actions, OVS_ACTION_ATTR_POP_VLAN); } else { - nl_msg_put_be16(odp_actions, OVS_ACTION_ATTR_SET_DL_TCI, + if (base->vlan_tci != htons(0)) { + nl_msg_put_flag(odp_actions, OVS_ACTION_ATTR_POP_VLAN); + } + nl_msg_put_be16(odp_actions, OVS_ACTION_ATTR_PUSH_VLAN, flow->vlan_tci & ~htons(VLAN_CFI)); } base->vlan_tci = flow->vlan_tci; @@ -2910,6 +2983,7 @@ xlate_table_action(struct action_xlate_ctx *ctx, uint16_t in_port, uint8_t table_id) { if (ctx->recurse < MAX_RESUBMIT_RECURSION) { + struct ofproto_dpif *ofproto = ctx->ofproto; struct rule_dpif *rule; uint16_t old_in_port; uint8_t old_table_id; @@ -2917,12 +2991,25 @@ xlate_table_action(struct action_xlate_ctx *ctx, old_table_id = ctx->table_id; ctx->table_id = table_id; - /* Look up a flow with 'in_port' as the input port. Then restore the - * original input port (otherwise OFPP_NORMAL and OFPP_IN_PORT will - * have surprising behavior). */ + /* Look up a flow with 'in_port' as the input port. */ old_in_port = ctx->flow.in_port; ctx->flow.in_port = in_port; - rule = rule_dpif_lookup(ctx->ofproto, &ctx->flow, table_id); + rule = rule_dpif_lookup(ofproto, &ctx->flow, table_id); + + /* Tag the flow. */ + if (table_id > 0 && table_id < N_TABLES) { + struct table_dpif *table = &ofproto->tables[table_id]; + if (table->other_table) { + ctx->tags |= (rule + ? rule->tag + : rule_calculate_tag(&ctx->flow, + &table->other_table->wc, + table->basis)); + } + } + + /* Restore the original input port. Otherwise OFPP_NORMAL and + * OFPP_IN_PORT will have surprising behavior. */ ctx->flow.in_port = old_in_port; if (ctx->resubmit_hook) { @@ -3148,6 +3235,26 @@ slave_enabled_cb(uint16_t ofp_port, void *ofproto_) } } +static void +xlate_learn_action(struct action_xlate_ctx *ctx, + const struct nx_action_learn *learn) +{ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); + struct ofputil_flow_mod fm; + int error; + + learn_execute(learn, &ctx->flow, &fm); + + error = ofproto_flow_mod(&ctx->ofproto->up, &fm); + if (error && !VLOG_DROP_WARN(&rl)) { + char *msg = ofputil_error_to_string(error); + VLOG_WARN("learning action failed to modify flow table (%s)", msg); + free(msg); + } + + free(fm.actions); +} + static void do_xlate_actions(const union ofp_action *in, size_t n_in, struct action_xlate_ctx *ctx) @@ -3305,6 +3412,13 @@ do_xlate_actions(const union ofp_action *in, size_t n_in, naor = (const struct nx_action_output_reg *) ia; xlate_output_reg_action(ctx, naor); break; + + case OFPUTIL_NXAST_LEARN: + ctx->has_learn = true; + if (ctx->may_learn) { + xlate_learn_action(ctx, (const struct nx_action_learn *) ia); + } + break; } } } @@ -3317,6 +3431,7 @@ action_xlate_ctx_init(struct action_xlate_ctx *ctx, ctx->ofproto = ofproto; ctx->flow = *flow; ctx->packet = packet; + ctx->may_learn = packet != NULL; ctx->resubmit_hook = NULL; } @@ -3329,11 +3444,14 @@ xlate_actions(struct action_xlate_ctx *ctx, ctx->odp_actions = ofpbuf_new(512); ctx->tags = 0; ctx->may_set_up_flow = true; + ctx->has_learn = false; + ctx->has_normal = false; ctx->nf_output_iface = NF_OUT_DROP; ctx->recurse = 0; ctx->priority = 0; ctx->base_priority = 0; ctx->base_flow = ctx->flow; + ctx->base_flow.tun_id = 0; ctx->table_id = 0; if (process_special(ctx->ofproto, &ctx->flow, ctx->packet)) { @@ -3639,13 +3757,17 @@ compose_actions(struct action_xlate_ctx *ctx, uint16_t vlan, } if (dst->vlan != cur_vlan) { if (dst->vlan == OFP_VLAN_NONE) { - nl_msg_put_flag(ctx->odp_actions, OVS_ACTION_ATTR_STRIP_VLAN); + nl_msg_put_flag(ctx->odp_actions, OVS_ACTION_ATTR_POP_VLAN); } else { ovs_be16 tci; + + if (cur_vlan != OFP_VLAN_NONE) { + nl_msg_put_flag(ctx->odp_actions, OVS_ACTION_ATTR_POP_VLAN); + } tci = htons(dst->vlan & VLAN_VID_MASK); tci |= ctx->flow.vlan_tci & htons(VLAN_PCP_MASK); nl_msg_put_be16(ctx->odp_actions, - OVS_ACTION_ATTR_SET_DL_TCI, tci); + OVS_ACTION_ATTR_PUSH_VLAN, tci); } cur_vlan = dst->vlan; } @@ -3796,6 +3918,7 @@ is_admissible(struct ofproto_dpif *ofproto, const struct flow *flow, "port %"PRIu16, ofproto->up.name, flow->in_port); } + *vlanp = -1; return false; } *vlanp = vlan = flow_get_vlan(ofproto, flow, in_bundle, have_packet); @@ -3854,6 +3977,8 @@ xlate_normal(struct action_xlate_ctx *ctx) struct mac_entry *mac; int vlan; + ctx->has_normal = true; + /* Check whether we should drop packets in this flow. */ if (!is_admissible(ctx->ofproto, &ctx->flow, ctx->packet != NULL, &ctx->tags, &vlan, &in_bundle)) { @@ -3861,8 +3986,8 @@ xlate_normal(struct action_xlate_ctx *ctx) goto done; } - /* Learn source MAC (but don't try to learn from revalidation). */ - if (ctx->packet) { + /* Learn source MAC. */ + if (ctx->may_learn) { update_learning_table(ctx->ofproto, &ctx->flow, vlan, in_bundle); } @@ -3894,6 +4019,120 @@ done: } } +/* Optimized flow revalidation. + * + * It's a difficult problem, in general, to tell which facets need to have + * their actions recalculated whenever the OpenFlow flow table changes. We + * don't try to solve that general problem: for most kinds of OpenFlow flow + * table changes, we recalculate the actions for every facet. This is + * relatively expensive, but it's good enough if the OpenFlow flow table + * doesn't change very often. + * + * However, we can expect one particular kind of OpenFlow flow table change to + * happen frequently: changes caused by MAC learning. To avoid wasting a lot + * of CPU on revalidating every facet whenever MAC learning modifies the flow + * table, we add a special case that applies to flow tables in which every rule + * has the same form (that is, the same wildcards), except that the table is + * also allowed to have a single "catch-all" flow that matches all packets. We + * optimize this case by tagging all of the facets that resubmit into the table + * and invalidating the same tag whenever a flow changes in that table. The + * end result is that we revalidate just the facets that need it (and sometimes + * a few more, but not all of the facets or even all of the facets that + * resubmit to the table modified by MAC learning). */ + +/* Calculates the tag to use for 'flow' and wildcards 'wc' when it is inserted + * into an OpenFlow table with the given 'basis'. */ +static uint32_t +rule_calculate_tag(const struct flow *flow, const struct flow_wildcards *wc, + uint32_t secret) +{ + if (flow_wildcards_is_catchall(wc)) { + return 0; + } else { + struct flow tag_flow = *flow; + flow_zero_wildcards(&tag_flow, wc); + return tag_create_deterministic(flow_hash(&tag_flow, secret)); + } +} + +/* Following a change to OpenFlow table 'table_id' in 'ofproto', update the + * taggability of that table. + * + * This function must be called after *each* change to a flow table. If you + * skip calling it on some changes then the pointer comparisons at the end can + * be invalid if you get unlucky. For example, if a flow removal causes a + * cls_table to be destroyed and then a flow insertion causes a cls_table with + * different wildcards to be created with the same address, then this function + * will incorrectly skip revalidation. */ +static void +table_update_taggable(struct ofproto_dpif *ofproto, uint8_t table_id) +{ + struct table_dpif *table = &ofproto->tables[table_id]; + const struct classifier *cls = &ofproto->up.tables[table_id]; + struct cls_table *catchall, *other; + struct cls_table *t; + + catchall = other = NULL; + + switch (hmap_count(&cls->tables)) { + case 0: + /* We could tag this OpenFlow table but it would make the logic a + * little harder and it's a corner case that doesn't seem worth it + * yet. */ + break; + + case 1: + case 2: + HMAP_FOR_EACH (t, hmap_node, &cls->tables) { + if (cls_table_is_catchall(t)) { + catchall = t; + } else if (!other) { + other = t; + } else { + /* Indicate that we can't tag this by setting both tables to + * NULL. (We know that 'catchall' is already NULL.) */ + other = NULL; + } + } + break; + + default: + /* Can't tag this table. */ + break; + } + + if (table->catchall_table != catchall || table->other_table != other) { + table->catchall_table = catchall; + table->other_table = other; + ofproto->need_revalidate = true; + } +} + +/* Given 'rule' that has changed in some way (either it is a rule being + * inserted, a rule being deleted, or a rule whose actions are being + * modified), marks facets for revalidation to ensure that packets will be + * forwarded correctly according to the new state of the flow table. + * + * This function must be called after *each* change to a flow table. See + * the comment on table_update_taggable() for more information. */ +static void +rule_invalidate(const struct rule_dpif *rule) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); + + table_update_taggable(ofproto, rule->up.table_id); + + if (!ofproto->need_revalidate) { + struct table_dpif *table = &ofproto->tables[rule->up.table_id]; + + if (table->other_table && rule->tag) { + tag_set_add(&ofproto->revalidate_set, rule->tag); + } else { + ofproto->need_revalidate = true; + } + } +} + static bool get_drop_frags(struct ofproto *ofproto_) { @@ -4074,8 +4313,8 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_, arg1 = strtok_r(NULL, " ", &save_ptr); arg2 = strtok_r(NULL, " ", &save_ptr); arg3 = strtok_r(NULL, "", &save_ptr); /* Get entire rest of line. */ - if (dpname && arg1 && !arg2 && !arg3) { - /* ofproto/trace dpname flow */ + if (dpname && arg1 && (!arg2 || !strcmp(arg2, "-generate")) && !arg3) { + /* ofproto/trace dpname flow [-generate] */ int error; /* Convert string to datapath key. */ @@ -4092,6 +4331,12 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_, unixctl_command_reply(conn, 501, "Invalid flow"); goto exit; } + + /* Generate a packet, if requested. */ + if (arg2) { + packet = ofpbuf_new(0); + flow_compose(packet, &flow); + } } else if (dpname && arg1 && arg2 && arg3) { /* ofproto/trace dpname tun_id in_port packet */ uint16_t in_port; @@ -4249,6 +4494,7 @@ const struct ofproto_class ofproto_dpif_class = { set_sflow, set_cfm, get_cfm_fault, + get_cfm_remote_mpids, bundle_set, bundle_remove, mirror_set,