X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=ofproto%2Fofproto-dpif.c;h=d0a5e0044736fadc3a43000b7e0d89198c2d9f52;hb=848e88098fec85336b89c0c652c1d91577c87b11;hp=55d109f38a860dd1f5a62b67fd219c53bfc732fd;hpb=b3e9b2eda9ae5be2ac2b7917858732738a818b66;p=sliver-openvswitch.git diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 55d109f38..d0a5e0044 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -16,12 +16,13 @@ #include -#include "ofproto/private.h" +#include "ofproto/ofproto-provider.h" #include #include "autopath.h" #include "bond.h" +#include "bundle.h" #include "byte-order.h" #include "connmgr.h" #include "coverage.h" @@ -31,6 +32,7 @@ #include "fail-open.h" #include "hmapx.h" #include "lacp.h" +#include "learn.h" #include "mac-learning.h" #include "multipath.h" #include "netdev.h" @@ -40,7 +42,7 @@ #include "ofp-util.h" #include "ofpbuf.h" #include "ofp-print.h" -#include "ofproto-sflow.h" +#include "ofproto-dpif-sflow.h" #include "poll-loop.h" #include "timer.h" #include "unaligned.h" @@ -59,9 +61,13 @@ COVERAGE_DEFINE(facet_invalidated); COVERAGE_DEFINE(facet_revalidate); COVERAGE_DEFINE(facet_unexpected); -/* Maximum depth of flow table recursion (due to NXAST_RESUBMIT actions) in a +/* Maximum depth of flow table recursion (due to resubmit actions) in a * flow translation. */ -#define MAX_RESUBMIT_RECURSION 16 +#define MAX_RESUBMIT_RECURSION 32 + +/* Number of implemented OpenFlow tables. */ +enum { N_TABLES = 255 }; +BUILD_ASSERT_DECL(N_TABLES >= 1 && N_TABLES <= 255); struct ofport_dpif; struct ofproto_dpif; @@ -87,6 +93,8 @@ struct rule_dpif { uint64_t packet_count; /* Number of packets received. */ uint64_t byte_count; /* Number of bytes received. */ + tag_type tag; /* Caches rule_calculate_tag() result. */ + struct list facets; /* List of "struct facet"s. */ }; @@ -95,8 +103,8 @@ static struct rule_dpif *rule_dpif_cast(const struct rule *rule) return rule ? 
CONTAINER_OF(rule, struct rule_dpif, up) : NULL; } -static struct rule_dpif *rule_dpif_lookup(struct ofproto_dpif *ofproto, - const struct flow *flow); +static struct rule_dpif *rule_dpif_lookup(struct ofproto_dpif *, + const struct flow *, uint8_t table); #define MAX_MIRRORS 32 typedef uint32_t mirror_mask_t; @@ -130,6 +138,7 @@ struct ofbundle { /* Configuration. */ struct list ports; /* Contains "struct ofport"s. */ + enum port_vlan_mode vlan_mode; /* VLAN mode */ int vlan; /* -1=trunk port, else a 12-bit VLAN ID. */ unsigned long *trunks; /* Bitmap of trunked VLANs, if 'vlan' == -1. * NULL if all VLANs are trunked. */ @@ -146,11 +155,15 @@ struct ofbundle { }; static void bundle_remove(struct ofport *); +static void bundle_update(struct ofbundle *); static void bundle_destroy(struct ofbundle *); static void bundle_del_port(struct ofport_dpif *); static void bundle_run(struct ofbundle *); static void bundle_wait(struct ofbundle *); +static void stp_run(struct ofproto_dpif *ofproto); +static void stp_wait(struct ofproto_dpif *ofproto); + struct action_xlate_ctx { /* action_xlate_ctx_init() initializes these members. */ @@ -165,6 +178,12 @@ struct action_xlate_ctx { * revalidating without a packet to refer to. */ const struct ofpbuf *packet; + /* Should OFPP_NORMAL MAC learning and NXAST_LEARN actions execute? We + * want to execute them if we are actually processing a packet, or if we + * are accounting for packets that the datapath has processed, but not if + * we are just revalidating. */ + bool may_learn; + /* If nonnull, called just before executing a resubmit action. * * This is normally null so the client has to set it manually after @@ -175,9 +194,11 @@ struct action_xlate_ctx { * to look at them after it returns. */ struct ofpbuf *odp_actions; /* Datapath actions. */ - tag_type tags; /* Tags associated with OFPP_NORMAL actions. */ + tag_type tags; /* Tags associated with actions. 
*/ bool may_set_up_flow; /* True ordinarily; false if the actions must * be reassessed for every packet. */ + bool has_learn; /* Actions include NXAST_LEARN? */ + bool has_normal; /* Actions output to OFPP_NORMAL? */ uint16_t nf_output_iface; /* Output interface index for NetFlow. */ /* xlate_actions() initializes and uses these members, but the client has no @@ -187,6 +208,11 @@ struct action_xlate_ctx { uint32_t priority; /* Current flow priority. 0 if none. */ struct flow base_flow; /* Flow at the last commit. */ uint32_t base_priority; /* Priority at the last commit. */ + uint8_t table_id; /* OpenFlow table ID where flow was found. */ + uint32_t sflow_n_outputs; /* Number of output ports. */ + uint16_t sflow_odp_port; /* Output port for composing sFlow action. */ + uint16_t user_cookie_offset;/* Used for user_action_cookie fixup. */ + bool exit; /* No further actions should be processed. */ }; static void action_xlate_ctx_init(struct action_xlate_ctx *, @@ -205,12 +231,8 @@ struct facet { * dpif_execute(). * * - Do include packets and bytes that were obtained from the datapath - * when a flow was deleted (e.g. dpif_flow_del()) or when its - * statistics were reset (e.g. dpif_flow_put() with + * when its statistics were reset (e.g. dpif_flow_put() with * DPIF_FP_ZERO_STATS). - * - * - Do not include any packets or bytes that can currently be obtained - * from the datapath by, e.g., dpif_flow_get(). */ uint64_t packet_count; /* Number of packets received. */ uint64_t byte_count; /* Number of bytes received. */ @@ -222,10 +244,7 @@ struct facet { uint64_t rs_byte_count; /* Bytes pushed to resubmit children. */ long long int rs_used; /* Used time pushed to resubmit children. */ - /* Number of bytes passed to account_cb. This may include bytes that can - * currently obtained from the datapath (thus, it can be greater than - * byte_count). */ - uint64_t accounted_bytes; + uint64_t accounted_bytes; /* Bytes processed by facet_account(). 
*/ struct hmap_node hmap_node; /* In owning ofproto's 'facets' hmap. */ struct list list_node; /* In owning rule's 'facets' list. */ @@ -234,14 +253,15 @@ struct facet { bool installed; /* Installed in datapath? */ bool may_install; /* True ordinarily; false if actions must * be reassessed for every packet. */ + bool has_learn; /* Actions include NXAST_LEARN? */ + bool has_normal; /* Actions output to OFPP_NORMAL? */ size_t actions_len; /* Number of bytes in actions[]. */ struct nlattr *actions; /* Datapath actions. */ tag_type tags; /* Tags. */ struct netflow_flow nf_flow; /* Per-flow NetFlow tracking data. */ }; -static struct facet *facet_create(struct rule_dpif *, const struct flow *, - const struct ofpbuf *packet); +static struct facet *facet_create(struct rule_dpif *, const struct flow *); static void facet_remove(struct ofproto_dpif *, struct facet *); static void facet_free(struct facet *); @@ -250,6 +270,11 @@ static struct facet *facet_lookup_valid(struct ofproto_dpif *, const struct flow *); static bool facet_revalidate(struct ofproto_dpif *, struct facet *); +static bool execute_controller_action(struct ofproto_dpif *, + const struct flow *, + const struct nlattr *odp_actions, + size_t actions_len, + struct ofpbuf *packet); static void facet_execute(struct ofproto_dpif *, struct facet *, struct ofpbuf *packet); @@ -267,10 +292,10 @@ static void facet_update_time(struct ofproto_dpif *, struct facet *, long long int used); static void facet_update_stats(struct ofproto_dpif *, struct facet *, const struct dpif_flow_stats *); +static void facet_reset_counters(struct facet *); static void facet_reset_dp_stats(struct facet *, struct dpif_flow_stats *); static void facet_push_stats(struct facet *); -static void facet_account(struct ofproto_dpif *, struct facet *, - uint64_t extra_bytes); +static void facet_account(struct ofproto_dpif *, struct facet *); static bool facet_is_controller_flow(struct facet *); @@ -278,6 +303,11 @@ static void flow_push_stats(const 
struct rule_dpif *, struct flow *, uint64_t packets, uint64_t bytes, long long int used); +static uint32_t rule_calculate_tag(const struct flow *, + const struct flow_wildcards *, + uint32_t basis); +static void rule_invalidate(const struct rule_dpif *); + struct ofport_dpif { struct ofport up; @@ -287,6 +317,11 @@ struct ofport_dpif { struct cfm *cfm; /* Connectivity Fault Management, if any. */ tag_type tag; /* Tag associated with this port. */ uint32_t bond_stable_id; /* stable_id to use as bond slave, or 0. */ + bool may_enable; /* May be enabled in bonds. */ + + struct stp_port *stp_port; /* Spanning Tree Protocol, if any. */ + enum stp_state stp_state; /* Always STP_DISABLED if STP not in use. */ + long long int stp_state_entered; }; static struct ofport_dpif * @@ -300,6 +335,22 @@ static void port_run(struct ofport_dpif *); static void port_wait(struct ofport_dpif *); static int set_cfm(struct ofport *, const struct cfm_settings *); +struct dpif_completion { + struct list list_node; + struct ofoperation *op; +}; + +/* Extra information about a classifier table. + * Currently used just for optimized flow revalidation. */ +struct table_dpif { + /* If either of these is nonnull, then this table has a form that allows + * flows to be tagged to avoid revalidating most flows for the most common + * kinds of flow table changes. */ + struct cls_table *catchall_table; /* Table that wildcards all fields. */ + struct cls_table *other_table; /* Table with any other wildcard set. */ + uint32_t basis; /* Keeps each table's tags separate. */ +}; + struct ofproto_dpif { struct ofproto up; struct dpif *dpif; @@ -310,7 +361,7 @@ struct ofproto_dpif { /* Bridging. */ struct netflow *netflow; - struct ofproto_sflow *sflow; + struct dpif_sflow *sflow; struct hmap bundles; /* Contains "struct ofbundle"s. */ struct mac_learning *ml; struct ofmirror *mirrors[MAX_MIRRORS]; @@ -321,10 +372,26 @@ struct ofproto_dpif { /* Facets. */ struct hmap facets; + + /* Revalidation. 
*/ + struct table_dpif tables[N_TABLES]; bool need_revalidate; struct tag_set revalidate_set; + + /* Support for debugging async flow mods. */ + struct list completions; + + bool has_bundle_action; /* True when the first bundle action appears. */ + + /* Spanning tree. */ + struct stp *stp; + long long int stp_last_tick; }; +/* Defer flow mod completion until "ovs-appctl ofproto/unclog"? (Useful only + * for debugging the asynchronous flow_mod implementation.) */ +static bool clogged; + static void ofproto_dpif_unixctl_init(void); static struct ofproto_dpif * @@ -346,7 +413,12 @@ static void update_learning_table(struct ofproto_dpif *, static bool is_admissible(struct ofproto_dpif *, const struct flow *, bool have_packet, tag_type *, int *vlanp, struct ofbundle **in_bundlep); + +/* Upcalls. */ +#define FLOW_MISS_MAX_BATCH 50 static void handle_upcall(struct ofproto_dpif *, struct dpif_upcall *); +static void handle_miss_upcalls(struct ofproto_dpif *, + struct dpif_upcall *, size_t n); /* Flow expiration. */ static int expire(struct ofproto_dpif *); @@ -354,7 +426,9 @@ static int expire(struct ofproto_dpif *); /* Utilities. */ static int send_packet(struct ofproto_dpif *, uint32_t odp_port, const struct ofpbuf *packet); - +static size_t +compose_sflow_action(const struct ofproto_dpif *, struct ofpbuf *odp_actions, + const struct flow *, uint32_t odp_port); /* Global variables. 
*/ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); @@ -403,7 +477,7 @@ dealloc(struct ofproto *ofproto_) } static int -construct(struct ofproto *ofproto_) +construct(struct ofproto *ofproto_, int *n_tablesp) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); const char *name = ofproto->up.name; @@ -419,20 +493,21 @@ construct(struct ofproto *ofproto_) ofproto->max_ports = dpif_get_max_ports(ofproto->dpif); ofproto->n_matches = 0; + dpif_flow_flush(ofproto->dpif); + dpif_recv_purge(ofproto->dpif); + error = dpif_recv_set_mask(ofproto->dpif, ((1u << DPIF_UC_MISS) | - (1u << DPIF_UC_ACTION) | - (1u << DPIF_UC_SAMPLE))); + (1u << DPIF_UC_ACTION))); if (error) { VLOG_ERR("failed to listen on datapath %s: %s", name, strerror(error)); dpif_close(ofproto->dpif); return error; } - dpif_flow_flush(ofproto->dpif); - dpif_recv_purge(ofproto->dpif); ofproto->netflow = NULL; ofproto->sflow = NULL; + ofproto->stp = NULL; hmap_init(&ofproto->bundles); ofproto->ml = mac_learning_create(); for (i = 0; i < MAX_MIRRORS; i++) { @@ -443,30 +518,64 @@ construct(struct ofproto *ofproto_) timer_set_duration(&ofproto->next_expiration, 1000); hmap_init(&ofproto->facets); + + for (i = 0; i < N_TABLES; i++) { + struct table_dpif *table = &ofproto->tables[i]; + + table->catchall_table = NULL; + table->other_table = NULL; + table->basis = random_uint32(); + } ofproto->need_revalidate = false; tag_set_init(&ofproto->revalidate_set); - ofproto->up.tables = xmalloc(sizeof *ofproto->up.tables); - classifier_init(&ofproto->up.tables[0]); - ofproto->up.n_tables = 1; + list_init(&ofproto->completions); ofproto_dpif_unixctl_init(); + ofproto->has_bundle_action = false; + + *n_tablesp = N_TABLES; return 0; } +static void +complete_operations(struct ofproto_dpif *ofproto) +{ + struct dpif_completion *c, *next; + + LIST_FOR_EACH_SAFE (c, next, list_node, &ofproto->completions) { + ofoperation_complete(c->op, 0); + list_remove(&c->list_node); + free(c); + } +} + static void 
destruct(struct ofproto *ofproto_) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + struct rule_dpif *rule, *next_rule; + struct classifier *table; int i; + complete_operations(ofproto); + + OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) { + struct cls_cursor cursor; + + cls_cursor_init(&cursor, table, NULL); + CLS_CURSOR_FOR_EACH_SAFE (rule, next_rule, up.cr, &cursor) { + ofproto_rule_destroy(&rule->up); + } + } + for (i = 0; i < MAX_MIRRORS; i++) { mirror_destroy(ofproto->mirrors[i]); } netflow_destroy(ofproto->netflow); - ofproto_sflow_destroy(ofproto->sflow); + dpif_sflow_destroy(ofproto->sflow); hmap_destroy(&ofproto->bundles); mac_learning_destroy(ofproto->ml); @@ -479,28 +588,40 @@ static int run(struct ofproto *ofproto_) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + struct dpif_upcall misses[FLOW_MISS_MAX_BATCH]; struct ofport_dpif *ofport; struct ofbundle *bundle; + size_t n_misses; int i; + if (!clogged) { + complete_operations(ofproto); + } dpif_run(ofproto->dpif); - for (i = 0; i < 50; i++) { - struct dpif_upcall packet; + n_misses = 0; + for (i = 0; i < FLOW_MISS_MAX_BATCH; i++) { + struct dpif_upcall *upcall = &misses[n_misses]; int error; - error = dpif_recv(ofproto->dpif, &packet); + error = dpif_recv(ofproto->dpif, upcall); if (error) { - if (error == ENODEV) { - /* Datapath destroyed. */ + if (error == ENODEV && n_misses == 0) { return error; } break; } - handle_upcall(ofproto, &packet); + if (upcall->type == DPIF_UC_MISS) { + /* Handle it later. 
*/ + n_misses++; + } else { + handle_upcall(ofproto, upcall); + } } + handle_miss_upcalls(ofproto, misses, n_misses); + if (timer_expired(&ofproto->next_expiration)) { int delay = expire(ofproto); timer_set_duration(&ofproto->next_expiration, delay); @@ -510,7 +631,7 @@ run(struct ofproto *ofproto_) netflow_run(ofproto->netflow); } if (ofproto->sflow) { - ofproto_sflow_run(ofproto->sflow); + dpif_sflow_run(ofproto->sflow); } HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) { @@ -520,6 +641,9 @@ run(struct ofproto *ofproto_) bundle_run(bundle); } + stp_run(ofproto); + mac_learning_run(ofproto->ml, &ofproto->revalidate_set); + /* Now revalidate if there's anything to do. */ if (ofproto->need_revalidate || !tag_set_is_empty(&ofproto->revalidate_set)) { @@ -549,10 +673,14 @@ wait(struct ofproto *ofproto_) struct ofport_dpif *ofport; struct ofbundle *bundle; + if (!clogged && !list_is_empty(&ofproto->completions)) { + poll_immediate_wake(); + } + dpif_wait(ofproto->dpif); dpif_recv_wait(ofproto->dpif); if (ofproto->sflow) { - ofproto_sflow_wait(ofproto->sflow); + dpif_sflow_wait(ofproto->sflow); } if (!tag_set_is_empty(&ofproto->revalidate_set)) { poll_immediate_wake(); @@ -563,6 +691,8 @@ wait(struct ofproto *ofproto_) HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) { bundle_wait(bundle); } + mac_learning_wait(ofproto->ml); + stp_wait(ofproto); if (ofproto->need_revalidate) { /* Shouldn't happen, but if it does just go around again. 
*/ VLOG_DBG_RL(&rl, "need revalidate in ofproto_wait_cb()"); @@ -614,7 +744,7 @@ static void get_tables(struct ofproto *ofproto_, struct ofp_table_stats *ots) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); - struct odp_stats s; + struct dpif_dp_stats s; strcpy(ots->name, "classifier"); @@ -662,14 +792,18 @@ port_construct(struct ofport *port_) struct ofport_dpif *port = ofport_dpif_cast(port_); struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto); + ofproto->need_revalidate = true; port->odp_port = ofp_port_to_odp_port(port->up.ofp_port); port->bundle = NULL; port->cfm = NULL; port->tag = tag_create_random(); + port->may_enable = true; + port->stp_port = NULL; + port->stp_state = STP_DISABLED; if (ofproto->sflow) { - ofproto_sflow_add_port(ofproto->sflow, port->odp_port, - netdev_get_name(port->up.netdev)); + dpif_sflow_add_port(ofproto->sflow, port->odp_port, + netdev_get_name(port->up.netdev)); } return 0; @@ -681,10 +815,11 @@ port_destruct(struct ofport *port_) struct ofport_dpif *port = ofport_dpif_cast(port_); struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto); + ofproto->need_revalidate = true; bundle_remove(port_); set_cfm(port_, NULL); if (ofproto->sflow) { - ofproto_sflow_del_port(ofproto->sflow, port->odp_port); + dpif_sflow_del_port(ofproto->sflow, port->odp_port); } } @@ -708,6 +843,10 @@ port_reconfigured(struct ofport *port_, ovs_be32 old_config) if (changed & htonl(OFPPC_NO_RECV | OFPPC_NO_RECV_STP | OFPPC_NO_FWD | OFPPC_NO_FLOOD)) { ofproto->need_revalidate = true; + + if (changed & htonl(OFPPC_NO_FLOOD) && port->bundle) { + bundle_update(port->bundle); + } } } @@ -716,21 +855,26 @@ set_sflow(struct ofproto *ofproto_, const struct ofproto_sflow_options *sflow_options) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); - struct ofproto_sflow *os = ofproto->sflow; + struct dpif_sflow *ds = ofproto->sflow; + if (sflow_options) { - if (!os) { + if (!ds) { struct ofport_dpif *ofport; - os = 
ofproto->sflow = ofproto_sflow_create(ofproto->dpif); + ds = ofproto->sflow = dpif_sflow_create(ofproto->dpif); HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) { - ofproto_sflow_add_port(os, ofport->odp_port, - netdev_get_name(ofport->up.netdev)); + dpif_sflow_add_port(ds, ofport->odp_port, + netdev_get_name(ofport->up.netdev)); } + ofproto->need_revalidate = true; } - ofproto_sflow_set_options(os, sflow_options); + dpif_sflow_set_options(ds, sflow_options); } else { - ofproto_sflow_destroy(os); - ofproto->sflow = NULL; + if (ds) { + dpif_sflow_destroy(ds); + ofproto->need_revalidate = true; + ofproto->sflow = NULL; + } } return 0; } @@ -745,6 +889,10 @@ set_cfm(struct ofport *ofport_, const struct cfm_settings *s) error = 0; } else { if (!ofport->cfm) { + struct ofproto_dpif *ofproto; + + ofproto = ofproto_dpif_cast(ofport->up.ofproto); + ofproto->need_revalidate = true; ofport->cfm = cfm_create(netdev_get_name(ofport->up.netdev)); } @@ -766,6 +914,266 @@ get_cfm_fault(const struct ofport *ofport_) return ofport->cfm ? cfm_get_fault(ofport->cfm) : -1; } + +static int +get_cfm_remote_mpids(const struct ofport *ofport_, const uint64_t **rmps, + size_t *n_rmps) +{ + struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); + + if (ofport->cfm) { + cfm_get_remote_mpids(ofport->cfm, rmps, n_rmps); + return 0; + } else { + return -1; + } +} + +/* Spanning Tree. 
*/ + +static void +send_bpdu_cb(struct ofpbuf *pkt, int port_num, void *ofproto_) +{ + struct ofproto_dpif *ofproto = ofproto_; + struct stp_port *sp = stp_get_port(ofproto->stp, port_num); + struct ofport_dpif *ofport; + + ofport = stp_port_get_aux(sp); + if (!ofport) { + VLOG_WARN_RL(&rl, "%s: cannot send BPDU on unknown port %d", + ofproto->up.name, port_num); + } else { + struct eth_header *eth = pkt->l2; + + netdev_get_etheraddr(ofport->up.netdev, eth->eth_src); + if (eth_addr_is_zero(eth->eth_src)) { + VLOG_WARN_RL(&rl, "%s: cannot send BPDU on port %d " + "with unknown MAC", ofproto->up.name, port_num); + } else { + int error = netdev_send(ofport->up.netdev, pkt); + if (error) { + VLOG_WARN_RL(&rl, "%s: sending BPDU on port %s failed (%s)", + ofproto->up.name, + netdev_get_name(ofport->up.netdev), + strerror(error)); + } + } + } + ofpbuf_delete(pkt); +} + +/* Configures STP on 'ofproto_' using the settings defined in 's'. */ +static int +set_stp(struct ofproto *ofproto_, const struct ofproto_stp_settings *s) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + + /* Only revalidate flows if the configuration changed. 
*/ + if (!s != !ofproto->stp) { + ofproto->need_revalidate = true; + } + + if (s) { + if (!ofproto->stp) { + ofproto->stp = stp_create(ofproto_->name, s->system_id, + send_bpdu_cb, ofproto); + ofproto->stp_last_tick = time_msec(); + } + + stp_set_bridge_id(ofproto->stp, s->system_id); + stp_set_bridge_priority(ofproto->stp, s->priority); + stp_set_hello_time(ofproto->stp, s->hello_time); + stp_set_max_age(ofproto->stp, s->max_age); + stp_set_forward_delay(ofproto->stp, s->fwd_delay); + } else { + stp_destroy(ofproto->stp); + ofproto->stp = NULL; + } + + return 0; +} + +static int +get_stp_status(struct ofproto *ofproto_, struct ofproto_stp_status *s) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + + if (ofproto->stp) { + s->enabled = true; + s->bridge_id = stp_get_bridge_id(ofproto->stp); + s->designated_root = stp_get_designated_root(ofproto->stp); + s->root_path_cost = stp_get_root_path_cost(ofproto->stp); + } else { + s->enabled = false; + } + + return 0; +} + +static void +update_stp_port_state(struct ofport_dpif *ofport) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); + enum stp_state state; + + /* Figure out new state. */ + state = ofport->stp_port ? stp_port_get_state(ofport->stp_port) + : STP_DISABLED; + + /* Update state. */ + if (ofport->stp_state != state) { + ovs_be32 of_state; + bool fwd_change; + + VLOG_DBG_RL(&rl, "port %s: STP state changed from %s to %s", + netdev_get_name(ofport->up.netdev), + stp_state_name(ofport->stp_state), + stp_state_name(state)); + if (stp_learn_in_state(ofport->stp_state) + != stp_learn_in_state(state)) { + /* xxx Learning action flows should also be flushed. 
*/ + mac_learning_flush(ofproto->ml); + } + fwd_change = stp_forward_in_state(ofport->stp_state) + != stp_forward_in_state(state); + + ofproto->need_revalidate = true; + ofport->stp_state = state; + ofport->stp_state_entered = time_msec(); + + if (fwd_change) { + bundle_update(ofport->bundle); + } + + /* Update the STP state bits in the OpenFlow port description. */ + of_state = (ofport->up.opp.state & htonl(~OFPPS_STP_MASK)) + | htonl(state == STP_LISTENING ? OFPPS_STP_LISTEN + : state == STP_LEARNING ? OFPPS_STP_LEARN + : state == STP_FORWARDING ? OFPPS_STP_FORWARD + : state == STP_BLOCKING ? OFPPS_STP_BLOCK + : 0); + ofproto_port_set_state(&ofport->up, of_state); + } +} + +/* Configures STP on 'ofport_' using the settings defined in 's'. The + * caller is responsible for assigning STP port numbers and ensuring + * there are no duplicates. */ +static int +set_stp_port(struct ofport *ofport_, + const struct ofproto_port_stp_settings *s) +{ + struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); + struct stp_port *sp = ofport->stp_port; + + if (!s || !s->enable) { + if (sp) { + ofport->stp_port = NULL; + stp_port_disable(sp); + } + return 0; + } else if (sp && stp_port_no(sp) != s->port_num + && ofport == stp_port_get_aux(sp)) { + /* The port-id changed, so disable the old one if it's not + * already in use by another port. 
*/ + stp_port_disable(sp); + } + + sp = ofport->stp_port = stp_get_port(ofproto->stp, s->port_num); + stp_port_enable(sp); + + stp_port_set_aux(sp, ofport); + stp_port_set_priority(sp, s->priority); + stp_port_set_path_cost(sp, s->path_cost); + + update_stp_port_state(ofport); + + return 0; +} + +static int +get_stp_port_status(struct ofport *ofport_, + struct ofproto_port_stp_status *s) +{ + struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); + struct stp_port *sp = ofport->stp_port; + + if (!ofproto->stp || !sp) { + s->enabled = false; + return 0; + } + + s->enabled = true; + s->port_id = stp_port_get_id(sp); + s->state = stp_port_get_state(sp); + s->sec_in_state = (time_msec() - ofport->stp_state_entered) / 1000; + s->role = stp_port_get_role(sp); + + return 0; +} + +static void +stp_run(struct ofproto_dpif *ofproto) +{ + if (ofproto->stp) { + long long int now = time_msec(); + long long int elapsed = now - ofproto->stp_last_tick; + struct stp_port *sp; + + if (elapsed > 0) { + stp_tick(ofproto->stp, MIN(INT_MAX, elapsed)); + ofproto->stp_last_tick = now; + } + while (stp_get_changed_port(ofproto->stp, &sp)) { + struct ofport_dpif *ofport = stp_port_get_aux(sp); + + if (ofport) { + update_stp_port_state(ofport); + } + } + } +} + +static void +stp_wait(struct ofproto_dpif *ofproto) +{ + if (ofproto->stp) { + poll_timer_wait(1000); + } +} + +/* Returns true if STP should process 'flow'. */ +static bool +stp_should_process_flow(const struct flow *flow) +{ + return eth_addr_equals(flow->dl_dst, eth_addr_stp); +} + +static void +stp_process_packet(const struct ofport_dpif *ofport, + const struct ofpbuf *packet) +{ + struct ofpbuf payload = *packet; + struct eth_header *eth = payload.data; + struct stp_port *sp = ofport->stp_port; + + /* Sink packets on ports that have STP disabled when the bridge has + * STP enabled. 
*/ + if (!sp || stp_port_get_state(sp) == STP_DISABLED) { + return; + } + + /* Trim off padding on payload. */ + if (payload.size > ntohs(eth->eth_type) + ETH_HEADER_LEN) { + payload.size = ntohs(eth->eth_type) + ETH_HEADER_LEN; + } + + if (ofpbuf_try_pull(&payload, ETH_HEADER_LEN + LLC_HEADER_LEN)) { + stp_received_bpdu(sp, payload.data, payload.size); + } +} /* Bundles. */ @@ -818,6 +1226,21 @@ bundle_lookup_multiple(struct ofproto_dpif *ofproto, } } +static void +bundle_update(struct ofbundle *bundle) +{ + struct ofport_dpif *port; + + bundle->floodable = true; + LIST_FOR_EACH (port, bundle_node, &bundle->ports) { + if (port->up.opp.config & htonl(OFPPC_NO_FLOOD) + || !stp_forward_in_state(port->stp_state)) { + bundle->floodable = false; + break; + } + } +} + static void bundle_del_port(struct ofport_dpif *port) { @@ -835,12 +1258,7 @@ bundle_del_port(struct ofport_dpif *port) bond_slave_unregister(bundle->bond, port); } - bundle->floodable = true; - LIST_FOR_EACH (port, bundle_node, &bundle->ports) { - if (port->up.opp.config & htonl(OFPPC_NO_FLOOD)) { - bundle->floodable = false; - } - } + bundle_update(bundle); } static bool @@ -863,11 +1281,13 @@ bundle_add_port(struct ofbundle *bundle, uint32_t ofp_port, port->bundle = bundle; list_push_back(&bundle->ports, &port->bundle_node); - if (port->up.opp.config & htonl(OFPPC_NO_FLOOD)) { + if (port->up.opp.config & htonl(OFPPC_NO_FLOOD) + || !stp_forward_in_state(port->stp_state)) { bundle->floodable = false; } } if (lacp) { + port->bundle->ofproto->need_revalidate = true; lacp_slave_register(bundle->lacp, port, lacp); } @@ -919,9 +1339,10 @@ bundle_set(struct ofproto *ofproto_, void *aux, { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); bool need_flush = false; - const unsigned long *trunks; struct ofport_dpif *port; struct ofbundle *bundle; + unsigned long *trunks; + int vlan; size_t i; bool ok; @@ -944,6 +1365,7 @@ bundle_set(struct ofproto *ofproto_, void *aux, bundle->name = NULL; 
list_init(&bundle->ports); + bundle->vlan_mode = PORT_VLAN_TRUNK; bundle->vlan = -1; bundle->trunks = NULL; bundle->lacp = NULL; @@ -964,6 +1386,7 @@ bundle_set(struct ofproto *ofproto_, void *aux, /* LACP. */ if (s->lacp) { if (!bundle->lacp) { + ofproto->need_revalidate = true; bundle->lacp = lacp_create(); } lacp_configure(bundle->lacp, s->lacp); @@ -1002,19 +1425,65 @@ bundle_set(struct ofproto *ofproto_, void *aux, return EINVAL; } + /* Set VLAN tagging mode */ + if (s->vlan_mode != bundle->vlan_mode) { + bundle->vlan_mode = s->vlan_mode; + need_flush = true; + } + /* Set VLAN tag. */ - if (s->vlan != bundle->vlan) { - bundle->vlan = s->vlan; + vlan = (s->vlan_mode == PORT_VLAN_TRUNK ? -1 + : s->vlan >= 0 && s->vlan <= 4095 ? s->vlan + : 0); + if (vlan != bundle->vlan) { + bundle->vlan = vlan; need_flush = true; } /* Get trunked VLANs. */ - trunks = s->vlan == -1 ? NULL : s->trunks; + switch (s->vlan_mode) { + case PORT_VLAN_ACCESS: + trunks = NULL; + break; + + case PORT_VLAN_TRUNK: + trunks = (unsigned long *) s->trunks; + break; + + case PORT_VLAN_NATIVE_UNTAGGED: + case PORT_VLAN_NATIVE_TAGGED: + if (vlan != 0 && (!s->trunks + || !bitmap_is_set(s->trunks, vlan) + || bitmap_is_set(s->trunks, 0))) { + /* Force trunking the native VLAN and prohibit trunking VLAN 0. */ + if (s->trunks) { + trunks = bitmap_clone(s->trunks, 4096); + } else { + trunks = bitmap_allocate1(4096); + } + bitmap_set1(trunks, vlan); + bitmap_set0(trunks, 0); + } else { + trunks = (unsigned long *) s->trunks; + } + break; + + default: + NOT_REACHED(); + } if (!vlan_bitmap_equal(trunks, bundle->trunks)) { free(bundle->trunks); - bundle->trunks = vlan_bitmap_clone(trunks); + if (trunks == s->trunks) { + bundle->trunks = vlan_bitmap_clone(trunks); + } else { + bundle->trunks = trunks; + trunks = NULL; + } need_flush = true; } + if (trunks != s->trunks) { + free(trunks); + } /* Bonding. 
*/ if (!list_is_short(&bundle->ports)) { @@ -1064,7 +1533,7 @@ bundle_remove(struct ofport *port_) } static void -send_pdu_cb(void *port_, const struct lacp_pdu *pdu) +send_pdu_cb(void *port_, const void *pdu, size_t pdu_size) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10); struct ofport_dpif *port = port_; @@ -1073,13 +1542,14 @@ send_pdu_cb(void *port_, const struct lacp_pdu *pdu) error = netdev_get_etheraddr(port->up.netdev, ea); if (!error) { - struct lacp_pdu *packet_pdu; struct ofpbuf packet; + void *packet_pdu; ofpbuf_init(&packet, 0); packet_pdu = eth_compose(&packet, eth_addr_lacp, ea, ETH_TYPE_LACP, - sizeof *packet_pdu); - *packet_pdu = *pdu; + pdu_size); + memcpy(packet_pdu, pdu, pdu_size); + error = netdev_send(port->up.netdev, &packet); if (error) { VLOG_WARN_RL(&rl, "port %s: sending LACP PDU on iface %s failed " @@ -1134,12 +1604,7 @@ bundle_run(struct ofbundle *bundle) struct ofport_dpif *port; LIST_FOR_EACH (port, bundle_node, &bundle->ports) { - bool may_enable = lacp_slave_may_enable(bundle->lacp, port); - - if (may_enable && port->cfm) { - may_enable = !cfm_get_fault(port->cfm); - } - bond_slave_set_may_enable(bundle->bond, port, may_enable); + bond_slave_set_may_enable(bundle->bond, port, port->may_enable); } bond_run(bundle->bond, &bundle->ofproto->revalidate_set, @@ -1223,6 +1688,7 @@ mirror_set(struct ofproto *ofproto_, void *aux, mirror = ofproto->mirrors[idx] = xzalloc(sizeof *mirror); mirror->ofproto = ofproto; mirror->idx = idx; + mirror->aux = aux; mirror->out_vlan = -1; mirror->name = NULL; } @@ -1342,12 +1808,20 @@ set_flood_vlans(struct ofproto *ofproto_, unsigned long *flood_vlans) } static bool -is_mirror_output_bundle(struct ofproto *ofproto_, void *aux) +is_mirror_output_bundle(const struct ofproto *ofproto_, void *aux) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); struct ofbundle *bundle = bundle_lookup(ofproto, aux); return bundle && bundle->mirror_out != 0; } + +static void 
+forward_bpdu_changed(struct ofproto *ofproto_) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + /* Revalidate cached flows whenever forward_bpdu option changes. */ + ofproto->need_revalidate = true; +} /* Ports. */ @@ -1376,6 +1850,8 @@ ofproto_port_from_dpif_port(struct ofproto_port *ofproto_port, static void port_run(struct ofport_dpif *ofport) { + bool enable = netdev_get_carrier(ofport->up.netdev); + if (ofport->cfm) { cfm_run(ofport->cfm); @@ -1388,7 +1864,24 @@ port_run(struct ofport_dpif *ofport) ofport->odp_port, &packet); ofpbuf_uninit(&packet); } + + enable = enable && !cfm_get_fault(ofport->cfm) + && cfm_get_opup(ofport->cfm); + } + + if (ofport->bundle) { + enable = enable && lacp_slave_may_enable(ofport->bundle->lacp, ofport); + } + + if (ofport->may_enable != enable) { + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); + + if (ofproto->has_bundle_action) { + ofproto->need_revalidate = true; + } } + + ofport->may_enable = enable; } static void @@ -1519,146 +2012,345 @@ port_is_lacp_current(const struct ofport *ofport_) /* Upcall handling. */ -/* Given 'upcall', of type DPIF_UC_ACTION or DPIF_UC_MISS, sends an - * OFPT_PACKET_IN message to each OpenFlow controller as necessary according to - * their individual configurations. +/* Flow miss batching. + * + * Some dpifs implement operations faster when you hand them off in a batch. + * To allow batching, "struct flow_miss" queues the dpif-related work needed + * for a given flow. Each "struct flow_miss" corresponds to sending one or + * more packets, plus possibly installing the flow in the dpif. + * + * So far we only batch the operations that affect flow setup time the most. + * It's possible to batch more than that, but the benefit might be minimal. 
*/ +struct flow_miss { + struct hmap_node hmap_node; + struct flow flow; + const struct nlattr *key; + size_t key_len; + struct list packets; +}; + +struct flow_miss_op { + union dpif_op dpif_op; + struct facet *facet; +}; + +/* Sends an OFPT_PACKET_IN message for 'packet' of type OFPR_NO_MATCH to each + * OpenFlow controller as necessary according to their individual + * configurations. + * + * If 'clone' is true, the caller retains ownership of 'packet'. Otherwise, + * ownership is transferred to this function. */ +static void +send_packet_in_miss(struct ofproto_dpif *ofproto, struct ofpbuf *packet, + const struct flow *flow, bool clone) +{ + struct ofputil_packet_in pin; + + pin.packet = packet; + pin.in_port = flow->in_port; + pin.reason = OFPR_NO_MATCH; + pin.buffer_id = 0; /* not yet known */ + pin.send_len = 0; /* not used for flow table misses */ + connmgr_send_packet_in(ofproto->up.connmgr, &pin, flow, + clone ? NULL : packet); +} + +/* Sends an OFPT_PACKET_IN message for 'packet' of type OFPR_ACTION to each + * OpenFlow controller as necessary according to their individual + * configurations. + * + * 'send_len' should be the number of bytes of 'packet' to send to the + * controller, as specified in the action that caused the packet to be sent. * * If 'clone' is true, the caller retains ownership of 'upcall->packet'. * Otherwise, ownership is transferred to this function. */ static void -send_packet_in(struct ofproto_dpif *ofproto, struct dpif_upcall *upcall, - const struct flow *flow, bool clone) +send_packet_in_action(struct ofproto_dpif *ofproto, struct ofpbuf *packet, + uint64_t userdata, const struct flow *flow, bool clone) { struct ofputil_packet_in pin; + struct user_action_cookie cookie; + + memcpy(&cookie, &userdata, sizeof(cookie)); - pin.packet = upcall->packet; + pin.packet = packet; pin.in_port = flow->in_port; - pin.reason = upcall->type == DPIF_UC_MISS ? 
OFPR_NO_MATCH : OFPR_ACTION; + pin.reason = OFPR_ACTION; pin.buffer_id = 0; /* not yet known */ - pin.send_len = upcall->userdata; + pin.send_len = cookie.data; connmgr_send_packet_in(ofproto->up.connmgr, &pin, flow, - clone ? NULL : upcall->packet); + clone ? NULL : packet); } static bool process_special(struct ofproto_dpif *ofproto, const struct flow *flow, const struct ofpbuf *packet) { - if (cfm_should_process_flow(flow)) { - struct ofport_dpif *ofport = get_ofp_port(ofproto, flow->in_port); - if (packet && ofport && ofport->cfm) { + struct ofport_dpif *ofport = get_ofp_port(ofproto, flow->in_port); + + if (!ofport) { + return false; + } + + if (ofport->cfm && cfm_should_process_flow(ofport->cfm, flow)) { + if (packet) { cfm_process_heartbeat(ofport->cfm, packet); } return true; - } else if (flow->dl_type == htons(ETH_TYPE_LACP)) { - struct ofport_dpif *port = get_ofp_port(ofproto, flow->in_port); - if (packet && port && port->bundle && port->bundle->lacp) { - const struct lacp_pdu *pdu = parse_lacp_packet(packet); - if (pdu) { - lacp_process_pdu(port->bundle->lacp, port, pdu); - } + } else if (ofport->bundle && ofport->bundle->lacp + && flow->dl_type == htons(ETH_TYPE_LACP)) { + if (packet) { + lacp_process_packet(ofport->bundle->lacp, ofport, packet); + } + return true; + } else if (ofproto->stp && stp_should_process_flow(flow)) { + if (packet) { + stp_process_packet(ofport, packet); } return true; } return false; } -static void -handle_miss_upcall(struct ofproto_dpif *ofproto, struct dpif_upcall *upcall) +static struct flow_miss * +flow_miss_create(struct hmap *todo, const struct flow *flow, + const struct nlattr *key, size_t key_len) { - struct facet *facet; - struct flow flow; - - /* Obtain in_port and tun_id, at least. */ - odp_flow_key_to_flow(upcall->key, upcall->key_len, &flow); - - /* Set header pointers in 'flow'. 
*/ - flow_extract(upcall->packet, flow.tun_id, flow.in_port, &flow); + uint32_t hash = flow_hash(flow, 0); + struct flow_miss *miss; - /* Handle 802.1ag and LACP. */ - if (process_special(ofproto, &flow, upcall->packet)) { - ofpbuf_delete(upcall->packet); - ofproto->n_matches++; - return; + HMAP_FOR_EACH_WITH_HASH (miss, hmap_node, hash, todo) { + if (flow_equal(&miss->flow, flow)) { + return miss; + } } - /* Check with in-band control to see if this packet should be sent - * to the local port regardless of the flow table. */ - if (connmgr_msg_in_hook(ofproto->up.connmgr, &flow, upcall->packet)) { - send_packet(ofproto, ODPP_LOCAL, upcall->packet); - } + miss = xmalloc(sizeof *miss); + hmap_insert(todo, &miss->hmap_node, hash); + miss->flow = *flow; + miss->key = key; + miss->key_len = key_len; + list_init(&miss->packets); + return miss; +} + +static void +handle_flow_miss(struct ofproto_dpif *ofproto, struct flow_miss *miss, + struct flow_miss_op *ops, size_t *n_ops) +{ + const struct flow *flow = &miss->flow; + struct ofpbuf *packet, *next_packet; + struct facet *facet; - facet = facet_lookup_valid(ofproto, &flow); + facet = facet_lookup_valid(ofproto, flow); if (!facet) { - struct rule_dpif *rule = rule_dpif_lookup(ofproto, &flow); + struct rule_dpif *rule; + + rule = rule_dpif_lookup(ofproto, flow, 0); if (!rule) { /* Don't send a packet-in if OFPPC_NO_PACKET_IN asserted. 
*/ - struct ofport_dpif *port = get_ofp_port(ofproto, flow.in_port); + struct ofport_dpif *port = get_ofp_port(ofproto, flow->in_port); if (port) { if (port->up.opp.config & htonl(OFPPC_NO_PACKET_IN)) { COVERAGE_INC(ofproto_dpif_no_packet_in); /* XXX install 'drop' flow entry */ - ofpbuf_delete(upcall->packet); return; } } else { VLOG_WARN_RL(&rl, "packet-in on unknown port %"PRIu16, - flow.in_port); + flow->in_port); + } + + LIST_FOR_EACH_SAFE (packet, next_packet, list_node, + &miss->packets) { + list_remove(&packet->list_node); + send_packet_in_miss(ofproto, packet, flow, false); } - send_packet_in(ofproto, upcall, &flow, false); return; } - facet = facet_create(rule, &flow, upcall->packet); - } else if (!facet->may_install) { - /* The facet is not installable, that is, we need to process every - * packet, so process the current packet's actions into 'facet'. */ - facet_make_actions(ofproto, facet, upcall->packet); + facet = facet_create(rule, flow); } - if (facet->rule->up.cr.priority == FAIL_OPEN_PRIORITY) { - /* - * Extra-special case for fail-open mode. - * - * We are in fail-open mode and the packet matched the fail-open rule, - * but we are connected to a controller too. We should send the packet - * up to the controller in the hope that it will try to set up a flow - * and thereby allow us to exit fail-open. - * - * See the top-level comment in fail-open.c for more information. - */ - send_packet_in(ofproto, upcall, &flow, true); + LIST_FOR_EACH_SAFE (packet, next_packet, list_node, &miss->packets) { + list_remove(&packet->list_node); + ofproto->n_matches++; + + if (facet->rule->up.cr.priority == FAIL_OPEN_PRIORITY) { + /* + * Extra-special case for fail-open mode. + * + * We are in fail-open mode and the packet matched the fail-open + * rule, but we are connected to a controller too. We should send + * the packet up to the controller in the hope that it will try to + * set up a flow and thereby allow us to exit fail-open. 
+ * + * See the top-level comment in fail-open.c for more information. + */ + send_packet_in_miss(ofproto, packet, flow, true); + } + + if (!facet->may_install) { + facet_make_actions(ofproto, facet, packet); + } + if (!execute_controller_action(ofproto, &facet->flow, + facet->actions, facet->actions_len, + packet)) { + struct flow_miss_op *op = &ops[(*n_ops)++]; + struct dpif_execute *execute = &op->dpif_op.execute; + + op->facet = facet; + execute->type = DPIF_OP_EXECUTE; + execute->key = miss->key; + execute->key_len = miss->key_len; + execute->actions + = (facet->may_install + ? facet->actions + : xmemdup(facet->actions, facet->actions_len)); + execute->actions_len = facet->actions_len; + execute->packet = packet; + } } - facet_execute(ofproto, facet, upcall->packet); - facet_install(ofproto, facet, false); - ofproto->n_matches++; + if (facet->may_install) { + struct flow_miss_op *op = &ops[(*n_ops)++]; + struct dpif_flow_put *put = &op->dpif_op.flow_put; + + op->facet = facet; + put->type = DPIF_OP_FLOW_PUT; + put->flags = DPIF_FP_CREATE | DPIF_FP_MODIFY; + put->key = miss->key; + put->key_len = miss->key_len; + put->actions = facet->actions; + put->actions_len = facet->actions_len; + put->stats = NULL; + } } static void -handle_upcall(struct ofproto_dpif *ofproto, struct dpif_upcall *upcall) +handle_miss_upcalls(struct ofproto_dpif *ofproto, struct dpif_upcall *upcalls, + size_t n_upcalls) +{ + struct dpif_upcall *upcall; + struct flow_miss *miss, *next_miss; + struct flow_miss_op flow_miss_ops[FLOW_MISS_MAX_BATCH * 2]; + union dpif_op *dpif_ops[FLOW_MISS_MAX_BATCH * 2]; + struct hmap todo; + size_t n_ops; + size_t i; + + if (!n_upcalls) { + return; + } + + /* Construct the to-do list. + * + * This just amounts to extracting the flow from each packet and sticking + * the packets that have the same flow in the same "flow_miss" structure so + * that we can process them together. 
*/ + hmap_init(&todo); + for (upcall = upcalls; upcall < &upcalls[n_upcalls]; upcall++) { + struct flow_miss *miss; + struct flow flow; + + /* Obtain in_port and tun_id, at least, then set 'flow''s header + * pointers. */ + odp_flow_key_to_flow(upcall->key, upcall->key_len, &flow); + flow_extract(upcall->packet, flow.tun_id, flow.in_port, &flow); + + /* Handle 802.1ag, LACP, and STP specially. */ + if (process_special(ofproto, &flow, upcall->packet)) { + ofpbuf_delete(upcall->packet); + ofproto->n_matches++; + continue; + } + + /* Add other packets to a to-do list. */ + miss = flow_miss_create(&todo, &flow, upcall->key, upcall->key_len); + list_push_back(&miss->packets, &upcall->packet->list_node); + } + + /* Process each element in the to-do list, constructing the set of + * operations to batch. */ + n_ops = 0; + HMAP_FOR_EACH_SAFE (miss, next_miss, hmap_node, &todo) { + handle_flow_miss(ofproto, miss, flow_miss_ops, &n_ops); + ofpbuf_list_delete(&miss->packets); + hmap_remove(&todo, &miss->hmap_node); + free(miss); + } + assert(n_ops <= ARRAY_SIZE(flow_miss_ops)); + hmap_destroy(&todo); + + /* Execute batch. */ + for (i = 0; i < n_ops; i++) { + dpif_ops[i] = &flow_miss_ops[i].dpif_op; + } + dpif_operate(ofproto->dpif, dpif_ops, n_ops); + + /* Free memory and update facets. 
*/ + for (i = 0; i < n_ops; i++) { + struct flow_miss_op *op = &flow_miss_ops[i]; + struct dpif_execute *execute; + struct dpif_flow_put *put; + + switch (op->dpif_op.type) { + case DPIF_OP_EXECUTE: + execute = &op->dpif_op.execute; + if (op->facet->actions != execute->actions) { + free((struct nlattr *) execute->actions); + } + ofpbuf_delete((struct ofpbuf *) execute->packet); + break; + + case DPIF_OP_FLOW_PUT: + put = &op->dpif_op.flow_put; + if (!put->error) { + op->facet->installed = true; + } + break; + } + } +} + +static void +handle_userspace_upcall(struct ofproto_dpif *ofproto, + struct dpif_upcall *upcall) { struct flow flow; + struct user_action_cookie cookie; - switch (upcall->type) { - case DPIF_UC_ACTION: - COVERAGE_INC(ofproto_dpif_ctlr_action); - odp_flow_key_to_flow(upcall->key, upcall->key_len, &flow); - send_packet_in(ofproto, upcall, &flow, false); - break; + memcpy(&cookie, &upcall->userdata, sizeof(cookie)); - case DPIF_UC_SAMPLE: + if (cookie.type == USER_ACTION_COOKIE_SFLOW) { if (ofproto->sflow) { odp_flow_key_to_flow(upcall->key, upcall->key_len, &flow); - ofproto_sflow_received(ofproto->sflow, upcall, &flow); + dpif_sflow_received(ofproto->sflow, upcall->packet, &flow, &cookie); } ofpbuf_delete(upcall->packet); + + } else if (cookie.type == USER_ACTION_COOKIE_CONTROLLER) { + COVERAGE_INC(ofproto_dpif_ctlr_action); + odp_flow_key_to_flow(upcall->key, upcall->key_len, &flow); + send_packet_in_action(ofproto, upcall->packet, upcall->userdata, + &flow, false); + } else { + VLOG_WARN_RL(&rl, "invalid user cookie : 0x%"PRIx64, upcall->userdata); + } +} + +static void +handle_upcall(struct ofproto_dpif *ofproto, struct dpif_upcall *upcall) +{ + switch (upcall->type) { + case DPIF_UC_ACTION: + handle_userspace_upcall(ofproto, upcall); break; case DPIF_UC_MISS: - handle_miss_upcall(ofproto, upcall); - break; + /* The caller handles these. 
*/ + NOT_REACHED(); case DPIF_N_UC_TYPES: default: @@ -1684,7 +2376,7 @@ static int expire(struct ofproto_dpif *ofproto) { struct rule_dpif *rule, *next_rule; - struct cls_cursor cursor; + struct classifier *table; int dp_max_idle; /* Update stats for each flow in the datapath. */ @@ -1695,9 +2387,13 @@ expire(struct ofproto_dpif *ofproto) expire_facets(ofproto, dp_max_idle); /* Expire OpenFlow flows whose idle_timeout or hard_timeout has passed. */ - cls_cursor_init(&cursor, &ofproto->up.tables[0], NULL); - CLS_CURSOR_FOR_EACH_SAFE (rule, next_rule, up.cr, &cursor) { - rule_expire(rule); + OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) { + struct cls_cursor cursor; + + cls_cursor_init(&cursor, table, NULL); + CLS_CURSOR_FOR_EACH_SAFE (rule, next_rule, up.cr, &cursor) { + rule_expire(rule); + } } /* All outstanding data in existing flows has been accounted, so it's a @@ -1744,7 +2440,7 @@ update_stats(struct ofproto_dpif *p) ds_init(&s); odp_flow_key_format(key, key_len, &s); - VLOG_WARN_RL(&rl, "failed to convert ODP flow key to flow: %s", + VLOG_WARN_RL(&rl, "failed to convert datapath flow key to flow: %s", ds_cstr(&s)); ds_destroy(&s); @@ -1771,7 +2467,7 @@ update_stats(struct ofproto_dpif *p) facet->dp_byte_count = stats->n_bytes; facet_update_time(p, facet, stats->used); - facet_account(p, facet, stats->n_bytes); + facet_account(p, facet); facet_push_stats(facet); } else { /* There's a flow in the datapath that we know nothing about. @@ -1808,11 +2504,12 @@ facet_max_idle(const struct ofproto_dpif *ofproto) * N_BUCKETS buckets whose width is BUCKET_WIDTH msecs each. Each facet * that is installed in the kernel gets dropped in the appropriate bucket. * After the histogram has been built, we compute the cutoff so that only - * the most-recently-used 1% of facets (but at least 1000 flows) are kept - * cached. 
At least the most-recently-used bucket of facets is kept, so - * actually an arbitrary number of facets can be kept in any given - * expiration run (though the next run will delete most of those unless - * they receive additional data). + * the most-recently-used 1% of facets (but at least + * ofproto->up.flow_eviction_threshold flows) are kept cached. At least + * the most-recently-used bucket of facets is kept, so actually an + * arbitrary number of facets can be kept in any given expiration run + * (though the next run will delete most of those unless they receive + * additional data). * * This requires a second pass through the facets, in addition to the pass * made by update_stats(), because the former function never looks @@ -1821,13 +2518,13 @@ facet_max_idle(const struct ofproto_dpif *ofproto) enum { BUCKET_WIDTH = ROUND_UP(100, TIME_UPDATE_INTERVAL) }; enum { N_BUCKETS = 5000 / BUCKET_WIDTH }; int buckets[N_BUCKETS] = { 0 }; + int total, subtotal, bucket; struct facet *facet; - int total, bucket; long long int now; int i; total = hmap_count(&ofproto->facets); - if (total <= 1000) { + if (total <= ofproto->up.flow_eviction_threshold) { return N_BUCKETS * BUCKET_WIDTH; } @@ -1842,15 +2539,11 @@ facet_max_idle(const struct ofproto_dpif *ofproto) } /* Find the first bucket whose flows should be expired. */ - for (bucket = 0; bucket < N_BUCKETS; bucket++) { - if (buckets[bucket]) { - int subtotal = 0; - do { - subtotal += buckets[bucket++]; - } while (bucket < N_BUCKETS && subtotal < MAX(1000, total / 100)); - break; - } - } + subtotal = bucket = 0; + do { + subtotal += buckets[bucket++]; + } while (bucket < N_BUCKETS && + subtotal < MAX(ofproto->up.flow_eviction_threshold, total / 100)); if (VLOG_IS_DBG_ENABLED()) { struct ds s; @@ -1922,7 +2615,7 @@ rule_expire(struct rule_dpif *rule) /* Has 'rule' expired? 
*/ now = time_msec(); if (rule->up.hard_timeout - && now > rule->up.created + rule->up.hard_timeout * 1000) { + && now > rule->up.modified + rule->up.hard_timeout * 1000) { reason = OFPRR_HARD_TIMEOUT; } else if (rule->up.idle_timeout && list_is_empty(&rule->facets) && now > rule->used + rule->up.idle_timeout * 1000) { @@ -1945,15 +2638,16 @@ rule_expire(struct rule_dpif *rule) /* Facets. */ -/* Creates and returns a new facet owned by 'rule', given a 'flow' and an - * example 'packet' within that flow. +/* Creates and returns a new facet owned by 'rule', given a 'flow'. * * The caller must already have determined that no facet with an identical * 'flow' exists in 'ofproto' and that 'flow' is the best match for 'rule' in - * the ofproto's classifier table. */ + * the ofproto's classifier table. + * + * The facet will initially have no ODP actions. The caller should fix that + * by calling facet_make_actions(). */ static struct facet * -facet_create(struct rule_dpif *rule, const struct flow *flow, - const struct ofpbuf *packet) +facet_create(struct rule_dpif *rule, const struct flow *flow) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); struct facet *facet; @@ -1967,8 +2661,6 @@ facet_create(struct rule_dpif *rule, const struct flow *flow, netflow_flow_init(&facet->nf_flow); netflow_flow_update_time(ofproto->netflow, &facet->nf_flow, facet->used); - facet_make_actions(ofproto, facet, packet); - return facet; } @@ -1979,6 +2671,33 @@ facet_free(struct facet *facet) free(facet); } +static bool +execute_controller_action(struct ofproto_dpif *ofproto, + const struct flow *flow, + const struct nlattr *odp_actions, size_t actions_len, + struct ofpbuf *packet) +{ + if (actions_len + && odp_actions->nla_type == OVS_ACTION_ATTR_USERSPACE + && NLA_ALIGN(odp_actions->nla_len) == actions_len) { + /* As an optimization, avoid a round-trip from userspace to kernel to + * userspace. This also avoids possibly filling up kernel packet + * buffers along the way. 
+ * + * This optimization will not accidentally catch sFlow + * OVS_ACTION_ATTR_USERSPACE actions, since those are encapsulated + * inside OVS_ACTION_ATTR_SAMPLE. */ + const struct nlattr *nla; + + nla = nl_attr_find_nested(odp_actions, OVS_USERSPACE_ATTR_USERDATA); + send_packet_in_action(ofproto, packet, nl_attr_get_u64(nla), flow, + false); + return true; + } else { + return false; + } +} + /* Executes, within 'ofproto', the 'n_actions' actions in 'actions' on * 'packet', which arrived on 'in_port'. * @@ -1988,39 +2707,23 @@ execute_odp_actions(struct ofproto_dpif *ofproto, const struct flow *flow, const struct nlattr *odp_actions, size_t actions_len, struct ofpbuf *packet) { - if (actions_len == NLA_ALIGN(NLA_HDRLEN + sizeof(uint64_t)) - && odp_actions->nla_type == ODP_ACTION_ATTR_CONTROLLER) { - /* As an optimization, avoid a round-trip from userspace to kernel to - * userspace. This also avoids possibly filling up kernel packet - * buffers along the way. */ - struct dpif_upcall upcall; - - upcall.type = DPIF_UC_ACTION; - upcall.packet = packet; - upcall.key = NULL; - upcall.key_len = 0; - upcall.userdata = nl_attr_get_u64(odp_actions); - upcall.sample_pool = 0; - upcall.actions = NULL; - upcall.actions_len = 0; - - send_packet_in(ofproto, &upcall, flow, false); + struct odputil_keybuf keybuf; + struct ofpbuf key; + int error; + if (execute_controller_action(ofproto, flow, odp_actions, actions_len, + packet)) { return true; - } else { - struct odputil_keybuf keybuf; - struct ofpbuf key; - int error; + } - ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); - odp_flow_key_from_flow(&key, flow); + ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); + odp_flow_key_from_flow(&key, flow); - error = dpif_execute(ofproto->dpif, key.data, key.size, - odp_actions, actions_len, packet); + error = dpif_execute(ofproto->dpif, key.data, key.size, + odp_actions, actions_len, packet); - ofpbuf_delete(packet); - return !error; - } + ofpbuf_delete(packet); + return !error; } /* 
Executes the actions indicated by 'facet' on 'packet' and credits 'facet''s @@ -2031,8 +2734,8 @@ execute_odp_actions(struct ofproto_dpif *ofproto, const struct flow *flow, * applying flow_extract() to 'packet' would yield the same flow as * 'facet->flow'. * - * 'facet' must have accurately composed ODP actions; that is, it must not be - * in need of revalidation. + * 'facet' must have accurately composed datapath actions; that is, it must + * not be in need of revalidation. * * Takes ownership of 'packet'. */ static void @@ -2043,7 +2746,7 @@ facet_execute(struct ofproto_dpif *ofproto, struct facet *facet, assert(ofpbuf_headroom(packet) >= sizeof(struct ofp_packet_in)); - flow_extract_stats(&facet->flow, packet, &stats); + dpif_flow_stats_extract(&facet->flow, packet, &stats); stats.used = time_msec(); if (execute_odp_actions(ofproto, &facet->flow, facet->actions, facet->actions_len, packet)) { @@ -2068,7 +2771,7 @@ facet_remove(struct ofproto_dpif *ofproto, struct facet *facet) facet_free(facet); } -/* Composes the ODP actions for 'facet' based on its rule's actions. */ +/* Composes the datapath actions for 'facet' based on its rule's actions. */ static void facet_make_actions(struct ofproto_dpif *p, struct facet *facet, const struct ofpbuf *packet) @@ -2081,6 +2784,8 @@ facet_make_actions(struct ofproto_dpif *p, struct facet *facet, odp_actions = xlate_actions(&ctx, rule->up.actions, rule->up.n_actions); facet->tags = ctx.tags; facet->may_install = ctx.may_set_up_flow; + facet->has_learn = ctx.has_learn; + facet->has_normal = ctx.has_normal; facet->nf_flow.output_iface = ctx.nf_output_iface; if (facet->actions_len != odp_actions->size @@ -2142,47 +2847,33 @@ facet_install(struct ofproto_dpif *p, struct facet *facet, bool zero_stats) } } -static int -vlan_tci_to_openflow_vlan(ovs_be16 vlan_tci) -{ - return vlan_tci != htons(0) ? 
vlan_tci_to_vid(vlan_tci) : OFP_VLAN_NONE; -} - static void -facet_account(struct ofproto_dpif *ofproto, - struct facet *facet, uint64_t extra_bytes) +facet_account(struct ofproto_dpif *ofproto, struct facet *facet) { - uint64_t total_bytes, n_bytes; - struct ofbundle *in_bundle; + uint64_t n_bytes; const struct nlattr *a; - tag_type dummy = 0; unsigned int left; ovs_be16 vlan_tci; - int vlan; - total_bytes = facet->byte_count + extra_bytes; - if (total_bytes <= facet->accounted_bytes) { + if (facet->byte_count <= facet->accounted_bytes) { return; } - n_bytes = total_bytes - facet->accounted_bytes; - facet->accounted_bytes = total_bytes; + n_bytes = facet->byte_count - facet->accounted_bytes; + facet->accounted_bytes = facet->byte_count; - /* Test that 'tags' is nonzero to ensure that only flows that include an - * OFPP_NORMAL action are used for learning and bond slave rebalancing. - * This works because OFPP_NORMAL always sets a nonzero tag value. - * - * Feed information from the active flows back into the learning table to + /* Feed information from the active flows back into the learning table to * ensure that table is always in sync with what is actually flowing * through the datapath. 
*/ - if (!facet->tags - || !is_admissible(ofproto, &facet->flow, false, &dummy, - &vlan, &in_bundle)) { - return; - } + if (facet->has_learn || facet->has_normal) { + struct action_xlate_ctx ctx; - update_learning_table(ofproto, &facet->flow, vlan, in_bundle); + action_xlate_ctx_init(&ctx, ofproto, &facet->flow, NULL); + ctx.may_learn = true; + ofpbuf_delete(xlate_actions(&ctx, facet->rule->up.actions, + facet->rule->up.n_actions)); + } - if (!ofproto->has_bonded_bundles) { + if (!facet->has_normal || !ofproto->has_bonded_bundles) { return; } @@ -2196,20 +2887,29 @@ facet_account(struct ofproto_dpif *ofproto, struct ofport_dpif *port; switch (nl_attr_type(a)) { - case ODP_ACTION_ATTR_OUTPUT: + const struct nlattr *nested; + case OVS_ACTION_ATTR_OUTPUT: port = get_odp_port(ofproto, nl_attr_get_u32(a)); if (port && port->bundle && port->bundle->bond) { bond_account(port->bundle->bond, &facet->flow, - vlan_tci_to_openflow_vlan(vlan_tci), n_bytes); + vlan_tci_to_vid(vlan_tci), n_bytes); } break; - case ODP_ACTION_ATTR_STRIP_VLAN: - vlan_tci = htons(0); + case OVS_ACTION_ATTR_POP: + if (nl_attr_get_u16(a) == OVS_KEY_ATTR_8021Q) { + vlan_tci = htons(0); + } break; - case ODP_ACTION_ATTR_SET_DL_TCI: - vlan_tci = nl_attr_get_be16(a); + case OVS_ACTION_ATTR_PUSH: + nested = nl_attr_get(a); + if (nl_attr_type(nested) == OVS_KEY_ATTR_8021Q) { + const struct ovs_key_8021q *q_key; + + q_key = nl_attr_get_unspec(nested, sizeof(*q_key)); + vlan_tci = q_key->q_tci; + } break; } } @@ -2281,7 +2981,7 @@ facet_flush_stats(struct ofproto_dpif *ofproto, struct facet *facet) assert(!facet->dp_packet_count); facet_push_stats(facet); - facet_account(ofproto, facet, 0); + facet_account(ofproto, facet); if (ofproto->netflow && !facet_is_controller_flow(facet)) { struct ofexpired expired; @@ -2297,11 +2997,7 @@ facet_flush_stats(struct ofproto_dpif *ofproto, struct facet *facet) /* Reset counters to prevent double counting if 'facet' ever gets * reinstalled. 
*/ - facet->packet_count = 0; - facet->byte_count = 0; - facet->rs_packet_count = 0; - facet->rs_byte_count = 0; - facet->accounted_bytes = 0; + facet_reset_counters(facet); netflow_flow_clear(&facet->nf_flow); } @@ -2338,7 +3034,8 @@ facet_lookup_valid(struct ofproto_dpif *ofproto, const struct flow *flow) /* The facet we found might not be valid, since we could be in need of * revalidation. If it is not valid, don't return it. */ if (facet - && ofproto->need_revalidate + && (ofproto->need_revalidate + || tag_set_intersects(&ofproto->revalidate_set, facet->tags)) && !facet_revalidate(ofproto, facet)) { COVERAGE_INC(facet_invalidated); return NULL; @@ -2369,14 +3066,14 @@ facet_revalidate(struct ofproto_dpif *ofproto, struct facet *facet) COVERAGE_INC(facet_revalidate); /* Determine the new rule. */ - new_rule = rule_dpif_lookup(ofproto, &facet->flow); + new_rule = rule_dpif_lookup(ofproto, &facet->flow, 0); if (!new_rule) { /* No new rule, so delete the facet. */ facet_remove(ofproto, facet); return false; } - /* Calculate new ODP actions. + /* Calculate new datapath actions. * * We do not modify any 'facet' state yet, because we might need to, e.g., * emit a NetFlow expiration and, if so, we need to have the old state @@ -2388,8 +3085,8 @@ facet_revalidate(struct ofproto_dpif *ofproto, struct facet *facet) || memcmp(facet->actions, odp_actions->data, facet->actions_len)); - /* If the ODP actions changed or the installability changed, then we need - * to talk to the datapath. */ + /* If the datapath actions changed or the installability changed, + * then we need to talk to the datapath. 
*/ if (actions_changed || ctx.may_set_up_flow != facet->installed) { if (ctx.may_set_up_flow) { struct dpif_flow_stats stats; @@ -2410,6 +3107,8 @@ facet_revalidate(struct ofproto_dpif *ofproto, struct facet *facet) facet->tags = ctx.tags; facet->nf_flow.output_iface = ctx.nf_output_iface; facet->may_install = ctx.may_set_up_flow; + facet->has_learn = ctx.has_learn; + facet->has_normal = ctx.has_normal; if (actions_changed) { free(facet->actions); facet->actions_len = odp_actions->size; @@ -2463,6 +3162,16 @@ facet_update_stats(struct ofproto_dpif *ofproto, struct facet *facet, } } +static void +facet_reset_counters(struct facet *facet) +{ + facet->packet_count = 0; + facet->byte_count = 0; + facet->rs_packet_count = 0; + facet->rs_byte_count = 0; + facet->accounted_bytes = 0; +} + static void facet_push_stats(struct facet *facet) { @@ -2527,11 +3236,44 @@ flow_push_stats(const struct rule_dpif *rule, /* Rules. */ static struct rule_dpif * -rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow) +rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow, + uint8_t table_id) { - return rule_dpif_cast(rule_from_cls_rule( - classifier_lookup(&ofproto->up.tables[0], - flow))); + struct cls_rule *cls_rule; + struct classifier *cls; + + if (table_id >= N_TABLES) { + return NULL; + } + + cls = &ofproto->up.tables[table_id]; + if (flow->tos_frag & FLOW_FRAG_ANY + && ofproto->up.frag_handling == OFPC_FRAG_NORMAL) { + /* For OFPC_NORMAL frag_handling, we must pretend that transport ports + * are unavailable. 
*/ + struct flow ofpc_normal_flow = *flow; + ofpc_normal_flow.tp_src = htons(0); + ofpc_normal_flow.tp_dst = htons(0); + cls_rule = classifier_lookup(cls, &ofpc_normal_flow); + } else { + cls_rule = classifier_lookup(cls, flow); + } + return rule_dpif_cast(rule_from_cls_rule(cls_rule)); +} + +static void +complete_operation(struct rule_dpif *rule) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); + + rule_invalidate(rule); + if (clogged) { + struct dpif_completion *c = xmalloc(sizeof *c); + c->op = rule->up.pending; + list_push_back(&ofproto->completions, &c->list_node); + } else { + ofoperation_complete(rule->up.pending, 0); + } } static struct rule * @@ -2553,7 +3295,8 @@ rule_construct(struct rule *rule_) { struct rule_dpif *rule = rule_dpif_cast(rule_); struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); - struct rule_dpif *old_rule; + struct rule_dpif *victim; + uint8_t table_id; int error; error = validate_actions(rule->up.actions, rule->up.n_actions, @@ -2562,21 +3305,38 @@ rule_construct(struct rule *rule_) return error; } - old_rule = rule_dpif_cast(rule_from_cls_rule(classifier_find_rule_exactly( - &ofproto->up.tables[0], - &rule->up.cr))); - if (old_rule) { - ofproto_rule_destroy(&old_rule->up); - } - rule->used = rule->up.created; rule->packet_count = 0; rule->byte_count = 0; - list_init(&rule->facets); - classifier_insert(&ofproto->up.tables[0], &rule->up.cr); - ofproto->need_revalidate = true; + victim = rule_dpif_cast(ofoperation_get_victim(rule->up.pending)); + if (victim && !list_is_empty(&victim->facets)) { + struct facet *facet; + + rule->facets = victim->facets; + list_moved(&rule->facets); + LIST_FOR_EACH (facet, list_node, &rule->facets) { + /* XXX: We're only clearing our local counters here. It's possible + * that quite a few packets are unaccounted for in the datapath + * statistics. These will be accounted to the new rule instead of + * cleared as required. 
This could be fixed by clearing out the + * datapath statistics for this facet, but currently it doesn't + * seem worth it. */ + facet_reset_counters(facet); + facet->rule = rule; + } + } else { + /* Must avoid list_moved() in this case. */ + list_init(&rule->facets); + } + table_id = rule->up.table_id; + rule->tag = (victim ? victim->tag + : table_id == 0 ? 0 + : rule_calculate_tag(&rule->up.cr.flow, &rule->up.cr.wc, + ofproto->tables[table_id].basis)); + + complete_operation(rule); return 0; } @@ -2587,11 +3347,11 @@ rule_destruct(struct rule *rule_) struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); struct facet *facet, *next_facet; - classifier_remove(&ofproto->up.tables[0], &rule->up.cr); LIST_FOR_EACH_SAFE (facet, next_facet, list_node, &rule->facets) { facet_revalidate(ofproto, facet); } - ofproto->need_revalidate = true; + + complete_operation(rule); } static void @@ -2627,14 +3387,18 @@ rule_execute(struct rule *rule_, struct flow *flow, struct ofpbuf *packet) /* First look for a related facet. If we find one, account it to that. */ facet = facet_lookup_valid(ofproto, flow); if (facet && facet->rule == rule) { + if (!facet->may_install) { + facet_make_actions(ofproto, facet, packet); + } facet_execute(ofproto, facet, packet); return 0; } /* Otherwise, if 'rule' is in fact the correct rule for 'packet', then * create a new facet for it and use that. 
*/ - if (rule_dpif_lookup(ofproto, flow) == rule) { - facet = facet_create(rule, flow, packet); + if (rule_dpif_lookup(ofproto, flow, 0) == rule) { + facet = facet_create(rule, flow); + facet_make_actions(ofproto, facet, packet); facet_execute(ofproto, facet, packet); facet_install(ofproto, facet, true); return 0; @@ -2657,23 +3421,24 @@ rule_execute(struct rule *rule_, struct flow *flow, struct ofpbuf *packet) return 0; } -static int -rule_modify_actions(struct rule *rule_, - const union ofp_action *actions, size_t n_actions) +static void +rule_modify_actions(struct rule *rule_) { struct rule_dpif *rule = rule_dpif_cast(rule_); struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); int error; - error = validate_actions(actions, n_actions, &rule->up.cr.flow, - ofproto->max_ports); - if (!error) { - ofproto->need_revalidate = true; + error = validate_actions(rule->up.actions, rule->up.n_actions, + &rule->up.cr.flow, ofproto->max_ports); + if (error) { + ofoperation_complete(rule->up.pending, error); + return; } - return error; + + complete_operation(rule); } -/* Sends 'packet' out of port 'odp_port' within 'p'. +/* Sends 'packet' out of port 'odp_port' within 'ofproto'. * Returns 0 if successful, otherwise a positive errno value. */ static int send_packet(struct ofproto_dpif *ofproto, uint32_t odp_port, @@ -2689,7 +3454,9 @@ send_packet(struct ofproto_dpif *ofproto, uint32_t odp_port, odp_flow_key_from_flow(&key, &flow); ofpbuf_init(&odp_actions, 32); - nl_msg_put_u32(&odp_actions, ODP_ACTION_ATTR_OUTPUT, odp_port); + compose_sflow_action(ofproto, &odp_actions, &flow, odp_port); + + nl_msg_put_u32(&odp_actions, OVS_ACTION_ATTR_OUTPUT, odp_port); error = dpif_execute(ofproto->dpif, key.data, key.size, odp_actions.data, odp_actions.size, @@ -2703,153 +3470,422 @@ send_packet(struct ofproto_dpif *ofproto, uint32_t odp_port, return error; } -/* OpenFlow to ODP action translation. */ +/* OpenFlow to datapath action translation. 
*/ static void do_xlate_actions(const union ofp_action *in, size_t n_in, struct action_xlate_ctx *ctx); -static bool xlate_normal(struct action_xlate_ctx *); +static void xlate_normal(struct action_xlate_ctx *); -static void -commit_odp_actions(struct action_xlate_ctx *ctx) +static size_t +put_userspace_action(const struct ofproto_dpif *ofproto, + struct ofpbuf *odp_actions, + const struct flow *flow, + const struct user_action_cookie *cookie) { - const struct flow *flow = &ctx->flow; - struct flow *base = &ctx->base_flow; - struct ofpbuf *odp_actions = ctx->odp_actions; + size_t offset; + uint32_t pid; - if (base->tun_id != flow->tun_id) { - nl_msg_put_be64(odp_actions, ODP_ACTION_ATTR_SET_TUNNEL, flow->tun_id); - base->tun_id = flow->tun_id; - } + pid = dpif_port_get_pid(ofproto->dpif, + ofp_port_to_odp_port(flow->in_port)); - if (base->nw_src != flow->nw_src) { - nl_msg_put_be32(odp_actions, ODP_ACTION_ATTR_SET_NW_SRC, flow->nw_src); - base->nw_src = flow->nw_src; - } + offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_USERSPACE); + nl_msg_put_u32(odp_actions, OVS_USERSPACE_ATTR_PID, pid); + nl_msg_put_unspec(odp_actions, OVS_USERSPACE_ATTR_USERDATA, + cookie, sizeof *cookie); + nl_msg_end_nested(odp_actions, offset); - if (base->nw_dst != flow->nw_dst) { - nl_msg_put_be32(odp_actions, ODP_ACTION_ATTR_SET_NW_DST, flow->nw_dst); - base->nw_dst = flow->nw_dst; - } + return odp_actions->size - NLA_ALIGN(sizeof *cookie); +} - if (base->vlan_tci != flow->vlan_tci) { - if (!(flow->vlan_tci & htons(VLAN_CFI))) { - nl_msg_put_flag(odp_actions, ODP_ACTION_ATTR_STRIP_VLAN); - } else { - nl_msg_put_be16(odp_actions, ODP_ACTION_ATTR_SET_DL_TCI, - flow->vlan_tci & ~htons(VLAN_CFI)); - } - base->vlan_tci = flow->vlan_tci; - } +/* Compose SAMPLE action for sFlow. 
*/ +static size_t +compose_sflow_action(const struct ofproto_dpif *ofproto, + struct ofpbuf *odp_actions, + const struct flow *flow, + uint32_t odp_port) +{ + uint32_t port_ifindex; + uint32_t probability; + struct user_action_cookie cookie; + size_t sample_offset, actions_offset; + int cookie_offset, n_output; - if (base->tp_src != flow->tp_src) { - nl_msg_put_be16(odp_actions, ODP_ACTION_ATTR_SET_TP_SRC, flow->tp_src); - base->tp_src = flow->tp_src; + if (!ofproto->sflow || flow->in_port == OFPP_NONE) { + return 0; } - if (base->tp_dst != flow->tp_dst) { - nl_msg_put_be16(odp_actions, ODP_ACTION_ATTR_SET_TP_DST, flow->tp_dst); - base->tp_dst = flow->tp_dst; + if (odp_port == OVSP_NONE) { + port_ifindex = 0; + n_output = 0; + } else { + port_ifindex = dpif_sflow_odp_port_to_ifindex(ofproto->sflow, odp_port); + n_output = 1; } - if (!eth_addr_equals(base->dl_src, flow->dl_src)) { - nl_msg_put_unspec(odp_actions, ODP_ACTION_ATTR_SET_DL_SRC, - flow->dl_src, ETH_ADDR_LEN); - memcpy(base->dl_src, flow->dl_src, ETH_ADDR_LEN); - } + sample_offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_SAMPLE); - if (!eth_addr_equals(base->dl_dst, flow->dl_dst)) { - nl_msg_put_unspec(odp_actions, ODP_ACTION_ATTR_SET_DL_DST, - flow->dl_dst, ETH_ADDR_LEN); - memcpy(base->dl_dst, flow->dl_dst, ETH_ADDR_LEN); - } + /* Number of packets out of UINT_MAX to sample. 
*/ + probability = dpif_sflow_get_probability(ofproto->sflow); + nl_msg_put_u32(odp_actions, OVS_SAMPLE_ATTR_PROBABILITY, probability); - if (ctx->base_priority != ctx->priority) { - if (ctx->priority) { - nl_msg_put_u32(odp_actions, ODP_ACTION_ATTR_SET_PRIORITY, - ctx->priority); - } else { - nl_msg_put_flag(odp_actions, ODP_ACTION_ATTR_POP_PRIORITY); - } - ctx->base_priority = ctx->priority; - } + actions_offset = nl_msg_start_nested(odp_actions, OVS_SAMPLE_ATTR_ACTIONS); + + cookie.type = USER_ACTION_COOKIE_SFLOW; + cookie.data = port_ifindex; + cookie.n_output = n_output; + cookie.vlan_tci = 0; + cookie_offset = put_userspace_action(ofproto, odp_actions, flow, &cookie); + + nl_msg_end_nested(odp_actions, actions_offset); + nl_msg_end_nested(odp_actions, sample_offset); + return cookie_offset; } +/* SAMPLE action must be first action in any given list of actions. + * At this point we do not have all information required to build it. So try to + * build sample action as complete as possible. */ static void -add_output_action(struct action_xlate_ctx *ctx, uint16_t ofp_port) +add_sflow_action(struct action_xlate_ctx *ctx) { - const struct ofport_dpif *ofport = get_ofp_port(ctx->ofproto, ofp_port); - uint16_t odp_port = ofp_port_to_odp_port(ofp_port); - - if (ofport) { - if (ofport->up.opp.config & htonl(OFPPC_NO_FWD)) { - /* Forwarding disabled on port. */ - return; - } - } else { - /* - * We don't have an ofport record for this port, but it doesn't hurt to - * allow forwarding to it anyhow. Maybe such a port will appear later - * and we're pre-populating the flow table. - */ - } - - commit_odp_actions(ctx); - nl_msg_put_u32(ctx->odp_actions, ODP_ACTION_ATTR_OUTPUT, odp_port); - ctx->nf_output_iface = ofp_port; + ctx->user_cookie_offset = compose_sflow_action(ctx->ofproto, + ctx->odp_actions, + &ctx->flow, OVSP_NONE); + ctx->sflow_odp_port = 0; + ctx->sflow_n_outputs = 0; } +/* Fix SAMPLE action according to data collected while composing ODP actions. 
+ * We need to fix SAMPLE actions OVS_SAMPLE_ATTR_ACTIONS attribute, i.e. nested + * USERSPACE action's user-cookie which is required for sflow. */ static void -xlate_table_action(struct action_xlate_ctx *ctx, uint16_t in_port) +fix_sflow_action(struct action_xlate_ctx *ctx) { - if (ctx->recurse < MAX_RESUBMIT_RECURSION) { - struct rule_dpif *rule; - uint16_t old_in_port; + const struct flow *base = &ctx->base_flow; + struct user_action_cookie *cookie; - /* Look up a flow with 'in_port' as the input port. Then restore the - * original input port (otherwise OFPP_NORMAL and OFPP_IN_PORT will - * have surprising behavior). */ - old_in_port = ctx->flow.in_port; - ctx->flow.in_port = in_port; - rule = rule_dpif_lookup(ctx->ofproto, &ctx->flow); - ctx->flow.in_port = old_in_port; + if (!ctx->user_cookie_offset) { + return; + } - if (ctx->resubmit_hook) { - ctx->resubmit_hook(ctx, rule); - } + cookie = ofpbuf_at(ctx->odp_actions, ctx->user_cookie_offset, + sizeof(*cookie)); + assert(cookie != NULL); + assert(cookie->type == USER_ACTION_COOKIE_SFLOW); - if (rule) { - ctx->recurse++; - do_xlate_actions(rule->up.actions, rule->up.n_actions, ctx); - ctx->recurse--; - } + if (ctx->sflow_n_outputs) { + cookie->data = dpif_sflow_odp_port_to_ifindex(ctx->ofproto->sflow, + ctx->sflow_odp_port); + } + if (ctx->sflow_n_outputs >= 255) { + cookie->n_output = 255; } else { - static struct vlog_rate_limit recurse_rl = VLOG_RATE_LIMIT_INIT(1, 1); - - VLOG_ERR_RL(&recurse_rl, "NXAST_RESUBMIT recursed over %d times", - MAX_RESUBMIT_RECURSION); + cookie->n_output = ctx->sflow_n_outputs; } + cookie->vlan_tci = base->vlan_tci; } static void -flood_packets(struct action_xlate_ctx *ctx, ovs_be32 mask) +commit_action__(struct ofpbuf *odp_actions, + enum ovs_action_attr act_type, + enum ovs_key_attr key_type, + const void *key, size_t key_size) { - struct ofport_dpif *ofport; + size_t offset = nl_msg_start_nested(odp_actions, act_type); - commit_odp_actions(ctx); - HMAP_FOR_EACH (ofport, 
up.hmap_node, &ctx->ofproto->up.ports) { - uint16_t ofp_port = ofport->up.ofp_port; - if (ofp_port != ctx->flow.in_port && !(ofport->up.opp.config & mask)) { - nl_msg_put_u32(ctx->odp_actions, ODP_ACTION_ATTR_OUTPUT, - ofport->odp_port); - } + nl_msg_put_unspec(odp_actions, key_type, key, key_size); + nl_msg_end_nested(odp_actions, offset); +} + +static void +commit_set_tun_id_action(const struct flow *flow, struct flow *base, + struct ofpbuf *odp_actions) +{ + if (base->tun_id == flow->tun_id) { + return; } + base->tun_id = flow->tun_id; - ctx->nf_output_iface = NF_OUT_FLOOD; + commit_action__(odp_actions, OVS_ACTION_ATTR_SET, + OVS_KEY_ATTR_TUN_ID, &base->tun_id, sizeof(base->tun_id)); } static void -xlate_output_action__(struct action_xlate_ctx *ctx, - uint16_t port, uint16_t max_len) +commit_set_ether_addr_action(const struct flow *flow, struct flow *base, + struct ofpbuf *odp_actions) +{ + struct ovs_key_ethernet eth_key; + + if (eth_addr_equals(base->dl_src, flow->dl_src) && + eth_addr_equals(base->dl_dst, flow->dl_dst)) { + return; + } + + memcpy(base->dl_src, flow->dl_src, ETH_ADDR_LEN); + memcpy(base->dl_dst, flow->dl_dst, ETH_ADDR_LEN); + + memcpy(eth_key.eth_src, base->dl_src, ETH_ADDR_LEN); + memcpy(eth_key.eth_dst, base->dl_dst, ETH_ADDR_LEN); + + commit_action__(odp_actions, OVS_ACTION_ATTR_SET, + OVS_KEY_ATTR_ETHERNET, &eth_key, sizeof(eth_key)); +} + +static void +commit_vlan_action(struct action_xlate_ctx *ctx, ovs_be16 new_tci) +{ + struct flow *base = &ctx->base_flow; + + if (base->vlan_tci == new_tci) { + return; + } + + if (base->vlan_tci & htons(VLAN_CFI)) { + nl_msg_put_u16(ctx->odp_actions, OVS_ACTION_ATTR_POP, + OVS_KEY_ATTR_8021Q); + } + + if (new_tci & htons(VLAN_CFI)) { + struct ovs_key_8021q q_key; + + q_key.q_tpid = htons(ETH_TYPE_VLAN); + q_key.q_tci = new_tci & ~htons(VLAN_CFI); + + commit_action__(ctx->odp_actions, OVS_ACTION_ATTR_PUSH, + OVS_KEY_ATTR_8021Q, &q_key, sizeof(q_key)); + } + base->vlan_tci = new_tci; +} + +static void
+commit_set_nw_action(const struct flow *flow, struct flow *base, + struct ofpbuf *odp_actions) +{ + int frag = base->tos_frag & FLOW_FRAG_MASK; + struct ovs_key_ipv4 ipv4_key; + + if (base->dl_type != htons(ETH_TYPE_IP) || + !base->nw_src || !base->nw_dst) { + return; + } + + if (base->nw_src == flow->nw_src && + base->nw_dst == flow->nw_dst && + base->tos_frag == flow->tos_frag) { + return; + } + + + memset(&ipv4_key, 0, sizeof(ipv4_key)); + ipv4_key.ipv4_src = base->nw_src = flow->nw_src; + ipv4_key.ipv4_dst = base->nw_dst = flow->nw_dst; + ipv4_key.ipv4_proto = base->nw_proto; + ipv4_key.ipv4_tos = flow->tos_frag & IP_DSCP_MASK; + ipv4_key.ipv4_frag = (frag == 0 ? OVS_FRAG_TYPE_NONE + : frag == FLOW_FRAG_ANY ? OVS_FRAG_TYPE_FIRST + : OVS_FRAG_TYPE_LATER); + + commit_action__(odp_actions, OVS_ACTION_ATTR_SET, + OVS_KEY_ATTR_IPV4, &ipv4_key, sizeof(ipv4_key)); +} + +static void +commit_set_port_action(const struct flow *flow, struct flow *base, + struct ofpbuf *odp_actions) +{ + if (!base->tp_src || !base->tp_dst) { + return; + } + + if (base->tp_src == flow->tp_src && + base->tp_dst == flow->tp_dst) { + return; + } + + if (flow->nw_proto == IPPROTO_TCP) { + struct ovs_key_tcp port_key; + + port_key.tcp_src = base->tp_src = flow->tp_src; + port_key.tcp_dst = base->tp_dst = flow->tp_dst; + + commit_action__(odp_actions, OVS_ACTION_ATTR_SET, + OVS_KEY_ATTR_TCP, &port_key, sizeof(port_key)); + + } else if (flow->nw_proto == IPPROTO_UDP) { + struct ovs_key_udp port_key; + + port_key.udp_src = base->tp_src = flow->tp_src; + port_key.udp_dst = base->tp_dst = flow->tp_dst; + + commit_action__(odp_actions, OVS_ACTION_ATTR_SET, + OVS_KEY_ATTR_UDP, &port_key, sizeof(port_key)); + } +} + +static void +commit_priority_action(struct action_xlate_ctx *ctx) +{ + if (ctx->base_priority == ctx->priority) { + return; + } + + if (ctx->priority) { + nl_msg_put_u32(ctx->odp_actions, + OVS_ACTION_ATTR_SET_PRIORITY, ctx->priority); + } else { + nl_msg_put_flag(ctx->odp_actions, 
OVS_ACTION_ATTR_POP_PRIORITY); + } + ctx->base_priority = ctx->priority; +} + +static void +commit_odp_actions(struct action_xlate_ctx *ctx) +{ + const struct flow *flow = &ctx->flow; + struct flow *base = &ctx->base_flow; + struct ofpbuf *odp_actions = ctx->odp_actions; + + commit_set_tun_id_action(flow, base, odp_actions); + commit_set_ether_addr_action(flow, base, odp_actions); + commit_vlan_action(ctx, flow->vlan_tci); + commit_set_nw_action(flow, base, odp_actions); + commit_set_port_action(flow, base, odp_actions); + commit_priority_action(ctx); +} + +static void +compose_output_action(struct action_xlate_ctx *ctx, uint16_t odp_port) +{ + nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_OUTPUT, odp_port); + ctx->sflow_odp_port = odp_port; + ctx->sflow_n_outputs++; +} + +static void +add_output_action(struct action_xlate_ctx *ctx, uint16_t ofp_port) +{ + const struct ofport_dpif *ofport = get_ofp_port(ctx->ofproto, ofp_port); + uint16_t odp_port = ofp_port_to_odp_port(ofp_port); + + if (ofport) { + if (ofport->up.opp.config & htonl(OFPPC_NO_FWD) + || !stp_forward_in_state(ofport->stp_state)) { + /* Forwarding disabled on port. */ + return; + } + } else { + /* + * We don't have an ofport record for this port, but it doesn't hurt to + * allow forwarding to it anyhow. Maybe such a port will appear later + * and we're pre-populating the flow table. + */ + } + + commit_odp_actions(ctx); + compose_output_action(ctx, odp_port); + ctx->nf_output_iface = ofp_port; +} + +static void +xlate_table_action(struct action_xlate_ctx *ctx, + uint16_t in_port, uint8_t table_id) +{ + if (ctx->recurse < MAX_RESUBMIT_RECURSION) { + struct ofproto_dpif *ofproto = ctx->ofproto; + struct rule_dpif *rule; + uint16_t old_in_port; + uint8_t old_table_id; + + old_table_id = ctx->table_id; + ctx->table_id = table_id; + + /* Look up a flow with 'in_port' as the input port. 
*/ + old_in_port = ctx->flow.in_port; + ctx->flow.in_port = in_port; + rule = rule_dpif_lookup(ofproto, &ctx->flow, table_id); + + /* Tag the flow. */ + if (table_id > 0 && table_id < N_TABLES) { + struct table_dpif *table = &ofproto->tables[table_id]; + if (table->other_table) { + ctx->tags |= (rule + ? rule->tag + : rule_calculate_tag(&ctx->flow, + &table->other_table->wc, + table->basis)); + } + } + + /* Restore the original input port. Otherwise OFPP_NORMAL and + * OFPP_IN_PORT will have surprising behavior. */ + ctx->flow.in_port = old_in_port; + + if (ctx->resubmit_hook) { + ctx->resubmit_hook(ctx, rule); + } + + if (rule) { + ctx->recurse++; + do_xlate_actions(rule->up.actions, rule->up.n_actions, ctx); + ctx->recurse--; + } + + ctx->table_id = old_table_id; + } else { + static struct vlog_rate_limit recurse_rl = VLOG_RATE_LIMIT_INIT(1, 1); + + VLOG_ERR_RL(&recurse_rl, "resubmit actions recursed over %d times", + MAX_RESUBMIT_RECURSION); + } +} + +static void +xlate_resubmit_table(struct action_xlate_ctx *ctx, + const struct nx_action_resubmit *nar) +{ + uint16_t in_port; + uint8_t table_id; + + in_port = (nar->in_port == htons(OFPP_IN_PORT) + ? ctx->flow.in_port + : ntohs(nar->in_port)); + table_id = nar->table == 255 ? 
ctx->table_id : nar->table; + + xlate_table_action(ctx, in_port, table_id); +} + +static void +flood_packets(struct action_xlate_ctx *ctx, ovs_be32 mask) +{ + struct ofport_dpif *ofport; + + commit_odp_actions(ctx); + HMAP_FOR_EACH (ofport, up.hmap_node, &ctx->ofproto->up.ports) { + uint16_t ofp_port = ofport->up.ofp_port; + if (ofp_port != ctx->flow.in_port + && !(ofport->up.opp.config & mask) + && stp_forward_in_state(ofport->stp_state)) { + compose_output_action(ctx, ofport->odp_port); + } + } + + ctx->nf_output_iface = NF_OUT_FLOOD; +} + +static void +compose_controller_action(struct action_xlate_ctx *ctx, int len) +{ + struct user_action_cookie cookie; + + cookie.type = USER_ACTION_COOKIE_CONTROLLER; + cookie.data = len; + cookie.n_output = 0; + cookie.vlan_tci = 0; + put_userspace_action(ctx->ofproto, ctx->odp_actions, &ctx->flow, &cookie); +} + +static void +xlate_output_action__(struct action_xlate_ctx *ctx, + uint16_t port, uint16_t max_len) { uint16_t prev_nf_output_iface = ctx->nf_output_iface; @@ -2860,7 +3896,7 @@ xlate_output_action__(struct action_xlate_ctx *ctx, add_output_action(ctx, ctx->flow.in_port); break; case OFPP_TABLE: - xlate_table_action(ctx, ctx->flow.in_port); + xlate_table_action(ctx, ctx->flow.in_port, ctx->table_id); break; case OFPP_NORMAL: xlate_normal(ctx); @@ -2873,11 +3909,13 @@ xlate_output_action__(struct action_xlate_ctx *ctx, break; case OFPP_CONTROLLER: commit_odp_actions(ctx); - nl_msg_put_u64(ctx->odp_actions, ODP_ACTION_ATTR_CONTROLLER, max_len); + compose_controller_action(ctx, max_len); break; case OFPP_LOCAL: add_output_action(ctx, OFPP_LOCAL); break; + case OFPP_NONE: + break; default: if (port != ctx->flow.in_port) { add_output_action(ctx, port); @@ -2895,6 +3933,19 @@ xlate_output_action__(struct action_xlate_ctx *ctx, } } +static void +xlate_output_reg_action(struct action_xlate_ctx *ctx, + const struct nx_action_output_reg *naor) +{ + uint64_t ofp_port; + + ofp_port = nxm_read_field_bits(naor->src, 
naor->ofs_nbits, &ctx->flow); + + if (ofp_port <= UINT16_MAX) { + xlate_output_action__(ctx, ofp_port, ntohs(naor->max_len)); + } +} + static void xlate_output_action(struct action_xlate_ctx *ctx, const struct ofp_action_output *oao) @@ -2918,14 +3969,16 @@ xlate_enqueue_action(struct action_xlate_ctx *ctx, return; } - /* Figure out ODP output port. */ + /* Figure out datapath output port. */ ofp_port = ntohs(oae->port); if (ofp_port == OFPP_IN_PORT) { ofp_port = ctx->flow.in_port; + } else if (ofp_port == ctx->flow.in_port) { + return; } odp_port = ofp_port_to_odp_port(ofp_port); - /* Add ODP actions. */ + /* Add datapath actions. */ ctx_priority = ctx->priority; ctx->priority = priority; add_output_action(ctx, odp_port); @@ -2974,7 +4027,7 @@ xlate_autopath(struct action_xlate_ctx *ctx, } else if (port->bundle->bond) { /* Autopath does not support VLAN hashing. */ struct ofport_dpif *slave = bond_choose_output_slave( - port->bundle->bond, &ctx->flow, OFP_VLAN_NONE, &ctx->tags); + port->bundle->bond, &ctx->flow, 0, &ctx->tags); if (slave) { ofp_port = slave->up.ofp_port; } @@ -2982,89 +4035,67 @@ xlate_autopath(struct action_xlate_ctx *ctx, autopath_execute(naa, &ctx->flow, ofp_port); } -static void -xlate_nicira_action(struct action_xlate_ctx *ctx, - const struct nx_action_header *nah) +static bool +slave_enabled_cb(uint16_t ofp_port, void *ofproto_) { - const struct nx_action_resubmit *nar; - const struct nx_action_set_tunnel *nast; - const struct nx_action_set_queue *nasq; - const struct nx_action_multipath *nam; - const struct nx_action_autopath *naa; - enum nx_action_subtype subtype = ntohs(nah->subtype); - ovs_be64 tun_id; - - assert(nah->vendor == htonl(NX_VENDOR_ID)); - switch (subtype) { - case NXAST_RESUBMIT: - nar = (const struct nx_action_resubmit *) nah; - xlate_table_action(ctx, ntohs(nar->in_port)); - break; - - case NXAST_SET_TUNNEL: - nast = (const struct nx_action_set_tunnel *) nah; - tun_id = htonll(ntohl(nast->tun_id)); - ctx->flow.tun_id = 
tun_id; - break; - - case NXAST_DROP_SPOOFED_ARP: - if (ctx->flow.dl_type == htons(ETH_TYPE_ARP)) { - /* XXX: It's not entirely clear whether or not we need to commit - * here. The safer thing to do is commit of course. Hopefully in - * the near future we can rip out NXAST_DROP_SPOOFED_ARP altogether - * and the point will be moot. */ - commit_odp_actions(ctx); - nl_msg_put_flag(ctx->odp_actions, - ODP_ACTION_ATTR_DROP_SPOOFED_ARP); - } - break; - - case NXAST_SET_QUEUE: - nasq = (const struct nx_action_set_queue *) nah; - xlate_set_queue_action(ctx, nasq); - break; - - case NXAST_POP_QUEUE: - ctx->priority = 0; - break; - - case NXAST_REG_MOVE: - nxm_execute_reg_move((const struct nx_action_reg_move *) nah, - &ctx->flow); - break; + struct ofproto_dpif *ofproto = ofproto_; + struct ofport_dpif *port; - case NXAST_REG_LOAD: - nxm_execute_reg_load((const struct nx_action_reg_load *) nah, - &ctx->flow); - break; + switch (ofp_port) { + case OFPP_IN_PORT: + case OFPP_TABLE: + case OFPP_NORMAL: + case OFPP_FLOOD: + case OFPP_ALL: + case OFPP_NONE: + return true; + case OFPP_CONTROLLER: /* Not supported by the bundle action. */ + return false; + default: + port = get_ofp_port(ofproto, ofp_port); + return port ? port->may_enable : false; + } +} - case NXAST_NOTE: - /* Nothing to do. 
*/ - break; +static void +xlate_learn_action(struct action_xlate_ctx *ctx, + const struct nx_action_learn *learn) +{ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); + struct ofputil_flow_mod fm; + int error; - case NXAST_SET_TUNNEL64: - tun_id = ((const struct nx_action_set_tunnel64 *) nah)->tun_id; - ctx->flow.tun_id = tun_id; - break; + learn_execute(learn, &ctx->flow, &fm); - case NXAST_MULTIPATH: - nam = (const struct nx_action_multipath *) nah; - multipath_execute(nam, &ctx->flow); - break; + error = ofproto_flow_mod(&ctx->ofproto->up, &fm); + if (error && !VLOG_DROP_WARN(&rl)) { + char *msg = ofputil_error_to_string(error); + VLOG_WARN("learning action failed to modify flow table (%s)", msg); + free(msg); + } - case NXAST_AUTOPATH: - naa = (const struct nx_action_autopath *) nah; - xlate_autopath(ctx, naa); - break; + free(fm.actions); +} - /* If you add a new action here that modifies flow data, don't forget to - * update the flow key in ctx->flow at the same time. */ +static bool +may_receive(const struct ofport_dpif *port, struct action_xlate_ctx *ctx) +{ + if (port->up.opp.config & (eth_addr_equals(ctx->flow.dl_dst, eth_addr_stp) + ? htonl(OFPPC_NO_RECV_STP) + : htonl(OFPPC_NO_RECV))) { + return false; + } - case NXAST_SNAT__OBSOLETE: - default: - VLOG_DBG_RL(&rl, "unknown Nicira action type %d", (int) subtype); - break; + /* Only drop packets here if both forwarding and learning are + * disabled. If just learning is enabled, we need to have + * OFPP_NORMAL and the learning action have a look at the packet + * before we can drop it. 
*/ + if (!stp_forward_in_state(port->stp_state) + && !stp_learn_in_state(port->stp_state)) { + return false; } + + return true; } static void @@ -3072,86 +4103,179 @@ do_xlate_actions(const union ofp_action *in, size_t n_in, struct action_xlate_ctx *ctx) { const struct ofport_dpif *port; - struct actions_iterator iter; const union ofp_action *ia; + size_t left; port = get_ofp_port(ctx->ofproto, ctx->flow.in_port); - if (port - && port->up.opp.config & htonl(OFPPC_NO_RECV | OFPPC_NO_RECV_STP) && - port->up.opp.config & (eth_addr_equals(ctx->flow.dl_dst, eth_addr_stp) - ? htonl(OFPPC_NO_RECV_STP) - : htonl(OFPPC_NO_RECV))) { + if (port && !may_receive(port, ctx)) { /* Drop this flow. */ return; } - for (ia = actions_first(&iter, in, n_in); ia; ia = actions_next(&iter)) { - enum ofp_action_type type = ntohs(ia->type); + OFPUTIL_ACTION_FOR_EACH_UNSAFE (ia, left, in, n_in) { const struct ofp_action_dl_addr *oada; + const struct nx_action_resubmit *nar; + const struct nx_action_set_tunnel *nast; + const struct nx_action_set_queue *nasq; + const struct nx_action_multipath *nam; + const struct nx_action_autopath *naa; + const struct nx_action_bundle *nab; + const struct nx_action_output_reg *naor; + enum ofputil_action_code code; + ovs_be64 tun_id; + + if (ctx->exit) { + break; + } - switch (type) { - case OFPAT_OUTPUT: + code = ofputil_decode_action_unsafe(ia); + switch (code) { + case OFPUTIL_OFPAT_OUTPUT: xlate_output_action(ctx, &ia->output); break; - case OFPAT_SET_VLAN_VID: + case OFPUTIL_OFPAT_SET_VLAN_VID: ctx->flow.vlan_tci &= ~htons(VLAN_VID_MASK); ctx->flow.vlan_tci |= ia->vlan_vid.vlan_vid | htons(VLAN_CFI); break; - case OFPAT_SET_VLAN_PCP: + case OFPUTIL_OFPAT_SET_VLAN_PCP: ctx->flow.vlan_tci &= ~htons(VLAN_PCP_MASK); ctx->flow.vlan_tci |= htons( (ia->vlan_pcp.vlan_pcp << VLAN_PCP_SHIFT) | VLAN_CFI); break; - case OFPAT_STRIP_VLAN: + case OFPUTIL_OFPAT_STRIP_VLAN: ctx->flow.vlan_tci = htons(0); break; - case OFPAT_SET_DL_SRC: + case OFPUTIL_OFPAT_SET_DL_SRC: 
oada = ((struct ofp_action_dl_addr *) ia); memcpy(ctx->flow.dl_src, oada->dl_addr, ETH_ADDR_LEN); break; - case OFPAT_SET_DL_DST: + case OFPUTIL_OFPAT_SET_DL_DST: oada = ((struct ofp_action_dl_addr *) ia); memcpy(ctx->flow.dl_dst, oada->dl_addr, ETH_ADDR_LEN); break; - case OFPAT_SET_NW_SRC: + case OFPUTIL_OFPAT_SET_NW_SRC: ctx->flow.nw_src = ia->nw_addr.nw_addr; break; - case OFPAT_SET_NW_DST: + case OFPUTIL_OFPAT_SET_NW_DST: ctx->flow.nw_dst = ia->nw_addr.nw_addr; break; - case OFPAT_SET_NW_TOS: - ctx->flow.nw_tos = ia->nw_tos.nw_tos; + case OFPUTIL_OFPAT_SET_NW_TOS: + ctx->flow.tos_frag &= ~IP_DSCP_MASK; + ctx->flow.tos_frag |= ia->nw_tos.nw_tos & IP_DSCP_MASK; break; - case OFPAT_SET_TP_SRC: + case OFPUTIL_OFPAT_SET_TP_SRC: ctx->flow.tp_src = ia->tp_port.tp_port; break; - case OFPAT_SET_TP_DST: + case OFPUTIL_OFPAT_SET_TP_DST: ctx->flow.tp_dst = ia->tp_port.tp_port; break; - case OFPAT_VENDOR: - xlate_nicira_action(ctx, (const struct nx_action_header *) ia); + case OFPUTIL_OFPAT_ENQUEUE: + xlate_enqueue_action(ctx, (const struct ofp_action_enqueue *) ia); break; - case OFPAT_ENQUEUE: - xlate_enqueue_action(ctx, (const struct ofp_action_enqueue *) ia); + case OFPUTIL_NXAST_RESUBMIT: + nar = (const struct nx_action_resubmit *) ia; + xlate_table_action(ctx, ntohs(nar->in_port), ctx->table_id); + break; + + case OFPUTIL_NXAST_RESUBMIT_TABLE: + xlate_resubmit_table(ctx, (const struct nx_action_resubmit *) ia); + break; + + case OFPUTIL_NXAST_SET_TUNNEL: + nast = (const struct nx_action_set_tunnel *) ia; + tun_id = htonll(ntohl(nast->tun_id)); + ctx->flow.tun_id = tun_id; + break; + + case OFPUTIL_NXAST_SET_QUEUE: + nasq = (const struct nx_action_set_queue *) ia; + xlate_set_queue_action(ctx, nasq); + break; + + case OFPUTIL_NXAST_POP_QUEUE: + ctx->priority = 0; + break; + + case OFPUTIL_NXAST_REG_MOVE: + nxm_execute_reg_move((const struct nx_action_reg_move *) ia, + &ctx->flow); + break; + + case OFPUTIL_NXAST_REG_LOAD: + nxm_execute_reg_load((const struct 
nx_action_reg_load *) ia, + &ctx->flow); + break; + + case OFPUTIL_NXAST_NOTE: + /* Nothing to do. */ + break; + + case OFPUTIL_NXAST_SET_TUNNEL64: + tun_id = ((const struct nx_action_set_tunnel64 *) ia)->tun_id; + ctx->flow.tun_id = tun_id; break; - default: - VLOG_DBG_RL(&rl, "unknown action type %d", (int) type); + case OFPUTIL_NXAST_MULTIPATH: + nam = (const struct nx_action_multipath *) ia; + multipath_execute(nam, &ctx->flow); + break; + + case OFPUTIL_NXAST_AUTOPATH: + naa = (const struct nx_action_autopath *) ia; + xlate_autopath(ctx, naa); + break; + + case OFPUTIL_NXAST_BUNDLE: + ctx->ofproto->has_bundle_action = true; + nab = (const struct nx_action_bundle *) ia; + xlate_output_action__(ctx, bundle_execute(nab, &ctx->flow, + slave_enabled_cb, + ctx->ofproto), 0); + break; + + case OFPUTIL_NXAST_BUNDLE_LOAD: + ctx->ofproto->has_bundle_action = true; + nab = (const struct nx_action_bundle *) ia; + bundle_execute_load(nab, &ctx->flow, slave_enabled_cb, + ctx->ofproto); + break; + + case OFPUTIL_NXAST_OUTPUT_REG: + naor = (const struct nx_action_output_reg *) ia; + xlate_output_reg_action(ctx, naor); + break; + + case OFPUTIL_NXAST_LEARN: + ctx->has_learn = true; + if (ctx->may_learn) { + xlate_learn_action(ctx, (const struct nx_action_learn *) ia); + } + break; + + case OFPUTIL_NXAST_EXIT: + ctx->exit = true; break; } } + + /* We've let OFPP_NORMAL and the learning action look at the packet, + * so drop it now if forwarding is disabled. 
*/ + if (port && !stp_forward_in_state(port->stp_state)) { + ofpbuf_clear(ctx->odp_actions); + add_sflow_action(ctx); + } } static void @@ -3162,6 +4286,7 @@ action_xlate_ctx_init(struct action_xlate_ctx *ctx, ctx->ofproto = ofproto; ctx->flow = *flow; ctx->packet = packet; + ctx->may_learn = packet != NULL; ctx->resubmit_hook = NULL; } @@ -3172,26 +4297,58 @@ xlate_actions(struct action_xlate_ctx *ctx, COVERAGE_INC(ofproto_dpif_xlate); ctx->odp_actions = ofpbuf_new(512); + ofpbuf_reserve(ctx->odp_actions, NL_A_U32_SIZE); ctx->tags = 0; ctx->may_set_up_flow = true; + ctx->has_learn = false; + ctx->has_normal = false; ctx->nf_output_iface = NF_OUT_DROP; ctx->recurse = 0; ctx->priority = 0; ctx->base_priority = 0; ctx->base_flow = ctx->flow; + ctx->base_flow.tun_id = 0; + ctx->table_id = 0; + ctx->exit = false; + + if (ctx->flow.tos_frag & FLOW_FRAG_ANY) { + switch (ctx->ofproto->up.frag_handling) { + case OFPC_FRAG_NORMAL: + /* We must pretend that transport ports are unavailable. */ + ctx->flow.tp_src = ctx->base_flow.tp_src = htons(0); + ctx->flow.tp_dst = ctx->base_flow.tp_dst = htons(0); + break; + + case OFPC_FRAG_DROP: + return ctx->odp_actions; + + case OFPC_FRAG_REASM: + NOT_REACHED(); + + case OFPC_FRAG_NX_MATCH: + /* Nothing to do. */ + break; + } + } if (process_special(ctx->ofproto, &ctx->flow, ctx->packet)) { ctx->may_set_up_flow = false; + return ctx->odp_actions; } else { + add_sflow_action(ctx); do_xlate_actions(in, n_in, ctx); - } - /* Check with in-band control to see if we're allowed to set up this - * flow. 
*/ - if (!connmgr_may_set_up_flow(ctx->ofproto->up.connmgr, &ctx->flow, - ctx->odp_actions->data, - ctx->odp_actions->size)) { - ctx->may_set_up_flow = false; + if (!connmgr_may_set_up_flow(ctx->ofproto->up.connmgr, &ctx->flow, + ctx->odp_actions->data, + ctx->odp_actions->size)) { + ctx->may_set_up_flow = false; + if (ctx->packet + && connmgr_msg_in_hook(ctx->ofproto->up.connmgr, &ctx->flow, + ctx->packet)) { + compose_output_action(ctx, OVSP_LOCAL); + } + } + fix_sflow_action(ctx); } return ctx->odp_actions; @@ -3201,7 +4358,7 @@ xlate_actions(struct action_xlate_ctx *ctx, struct dst { struct ofport_dpif *port; - uint16_t vlan; + uint16_t vid; }; struct dst_set { @@ -3216,20 +4373,71 @@ static void dst_set_free(struct dst_set *); static struct ofport_dpif *ofbundle_get_a_port(const struct ofbundle *); +/* Given 'vid', the VID obtained from the 802.1Q header that was received as + * part of a packet (specify 0 if there was no 802.1Q header), and 'in_bundle', + * the bundle on which the packet was received, returns the VLAN to which the + * packet belongs. + * + * Both 'vid' and the return value are in the range 0...4095. */ +static uint16_t +input_vid_to_vlan(const struct ofbundle *in_bundle, uint16_t vid) +{ + switch (in_bundle->vlan_mode) { + case PORT_VLAN_ACCESS: + return in_bundle->vlan; + break; + + case PORT_VLAN_TRUNK: + return vid; + + case PORT_VLAN_NATIVE_UNTAGGED: + case PORT_VLAN_NATIVE_TAGGED: + return vid ? vid : in_bundle->vlan; + + default: + NOT_REACHED(); + } +} + +/* Given 'vlan', the VLAN that a packet belongs to, and + * 'out_bundle', a bundle on which the packet is to be output, returns the VID + * that should be included in the 802.1Q header. (If the return value is 0, + * then the 802.1Q header should only be included in the packet if there is a + * nonzero PCP.) + * + * Both 'vlan' and the return value are in the range 0...4095. 
*/ +static uint16_t +output_vlan_to_vid(const struct ofbundle *out_bundle, uint16_t vlan) +{ + switch (out_bundle->vlan_mode) { + case PORT_VLAN_ACCESS: + return 0; + + case PORT_VLAN_TRUNK: + case PORT_VLAN_NATIVE_TAGGED: + return vlan; + + case PORT_VLAN_NATIVE_UNTAGGED: + return vlan == out_bundle->vlan ? 0 : vlan; + + default: + NOT_REACHED(); + } +} + static bool set_dst(struct action_xlate_ctx *ctx, struct dst *dst, const struct ofbundle *in_bundle, const struct ofbundle *out_bundle) { - dst->vlan = (out_bundle->vlan >= 0 ? OFP_VLAN_NONE - : in_bundle->vlan >= 0 ? in_bundle->vlan - : ctx->flow.vlan_tci == 0 ? OFP_VLAN_NONE - : vlan_tci_to_vid(ctx->flow.vlan_tci)); + uint16_t vlan; + + vlan = input_vid_to_vlan(in_bundle, vlan_tci_to_vid(ctx->flow.vlan_tci)); + dst->vid = output_vlan_to_vid(out_bundle, vlan); dst->port = (!out_bundle->bond ? ofbundle_get_a_port(out_bundle) : bond_choose_output_slave(out_bundle->bond, &ctx->flow, - dst->vlan, &ctx->tags)); - + dst->vid, &ctx->tags)); return dst->port != NULL; } @@ -3280,7 +4488,7 @@ dst_is_duplicate(const struct dst_set *set, const struct dst *test) { size_t i; for (i = 0; i < set->n; i++) { - if (set->dsts[i].vlan == test->vlan + if (set->dsts[i].vid == test->vid && set->dsts[i].port == test->port) { return true; } @@ -3291,7 +4499,8 @@ dst_is_duplicate(const struct dst_set *set, const struct dst *test) static bool ofbundle_trunks_vlan(const struct ofbundle *bundle, uint16_t vlan) { - return bundle->vlan < 0 && vlan_bitmap_contains(bundle->trunks, vlan); + return (bundle->vlan_mode != PORT_VLAN_ACCESS + && (!bundle->trunks || bitmap_is_set(bundle->trunks, vlan))); } static bool @@ -3337,7 +4546,48 @@ compose_dsts(struct action_xlate_ctx *ctx, uint16_t vlan, static bool vlan_is_mirrored(const struct ofmirror *m, int vlan) { - return vlan_bitmap_contains(m->vlans, vlan); + return !m->vlans || bitmap_is_set(m->vlans, vlan); +} + +/* Returns true if a packet with Ethernet destination MAC 'dst' may be mirrored + * 
to a VLAN. In general most packets may be mirrored but we want to drop + * protocols that may confuse switches. */ +static bool +eth_dst_may_rspan(const uint8_t dst[ETH_ADDR_LEN]) +{ + /* If you change this function's behavior, please update corresponding + * documentation in vswitch.xml at the same time. */ + if (dst[0] != 0x01) { + /* All the currently banned MACs happen to start with 01, so + * this is a quick way to eliminate most of the good ones. */ + } else { + if (eth_addr_is_reserved(dst)) { + /* Drop STP, IEEE pause frames, and other reserved protocols + * (01-80-c2-00-00-0x). */ + return false; + } + + if (dst[0] == 0x01 && dst[1] == 0x00 && dst[2] == 0x0c) { + /* Cisco OUI. */ + if ((dst[3] & 0xfe) == 0xcc && + (dst[4] & 0xfe) == 0xcc && + (dst[5] & 0xfe) == 0xcc) { + /* Drop the following protocols plus others following the same + pattern: + + CDP, VTP, DTP, PAgP (01-00-0c-cc-cc-cc) + Spanning Tree PVSTP+ (01-00-0c-cc-cc-cd) + STP Uplink Fast (01-00-0c-cd-cd-cd) */ + return false; + } + + if (!(dst[3] | dst[4] | dst[5])) { + /* Drop Inter Switch Link packets (01-00-0c-00-00-00). 
*/ + return false; + } + } + } + return true; } static void @@ -3347,7 +4597,7 @@ compose_mirror_dsts(struct action_xlate_ctx *ctx, { struct ofproto_dpif *ofproto = ctx->ofproto; mirror_mask_t mirrors; - int flow_vlan; + uint16_t flow_vid; size_t i; mirrors = in_bundle->src_mirrors; @@ -3359,11 +4609,7 @@ compose_mirror_dsts(struct action_xlate_ctx *ctx, return; } - flow_vlan = vlan_tci_to_vid(ctx->flow.vlan_tci); - if (flow_vlan == 0) { - flow_vlan = OFP_VLAN_NONE; - } - + flow_vid = vlan_tci_to_vid(ctx->flow.vlan_tci); while (mirrors) { struct ofmirror *m = ofproto->mirrors[mirror_mask_ffs(mirrors) - 1]; if (vlan_is_mirrored(m, vlan)) { @@ -3374,27 +4620,22 @@ compose_mirror_dsts(struct action_xlate_ctx *ctx, && !dst_is_duplicate(set, &dst)) { dst_set_add(set, &dst); } - } else { + } else if (eth_dst_may_rspan(ctx->flow.dl_dst)) { struct ofbundle *bundle; HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) { if (ofbundle_includes_vlan(bundle, m->out_vlan) && set_dst(ctx, &dst, in_bundle, bundle)) { - if (bundle->vlan < 0) { - dst.vlan = m->out_vlan; - } + /* set_dst() got dst->vid from the input packet's VLAN, + * not from m->out_vlan, so recompute it. */ + dst.vid = output_vlan_to_vid(bundle, m->out_vlan); + if (dst_is_duplicate(set, &dst)) { continue; } - /* Use the vlan tag on the original flow instead of - * the one passed in the vlan parameter. This ensures - * that we compare the vlan from before any implicit - * tagging tags place. This is necessary because - * dst->vlan is the final vlan, after removing implicit - * tags. */ - if (bundle == in_bundle && dst.vlan == flow_vlan) { + if (bundle == in_bundle && dst.vid == flow_vid) { /* Don't send out input port on same VLAN. 
*/ continue; } @@ -3412,47 +4653,47 @@ compose_actions(struct action_xlate_ctx *ctx, uint16_t vlan, const struct ofbundle *in_bundle, const struct ofbundle *out_bundle) { - uint16_t initial_vlan, cur_vlan; + uint16_t initial_vid, cur_vid; const struct dst *dst; struct dst_set set; dst_set_init(&set); compose_dsts(ctx, vlan, in_bundle, out_bundle, &set); compose_mirror_dsts(ctx, vlan, in_bundle, &set); + if (!set.n) { + dst_set_free(&set); + return; + } /* Output all the packets we can without having to change the VLAN. */ - initial_vlan = vlan_tci_to_vid(ctx->flow.vlan_tci); - if (initial_vlan == 0) { - initial_vlan = OFP_VLAN_NONE; - } + commit_odp_actions(ctx); + initial_vid = vlan_tci_to_vid(ctx->flow.vlan_tci); for (dst = set.dsts; dst < &set.dsts[set.n]; dst++) { - if (dst->vlan != initial_vlan) { + if (dst->vid != initial_vid) { continue; } - nl_msg_put_u32(ctx->odp_actions, - ODP_ACTION_ATTR_OUTPUT, dst->port->odp_port); + compose_output_action(ctx, dst->port->odp_port); } /* Then output the rest. 
*/ - cur_vlan = initial_vlan; + cur_vid = initial_vid; for (dst = set.dsts; dst < &set.dsts[set.n]; dst++) { - if (dst->vlan == initial_vlan) { + if (dst->vid == initial_vid) { continue; } - if (dst->vlan != cur_vlan) { - if (dst->vlan == OFP_VLAN_NONE) { - nl_msg_put_flag(ctx->odp_actions, ODP_ACTION_ATTR_STRIP_VLAN); - } else { - ovs_be16 tci; - tci = htons(dst->vlan & VLAN_VID_MASK); - tci |= ctx->flow.vlan_tci & htons(VLAN_PCP_MASK); - nl_msg_put_be16(ctx->odp_actions, - ODP_ACTION_ATTR_SET_DL_TCI, tci); + if (dst->vid != cur_vid) { + ovs_be16 tci; + + tci = htons(dst->vid); + tci |= ctx->flow.vlan_tci & htons(VLAN_PCP_MASK); + if (tci) { + tci |= htons(VLAN_CFI); } - cur_vlan = dst->vlan; + commit_vlan_action(ctx, tci); + + cur_vid = dst->vid; } - nl_msg_put_u32(ctx->odp_actions, - ODP_ACTION_ATTR_OUTPUT, dst->port->odp_port); + compose_output_action(ctx, dst->port->odp_port); } dst_set_free(&set); @@ -3467,8 +4708,9 @@ flow_get_vlan(struct ofproto_dpif *ofproto, const struct flow *flow, struct ofbundle *in_bundle, bool have_packet) { int vlan = vlan_tci_to_vid(flow->vlan_tci); - if (in_bundle->vlan >= 0) { - if (vlan) { + if (vlan) { + if (in_bundle->vlan_mode == PORT_VLAN_ACCESS) { + /* Drop tagged packet on access port */ if (have_packet) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %d tagged " @@ -3478,10 +4720,10 @@ flow_get_vlan(struct ofproto_dpif *ofproto, const struct flow *flow, in_bundle->name, in_bundle->vlan); } return -1; - } - vlan = in_bundle->vlan; - } else { - if (!ofbundle_includes_vlan(in_bundle, vlan)) { + } else if (ofbundle_includes_vlan(in_bundle, vlan)) { + return vlan; + } else { + /* Drop packets from a VLAN not member of the trunk */ if (have_packet) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %d tagged " @@ -3491,9 +4733,13 @@ flow_get_vlan(struct ofproto_dpif *ofproto, const struct flow *flow, } return 
-1; } + } else { + if (in_bundle->vlan_mode != PORT_VLAN_TRUNK) { + return in_bundle->vlan; + } else { + return ofbundle_includes_vlan(in_bundle, 0) ? 0 : -1; + } } - - return vlan; } /* A VM broadcasts a gratuitous ARP to indicate that it has resumed after @@ -3598,6 +4844,7 @@ is_admissible(struct ofproto_dpif *ofproto, const struct flow *flow, "port %"PRIu16, ofproto->up.name, flow->in_port); } + *vlanp = -1; return false; } *vlanp = vlan = flow_get_vlan(ofproto, flow, in_bundle, have_packet); @@ -3605,8 +4852,9 @@ is_admissible(struct ofproto_dpif *ofproto, const struct flow *flow, return false; } - /* Drop frames for reserved multicast addresses. */ - if (eth_addr_is_reserved(flow->dl_dst)) { + /* Drop frames for reserved multicast addresses only if forward_bpdu + * option is absent. */ + if (eth_addr_is_reserved(flow->dl_dst) && !ofproto->up.forward_bpdu) { return false; } @@ -3646,10 +4894,7 @@ is_admissible(struct ofproto_dpif *ofproto, const struct flow *flow, return true; } -/* If the composed actions may be applied to any packet in the given 'flow', - * returns true. Otherwise, the actions should only be applied to 'packet', or - * not at all, if 'packet' was NULL. */ -static bool +static void xlate_normal(struct action_xlate_ctx *ctx) { struct ofbundle *in_bundle; @@ -3657,6 +4902,8 @@ xlate_normal(struct action_xlate_ctx *ctx) struct mac_entry *mac; int vlan; + ctx->has_normal = true; + /* Check whether we should drop packets in this flow. */ if (!is_admissible(ctx->ofproto, &ctx->flow, ctx->packet != NULL, &ctx->tags, &vlan, &in_bundle)) { @@ -3664,8 +4911,8 @@ xlate_normal(struct action_xlate_ctx *ctx) goto done; } - /* Learn source MAC (but don't try to learn from revalidation). */ - if (ctx->packet) { + /* Learn source MAC. 
*/ + if (ctx->may_learn) { update_learning_table(ctx->ofproto, &ctx->flow, vlan, in_bundle); } @@ -3680,7 +4927,8 @@ xlate_normal(struct action_xlate_ctx *ctx) * of time where we could learn from a packet reflected on a bond and * blackhole packets before the learning table is updated to reflect * the correct port. */ - return false; + ctx->may_set_up_flow = false; + return; } else { out_bundle = OFBUNDLE_FLOOD; } @@ -3694,26 +4942,134 @@ done: if (in_bundle) { compose_actions(ctx, vlan, in_bundle, out_bundle); } - - return true; } -static bool -get_drop_frags(struct ofproto *ofproto_) +/* Optimized flow revalidation. + * + * It's a difficult problem, in general, to tell which facets need to have + * their actions recalculated whenever the OpenFlow flow table changes. We + * don't try to solve that general problem: for most kinds of OpenFlow flow + * table changes, we recalculate the actions for every facet. This is + * relatively expensive, but it's good enough if the OpenFlow flow table + * doesn't change very often. + * + * However, we can expect one particular kind of OpenFlow flow table change to + * happen frequently: changes caused by MAC learning. To avoid wasting a lot + * of CPU on revalidating every facet whenever MAC learning modifies the flow + * table, we add a special case that applies to flow tables in which every rule + * has the same form (that is, the same wildcards), except that the table is + * also allowed to have a single "catch-all" flow that matches all packets. We + * optimize this case by tagging all of the facets that resubmit into the table + * and invalidating the same tag whenever a flow changes in that table. The + * end result is that we revalidate just the facets that need it (and sometimes + * a few more, but not all of the facets or even all of the facets that + * resubmit to the table modified by MAC learning). 
*/ + +/* Calculates the tag to use for 'flow' and wildcards 'wc' when it is inserted + * into an OpenFlow table with the given 'secret'. */ +static uint32_t +rule_calculate_tag(const struct flow *flow, const struct flow_wildcards *wc, + uint32_t secret) +{ + if (flow_wildcards_is_catchall(wc)) { + return 0; + } else { + struct flow tag_flow = *flow; + flow_zero_wildcards(&tag_flow, wc); + return tag_create_deterministic(flow_hash(&tag_flow, secret)); + } +} + +/* Following a change to OpenFlow table 'table_id' in 'ofproto', update the + * taggability of that table. + * + * This function must be called after *each* change to a flow table. If you + * skip calling it on some changes then the pointer comparisons at the end can + * be invalid if you get unlucky. For example, if a flow removal causes a + * cls_table to be destroyed and then a flow insertion causes a cls_table with + * different wildcards to be created with the same address, then this function + * will incorrectly skip revalidation. */ +static void +table_update_taggable(struct ofproto_dpif *ofproto, uint8_t table_id) { - struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); - bool drop_frags; + struct table_dpif *table = &ofproto->tables[table_id]; + const struct classifier *cls = &ofproto->up.tables[table_id]; + struct cls_table *catchall, *other; + struct cls_table *t; + + catchall = other = NULL; + + switch (hmap_count(&cls->tables)) { + case 0: + /* We could tag this OpenFlow table but it would make the logic a + * little harder and it's a corner case that doesn't seem worth it + * yet. */ + break; + + case 1: + case 2: + HMAP_FOR_EACH (t, hmap_node, &cls->tables) { + if (cls_table_is_catchall(t)) { + catchall = t; + } else if (!other) { + other = t; + } else { + /* Indicate that we can't tag this by setting both tables to + * NULL. (We know that 'catchall' is already NULL.) */ + other = NULL; + } + } + break; + + default: + /* Can't tag this table. 
*/ + break; + } - dpif_get_drop_frags(ofproto->dpif, &drop_frags); - return drop_frags; + if (table->catchall_table != catchall || table->other_table != other) { + table->catchall_table = catchall; + table->other_table = other; + ofproto->need_revalidate = true; + } } +/* Given 'rule' that has changed in some way (either it is a rule being + * inserted, a rule being deleted, or a rule whose actions are being + * modified), marks facets for revalidation to ensure that packets will be + * forwarded correctly according to the new state of the flow table. + * + * This function must be called after *each* change to a flow table. See + * the comment on table_update_taggable() for more information. */ static void -set_drop_frags(struct ofproto *ofproto_, bool drop_frags) +rule_invalidate(const struct rule_dpif *rule) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); + + table_update_taggable(ofproto, rule->up.table_id); + + if (!ofproto->need_revalidate) { + struct table_dpif *table = &ofproto->tables[rule->up.table_id]; + + if (table->other_table && rule->tag) { + tag_set_add(&ofproto->revalidate_set, rule->tag); + } else { + ofproto->need_revalidate = true; + } + } +} + +static bool +set_frag_handling(struct ofproto *ofproto_, + enum ofp_config_flags frag_handling) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); - dpif_set_drop_frags(ofproto->dpif, drop_frags); + if (frag_handling != OFPC_FRAG_REASM) { + ofproto->need_revalidate = true; + return true; + } else { + return false; + } } static int @@ -3794,7 +5150,8 @@ struct ofproto_trace { }; static void -trace_format_rule(struct ds *result, int level, const struct rule *rule) +trace_format_rule(struct ds *result, uint8_t table_id, int level, + const struct rule_dpif *rule) { ds_put_char_multiple(result, '\t', level); if (!rule) { @@ -3802,15 +5159,14 @@ trace_format_rule(struct ds *result, int level, const struct rule *rule) return; } - ds_put_format(result, "Rule: cookie=%#"PRIx64" 
", - ntohll(rule->flow_cookie)); - cls_rule_format(&rule->cr, result); + ds_put_format(result, "Rule: table=%"PRIu8" cookie=%#"PRIx64" ", + table_id, ntohll(rule->up.flow_cookie)); + cls_rule_format(&rule->up.cr, result); ds_put_char(result, '\n'); ds_put_char_multiple(result, '\t', level); ds_put_cstr(result, "OpenFlow "); - ofp_print_actions(result, (const struct ofp_action_header *) rule->actions, - rule->n_actions * sizeof *rule->actions); + ofp_print_actions(result, rule->up.actions, rule->up.n_actions); ds_put_char(result, '\n'); } @@ -3829,6 +5185,20 @@ trace_format_flow(struct ds *result, int level, const char *title, ds_put_char(result, '\n'); } +static void +trace_format_regs(struct ds *result, int level, const char *title, + struct ofproto_trace *trace) +{ + size_t i; + + ds_put_char_multiple(result, '\t', level); + ds_put_format(result, "%s:", title); + for (i = 0; i < FLOW_N_REGS; i++) { + ds_put_format(result, " reg%zu=0x%"PRIx32, i, trace->flow.regs[i]); + } + ds_put_char(result, '\n'); +} + static void trace_resubmit(struct action_xlate_ctx *ctx, struct rule_dpif *rule) { @@ -3837,33 +5207,85 @@ trace_resubmit(struct action_xlate_ctx *ctx, struct rule_dpif *rule) ds_put_char(result, '\n'); trace_format_flow(result, ctx->recurse + 1, "Resubmitted flow", trace); - trace_format_rule(result, ctx->recurse + 1, &rule->up); + trace_format_regs(result, ctx->recurse + 1, "Resubmitted regs", trace); + trace_format_rule(result, ctx->table_id, ctx->recurse + 1, rule); } static void ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_, void *aux OVS_UNUSED) { - char *dpname, *in_port_s, *tun_id_s, *packet_s; + char *dpname, *arg1, *arg2, *arg3; char *args = xstrdup(args_); char *save_ptr = NULL; struct ofproto_dpif *ofproto; - struct ofpbuf packet; + struct ofpbuf odp_key; + struct ofpbuf *packet; struct rule_dpif *rule; struct ds result; struct flow flow; - uint16_t in_port; - ovs_be64 tun_id; char *s; - ofpbuf_init(&packet, strlen(args) / 2); + 
packet = NULL; + ofpbuf_init(&odp_key, 0); ds_init(&result); dpname = strtok_r(args, " ", &save_ptr); - tun_id_s = strtok_r(NULL, " ", &save_ptr); - in_port_s = strtok_r(NULL, " ", &save_ptr); - packet_s = strtok_r(NULL, "", &save_ptr); /* Get entire rest of line. */ - if (!dpname || !in_port_s || !packet_s) { + arg1 = strtok_r(NULL, " ", &save_ptr); + arg2 = strtok_r(NULL, " ", &save_ptr); + arg3 = strtok_r(NULL, "", &save_ptr); /* Get entire rest of line. */ + if (dpname && arg1 && (!arg2 || !strcmp(arg2, "-generate")) && !arg3) { + /* ofproto/trace dpname flow [-generate] */ + int error; + + /* Convert string to datapath key. */ + ofpbuf_init(&odp_key, 0); + error = odp_flow_key_from_string(arg1, &odp_key); + if (error) { + unixctl_command_reply(conn, 501, "Bad flow syntax"); + goto exit; + } + + /* Convert odp_key to flow. */ + error = odp_flow_key_to_flow(odp_key.data, odp_key.size, &flow); + if (error) { + unixctl_command_reply(conn, 501, "Invalid flow"); + goto exit; + } + + /* Generate a packet, if requested. 
*/ + if (arg2) { + packet = ofpbuf_new(0); + flow_compose(packet, &flow); + } + } else if (dpname && arg1 && arg2 && arg3) { + /* ofproto/trace dpname tun_id in_port packet */ + uint16_t in_port; + ovs_be64 tun_id; + + tun_id = htonll(strtoull(arg1, NULL, 0)); + in_port = ofp_port_to_odp_port(atoi(arg2)); + + packet = ofpbuf_new(strlen(args) / 2); + arg3 = ofpbuf_put_hex(packet, arg3, NULL); + arg3 += strspn(arg3, " "); + if (*arg3 != '\0') { + unixctl_command_reply(conn, 501, "Trailing garbage in command"); + goto exit; + } + if (packet->size < ETH_HEADER_LEN) { + unixctl_command_reply(conn, 501, + "Packet data too short for Ethernet"); + goto exit; + } + + ds_put_cstr(&result, "Packet: "); + s = ofp_packet_to_string(packet->data, packet->size, packet->size); + ds_put_cstr(&result, s); + free(s); + + flow_extract(packet, tun_id, in_port, &flow); + } else { unixctl_command_reply(conn, 501, "Bad command syntax"); goto exit; } @@ -3875,39 +5297,19 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_, goto exit; } - tun_id = htonll(strtoull(tun_id_s, NULL, 0)); - in_port = ofp_port_to_odp_port(atoi(in_port_s)); - - packet_s = ofpbuf_put_hex(&packet, packet_s, NULL); - packet_s += strspn(packet_s, " "); - if (*packet_s != '\0') { - unixctl_command_reply(conn, 501, "Trailing garbage in command"); - goto exit; - } - if (packet.size < ETH_HEADER_LEN) { - unixctl_command_reply(conn, 501, "Packet data too short for Ethernet"); - goto exit; - } - - ds_put_cstr(&result, "Packet: "); - s = ofp_packet_to_string(packet.data, packet.size, packet.size); - ds_put_cstr(&result, s); - free(s); - - flow_extract(&packet, tun_id, in_port, &flow); ds_put_cstr(&result, "Flow: "); flow_format(&result, &flow); ds_put_char(&result, '\n'); - rule = rule_dpif_lookup(ofproto, &flow); - trace_format_rule(&result, 0, &rule->up); + rule = rule_dpif_lookup(ofproto, &flow, 0); + trace_format_rule(&result, 0, 0, rule); if (rule) { struct ofproto_trace trace; struct ofpbuf 
*odp_actions; trace.result = &result; trace.flow = flow; - action_xlate_ctx_init(&trace.ctx, ofproto, &flow, &packet); + action_xlate_ctx_init(&trace.ctx, ofproto, &flow, packet); trace.ctx.resubmit_hook = trace_resubmit; odp_actions = xlate_actions(&trace.ctx, rule->up.actions, rule->up.n_actions); @@ -3917,16 +5319,42 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_, ds_put_cstr(&result, "Datapath actions: "); format_odp_actions(&result, odp_actions->data, odp_actions->size); ofpbuf_delete(odp_actions); + + if (!trace.ctx.may_set_up_flow) { + if (packet) { + ds_put_cstr(&result, "\nThis flow is not cachable."); + } else { + ds_put_cstr(&result, "\nThe datapath actions are incomplete--" + "for complete actions, please supply a packet."); + } + } } unixctl_command_reply(conn, 200, ds_cstr(&result)); exit: ds_destroy(&result); - ofpbuf_uninit(&packet); + ofpbuf_delete(packet); + ofpbuf_uninit(&odp_key); free(args); } +static void +ofproto_dpif_clog(struct unixctl_conn *conn OVS_UNUSED, + const char *args_ OVS_UNUSED, void *aux OVS_UNUSED) +{ + clogged = true; + unixctl_command_reply(conn, 200, NULL); +} + +static void +ofproto_dpif_unclog(struct unixctl_conn *conn OVS_UNUSED, + const char *args_ OVS_UNUSED, void *aux OVS_UNUSED) +{ + clogged = false; + unixctl_command_reply(conn, 200, NULL); +} + static void ofproto_dpif_unixctl_init(void) { @@ -3936,8 +5364,13 @@ ofproto_dpif_unixctl_init(void) } registered = true; - unixctl_command_register("ofproto/trace", ofproto_unixctl_trace, NULL); - unixctl_command_register("fdb/show", ofproto_unixctl_fdb_show, NULL); + unixctl_command_register("ofproto/trace", + "bridge {tun_id in_port packet | odp_flow [-generate]}", + ofproto_unixctl_trace, NULL); + unixctl_command_register("fdb/show", "bridge", ofproto_unixctl_fdb_show, + NULL); + unixctl_command_register("ofproto/clog", "", ofproto_dpif_clog, NULL); + unixctl_command_register("ofproto/unclog", "", ofproto_dpif_unclog, NULL); } const struct 
ofproto_class ofproto_dpif_class = { @@ -3976,17 +5409,22 @@ const struct ofproto_class ofproto_dpif_class = { rule_get_stats, rule_execute, rule_modify_actions, - get_drop_frags, - set_drop_frags, + set_frag_handling, packet_out, set_netflow, get_netflow_ids, set_sflow, set_cfm, get_cfm_fault, + get_cfm_remote_mpids, + set_stp, + get_stp_status, + set_stp_port, + get_stp_port_status, bundle_set, bundle_remove, mirror_set, set_flood_vlans, is_mirror_output_bundle, + forward_bpdu_changed, };