X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=ofproto%2Fofproto-dpif.c;h=cca23cac7be20b5d1a152333a2ea5ae62363b7af;hb=2716b6379dc86331330d78b496985e908320ddfa;hp=c54cc5f42bd922c91adecfb73ba58de6edef3c15;hpb=530e7c1c40a425136ab2e1ab5211f95e12f26f64;p=sliver-openvswitch.git

diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index c54cc5f42..cca23cac7 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -176,8 +176,18 @@ static void bundle_destroy(struct ofbundle *);
 static void bundle_del_port(struct ofport_dpif *);
 static void bundle_run(struct ofbundle *);
 static void bundle_wait(struct ofbundle *);
-static struct ofport_dpif *lookup_input_bundle(struct ofproto_dpif *,
-                                               uint16_t in_port, bool warn);
+static struct ofbundle *lookup_input_bundle(struct ofproto_dpif *,
+                                            uint16_t in_port, bool warn);
+
+/* A controller may use OFPP_NONE as the ingress port to indicate that a
+ * packet did not arrive on a "real" port.  'ofpp_none_bundle' exists for
+ * when an input bundle is needed for validation (e.g., mirroring or
+ * OFPP_NORMAL processing).  It is not connected to an 'ofproto' and has no
+ * 'port' structs, so care must be taken when dealing with it. */
+static struct ofbundle ofpp_none_bundle = {
+    .name      = "OFPP_NONE",
+    .vlan_mode = PORT_VLAN_TRUNK
+};
 
 static void stp_run(struct ofproto_dpif *ofproto);
 static void stp_wait(struct ofproto_dpif *ofproto);
@@ -477,6 +487,7 @@ struct table_dpif {
 };
 
 struct ofproto_dpif {
+    struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */
     struct ofproto up;
     struct dpif *dpif;
     int max_ports;
@@ -522,6 +533,9 @@ struct ofproto_dpif {
  * for debugging the asynchronous flow_mod implementation.) */
 static bool clogged;
 
+/* All existing ofproto_dpif instances, indexed by ->up.name. */
+static struct hmap all_ofproto_dpifs = HMAP_INITIALIZER(&all_ofproto_dpifs);
+
 static void ofproto_dpif_unixctl_init(void);
 
 static struct ofproto_dpif *
@@ -667,6 +681,9 @@ construct(struct ofproto *ofproto_, int *n_tablesp)
     hmap_init(&ofproto->vlandev_map);
     hmap_init(&ofproto->realdev_vid_map);
 
+    hmap_insert(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node,
+                hash_string(ofproto->up.name, 0));
+
     *n_tablesp = N_TABLES;
     return 0;
 }
@@ -691,6 +708,7 @@ destruct(struct ofproto *ofproto_)
     struct classifier *table;
     int i;
 
+    hmap_remove(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node);
     complete_operations(ofproto);
 
     OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) {
@@ -1388,10 +1406,17 @@ set_queues(struct ofport *ofport_,
 
 /* Bundles. */
 
-/* Expires all MAC learning entries associated with 'port' and forces ofproto
- * to revalidate every flow. */
+/* Expires all MAC learning entries associated with 'bundle' and forces its
+ * ofproto to revalidate every flow.
+ *
+ * Normally MAC learning entries are removed only from the ofproto associated
+ * with 'bundle', but if 'all_ofprotos' is true, then entries for the same MAC
+ * and VLAN are also removed from every other ofproto.  When patch ports and
+ * SLB bonds are in use, a VM migration happens, and the gratuitous ARPs are
+ * somehow lost, this avoids a MAC_ENTRY_IDLE_TIME delay before the migrated
+ * VM can communicate with the host from which it migrated. */
 static void
-bundle_flush_macs(struct ofbundle *bundle)
+bundle_flush_macs(struct ofbundle *bundle, bool all_ofprotos)
 {
     struct ofproto_dpif *ofproto = bundle->ofproto;
     struct mac_learning *ml = ofproto->ml;
@@ -1400,6 +1425,23 @@ bundle_flush_macs(struct ofbundle *bundle)
     ofproto->need_revalidate = true;
     LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) {
         if (mac->port.p == bundle) {
+            if (all_ofprotos) {
+                struct ofproto_dpif *o;
+
+                HMAP_FOR_EACH (o, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
+                    if (o != ofproto) {
+                        struct mac_entry *e;
+
+                        e = mac_learning_lookup(o->ml, mac->mac, mac->vlan,
+                                                NULL);
+                        if (e) {
+                            tag_set_add(&o->revalidate_set, e->tag);
+                            mac_learning_expire(o->ml, e);
+                        }
+                    }
+                }
+            }
+
             mac_learning_expire(ml, mac);
         }
     }
@@ -1533,7 +1575,7 @@ bundle_destroy(struct ofbundle *bundle)
         bundle_del_port(port);
     }
 
-    bundle_flush_macs(bundle);
+    bundle_flush_macs(bundle, true);
     hmap_remove(&ofproto->bundles, &bundle->hmap_node);
     free(bundle->name);
     free(bundle->trunks);
@@ -1721,7 +1763,7 @@ bundle_set(struct ofproto *ofproto_, void *aux,
     /* If we changed something that would affect MAC learning, un-learn
      * everything on this port and force flow revalidation. */
     if (need_flush) {
-        bundle_flush_macs(bundle);
+        bundle_flush_macs(bundle, false);
     }
 
     return 0;
@@ -2487,6 +2529,15 @@ handle_flow_miss(struct ofproto_dpif *ofproto, struct flow_miss *miss,
             struct flow_miss_op *op = &ops[(*n_ops)++];
             struct dpif_execute *execute = &op->dpif_op.execute;
 
+            if (flow->vlan_tci != subfacet->initial_tci) {
+                /* This packet was received on a VLAN splinter port.  We added
+                 * a VLAN to the packet to make the packet resemble the flow,
+                 * but the actions were composed assuming that the packet
+                 * contained no VLAN.  So, we must remove the VLAN header from
+                 * the packet before trying to execute the actions. */
+                eth_pop_vlan(packet);
+            }
+
             op->subfacet = subfacet;
             execute->type = DPIF_OP_EXECUTE;
             execute->key = miss->key;
@@ -2515,10 +2566,27 @@ handle_flow_miss(struct ofproto_dpif *ofproto, struct flow_miss *miss,
     }
 }
 
+/* Like odp_flow_key_to_flow(), this function converts the 'key_len' bytes of
+ * OVS_KEY_ATTR_* attributes in 'key' to a flow structure in 'flow' and returns
+ * an ODP_FIT_* value that indicates how well 'key' fits our expectations for
+ * what a flow key should contain.
+ *
+ * This function also includes some logic to help make VLAN splinters
+ * transparent to the rest of the upcall processing logic.  In particular, if
+ * the extracted in_port is a VLAN splinter port, it replaces flow->in_port by
+ * the "real" port, sets flow->vlan_tci correctly for the VLAN of the VLAN
+ * splinter port, and pushes a VLAN header onto 'packet' (if it is nonnull).
+ *
+ * Sets '*initial_tci' to the VLAN TCI with which the packet was really
+ * received, that is, the actual VLAN TCI extracted by odp_flow_key_to_flow().
+ * (This differs from the value returned in flow->vlan_tci only for packets
+ * received on VLAN splinters.)
+ */
 static enum odp_key_fitness
 ofproto_dpif_extract_flow_key(const struct ofproto_dpif *ofproto,
                               const struct nlattr *key, size_t key_len,
-                              struct flow *flow, ovs_be16 *initial_tci)
+                              struct flow *flow, ovs_be16 *initial_tci,
+                              struct ofpbuf *packet)
 {
     enum odp_key_fitness fitness;
     uint16_t realdev;
@@ -2536,6 +2604,23 @@ ofproto_dpif_extract_flow_key(const struct ofproto_dpif *ofproto,
          * with the VLAN device's VLAN ID. */
         flow->in_port = realdev;
         flow->vlan_tci = htons((vid & VLAN_VID_MASK) | VLAN_CFI);
+        if (packet) {
+            /* Make the packet resemble the flow, so that it gets sent to an
+             * OpenFlow controller properly, so that it looks correct for
+             * sFlow, and so that flow_extract() will get the correct vlan_tci
+             * if it is called on 'packet'.
+             *
+             * The allocated space inside 'packet' probably also contains
+             * 'key', that is, both 'packet' and 'key' are probably part of a
+             * struct dpif_upcall (see the large comment on that structure
+             * definition), so pushing data on 'packet' is in general not a
+             * good idea since it could overwrite 'key' or free it as a side
+             * effect.  However, it's OK in this special case because we know
+             * that 'packet' is inside a Netlink attribute: pushing 4 bytes
+             * will just overwrite the 4-byte "struct nlattr", which is fine
+             * since we don't need that header anymore. */
+            eth_push_vlan(packet, flow->vlan_tci);
+        }
 
         /* Let the caller know that we can't reproduce 'key' from 'flow'. */
         if (fitness == ODP_FIT_PERFECT) {
@@ -2578,7 +2663,8 @@ handle_miss_upcalls(struct ofproto_dpif *ofproto, struct dpif_upcall *upcalls,
          * then set 'flow''s header pointers. */
         fitness = ofproto_dpif_extract_flow_key(ofproto,
                                                 upcall->key, upcall->key_len,
-                                                &flow, &initial_tci);
+                                                &flow, &initial_tci,
+                                                upcall->packet);
         if (fitness == ODP_FIT_ERROR) {
             ofpbuf_delete(upcall->packet);
             continue;
@@ -2655,7 +2741,7 @@ handle_userspace_upcall(struct ofproto_dpif *ofproto,
 
     fitness = ofproto_dpif_extract_flow_key(ofproto, upcall->key,
                                             upcall->key_len, &flow,
-                                            &initial_tci);
+                                            &initial_tci, upcall->packet);
     if (fitness == ODP_FIT_ERROR) {
         ofpbuf_delete(upcall->packet);
         return;
@@ -4716,6 +4802,11 @@ input_vid_to_vlan(const struct ofbundle *in_bundle, uint16_t vid)
 static bool
 input_vid_is_valid(uint16_t vid, struct ofbundle *in_bundle, bool warn)
 {
+    /* Allow any VID on the OFPP_NONE port. */
+    if (in_bundle == &ofpp_none_bundle) {
+        return true;
+    }
+
     switch (in_bundle->vlan_mode) {
     case PORT_VLAN_ACCESS:
         if (vid) {
@@ -4898,22 +4989,17 @@ add_mirror_actions(struct action_xlate_ctx *ctx, const struct flow *orig_flow)
 {
     struct ofproto_dpif *ofproto = ctx->ofproto;
     mirror_mask_t mirrors;
-    struct ofport_dpif *in_port;
     struct ofbundle *in_bundle;
     uint16_t vlan;
     uint16_t vid;
     const struct nlattr *a;
     size_t left;
 
-    /* Obtain in_port from orig_flow.in_port.
-     *
-     * lookup_input_bundle() also ensures that in_port belongs to a bundle. */
-    in_port = lookup_input_bundle(ctx->ofproto, orig_flow->in_port,
-                                  ctx->packet != NULL);
-    if (!in_port) {
+    in_bundle = lookup_input_bundle(ctx->ofproto, orig_flow->in_port,
+                                    ctx->packet != NULL);
+    if (!in_bundle) {
         return;
     }
-    in_bundle = in_port->bundle;
     mirrors = in_bundle->src_mirrors;
 
     /* Drop frames on bundles reserved for mirroring. */
@@ -5034,6 +5120,11 @@ update_learning_table(struct ofproto_dpif *ofproto,
 {
     struct mac_entry *mac;
 
+    /* Don't learn the OFPP_NONE port. */
+    if (in_bundle == &ofpp_none_bundle) {
+        return;
+    }
+
     if (!mac_learning_may_learn(ofproto->ml, flow->dl_src, vlan)) {
         return;
     }
@@ -5064,15 +5155,21 @@ update_learning_table(struct ofproto_dpif *ofproto,
     }
 }
 
-static struct ofport_dpif *
+static struct ofbundle *
 lookup_input_bundle(struct ofproto_dpif *ofproto, uint16_t in_port, bool warn)
 {
     struct ofport_dpif *ofport;
 
+    /* Special-case OFPP_NONE, which a controller may use as the ingress
+     * port for traffic that it is sourcing. */
+    if (in_port == OFPP_NONE) {
+        return &ofpp_none_bundle;
+    }
+
     /* Find the port and bundle for the received packet. */
     ofport = get_ofp_port(ofproto, in_port);
     if (ofport && ofport->bundle) {
-        return ofport;
+        return ofport->bundle;
     }
 
     /* Odd.  A few possible reasons here:
@@ -5156,15 +5253,15 @@ xlate_normal(struct action_xlate_ctx *ctx)
 
     ctx->has_normal = true;
 
-    /* Obtain in_port from ctx->flow.in_port.
-     *
-     * lookup_input_bundle() also ensures that in_port belongs to a bundle. */
-    in_port = lookup_input_bundle(ctx->ofproto, ctx->flow.in_port,
+    in_bundle = lookup_input_bundle(ctx->ofproto, ctx->flow.in_port,
                                   ctx->packet != NULL);
-    if (!in_port) {
+    if (!in_bundle) {
         return;
     }
-    in_bundle = in_port->bundle;
+
+    /* We know 'in_port' exists unless 'in_bundle' is 'ofpp_none_bundle',
+     * since lookup_input_bundle() succeeded. */
+    in_port = get_ofp_port(ctx->ofproto, ctx->flow.in_port);
 
     /* Drop malformed frames. */
     if (ctx->flow.dl_type == htons(ETH_TYPE_VLAN) &&
@@ -5197,7 +5294,8 @@ xlate_normal(struct action_xlate_ctx *ctx)
     vlan = input_vid_to_vlan(in_bundle, vid);
 
     /* Check other admissibility requirements. */
-    if (!is_admissible(ctx->ofproto, &ctx->flow, in_port, vlan, &ctx->tags)) {
+    if (in_port &&
+         !is_admissible(ctx->ofproto, &ctx->flow, in_port, vlan, &ctx->tags)) {
         return;
     }
 
@@ -5464,10 +5562,15 @@ send_netflow_active_timeouts(struct ofproto_dpif *ofproto)
 static struct ofproto_dpif *
 ofproto_dpif_lookup(const char *name)
 {
-    struct ofproto *ofproto = ofproto_lookup(name);
-    return (ofproto && ofproto->ofproto_class == &ofproto_dpif_class
-            ? ofproto_dpif_cast(ofproto)
-            : NULL);
+    struct ofproto_dpif *ofproto;
+
+    HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_node,
+                             hash_string(name, 0), &all_ofproto_dpifs) {
+        if (!strcmp(ofproto->up.name, name)) {
+            return ofproto;
+        }
+    }
+    return NULL;
 }
 
 static void
@@ -5630,7 +5733,7 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_,
         /* Convert odp_key to flow. */
         error = ofproto_dpif_extract_flow_key(ofproto, odp_key.data,
                                               odp_key.size, &flow,
-                                              &initial_tci);
+                                              &initial_tci, NULL);
         if (error == ODP_FIT_ERROR) {
             unixctl_command_reply(conn, 501, "Invalid flow");
             goto exit;