ofproto-dpif: Fake-up OFPP_NONE input bundle for mirroring and normal.
[sliver-openvswitch.git] / ofproto / ofproto-dpif.c
index 56c3baf..baa191e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2010, 2011 Nicira Networks.
+ * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -176,8 +176,18 @@ static void bundle_destroy(struct ofbundle *);
 static void bundle_del_port(struct ofport_dpif *);
 static void bundle_run(struct ofbundle *);
 static void bundle_wait(struct ofbundle *);
-static struct ofport_dpif *lookup_input_bundle(struct ofproto_dpif *,
-                                               uint16_t in_port, bool warn);
+static struct ofbundle *lookup_input_bundle(struct ofproto_dpif *,
+                                            uint16_t in_port, bool warn);
+
+/* A controller may use OFPP_NONE as the ingress port to indicate that a
+ * packet did not arrive on a "real" port.  'ofpp_none_bundle' exists for
+ * when an input bundle is needed for validation (e.g., mirroring or
+ * OFPP_NORMAL processing).  It is not connected to an 'ofproto', nor does it
+ * have any 'port' structs, so care must be taken when dealing with it. */
+static struct ofbundle ofpp_none_bundle = {
+    .name      = "OFPP_NONE",
+    .vlan_mode = PORT_VLAN_TRUNK
+};
 
 static void stp_run(struct ofproto_dpif *ofproto);
 static void stp_wait(struct ofproto_dpif *ofproto);
@@ -1816,11 +1826,15 @@ bundle_send_learning_packets(struct ofbundle *bundle)
         if (e->port.p != bundle) {
             struct ofpbuf *learning_packet;
             struct ofport_dpif *port;
+            void *port_void;
             int ret;
 
-            learning_packet = bond_compose_learning_packet(bundle->bond, e->mac,
-                                                           e->vlan,
-                                                           (void **)&port);
+            /* The assignment to "port" is unnecessary but makes "grep"ing for
+             * struct ofport_dpif more effective. */
+            learning_packet = bond_compose_learning_packet(bundle->bond,
+                                                           e->mac, e->vlan,
+                                                           &port_void);
+            port = port_void;
             ret = send_packet(port, learning_packet);
             ofpbuf_delete(learning_packet);
             if (ret) {
@@ -2573,10 +2587,20 @@ handle_flow_miss(struct ofproto_dpif *ofproto, struct flow_miss *miss,
 
         if (!execute_controller_action(ofproto, &facet->flow,
                                        subfacet->actions,
-                                       subfacet->actions_len, packet, true)) {
+                                       subfacet->actions_len, packet, true)
+            && subfacet->actions_len > 0) {
             struct flow_miss_op *op = &ops[(*n_ops)++];
             struct dpif_execute *execute = &op->dpif_op.execute;
 
+            if (flow->vlan_tci != subfacet->initial_tci) {
+                /* This packet was received on a VLAN splinter port.  We added
+                 * a VLAN to the packet to make the packet resemble the flow,
+                 * but the actions were composed assuming that the packet
+                 * contained no VLAN.  So, we must remove the VLAN header from
+                 * the packet before trying to execute the actions. */
+                eth_pop_vlan(packet);
+            }
+
             op->subfacet = subfacet;
             execute->type = DPIF_OP_EXECUTE;
             execute->key = miss->key;
@@ -2605,10 +2629,27 @@ handle_flow_miss(struct ofproto_dpif *ofproto, struct flow_miss *miss,
     }
 }
 
+/* Like odp_flow_key_to_flow(), this function converts the 'key_len' bytes of
+ * OVS_KEY_ATTR_* attributes in 'key' to a flow structure in 'flow' and returns
+ * an ODP_FIT_* value that indicates how well 'key' fits our expectations for
+ * what a flow key should contain.
+ *
+ * This function also includes some logic to help make VLAN splinters
+ * transparent to the rest of the upcall processing logic.  In particular, if
+ * the extracted in_port is a VLAN splinter port, it replaces flow->in_port by
+ * the "real" port, sets flow->vlan_tci correctly for the VLAN of the VLAN
+ * splinter port, and pushes a VLAN header onto 'packet' (if it is nonnull).
+ *
+ * Sets '*initial_tci' to the VLAN TCI with which the packet was really
+ * received, that is, the actual VLAN TCI extracted by odp_flow_key_to_flow().
+ * (This differs from the value returned in flow->vlan_tci only for packets
+ * received on VLAN splinters.)
+ */
 static enum odp_key_fitness
 ofproto_dpif_extract_flow_key(const struct ofproto_dpif *ofproto,
                               const struct nlattr *key, size_t key_len,
-                              struct flow *flow, ovs_be16 *initial_tci)
+                              struct flow *flow, ovs_be16 *initial_tci,
+                              struct ofpbuf *packet)
 {
     enum odp_key_fitness fitness;
     uint16_t realdev;
@@ -2626,6 +2667,23 @@ ofproto_dpif_extract_flow_key(const struct ofproto_dpif *ofproto,
          * with the VLAN device's VLAN ID. */
         flow->in_port = realdev;
         flow->vlan_tci = htons((vid & VLAN_VID_MASK) | VLAN_CFI);
+        if (packet) {
+            /* Make the packet resemble the flow, so that it gets sent to an
+             * OpenFlow controller properly, so that it looks correct for
+             * sFlow, and so that flow_extract() will get the correct vlan_tci
+             * if it is called on 'packet'.
+             *
+             * The allocated space inside 'packet' probably also contains
+             * 'key', that is, both 'packet' and 'key' are probably part of a
+             * struct dpif_upcall (see the large comment on that structure
+             * definition), so pushing data on 'packet' is in general not a
+             * good idea since it could overwrite 'key' or free it as a side
+             * effect.  However, it's OK in this special case because we know
+             * that 'packet' is inside a Netlink attribute: pushing 4 bytes
+             * will just overwrite the 4-byte "struct nlattr", which is fine
+             * since we don't need that header anymore. */
+            eth_push_vlan(packet, flow->vlan_tci);
+        }
 
         /* Let the caller know that we can't reproduce 'key' from 'flow'. */
         if (fitness == ODP_FIT_PERFECT) {
@@ -2668,7 +2726,8 @@ handle_miss_upcalls(struct ofproto_dpif *ofproto, struct dpif_upcall *upcalls,
          * then set 'flow''s header pointers. */
         fitness = ofproto_dpif_extract_flow_key(ofproto,
                                                 upcall->key, upcall->key_len,
-                                                &flow, &initial_tci);
+                                                &flow, &initial_tci,
+                                                upcall->packet);
         if (fitness == ODP_FIT_ERROR) {
             ofpbuf_delete(upcall->packet);
             continue;
@@ -2747,7 +2806,7 @@ handle_userspace_upcall(struct ofproto_dpif *ofproto,
 
     fitness = ofproto_dpif_extract_flow_key(ofproto, upcall->key,
                                             upcall->key_len, &flow,
-                                            &initial_tci);
+                                            &initial_tci, upcall->packet);
     if (fitness == ODP_FIT_ERROR) {
         ofpbuf_delete(upcall->packet);
         return;
@@ -4809,6 +4868,11 @@ input_vid_to_vlan(const struct ofbundle *in_bundle, uint16_t vid)
 static bool
 input_vid_is_valid(uint16_t vid, struct ofbundle *in_bundle, bool warn)
 {
+    /* Allow any VID on the OFPP_NONE port. */
+    if (in_bundle == &ofpp_none_bundle) {
+        return true;
+    }
+
     switch (in_bundle->vlan_mode) {
     case PORT_VLAN_ACCESS:
         if (vid) {
@@ -4991,22 +5055,17 @@ add_mirror_actions(struct action_xlate_ctx *ctx, const struct flow *orig_flow)
 {
     struct ofproto_dpif *ofproto = ctx->ofproto;
     mirror_mask_t mirrors;
-    struct ofport_dpif *in_port;
     struct ofbundle *in_bundle;
     uint16_t vlan;
     uint16_t vid;
     const struct nlattr *a;
     size_t left;
 
-    /* Obtain in_port from orig_flow.in_port.
-     *
-     * lookup_input_bundle() also ensures that in_port belongs to a bundle. */
-    in_port = lookup_input_bundle(ctx->ofproto, orig_flow->in_port,
-                                  ctx->packet != NULL);
-    if (!in_port) {
+    in_bundle = lookup_input_bundle(ctx->ofproto, orig_flow->in_port,
+                                    ctx->packet != NULL);
+    if (!in_bundle) {
         return;
     }
-    in_bundle = in_port->bundle;
     mirrors = in_bundle->src_mirrors;
 
     /* Drop frames on bundles reserved for mirroring. */
@@ -5127,6 +5186,11 @@ update_learning_table(struct ofproto_dpif *ofproto,
 {
     struct mac_entry *mac;
 
+    /* Don't learn the OFPP_NONE port. */
+    if (in_bundle == &ofpp_none_bundle) {
+        return;
+    }
+
     if (!mac_learning_may_learn(ofproto->ml, flow->dl_src, vlan)) {
         return;
     }
@@ -5157,15 +5221,21 @@ update_learning_table(struct ofproto_dpif *ofproto,
     }
 }
 
-static struct ofport_dpif *
+static struct ofbundle *
 lookup_input_bundle(struct ofproto_dpif *ofproto, uint16_t in_port, bool warn)
 {
     struct ofport_dpif *ofport;
 
+    /* Special-case OFPP_NONE, which a controller may use as the ingress
+     * port for traffic that it is sourcing. */
+    if (in_port == OFPP_NONE) {
+        return &ofpp_none_bundle;
+    }
+
     /* Find the port and bundle for the received packet. */
     ofport = get_ofp_port(ofproto, in_port);
     if (ofport && ofport->bundle) {
-        return ofport;
+        return ofport->bundle;
     }
 
     /* Odd.  A few possible reasons here:
@@ -5249,15 +5319,15 @@ xlate_normal(struct action_xlate_ctx *ctx)
 
     ctx->has_normal = true;
 
-    /* Obtain in_port from ctx->flow.in_port.
-     *
-     * lookup_input_bundle() also ensures that in_port belongs to a bundle. */
-    in_port = lookup_input_bundle(ctx->ofproto, ctx->flow.in_port,
+    in_bundle = lookup_input_bundle(ctx->ofproto, ctx->flow.in_port,
                                   ctx->packet != NULL);
-    if (!in_port) {
+    if (!in_bundle) {
         return;
     }
-    in_bundle = in_port->bundle;
+
+    /* 'in_port' is nonnull unless 'in_bundle' is "ofpp_none_bundle", since
+     * lookup_input_bundle() succeeded. */
+    in_port = get_ofp_port(ctx->ofproto, ctx->flow.in_port);
 
     /* Drop malformed frames. */
     if (ctx->flow.dl_type == htons(ETH_TYPE_VLAN) &&
@@ -5290,7 +5360,8 @@ xlate_normal(struct action_xlate_ctx *ctx)
     vlan = input_vid_to_vlan(in_bundle, vid);
 
     /* Check other admissibility requirements. */
-    if (!is_admissible(ctx->ofproto, &ctx->flow, in_port, vlan, &ctx->tags)) {
+    if (in_port &&
+         !is_admissible(ctx->ofproto, &ctx->flow, in_port, vlan, &ctx->tags)) {
         return;
     }
 
@@ -5306,14 +5377,6 @@ xlate_normal(struct action_xlate_ctx *ctx)
         if (mac->port.p != in_bundle) {
             output_normal(ctx, mac->port.p, vlan);
         }
-    } else if (!ctx->packet && !eth_addr_is_multicast(ctx->flow.dl_dst)) {
-        /* If we are revalidating but don't have a learning entry then eject
-         * the flow.  Installing a flow that floods packets opens up a window
-         * of time where we could learn from a packet reflected on a bond and
-         * blackhole packets before the learning table is updated to reflect
-         * the correct port. */
-        ctx->may_set_up_flow = false;
-        return;
     } else {
         struct ofbundle *bundle;
 
@@ -5718,7 +5781,7 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[],
         /* Convert odp_key to flow. */
         error = ofproto_dpif_extract_flow_key(ofproto, odp_key.data,
                                               odp_key.size, &flow,
-                                              &initial_tci);
+                                              &initial_tci, NULL);
         if (error == ODP_FIT_ERROR) {
             unixctl_command_reply(conn, 501, "Invalid flow");
             goto exit;