ofproto-dpif: Fake-up OFPP_NONE input bundle for mirroring and normal.
[sliver-openvswitch.git] / ofproto / ofproto-dpif.c
index 84ddc9d..baa191e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2010, 2011 Nicira Networks.
+ * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -176,8 +176,18 @@ static void bundle_destroy(struct ofbundle *);
 static void bundle_del_port(struct ofport_dpif *);
 static void bundle_run(struct ofbundle *);
 static void bundle_wait(struct ofbundle *);
-static struct ofport_dpif *lookup_input_bundle(struct ofproto_dpif *,
-                                               uint16_t in_port, bool warn);
+static struct ofbundle *lookup_input_bundle(struct ofproto_dpif *,
+                                            uint16_t in_port, bool warn);
+
+/* A controller may use OFPP_NONE as the ingress port to indicate that
+ * a packet did not arrive on a "real" port.  'ofpp_none_bundle' exists for
+ * when an input bundle is needed for validation (e.g., mirroring or
+ * OFPP_NORMAL processing).  It is not connected to an 'ofproto' and has no
+ * 'port' structs, so care must be taken when dealing with it. */
+static struct ofbundle ofpp_none_bundle = {
+    .name      = "OFPP_NONE",
+    .vlan_mode = PORT_VLAN_TRUNK
+};
 
 static void stp_run(struct ofproto_dpif *ofproto);
 static void stp_wait(struct ofproto_dpif *ofproto);
@@ -227,7 +237,7 @@ struct action_xlate_ctx {
 
     int recurse;                /* Recursion level, via xlate_table_action. */
     struct flow base_flow;      /* Flow at the last commit. */
-    uint32_t original_priority; /* Priority when packet arrived. */
+    uint32_t orig_skb_priority; /* Priority when packet arrived. */
     uint8_t table_id;           /* OpenFlow table ID where flow was found. */
     uint32_t sflow_n_outputs;   /* Number of output ports. */
     uint16_t sflow_odp_port;    /* Output port for composing sFlow action. */
@@ -1816,11 +1826,15 @@ bundle_send_learning_packets(struct ofbundle *bundle)
         if (e->port.p != bundle) {
             struct ofpbuf *learning_packet;
             struct ofport_dpif *port;
+            void *port_void;
             int ret;
 
-            learning_packet = bond_compose_learning_packet(bundle->bond, e->mac,
-                                                           e->vlan,
-                                                           (void **)&port);
+            /* The assignment to "port" is unnecessary but makes "grep"ing for
+             * struct ofport_dpif more effective. */
+            learning_packet = bond_compose_learning_packet(bundle->bond,
+                                                           e->mac, e->vlan,
+                                                           &port_void);
+            port = port_void;
             ret = send_packet(port, learning_packet);
             ofpbuf_delete(learning_packet);
             if (ret) {
@@ -2573,10 +2587,20 @@ handle_flow_miss(struct ofproto_dpif *ofproto, struct flow_miss *miss,
 
         if (!execute_controller_action(ofproto, &facet->flow,
                                        subfacet->actions,
-                                       subfacet->actions_len, packet, true)) {
+                                       subfacet->actions_len, packet, true)
+            && subfacet->actions_len > 0) {
             struct flow_miss_op *op = &ops[(*n_ops)++];
             struct dpif_execute *execute = &op->dpif_op.execute;
 
+            if (flow->vlan_tci != subfacet->initial_tci) {
+                /* This packet was received on a VLAN splinter port.  We added
+                 * a VLAN to the packet to make the packet resemble the flow,
+                 * but the actions were composed assuming that the packet
+                 * contained no VLAN.  So, we must remove the VLAN header from
+                 * the packet before trying to execute the actions. */
+                eth_pop_vlan(packet);
+            }
+
             op->subfacet = subfacet;
             execute->type = DPIF_OP_EXECUTE;
             execute->key = miss->key;
@@ -2605,10 +2629,27 @@ handle_flow_miss(struct ofproto_dpif *ofproto, struct flow_miss *miss,
     }
 }
 
+/* Like odp_flow_key_to_flow(), this function converts the 'key_len' bytes of
+ * OVS_KEY_ATTR_* attributes in 'key' to a flow structure in 'flow' and returns
+ * an ODP_FIT_* value that indicates how well 'key' fits our expectations for
+ * what a flow key should contain.
+ *
+ * This function also includes some logic to help make VLAN splinters
+ * transparent to the rest of the upcall processing logic.  In particular, if
+ * the extracted in_port is a VLAN splinter port, it replaces flow->in_port by
+ * the "real" port, sets flow->vlan_tci correctly for the VLAN of the VLAN
+ * splinter port, and pushes a VLAN header onto 'packet' (if it is nonnull).
+ *
+ * Sets '*initial_tci' to the VLAN TCI with which the packet was really
+ * received, that is, the actual VLAN TCI extracted by odp_flow_key_to_flow().
+ * (This differs from the value returned in flow->vlan_tci only for packets
+ * received on VLAN splinters.)
+ */
 static enum odp_key_fitness
 ofproto_dpif_extract_flow_key(const struct ofproto_dpif *ofproto,
                               const struct nlattr *key, size_t key_len,
-                              struct flow *flow, ovs_be16 *initial_tci)
+                              struct flow *flow, ovs_be16 *initial_tci,
+                              struct ofpbuf *packet)
 {
     enum odp_key_fitness fitness;
     uint16_t realdev;
@@ -2626,6 +2667,23 @@ ofproto_dpif_extract_flow_key(const struct ofproto_dpif *ofproto,
          * with the VLAN device's VLAN ID. */
         flow->in_port = realdev;
         flow->vlan_tci = htons((vid & VLAN_VID_MASK) | VLAN_CFI);
+        if (packet) {
+            /* Make the packet resemble the flow, so that it gets sent to an
+             * OpenFlow controller properly, so that it looks correct for
+             * sFlow, and so that flow_extract() will get the correct vlan_tci
+             * if it is called on 'packet'.
+             *
+             * The allocated space inside 'packet' probably also contains
+             * 'key', that is, both 'packet' and 'key' are probably part of a
+             * struct dpif_upcall (see the large comment on that structure
+             * definition), so pushing data on 'packet' is in general not a
+             * good idea since it could overwrite 'key' or free it as a side
+             * effect.  However, it's OK in this special case because we know
+             * that 'packet' is inside a Netlink attribute: pushing 4 bytes
+             * will just overwrite the 4-byte "struct nlattr", which is fine
+             * since we don't need that header anymore. */
+            eth_push_vlan(packet, flow->vlan_tci);
+        }
 
         /* Let the caller know that we can't reproduce 'key' from 'flow'. */
         if (fitness == ODP_FIT_PERFECT) {
@@ -2668,12 +2726,13 @@ handle_miss_upcalls(struct ofproto_dpif *ofproto, struct dpif_upcall *upcalls,
          * then set 'flow''s header pointers. */
         fitness = ofproto_dpif_extract_flow_key(ofproto,
                                                 upcall->key, upcall->key_len,
-                                                &flow, &initial_tci);
+                                                &flow, &initial_tci,
+                                                upcall->packet);
         if (fitness == ODP_FIT_ERROR) {
             ofpbuf_delete(upcall->packet);
             continue;
         }
-        flow_extract(upcall->packet, flow.priority, flow.tun_id,
+        flow_extract(upcall->packet, flow.skb_priority, flow.tun_id,
                      flow.in_port, &flow);
 
         /* Handle 802.1ag, LACP, and STP specially. */
@@ -2747,7 +2806,7 @@ handle_userspace_upcall(struct ofproto_dpif *ofproto,
 
     fitness = ofproto_dpif_extract_flow_key(ofproto, upcall->key,
                                             upcall->key_len, &flow,
-                                            &initial_tci);
+                                            &initial_tci, upcall->packet);
     if (fitness == ODP_FIT_ERROR) {
         ofpbuf_delete(upcall->packet);
         return;
@@ -4161,7 +4220,7 @@ compose_output_action__(struct action_xlate_ctx *ctx, uint16_t ofp_port,
             return;
         }
 
-        pdscp = get_priority(ofport, ctx->flow.priority);
+        pdscp = get_priority(ofport, ctx->flow.skb_priority);
         if (pdscp) {
             ctx->flow.nw_tos &= ~IP_DSCP_MASK;
             ctx->flow.nw_tos |= pdscp->dscp;
@@ -4390,10 +4449,10 @@ xlate_enqueue_action(struct action_xlate_ctx *ctx,
     }
 
     /* Add datapath actions. */
-    flow_priority = ctx->flow.priority;
-    ctx->flow.priority = priority;
+    flow_priority = ctx->flow.skb_priority;
+    ctx->flow.skb_priority = priority;
     compose_output_action(ctx, ofp_port);
-    ctx->flow.priority = flow_priority;
+    ctx->flow.skb_priority = flow_priority;
 
     /* Update NetFlow output port. */
     if (ctx->nf_output_iface == NF_OUT_DROP) {
@@ -4418,7 +4477,7 @@ xlate_set_queue_action(struct action_xlate_ctx *ctx,
         return;
     }
 
-    ctx->flow.priority = priority;
+    ctx->flow.skb_priority = priority;
 }
 
 struct xlate_reg_state {
@@ -4616,7 +4675,7 @@ do_xlate_actions(const union ofp_action *in, size_t n_in,
             break;
 
         case OFPUTIL_NXAST_POP_QUEUE:
-            ctx->flow.priority = ctx->original_priority;
+            ctx->flow.skb_priority = ctx->orig_skb_priority;
             break;
 
         case OFPUTIL_NXAST_REG_MOVE:
@@ -4721,7 +4780,7 @@ xlate_actions(struct action_xlate_ctx *ctx,
     ctx->nf_output_iface = NF_OUT_DROP;
     ctx->mirrors = 0;
     ctx->recurse = 0;
-    ctx->original_priority = ctx->flow.priority;
+    ctx->orig_skb_priority = ctx->flow.skb_priority;
     ctx->table_id = 0;
     ctx->exit = false;
 
@@ -4809,6 +4868,11 @@ input_vid_to_vlan(const struct ofbundle *in_bundle, uint16_t vid)
 static bool
 input_vid_is_valid(uint16_t vid, struct ofbundle *in_bundle, bool warn)
 {
+    /* Allow any VID on the OFPP_NONE port. */
+    if (in_bundle == &ofpp_none_bundle) {
+        return true;
+    }
+
     switch (in_bundle->vlan_mode) {
     case PORT_VLAN_ACCESS:
         if (vid) {
@@ -4991,22 +5055,17 @@ add_mirror_actions(struct action_xlate_ctx *ctx, const struct flow *orig_flow)
 {
     struct ofproto_dpif *ofproto = ctx->ofproto;
     mirror_mask_t mirrors;
-    struct ofport_dpif *in_port;
     struct ofbundle *in_bundle;
     uint16_t vlan;
     uint16_t vid;
     const struct nlattr *a;
     size_t left;
 
-    /* Obtain in_port from orig_flow.in_port.
-     *
-     * lookup_input_bundle() also ensures that in_port belongs to a bundle. */
-    in_port = lookup_input_bundle(ctx->ofproto, orig_flow->in_port,
-                                  ctx->packet != NULL);
-    if (!in_port) {
+    in_bundle = lookup_input_bundle(ctx->ofproto, orig_flow->in_port,
+                                    ctx->packet != NULL);
+    if (!in_bundle) {
         return;
     }
-    in_bundle = in_port->bundle;
     mirrors = in_bundle->src_mirrors;
 
     /* Drop frames on bundles reserved for mirroring. */
@@ -5127,6 +5186,11 @@ update_learning_table(struct ofproto_dpif *ofproto,
 {
     struct mac_entry *mac;
 
+    /* Don't learn the OFPP_NONE port. */
+    if (in_bundle == &ofpp_none_bundle) {
+        return;
+    }
+
     if (!mac_learning_may_learn(ofproto->ml, flow->dl_src, vlan)) {
         return;
     }
@@ -5157,15 +5221,21 @@ update_learning_table(struct ofproto_dpif *ofproto,
     }
 }
 
-static struct ofport_dpif *
+static struct ofbundle *
 lookup_input_bundle(struct ofproto_dpif *ofproto, uint16_t in_port, bool warn)
 {
     struct ofport_dpif *ofport;
 
+    /* Special-case OFPP_NONE, which a controller may use as the ingress
+     * port for traffic that it is sourcing. */
+    if (in_port == OFPP_NONE) {
+        return &ofpp_none_bundle;
+    }
+
     /* Find the port and bundle for the received packet. */
     ofport = get_ofp_port(ofproto, in_port);
     if (ofport && ofport->bundle) {
-        return ofport;
+        return ofport->bundle;
     }
 
     /* Odd.  A few possible reasons here:
@@ -5249,15 +5319,15 @@ xlate_normal(struct action_xlate_ctx *ctx)
 
     ctx->has_normal = true;
 
-    /* Obtain in_port from ctx->flow.in_port.
-     *
-     * lookup_input_bundle() also ensures that in_port belongs to a bundle. */
-    in_port = lookup_input_bundle(ctx->ofproto, ctx->flow.in_port,
+    in_bundle = lookup_input_bundle(ctx->ofproto, ctx->flow.in_port,
                                   ctx->packet != NULL);
-    if (!in_port) {
+    if (!in_bundle) {
         return;
     }
-    in_bundle = in_port->bundle;
+
+    /* We know 'in_port' exists unless 'in_bundle' is "ofpp_none_bundle",
+     * since lookup_input_bundle() succeeded. */
+    in_port = get_ofp_port(ctx->ofproto, ctx->flow.in_port);
 
     /* Drop malformed frames. */
     if (ctx->flow.dl_type == htons(ETH_TYPE_VLAN) &&
@@ -5290,7 +5360,8 @@ xlate_normal(struct action_xlate_ctx *ctx)
     vlan = input_vid_to_vlan(in_bundle, vid);
 
     /* Check other admissibility requirements. */
-    if (!is_admissible(ctx->ofproto, &ctx->flow, in_port, vlan, &ctx->tags)) {
+    if (in_port &&
+         !is_admissible(ctx->ofproto, &ctx->flow, in_port, vlan, &ctx->tags)) {
         return;
     }
 
@@ -5306,14 +5377,6 @@ xlate_normal(struct action_xlate_ctx *ctx)
         if (mac->port.p != in_bundle) {
             output_normal(ctx, mac->port.p, vlan);
         }
-    } else if (!ctx->packet && !eth_addr_is_multicast(ctx->flow.dl_dst)) {
-        /* If we are revalidating but don't have a learning entry then eject
-         * the flow.  Installing a flow that floods packets opens up a window
-         * of time where we could learn from a packet reflected on a bond and
-         * blackhole packets before the learning table is updated to reflect
-         * the correct port. */
-        ctx->may_set_up_flow = false;
-        return;
     } else {
         struct ofbundle *bundle;
 
@@ -5569,12 +5632,12 @@ ofproto_dpif_lookup(const char *name)
 }
 
 static void
-ofproto_unixctl_fdb_flush(struct unixctl_conn *conn,
-                         const char *args, void *aux OVS_UNUSED)
+ofproto_unixctl_fdb_flush(struct unixctl_conn *conn, int argc OVS_UNUSED,
+                          const char *argv[], void *aux OVS_UNUSED)
 {
     const struct ofproto_dpif *ofproto;
 
-    ofproto = ofproto_dpif_lookup(args);
+    ofproto = ofproto_dpif_lookup(argv[1]);
     if (!ofproto) {
         unixctl_command_reply(conn, 501, "no such bridge");
         return;
@@ -5585,14 +5648,14 @@ ofproto_unixctl_fdb_flush(struct unixctl_conn *conn,
 }
 
 static void
-ofproto_unixctl_fdb_show(struct unixctl_conn *conn,
-                         const char *args, void *aux OVS_UNUSED)
+ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
+                         const char *argv[], void *aux OVS_UNUSED)
 {
     struct ds ds = DS_EMPTY_INITIALIZER;
     const struct ofproto_dpif *ofproto;
     const struct mac_entry *e;
 
-    ofproto = ofproto_dpif_lookup(args);
+    ofproto = ofproto_dpif_lookup(argv[1]);
     if (!ofproto) {
         unixctl_command_reply(conn, 501, "no such bridge");
         return;
@@ -5678,12 +5741,10 @@ trace_resubmit(struct action_xlate_ctx *ctx, struct rule_dpif *rule)
 }
 
 static void
-ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_,
+ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[],
                       void *aux OVS_UNUSED)
 {
-    char *dpname, *arg1, *arg2, *arg3, *arg4;
-    char *args = xstrdup(args_);
-    char *save_ptr = NULL;
+    const char *dpname = argv[1];
     struct ofproto_dpif *ofproto;
     struct ofpbuf odp_key;
     struct ofpbuf *packet;
@@ -5697,29 +5758,21 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_,
     ofpbuf_init(&odp_key, 0);
     ds_init(&result);
 
-    dpname = strtok_r(args, " ", &save_ptr);
-    if (!dpname) {
-        unixctl_command_reply(conn, 501, "Bad command syntax");
-        goto exit;
-    }
-
     ofproto = ofproto_dpif_lookup(dpname);
     if (!ofproto) {
         unixctl_command_reply(conn, 501, "Unknown ofproto (use ofproto/list "
                               "for help)");
         goto exit;
     }
-    arg1 = strtok_r(NULL, " ", &save_ptr);
-    arg2 = strtok_r(NULL, " ", &save_ptr);
-    arg3 = strtok_r(NULL, " ", &save_ptr);
-    arg4 = strtok_r(NULL, "", &save_ptr); /* Get entire rest of line. */
-    if (dpname && arg1 && (!arg2 || !strcmp(arg2, "-generate")) && !arg3) {
+    if (argc == 3 || (argc == 4 && !strcmp(argv[3], "-generate"))) {
         /* ofproto/trace dpname flow [-generate] */
+        const char *flow_s = argv[2];
+        const char *generate_s = argv[3];
         int error;
 
         /* Convert string to datapath key. */
         ofpbuf_init(&odp_key, 0);
-        error = odp_flow_key_from_string(arg1, NULL, &odp_key);
+        error = odp_flow_key_from_string(flow_s, NULL, &odp_key);
         if (error) {
             unixctl_command_reply(conn, 501, "Bad flow syntax");
             goto exit;
@@ -5728,37 +5781,31 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_,
         /* Convert odp_key to flow. */
         error = ofproto_dpif_extract_flow_key(ofproto, odp_key.data,
                                               odp_key.size, &flow,
-                                              &initial_tci);
+                                              &initial_tci, NULL);
         if (error == ODP_FIT_ERROR) {
             unixctl_command_reply(conn, 501, "Invalid flow");
             goto exit;
         }
 
         /* Generate a packet, if requested. */
-        if (arg2) {
+        if (generate_s) {
             packet = ofpbuf_new(0);
             flow_compose(packet, &flow);
         }
-    } else if (dpname && arg1 && arg2 && arg3 && arg4) {
+    } else if (argc == 6) {
         /* ofproto/trace dpname priority tun_id in_port packet */
-        uint16_t in_port;
-        ovs_be64 tun_id;
-        uint32_t priority;
-
-        priority = atoi(arg1);
-        tun_id = htonll(strtoull(arg2, NULL, 0));
-        in_port = ofp_port_to_odp_port(atoi(arg3));
-
-        packet = ofpbuf_new(strlen(args) / 2);
-        arg4 = ofpbuf_put_hex(packet, arg4, NULL);
-        arg4 += strspn(arg4, " ");
-        if (*arg4 != '\0') {
-            unixctl_command_reply(conn, 501, "Trailing garbage in command");
-            goto exit;
-        }
-        if (packet->size < ETH_HEADER_LEN) {
-            unixctl_command_reply(conn, 501,
-                                  "Packet data too short for Ethernet");
+        const char *priority_s = argv[2];
+        const char *tun_id_s = argv[3];
+        const char *in_port_s = argv[4];
+        const char *packet_s = argv[5];
+        uint16_t in_port = ofp_port_to_odp_port(atoi(in_port_s));
+        ovs_be64 tun_id = htonll(strtoull(tun_id_s, NULL, 0));
+        uint32_t priority = atoi(priority_s);
+        const char *msg;
+
+        msg = eth_from_hex(packet_s, &packet);
+        if (msg) {
+            unixctl_command_reply(conn, 501, msg);
             goto exit;
         }
 
@@ -5813,20 +5860,19 @@ exit:
     ds_destroy(&result);
     ofpbuf_delete(packet);
     ofpbuf_uninit(&odp_key);
-    free(args);
 }
 
 static void
-ofproto_dpif_clog(struct unixctl_conn *conn OVS_UNUSED,
-                  const char *args_ OVS_UNUSED, void *aux OVS_UNUSED)
+ofproto_dpif_clog(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED,
+                  const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
 {
     clogged = true;
     unixctl_command_reply(conn, 200, NULL);
 }
 
 static void
-ofproto_dpif_unclog(struct unixctl_conn *conn OVS_UNUSED,
-                    const char *args_ OVS_UNUSED, void *aux OVS_UNUSED)
+ofproto_dpif_unclog(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED,
+                    const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
 {
     clogged = false;
     unixctl_command_reply(conn, 200, NULL);
@@ -5841,15 +5887,18 @@ ofproto_dpif_unixctl_init(void)
     }
     registered = true;
 
-    unixctl_command_register("ofproto/trace",
-                      "bridge {tun_id in_port packet | odp_flow [-generate]}",
-                      ofproto_unixctl_trace, NULL);
-    unixctl_command_register("fdb/flush", "bridge", ofproto_unixctl_fdb_flush,
-                             NULL);
-    unixctl_command_register("fdb/show", "bridge", ofproto_unixctl_fdb_show,
-                             NULL);
-    unixctl_command_register("ofproto/clog", "", ofproto_dpif_clog, NULL);
-    unixctl_command_register("ofproto/unclog", "", ofproto_dpif_unclog, NULL);
+    unixctl_command_register(
+        "ofproto/trace",
+        "bridge {tun_id in_port packet | odp_flow [-generate]}",
+        2, 4, ofproto_unixctl_trace, NULL);
+    unixctl_command_register("fdb/flush", "bridge", 1, 1,
+                             ofproto_unixctl_fdb_flush, NULL);
+    unixctl_command_register("fdb/show", "bridge", 1, 1,
+                             ofproto_unixctl_fdb_show, NULL);
+    unixctl_command_register("ofproto/clog", "", 0, 0,
+                             ofproto_dpif_clog, NULL);
+    unixctl_command_register("ofproto/unclog", "", 0, 0,
+                             ofproto_dpif_unclog, NULL);
 }
 \f
 /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)