ofproto-dpif-xlate: Take control of the qdscp map.
[sliver-openvswitch.git] / ofproto / ofproto-dpif-xlate.c
index eb4ed69..6ce30cb 100644 (file)
@@ -65,9 +65,9 @@ struct xbridge {
     struct mbridge *mbridge;      /* Mirroring. */
     struct dpif_sflow *sflow;     /* SFlow handle, or null. */
     struct dpif_ipfix *ipfix;     /* Ipfix handle, or null. */
+    struct stp *stp;              /* STP or null if disabled. */
 
     enum ofp_config_flags frag;   /* Fragmentation handling. */
-    bool has_stp;                 /* Bridge runs stp? */
     bool has_netflow;             /* Bridge runs netflow? */
     bool has_in_band;             /* Bridge has in band control? */
     bool forward_bpdu;            /* Bridge forwards STP BPDUs? */
@@ -112,7 +112,9 @@ struct xport {
     struct xport *peer;              /* Patch port peer or null. */
 
     enum ofputil_port_config config; /* OpenFlow port configuration. */
-    enum stp_state stp_state;        /* STP_DISABLED if STP not in use. */
+    int stp_port_no;                 /* STP port number or 0 if not in use. */
+
+    struct hmap skb_priorities;      /* Map of 'skb_priority_to_dscp's. */
 
     bool may_enable;                 /* May be enabled in bonds. */
     bool is_tunnel;                  /* Is a tunnel port. */
@@ -164,6 +166,16 @@ struct xlate_ctx {
  * The bundle's name and vlan mode are initialized in lookup_input_bundle() */
 static struct xbundle ofpp_none_bundle;
 
+/* Node in 'xport''s 'skb_priorities' map.  Used to maintain a map from
+ * 'priority' (the datapath's term for QoS queue) to the dscp bits which all
+ * traffic egressing the 'xport' with that priority should be marked with. */
+struct skb_priority_to_dscp {
+    struct hmap_node hmap_node; /* Node in 'xport''s 'skb_priorities'. */
+    uint32_t skb_priority;      /* Priority of this queue (see struct flow). */
+
+    uint8_t dscp;               /* DSCP bits, positioned as in 'nw_tos'. */
+};
+
 static struct hmap xbridges = HMAP_INITIALIZER(&xbridges);
 static struct hmap xbundles = HMAP_INITIALIZER(&xbundles);
 static struct hmap xports = HMAP_INITIALIZER(&xports);
@@ -187,14 +199,19 @@ static struct xbridge *xbridge_lookup(const struct ofproto_dpif *);
 static struct xbundle *xbundle_lookup(const struct ofbundle *);
 static struct xport *xport_lookup(struct ofport_dpif *);
 static struct xport *get_ofp_port(const struct xbridge *, ofp_port_t ofp_port);
+static struct skb_priority_to_dscp *get_skb_priority(const struct xport *,
+                                                     uint32_t skb_priority);
+static void clear_skb_priorities(struct xport *);
+static bool dscp_from_skb_priority(const struct xport *, uint32_t skb_priority,
+                                   uint8_t *dscp);
 
 void
 xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name,
-                  const struct mac_learning *ml, const struct mbridge *mbridge,
+                  const struct mac_learning *ml, struct stp *stp,
+                  const struct mbridge *mbridge,
                   const struct dpif_sflow *sflow,
                   const struct dpif_ipfix *ipfix, enum ofp_config_flags frag,
-                  bool forward_bpdu, bool has_in_band, bool has_netflow,
-                  bool has_stp)
+                  bool forward_bpdu, bool has_in_band, bool has_netflow)
 {
     struct xbridge *xbridge = xbridge_lookup(ofproto);
 
@@ -227,13 +244,17 @@ xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name,
         xbridge->ipfix = dpif_ipfix_ref(ipfix);
     }
 
+    if (xbridge->stp != stp) {
+        stp_unref(xbridge->stp);
+        xbridge->stp = stp_ref(stp);
+    }
+
     free(xbridge->name);
     xbridge->name = xstrdup(name);
 
     xbridge->forward_bpdu = forward_bpdu;
     xbridge->has_in_band = has_in_band;
     xbridge->has_netflow = has_netflow;
-    xbridge->has_stp = has_stp;
     xbridge->frag = frag;
 }
 
@@ -330,10 +351,13 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
                  struct ofport_dpif *ofport, ofp_port_t ofp_port,
                  odp_port_t odp_port, const struct netdev *netdev,
                  const struct cfm *cfm, const struct bfd *bfd,
-                 struct ofport_dpif *peer, enum ofputil_port_config config,
-                 enum stp_state stp_state, bool is_tunnel, bool may_enable)
+                 struct ofport_dpif *peer, int stp_port_no,
+                 const struct ofproto_port_queue *qdscp_list, size_t n_qdscp,
+                 enum ofputil_port_config config, bool is_tunnel,
+                 bool may_enable)
 {
     struct xport *xport = xport_lookup(ofport);
+    size_t i;
 
     if (!xport) {
         xport = xzalloc(sizeof *xport);
@@ -341,6 +365,7 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
         xport->xbridge = xbridge_lookup(ofproto);
         xport->ofp_port = ofp_port;
 
+        hmap_init(&xport->skb_priorities);
         hmap_insert(&xports, &xport->hmap_node, hash_pointer(ofport, 0));
         hmap_insert(&xport->xbridge->xports, &xport->ofp_node,
                     hash_ofp_port(xport->ofp_port));
@@ -349,7 +374,7 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
     ovs_assert(xport->ofp_port == ofp_port);
 
     xport->config = config;
-    xport->stp_state = stp_state;
+    xport->stp_port_no = stp_port_no;
     xport->is_tunnel = is_tunnel;
     xport->may_enable = may_enable;
     xport->odp_port = odp_port;
@@ -384,6 +409,24 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
     if (xport->xbundle) {
         list_insert(&xport->xbundle->xports, &xport->bundle_node);
     }
+
+    clear_skb_priorities(xport);
+    for (i = 0; i < n_qdscp; i++) {
+        struct skb_priority_to_dscp *pdscp;
+        uint32_t skb_priority;
+
+        if (ofproto_dpif_queue_to_priority(xport->xbridge->ofproto,
+                                           qdscp_list[i].queue,
+                                           &skb_priority)) {
+            continue;
+        }
+
+        pdscp = xmalloc(sizeof *pdscp);
+        pdscp->skb_priority = skb_priority;
+        pdscp->dscp = (qdscp_list[i].dscp << 2) & IP_DSCP_MASK;
+        hmap_insert(&xport->skb_priorities, &pdscp->hmap_node,
+                    hash_int(pdscp->skb_priority, 0));
+    }
 }
 
 void
@@ -400,7 +443,13 @@ xlate_ofport_remove(struct ofport_dpif *ofport)
         xport->peer = NULL;
     }
 
-    list_remove(&xport->bundle_node);
+    if (xport->xbundle) {
+        list_remove(&xport->bundle_node);
+    }
+
+    clear_skb_priorities(xport);
+    hmap_destroy(&xport->skb_priorities);
+
     hmap_remove(&xports, &xport->hmap_node);
     hmap_remove(&xport->xbridge->xports, &xport->ofp_node);
 
@@ -452,6 +501,61 @@ xport_lookup(struct ofport_dpif *ofport)
     return NULL;
 }
 
+/* Returns whether stp_learn_in_state() holds for 'xport''s current STP port
+ * state; STP_DISABLED is assumed if the bridge or port does not use STP. */
+static bool
+xport_stp_learn_state(const struct xport *xport)
+{
+    struct stp *stp = xport->xbridge->stp;
+    int port_no = xport->stp_port_no;
+    return stp_learn_in_state(stp && port_no
+        ? stp_port_get_state(stp_get_port(stp, port_no)) : STP_DISABLED);
+}
+
+/* Returns whether 'xport''s STP state (STP_DISABLED if unused) may forward. */
+static bool
+xport_stp_forward_state(const struct xport *xport)
+{
+    struct stp *stp = xport->xbridge->stp;
+    int stp_port_no = xport->stp_port_no;
+    return stp_forward_in_state(stp && stp_port_no
+        ? stp_port_get_state(stp_get_port(stp, stp_port_no)) : STP_DISABLED);
+}
+
+/* Returns true if STP should process 'flow' (dl_dst equals eth_addr_stp).
+ * Sets the dl_dst mask in 'wc', the only field used in the determination. */
+static bool
+stp_should_process_flow(const struct flow *flow, struct flow_wildcards *wc)
+{
+    memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
+    return eth_addr_equals(flow->dl_dst, eth_addr_stp);
+}
+
+static void
+stp_process_packet(const struct xport *xport, const struct ofpbuf *packet)
+{
+    struct ofpbuf payload = *packet;    /* Local copy; size is trimmed below. */
+    struct eth_header *eth = payload.data;
+    struct stp_port *sp = xport->xbridge->stp && xport->stp_port_no
+        ? stp_get_port(xport->xbridge->stp, xport->stp_port_no)
+        : NULL;
+
+    /* Sink packets on ports that have no STP port or have STP disabled, even
+     * when the bridge has STP enabled. */
+    if (!sp || stp_port_get_state(sp) == STP_DISABLED) {
+        return;
+    }
+
+    /* Trim off padding on payload; 'eth_type' is used as its length here. */
+    if (payload.size > ntohs(eth->eth_type) + ETH_HEADER_LEN) {
+        payload.size = ntohs(eth->eth_type) + ETH_HEADER_LEN;
+    }
+
+    if (ofpbuf_try_pull(&payload, ETH_HEADER_LEN + LLC_HEADER_LEN)) {
+        stp_received_bpdu(sp, payload.data, payload.size);
+    }
+}
+
 static struct xport *
 get_ofp_port(const struct xbridge *xbridge, ofp_port_t ofp_port)
 {
@@ -770,8 +874,7 @@ output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle,
         struct ofport_dpif *ofport;
 
         ofport = bond_choose_output_slave(out_xbundle->bond, &ctx->xin->flow,
-                                          &ctx->xout->wc, vid,
-                                          &ctx->xout->tags);
+                                          &ctx->xout->wc, vid);
         xport = ofport ? xport_lookup(ofport) : NULL;
 
         if (!xport) {
@@ -834,8 +937,9 @@ update_learning_table(const struct xbridge *xbridge,
         return;
     }
 
+    ovs_rwlock_wrlock(&xbridge->ml->rwlock);
     if (!mac_learning_may_learn(xbridge->ml, flow->dl_src, vlan)) {
-        return;
+        goto out;
     }
 
     mac = mac_learning_insert(xbridge->ml, flow->dl_src, vlan);
@@ -845,11 +949,11 @@ update_learning_table(const struct xbridge *xbridge,
         if (!in_xbundle->bond) {
             mac_entry_set_grat_arp_lock(mac);
         } else if (mac_entry_is_grat_arp_locked(mac)) {
-            return;
+            goto out;
         }
     }
 
-    if (mac_entry_is_new(mac) || mac->port.p != in_xbundle->ofbundle) {
+    if (mac->port.p != in_xbundle->ofbundle) {
         /* The log messages here could actually be useful in debugging,
          * so keep the rate limit relatively high. */
         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
@@ -859,8 +963,10 @@ update_learning_table(const struct xbridge *xbridge,
                     in_xbundle->name, vlan);
 
         mac->port.p = in_xbundle->ofbundle;
-        mac_learning_changed(xbridge->ml, mac);
+        mac_learning_changed(xbridge->ml);
     }
+out:
+    ovs_rwlock_unlock(&xbridge->ml->rwlock);
 }
 
 /* Determines whether packets in 'flow' within 'xbridge' should be forwarded or
@@ -896,7 +1002,7 @@ is_admissible(struct xlate_ctx *ctx, struct xport *in_port,
         struct mac_entry *mac;
 
         switch (bond_check_admissibility(in_xbundle->bond, in_port->ofport,
-                                         flow->dl_dst, &ctx->xout->tags)) {
+                                         flow->dl_dst)) {
         case BV_ACCEPT:
             break;
 
@@ -905,14 +1011,17 @@ is_admissible(struct xlate_ctx *ctx, struct xport *in_port,
             return false;
 
         case BV_DROP_IF_MOVED:
-            mac = mac_learning_lookup(xbridge->ml, flow->dl_src, vlan, NULL);
+            ovs_rwlock_rdlock(&xbridge->ml->rwlock);
+            mac = mac_learning_lookup(xbridge->ml, flow->dl_src, vlan);
             if (mac && mac->port.p != in_xbundle->ofbundle &&
                 (!is_gratuitous_arp(flow, &ctx->xout->wc)
                  || mac_entry_is_grat_arp_locked(mac))) {
+                ovs_rwlock_unlock(&xbridge->ml->rwlock);
                 xlate_report(ctx, "SLB bond thinks this packet looped back, "
                             "dropping");
                 return false;
             }
+            ovs_rwlock_unlock(&xbridge->ml->rwlock);
             break;
         }
     }
@@ -988,8 +1097,8 @@ xlate_normal(struct xlate_ctx *ctx)
     }
 
     /* Determine output bundle. */
-    mac = mac_learning_lookup(ctx->xbridge->ml, flow->dl_dst, vlan,
-                              &ctx->xout->tags);
+    ovs_rwlock_rdlock(&ctx->xbridge->ml->rwlock);
+    mac = mac_learning_lookup(ctx->xbridge->ml, flow->dl_dst, vlan);
     if (mac) {
         struct xbundle *mac_xbundle = xbundle_lookup(mac->port.p);
         if (mac_xbundle && mac_xbundle != in_xbundle) {
@@ -1014,6 +1123,7 @@ xlate_normal(struct xlate_ctx *ctx)
         }
         ctx->xout->nf_output_iface = NF_OUT_FLOOD;
     }
+    ovs_rwlock_unlock(&ctx->xbridge->ml->rwlock);
 }
 
 /* Compose SAMPLE action for sFlow or IPFIX.  The given probability is
@@ -1204,9 +1314,9 @@ process_special(struct xlate_ctx *ctx, const struct flow *flow,
             lacp_process_packet(xport->xbundle->lacp, xport->ofport, packet);
         }
         return SLOW_LACP;
-    } else if (xbridge->has_stp && stp_should_process_flow(flow, wc)) {
+    } else if (xbridge->stp && stp_should_process_flow(flow, wc)) {
         if (packet) {
-            stp_process_packet(xport->ofport, packet);
+            stp_process_packet(xport, packet);
         }
         return SLOW_STP;
     } else {
@@ -1237,7 +1347,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
     } else if (xport->config & OFPUTIL_PC_NO_FWD) {
         xlate_report(ctx, "OFPPC_NO_FWD set, skipping output");
         return;
-    } else if (check_stp && !stp_forward_in_state(xport->stp_state)) {
+    } else if (check_stp && !xport_stp_forward_state(xport)) {
         xlate_report(ctx, "STP not in forwarding state, skipping output");
         return;
     }
@@ -1263,7 +1373,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
         if (special) {
             ctx->xout->slow = special;
         } else if (may_receive(peer, ctx)) {
-            if (stp_forward_in_state(peer->stp_state)) {
+            if (xport_stp_forward_state(peer)) {
                 xlate_table_action(ctx, flow->in_port.ofp_port, 0, true);
             } else {
                 /* Forwarding is disabled by STP.  Let OFPP_NORMAL and the
@@ -1293,8 +1403,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
     flow_skb_mark = flow->skb_mark;
     flow_nw_tos = flow->nw_tos;
 
-    if (ofproto_dpif_dscp_from_priority(xport->ofport, flow->skb_priority,
-                                        &dscp)) {
+    if (dscp_from_skb_priority(xport, flow->skb_priority, &dscp)) {
         wc->masks.nw_tos |= IP_ECN_MASK;
         flow->nw_tos &= ~IP_DSCP_MASK;
         flow->nw_tos |= dscp;
@@ -1405,10 +1514,6 @@ xlate_table_action(struct xlate_ctx *ctx,
                                          &ctx->xin->flow, &ctx->xout->wc,
                                          table_id);
 
-        ctx->xout->tags |= calculate_flow_tag(ctx->xbridge->ofproto,
-                                              &ctx->xin->flow, ctx->table_id,
-                                              rule);
-
         /* Restore the original input port.  Otherwise OFPP_NORMAL and
          * OFPP_IN_PORT will have surprising behavior. */
         ctx->xin->flow.in_port.ofp_port = old_in_port;
@@ -1875,8 +1980,7 @@ may_receive(const struct xport *xport, struct xlate_ctx *ctx)
      * disabled.  If just learning is enabled, we need to have
      * OFPP_NORMAL and the learning action have a look at the packet
      * before we can drop it. */
-    if (!stp_forward_in_state(xport->stp_state)
-        && !stp_learn_in_state(xport->stp_state)) {
+    if (!xport_stp_forward_state(xport) && !xport_stp_learn_state(xport)) {
         return false;
     }
 
@@ -1916,7 +2020,6 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
         ctx->rule->up.evictable = false;
     }
 
- do_xlate_actions_again:
     OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
         struct ofpact_controller *controller;
         const struct ofpact_metadata *metadata;
@@ -2117,35 +2220,10 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
         case OFPACT_GOTO_TABLE: {
             /* It is assumed that goto-table is the last action. */
             struct ofpact_goto_table *ogt = ofpact_get_GOTO_TABLE(a);
-            struct rule_dpif *rule;
 
             ovs_assert(ctx->table_id < ogt->table_id);
-
-            ctx->table_id = ogt->table_id;
-
-            /* Look up a flow from the new table. */
-            rule = rule_dpif_lookup_in_table(ctx->xbridge->ofproto, flow, wc,
-                                             ctx->table_id);
-
-            ctx->xout->tags |= calculate_flow_tag(ctx->xbridge->ofproto,
-                                                  &ctx->xin->flow,
-                                                  ctx->table_id, rule);
-
-            rule = ctx_rule_hooks(ctx, rule, true);
-
-            if (rule) {
-                if (ctx->rule) {
-                    ctx->rule->up.evictable = was_evictable;
-                }
-                ctx->rule = rule;
-                was_evictable = rule->up.evictable;
-                rule->up.evictable = false;
-
-                /* Tail recursion removal. */
-                ofpacts = rule->up.ofpacts;
-                ofpacts_len = rule->up.ofpacts_len;
-                goto do_xlate_actions_again;
-            }
+            xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port,
+                               ogt->table_id, true);
             break;
         }
 
@@ -2210,7 +2288,6 @@ void
 xlate_out_copy(struct xlate_out *dst, const struct xlate_out *src)
 {
     dst->wc = src->wc;
-    dst->tags = src->tags;
     dst->slow = src->slow;
     dst->has_learn = src->has_learn;
     dst->has_normal = src->has_normal;
@@ -2224,6 +2301,41 @@ xlate_out_copy(struct xlate_out *dst, const struct xlate_out *src)
                src->odp_actions.size);
 }
 \f
+/* Returns 'xport''s map node for 'skb_priority', or NULL if there is none. */
+static struct skb_priority_to_dscp *
+get_skb_priority(const struct xport *xport, uint32_t skb_priority)
+{
+    uint32_t hash = hash_int(skb_priority, 0);
+    struct skb_priority_to_dscp *node;
+
+    HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash, &xport->skb_priorities) {
+        if (node->skb_priority == skb_priority) {
+            return node;
+        }
+    }
+    return NULL;
+}
+
+static bool
+dscp_from_skb_priority(const struct xport *xport, uint32_t skb_priority,
+                       uint8_t *dscp)
+{
+    struct skb_priority_to_dscp *pdscp = get_skb_priority(xport, skb_priority);
+    *dscp = pdscp ? pdscp->dscp : 0;    /* '*dscp' is always written. */
+    return pdscp != NULL;               /* True only if a mapping exists. */
+}
+
+static void
+clear_skb_priorities(struct xport *xport)
+{
+    struct skb_priority_to_dscp *pdscp, *next;
+
+    HMAP_FOR_EACH_SAFE (pdscp, next, hmap_node, &xport->skb_priorities) {
+        hmap_remove(&xport->skb_priorities, &pdscp->hmap_node);
+        free(pdscp);    /* Nodes are xmalloc()'d in xlate_ofport_set(). */
+    }
+}
+
 static bool
 actions_output_to_local_port(const struct xlate_ctx *ctx)
 {
@@ -2286,7 +2398,6 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
 
     ctx.xin = xin;
     ctx.xout = xout;
-    ctx.xout->tags = 0;
     ctx.xout->slow = 0;
     ctx.xout->has_learn = false;
     ctx.xout->has_normal = false;
@@ -2316,6 +2427,9 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
 
     if (tnl_port_should_receive(&ctx.xin->flow)) {
         memset(&wc->masks.tunnel, 0xff, sizeof wc->masks.tunnel);
+        /* skb_mark is currently used only by tunnels but that will likely
+         * change in the future. */
+        memset(&wc->masks.skb_mark, 0xff, sizeof wc->masks.skb_mark);
     }
     if (ctx.xbridge->has_netflow) {
         netflow_mask_wc(flow, wc);
@@ -2392,7 +2506,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
 
             /* We've let OFPP_NORMAL and the learning action look at the
              * packet, so drop it now if forwarding is disabled. */
-            if (in_port && !stp_forward_in_state(in_port->stp_state)) {
+            if (in_port && !xport_stp_forward_state(in_port)) {
                 ctx.xout->odp_actions.size = sample_actions_len;
             }
         }