ofproto/bond: Minor cleanups.
[sliver-openvswitch.git] / ofproto / ofproto-dpif-upcall.c
index 343181b..938cfde 100644 (file)
@@ -32,6 +32,7 @@
 #include "ofproto-dpif-ipfix.h"
 #include "ofproto-dpif-sflow.h"
 #include "ofproto-dpif-xlate.h"
+#include "ovs-rcu.h"
 #include "packets.h"
 #include "poll-loop.h"
 #include "seq.h"
@@ -41,7 +42,6 @@
 #define MAX_QUEUE_LENGTH 512
 #define FLOW_MISS_MAX_BATCH 50
 #define REVALIDATE_MAX_BATCH 50
-#define MAX_IDLE 1500
 
 VLOG_DEFINE_THIS_MODULE(ofproto_dpif_upcall);
 
@@ -230,6 +230,7 @@ static void *udpif_revalidator(void *);
 static uint64_t udpif_get_n_flows(struct udpif *);
 static void revalidate_udumps(struct revalidator *, struct list *udumps);
 static void revalidator_sweep(struct revalidator *);
+static void revalidator_purge(struct revalidator *);
 static void upcall_unixctl_show(struct unixctl_conn *conn, int argc,
                                 const char *argv[], void *aux);
 static void upcall_unixctl_disable_megaflows(struct unixctl_conn *, int argc,
@@ -279,15 +280,12 @@ void
 udpif_destroy(struct udpif *udpif)
 {
     udpif_set_threads(udpif, 0, 0);
-    udpif_flush();
+    udpif_flush(udpif);
 
     list_remove(&udpif->list_node);
     latch_destroy(&udpif->exit_latch);
     seq_destroy(udpif->reval_seq);
     seq_destroy(udpif->dump_seq);
-    atomic_destroy(&udpif->flow_limit);
-    atomic_destroy(&udpif->n_flows);
-    atomic_destroy(&udpif->n_flows_timestamp);
     ovs_mutex_destroy(&udpif->n_flows_mutex);
     free(udpif);
 }
@@ -300,6 +298,9 @@ void
 udpif_set_threads(struct udpif *udpif, size_t n_handlers,
                   size_t n_revalidators)
 {
+    int error;
+
+    ovsrcu_quiesce_start();
     /* Stop the old threads (if any). */
     if (udpif->handlers &&
         (udpif->n_handlers != n_handlers
@@ -332,7 +333,6 @@ udpif_set_threads(struct udpif *udpif, size_t n_handlers,
         for (i = 0; i < udpif->n_revalidators; i++) {
             struct revalidator *revalidator = &udpif->revalidators[i];
             struct udpif_flow_dump *udump, *next_udump;
-            struct udpif_key *ukey, *next_ukey;
 
             LIST_FOR_EACH_SAFE (udump, next_udump, list_node,
                                 &revalidator->udumps) {
@@ -340,10 +340,9 @@ udpif_set_threads(struct udpif *udpif, size_t n_handlers,
                 free(udump);
             }
 
-            HMAP_FOR_EACH_SAFE (ukey, next_ukey, hmap_node,
-                                &revalidator->ukeys) {
-                ukey_delete(revalidator, ukey);
-            }
+            /* Delete ukeys, and delete all flows from the datapath to prevent
+             * double-counting stats. */
+            revalidator_purge(revalidator);
             hmap_destroy(&revalidator->ukeys);
             ovs_mutex_destroy(&revalidator->mutex);
 
@@ -374,6 +373,13 @@ udpif_set_threads(struct udpif *udpif, size_t n_handlers,
         udpif->n_handlers = 0;
     }
 
+    error = dpif_handlers_set(udpif->dpif, 1);
+    if (error) {
+        VLOG_ERR("failed to configure handlers in dpif %s: %s",
+                 dpif_name(udpif->dpif), ovs_strerror(error));
+        return;
+    }
+
     /* Start new threads (if necessary). */
     if (!udpif->handlers && n_handlers) {
         size_t i;
@@ -410,6 +416,8 @@ udpif_set_threads(struct udpif *udpif, size_t n_handlers,
         xpthread_create(&udpif->dispatcher, NULL, udpif_dispatcher, udpif);
         xpthread_create(&udpif->flow_dumper, NULL, udpif_flow_dumper, udpif);
     }
+
+    ovsrcu_quiesce_end();
 }
 
 /* Waits for all ongoing upcall translations to complete.  This ensures that
@@ -475,16 +483,31 @@ udpif_get_memory_usage(struct udpif *udpif, struct simap *usage)
     }
 }
 
-/* Removes all flows from all datapaths. */
+/* Remove flows from a single datapath. */
 void
-udpif_flush(void)
+udpif_flush(struct udpif *udpif)
+{
+    size_t n_handlers, n_revalidators;
+
+    n_handlers = udpif->n_handlers;
+    n_revalidators = udpif->n_revalidators;
+
+    udpif_set_threads(udpif, 0, 0);
+    dpif_flow_flush(udpif->dpif);
+    udpif_set_threads(udpif, n_handlers, n_revalidators);
+}
+
+/* Removes all flows from all datapaths. */
+static void
+udpif_flush_all_datapaths(void)
 {
     struct udpif *udpif;
 
     LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
-        dpif_flow_flush(udpif->dpif);
+        udpif_flush(udpif);
     }
 }
+
 \f
 /* Destroys and deallocates 'upcall'. */
 static void
@@ -529,7 +552,7 @@ udpif_dispatcher(void *arg)
     set_subprogram_name("dispatcher");
     while (!latch_is_set(&udpif->exit_latch)) {
         recv_upcalls(udpif);
-        dpif_recv_wait(udpif->dpif);
+        dpif_recv_wait(udpif->dpif, 0);
         latch_wait(&udpif->exit_latch);
         poll_block();
     }
@@ -553,6 +576,8 @@ udpif_flow_dumper(void *arg)
         bool need_revalidate;
         uint64_t reval_seq;
         size_t n_flows, i;
+        int error;
+        void *state = NULL;
 
         reval_seq = seq_read(udpif->reval_seq);
         need_revalidate = udpif->last_reval_seq != reval_seq;
@@ -563,9 +588,14 @@ udpif_flow_dumper(void *arg)
         udpif->avg_n_flows = (udpif->avg_n_flows + n_flows) / 2;
 
         start_time = time_msec();
-        dpif_flow_dump_start(&dump, udpif->dpif);
-        while (dpif_flow_dump_next(&dump, &key, &key_len, &mask, &mask_len,
-                                   NULL, NULL, &stats)
+        error = dpif_flow_dump_start(&dump, udpif->dpif);
+        if (error) {
+            VLOG_INFO("Failed to start flow dump (%s)", ovs_strerror(error));
+            goto skip;
+        }
+        dpif_flow_dump_state_init(udpif->dpif, &state);
+        while (dpif_flow_dump_next(&dump, state, &key, &key_len,
+                                   &mask, &mask_len, NULL, NULL, &stats)
                && !latch_is_set(&udpif->exit_latch)) {
             struct udpif_flow_dump *udump = xmalloc(sizeof *udump);
             struct revalidator *revalidator;
@@ -596,6 +626,7 @@ udpif_flow_dumper(void *arg)
             xpthread_cond_signal(&revalidator->wake_cond);
             ovs_mutex_unlock(&revalidator->mutex);
         }
+        dpif_flow_dump_state_uninit(udpif->dpif, state);
         dpif_flow_dump_done(&dump);
 
         /* Let all the revalidators finish and garbage collect. */
@@ -638,7 +669,8 @@ udpif_flow_dumper(void *arg)
                       duration);
         }
 
-        poll_timer_wait_until(start_time + MIN(MAX_IDLE, 500));
+skip:
+        poll_timer_wait_until(start_time + MIN(ofproto_max_idle, 500));
         seq_wait(udpif->reval_seq, udpif->last_reval_seq);
         latch_wait(&udpif->exit_latch);
         poll_block();
@@ -663,7 +695,10 @@ udpif_upcall_handler(void *arg)
         size_t i;
 
         ovs_mutex_lock(&handler->mutex);
-        if (!handler->n_upcalls) {
+        /* Must check the 'exit_latch' again to make sure the main thread is
+         * not joining on the handler thread. */
+        if (!handler->n_upcalls
+            && !latch_is_set(&handler->udpif->exit_latch)) {
             ovs_mutex_cond_wait(&handler->wake_cond, &handler->mutex);
         }
 
@@ -801,7 +836,7 @@ recv_upcalls(struct udpif *udpif)
         upcall = xmalloc(sizeof *upcall);
         ofpbuf_use_stub(&upcall->upcall_buf, upcall->upcall_stub,
                         sizeof upcall->upcall_stub);
-        error = dpif_recv(udpif->dpif, &upcall->dpif_upcall,
+        error = dpif_recv(udpif->dpif, 0, &upcall->dpif_upcall,
                           &upcall->upcall_buf);
         if (error) {
             /* upcall_destroy() can only be called on successfully received
@@ -889,7 +924,7 @@ compose_slow_path(struct udpif *udpif, struct xlate_out *xout,
     port = xout->slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP)
         ? ODPP_NONE
         : odp_in_port;
-    pid = dpif_port_get_pid(udpif->dpif, port);
+    pid = dpif_port_get_pid(udpif->dpif, port, 0);
     odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, buf);
 }
 
@@ -984,10 +1019,9 @@ handle_upcalls(struct handler *handler, struct list *upcalls)
         type = classify_upcall(upcall);
         if (type == MISS_UPCALL) {
             uint32_t hash;
+            struct pkt_metadata md = pkt_metadata_from_flow(&flow);
 
-            flow_extract(packet, flow.skb_priority, flow.pkt_mark,
-                         &flow.tunnel, &flow.in_port, &miss->flow);
-
+            flow_extract(packet, &md, &miss->flow);
             hash = flow_hash(&miss->flow, 0);
             existing_miss = flow_miss_find(&misses, ofproto, &miss->flow,
                                            hash);
@@ -1008,8 +1042,8 @@ handle_upcalls(struct handler *handler, struct list *upcalls)
             } else {
                 miss = existing_miss;
             }
-            miss->stats.tcp_flags |= packet_get_tcp_flags(packet, &miss->flow);
-            miss->stats.n_bytes += packet->size;
+            miss->stats.tcp_flags |= ntohs(miss->flow.tcp_flags);
+            miss->stats.n_bytes += ofpbuf_size(packet);
             miss->stats.n_packets++;
 
             upcall->flow_miss = miss;
@@ -1130,7 +1164,7 @@ handle_upcalls(struct handler *handler, struct list *upcalls)
              * the packet contained no VLAN.  So, we must remove the
              * VLAN header from the packet before trying to execute the
              * actions. */
-            if (miss->xout.odp_actions.size) {
+            if (ofpbuf_size(&miss->xout.odp_actions)) {
                 eth_pop_vlan(packet);
             }
 
@@ -1170,21 +1204,21 @@ handle_upcalls(struct handler *handler, struct list *upcalls)
             op->u.flow_put.flags = DPIF_FP_CREATE | DPIF_FP_MODIFY;
             op->u.flow_put.key = miss->key;
             op->u.flow_put.key_len = miss->key_len;
-            op->u.flow_put.mask = mask.data;
-            op->u.flow_put.mask_len = mask.size;
+            op->u.flow_put.mask = ofpbuf_data(&mask);
+            op->u.flow_put.mask_len = ofpbuf_size(&mask);
             op->u.flow_put.stats = NULL;
 
             if (!miss->xout.slow) {
-                op->u.flow_put.actions = miss->xout.odp_actions.data;
-                op->u.flow_put.actions_len = miss->xout.odp_actions.size;
+                op->u.flow_put.actions = ofpbuf_data(&miss->xout.odp_actions);
+                op->u.flow_put.actions_len = ofpbuf_size(&miss->xout.odp_actions);
             } else {
                 struct ofpbuf buf;
 
                 ofpbuf_use_stack(&buf, miss->slow_path_buf,
                                  sizeof miss->slow_path_buf);
                 compose_slow_path(udpif, &miss->xout, miss->odp_in_port, &buf);
-                op->u.flow_put.actions = buf.data;
-                op->u.flow_put.actions_len = buf.size;
+                op->u.flow_put.actions = ofpbuf_data(&buf);
+                op->u.flow_put.actions_len = ofpbuf_size(&buf);
             }
         }
 
@@ -1194,15 +1228,15 @@ handle_upcalls(struct handler *handler, struct list *upcalls)
          * upcall. */
         miss->flow.vlan_tci = flow_vlan_tci;
 
-        if (miss->xout.odp_actions.size) {
+        if (ofpbuf_size(&miss->xout.odp_actions)) {
 
             op = &ops[n_ops++];
             op->type = DPIF_OP_EXECUTE;
             op->u.execute.packet = packet;
             odp_key_to_pkt_metadata(miss->key, miss->key_len,
                                     &op->u.execute.md);
-            op->u.execute.actions = miss->xout.odp_actions.data;
-            op->u.execute.actions_len = miss->xout.odp_actions.size;
+            op->u.execute.actions = ofpbuf_data(&miss->xout.odp_actions);
+            op->u.execute.actions_len = ofpbuf_size(&miss->xout.odp_actions);
             op->u.execute.needs_help = (miss->xout.slow & SLOW_ACTION) != 0;
         }
     }
@@ -1223,14 +1257,14 @@ handle_upcalls(struct handler *handler, struct list *upcalls)
             struct ofproto_packet_in *pin;
 
             pin = xmalloc(sizeof *pin);
-            pin->up.packet = xmemdup(packet->data, packet->size);
-            pin->up.packet_len = packet->size;
+            pin->up.packet = xmemdup(ofpbuf_data(packet), ofpbuf_size(packet));
+            pin->up.packet_len = ofpbuf_size(packet);
             pin->up.reason = OFPR_NO_MATCH;
             pin->up.table_id = 0;
             pin->up.cookie = OVS_BE64_MAX;
             flow_get_metadata(&miss->flow, &pin->up.fmd);
             pin->send_len = 0; /* Not used for flow table misses. */
-            pin->generated_by_table_miss = false;
+            pin->miss_type = OFPROTO_PACKET_IN_NO_MISS;
             ofproto_dpif_send_packet_in(miss->ofproto, pin);
         }
     }
@@ -1298,6 +1332,7 @@ revalidate_ukey(struct udpif *udpif, struct udpif_flow_dump *udump,
     struct ofpbuf xout_actions, *actions;
     uint64_t slow_path_buf[128 / 8];
     struct xlate_out xout, *xoutp;
+    struct netflow *netflow;
     struct flow flow, udump_mask;
     struct ofproto_dpif *ofproto;
     struct dpif_flow_stats push;
@@ -1311,6 +1346,7 @@ revalidate_ukey(struct udpif *udpif, struct udpif_flow_dump *udump,
     ok = false;
     xoutp = NULL;
     actions = NULL;
+    netflow = NULL;
 
     /* If we don't need to revalidate, we can simply push the stats contained
      * in the udump, otherwise we'll have to get the actions so we can check
@@ -1338,7 +1374,7 @@ revalidate_ukey(struct udpif *udpif, struct udpif_flow_dump *udump,
     }
 
     error = xlate_receive(udpif->backer, NULL, ukey->key, ukey->key_len, &flow,
-                          &ofproto, NULL, NULL, NULL, &odp_in_port);
+                          &ofproto, NULL, NULL, &netflow, &odp_in_port);
     if (error) {
         goto exit;
     }
@@ -1356,8 +1392,8 @@ revalidate_ukey(struct udpif *udpif, struct udpif_flow_dump *udump,
     }
 
     if (!xout.slow) {
-        ofpbuf_use_const(&xout_actions, xout.odp_actions.data,
-                         xout.odp_actions.size);
+        ofpbuf_use_const(&xout_actions, ofpbuf_data(&xout.odp_actions),
+                         ofpbuf_size(&xout.odp_actions));
     } else {
         ofpbuf_use_stack(&xout_actions, slow_path_buf, sizeof slow_path_buf);
         compose_slow_path(udpif, &xout, odp_in_port, &xout_actions);
@@ -1387,6 +1423,13 @@ revalidate_ukey(struct udpif *udpif, struct udpif_flow_dump *udump,
     ok = true;
 
 exit:
+    if (netflow) {
+        if (!ok) {
+            netflow_expire(netflow, &flow);
+            netflow_flow_clear(netflow, &flow);
+        }
+        netflow_unref(netflow);
+    }
     ofpbuf_delete(actions);
     xlate_out_uninit(xoutp);
     return ok;
@@ -1467,12 +1510,17 @@ push_dump_ops(struct revalidator *revalidator,
     }
 
     for (i = 0; i < n_ops; i++) {
-        struct udpif_key *ukey = ops[i].ukey;
+        struct udpif_key *ukey;
 
-        /* Look up the ukey to prevent double-free in case 'ops' contains a
-         * given ukey more than once (which can happen if the datapath dumps a
-         * given flow more than once). */
-        ukey = ukey_lookup(revalidator, ops[i].udump);
+        /* If there's a udump, this ukey came directly from a datapath flow
+         * dump.  Sometimes a datapath can send duplicates in flow dumps, in
+         * which case we wouldn't want to double-free a ukey, so avoid that by
+         * looking up the ukey again.
+         *
+         * If there's no udump then we know what we're doing. */
+        ukey = (ops[i].udump
+                ? ukey_lookup(revalidator, ops[i].udump)
+                : ops[i].ukey);
         if (ukey) {
             ukey_delete(revalidator, ukey);
         }
@@ -1496,7 +1544,7 @@ revalidate_udumps(struct revalidator *revalidator, struct list *udumps)
     n_flows = udpif_get_n_flows(udpif);
 
     must_del = false;
-    max_idle = MAX_IDLE;
+    max_idle = ofproto_max_idle;
     if (n_flows > flow_limit) {
         must_del = n_flows > 2 * flow_limit;
         max_idle = 100;
@@ -1547,17 +1595,46 @@ revalidate_udumps(struct revalidator *revalidator, struct list *udumps)
 }
 
 static void
-revalidator_sweep(struct revalidator *revalidator)
+revalidator_sweep__(struct revalidator *revalidator, bool purge)
 {
+    struct dump_op ops[REVALIDATE_MAX_BATCH];
     struct udpif_key *ukey, *next;
+    size_t n_ops;
+
+    n_ops = 0;
 
     HMAP_FOR_EACH_SAFE (ukey, next, hmap_node, &revalidator->ukeys) {
-        if (ukey->mark) {
+        if (!purge && ukey->mark) {
             ukey->mark = false;
         } else {
-            ukey_delete(revalidator, ukey);
+            struct dump_op *op = &ops[n_ops++];
+
+            /* If we have previously seen a flow in the datapath, but didn't
+             * see it during the most recent dump, delete it. This allows us
+             * to clean up the ukey and keep the statistics consistent. */
+            dump_op_init(op, ukey->key, ukey->key_len, ukey, NULL);
+            if (n_ops == REVALIDATE_MAX_BATCH) {
+                push_dump_ops(revalidator, ops, n_ops);
+                n_ops = 0;
+            }
         }
     }
+
+    if (n_ops) {
+        push_dump_ops(revalidator, ops, n_ops);
+    }
+}
+
+static void
+revalidator_sweep(struct revalidator *revalidator)
+{
+    revalidator_sweep__(revalidator, false);
+}
+
+static void
+revalidator_purge(struct revalidator *revalidator)
+{
+    revalidator_sweep__(revalidator, true);
 }
 \f
 static void
@@ -1618,7 +1695,7 @@ upcall_unixctl_disable_megaflows(struct unixctl_conn *conn,
                                  void *aux OVS_UNUSED)
 {
     atomic_store(&enable_megaflows, false);
-    udpif_flush();
+    udpif_flush_all_datapaths();
     unixctl_command_reply(conn, "megaflows disabled");
 }
 
@@ -1633,7 +1710,7 @@ upcall_unixctl_enable_megaflows(struct unixctl_conn *conn,
                                 void *aux OVS_UNUSED)
 {
     atomic_store(&enable_megaflows, true);
-    udpif_flush();
+    udpif_flush_all_datapaths();
     unixctl_command_reply(conn, "megaflows enabled");
 }