Merge branch 'mainstream'
[sliver-openvswitch.git] / lib / dpif.c
index 0d8dd9d..1bac8e8 100644 (file)
@@ -28,6 +28,7 @@
 #include "flow.h"
 #include "netdev.h"
 #include "netlink.h"
+#include "odp-execute.h"
 #include "odp-util.h"
 #include "ofp-errors.h"
 #include "ofp-print.h"
@@ -51,16 +52,16 @@ COVERAGE_DEFINE(dpif_flow_flush);
 COVERAGE_DEFINE(dpif_flow_get);
 COVERAGE_DEFINE(dpif_flow_put);
 COVERAGE_DEFINE(dpif_flow_del);
-COVERAGE_DEFINE(dpif_flow_query_list);
-COVERAGE_DEFINE(dpif_flow_query_list_n);
 COVERAGE_DEFINE(dpif_execute);
 COVERAGE_DEFINE(dpif_purge);
+COVERAGE_DEFINE(dpif_execute_with_help);
 
+/* Base dpif provider classes.  'dpif_linux_class' is listed only when
+ * LINUX_DATAPATH is defined. */
 static const struct dpif_class *base_dpif_classes[] = {
 #ifdef LINUX_DATAPATH
     &dpif_linux_class,
 #endif
     &dpif_netdev_class,
+    &dpif_planetlab_class,
 };
 
 struct registered_dpif_class {
@@ -71,7 +72,7 @@ static struct shash dpif_classes = SHASH_INITIALIZER(&dpif_classes);
 static struct sset dpif_blacklist = SSET_INITIALIZER(&dpif_blacklist);
 
 /* Protects 'dpif_classes', including the refcount, and 'dpif_blacklist'. */
-static pthread_mutex_t dpif_mutex = PTHREAD_MUTEX_INITIALIZER;
+static struct ovs_mutex dpif_mutex = OVS_MUTEX_INITIALIZER;
 
 /* Rate limit for individual messages going to or from the datapath, output at
  * DBG level.  This is very high because, if these are enabled, it is because
@@ -145,9 +146,9 @@ dp_register_provider(const struct dpif_class *new_class)
 {
     int error;
 
-    xpthread_mutex_lock(&dpif_mutex);
+    ovs_mutex_lock(&dpif_mutex);
     error = dp_register_provider__(new_class);
-    xpthread_mutex_unlock(&dpif_mutex);
+    ovs_mutex_unlock(&dpif_mutex);
 
     return error;
 }
@@ -190,9 +191,9 @@ dp_unregister_provider(const char *type)
 
     dp_initialize();
 
-    xpthread_mutex_lock(&dpif_mutex);
+    ovs_mutex_lock(&dpif_mutex);
     error = dp_unregister_provider__(type);
-    xpthread_mutex_unlock(&dpif_mutex);
+    ovs_mutex_unlock(&dpif_mutex);
 
     return error;
 }
@@ -202,9 +203,9 @@ dp_unregister_provider(const char *type)
 void
 dp_blacklist_provider(const char *type)
 {
-    xpthread_mutex_lock(&dpif_mutex);
+    /* 'dpif_blacklist' is protected by 'dpif_mutex', so add 'type' to it
+     * only while holding the mutex. */
+    ovs_mutex_lock(&dpif_mutex);
     sset_add(&dpif_blacklist, type);
-    xpthread_mutex_unlock(&dpif_mutex);
+    ovs_mutex_unlock(&dpif_mutex);
 }
 
 /* Clears 'types' and enumerates the types of all currently registered datapath
@@ -217,21 +218,21 @@ dp_enumerate_types(struct sset *types)
     dp_initialize();
     sset_clear(types);
 
-    xpthread_mutex_lock(&dpif_mutex);
+    ovs_mutex_lock(&dpif_mutex);
     SHASH_FOR_EACH(node, &dpif_classes) {
         const struct registered_dpif_class *registered_class = node->data;
         sset_add(types, registered_class->dpif_class->type);
     }
-    xpthread_mutex_unlock(&dpif_mutex);
+    ovs_mutex_unlock(&dpif_mutex);
 }
 
+/* Releases one reference to 'rc'.  The reference count must be nonzero on
+ * entry (this is asserted); it is decremented while holding 'dpif_mutex'. */
 static void
 dp_class_unref(struct registered_dpif_class *rc)
 {
-    xpthread_mutex_lock(&dpif_mutex);
+    ovs_mutex_lock(&dpif_mutex);
     ovs_assert(rc->refcount);
     rc->refcount--;
-    xpthread_mutex_unlock(&dpif_mutex);
+    ovs_mutex_unlock(&dpif_mutex);
 }
 
 static struct registered_dpif_class *
@@ -239,12 +240,12 @@ dp_class_lookup(const char *type)
 {
     struct registered_dpif_class *rc;
 
-    xpthread_mutex_lock(&dpif_mutex);
+    ovs_mutex_lock(&dpif_mutex);
     rc = shash_find_data(&dpif_classes, type);
     if (rc) {
         rc->refcount++;
     }
-    xpthread_mutex_unlock(&dpif_mutex);
+    ovs_mutex_unlock(&dpif_mutex);
 
     return rc;
 }
@@ -481,12 +482,12 @@ dpif_port_open_type(const char *datapath_type, const char *port_type)
 
     datapath_type = dpif_normalize_type(datapath_type);
 
-    xpthread_mutex_lock(&dpif_mutex);
+    ovs_mutex_lock(&dpif_mutex);
     rc = shash_find_data(&dpif_classes, datapath_type);
     if (rc && rc->dpif_class->port_open_type) {
         port_type = rc->dpif_class->port_open_type(rc->dpif_class, port_type);
     }
-    xpthread_mutex_unlock(&dpif_mutex);
+    ovs_mutex_unlock(&dpif_mutex);
 
     return port_type;
 }
@@ -634,7 +635,7 @@ dpif_port_query_by_name(const struct dpif *dpif, const char *devname,
 
 /* Returns one greater than the maximum port number accepted in flow
  * actions. */
-odp_port_t
+uint32_t
 dpif_get_max_ports(const struct dpif *dpif)
 {
     return dpif->dpif_class->get_max_ports(dpif);
@@ -1063,6 +1064,94 @@ dpif_flow_dump_done(struct dpif_flow_dump *dump)
     return dump->error == EOF ? 0 : dump->error;
 }
 
+/* State that dpif_execute_with_help() shares with its action callbacks. */
+struct dpif_execute_helper_aux {
+    struct dpif *dpif;          /* Datapath whose 'execute' hook is called. */
+    int error;                  /* 0, or the most recent nonzero result
+                                 * returned by that 'execute' hook. */
+};
+
+/* Builds a datapath flow key from 'flow' and passes 'packet', together with
+ * the 'actions_len' bytes of 'actions', to the 'execute' hook of the dpif
+ * named in 'aux_' (a struct dpif_execute_helper_aux).  'needs_help' is
+ * cleared in the request, so the provider executes the actions itself.
+ *
+ * A nonzero result from the provider is stored in the 'error' member of
+ * 'aux_'.  A later failure overwrites an earlier one, and a success never
+ * clears a previously recorded error. */
+static void
+dpif_execute_helper_execute__(void *aux_, struct ofpbuf *packet,
+                              const struct flow *flow,
+                              const struct nlattr *actions, size_t actions_len)
+{
+    struct dpif_execute_helper_aux *aux = aux_;
+    struct dpif_execute execute;
+    struct odputil_keybuf key_stub;
+    struct ofpbuf key;
+    int error;
+
+    ofpbuf_use_stub(&key, &key_stub, sizeof key_stub);
+    odp_flow_key_from_flow(&key, flow, flow->in_port.odp_port);
+
+    execute.key = key.data;
+    execute.key_len = key.size;
+    execute.actions = actions;
+    execute.actions_len = actions_len;
+    execute.packet = packet;
+    execute.needs_help = false;
+
+    error = aux->dpif->dpif_class->execute(aux->dpif, &execute);
+    if (error) {
+        aux->error = error;
+    }
+
+    /* 'key' was initialized with ofpbuf_use_stub(), so its data moves to the
+     * heap if the key outgrows 'key_stub'.  Release it here, after the last
+     * use of 'execute', which points into the buffer. */
+    ofpbuf_uninit(&key);
+}
+
+/* Output callback handed to odp_execute_actions(): wraps 'out_port' in a
+ * single OVS_ACTION_ATTR_OUTPUT action and has the dpif in 'aux' execute that
+ * action on 'packet'. */
+static void
+dpif_execute_helper_output_cb(void *aux, struct ofpbuf *packet,
+                              const struct flow *flow, odp_port_t out_port)
+{
+    /* Stack storage sized for one u32 Netlink attribute (NL_A_U32_SIZE). */
+    uint64_t stub[DIV_ROUND_UP(NL_A_U32_SIZE, 8)];
+    struct ofpbuf output;
+
+    ofpbuf_use_stack(&output, stub, sizeof stub);
+    nl_msg_put_u32(&output, OVS_ACTION_ATTR_OUTPUT, odp_to_u32(out_port));
+    dpif_execute_helper_execute__(aux, packet, flow,
+                                  output.data, output.size);
+}
+
+/* Userspace-action callback handed to odp_execute_actions(): forwards the
+ * single attribute 'action', with its length rounded up by NLA_ALIGN(), to
+ * the dpif in 'aux' for execution on 'packet'. */
+static void
+dpif_execute_helper_userspace_cb(void *aux, struct ofpbuf *packet,
+                                 const struct flow *flow,
+                                 const struct nlattr *action)
+{
+    size_t padded_len = NLA_ALIGN(action->nla_len);
+
+    dpif_execute_helper_execute__(aux, packet, flow, action, padded_len);
+}
+
+/* Carries out 'execute' mostly in userspace, working on a private copy of the
+ * packet.  Only output and userspace actions are handed to 'dpif', which
+ * receives the fully constructed packet for each of them.
+ *
+ * This makes it possible to use actions that 'dpif' does not implement
+ * directly.
+ *
+ * Returns EINVAL if the flow key in 'execute' cannot be converted to a flow;
+ * otherwise returns 0 or the error recorded by the helper callbacks. */
+static int
+dpif_execute_with_help(struct dpif *dpif, const struct dpif_execute *execute)
+{
+    struct dpif_execute_helper_aux aux;
+    struct ofpbuf *copy;
+    struct flow flow;
+
+    COVERAGE_INC(dpif_execute_with_help);
+
+    if (odp_flow_key_to_flow(execute->key, execute->key_len, &flow)
+        == ODP_FIT_ERROR) {
+        return EINVAL;
+    }
+
+    /* The caller's packet is const, so the actions run on a clone.  The
+     * VLAN_HEADER_LEN headroom is presumably there so that a VLAN tag can be
+     * pushed without reallocating -- confirm against odp_execute_actions(). */
+    copy = ofpbuf_clone_with_headroom(execute->packet, VLAN_HEADER_LEN);
+
+    aux.error = 0;
+    aux.dpif = dpif;
+    odp_execute_actions(&aux, copy, &flow,
+                        execute->actions, execute->actions_len,
+                        dpif_execute_helper_output_cb,
+                        dpif_execute_helper_userspace_cb);
+
+    ofpbuf_delete(copy);
+    return aux.error;
+}
+
 static int
 dpif_execute__(struct dpif *dpif, const struct dpif_execute *execute)
 {
@@ -1070,7 +1159,9 @@ dpif_execute__(struct dpif *dpif, const struct dpif_execute *execute)
 
     COVERAGE_INC(dpif_execute);
     if (execute->actions_len > 0) {
-        error = dpif->dpif_class->execute(dpif, execute);
+        error = (execute->needs_help
+                 ? dpif_execute_with_help(dpif, execute)
+                 : dpif->dpif_class->execute(dpif, execute));
     } else {
         error = 0;
     }
@@ -1086,12 +1177,20 @@ dpif_execute__(struct dpif *dpif, const struct dpif_execute *execute)
  * it contains some metadata that cannot be recovered from 'packet', such as
  * tunnel and in_port.)
  *
+ * Some dpif providers do not implement every action.  The Linux kernel
+ * datapath, in particular, does not implement ARP field modification.  If
+ * 'needs_help' is true, the dpif layer executes in userspace all of the
+ * actions that it can, and for OVS_ACTION_ATTR_OUTPUT and
+ * OVS_ACTION_ATTR_USERSPACE actions it passes the packet through to the dpif
+ * implementation.
+ *
  * Returns 0 if successful, otherwise a positive errno value. */
 int
 dpif_execute(struct dpif *dpif,
              const struct nlattr *key, size_t key_len,
              const struct nlattr *actions, size_t actions_len,
-             const struct ofpbuf *buf)
+             const struct ofpbuf *buf,
+             bool needs_help)
 {
     struct dpif_execute execute;
 
@@ -1100,6 +1199,7 @@ dpif_execute(struct dpif *dpif,
     execute.actions = actions;
     execute.actions_len = actions_len;
     execute.packet = buf;
+    execute.needs_help = needs_help;
     return dpif_execute__(dpif, &execute);
 }
 
@@ -1112,54 +1212,83 @@ dpif_execute(struct dpif *dpif,
 void
 dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops)
 {
-    size_t i;
-
     if (dpif->dpif_class->operate) {
-        dpif->dpif_class->operate(dpif, ops, n_ops);
+        while (n_ops > 0) {
+            size_t chunk;
+
+            /* Count 'chunk', the number of ops that can be executed without
+             * needing any help.  Ops that need help should be rare, so we
+             * expect this to ordinarily be 'n_ops', that is, all the ops. */
+            for (chunk = 0; chunk < n_ops; chunk++) {
+                struct dpif_op *op = ops[chunk];
+
+                if (op->type == DPIF_OP_EXECUTE && op->u.execute.needs_help) {
+                    break;
+                }
+            }
+
+            if (chunk) {
+                /* Execute a chunk full of ops that the dpif provider can
+                 * handle itself, without help. */
+                size_t i;
+
+                dpif->dpif_class->operate(dpif, ops, chunk);
+
+                /* Log every op in the chunk together with the 'error' value
+                 * it holds after the provider's 'operate' call. */
+                for (i = 0; i < chunk; i++) {
+                    struct dpif_op *op = ops[i];
+
+                    switch (op->type) {
+                    case DPIF_OP_FLOW_PUT:
+                        log_flow_put_message(dpif, &op->u.flow_put, op->error);
+                        break;
+
+                    case DPIF_OP_FLOW_DEL:
+                        log_flow_del_message(dpif, &op->u.flow_del, op->error);
+                        break;
+
+                    case DPIF_OP_EXECUTE:
+                        log_execute_message(dpif, &op->u.execute, op->error);
+                        break;
+                    }
+                }
+
+                ops += chunk;
+                n_ops -= chunk;
+            } else {
+                /* Help the dpif provider to execute one op. */
+                struct dpif_op *op = ops[0];
+
+                /* The scan above stops at index 0 only for a DPIF_OP_EXECUTE
+                 * op with 'needs_help' set, so 'u.execute' is the member in
+                 * use here. */
+                op->error = dpif_execute__(dpif, &op->u.execute);
+                ops++;
+                n_ops--;
+            }
+        }
+    } else {
+        /* The provider has no batch 'operate' hook, so carry out each op
+         * individually and store its result in the op's 'error'. */
+        size_t i;
 
         for (i = 0; i < n_ops; i++) {
             struct dpif_op *op = ops[i];
 
             switch (op->type) {
             case DPIF_OP_FLOW_PUT:
-                log_flow_put_message(dpif, &op->u.flow_put, op->error);
+                op->error = dpif_flow_put__(dpif, &op->u.flow_put);
                 break;
 
             case DPIF_OP_FLOW_DEL:
-                log_flow_del_message(dpif, &op->u.flow_del, op->error);
+                op->error = dpif_flow_del__(dpif, &op->u.flow_del);
                 break;
 
             case DPIF_OP_EXECUTE:
-                log_execute_message(dpif, &op->u.execute, op->error);
+                op->error = dpif_execute__(dpif, &op->u.execute);
                 break;
-            }
-        }
-        return;
-    }
-
-    for (i = 0; i < n_ops; i++) {
-        struct dpif_op *op = ops[i];
-
-        switch (op->type) {
-        case DPIF_OP_FLOW_PUT:
-            op->error = dpif_flow_put__(dpif, &op->u.flow_put);
-            break;
-
-        case DPIF_OP_FLOW_DEL:
-            op->error = dpif_flow_del__(dpif, &op->u.flow_del);
-            break;
 
-        case DPIF_OP_EXECUTE:
-            op->error = dpif_execute__(dpif, &op->u.execute);
-            break;
-
-        default:
-            NOT_REACHED();
+            default:
+                NOT_REACHED();
+            }
         }
     }
 }
 
-
 /* Returns a string that represents 'type', for use in log messages. */
 const char *
 dpif_upcall_type_to_string(enum dpif_upcall_type type)
@@ -1351,7 +1480,7 @@ log_flow_message(const struct dpif *dpif, int error, const char *operation,
     if (error) {
         ds_put_format(&ds, "(%s) ", ovs_strerror(error));
     }
-    odp_flow_format(key, key_len, mask, mask_len, &ds);
+    odp_flow_format(key, key_len, mask, mask_len, NULL, &ds, true);
     if (stats) {
         ds_put_cstr(&ds, ", ");
         dpif_flow_stats_format(stats, &ds);