X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fdpif.c;h=aa27d62c5b2ab53015c3d23e8570d3d55d6e641e;hb=1e827902be9194d71ea851c9ce2676f65eeed33a;hp=169be202defb97b8187d2249671f9397e27cd763;hpb=61fb711d271dd34b9dab967c7ba5700483cf568a;p=sliver-openvswitch.git diff --git a/lib/dpif.c b/lib/dpif.c index 169be202d..aa27d62c5 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,6 +28,7 @@ #include "flow.h" #include "netdev.h" #include "netlink.h" +#include "odp-execute.h" #include "odp-util.h" #include "ofp-errors.h" #include "ofp-print.h" @@ -51,16 +52,16 @@ COVERAGE_DEFINE(dpif_flow_flush); COVERAGE_DEFINE(dpif_flow_get); COVERAGE_DEFINE(dpif_flow_put); COVERAGE_DEFINE(dpif_flow_del); -COVERAGE_DEFINE(dpif_flow_query_list); -COVERAGE_DEFINE(dpif_flow_query_list_n); COVERAGE_DEFINE(dpif_execute); COVERAGE_DEFINE(dpif_purge); +COVERAGE_DEFINE(dpif_execute_with_help); static const struct dpif_class *base_dpif_classes[] = { #ifdef LINUX_DATAPATH &dpif_linux_class, #endif &dpif_netdev_class, + &dpif_planetlab_class, }; struct registered_dpif_class { @@ -70,6 +71,9 @@ struct registered_dpif_class { static struct shash dpif_classes = SHASH_INITIALIZER(&dpif_classes); static struct sset dpif_blacklist = SSET_INITIALIZER(&dpif_blacklist); +/* Protects 'dpif_classes', including the refcount, and 'dpif_blacklist'. */ +static struct ovs_mutex dpif_mutex = OVS_MUTEX_INITIALIZER; + /* Rate limit for individual messages going to or from the datapath, output at * DBG level. This is very high because, if these are enabled, it is because * we really need to see them. */ @@ -97,22 +101,20 @@ static void log_execute_message(struct dpif *, const struct dpif_execute *, static void dp_initialize(void) { - static int status = -1; + static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; - if (status < 0) { + if (ovsthread_once_start(&once)) { int i; - status = 0; for (i = 0; i < ARRAY_SIZE(base_dpif_classes); i++) { dp_register_provider(base_dpif_classes[i]); } + ovsthread_once_done(&once); } } -/* Registers a new datapath provider. After successful registration, new - * datapaths of that type can be opened using dpif_open(). */ -int -dp_register_provider(const struct dpif_class *new_class) +static int +dp_register_provider__(const struct dpif_class *new_class) { struct registered_dpif_class *registered_class; @@ -137,11 +139,25 @@ dp_register_provider(const struct dpif_class *new_class) return 0; } +/* Registers a new datapath provider. After successful registration, new + * datapaths of that type can be opened using dpif_open(). */ +int +dp_register_provider(const struct dpif_class *new_class) +{ + int error; + + ovs_mutex_lock(&dpif_mutex); + error = dp_register_provider__(new_class); + ovs_mutex_unlock(&dpif_mutex); + + return error; +} + /* Unregisters a datapath provider. 'type' must have been previously * registered and not currently be in use by any dpifs. After unregistration * new datapaths of that type cannot be opened using dpif_open(). */ -int -dp_unregister_provider(const char *type) +static int +dp_unregister_provider__(const char *type) { struct shash_node *node; struct registered_dpif_class *registered_class; @@ -165,12 +181,31 @@ dp_unregister_provider(const char *type) return 0; } +/* Unregisters a datapath provider. 'type' must have been previously + * registered and not currently be in use by any dpifs. After unregistration + * new datapaths of that type cannot be opened using dpif_open(). */ +int +dp_unregister_provider(const char *type) +{ + int error; + + dp_initialize(); + + ovs_mutex_lock(&dpif_mutex); + error = dp_unregister_provider__(type); + ovs_mutex_unlock(&dpif_mutex); + + return error; +} + /* Blacklists a provider. Causes future calls of dp_register_provider() with * a dpif_class which implements 'type' to fail. */ void dp_blacklist_provider(const char *type) { + ovs_mutex_lock(&dpif_mutex); sset_add(&dpif_blacklist, type); + ovs_mutex_unlock(&dpif_mutex); } /* Clears 'types' and enumerates the types of all currently registered datapath @@ -183,10 +218,36 @@ dp_enumerate_types(struct sset *types) dp_initialize(); sset_clear(types); + ovs_mutex_lock(&dpif_mutex); SHASH_FOR_EACH(node, &dpif_classes) { const struct registered_dpif_class *registered_class = node->data; sset_add(types, registered_class->dpif_class->type); } + ovs_mutex_unlock(&dpif_mutex); +} + +static void +dp_class_unref(struct registered_dpif_class *rc) +{ + ovs_mutex_lock(&dpif_mutex); + ovs_assert(rc->refcount); + rc->refcount--; + ovs_mutex_unlock(&dpif_mutex); +} + +static struct registered_dpif_class * +dp_class_lookup(const char *type) +{ + struct registered_dpif_class *rc; + + ovs_mutex_lock(&dpif_mutex); + rc = shash_find_data(&dpif_classes, type); + if (rc) { + rc->refcount++; + } + ovs_mutex_unlock(&dpif_mutex); + + return rc; } /* Clears 'names' and enumerates the names of all known created datapaths with @@ -198,14 +259,14 @@ dp_enumerate_types(struct sset *types) int dp_enumerate_names(const char *type, struct sset *names) { - const struct registered_dpif_class *registered_class; + struct registered_dpif_class *registered_class; const struct dpif_class *dpif_class; int error; dp_initialize(); sset_clear(names); - registered_class = shash_find_data(&dpif_classes, type); + registered_class = dp_class_lookup(type); if (!registered_class) { VLOG_WARN("could not enumerate unknown type: %s", type); return EAFNOSUPPORT; @@ -213,11 +274,11 @@ dp_enumerate_names(const char *type, struct sset *names) dpif_class = registered_class->dpif_class; error = dpif_class->enumerate ? dpif_class->enumerate(names) : 0; - if (error) { VLOG_WARN("failed to enumerate %s datapaths: %s", dpif_class->type, - strerror(error)); + ovs_strerror(error)); } + dp_class_unref(registered_class); return error; } @@ -253,8 +314,7 @@ do_open(const char *name, const char *type, bool create, struct dpif **dpifp) dp_initialize(); type = dpif_normalize_type(type); - - registered_class = shash_find_data(&dpif_classes, type); + registered_class = dp_class_lookup(type); if (!registered_class) { VLOG_WARN("could not create datapath %s of unknown type %s", name, type); @@ -266,7 +326,8 @@ do_open(const char *name, const char *type, bool create, struct dpif **dpifp) name, create, &dpif); if (!error) { ovs_assert(dpif->dpif_class == registered_class->dpif_class); - registered_class->refcount++; + } else { + dp_class_unref(registered_class); } exit: @@ -311,10 +372,11 @@ dpif_create_and_open(const char *name, const char *type, struct dpif **dpifp) error = dpif_open(name, type, dpifp); if (error) { VLOG_WARN("datapath %s already exists but cannot be opened: %s", - name, strerror(error)); + name, ovs_strerror(error)); } } else if (error) { - VLOG_WARN("failed to create datapath %s: %s", name, strerror(error)); + VLOG_WARN("failed to create datapath %s: %s", + name, ovs_strerror(error)); } return error; } @@ -325,15 +387,11 @@ void dpif_close(struct dpif *dpif) { if (dpif) { - struct registered_dpif_class *registered_class; - - registered_class = shash_find_data(&dpif_classes, - dpif->dpif_class->type); - ovs_assert(registered_class); - ovs_assert(registered_class->refcount); + struct registered_dpif_class *rc; - registered_class->refcount--; + rc = shash_find_data(&dpif_classes, dpif->dpif_class->type); dpif_uninit(dpif, true); + dp_class_unref(rc); } } @@ -420,18 +478,18 @@ dpif_get_dp_stats(const struct dpif *dpif, struct dpif_dp_stats *stats) const char * dpif_port_open_type(const char *datapath_type, const char *port_type) { - struct registered_dpif_class *registered_class; + struct registered_dpif_class *rc; datapath_type = dpif_normalize_type(datapath_type); - registered_class = shash_find_data(&dpif_classes, datapath_type); - if (!registered_class - || !registered_class->dpif_class->port_open_type) { - return port_type; + ovs_mutex_lock(&dpif_mutex); + rc = shash_find_data(&dpif_classes, datapath_type); + if (rc && rc->dpif_class->port_open_type) { + port_type = rc->dpif_class->port_open_type(rc->dpif_class, port_type); } + ovs_mutex_unlock(&dpif_mutex); - return registered_class->dpif_class->port_open_type( - registered_class->dpif_class, port_type); + return port_type; } /* Attempts to add 'netdev' as a port on 'dpif'. If 'port_nop' is @@ -461,7 +519,7 @@ dpif_port_add(struct dpif *dpif, struct netdev *netdev, odp_port_t *port_nop) dpif_name(dpif), netdev_name, port_no); } else { VLOG_WARN_RL(&error_rl, "%s: failed to add %s as port: %s", - dpif_name(dpif), netdev_name, strerror(error)); + dpif_name(dpif), netdev_name, ovs_strerror(error)); port_no = ODPP_NONE; } if (port_nop) { @@ -518,7 +576,7 @@ dpif_port_exists(const struct dpif *dpif, const char *devname) int error = dpif->dpif_class->port_query_by_name(dpif, devname, NULL); if (error != 0 && error != ENOENT && error != ENODEV) { VLOG_WARN_RL(&error_rl, "%s: failed to query port %s: %s", - dpif_name(dpif), devname, strerror(error)); + dpif_name(dpif), devname, ovs_strerror(error)); } return !error; @@ -541,7 +599,7 @@ dpif_port_query_by_number(const struct dpif *dpif, odp_port_t port_no, } else { memset(port, 0, sizeof *port); VLOG_WARN_RL(&error_rl, "%s: failed to query port %"PRIu32": %s", - dpif_name(dpif), port_no, strerror(error)); + dpif_name(dpif), port_no, ovs_strerror(error)); } return error; } @@ -570,19 +628,11 @@ dpif_port_query_by_name(const struct dpif *dpif, const char *devname, VLOG_RL(&error_rl, error == ENOENT || error == ENODEV ? VLL_DBG : VLL_WARN, "%s: failed to query port %s: %s", - dpif_name(dpif), devname, strerror(error)); + dpif_name(dpif), devname, ovs_strerror(error)); } return error; } -/* Returns one greater than the maximum port number accepted in flow - * actions. */ -odp_port_t -dpif_get_max_ports(const struct dpif *dpif) -{ - return dpif->dpif_class->get_max_ports(dpif); -} - /* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE actions * as the OVS_USERSPACE_ATTR_PID attribute's value, for use in flows whose * packets arrived on port 'port_no'. @@ -834,15 +884,19 @@ dpif_flow_put__(struct dpif *dpif, const struct dpif_flow_put *put) /* Adds or modifies a flow in 'dpif'. The flow is specified by the Netlink * attribute OVS_FLOW_ATTR_KEY with types OVS_KEY_ATTR_* in the 'key_len' bytes - * starting at 'key', and OVS_FLOW_ATTR_MASK with types of OVS_KEY_ATTR_* in the - * 'mask_len' bytes starting at 'mask'. The associated actions are specified by - * the Netlink attributes with types OVS_ACTION_ATTR_* in the 'actions_len' - * bytes starting at 'actions'. + * starting at 'key', and OVS_FLOW_ATTR_MASK with types of OVS_KEY_ATTR_* in + * the 'mask_len' bytes starting at 'mask'. The associated actions are + * specified by the Netlink attributes with types OVS_ACTION_ATTR_* in the + * 'actions_len' bytes starting at 'actions'. * * - If the flow's key does not exist in 'dpif', then the flow will be added if * 'flags' includes DPIF_FP_CREATE. Otherwise the operation will fail with * ENOENT. * + * The datapath may reject attempts to insert overlapping flows with EINVAL + * or EEXIST, but clients should not rely on this: avoiding overlapping flows + * is primarily the client's responsibility. + * * If the operation succeeds, then 'stats', if nonnull, will be zeroed. * * - If the flow's key does exist in 'dpif', then the flow's actions will be @@ -1006,14 +1060,89 @@ dpif_flow_dump_done(struct dpif_flow_dump *dump) return dump->error == EOF ? 0 : dump->error; } +struct dpif_execute_helper_aux { + struct dpif *dpif; + int error; +}; + +/* This is called for actions that need the context of the datapath to be + * meaningful. */ +static void +dpif_execute_helper_cb(void *aux_, struct ofpbuf *packet, + const struct pkt_metadata *md, + const struct nlattr *action, bool may_steal OVS_UNUSED) +{ + struct dpif_execute_helper_aux *aux = aux_; + struct dpif_execute execute; + int type = nl_attr_type(action); + + switch ((enum ovs_action_attr)type) { + case OVS_ACTION_ATTR_OUTPUT: + case OVS_ACTION_ATTR_USERSPACE: + execute.actions = action; + execute.actions_len = NLA_ALIGN(action->nla_len); + execute.packet = packet; + execute.md = *md; + execute.needs_help = false; + aux->error = aux->dpif->dpif_class->execute(aux->dpif, &execute); + break; + + case OVS_ACTION_ATTR_PUSH_VLAN: + case OVS_ACTION_ATTR_POP_VLAN: + case OVS_ACTION_ATTR_PUSH_MPLS: + case OVS_ACTION_ATTR_POP_MPLS: + case OVS_ACTION_ATTR_SET: + case OVS_ACTION_ATTR_SAMPLE: + case OVS_ACTION_ATTR_UNSPEC: + case __OVS_ACTION_ATTR_MAX: + OVS_NOT_REACHED(); + } +} + +/* Executes 'execute' by performing most of the actions in userspace and + * passing the fully constructed packets to 'dpif' for output and userspace + * actions. + * + * This helps with actions that a given 'dpif' doesn't implement directly. */ static int -dpif_execute__(struct dpif *dpif, const struct dpif_execute *execute) +dpif_execute_with_help(struct dpif *dpif, struct dpif_execute *execute) +{ + struct dpif_execute_helper_aux aux = {dpif, 0}; + + COVERAGE_INC(dpif_execute_with_help); + + odp_execute_actions(&aux, execute->packet, &execute->md, + execute->actions, execute->actions_len, + dpif_execute_helper_cb); + return aux.error; +} + +/* Causes 'dpif' to perform the 'execute->actions_len' bytes of actions in + * 'execute->actions' on the Ethernet frame in 'execute->packet' and on packet + * metadata in 'execute->md'. The implementation is allowed to modify both the + * '*execute->packet' and 'execute->md'. + * + * Some dpif providers do not implement every action. The Linux kernel + * datapath, in particular, does not implement ARP field modification. If + * 'needs_help' is true, the dpif layer executes in userspace all of the + * actions that it can, and for OVS_ACTION_ATTR_OUTPUT and + * OVS_ACTION_ATTR_USERSPACE actions it passes the packet through to the dpif + * implementation. + * + * This works even if 'execute->actions_len' is too long for a Netlink + * attribute. + * + * Returns 0 if successful, otherwise a positive errno value. */ +int +dpif_execute(struct dpif *dpif, struct dpif_execute *execute) { int error; COVERAGE_INC(dpif_execute); if (execute->actions_len > 0) { - error = dpif->dpif_class->execute(dpif, execute); + error = (execute->needs_help || nl_attr_oversized(execute->actions_len) + ? dpif_execute_with_help(dpif, execute) + : dpif->dpif_class->execute(dpif, execute)); } else { error = 0; } @@ -1023,29 +1152,6 @@ dpif_execute__(struct dpif *dpif, const struct dpif_execute *execute) return error; } -/* Causes 'dpif' to perform the 'actions_len' bytes of actions in 'actions' on - * the Ethernet frame specified in 'packet' taken from the flow specified in - * the 'key_len' bytes of 'key'. ('key' is mostly redundant with 'packet', but - * it contains some metadata that cannot be recovered from 'packet', such as - * tunnel and in_port.) - * - * Returns 0 if successful, otherwise a positive errno value. */ -int -dpif_execute(struct dpif *dpif, - const struct nlattr *key, size_t key_len, - const struct nlattr *actions, size_t actions_len, - const struct ofpbuf *buf) -{ - struct dpif_execute execute; - - execute.key = key; - execute.key_len = key_len; - execute.actions = actions; - execute.actions_len = actions_len; - execute.packet = buf; - return dpif_execute__(dpif, &execute); -} - /* Executes each of the 'n_ops' operations in 'ops' on 'dpif', in the order in * which they are specified, placing each operation's results in the "output" * members documented in comments. @@ -1055,54 +1161,83 @@ dpif_execute(struct dpif *dpif, void dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) { - size_t i; - if (dpif->dpif_class->operate) { - dpif->dpif_class->operate(dpif, ops, n_ops); + while (n_ops > 0) { + size_t chunk; + + /* Count 'chunk', the number of ops that can be executed without + * needing any help. Ops that need help should be rare, so we + * expect this to ordinarily be 'n_ops', that is, all the ops. */ + for (chunk = 0; chunk < n_ops; chunk++) { + struct dpif_op *op = ops[chunk]; + + if (op->type == DPIF_OP_EXECUTE && op->u.execute.needs_help) { + break; + } + } + + if (chunk) { + /* Execute a chunk full of ops that the dpif provider can + * handle itself, without help. */ + size_t i; + + dpif->dpif_class->operate(dpif, ops, chunk); + + for (i = 0; i < chunk; i++) { + struct dpif_op *op = ops[i]; + + switch (op->type) { + case DPIF_OP_FLOW_PUT: + log_flow_put_message(dpif, &op->u.flow_put, op->error); + break; + + case DPIF_OP_FLOW_DEL: + log_flow_del_message(dpif, &op->u.flow_del, op->error); + break; + + case DPIF_OP_EXECUTE: + log_execute_message(dpif, &op->u.execute, op->error); + break; + } + } + + ops += chunk; + n_ops -= chunk; + } else { + /* Help the dpif provider to execute one op. */ + struct dpif_op *op = ops[0]; + + op->error = dpif_execute(dpif, &op->u.execute); + ops++; + n_ops--; + } + } + } else { + size_t i; for (i = 0; i < n_ops; i++) { struct dpif_op *op = ops[i]; switch (op->type) { case DPIF_OP_FLOW_PUT: - log_flow_put_message(dpif, &op->u.flow_put, op->error); + op->error = dpif_flow_put__(dpif, &op->u.flow_put); break; case DPIF_OP_FLOW_DEL: - log_flow_del_message(dpif, &op->u.flow_del, op->error); + op->error = dpif_flow_del__(dpif, &op->u.flow_del); break; case DPIF_OP_EXECUTE: - log_execute_message(dpif, &op->u.execute, op->error); + op->error = dpif_execute(dpif, &op->u.execute); break; - } - } - return; - } - - for (i = 0; i < n_ops; i++) { - struct dpif_op *op = ops[i]; - switch (op->type) { - case DPIF_OP_FLOW_PUT: - op->error = dpif_flow_put__(dpif, &op->u.flow_put); - break; - - case DPIF_OP_FLOW_DEL: - op->error = dpif_flow_del__(dpif, &op->u.flow_del); - break; - - case DPIF_OP_EXECUTE: - op->error = dpif_execute__(dpif, &op->u.execute); - break; - - default: - NOT_REACHED(); + default: + OVS_NOT_REACHED(); + } } } } - /* Returns a string that represents 'type', for use in log messages. */ const char * dpif_upcall_type_to_string(enum dpif_upcall_type type) @@ -1133,10 +1268,15 @@ dpif_recv_set(struct dpif *dpif, bool enable) * '*upcall', using 'buf' for storage. Should only be called if * dpif_recv_set() has been used to enable receiving packets on 'dpif'. * - * 'upcall->packet' and 'upcall->key' point into data in the caller-provided - * 'buf', so their memory cannot be freed separately from 'buf'. (This is - * hardly a great way to do things but it works out OK for the dpif providers - * and clients that exist so far.) + * 'upcall->key' and 'upcall->userdata' point into data in the caller-provided + * 'buf', so their memory cannot be freed separately from 'buf'. + * + * The caller owns the data of 'upcall->packet' and may modify it. If + * packet's headroom is exhausted as it is manipulated, 'upcall->packet' + * will be reallocated. This requires the data of 'upcall->packet' to be + * released with ofpbuf_uninit() before 'upcall' is destroyed. However, + * when an error is returned, the 'upcall->packet' may be uninitialized + * and should not be released. * * Returns 0 if successful, otherwise a positive errno value. Returns EAGAIN * if no upcall is immediately available. */ @@ -1148,8 +1288,8 @@ dpif_recv(struct dpif *dpif, struct dpif_upcall *upcall, struct ofpbuf *buf) struct ds flow; char *packet; - packet = ofp_packet_to_string(upcall->packet->data, - upcall->packet->size); + packet = ofp_packet_to_string(upcall->packet.data, + upcall->packet.size); ds_init(&flow); odp_flow_key_format(upcall->key, upcall->key_len, &flow); @@ -1256,7 +1396,7 @@ log_operation(const struct dpif *dpif, const char *operation, int error) dpif_name(dpif), operation, ofperr_get_name(error)); } else { VLOG_WARN_RL(&error_rl, "%s: %s failed (%s)", - dpif_name(dpif), operation, strerror(error)); + dpif_name(dpif), operation, ovs_strerror(error)); } } @@ -1292,9 +1432,9 @@ log_flow_message(const struct dpif *dpif, int error, const char *operation, } ds_put_format(&ds, "%s ", operation); if (error) { - ds_put_format(&ds, "(%s) ", strerror(error)); + ds_put_format(&ds, "(%s) ", ovs_strerror(error)); } - odp_flow_format(key, key_len, mask, mask_len, &ds); + odp_flow_format(key, key_len, mask, mask_len, NULL, &ds, true); if (stats) { ds_put_cstr(&ds, ", "); dpif_flow_stats_format(stats, &ds); @@ -1355,7 +1495,7 @@ log_execute_message(struct dpif *dpif, const struct dpif_execute *execute, ds_put_format(&ds, "%s: execute ", dpif_name(dpif)); format_odp_actions(&ds, execute->actions, execute->actions_len); if (error) { - ds_put_format(&ds, " failed (%s)", strerror(error)); + ds_put_format(&ds, " failed (%s)", ovs_strerror(error)); } ds_put_format(&ds, " on packet %s", packet); vlog(THIS_MODULE, error ? VLL_WARN : VLL_DBG, "%s", ds_cstr(&ds));