X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fdpif.c;h=aa27d62c5b2ab53015c3d23e8570d3d55d6e641e;hb=1e827902be9194d71ea851c9ce2676f65eeed33a;hp=1c1a5243fd008e10b86c10d33fea3049a86649ad;hpb=b0fb94a346e52f36aeef238dd5f9bef9a10c14ef;p=sliver-openvswitch.git diff --git a/lib/dpif.c b/lib/dpif.c index 1c1a5243f..aa27d62c5 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -28,6 +28,7 @@ #include "flow.h" #include "netdev.h" #include "netlink.h" +#include "odp-execute.h" #include "odp-util.h" #include "ofp-errors.h" #include "ofp-print.h" @@ -51,10 +52,9 @@ COVERAGE_DEFINE(dpif_flow_flush); COVERAGE_DEFINE(dpif_flow_get); COVERAGE_DEFINE(dpif_flow_put); COVERAGE_DEFINE(dpif_flow_del); -COVERAGE_DEFINE(dpif_flow_query_list); -COVERAGE_DEFINE(dpif_flow_query_list_n); COVERAGE_DEFINE(dpif_execute); COVERAGE_DEFINE(dpif_purge); +COVERAGE_DEFINE(dpif_execute_with_help); static const struct dpif_class *base_dpif_classes[] = { #ifdef LINUX_DATAPATH @@ -633,14 +633,6 @@ dpif_port_query_by_name(const struct dpif *dpif, const char *devname, return error; } -/* Returns one greater than the maximum port number accepted in flow - * actions. */ -odp_port_t -dpif_get_max_ports(const struct dpif *dpif) -{ - return dpif->dpif_class->get_max_ports(dpif); -} - /* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE actions * as the OVS_USERSPACE_ATTR_PID attribute's value, for use in flows whose * packets arrived on port 'port_no'. @@ -892,15 +884,19 @@ dpif_flow_put__(struct dpif *dpif, const struct dpif_flow_put *put) /* Adds or modifies a flow in 'dpif'. The flow is specified by the Netlink * attribute OVS_FLOW_ATTR_KEY with types OVS_KEY_ATTR_* in the 'key_len' bytes - * starting at 'key', and OVS_FLOW_ATTR_MASK with types of OVS_KEY_ATTR_* in the - * 'mask_len' bytes starting at 'mask'. The associated actions are specified by - * the Netlink attributes with types OVS_ACTION_ATTR_* in the 'actions_len' - * bytes starting at 'actions'. + * starting at 'key', and OVS_FLOW_ATTR_MASK with types of OVS_KEY_ATTR_* in + * the 'mask_len' bytes starting at 'mask'. The associated actions are + * specified by the Netlink attributes with types OVS_ACTION_ATTR_* in the + * 'actions_len' bytes starting at 'actions'. * * - If the flow's key does not exist in 'dpif', then the flow will be added if * 'flags' includes DPIF_FP_CREATE. Otherwise the operation will fail with * ENOENT. * + * The datapath may reject attempts to insert overlapping flows with EINVAL + * or EEXIST, but clients should not rely on this: avoiding overlapping flows + * is primarily the client's responsibility. + * * If the operation succeeds, then 'stats', if nonnull, will be zeroed. * * - If the flow's key does exist in 'dpif', then the flow's actions will be @@ -1064,14 +1060,89 @@ dpif_flow_dump_done(struct dpif_flow_dump *dump) return dump->error == EOF ? 0 : dump->error; } +struct dpif_execute_helper_aux { + struct dpif *dpif; + int error; +}; + +/* This is called for actions that need the context of the datapath to be + * meaningful. */ +static void +dpif_execute_helper_cb(void *aux_, struct ofpbuf *packet, + const struct pkt_metadata *md, + const struct nlattr *action, bool may_steal OVS_UNUSED) +{ + struct dpif_execute_helper_aux *aux = aux_; + struct dpif_execute execute; + int type = nl_attr_type(action); + + switch ((enum ovs_action_attr)type) { + case OVS_ACTION_ATTR_OUTPUT: + case OVS_ACTION_ATTR_USERSPACE: + execute.actions = action; + execute.actions_len = NLA_ALIGN(action->nla_len); + execute.packet = packet; + execute.md = *md; + execute.needs_help = false; + aux->error = aux->dpif->dpif_class->execute(aux->dpif, &execute); + break; + + case OVS_ACTION_ATTR_PUSH_VLAN: + case OVS_ACTION_ATTR_POP_VLAN: + case OVS_ACTION_ATTR_PUSH_MPLS: + case OVS_ACTION_ATTR_POP_MPLS: + case OVS_ACTION_ATTR_SET: + case OVS_ACTION_ATTR_SAMPLE: + case OVS_ACTION_ATTR_UNSPEC: + case __OVS_ACTION_ATTR_MAX: + OVS_NOT_REACHED(); + } +} + +/* Executes 'execute' by performing most of the actions in userspace and + * passing the fully constructed packets to 'dpif' for output and userspace + * actions. + * + * This helps with actions that a given 'dpif' doesn't implement directly. */ static int -dpif_execute__(struct dpif *dpif, const struct dpif_execute *execute) +dpif_execute_with_help(struct dpif *dpif, struct dpif_execute *execute) +{ + struct dpif_execute_helper_aux aux = {dpif, 0}; + + COVERAGE_INC(dpif_execute_with_help); + + odp_execute_actions(&aux, execute->packet, &execute->md, + execute->actions, execute->actions_len, + dpif_execute_helper_cb); + return aux.error; +} + +/* Causes 'dpif' to perform the 'execute->actions_len' bytes of actions in + * 'execute->actions' on the Ethernet frame in 'execute->packet' and on packet + * metadata in 'execute->md'. The implementation is allowed to modify both the + * '*execute->packet' and 'execute->md'. + * + * Some dpif providers do not implement every action. The Linux kernel + * datapath, in particular, does not implement ARP field modification. If + * 'needs_help' is true, the dpif layer executes in userspace all of the + * actions that it can, and for OVS_ACTION_ATTR_OUTPUT and + * OVS_ACTION_ATTR_USERSPACE actions it passes the packet through to the dpif + * implementation. + * + * This works even if 'execute->actions_len' is too long for a Netlink + * attribute. + * + * Returns 0 if successful, otherwise a positive errno value. */ +int +dpif_execute(struct dpif *dpif, struct dpif_execute *execute) { int error; COVERAGE_INC(dpif_execute); if (execute->actions_len > 0) { - error = dpif->dpif_class->execute(dpif, execute); + error = (execute->needs_help || nl_attr_oversized(execute->actions_len) + ? dpif_execute_with_help(dpif, execute) + : dpif->dpif_class->execute(dpif, execute)); } else { error = 0; } @@ -1081,29 +1152,6 @@ dpif_execute__(struct dpif *dpif, const struct dpif_execute *execute) return error; } -/* Causes 'dpif' to perform the 'actions_len' bytes of actions in 'actions' on - * the Ethernet frame specified in 'packet' taken from the flow specified in - * the 'key_len' bytes of 'key'. ('key' is mostly redundant with 'packet', but - * it contains some metadata that cannot be recovered from 'packet', such as - * tunnel and in_port.) - * - * Returns 0 if successful, otherwise a positive errno value. */ -int -dpif_execute(struct dpif *dpif, - const struct nlattr *key, size_t key_len, - const struct nlattr *actions, size_t actions_len, - const struct ofpbuf *buf) -{ - struct dpif_execute execute; - - execute.key = key; - execute.key_len = key_len; - execute.actions = actions; - execute.actions_len = actions_len; - execute.packet = buf; - return dpif_execute__(dpif, &execute); -} - /* Executes each of the 'n_ops' operations in 'ops' on 'dpif', in the order in * which they are specified, placing each operation's results in the "output" * members documented in comments. @@ -1113,54 +1161,83 @@ dpif_execute(struct dpif *dpif, void dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) { - size_t i; - if (dpif->dpif_class->operate) { - dpif->dpif_class->operate(dpif, ops, n_ops); + while (n_ops > 0) { + size_t chunk; + + /* Count 'chunk', the number of ops that can be executed without + * needing any help. Ops that need help should be rare, so we + * expect this to ordinarily be 'n_ops', that is, all the ops. */ + for (chunk = 0; chunk < n_ops; chunk++) { + struct dpif_op *op = ops[chunk]; + + if (op->type == DPIF_OP_EXECUTE && op->u.execute.needs_help) { + break; + } + } + + if (chunk) { + /* Execute a chunk full of ops that the dpif provider can + * handle itself, without help. */ + size_t i; + + dpif->dpif_class->operate(dpif, ops, chunk); + + for (i = 0; i < chunk; i++) { + struct dpif_op *op = ops[i]; + + switch (op->type) { + case DPIF_OP_FLOW_PUT: + log_flow_put_message(dpif, &op->u.flow_put, op->error); + break; + + case DPIF_OP_FLOW_DEL: + log_flow_del_message(dpif, &op->u.flow_del, op->error); + break; + + case DPIF_OP_EXECUTE: + log_execute_message(dpif, &op->u.execute, op->error); + break; + } + } + + ops += chunk; + n_ops -= chunk; + } else { + /* Help the dpif provider to execute one op. */ + struct dpif_op *op = ops[0]; + + op->error = dpif_execute(dpif, &op->u.execute); + ops++; + n_ops--; + } + } + } else { + size_t i; for (i = 0; i < n_ops; i++) { struct dpif_op *op = ops[i]; switch (op->type) { case DPIF_OP_FLOW_PUT: - log_flow_put_message(dpif, &op->u.flow_put, op->error); + op->error = dpif_flow_put__(dpif, &op->u.flow_put); break; case DPIF_OP_FLOW_DEL: - log_flow_del_message(dpif, &op->u.flow_del, op->error); + op->error = dpif_flow_del__(dpif, &op->u.flow_del); break; case DPIF_OP_EXECUTE: - log_execute_message(dpif, &op->u.execute, op->error); + op->error = dpif_execute(dpif, &op->u.execute); break; - } - } - return; - } - - for (i = 0; i < n_ops; i++) { - struct dpif_op *op = ops[i]; - switch (op->type) { - case DPIF_OP_FLOW_PUT: - op->error = dpif_flow_put__(dpif, &op->u.flow_put); - break; - - case DPIF_OP_FLOW_DEL: - op->error = dpif_flow_del__(dpif, &op->u.flow_del); - break; - - case DPIF_OP_EXECUTE: - op->error = dpif_execute__(dpif, &op->u.execute); - break; - - default: - NOT_REACHED(); + default: + OVS_NOT_REACHED(); + } } } } - /* Returns a string that represents 'type', for use in log messages. */ const char * dpif_upcall_type_to_string(enum dpif_upcall_type type) @@ -1191,10 +1268,15 @@ dpif_recv_set(struct dpif *dpif, bool enable) * '*upcall', using 'buf' for storage. Should only be called if * dpif_recv_set() has been used to enable receiving packets on 'dpif'. * - * 'upcall->packet' and 'upcall->key' point into data in the caller-provided - * 'buf', so their memory cannot be freed separately from 'buf'. (This is - * hardly a great way to do things but it works out OK for the dpif providers - * and clients that exist so far.) + * 'upcall->key' and 'upcall->userdata' point into data in the caller-provided + * 'buf', so their memory cannot be freed separately from 'buf'. + * + * The caller owns the data of 'upcall->packet' and may modify it. If + * packet's headroom is exhausted as it is manipulated, 'upcall->packet' + * will be reallocated. This requires the data of 'upcall->packet' to be + * released with ofpbuf_uninit() before 'upcall' is destroyed. However, + * when an error is returned, the 'upcall->packet' may be uninitialized + * and should not be released. * * Returns 0 if successful, otherwise a positive errno value. Returns EAGAIN * if no upcall is immediately available. */ @@ -1206,8 +1288,8 @@ dpif_recv(struct dpif *dpif, struct dpif_upcall *upcall, struct ofpbuf *buf) struct ds flow; char *packet; - packet = ofp_packet_to_string(upcall->packet->data, - upcall->packet->size); + packet = ofp_packet_to_string(upcall->packet.data, + upcall->packet.size); ds_init(&flow); odp_flow_key_format(upcall->key, upcall->key_len, &flow); @@ -1352,7 +1434,7 @@ log_flow_message(const struct dpif *dpif, int error, const char *operation, if (error) { ds_put_format(&ds, "(%s) ", ovs_strerror(error)); } - odp_flow_format(key, key_len, mask, mask_len, &ds, true); + odp_flow_format(key, key_len, mask, mask_len, NULL, &ds, true); if (stats) { ds_put_cstr(&ds, ", "); dpif_flow_stats_format(stats, &ds);