X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fdpif.c;h=3f311aa25a3ce7db3614de1e1de95a0e99ff4cc2;hb=7d78f21c057ff50a823220d809ac38c3d907243c;hp=f3845db8108175464247279b07dff7f9c176a121;hpb=ee75c5460e8a038faf8b17d81facb51cd866b83d;p=sliver-openvswitch.git diff --git a/lib/dpif.c b/lib/dpif.c index f3845db81..3f311aa25 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -57,10 +57,11 @@ COVERAGE_DEFINE(dpif_purge); COVERAGE_DEFINE(dpif_execute_with_help); static const struct dpif_class *base_dpif_classes[] = { -#ifdef LINUX_DATAPATH +#ifdef __linux__ &dpif_linux_class, #endif &dpif_netdev_class, + &dpif_planetlab_class, }; struct registered_dpif_class { @@ -632,9 +633,18 @@ dpif_port_query_by_name(const struct dpif *dpif, const char *devname, return error; } -/* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE actions - * as the OVS_USERSPACE_ATTR_PID attribute's value, for use in flows whose - * packets arrived on port 'port_no'. +/* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE + * actions as the OVS_USERSPACE_ATTR_PID attribute's value, for use in + * flows whose packets arrived on port 'port_no'. In the case where the + * provider allocates multiple Netlink PIDs to a single port, it may use + * 'hash' to spread load among them. The caller need not use a particular + * hash function; a 5-tuple hash is suitable. + * + * (The datapath implementation might use some different hash function for + * distributing packets received via flow misses among PIDs. This means + * that packets received via flow misses might be reordered relative to + * packets received via userspace actions. This is not ordinarily a + * problem.) * * A 'port_no' of ODPP_NONE is a special case: it returns a reserved PID, not * allocated to any port, that the client may use for special purposes. @@ -645,10 +655,10 @@ dpif_port_query_by_name(const struct dpif *dpif, const char *devname, * update all of the flows that it installed that contain * OVS_ACTION_ATTR_USERSPACE actions. */ uint32_t -dpif_port_get_pid(const struct dpif *dpif, odp_port_t port_no) +dpif_port_get_pid(const struct dpif *dpif, odp_port_t port_no, uint32_t hash) { return (dpif->dpif_class->port_get_pid - ? (dpif->dpif_class->port_get_pid)(dpif, port_no) + ? (dpif->dpif_class->port_get_pid)(dpif, port_no, hash) : 0); } @@ -778,8 +788,8 @@ void dpif_flow_stats_extract(const struct flow *flow, const struct ofpbuf *packet, long long int used, struct dpif_flow_stats *stats) { - stats->tcp_flags = packet_get_tcp_flags(packet, flow); - stats->n_bytes = packet->size; + stats->tcp_flags = ntohs(flow->tcp_flags); + stats->n_bytes = ofpbuf_size(packet); stats->n_packets = 1; stats->used = used; } @@ -852,8 +862,8 @@ dpif_flow_get(const struct dpif *dpif, size_t actions_len; if (!error && actionsp) { - actions = (*actionsp)->data; - actions_len = (*actionsp)->size; + actions = ofpbuf_data(*actionsp); + actions_len = ofpbuf_size(*actionsp); } else { actions = NULL; actions_len = 0; @@ -961,26 +971,47 @@ dpif_flow_del(struct dpif *dpif, return dpif_flow_del__(dpif, &del); } -/* Initializes 'dump' to begin dumping the flows in a dpif. - * - * This function provides no status indication. An error status for the entire - * dump operation is provided when it is completed by calling - * dpif_flow_dump_done(). - */ +/* Allocates thread-local state for use with the 'flow_dump_next' function for + * 'dpif'. On return, initializes '*statep' with any private data needed for + * iteration. */ +void +dpif_flow_dump_state_init(const struct dpif *dpif, void **statep) +{ + dpif->dpif_class->flow_dump_state_init(statep); +} + +/* Releases 'state' which was initialized by a call to the + * 'flow_dump_state_init' function for 'dpif'. */ void +dpif_flow_dump_state_uninit(const struct dpif *dpif, void *state) +{ + dpif->dpif_class->flow_dump_state_uninit(state); +} + +/* Initializes 'dump' to begin dumping the flows in a dpif. On sucess, + * initializes 'dump' with any data needed for iteration and returns 0. + * Otherwise, returns a positive errno value describing the problem. */ +int dpif_flow_dump_start(struct dpif_flow_dump *dump, const struct dpif *dpif) { + int error; dump->dpif = dpif; - dump->error = dpif->dpif_class->flow_dump_start(dpif, &dump->state); - log_operation(dpif, "flow_dump_start", dump->error); + error = dpif->dpif_class->flow_dump_start(dpif, &dump->iter); + log_operation(dpif, "flow_dump_start", error); + return error; } -/* Attempts to retrieve another flow from 'dump', which must have been - * initialized with dpif_flow_dump_start(). On success, updates the output - * parameters as described below and returns true. Otherwise, returns false. - * Failure might indicate an actual error or merely the end of the flow table. - * An error status for the entire dump operation is provided when it is - * completed by calling dpif_flow_dump_done(). +/* Attempts to retrieve another flow from 'dump', using 'state' for + * thread-local storage. 'dump' must have been initialized with a successful + * call to dpif_flow_dump_start(), and 'state' must have been initialized with + * dpif_flow_state_init(). + * + * On success, updates the output parameters as described below and returns + * true. Otherwise, returns false. Failure might indicate an actual error or + * merely the end of the flow table. An error status for the entire dump + * operation is provided when it is completed by calling dpif_flow_dump_done(). + * Multiple threads may use the same 'dump' with this function, but all other + * parameters must not be shared. * * On success, if 'key' and 'key_len' are nonnull then '*key' and '*key_len' * will be set to Netlink attributes with types OVS_KEY_ATTR_* representing the @@ -992,27 +1023,20 @@ dpif_flow_dump_start(struct dpif_flow_dump *dump, const struct dpif *dpif) * All of the returned data is owned by 'dpif', not by the caller, and the * caller must not modify or free it. 'dpif' guarantees that it remains * accessible and unchanging until at least the next call to 'flow_dump_next' - * or 'flow_dump_done' for 'dump'. */ + * or 'flow_dump_done' for 'dump' and 'state'. */ bool -dpif_flow_dump_next(struct dpif_flow_dump *dump, +dpif_flow_dump_next(struct dpif_flow_dump *dump, void *state, const struct nlattr **key, size_t *key_len, const struct nlattr **mask, size_t *mask_len, const struct nlattr **actions, size_t *actions_len, const struct dpif_flow_stats **stats) { const struct dpif *dpif = dump->dpif; - int error = dump->error; + int error; - if (!error) { - error = dpif->dpif_class->flow_dump_next(dpif, dump->state, - key, key_len, - mask, mask_len, - actions, actions_len, - stats); - if (error) { - dpif->dpif_class->flow_dump_done(dpif, dump->state); - } - } + error = dpif->dpif_class->flow_dump_next(dpif, dump->iter, state, + key, key_len, mask, mask_len, + actions, actions_len, stats); if (error) { if (key) { *key = NULL; @@ -1030,33 +1054,50 @@ dpif_flow_dump_next(struct dpif_flow_dump *dump, *stats = NULL; } } - if (!dump->error) { - if (error == EOF) { - VLOG_DBG_RL(&dpmsg_rl, "%s: dumped all flows", dpif_name(dpif)); - } else if (should_log_flow_message(error)) { - log_flow_message(dpif, error, "flow_dump", - key ? *key : NULL, key ? *key_len : 0, - mask ? *mask : NULL, mask ? *mask_len : 0, - stats ? *stats : NULL, actions ? *actions : NULL, - actions ? *actions_len : 0); - } + if (error == EOF) { + VLOG_DBG_RL(&dpmsg_rl, "%s: dumped all flows", dpif_name(dpif)); + } else if (should_log_flow_message(error)) { + log_flow_message(dpif, error, "flow_dump", + key ? *key : NULL, key ? *key_len : 0, + mask ? *mask : NULL, mask ? *mask_len : 0, + stats ? *stats : NULL, actions ? *actions : NULL, + actions ? *actions_len : 0); } - dump->error = error; return !error; } +/* Determines whether the next call to 'dpif_flow_dump_next' for 'dump' and + * 'state' will modify or free the keys that it previously returned. 'state' + * must have been initialized by a call to 'dpif_flow_dump_state_init' for + * 'dump'. + * + * 'dpif' guarantees that data returned by flow_dump_next() will remain + * accessible and unchanging until the next call. This function provides a way + * for callers to determine whether that guarantee extends beyond the next + * call. + * + * Returns true if the next call to flow_dump_next() is expected to be + * destructive to previously returned keys for 'state', false otherwise. */ +bool +dpif_flow_dump_next_may_destroy_keys(struct dpif_flow_dump *dump, void *state) +{ + const struct dpif *dpif = dump->dpif; + return (dpif->dpif_class->flow_dump_next_may_destroy_keys + ? dpif->dpif_class->flow_dump_next_may_destroy_keys(state) + : true); +} + /* Completes flow table dump operation 'dump', which must have been initialized - * with dpif_flow_dump_start(). Returns 0 if the dump operation was - * error-free, otherwise a positive errno value describing the problem. */ + * with a successful call to dpif_flow_dump_start(). Returns 0 if the dump + * operation was error-free, otherwise a positive errno value describing the + * problem. */ int dpif_flow_dump_done(struct dpif_flow_dump *dump) { const struct dpif *dpif = dump->dpif; - if (!dump->error) { - dump->error = dpif->dpif_class->flow_dump_done(dpif, dump->state); - log_operation(dpif, "flow_dump_done", dump->error); - } - return dump->error == EOF ? 0 : dump->error; + int error = dpif->dpif_class->flow_dump_done(dpif, dump->iter); + log_operation(dpif, "flow_dump_done", error); + return error == EOF ? 0 : error; } struct dpif_execute_helper_aux { @@ -1064,97 +1105,84 @@ struct dpif_execute_helper_aux { int error; }; +/* This is called for actions that need the context of the datapath to be + * meaningful. */ static void -dpif_execute_helper_execute__(void *aux_, struct ofpbuf *packet, - const struct flow *flow, - const struct nlattr *actions, size_t actions_len) +dpif_execute_helper_cb(void *aux_, struct ofpbuf *packet, + struct pkt_metadata *md, + const struct nlattr *action, bool may_steal OVS_UNUSED) { struct dpif_execute_helper_aux *aux = aux_; struct dpif_execute execute; - struct odputil_keybuf key_stub; - struct ofpbuf key; - int error; - - ofpbuf_use_stub(&key, &key_stub, sizeof key_stub); - odp_flow_key_from_flow(&key, flow, flow->in_port.odp_port); - - execute.key = key.data; - execute.key_len = key.size; - execute.actions = actions; - execute.actions_len = actions_len; - execute.packet = packet; - execute.needs_help = false; - - error = aux->dpif->dpif_class->execute(aux->dpif, &execute); - if (error) { - aux->error = error; + int type = nl_attr_type(action); + + switch ((enum ovs_action_attr)type) { + case OVS_ACTION_ATTR_OUTPUT: + case OVS_ACTION_ATTR_USERSPACE: + execute.actions = action; + execute.actions_len = NLA_ALIGN(action->nla_len); + execute.packet = packet; + execute.md = *md; + execute.needs_help = false; + aux->error = aux->dpif->dpif_class->execute(aux->dpif, &execute); + break; + + case OVS_ACTION_ATTR_PUSH_VLAN: + case OVS_ACTION_ATTR_POP_VLAN: + case OVS_ACTION_ATTR_PUSH_MPLS: + case OVS_ACTION_ATTR_POP_MPLS: + case OVS_ACTION_ATTR_SET: + case OVS_ACTION_ATTR_SAMPLE: + case OVS_ACTION_ATTR_UNSPEC: + case OVS_ACTION_ATTR_RECIRC: + case OVS_ACTION_ATTR_HASH: + case __OVS_ACTION_ATTR_MAX: + OVS_NOT_REACHED(); } } -static void -dpif_execute_helper_output_cb(void *aux, struct ofpbuf *packet, - const struct flow *flow, odp_port_t out_port) -{ - uint64_t actions_stub[DIV_ROUND_UP(NL_A_U32_SIZE, 8)]; - struct ofpbuf actions; - - ofpbuf_use_stack(&actions, actions_stub, sizeof actions_stub); - nl_msg_put_u32(&actions, OVS_ACTION_ATTR_OUTPUT, odp_to_u32(out_port)); - - dpif_execute_helper_execute__(aux, packet, flow, - actions.data, actions.size); -} - -static void -dpif_execute_helper_userspace_cb(void *aux, struct ofpbuf *packet, - const struct flow *flow, - const struct nlattr *action) -{ - dpif_execute_helper_execute__(aux, packet, flow, - action, NLA_ALIGN(action->nla_len)); -} - /* Executes 'execute' by performing most of the actions in userspace and * passing the fully constructed packets to 'dpif' for output and userspace * actions. * * This helps with actions that a given 'dpif' doesn't implement directly. */ static int -dpif_execute_with_help(struct dpif *dpif, const struct dpif_execute *execute) +dpif_execute_with_help(struct dpif *dpif, struct dpif_execute *execute) { - struct dpif_execute_helper_aux aux; - enum odp_key_fitness fit; - struct ofpbuf *packet; - struct flow flow; + struct dpif_execute_helper_aux aux = {dpif, 0}; COVERAGE_INC(dpif_execute_with_help); - fit = odp_flow_key_to_flow(execute->key, execute->key_len, &flow); - if (fit == ODP_FIT_ERROR) { - return EINVAL; - } - - aux.dpif = dpif; - aux.error = 0; - - packet = ofpbuf_clone_with_headroom(execute->packet, VLAN_HEADER_LEN); - odp_execute_actions(&aux, packet, &flow, + odp_execute_actions(&aux, execute->packet, false, &execute->md, execute->actions, execute->actions_len, - dpif_execute_helper_output_cb, - dpif_execute_helper_userspace_cb); - ofpbuf_delete(packet); - + dpif_execute_helper_cb); return aux.error; } -static int -dpif_execute__(struct dpif *dpif, const struct dpif_execute *execute) +/* Causes 'dpif' to perform the 'execute->actions_len' bytes of actions in + * 'execute->actions' on the Ethernet frame in 'execute->packet' and on packet + * metadata in 'execute->md'. The implementation is allowed to modify both the + * '*execute->packet' and 'execute->md'. + * + * Some dpif providers do not implement every action. The Linux kernel + * datapath, in particular, does not implement ARP field modification. If + * 'needs_help' is true, the dpif layer executes in userspace all of the + * actions that it can, and for OVS_ACTION_ATTR_OUTPUT and + * OVS_ACTION_ATTR_USERSPACE actions it passes the packet through to the dpif + * implementation. + * + * This works even if 'execute->actions_len' is too long for a Netlink + * attribute. + * + * Returns 0 if successful, otherwise a positive errno value. */ +int +dpif_execute(struct dpif *dpif, struct dpif_execute *execute) { int error; COVERAGE_INC(dpif_execute); if (execute->actions_len > 0) { - error = (execute->needs_help + error = (execute->needs_help || nl_attr_oversized(execute->actions_len) ? dpif_execute_with_help(dpif, execute) : dpif->dpif_class->execute(dpif, execute)); } else { @@ -1166,40 +1194,6 @@ dpif_execute__(struct dpif *dpif, const struct dpif_execute *execute) return error; } -/* Causes 'dpif' to perform the 'actions_len' bytes of actions in 'actions' on - * the Ethernet frame specified in 'packet' taken from the flow specified in - * the 'key_len' bytes of 'key'. ('key' is mostly redundant with 'packet', but - * it contains some metadata that cannot be recovered from 'packet', such as - * tunnel and in_port.) - * - * Some dpif providers do not implement every action. The Linux kernel - * datapath, in particular, does not implement ARP field modification. If - * 'needs_help' is true, the dpif layer executes in userspace all of the - * actions that it can, and for OVS_ACTION_ATTR_OUTPUT and - * OVS_ACTION_ATTR_USERSPACE actions it passes the packet through to the dpif - * implementation. - * - * This works even if 'actions_len' is too long for a Netlink attribute. - * - * Returns 0 if successful, otherwise a positive errno value. */ -int -dpif_execute(struct dpif *dpif, - const struct nlattr *key, size_t key_len, - const struct nlattr *actions, size_t actions_len, - const struct ofpbuf *buf, - bool needs_help) -{ - struct dpif_execute execute; - - execute.key = key; - execute.key_len = key_len; - execute.actions = actions; - execute.actions_len = actions_len; - execute.packet = buf; - execute.needs_help = needs_help || nl_attr_oversized(actions_len); - return dpif_execute__(dpif, &execute); -} - /* Executes each of the 'n_ops' operations in 'ops' on 'dpif', in the order in * which they are specified, placing each operation's results in the "output" * members documented in comments. @@ -1255,7 +1249,7 @@ dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) /* Help the dpif provider to execute one op. */ struct dpif_op *op = ops[0]; - op->error = dpif_execute__(dpif, &op->u.execute); + op->error = dpif_execute(dpif, &op->u.execute); ops++; n_ops--; } @@ -1276,11 +1270,11 @@ dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) break; case DPIF_OP_EXECUTE: - op->error = dpif_execute__(dpif, &op->u.execute); + op->error = dpif_execute(dpif, &op->u.execute); break; default: - NOT_REACHED(); + OVS_NOT_REACHED(); } } } @@ -1312,27 +1306,63 @@ dpif_recv_set(struct dpif *dpif, bool enable) return error; } -/* Polls for an upcall from 'dpif'. If successful, stores the upcall into - * '*upcall', using 'buf' for storage. Should only be called if - * dpif_recv_set() has been used to enable receiving packets on 'dpif'. +/* Refreshes the poll loops and Netlink sockets associated to each port, + * when the number of upcall handlers (upcall receiving thread) is changed + * to 'n_handlers' and receiving packets for 'dpif' is enabled by + * recv_set(). + * + * Since multiple upcall handlers can read upcalls simultaneously from + * 'dpif', each port can have multiple Netlink sockets, one per upcall + * handler. So, handlers_set() is responsible for the following tasks: + * + * When receiving upcall is enabled, extends or creates the + * configuration to support: + * + * - 'n_handlers' Netlink sockets for each port. + * + * - 'n_handlers' poll loops, one for each upcall handler. + * + * - registering the Netlink sockets for the same upcall handler to + * the corresponding poll loop. + * + * Returns 0 if successful, otherwise a positive errno value. */ +int +dpif_handlers_set(struct dpif *dpif, uint32_t n_handlers) +{ + int error = dpif->dpif_class->handlers_set(dpif, n_handlers); + log_operation(dpif, "handlers_set", error); + return error; +} + +/* Polls for an upcall from 'dpif' for an upcall handler. Since there + * there can be multiple poll loops, 'handler_id' is needed as index to + * identify the corresponding poll loop. If successful, stores the upcall + * into '*upcall', using 'buf' for storage. Should only be called if + * 'recv_set' has been used to enable receiving packets from 'dpif'. + * + * 'upcall->key' and 'upcall->userdata' point into data in the caller-provided + * 'buf', so their memory cannot be freed separately from 'buf'. * - * 'upcall->packet' and 'upcall->key' point into data in the caller-provided - * 'buf', so their memory cannot be freed separately from 'buf'. (This is - * hardly a great way to do things but it works out OK for the dpif providers - * and clients that exist so far.) + * The caller owns the data of 'upcall->packet' and may modify it. If + * packet's headroom is exhausted as it is manipulated, 'upcall->packet' + * will be reallocated. This requires the data of 'upcall->packet' to be + * released with ofpbuf_uninit() before 'upcall' is destroyed. However, + * when an error is returned, the 'upcall->packet' may be uninitialized + * and should not be released. * * Returns 0 if successful, otherwise a positive errno value. Returns EAGAIN * if no upcall is immediately available. */ int -dpif_recv(struct dpif *dpif, struct dpif_upcall *upcall, struct ofpbuf *buf) +dpif_recv(struct dpif *dpif, uint32_t handler_id, struct dpif_upcall *upcall, + struct ofpbuf *buf) { - int error = dpif->dpif_class->recv(dpif, upcall, buf); + int error = dpif->dpif_class->recv(dpif, handler_id, upcall, buf); if (!error && !VLOG_DROP_DBG(&dpmsg_rl)) { struct ds flow; char *packet; - packet = ofp_packet_to_string(upcall->packet->data, - upcall->packet->size); + packet = ofp_packet_to_string(ofpbuf_data(&upcall->packet), + ofpbuf_size(&upcall->packet)); ds_init(&flow); odp_flow_key_format(upcall->key, upcall->key_len, &flow); @@ -1360,12 +1390,14 @@ dpif_recv_purge(struct dpif *dpif) } } -/* Arranges for the poll loop to wake up when 'dpif' has a message queued to be - * received with dpif_recv(). */ +/* Arranges for the poll loop for an upcall handler to wake up when 'dpif' + * 'dpif' has a message queued to be received with the recv member + * function. Since there can be multiple poll loops, 'handler_id' is + * needed as index to identify the corresponding poll loop. */ void -dpif_recv_wait(struct dpif *dpif) +dpif_recv_wait(struct dpif *dpif, uint32_t handler_id) { - dpif->dpif_class->recv_wait(dpif); + dpif->dpif_class->recv_wait(dpif, handler_id); } /* Obtains the NetFlow engine type and engine ID for 'dpif' into '*engine_type' @@ -1533,8 +1565,8 @@ log_execute_message(struct dpif *dpif, const struct dpif_execute *execute, struct ds ds = DS_EMPTY_INITIALIZER; char *packet; - packet = ofp_packet_to_string(execute->packet->data, - execute->packet->size); + packet = ofp_packet_to_string(ofpbuf_data(execute->packet), + ofpbuf_size(execute->packet)); ds_put_format(&ds, "%s: execute ", dpif_name(dpif)); format_odp_actions(&ds, execute->actions, execute->actions_len); if (error) {