X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fdpif.h;h=e7aca8e41e259cfdd64f878f5b1bec18a824bbea;hb=HEAD;hp=a478db2c43040cb3f5fde3383741df0189e76985;hpb=85b20fd6ee585f462e012fbcc7f966a81edab2ed;p=sliver-openvswitch.git diff --git a/lib/dpif.h b/lib/dpif.h index a478db2c4..e7aca8e41 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,10 +35,10 @@ * The terms written in quotes below are defined in later sections. * * When a datapath "port" receives a packet, it extracts the headers (the - * "flow"). If the datapath's "flow table" contains a "flow entry" whose flow - * is the same as the packet's, then it executes the "actions" in the flow - * entry and increments the flow's statistics. If there is no matching flow - * entry, the datapath instead appends the packet to an "upcall" queue. + * "flow"). If the datapath's "flow table" contains a "flow entry" matching + * the packet, then it executes the "actions" in the flow entry and increments + * the flow's statistics. If there is no matching flow entry, the datapath + * instead appends the packet to an "upcall" queue. * * * Ports @@ -61,7 +61,8 @@ * "internal" (for a simulated port used to connect to the TCP/IP stack), * and "gre" (for a GRE tunnel). * - * - A Netlink PID (see "Upcall Queuing and Ordering" below). + * - A Netlink PID for each upcall reading thread (see "Upcall Queuing and + * Ordering" below). * * The dpif interface has functions for adding and deleting ports. When a * datapath implements these (e.g. as the Linux and netdev datapaths do), then @@ -103,12 +104,12 @@ * Flow Table * ========== * - * The flow table is a hash table of "flow entries". Each flow entry contains: + * The flow table is a collection of "flow entries". Each flow entry contains: * * - A "flow", that is, a summary of the headers in an Ethernet packet. The - * flow is the hash key and thus must be unique within the flow table. - * Flows are fine-grained entities that include L2, L3, and L4 headers. A - * single TCP connection consists of two flows, one in each direction. + * flow must be unique within the flow table. Flows are fine-grained + * entities that include L2, L3, and L4 headers. A single TCP connection + * consists of two flows, one in each direction. * * In Open vSwitch userspace, "struct flow" is the typical way to describe * a flow, but the datapath interface uses a different data format to @@ -117,11 +118,37 @@ * "struct ovs_key_*" in include/linux/openvswitch.h for details. * lib/odp-util.h defines several functions for working with these flows. * - * (In case you are familiar with OpenFlow, datapath flows are analogous - * to OpenFlow flow matches. The most important difference is that - * OpenFlow allows fields to be wildcarded and prioritized, whereas a - * datapath's flow table is a hash table so every flow must be - * exact-match, thus without priorities.) + * - A "mask" that, for each bit in the flow, specifies whether the datapath + * should consider the corresponding flow bit when deciding whether a + * given packet matches the flow entry. The original datapath design did + * not support matching: every flow entry was exact match. With the + * addition of a mask, the interface supports datapaths with a spectrum of + * wildcard matching capabilities, from those that only support exact + * matches to those that support bitwise wildcarding on the entire flow + * key, as well as datapaths with capabilities somewhere in between. + * + * Datapaths do not provide a way to query their wildcarding capabilities, + * nor is it expected that the client should attempt to probe for the + * details of their support. Instead, a client installs flows with masks + * that wildcard as many bits as acceptable. The datapath then actually + * wildcards as many of those bits as it can and changes the wildcard bits + * that it does not support into exact match bits. A datapath that can + * wildcard any bit, for example, would install the supplied mask, an + * exact-match only datapath would install an exact-match mask regardless + * of what mask the client supplied, and a datapath in the middle of the + * spectrum would selectively change some wildcard bits into exact match + * bits. + * + * Regardless of the requested or installed mask, the datapath retains the + * original flow supplied by the client. (It does not, for example, "zero + * out" the wildcarded bits.) This allows the client to unambiguously + * identify the flow entry in later flow table operations. + * + * The flow table does not have priorities; that is, all flow entries have + * equal priority. Detecting overlapping flow entries is expensive in + * general, so the datapath is not required to do it. It is primarily the + * client's responsibility not to install flow entries whose flow and mask + * combinations overlap. * * - A list of "actions" that tell the datapath what to do with packets * within a flow. Some examples of actions are OVS_ACTION_ATTR_OUTPUT, @@ -179,10 +206,10 @@ * connection consists of two flows with 1-ms latency to set up each one. * * To receive upcalls, a client has to enable them with dpif_recv_set(). A - * datapath should generally support multiple clients at once (e.g. so that one - * may run "ovs-dpctl show" or "ovs-dpctl dump-flows" while "ovs-vswitchd" is - * also running) but need not support multiple clients enabling upcalls at - * once. + * datapath should generally support being opened multiple times (e.g. so that + * one may run "ovs-dpctl show" or "ovs-dpctl dump-flows" while "ovs-vswitchd" + * is also running) but need not support more than one of these clients + * enabling upcalls at once. * * * Upcall Queuing and Ordering @@ -235,7 +262,7 @@ * PID in "action" upcalls is that dpif_port_get_pid() returns a constant value * and all upcalls are appended to a single queue. * - * The ideal behavior is: + * The preferred behavior is: * * - Each port has a PID that identifies the queue used for "miss" upcalls * on that port. (Thus, if each port has its own queue for "miss" @@ -249,6 +276,18 @@ * * - Upcalls that specify the "special" Netlink PID are queued separately. * + * Multiple threads may want to read upcalls simultaneously from a single + * datapath. To support multiple threads well, one extends the above preferred + * behavior: + * + * - Each port has multiple PIDs. The datapath distributes "miss" upcalls + * across the PIDs, ensuring that a given flow is mapped in a stable way + * to a single PID. + * + * - For "action" upcalls, the thread can specify its own Netlink PID or + * other threads' Netlink PID of the same port for offloading purpose + * (e.g. in a "round robin" manner). + * * * Packet Format * ============= @@ -317,6 +356,32 @@ * location. * * - Adding and removing ports to achieve a new configuration. + * + * + * Thread-safety + * ============= + * + * Most of the dpif functions are fully thread-safe: they may be called from + * any number of threads on the same or different dpif objects. The exceptions + * are: + * + * - dpif_port_poll() and dpif_port_poll_wait() are conditionally + * thread-safe: they may be called from different threads only on + * different dpif objects. + * + * - dpif_flow_dump_next() is conditionally thread-safe: It may be called + * from different threads with the same 'struct dpif_flow_dump', but all + * other parameters must be different for each thread. + * + * - dpif_flow_dump_done() is conditionally thread-safe: All threads that + * share the same 'struct dpif_flow_dump' must have finished using it. + * This function must then be called exactly once for a particular + * dpif_flow_dump to finish the corresponding flow dump operation. + * + * - Functions that operate on 'struct dpif_port_dump' are conditionally + * thread-safe with respect to those objects. That is, one may dump ports + * from any number of threads at once, but each thread must use its own + * struct dpif_port_dump. */ #ifndef DPIF_H #define DPIF_H 1 @@ -324,9 +389,10 @@ #include #include #include -#include -#include "openflow/openflow.h" #include "netdev.h" +#include "ofpbuf.h" +#include "openflow/openflow.h" +#include "packets.h" #include "util.h" #ifdef __cplusplus @@ -337,7 +403,6 @@ struct dpif; struct ds; struct flow; struct nlattr; -struct ofpbuf; struct sset; struct dpif_class; @@ -370,6 +435,9 @@ struct dpif_dp_stats { uint64_t n_missed; /* Number of flow table misses. */ uint64_t n_lost; /* Number of misses not sent to userspace. */ uint64_t n_flows; /* Number of flows present. */ + uint64_t n_mask_hit; /* Number of mega flow masks visited for + flow table matches. */ + uint32_t n_masks; /* Number of mega flow masks. */ }; int dpif_get_dp_stats(const struct dpif *, struct dpif_dp_stats *); @@ -378,8 +446,8 @@ int dpif_get_dp_stats(const struct dpif *, struct dpif_dp_stats *); const char *dpif_port_open_type(const char *datapath_type, const char *port_type); -int dpif_port_add(struct dpif *, struct netdev *, uint32_t *port_nop); -int dpif_port_del(struct dpif *, uint32_t port_no); +int dpif_port_add(struct dpif *, struct netdev *, odp_port_t *port_nop); +int dpif_port_del(struct dpif *, odp_port_t port_no); /* A port within a datapath. * @@ -387,19 +455,19 @@ int dpif_port_del(struct dpif *, uint32_t port_no); struct dpif_port { char *name; /* Network device name, e.g. "eth0". */ char *type; /* Network device type, e.g. "system". */ - uint32_t port_no; /* Port number within datapath. */ + odp_port_t port_no; /* Port number within datapath. */ }; void dpif_port_clone(struct dpif_port *, const struct dpif_port *); void dpif_port_destroy(struct dpif_port *); bool dpif_port_exists(const struct dpif *dpif, const char *devname); -int dpif_port_query_by_number(const struct dpif *, uint32_t port_no, +int dpif_port_query_by_number(const struct dpif *, odp_port_t port_no, struct dpif_port *); int dpif_port_query_by_name(const struct dpif *, const char *devname, struct dpif_port *); -int dpif_port_get_name(struct dpif *, uint32_t port_no, +int dpif_port_get_name(struct dpif *, odp_port_t port_no, char *name, size_t name_size); -int dpif_get_max_ports(const struct dpif *); -uint32_t dpif_port_get_pid(const struct dpif *, uint32_t port_no); +uint32_t dpif_port_get_pid(const struct dpif *, odp_port_t port_no, + uint32_t hash); struct dpif_port_dump { const struct dpif *dpif; @@ -432,7 +500,7 @@ struct dpif_flow_stats { uint64_t n_packets; uint64_t n_bytes; long long int used; - uint8_t tcp_flags; + uint16_t tcp_flags; }; void dpif_flow_stats_extract(const struct flow *, const struct ofpbuf *packet, @@ -448,6 +516,7 @@ enum dpif_flow_put_flags { int dpif_flow_flush(struct dpif *); int dpif_flow_put(struct dpif *, enum dpif_flow_put_flags, const struct nlattr *key, size_t key_len, + const struct nlattr *mask, size_t mask_len, const struct nlattr *actions, size_t actions_len, struct dpif_flow_stats *); int dpif_flow_del(struct dpif *, @@ -459,22 +528,19 @@ int dpif_flow_get(const struct dpif *, struct dpif_flow_dump { const struct dpif *dpif; - int error; - void *state; + void *iter; }; -void dpif_flow_dump_start(struct dpif_flow_dump *, const struct dpif *); -bool dpif_flow_dump_next(struct dpif_flow_dump *, +void dpif_flow_dump_state_init(const struct dpif *, void **statep); +int dpif_flow_dump_start(struct dpif_flow_dump *, const struct dpif *); +bool dpif_flow_dump_next(struct dpif_flow_dump *, void *state, const struct nlattr **key, size_t *key_len, + const struct nlattr **mask, size_t *mask_len, const struct nlattr **actions, size_t *actions_len, const struct dpif_flow_stats **); +bool dpif_flow_dump_next_may_destroy_keys(struct dpif_flow_dump *dump, + void *state); int dpif_flow_dump_done(struct dpif_flow_dump *); - -/* Packet operations. */ - -int dpif_execute(struct dpif *, - const struct nlattr *key, size_t key_len, - const struct nlattr *actions, size_t actions_len, - const struct ofpbuf *); +void dpif_flow_dump_state_uninit(const struct dpif *, void *state); /* Operation batching interface. * @@ -493,6 +559,8 @@ struct dpif_flow_put { enum dpif_flow_put_flags flags; /* DPIF_FP_*. */ const struct nlattr *key; /* Flow to put. */ size_t key_len; /* Length of 'key' in bytes. */ + const struct nlattr *mask; /* Mask to put. */ + size_t mask_len; /* Length of 'mask' in bytes. */ const struct nlattr *actions; /* Actions to perform on flow. */ size_t actions_len; /* Length of 'actions' in bytes. */ @@ -510,13 +578,24 @@ struct dpif_flow_del { }; struct dpif_execute { - const struct nlattr *key; /* Partial flow key (only for metadata). */ - size_t key_len; /* Length of 'key' in bytes. */ + /* Raw support for execute passed along to the provider. */ const struct nlattr *actions; /* Actions to execute on packet. */ size_t actions_len; /* Length of 'actions' in bytes. */ - const struct ofpbuf *packet; /* Packet to execute. */ + struct ofpbuf *packet; /* Packet to execute. */ + struct pkt_metadata md; /* Packet metadata. */ + + /* Some dpif providers do not implement every action. The Linux kernel + * datapath, in particular, does not implement ARP field modification. + * + * If this member is set to true, the dpif layer executes in userspace all + * of the actions that it can, and for OVS_ACTION_ATTR_OUTPUT and + * OVS_ACTION_ATTR_USERSPACE actions it passes the packet through to the + * dpif implementation. */ + bool needs_help; }; +int dpif_execute(struct dpif *, struct dpif_execute *); + struct dpif_op { enum dpif_op_type type; int error; @@ -541,26 +620,30 @@ const char *dpif_upcall_type_to_string(enum dpif_upcall_type); /* A packet passed up from the datapath to userspace. * - * If 'key' or 'actions' is nonnull, then it points into data owned by - * 'packet', so their memory cannot be freed separately. (This is hardly a - * great way to do things but it works out OK for the dpif providers and - * clients that exist so far.) + * The 'packet', 'key' and 'userdata' may point into data in a buffer + * provided by the caller, so the buffer should be released only after the + * upcall processing has been finished. + * + * While being processed, the 'packet' may be reallocated, so the packet must + * be separately released with ofpbuf_uninit(). */ struct dpif_upcall { /* All types. */ enum dpif_upcall_type type; - struct ofpbuf *packet; /* Packet data. */ + struct ofpbuf packet; /* Packet data. */ struct nlattr *key; /* Flow key. */ size_t key_len; /* Length of 'key' in bytes. */ /* DPIF_UC_ACTION only. */ - uint64_t userdata; /* Argument to OVS_ACTION_ATTR_USERSPACE. */ + struct nlattr *userdata; /* Argument to OVS_ACTION_ATTR_USERSPACE. */ }; int dpif_recv_set(struct dpif *, bool enable); -int dpif_recv(struct dpif *, struct dpif_upcall *, struct ofpbuf *); +int dpif_handlers_set(struct dpif *, uint32_t n_handlers); +int dpif_recv(struct dpif *, uint32_t handler_id, struct dpif_upcall *, + struct ofpbuf *); void dpif_recv_purge(struct dpif *); -void dpif_recv_wait(struct dpif *); +void dpif_recv_wait(struct dpif *, uint32_t handler_id); /* Miscellaneous. */