X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;ds=sidebyside;f=dummynet2%2Finclude%2Fnetinet%2Fip_dummynet.h;fp=dummynet2%2Finclude%2Fnetinet%2Fip_dummynet.h;h=6795d7c254a55210096836088526c638de0e1e0e;hb=40445faa1db58b90083115bc315d095e7eb2fe51;hp=f01bfe229198c5a4320dda7f552032f28cbb9c72;hpb=fccf30d4bf6b00b317756a9ff9d2135b361d2599;p=ipfw.git diff --git a/dummynet2/include/netinet/ip_dummynet.h b/dummynet2/include/netinet/ip_dummynet.h index f01bfe2..6795d7c 100644 --- a/dummynet2/include/netinet/ip_dummynet.h +++ b/dummynet2/include/netinet/ip_dummynet.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa + * Copyright (c) 1998-2010 Luigi Rizzo, Universita` di Pisa * Portions Copyright (c) 2000 Akamba Corp. * All rights reserved * @@ -24,351 +24,240 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD: src/sys/netinet/ip_dummynet.h,v 1.40.2.1 2008/04/25 10:26:30 oleg Exp $ + * $FreeBSD: user/luigi/ipfw3-head/sys/netinet/ip_dummynet.h 203321 2010-01-31 21:39:25Z luigi $ */ #ifndef _IP_DUMMYNET_H #define _IP_DUMMYNET_H /* - * Definition of dummynet data structures. In the structures, I decided - * not to use the macros in in the hope of making the code - * easier to port to other architectures. The type of lists and queue we - * use here is pretty simple anyways. - */ - -/* - * We start with a heap, which is used in the scheduler to decide when - * to transmit packets etc. + * Definition of the kernel-userland API for dummynet. * - * The key for the heap is used for two different values: + * Setsockopt() and getsockopt() pass a batch of objects, each + * of them starting with a "struct dn_id" which should fully identify + * the object and its relation with others in the sequence. + * The first object in each request should have + * type= DN_CMD_*, id = DN_API_VERSION. + * For other objects, type and subtype specify the object, len indicates + * the total length including the header, and 'id' identifies the specific + * object. * - * 1. timer ticks- max 10K/second, so 32 bits are enough; - * - * 2. virtual times. These increase in steps of len/x, where len is the - * packet length, and x is either the weight of the flow, or the - * sum of all weights. - * If we limit to max 1000 flows and a max weight of 100, then - * x needs 17 bits. The packet size is 16 bits, so we can easily - * overflow if we do not allow errors. - * So we use a key "dn_key" which is 64 bits. Some macros are used to - * compare key values and handle wraparounds. - * MAX64 returns the largest of two key values. - * MY_M is used as a shift count when doing fixed point arithmetic - * (a better name would be useful...). - */ -typedef u_int64_t dn_key ; /* sorting key */ -#define DN_KEY_LT(a,b) ((int64_t)((a)-(b)) < 0) -#define DN_KEY_LEQ(a,b) ((int64_t)((a)-(b)) <= 0) -#define DN_KEY_GT(a,b) ((int64_t)((a)-(b)) > 0) -#define DN_KEY_GEQ(a,b) ((int64_t)((a)-(b)) >= 0) -#define MAX64(x,y) (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x) -#define MY_M 16 /* number of left shift to obtain a larger precision */ - -/* - * XXX With this scaling, max 1000 flows, max weight 100, 1Gbit/s, the - * virtual time wraps every 15 days. + * Most objects are numbered with an identifier in the range 1..65535. + * DN_MAX_ID indicates the first value outside the range. */ +#define DN_API_VERSION 12500000 +#define DN_MAX_ID 0x10000 -/* - * The maximum hash table size for queues. This value must be a power - * of 2. - */ -#define DN_MAX_HASH_SIZE 65536 +struct dn_id { + uint16_t len; /* total obj len including this header */ + uint8_t type; + uint8_t subtype; + uint32_t id; /* generic id */ +}; /* - * A heap entry is made of a key and a pointer to the actual - * object stored in the heap. - * The heap is an array of dn_heap_entry entries, dynamically allocated. - * Current size is "size", with "elements" actually in use. - * The heap normally supports only ordered insert and extract from the top. - * If we want to extract an object from the middle of the heap, we - * have to know where the object itself is located in the heap (or we - * need to scan the whole array). To this purpose, an object has a - * field (int) which contains the index of the object itself into the - * heap. When the object is moved, the field must also be updated. - * The offset of the index in the object is stored in the 'offset' - * field in the heap descriptor. The assumption is that this offset - * is non-zero if we want to support extract from the middle. - */ -struct dn_heap_entry { - dn_key key ; /* sorting key. Topmost element is smallest one */ - void *object ; /* object pointer */ -} ; - -struct dn_heap { - int size ; - int elements ; - int offset ; /* XXX if > 0 this is the offset of direct ptr to obj */ - struct dn_heap_entry *p ; /* really an array of "size" entries */ -} ; - -#ifdef _KERNEL -/* - * Packets processed by dummynet have an mbuf tag associated with - * them that carries their dummynet state. This is used within - * the dummynet code as well as outside when checking for special - * processing requirements. - * Note that the first part is the reinject info and is common to - * other forms of packet reinjection. + * These values are in the type field of struct dn_id. + * To preserve the ABI, never rearrange the list or delete + * entries with the exception of DN_LAST */ -struct dn_pkt_tag { - struct ipfw_rule_ref rule; /* matching rule */ - - /* second part, dummynet specific */ - int dn_dir; /* action when packet comes out. */ - /* see ip_fw_private.h */ +enum { + DN_NONE = 0, + DN_LINK = 1, + DN_FS, + DN_SCH, + DN_SCH_I, + DN_QUEUE, + DN_DELAY_LINE, + DN_PROFILE, + DN_FLOW, /* struct dn_flow */ + DN_TEXT, /* opaque text is the object */ + + DN_CMD_CONFIG = 0x80, /* objects follow */ + DN_CMD_DELETE, /* subtype + list of entries */ + DN_CMD_GET, /* subtype + list of entries */ + DN_CMD_FLUSH, + /* for compatibility with FreeBSD 7.2/8 */ + DN_COMPAT_PIPE, + DN_COMPAT_QUEUE, + DN_GET_COMPAT, + + /* special commands for emulation of sysctl variables */ + DN_SYSCTL_GET, + DN_SYSCTL_SET, + + DN_LAST, +}; + +enum { /* subtype for schedulers, flowset and the like */ + DN_SCHED_UNKNOWN = 0, + DN_SCHED_FIFO = 1, + DN_SCHED_WF2QP = 2, + /* others are in individual modules */ +}; - dn_key output_time; /* when the pkt is due for delivery */ - struct ifnet *ifp; /* interface, for ip_output */ - struct _ip6dn_args ip6opt; /* XXX ipv6 options */ +enum { /* user flags */ + DN_HAVE_MASK = 0x0001, /* fs or sched has a mask */ + DN_NOERROR = 0x0002, /* do not report errors */ + DN_QHT_HASH = 0x0004, /* qht is a hash table */ + DN_QSIZE_BYTES = 0x0008, /* queue size is in bytes */ + DN_HAS_PROFILE = 0x0010, /* a link has a profile */ + DN_IS_RED = 0x0020, + DN_IS_GENTLE_RED= 0x0040, + DN_PIPE_CMD = 0x1000, /* pipe config... */ }; -#endif /* _KERNEL */ /* - * Overall structure of dummynet (with WF2Q+): - -In dummynet, packets are selected with the firewall rules, and passed -to two different objects: PIPE or QUEUE. - -A QUEUE is just a queue with configurable size and queue management -policy. It is also associated with a mask (to discriminate among -different flows), a weight (used to give different shares of the -bandwidth to different flows) and a "pipe", which essentially -supplies the transmit clock for all queues associated with that -pipe. - -A PIPE emulates a fixed-bandwidth link, whose bandwidth is -configurable. The "clock" for a pipe can come from either an -internal timer, or from the transmit interrupt of an interface. -A pipe is also associated with one (or more, if masks are used) -queue, where all packets for that pipe are stored. - -The bandwidth available on the pipe is shared by the queues -associated with that pipe (only one in case the packet is sent -to a PIPE) according to the WF2Q+ scheduling algorithm and the -configured weights. - -In general, incoming packets are stored in the appropriate queue, -which is then placed into one of a few heaps managed by a scheduler -to decide when the packet should be extracted. -The scheduler (a function called dummynet()) is run at every timer -tick, and grabs queues from the head of the heaps when they are -ready for processing. - -There are three data structures definining a pipe and associated queues: - - + dn_pipe, which contains the main configuration parameters related - to delay and bandwidth; - + dn_flow_set, which contains WF2Q+ configuration, flow - masks, plr and RED configuration; - + dn_flow_queue, which is the per-flow queue (containing the packets) - -Multiple dn_flow_set can be linked to the same pipe, and multiple -dn_flow_queue can be linked to the same dn_flow_set. -All data structures are linked in a linear list which is used for -housekeeping purposes. - -During configuration, we create and initialize the dn_flow_set -and dn_pipe structures (a dn_pipe also contains a dn_flow_set). - -At runtime: packets are sent to the appropriate dn_flow_set (either -WFQ ones, or the one embedded in the dn_pipe for fixed-rate flows), -which in turn dispatches them to the appropriate dn_flow_queue -(created dynamically according to the masks). - -The transmit clock for fixed rate flows (ready_event()) selects the -dn_flow_queue to be used to transmit the next packet. For WF2Q, -wfq_ready_event() extract a pipe which in turn selects the right -flow using a number of heaps defined into the pipe itself. - - * + * link template. */ +struct dn_link { + struct dn_id oid; + + /* + * Userland sets bw and delay in bits/s and milliseconds. + * The kernel converts this back and forth to bits/tick and ticks. + * XXX what about burst ? + */ + int32_t link_nr; + int bandwidth; /* bit/s or bits/tick. */ + int delay; /* ms and ticks */ + uint64_t burst; /* scaled. bits*Hz XXX */ +}; /* - * per flow queue. This contains the flow identifier, the queue - * of packets, counters, and parameters used to support both RED and - * WF2Q+. - * - * A dn_flow_queue is created and initialized whenever a packet for - * a new flow arrives. + * A flowset, which is a template for flows. Contains parameters + * from the command line: id, target scheduler, queue sizes, plr, + * flow masks, buckets for the flow hash, and possibly scheduler- + * specific parameters (weight, quantum and so on). */ -struct dn_flow_queue { - struct dn_flow_queue *next ; - struct ipfw_flow_id id ; - - struct mbuf *head, *tail ; /* queue of packets */ - u_int len ; - u_int len_bytes ; - - /* - * When we emulate MAC overheads, or channel unavailability due - * to other traffic on a shared medium, we augment the packet at - * the head of the queue with an 'extra_bits' field representsing - * the additional delay the packet will be subject to: - * extra_bits = bw*unavailable_time. - * With large bandwidth and large delays, extra_bits (and also numbytes) - * can become very large, so better play safe and use 64 bit - */ - uint64_t numbytes ; /* credit for transmission (dynamic queues) */ - int64_t extra_bits; /* extra bits simulating unavailable channel */ - - u_int64_t tot_pkts ; /* statistics counters */ - u_int64_t tot_bytes ; - u_int32_t drops ; - - int hash_slot ; /* debugging/diagnostic */ - - /* RED parameters */ - int avg ; /* average queue length est. (scaled) */ - int count ; /* arrivals since last RED drop */ - int random ; /* random value (scaled) */ - dn_key idle_time; /* start of queue idle time */ - - /* WF2Q+ support */ - struct dn_flow_set *fs ; /* parent flow set */ - int heap_pos ; /* position (index) of struct in heap */ - dn_key sched_time ; /* current time when queue enters ready_heap */ - - dn_key S,F ; /* start time, finish time */ - /* - * Setting F < S means the timestamp is invalid. We only need - * to test this when the queue is empty. - */ -} ; +struct dn_fs { + struct dn_id oid; + uint32_t fs_nr; /* the flowset number */ + uint32_t flags; /* userland flags */ + int qsize; /* queue size in slots or bytes */ + int32_t plr; /* PLR, pkt loss rate (2^31-1 means 100%) */ + uint32_t buckets; /* buckets used for the queue hash table */ + + struct ipfw_flow_id flow_mask; + uint32_t sched_nr; /* the scheduler we attach to */ + /* generic scheduler parameters. Leave them at -1 if unset. + * Now we use 0: weight, 1: lmax, 2: priority + */ + int par[4]; + + /* RED/GRED parameters. + * weight and probabilities are in the range 0..1 represented + * in fixed point arithmetic with SCALE_RED decimal bits. + */ +#define SCALE_RED 16 +#define SCALE(x) ( (x) << SCALE_RED ) +#define SCALE_VAL(x) ( (x) >> SCALE_RED ) +#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) + int w_q ; /* queue weight (scaled) */ + int max_th ; /* maximum threshold for queue (scaled) */ + int min_th ; /* minimum threshold for queue (scaled) */ + int max_p ; /* maximum value for p_b (scaled) */ -/* - * flow_set descriptor. Contains the "template" parameters for the - * queue configuration, and pointers to the hash table of dn_flow_queue's. - * - * The hash table is an array of lists -- we identify the slot by - * hashing the flow-id, then scan the list looking for a match. - * The size of the hash table (buckets) is configurable on a per-queue - * basis. - * - * A dn_flow_set is created whenever a new queue or pipe is created (in the - * latter case, the structure is located inside the struct dn_pipe). - */ -struct dn_flow_set { - SLIST_ENTRY(dn_flow_set) next; /* linked list in a hash slot */ - - u_short fs_nr ; /* flow_set number */ - u_short flags_fs; -#define DN_HAVE_FLOW_MASK 0x0001 -#define DN_IS_RED 0x0002 -#define DN_IS_GENTLE_RED 0x0004 -#define DN_QSIZE_IS_BYTES 0x0008 /* queue size is measured in bytes */ -#define DN_NOERROR 0x0010 /* do not report ENOBUFS on drops */ -#define DN_HAS_PROFILE 0x0020 /* the pipe has a delay profile. */ -#define DN_IS_PIPE 0x4000 -#define DN_IS_QUEUE 0x8000 - - struct dn_pipe *pipe ; /* pointer to parent pipe */ - u_short parent_nr ; /* parent pipe#, 0 if local to a pipe */ - - int weight ; /* WFQ queue weight */ - int qsize ; /* queue size in slots or bytes */ - int plr ; /* pkt loss rate (2^31-1 means 100%) */ - - struct ipfw_flow_id flow_mask ; - - /* hash table of queues onto this flow_set */ - int rq_size ; /* number of slots */ - int rq_elements ; /* active elements */ - struct dn_flow_queue **rq; /* array of rq_size entries */ - - u_int32_t last_expired ; /* do not expire too frequently */ - int backlogged ; /* #active queues for this flowset */ - - /* RED parameters */ -#define SCALE_RED 16 -#define SCALE(x) ( (x) << SCALE_RED ) -#define SCALE_VAL(x) ( (x) >> SCALE_RED ) -#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) - int w_q ; /* queue weight (scaled) */ - int max_th ; /* maximum threshold for queue (scaled) */ - int min_th ; /* minimum threshold for queue (scaled) */ - int max_p ; /* maximum value for p_b (scaled) */ - u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */ - u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */ - u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */ - u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */ - u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */ - u_int lookup_depth ; /* depth of lookup table */ - int lookup_step ; /* granularity inside the lookup table */ - int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */ - int avg_pkt_size ; /* medium packet size */ - int max_pkt_size ; /* max packet size */ }; -SLIST_HEAD(dn_flow_set_head, dn_flow_set); /* - * Pipe descriptor. Contains global parameters, delay-line queue, - * and the flow_set used for fixed-rate queues. - * - * For WF2Q+ support it also has 3 heaps holding dn_flow_queue: - * not_eligible_heap, for queues whose start time is higher - * than the virtual time. Sorted by start time. - * scheduler_heap, for queues eligible for scheduling. Sorted by - * finish time. - * idle_heap, all flows that are idle and can be removed. We - * do that on each tick so we do not slow down too much - * operations during forwarding. - * + * dn_flow collects flow_id and stats for queues and scheduler + * instances, and is used to pass these info to userland. + * oid.type/oid.subtype describe the object, oid.id is number + * of the parent object. */ -struct dn_pipe { /* a pipe */ - SLIST_ENTRY(dn_pipe) next; /* linked list in a hash slot */ - - int pipe_nr ; /* number */ - int bandwidth; /* really, bytes/tick. */ - int delay ; /* really, ticks */ - - struct mbuf *head, *tail ; /* packets in delay line */ +struct dn_flow { + struct dn_id oid; + struct ipfw_flow_id fid; + uint64_t tot_pkts; /* statistics counters */ + uint64_t tot_bytes; + uint32_t length; /* Queue lenght, in packets */ + uint32_t len_bytes; /* Queue lenght, in bytes */ + uint32_t drops; +}; - /* WF2Q+ */ - struct dn_heap scheduler_heap ; /* top extract - key Finish time*/ - struct dn_heap not_eligible_heap; /* top extract- key Start time */ - struct dn_heap idle_heap ; /* random extract - key Start=Finish time */ - dn_key V ; /* virtual time */ - int sum; /* sum of weights of all active sessions */ +/* + * Scheduler template, mostly indicating the name, number, + * sched_mask and buckets. + */ +struct dn_sch { + struct dn_id oid; + uint32_t sched_nr; /* N, scheduler number */ + uint32_t buckets; /* number of buckets for the instances */ + uint32_t flags; /* have_mask, ... */ + + char name[16]; /* null terminated */ + /* mask to select the appropriate scheduler instance */ + struct ipfw_flow_id sched_mask; /* M */ +}; - /* Same as in dn_flow_queue, numbytes can become large */ - int64_t numbytes; /* bits I can transmit (more or less). */ - uint64_t burst; /* burst size, scaled: bits * hz */ - dn_key sched_time ; /* time pipe was scheduled in ready_heap */ - dn_key idle_time; /* start of pipe idle time */ +/* A delay profile is attached to a link. + * Note that a profile, as any other object, cannot be longer than 2^16 + */ +#define ED_MAX_SAMPLES_NO 1024 +struct dn_profile { + struct dn_id oid; + /* fields to simulate a delay profile */ +#define ED_MAX_NAME_LEN 32 + char name[ED_MAX_NAME_LEN]; + int link_nr; + int loss_level; + int bandwidth; // XXX use link bandwidth? + int samples_no; /* actual length of samples[] */ + int samples[ED_MAX_SAMPLES_NO]; /* may be shorter */ +}; - /* - * When the tx clock come from an interface (if_name[0] != '\0'), its name - * is stored below, whereas the ifp is filled when the rule is configured. - */ - char if_name[IFNAMSIZ]; - struct ifnet *ifp ; - int ready ; /* set if ifp != NULL and we got a signal from it */ - struct dn_flow_set fs ; /* used with fixed-rate flows */ - /* fields to simulate a delay profile */ +/* + * Overall structure of dummynet -#define ED_MAX_NAME_LEN 32 - char name[ED_MAX_NAME_LEN]; - int loss_level; - int samples_no; - int *samples; -}; +In dummynet, packets are selected with the firewall rules, and passed +to two different objects: PIPE or QUEUE (bad name). + +A QUEUE defines a classifier, which groups packets into flows +according to a 'mask', puts them into independent queues (one +per flow) with configurable size and queue management policy, +and passes flows to a scheduler: + + (flow_mask|sched_mask) sched_mask + +---------+ weight Wx +-------------+ + | |->-[flow]-->--| |-+ + -->--| QUEUE x | ... | | | + | |->-[flow]-->--| SCHEDuler N | | + +---------+ | | | + ... | +--[LINK N]-->-- + +---------+ weight Wy | | +--[LINK N]-->-- + | |->-[flow]-->--| | | + -->--| QUEUE y | ... | | | + | |->-[flow]-->--| | | + +---------+ +-------------+ | + +-------------+ + +Many QUEUE objects can connect to the same scheduler, each +QUEUE object can have its own set of parameters. + +In turn, the SCHEDuler 'forks' multiple instances according +to a 'sched_mask', each instance manages its own set of queues +and transmits on a private instance of a configurable LINK. + +A PIPE is a simplified version of the above, where there +is no flow_mask, and each scheduler instance handles a single queue. + +The following data structures (visible from userland) describe +the objects used by dummynet: + + + dn_link, contains the main configuration parameters related + to delay and bandwidth; + + dn_profile describes a delay profile; + + dn_flow describes the flow status (flow id, statistics) + + + dn_sch describes a scheduler + + dn_fs describes a flowset (msk, weight, queue parameters) -/* dn_pipe_max is used to pass pipe configuration from userland onto - * kernel space and back + * */ -#define ED_MAX_SAMPLES_NO 1024 -struct dn_pipe_max { - struct dn_pipe pipe; - int samples[ED_MAX_SAMPLES_NO]; -}; - -SLIST_HEAD(dn_pipe_head, dn_pipe); #endif /* _IP_DUMMYNET_H */