Major changes:
[ipfw.git] / ipfw / dummynet.c
1 /*
2  * Copyright (c) 2002-2003 Luigi Rizzo
3  * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp
4  * Copyright (c) 1994 Ugen J.S.Antsilevich
5  *
6  * Idea and grammar partially left from:
7  * Copyright (c) 1993 Daniel Boulet
8  *
9  * Redistribution and use in source forms, with and without modification,
10  * are permitted provided that this entire comment appears intact.
11  *
12  * Redistribution in binary form may occur without any restrictions.
13  * Obviously, it would be nice if you gave credit where credit is due
14  * but requiring it would be too onerous.
15  *
16  * This software is provided ``AS IS'' without any warranties of any kind.
17  *
18  * NEW command line interface for IP firewall facility
19  *
20  * $FreeBSD: head/sbin/ipfw/dummynet.c 187769 2009-01-27 11:06:59Z luigi $
21  *
22  * dummynet support
23  */
24
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/queue.h>
/* XXX there are several sysctl leftover here */
#include <sys/sysctl.h>

#include "ipfw2.h"

#include <ctype.h>
#include <err.h>
#include <limits.h>
#include <netdb.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/ip_fw.h>
#include <netinet/ip_dummynet.h>
#include <arpa/inet.h>  /* inet_ntoa */
46
47 static struct _s_x dummynet_params[] = {
48         { "plr",                TOK_PLR },
49         { "noerror",            TOK_NOERROR },
50         { "buckets",            TOK_BUCKETS },
51         { "dst-ip",             TOK_DSTIP },
52         { "src-ip",             TOK_SRCIP },
53         { "dst-port",           TOK_DSTPORT },
54         { "src-port",           TOK_SRCPORT },
55         { "proto",              TOK_PROTO },
56         { "weight",             TOK_WEIGHT },
57         { "all",                TOK_ALL },
58         { "mask",               TOK_MASK },
59         { "droptail",           TOK_DROPTAIL },
60         { "red",                TOK_RED },
61         { "gred",               TOK_GRED },
62         { "bw",                 TOK_BW },
63         { "bandwidth",          TOK_BW },
64         { "delay",              TOK_DELAY },
65         { "pipe",               TOK_PIPE },
66         { "queue",              TOK_QUEUE },
67         { "flow-id",            TOK_FLOWID},
68         { "dst-ipv6",           TOK_DSTIP6},
69         { "dst-ip6",            TOK_DSTIP6},
70         { "src-ipv6",           TOK_SRCIP6},
71         { "src-ip6",            TOK_SRCIP6},
72         { "profile",            TOK_PIPE_PROFILE},
73         { "dummynet-params",    TOK_NULL },
74         { NULL, 0 }     /* terminator */
75 };
76
77 /*
78  * XXX to be updated to the new version,
79  * without the global struct command_opts variable
80  */
81 static int
82 sort_q(void * to_be_done, const void *pa, const void *pb)
83 {
84         int rev = (co.do_sort < 0);
85         int field = rev ? -co.do_sort : co.do_sort;
86         long long res = 0;
87         const struct dn_flow_queue *a = pa;
88         const struct dn_flow_queue *b = pb;
89
90         switch (field) {
91         case 1: /* pkts */
92                 res = a->len - b->len;
93                 break;
94         case 2: /* bytes */
95                 res = a->len_bytes - b->len_bytes;
96                 break;
97
98         case 3: /* tot pkts */
99                 res = a->tot_pkts - b->tot_pkts;
100                 break;
101
102         case 4: /* tot bytes */
103                 res = a->tot_bytes - b->tot_bytes;
104                 break;
105         }
106         if (res < 0)
107                 res = -1;
108         if (res > 0)
109                 res = 1;
110         return (int)(rev ? res : -res);
111 }
112
113 static void
114 list_queues(struct dn_flow_set *fs, struct dn_flow_queue *q)
115 {
116         int l;
117         int index_printed, indexes = 0;
118         char buff[255];
119         struct protoent *pe;
120
121         if (fs->rq_elements == 0)
122                 return;
123
124         if (co.do_sort != 0)
125                 qsort_r(q, fs->rq_elements, sizeof *q, NULL, sort_q);
126
127         /* Print IPv4 flows */
128         index_printed = 0;
129         for (l = 0; l < fs->rq_elements; l++) {
130                 struct in_addr ina;
131
132                 /* XXX: Should check for IPv4 flows */
133                 if (IS_IP6_FLOW_ID(&(q[l].id)))
134                         continue;
135
136                 if (!index_printed) {
137                         index_printed = 1;
138                         if (indexes > 0)        /* currently a no-op */
139                                 printf("\n");
140                         indexes++;
141                         printf("    "
142                             "mask: 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n",
143                             fs->flow_mask.proto,
144                             fs->flow_mask.src_ip, fs->flow_mask.src_port,
145                             fs->flow_mask.dst_ip, fs->flow_mask.dst_port);
146
147                         printf("BKT Prot ___Source IP/port____ "
148                             "____Dest. IP/port____ "
149                             "Tot_pkt/bytes Pkt/Byte Drp\n");
150                 }
151
152                 printf("%3d ", q[l].hash_slot);
153                 pe = getprotobynumber(q[l].id.proto);
154                 if (pe)
155                         printf("%-4s ", pe->p_name);
156                 else
157                         printf("%4u ", q[l].id.proto);
158                 ina.s_addr = htonl(q[l].id.src_ip);
159                 printf("%15s/%-5d ",
160                     inet_ntoa(ina), q[l].id.src_port);
161                 ina.s_addr = htonl(q[l].id.dst_ip);
162                 printf("%15s/%-5d ",
163                     inet_ntoa(ina), q[l].id.dst_port);
164                 printf("%4llu %8llu %2u %4u %3u\n",
165                     align_uint64(&q[l].tot_pkts),
166                     align_uint64(&q[l].tot_bytes),
167                     q[l].len, q[l].len_bytes, q[l].drops);
168                 if (co.verbose)
169                         printf("   S %20llu  F %20llu\n",
170                             align_uint64(&q[l].S), align_uint64(&q[l].F));
171         }
172
173         /* Print IPv6 flows */
174         index_printed = 0;
175         for (l = 0; l < fs->rq_elements; l++) {
176                 if (!IS_IP6_FLOW_ID(&(q[l].id)))
177                         continue;
178
179                 if (!index_printed) {
180                         index_printed = 1;
181                         if (indexes > 0)
182                                 printf("\n");
183                         indexes++;
184                         printf("\n        mask: proto: 0x%02x, flow_id: 0x%08x,  ",
185                             fs->flow_mask.proto, fs->flow_mask.flow_id6);
186                         inet_ntop(AF_INET6, &(fs->flow_mask.src_ip6),
187                             buff, sizeof(buff));
188                         printf("%s/0x%04x -> ", buff, fs->flow_mask.src_port);
189                         inet_ntop( AF_INET6, &(fs->flow_mask.dst_ip6),
190                             buff, sizeof(buff) );
191                         printf("%s/0x%04x\n", buff, fs->flow_mask.dst_port);
192
193                         printf("BKT ___Prot___ _flow-id_ "
194                             "______________Source IPv6/port_______________ "
195                             "_______________Dest. IPv6/port_______________ "
196                             "Tot_pkt/bytes Pkt/Byte Drp\n");
197                 }
198                 printf("%3d ", q[l].hash_slot);
199                 pe = getprotobynumber(q[l].id.proto);
200                 if (pe != NULL)
201                         printf("%9s ", pe->p_name);
202                 else
203                         printf("%9u ", q[l].id.proto);
204                 printf("%7d  %39s/%-5d ", q[l].id.flow_id6,
205                     inet_ntop(AF_INET6, &(q[l].id.src_ip6), buff, sizeof(buff)),
206                     q[l].id.src_port);
207                 printf(" %39s/%-5d ",
208                     inet_ntop(AF_INET6, &(q[l].id.dst_ip6), buff, sizeof(buff)),
209                     q[l].id.dst_port);
210                 printf(" %4llu %8llu %2u %4u %3u\n",
211                     align_uint64(&q[l].tot_pkts),
212                     align_uint64(&q[l].tot_bytes),
213                     q[l].len, q[l].len_bytes, q[l].drops);
214                 if (co.verbose)
215                         printf("   S %20llu  F %20llu\n",
216                             align_uint64(&q[l].S),
217                             align_uint64(&q[l].F));
218         }
219 }
220
221 static void
222 print_flowset_parms(struct dn_flow_set *fs, char *prefix)
223 {
224         int l;
225         char qs[30];
226         char plr[30];
227         char red[90];   /* Display RED parameters */
228
229         l = fs->qsize;
230         if (fs->flags_fs & DN_QSIZE_IS_BYTES) {
231                 if (l >= 8192)
232                         sprintf(qs, "%d KB", l / 1024);
233                 else
234                         sprintf(qs, "%d B", l);
235         } else
236                 sprintf(qs, "%3d sl.", l);
237         if (fs->plr)
238                 sprintf(plr, "plr %f", 1.0 * fs->plr / (double)(0x7fffffff));
239         else
240                 plr[0] = '\0';
241         if (fs->flags_fs & DN_IS_RED)   /* RED parameters */
242                 sprintf(red,
243                     "\n\t  %cRED w_q %f min_th %d max_th %d max_p %f",
244                     (fs->flags_fs & DN_IS_GENTLE_RED) ? 'G' : ' ',
245                     1.0 * fs->w_q / (double)(1 << SCALE_RED),
246                     SCALE_VAL(fs->min_th),
247                     SCALE_VAL(fs->max_th),
248                     1.0 * fs->max_p / (double)(1 << SCALE_RED));
249         else
250                 sprintf(red, "droptail");
251
252         printf("%s %s%s %d queues (%d buckets) %s\n",
253             prefix, qs, plr, fs->rq_elements, fs->rq_size, red);
254 }
255
256 static void
257 print_extra_delay_parms(struct dn_pipe *p, char *prefix)
258 {
259         double loss;
260         if (p->samples_no <= 0)
261                 return;
262
263         loss = p->loss_level;
264         loss /= p->samples_no;
265         printf("%s profile: name \"%s\" loss %f samples %d\n",
266                 prefix, p->name, loss, p->samples_no);
267 }
268
269 void
270 ipfw_list_pipes(void *data, uint nbytes, int ac, char *av[])
271 {
272         int rulenum;
273         void *next = data;
274         struct dn_pipe *p = (struct dn_pipe *) data;
275         struct dn_flow_set *fs;
276         struct dn_flow_queue *q;
277         int l;
278
279         if (ac > 0)
280                 rulenum = strtoul(*av++, NULL, 10);
281         else
282                 rulenum = 0;
283         for (; nbytes >= sizeof *p; p = (struct dn_pipe *)next) {
284                 double b = p->bandwidth;
285                 char buf[30];
286                 char prefix[80];
287
288                 if (SLIST_NEXT(p, next) != (struct dn_pipe *)DN_IS_PIPE)
289                         break;  /* done with pipes, now queues */
290
291                 /*
292                  * compute length, as pipe have variable size
293                  */
294                 l = sizeof(*p) + p->fs.rq_elements * sizeof(*q);
295                 next = (char *)p + l;
296                 nbytes -= l;
297
298                 if ((rulenum != 0 && rulenum != p->pipe_nr) || co.do_pipe == 2)
299                         continue;
300
301                 /*
302                  * Print rate (or clocking interface)
303                  */
304                 if (p->if_name[0] != '\0')
305                         sprintf(buf, "%s", p->if_name);
306                 else if (b == 0)
307                         sprintf(buf, "unlimited");
308                 else if (b >= 1000000)
309                         sprintf(buf, "%7.3f Mbit/s", b/1000000);
310                 else if (b >= 1000)
311                         sprintf(buf, "%7.3f Kbit/s", b/1000);
312                 else
313                         sprintf(buf, "%7.3f bit/s ", b);
314
315                 sprintf(prefix, "%05d: %s %4d ms ",
316                     p->pipe_nr, buf, p->delay);
317
318                 print_extra_delay_parms(p, prefix);
319
320                 print_flowset_parms(&(p->fs), prefix);
321
322                 q = (struct dn_flow_queue *)(p+1);
323                 list_queues(&(p->fs), q);
324         }
325         for (fs = next; nbytes >= sizeof *fs; fs = next) {
326                 char prefix[80];
327
328                 if (SLIST_NEXT(fs, next) != (struct dn_flow_set *)DN_IS_QUEUE)
329                         break;
330                 l = sizeof(*fs) + fs->rq_elements * sizeof(*q);
331                 next = (char *)fs + l;
332                 nbytes -= l;
333
334                 if (rulenum != 0 && ((rulenum != fs->fs_nr && co.do_pipe == 2) ||
335                     (rulenum != fs->parent_nr && co.do_pipe == 1))) {
336                         continue;
337                 }
338
339                 q = (struct dn_flow_queue *)(fs+1);
340                 sprintf(prefix, "q%05d: weight %d pipe %d ",
341                     fs->fs_nr, fs->weight, fs->parent_nr);
342                 print_flowset_parms(fs, prefix);
343                 list_queues(fs, q);
344         }
345 }
346
347 /*
348  * Delete pipe or queue i
349  */
350 int
351 ipfw_delete_pipe(int pipe_or_queue, int i)
352 {
353         struct dn_pipe p;
354
355         memset(&p, 0, sizeof p);
356         if (pipe_or_queue == 1)
357                 p.pipe_nr = i;          /* pipe */
358         else
359                 p.fs.fs_nr = i;         /* queue */
360         i = do_cmd(IP_DUMMYNET_DEL, &p, sizeof p);
361         if (i) {
362                 i = 1;
363                 warn("rule %u: setsockopt(IP_DUMMYNET_DEL)", i);
364         }
365         return i;
366 }
367
368 /*
369  * Code to parse delay profiles.
370  *
371  * Some link types introduce extra delays in the transmission
372  * of a packet, e.g. because of MAC level framing, contention on
373  * the use of the channel, MAC level retransmissions and so on.
374  * From our point of view, the channel is effectively unavailable
375  * for this extra time, which is constant or variable depending
376  * on the link type. Additionally, packets may be dropped after this
377  * time (e.g. on a wireless link after too many retransmissions).
378  * We can model the additional delay with an empirical curve
379  * that represents its distribution.
380  *
381  *      cumulative probability
382  *      1.0 ^
383  *          |
384  *      L   +-- loss-level          x
385  *          |                 ******
386  *          |                *
387  *          |           *****
388  *          |          *
389  *          |        **
390  *          |       *                         
391  *          +-------*------------------->
392  *                      delay
393  *
394  * The empirical curve may have both vertical and horizontal lines.
395  * Vertical lines represent constant delay for a range of
 * probabilities; horizontal lines correspond to a discontinuity
397  * in the delay distribution: the pipe will use the largest delay
398  * for a given probability.
399  * 
400  * To pass the curve to dummynet, we must store the parameters
401  * in a file as described below, and issue the command
402  *
403  *      ipfw pipe <n> config ... bw XXX profile <filename> ...
404  *
 * The file format is the following, with whitespace acting as
 * a separator and '#' indicating the beginning of a comment:
407  *
408  *      samples N
409  *              the number of samples used in the internal
410  *              representation (2..1024; default 100);
411  *
412  *      loss-level L 
413  *              The probability above which packets are lost.
414  *               (0.0 <= L <= 1.0, default 1.0 i.e. no loss);
415  *
416  *      name identifier
 *              An optional name (listed by "ipfw pipe show")
 *              to identify the distribution;
419  *
420  *      "delay prob" | "prob delay"
421  *              One of these two lines is mandatory and defines
422  *              the format of the following lines with data points.
423  *
424  *      XXX YYY
425  *              2 or more lines representing points in the curve,
426  *              with either delay or probability first, according
427  *              to the chosen format.
428  *              The unit for delay is milliseconds.
429  *
 * Data points do not need to be ordered, and their count does not
 * need to match the number specified in the "samples" line. ipfw
 * will sort and interpolate the curve as needed.
433  *
434  * Example of a profile file:
435  
436         name    bla_bla_bla
437         samples 100
438         loss-level    0.86
439         prob    delay
440         0       200     # minimum overhead is 200ms
441         0.5     200
442         0.5     300
443         0.8     1000
444         0.9     1300
445         1       1300
446  
447  * Internally, we will convert the curve to a fixed number of
448  * samples, and when it is time to transmit a packet we will
449  * model the extra delay as extra bits in the packet.
450  *
451  */
452
/*
 * Constants used by the delay profile parser: maximum length of an
 * input line, the keywords recognised in a profile file, the token
 * separators, and the minimum number of data points required.
 * ED_MAX_LINE_LEN is parenthesised so that it expands safely inside
 * any expression.
 */
/* XXX move to an array definition ? */
#define ED_MAX_LINE_LEN (256 + ED_MAX_NAME_LEN)
#define ED_TOK_SAMPLES  "samples"
#define ED_TOK_LOSS     "loss-level"
#define ED_TOK_NAME     "name"
#define ED_TOK_DELAY    "delay"
#define ED_TOK_PROB     "prob"
#define ED_TOK_BW       "bw"
#define ED_SEPARATORS   " \t\n"
#define ED_MIN_SAMPLES_NO       2
463
/*
 * Returns 1 if s is a non-negative decimal number, i.e. a string made
 * only of digits with at most one '.', and containing at least one
 * digit.  Returns 0 otherwise (sign, exponent, any other character, a
 * second '.', an empty string or a lone ".").
 */
static int
is_valid_number(const char *s)
{
        int dots_found = 0;
        int digits_found = 0;

        for (; *s != '\0'; ++s) {
                /* the cast keeps isdigit() defined for chars >= 0x80 */
                if (isdigit((unsigned char)*s))
                        digits_found = 1;
                else if (*s != '.' || ++dots_found > 1)
                        return 0;
        }
        return digits_found;
}
478
/*
 * Parse a bandwidth specification and store the result.
 *
 * arg       the text to parse;
 * bandwidth in: -1 if no bandwidth has been set yet (any other value
 *           triggers a "duplicate token" warning); out: the bandwidth
 *           in bit/s, or 0 when an interface name was given;
 * if_name   buffer of namelen bytes that receives the interface name,
 *           or an empty string when a numeric bandwidth was given.
 *
 * An argument starting with a lowercase letter is taken as the name of
 * the clocking interface and is copied (truncated if needed, with a
 * warning) into if_name.  Anything else is read as a number, optionally
 * followed by 'K'/'k' (x1000) or 'M' (x1000000) and by a unit; a unit
 * that denotes bytes multiplies the value by 8.
 *
 * The value is computed in unsigned arithmetic and checked against
 * INT_MAX before the multiplication, so that an oversized value is
 * always rejected instead of silently wrapping around.
 */
static void
read_bandwidth(char *arg, int *bandwidth, char *if_name, int namelen)
{
        if (*bandwidth != -1)
                warnx("duplicate token, override bandwidth value!");

        if (arg[0] >= 'a' && arg[0] <= 'z') {
                /* interface name */
                namelen--;      /* leave room for the terminator */
                if (strlen(arg) > (size_t)namelen)
                        warnx("interface name truncated");
                strncpy(if_name, arg, namelen);
                if_name[namelen] = '\0';
                *bandwidth = 0;
        } else {        /* read bandwidth value */
                unsigned long bw;
                unsigned long scale = 1;
                char *end = NULL;

                bw = strtoul(arg, &end, 0);
                if (*end == 'K' || *end == 'k') {
                        end++;
                        scale = 1000;
                } else if (*end == 'M') {
                        end++;
                        scale = 1000000;
                }
                /* "B..." other than "Bit/s", or "bytes": value is in bytes */
                if ((*end == 'B' &&
                    _substrcmp2(end, "Bi", "Bit/s") != 0) ||
                    _substrcmp2(end, "by", "bytes") == 0)
                        scale *= 8;

                if (bw > (unsigned long)INT_MAX / scale)
                        errx(EX_DATAERR, "bandwidth too large");

                *bandwidth = (int)(bw * scale);
                if_name[0] = '\0';
        }
}
522
/* One data point of a delay profile curve. */
struct point {
        double prob;    /* probability, as read from the profile file */
        double delay;   /* delay, as read from the profile file */
};

/*
 * qsort() comparison callback for struct point: order by probability
 * and, when the probabilities are equal, by delay.
 * Returns -1, 0 or 1.
 */
static int
compare_points(const void *vp1, const void *vp2)
{
        const struct point *lhs = vp1;
        const struct point *rhs = vp2;
        double diff = lhs->prob - rhs->prob;

        /* same probability: fall back to the delay */
        if (diff == 0)
                diff = lhs->delay - rhs->delay;

        return (diff > 0) - (diff < 0);
}
545
546 #define ED_EFMT(s) EX_DATAERR,"error in %s at line %d: "#s,filename,lineno
547
548 static void
549 load_extra_delays(const char *filename, struct dn_pipe *p)
550 {
551         char    line[ED_MAX_LINE_LEN];
552         FILE    *f;
553         int     lineno = 0;
554         int     i;
555
556         int     samples = -1;
557         double  loss = -1.0;
558         char    profile_name[ED_MAX_NAME_LEN];
559         int     delay_first = -1;
560         int     do_points = 0;
561         struct point    points[ED_MAX_SAMPLES_NO];
562         int     points_no = 0;
563
564         profile_name[0] = '\0';
565         f = fopen(filename, "r");
566         if (f == NULL)
567                 err(EX_UNAVAILABLE, "fopen: %s", filename);
568
569         while (fgets(line, ED_MAX_LINE_LEN, f)) {         /* read commands */
570                 char *s, *cur = line, *name = NULL, *arg = NULL;
571
572                 ++lineno;
573
574                 /* parse the line */
575                 while (cur) {
576                         s = strsep(&cur, ED_SEPARATORS);
577                         if (s == NULL || *s == '#')
578                                 break;
579                         if (*s == '\0')
580                                 continue;
581                         if (arg)
582                                 errx(ED_EFMT("too many arguments"));
583                         if (name == NULL)
584                                 name = s;
585                         else
586                                 arg = s;
587                 }
588                 if (name == NULL)       /* empty line */
589                         continue;
590                 if (arg == NULL)
591                         errx(ED_EFMT("missing arg for %s"), name);
592
593                 if (!strcasecmp(name, ED_TOK_SAMPLES)) {
594                     if (samples > 0)
595                         errx(ED_EFMT("duplicate ``samples'' line"));
596                     if (atoi(arg) <=0)
597                         errx(ED_EFMT("invalid number of samples"));
598                     samples = atoi(arg);
599                     if (samples>ED_MAX_SAMPLES_NO)
600                             errx(ED_EFMT("too many samples, maximum is %d"),
601                                 ED_MAX_SAMPLES_NO);
602                     do_points = 0;
603                 } else if (!strcasecmp(name, ED_TOK_BW)) {
604                     read_bandwidth(arg, &p->bandwidth, p->if_name, sizeof(p->if_name));
605                 } else if (!strcasecmp(name, ED_TOK_LOSS)) {
606                     if (loss != -1.0)
607                         errx(ED_EFMT("duplicated token: %s"), name);
608                     if (!is_valid_number(arg))
609                         errx(ED_EFMT("invalid %s"), arg);
610                     loss = atof(arg);
611                     if (loss > 1)
612                         errx(ED_EFMT("%s greater than 1.0"), name);
613                     do_points = 0;
614                 } else if (!strcasecmp(name, ED_TOK_NAME)) {
615                     if (profile_name[0] != '\0')
616                         errx(ED_EFMT("duplicated token: %s"), name);
617                     strncpy(profile_name, arg, sizeof(profile_name) - 1);
618                     profile_name[sizeof(profile_name)-1] = '\0';
619                     do_points = 0;
620                 } else if (!strcasecmp(name, ED_TOK_DELAY)) {
621                     if (do_points)
622                         errx(ED_EFMT("duplicated token: %s"), name);
623                     delay_first = 1;
624                     do_points = 1;
625                 } else if (!strcasecmp(name, ED_TOK_PROB)) {
626                     if (do_points)
627                         errx(ED_EFMT("duplicated token: %s"), name);
628                     delay_first = 0;
629                     do_points = 1;
630                 } else if (do_points) {
631                     if (!is_valid_number(name) || !is_valid_number(arg))
632                         errx(ED_EFMT("invalid point found"));
633                     if (delay_first) {
634                         points[points_no].delay = atof(name);
635                         points[points_no].prob = atof(arg);
636                     } else {
637                         points[points_no].delay = atof(arg);
638                         points[points_no].prob = atof(name);
639                     }
640                     if (points[points_no].prob > 1.0)
641                         errx(ED_EFMT("probability greater than 1.0"));
642                     ++points_no;
643                 } else {
644                     errx(ED_EFMT("unrecognised command '%s'"), name);
645                 }
646         }
647
648         if (samples == -1) {
649             warnx("'%s' not found, assuming 100", ED_TOK_SAMPLES);
650             samples = 100;
651         }
652
653         if (loss == -1.0) {
654             warnx("'%s' not found, assuming no loss", ED_TOK_LOSS);
655             loss = 1;
656         }
657
658         /* make sure that there are enough points. */
659         if (points_no < ED_MIN_SAMPLES_NO)
660             errx(ED_EFMT("too few samples, need at least %d"),
661                 ED_MIN_SAMPLES_NO);
662
663         qsort(points, points_no, sizeof(struct point), compare_points);
664
665         /* interpolation */
666         for (i = 0; i<points_no-1; ++i) {
667             double y1 = points[i].prob * samples;
668             double x1 = points[i].delay;
669             double y2 = points[i+1].prob * samples;
670             double x2 = points[i+1].delay;
671
672             int index = y1;
673             int stop = y2;
674
675             if (x1 == x2) {
676                 for (; index<stop; ++index)
677                     p->samples[index] = x1;
678             } else {
679                 double m = (y2-y1)/(x2-x1);
680                 double c = y1 - m*x1;
681                 for (; index<stop ; ++index)
682                     p->samples[index] = (index - c)/m;
683             }
684         }
685         p->samples_no = samples;
686         p->loss_level = loss * samples;
687         strncpy(p->name, profile_name, sizeof(p->name));
688 }
689
690 void
691 ipfw_config_pipe(int ac, char **av)
692 {
693         int samples[ED_MAX_SAMPLES_NO];
694         struct dn_pipe p;
695         int i;
696         char *end;
697         void *par = NULL;
698
699         memset(&p, 0, sizeof p);
700         p.bandwidth = -1;
701
702         av++; ac--;
703         /* Pipe number */
704         if (ac && isdigit(**av)) {
705                 i = atoi(*av); av++; ac--;
706                 if (co.do_pipe == 1)
707                         p.pipe_nr = i;
708                 else
709                         p.fs.fs_nr = i;
710         }
711         while (ac > 0) {
712                 double d;
713                 int tok = match_token(dummynet_params, *av);
714                 ac--; av++;
715
716                 switch(tok) {
717                 case TOK_NOERROR:
718                         p.fs.flags_fs |= DN_NOERROR;
719                         break;
720
721                 case TOK_PLR:
722                         NEED1("plr needs argument 0..1\n");
723                         d = strtod(av[0], NULL);
724                         if (d > 1)
725                                 d = 1;
726                         else if (d < 0)
727                                 d = 0;
728                         p.fs.plr = (int)(d*0x7fffffff);
729                         ac--; av++;
730                         break;
731
732                 case TOK_QUEUE:
733                         NEED1("queue needs queue size\n");
734                         end = NULL;
735                         p.fs.qsize = strtoul(av[0], &end, 0);
736                         if (*end == 'K' || *end == 'k') {
737                                 p.fs.flags_fs |= DN_QSIZE_IS_BYTES;
738                                 p.fs.qsize *= 1024;
739                         } else if (*end == 'B' ||
740                             _substrcmp2(end, "by", "bytes") == 0) {
741                                 p.fs.flags_fs |= DN_QSIZE_IS_BYTES;
742                         }
743                         ac--; av++;
744                         break;
745
746                 case TOK_BUCKETS:
747                         NEED1("buckets needs argument\n");
748                         p.fs.rq_size = strtoul(av[0], NULL, 0);
749                         ac--; av++;
750                         break;
751
752                 case TOK_MASK:
753                         NEED1("mask needs mask specifier\n");
754                         /*
755                          * per-flow queue, mask is dst_ip, dst_port,
756                          * src_ip, src_port, proto measured in bits
757                          */
758                         par = NULL;
759
760                         bzero(&p.fs.flow_mask, sizeof(p.fs.flow_mask));
761                         end = NULL;
762
763                         while (ac >= 1) {
764                             uint32_t *p32 = NULL;
765                             uint16_t *p16 = NULL;
766                             uint32_t *p20 = NULL;
767                             struct in6_addr *pa6 = NULL;
768                             uint32_t a;
769
770                             tok = match_token(dummynet_params, *av);
771                             ac--; av++;
772                             switch(tok) {
773                             case TOK_ALL:
774                                     /*
775                                      * special case, all bits significant
776                                      */
777                                     p.fs.flow_mask.dst_ip = ~0;
778                                     p.fs.flow_mask.src_ip = ~0;
779                                     p.fs.flow_mask.dst_port = ~0;
780                                     p.fs.flow_mask.src_port = ~0;
781                                     p.fs.flow_mask.proto = ~0;
782                                     n2mask(&(p.fs.flow_mask.dst_ip6), 128);
783                                     n2mask(&(p.fs.flow_mask.src_ip6), 128);
784                                     p.fs.flow_mask.flow_id6 = ~0;
785                                     p.fs.flags_fs |= DN_HAVE_FLOW_MASK;
786                                     goto end_mask;
787
788                             case TOK_DSTIP:
789                                     p32 = &p.fs.flow_mask.dst_ip;
790                                     break;
791
792                             case TOK_SRCIP:
793                                     p32 = &p.fs.flow_mask.src_ip;
794                                     break;
795
796                             case TOK_DSTIP6:
797                                     pa6 = &(p.fs.flow_mask.dst_ip6);
798                                     break;
799                             
800                             case TOK_SRCIP6:
801                                     pa6 = &(p.fs.flow_mask.src_ip6);
802                                     break;
803
804                             case TOK_FLOWID:
805                                     p20 = &p.fs.flow_mask.flow_id6;
806                                     break;
807
808                             case TOK_DSTPORT:
809                                     p16 = &p.fs.flow_mask.dst_port;
810                                     break;
811
812                             case TOK_SRCPORT:
813                                     p16 = &p.fs.flow_mask.src_port;
814                                     break;
815
816                             case TOK_PROTO:
817                                     break;
818
819                             default:
820                                     ac++; av--; /* backtrack */
821                                     goto end_mask;
822                             }
823                             if (ac < 1)
824                                     errx(EX_USAGE, "mask: value missing");
825                             if (*av[0] == '/') {
826                                     a = strtoul(av[0]+1, &end, 0);
827                                     if (pa6 == NULL)
828                                             a = (a == 32) ? ~0 : (1 << a) - 1;
829                             } else
830                                     a = strtoul(av[0], &end, 0);
831                             if (p32 != NULL)
832                                     *p32 = a;
833                             else if (p16 != NULL) {
834                                     if (a > 0xFFFF)
835                                             errx(EX_DATAERR,
836                                                 "port mask must be 16 bit");
837                                     *p16 = (uint16_t)a;
838                             } else if (p20 != NULL) {
839                                     if (a > 0xfffff)
840                                         errx(EX_DATAERR,
841                                             "flow_id mask must be 20 bit");
842                                     *p20 = (uint32_t)a;
843                             } else if (pa6 != NULL) {
844                                     if (a > 128)
845                                         errx(EX_DATAERR,
846                                             "in6addr invalid mask len");
847                                     else
848                                         n2mask(pa6, a);
849                             } else {
850                                     if (a > 0xFF)
851                                             errx(EX_DATAERR,
852                                                 "proto mask must be 8 bit");
853                                     p.fs.flow_mask.proto = (uint8_t)a;
854                             }
855                             if (a != 0)
856                                     p.fs.flags_fs |= DN_HAVE_FLOW_MASK;
857                             ac--; av++;
858                         } /* end while, config masks */
859 end_mask:
860                         break;
861
862                 case TOK_RED:
863                 case TOK_GRED:
864                         NEED1("red/gred needs w_q/min_th/max_th/max_p\n");
865                         p.fs.flags_fs |= DN_IS_RED;
866                         if (tok == TOK_GRED)
867                                 p.fs.flags_fs |= DN_IS_GENTLE_RED;
868                         /*
869                          * the format for parameters is w_q/min_th/max_th/max_p
870                          */
871                         if ((end = strsep(&av[0], "/"))) {
872                             double w_q = strtod(end, NULL);
873                             if (w_q > 1 || w_q <= 0)
874                                 errx(EX_DATAERR, "0 < w_q <= 1");
875                             p.fs.w_q = (int) (w_q * (1 << SCALE_RED));
876                         }
877                         if ((end = strsep(&av[0], "/"))) {
878                             p.fs.min_th = strtoul(end, &end, 0);
879                             if (*end == 'K' || *end == 'k')
880                                 p.fs.min_th *= 1024;
881                         }
882                         if ((end = strsep(&av[0], "/"))) {
883                             p.fs.max_th = strtoul(end, &end, 0);
884                             if (*end == 'K' || *end == 'k')
885                                 p.fs.max_th *= 1024;
886                         }
887                         if ((end = strsep(&av[0], "/"))) {
888                             double max_p = strtod(end, NULL);
889                             if (max_p > 1 || max_p <= 0)
890                                 errx(EX_DATAERR, "0 < max_p <= 1");
891                             p.fs.max_p = (int)(max_p * (1 << SCALE_RED));
892                         }
893                         ac--; av++;
894                         break;
895
896                 case TOK_DROPTAIL:
897                         p.fs.flags_fs &= ~(DN_IS_RED|DN_IS_GENTLE_RED);
898                         break;
899
900                 case TOK_BW:
901                         NEED1("bw needs bandwidth or interface\n");
902                         if (co.do_pipe != 1)
903                             errx(EX_DATAERR, "bandwidth only valid for pipes");
904                         read_bandwidth(av[0], &p.bandwidth, p.if_name, sizeof(p.if_name));
905                         ac--; av++;
906                         break;
907
908                 case TOK_DELAY:
909                         if (co.do_pipe != 1)
910                                 errx(EX_DATAERR, "delay only valid for pipes");
911                         NEED1("delay needs argument 0..10000ms\n");
912                         p.delay = strtoul(av[0], NULL, 0);
913                         ac--; av++;
914                         break;
915
916                 case TOK_WEIGHT:
917                         if (co.do_pipe == 1)
918                                 errx(EX_DATAERR,"weight only valid for queues");
919                         NEED1("weight needs argument 0..100\n");
920                         p.fs.weight = strtoul(av[0], &end, 0);
921                         ac--; av++;
922                         break;
923
924                 case TOK_PIPE:
925                         if (co.do_pipe == 1)
926                                 errx(EX_DATAERR,"pipe only valid for queues");
927                         NEED1("pipe needs pipe_number\n");
928                         p.fs.parent_nr = strtoul(av[0], &end, 0);
929                         ac--; av++;
930                         break;
931
932                 case TOK_PIPE_PROFILE:
933                         if (co.do_pipe != 1)
934                             errx(EX_DATAERR, "extra delay only valid for pipes");
935                         NEED1("extra delay needs the file name\n");
936                         p.samples = &samples[0];
937                         load_extra_delays(av[0], &p);
938                         --ac; ++av;
939                         break;
940
941                 default:
942                         errx(EX_DATAERR, "unrecognised option ``%s''", av[-1]);
943                 }
944         }
945         if (co.do_pipe == 1) {
946                 if (p.pipe_nr == 0)
947                         errx(EX_DATAERR, "pipe_nr must be > 0");
948                 if (p.delay > 10000)
949                         errx(EX_DATAERR, "delay must be < 10000");
950         } else { /* co.do_pipe == 2, queue */
951                 if (p.fs.parent_nr == 0)
952                         errx(EX_DATAERR, "pipe must be > 0");
953                 if (p.fs.weight >100)
954                         errx(EX_DATAERR, "weight must be <= 100");
955         }
956
957         /* check for bandwidth value */
958         if (p.bandwidth == -1) {
959                 p.bandwidth = 0;
960                 if (p.samples_no > 0)
961                         errx(EX_DATAERR, "profile requires a bandwidth limit");
962         }
963
964         if (p.fs.flags_fs & DN_QSIZE_IS_BYTES) {
965                 size_t len;
966                 long limit;
967
968                 len = sizeof(limit);
969                 if (sysctlbyname("net.inet.ip.dummynet.pipe_byte_limit",
970                         &limit, &len, NULL, 0) == -1)
971                         limit = 1024*1024;
972                 if (p.fs.qsize > limit)
973                         errx(EX_DATAERR, "queue size must be < %ldB", limit);
974         } else {
975                 size_t len;
976                 long limit;
977
978                 len = sizeof(limit);
979                 if (sysctlbyname("net.inet.ip.dummynet.pipe_slot_limit",
980                         &limit, &len, NULL, 0) == -1)
981                         limit = 100;
982                 if (p.fs.qsize > limit)
983                         errx(EX_DATAERR, "2 <= queue size <= %ld", limit);
984         }
985         if (p.fs.flags_fs & DN_IS_RED) {
986                 size_t len;
987                 int lookup_depth, avg_pkt_size;
988                 double s, idle, weight, w_q;
989                 struct clockinfo ck;
990                 int t;
991
992                 if (p.fs.min_th >= p.fs.max_th)
993                     errx(EX_DATAERR, "min_th %d must be < than max_th %d",
994                         p.fs.min_th, p.fs.max_th);
995                 if (p.fs.max_th == 0)
996                     errx(EX_DATAERR, "max_th must be > 0");
997
998                 len = sizeof(int);
999                 if (sysctlbyname("net.inet.ip.dummynet.red_lookup_depth",
1000                         &lookup_depth, &len, NULL, 0) == -1)
1001                     errx(1, "sysctlbyname(\"%s\")",
1002                         "net.inet.ip.dummynet.red_lookup_depth");
1003                 if (lookup_depth == 0)
1004                     errx(EX_DATAERR, "net.inet.ip.dummynet.red_lookup_depth"
1005                         " must be greater than zero");
1006
1007                 len = sizeof(int);
1008                 if (sysctlbyname("net.inet.ip.dummynet.red_avg_pkt_size",
1009                         &avg_pkt_size, &len, NULL, 0) == -1)
1010
1011                     errx(1, "sysctlbyname(\"%s\")",
1012                         "net.inet.ip.dummynet.red_avg_pkt_size");
1013                 if (avg_pkt_size == 0)
1014                         errx(EX_DATAERR,
1015                             "net.inet.ip.dummynet.red_avg_pkt_size must"
1016                             " be greater than zero");
1017
1018                 len = sizeof(struct clockinfo);
1019                 if (sysctlbyname("kern.clockrate", &ck, &len, NULL, 0) == -1)
1020                         errx(1, "sysctlbyname(\"%s\")", "kern.clockrate");
1021
1022                 /*
1023                  * Ticks needed for sending a medium-sized packet.
1024                  * Unfortunately, when we are configuring a WF2Q+ queue, we
1025                  * do not have bandwidth information, because that is stored
1026                  * in the parent pipe, and also we have multiple queues
1027                  * competing for it. So we set s=0, which is not very
1028                  * correct. But on the other hand, why do we want RED with
1029                  * WF2Q+ ?
1030                  */
1031                 if (p.bandwidth==0) /* this is a WF2Q+ queue */
1032                         s = 0;
1033                 else
1034                         s = (double)ck.hz * avg_pkt_size * 8 / p.bandwidth;
1035
1036                 /*
1037                  * max idle time (in ticks) before avg queue size becomes 0.
1038                  * NOTE: (3/w_q) is approximately the value x such that
1039                  * (1-w_q)^x < 10^-3.
1040                  */
1041                 w_q = ((double)p.fs.w_q) / (1 << SCALE_RED);
1042                 idle = s * 3. / w_q;
1043                 p.fs.lookup_step = (int)idle / lookup_depth;
1044                 if (!p.fs.lookup_step)
1045                         p.fs.lookup_step = 1;
1046                 weight = 1 - w_q;
1047                 for (t = p.fs.lookup_step; t > 1; --t)
1048                         weight *= 1 - w_q;
1049                 p.fs.lookup_weight = (int)(weight * (1 << SCALE_RED));
1050         }
1051         if (p.samples_no <= 0) {
1052         i = do_cmd(IP_DUMMYNET_CONFIGURE, &p, sizeof p);
1053         } else {
1054                 struct dn_pipe_max pm;
1055                 int len = sizeof(pm);
1056
1057                 memcpy(&pm.pipe, &p, sizeof(pm.pipe));
1058                 memcpy(&pm.samples, samples, sizeof(pm.samples));
1059
1060                 i = do_cmd(IP_DUMMYNET_CONFIGURE, &pm, len);
1061         }
1062
1063         if (i)
1064                 err(1, "setsockopt(%s)", "IP_DUMMYNET_CONFIGURE");
1065 }