X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=dummynet%2Fip_dummynet.c;h=0b23881550518718b0062aaa9df1334d027ccd18;hb=31b969263c34f46f398eec33c0b0e95947842cda;hp=a613bbfd9120f13c1288541eb0faab6d3fbe52c0;hpb=3f06507f45547df04f6be56c4bcb02dce9d2d316;p=ipfw.git diff --git a/dummynet/ip_dummynet.c b/dummynet/ip_dummynet.c index a613bbf..0b23881 100644 --- a/dummynet/ip_dummynet.c +++ b/dummynet/ip_dummynet.c @@ -56,7 +56,8 @@ __FBSDID("$FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.110.2.4 2008/10/31 12:58:1 * include files marked with XXX are probably not needed */ -#include +#include "missing.h" + #include #include #include @@ -64,15 +65,15 @@ __FBSDID("$FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.110.2.4 2008/10/31 12:58:1 #include #include #include -#include #include #include +#include #include #include #include #include #include -#include /* IFNAMSIZ, struct ifaddr, ifq head */ +#include /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */ #include #include #include /* ip_len, ip_off */ @@ -85,7 +86,6 @@ __FBSDID("$FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.110.2.4 2008/10/31 12:58:1 #include /* for ip6_input, ip6_output prototypes */ #include -#include "missing.h" /* * We keep a private variable for the simulation time, but we could * probably use an existing one ("softticks" in sys/kern/kern_timeout.c) @@ -247,8 +247,18 @@ static void dummynet(void *); static void dummynet_flush(void); static void dummynet_send(struct mbuf *); void dummynet_drain(void); -static ip_dn_io_t dummynet_io; -static void dn_rule_delete(void *); +static int dummynet_io(struct mbuf **, int , struct ip_fw_args *); + +/* + * Flow queue is idle if: + * 1) it's empty for at least 1 tick + * 2) it has invalid timestamp (WF2Q case) + * 3) parent pipe has no 'exhausted' burst. + */ +#define QUEUE_IS_IDLE(q) ((q)->head == NULL && (q)->S == (q)->F + 1 && \ + curr_time > (q)->idle_time + 1 && \ + ((q)->numbytes + (curr_time - (q)->idle_time - 1) * \ + (q)->fs->pipe->bandwidth >= (q)->fs->pipe->burst)) /* * Heap management functions. @@ -456,6 +466,31 @@ heap_free(struct dn_heap *h) * --- end of heap management functions --- */ +/* + * Dispose a packet in dummynet. Use an inline functions so if we + * need to free extra state associated to a packet, this is a + * central point to do it. + */ +static __inline void *dn_free_pkt(struct mbuf *m) +{ +#ifdef __linux__ + netisr_dispatch(-1, m); /* -1 drop the packet */ +#else + m_freem(m); +#endif + return NULL; +} + +static __inline void dn_free_pkts(struct mbuf *mnext) +{ + struct mbuf *m; + + while ((m = mnext) != NULL) { + mnext = m->m_nextpkt; + dn_free_pkt(m); + } +} + /* * Return the mbuf tag holding the dummynet state. As an optimization * this is assumed to be the first tag on the list. If this turns out @@ -670,7 +705,7 @@ ready_event(struct dn_flow_queue *q, struct mbuf **head, struct mbuf **tail) * queue on error hoping next time we are luckier. */ } else /* RED needs to know when the queue becomes empty. */ - q->q_time = curr_time; + q->idle_time = curr_time; /* * If the delay line was empty call transmit_event() now. @@ -696,13 +731,17 @@ ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail) struct dn_heap *neh = &(p->not_eligible_heap); int64_t p_numbytes = p->numbytes; + /* + * p->numbytes is only 32bits in FBSD7, but we might need 64 bits. + * Use a local variable for the computations, and write back the + * results when done, saturating if needed. + * The local variable has no impact on performance and helps + * reducing diffs between the various branches. + */ + DUMMYNET_LOCK_ASSERT(); if (p->if_name[0] == 0) /* tx clock is simulated */ - /* - * Since result may not fit into p->numbytes (32bit) we - * are using 64bit var here. - */ p_numbytes += (curr_time - p->sched_time) * p->bandwidth; else { /* * tx clock is for real, @@ -776,23 +815,26 @@ ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail) break; } } - if (sch->elements == 0 && neh->elements == 0 && p_numbytes >= 0 && - p->idle_heap.elements > 0) { + if (sch->elements == 0 && neh->elements == 0 && p_numbytes >= 0) { + p->idle_time = curr_time; /* * No traffic and no events scheduled. * We can get rid of idle-heap. */ - int i; + if (p->idle_heap.elements > 0) { + int i; - for (i = 0; i < p->idle_heap.elements; i++) { - struct dn_flow_queue *q = p->idle_heap.p[i].object; + for (i = 0; i < p->idle_heap.elements; i++) { + struct dn_flow_queue *q; - q->F = 0; - q->S = q->F + 1; + q = p->idle_heap.p[i].object; + q->F = 0; + q->S = q->F + 1; + } + p->sum = 0; + p->V = 0; + p->idle_heap.elements = 0; } - p->sum = 0; - p->V = 0; - p->idle_heap.elements = 0; } /* * If we are getting clocks from dummynet (not a real interface) and @@ -813,13 +855,8 @@ ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail) */ } - /* Fit (adjust if necessary) 64bit result into 32bit variable. */ - if (p_numbytes > INT_MAX) - p->numbytes = INT_MAX; - else if (p_numbytes < INT_MIN) - p->numbytes = INT_MIN; - else - p->numbytes = p_numbytes; + /* Write back p_numbytes (adjust 64->32bit if necessary). */ + p->numbytes = p_numbytes; /* * If the delay line was empty call transmit_event() now. @@ -959,6 +996,7 @@ dummynet_send(struct mbuf *m) pkt = dn_tag_get(m); dst = pkt->dn_dir; } + switch (dst) { case DN_TO_IP_OUT: ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); @@ -1007,16 +1045,12 @@ dummynet_send(struct mbuf *m) case DN_TO_DROP: /* drop the packet after some time */ -#ifdef __linux__ - netisr_dispatch(-1, m); /* -1 drop the packet */ -#else - m_freem(m); -#endif + dn_free_pkt(m); break; default: printf("dummynet: bad switch %d!\n", pkt->dn_dir); - m_freem(m); + dn_free_pkt(m); break; } } @@ -1037,7 +1071,7 @@ expire_queues(struct dn_flow_set *fs) fs->last_expired = time_uptime ; for (i = 0 ; i <= fs->rq_size ; i++) /* last one is overflow */ for (prev=NULL, q = fs->rq[i] ; q != NULL ; ) - if (q->head != NULL || q->S != q->F+1) { + if (!QUEUE_IS_IDLE(q)) { prev = q ; q = q->next ; } else { /* entry is idle, expire it */ @@ -1078,7 +1112,7 @@ create_queue(struct dn_flow_set *fs, int i) q->hash_slot = i; q->next = fs->rq[i]; q->S = q->F + 1; /* hack - mark timestamp as invalid. */ - q->numbytes = io_fast ? fs->pipe->bandwidth : 0; + q->numbytes = fs->pipe->burst + (io_fast ? fs->pipe->bandwidth : 0); fs->rq[i] = q; fs->rq_elements++; return (q); @@ -1167,7 +1201,7 @@ find_queue(struct dn_flow_set *fs, struct ipfw_flow_id *id) break ; /* found */ /* No match. Check if we can expire the entry */ - if (pipe_expire && q->head == NULL && q->S == q->F+1 ) { + if (pipe_expire && QUEUE_IS_IDLE(q)) { /* entry is idle and not in any heap, expire it */ struct dn_flow_queue *old_q = q ; @@ -1240,10 +1274,10 @@ red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len) * XXX check wraps... */ if (q->avg) { - u_int t = div64(curr_time - q->q_time, + u_int t = div64(curr_time - q->idle_time, fs->lookup_step); - q->avg = (t >= 0 && t < fs->lookup_depth) ? + q->avg = (t < fs->lookup_depth) ? SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0; } } @@ -1421,6 +1455,8 @@ dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa) * Build and enqueue packet + parameters. */ pkt->rule = fwa->rule; + pkt->rule_id = fwa->rule_id; + pkt->chain_id = fwa->chain_id; pkt->dn_dir = dir; pkt->ifp = fwa->oif; @@ -1436,9 +1472,32 @@ dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa) if (q->head != m) /* Flow was not idle, we are done. */ goto done; - if (q->q_time < (uint32_t)curr_time) - q->numbytes = io_fast ? fs->pipe->bandwidth : 0; - q->q_time = curr_time; + if (is_pipe) { /* Fixed rate queues. */ + if (q->idle_time < curr_time) { + /* Calculate available burst size. */ + q->numbytes += + (curr_time - q->idle_time - 1) * pipe->bandwidth; + if (q->numbytes > pipe->burst) + q->numbytes = pipe->burst; + if (io_fast) + q->numbytes += pipe->bandwidth; + } + } else { /* WF2Q. */ + if (pipe->idle_time < curr_time && + pipe->scheduler_heap.elements == 0 && + pipe->not_eligible_heap.elements == 0) { + /* Calculate available burst size. */ + pipe->numbytes += + (curr_time - pipe->idle_time - 1) * pipe->bandwidth; + if (pipe->numbytes > 0 && pipe->numbytes > pipe->burst) + pipe->numbytes = pipe->burst; + if (io_fast) + pipe->numbytes += pipe->bandwidth; + } + pipe->idle_time = curr_time; + } + /* Necessary for both: fixed rate & WF2Q queues. */ + q->idle_time = curr_time; /* * If we reach this point the flow was previously idle, so we need @@ -1475,7 +1534,7 @@ dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa) heap_extract(&(pipe->idle_heap), q); q->S = MAX64(q->F, pipe->V); } - q->F = div64(q->S + (len << MY_M), fs->weight); + q->F = q->S + div64(len << MY_M, fs->weight); if (pipe->not_eligible_heap.elements == 0 && pipe->scheduler_heap.elements == 0) @@ -1530,27 +1589,10 @@ dropit: if (q) q->drops++; DUMMYNET_UNLOCK(); - /* - * set the tag, if present. dn_tag_get cannot fail - * so we need to check first - */ - if (m_tag_first(m)) { - pkt = dn_tag_get(m); - pkt->dn_dir = DN_TO_DROP; - } - dummynet_send(m); /* drop the packet */ - *m0 = NULL; + *m0 = dn_free_pkt(m); return ((fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS); } -/* - * Below, the rt_unref is only needed when (pkt->dn_dir == DN_TO_IP_OUT) - * Doing this would probably save us the initial bzero of dn_pkt - */ -#define DN_FREE_PKT(_m) do { \ - m_freem(_m); \ -} while (0) - /* * Dispose all packets and flow_queues on a flow_set. * If all=1, also remove red lookup table and other storage, @@ -1567,13 +1609,7 @@ purge_flow_set(struct dn_flow_set *fs, int all) for (i = 0; i <= fs->rq_size; i++) { for (q = fs->rq[i]; q != NULL; q = qn) { - struct mbuf *m, *mnext; - - mnext = q->head; - while ((m = mnext) != NULL) { - mnext = m->m_nextpkt; - DN_FREE_PKT(m); - } + dn_free_pkts(q->head); qn = q->next; free(q, M_DUMMYNET); } @@ -1601,15 +1637,10 @@ purge_flow_set(struct dn_flow_set *fs, int all) static void purge_pipe(struct dn_pipe *pipe) { - struct mbuf *m, *mnext; purge_flow_set( &(pipe->fs), 1 ); - mnext = pipe->head; - while ((m = mnext) != NULL) { - mnext = m->m_nextpkt; - DN_FREE_PKT(m); - } + dn_free_pkts(pipe->head); heap_free( &(pipe->scheduler_heap) ); heap_free( &(pipe->not_eligible_heap) ); @@ -1652,60 +1683,6 @@ dummynet_flush(void) DUMMYNET_UNLOCK(); } -extern struct ip_fw *ip_fw_default_rule; -static void -dn_rule_delete_fs(struct dn_flow_set *fs, void *r) -{ - int i ; - struct dn_flow_queue *q ; - struct mbuf *m ; - - for (i = 0 ; i <= fs->rq_size ; i++) /* last one is ovflow */ - for (q = fs->rq[i] ; q ; q = q->next ) - for (m = q->head ; m ; m = m->m_nextpkt ) { - struct dn_pkt_tag *pkt = dn_tag_get(m) ; - if (pkt->rule == r) - pkt->rule = ip_fw_default_rule ; - } -} - -/* - * When a firewall rule is deleted, scan all queues and remove the pointer - * to the rule from matching packets, making them point to the default rule. - * The pointer is used to reinject packets in case one_pass = 0. - */ -void -dn_rule_delete(void *r) -{ - struct dn_pipe *pipe; - struct dn_flow_set *fs; - struct dn_pkt_tag *pkt; - struct mbuf *m; - int i; - - DUMMYNET_LOCK(); - /* - * If the rule references a queue (dn_flow_set), then scan - * the flow set, otherwise scan pipes. Should do either, but doing - * both does not harm. - */ - for (i = 0; i < HASHSIZE; i++) - SLIST_FOREACH(fs, &flowsethash[i], next) - dn_rule_delete_fs(fs, r); - - for (i = 0; i < HASHSIZE; i++) - SLIST_FOREACH(pipe, &pipehash[i], next) { - fs = &(pipe->fs); - dn_rule_delete_fs(fs, r); - for (m = pipe->head ; m ; m = m->m_nextpkt ) { - pkt = dn_tag_get(m); - if (pkt->rule == r) - pkt->rule = ip_fw_default_rule; - } - } - DUMMYNET_UNLOCK(); -} - /* * setup RED parameters */ @@ -1828,6 +1805,8 @@ config_pipe(struct dn_pipe *p) * qsize = slots/bytes */ p->delay = (p->delay * hz) / 1000; + /* Scale burst size: bytes -> bits * hz */ + p->burst *= 8 * hz; /* We need either a pipe number or a flow_set number. */ if (p->pipe_nr == 0 && pfs->fs_nr == 0) return (EINVAL); @@ -1859,11 +1838,14 @@ config_pipe(struct dn_pipe *p) } else /* Flush accumulated credit for all queues. */ for (i = 0; i <= pipe->fs.rq_size; i++) - for (q = pipe->fs.rq[i]; q; q = q->next) - q->numbytes = io_fast ? p->bandwidth : 0; + for (q = pipe->fs.rq[i]; q; q = q->next) { + q->numbytes = p->burst + + (io_fast ? p->bandwidth : 0); + } pipe->bandwidth = p->bandwidth; - pipe->numbytes = 0; /* just in case... */ + pipe->burst = p->burst; + pipe->numbytes = pipe->burst + (io_fast ? pipe->bandwidth : 0); bcopy(p->if_name, pipe->if_name, sizeof(p->if_name)); pipe->ifp = NULL; /* reset interface ptr */ pipe->delay = p->delay; @@ -2008,7 +1990,6 @@ dummynet_drain(void) { struct dn_flow_set *fs; struct dn_pipe *pipe; - struct mbuf *m, *mnext; int i; DUMMYNET_LOCK_ASSERT(); @@ -2024,12 +2005,7 @@ dummynet_drain(void) for (i = 0; i < HASHSIZE; i++) { SLIST_FOREACH(pipe, &pipehash[i], next) { purge_flow_set(&(pipe->fs), 0); - - mnext = pipe->head; - while ((m = mnext) != NULL) { - mnext = m->m_nextpkt; - DN_FREE_PKT(m); - } + dn_free_pkts(pipe->head); pipe->head = pipe->tail = NULL; } } @@ -2204,6 +2180,7 @@ dummynet_get(struct sockopt *sopt) */ bcopy(pipe, bp, sizeof(*pipe)); pipe_bp->delay = (pipe_bp->delay * 1000) / hz; + pipe_bp->burst = div64(pipe_bp->burst, 8 * hz); /* * XXX the following is a hack based on ->next being the * first field in dn_pipe and dn_flow_set. The correct @@ -2270,7 +2247,7 @@ ip_dn_ctl(struct sockopt *sopt) switch (sopt->sopt_name) { default : printf("dummynet: -- unknown option %d", sopt->sopt_name); - error = EINVAL ; + error = EINVAL ; break ; case IP_DUMMYNET_GET : @@ -2293,8 +2270,8 @@ ip_dn_ctl(struct sockopt *sopt) break ; case IP_DUMMYNET_DEL : /* remove a pipe or queue */ - p = malloc(sizeof(struct dn_pipe_max), M_TEMP, M_WAITOK); - error = sooptcopyin(sopt, p, sizeof *p, sizeof *p); + p = malloc(sizeof(struct dn_pipe), M_TEMP, M_WAITOK); + error = sooptcopyin(sopt, p, sizeof (struct dn_pipe), sizeof *p); if (error) break ; @@ -2333,7 +2310,6 @@ ip_dn_init(void) ip_dn_ctl_ptr = ip_dn_ctl; ip_dn_io_ptr = dummynet_io; - ip_dn_ruledel_ptr = dn_rule_delete; TASK_INIT(&dn_task, 0, dummynet_task, NULL); dn_tq = taskqueue_create_fast("dummynet", M_NOWAIT, @@ -2353,7 +2329,6 @@ ip_dn_destroy(void) { ip_dn_ctl_ptr = NULL; ip_dn_io_ptr = NULL; - ip_dn_ruledel_ptr = NULL; DUMMYNET_LOCK(); callout_stop(&dn_timeout);