* include files marked with XXX are probably not needed
*/
-#include <sys/limits.h>
+#include "missing.h"
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
-#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
+#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/time.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
-#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head */
+#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
#include <net/netisr.h>
#include <netinet/in.h>
#include <netinet/ip.h> /* ip_len, ip_off */
#include <netinet/ip6.h> /* for ip6_input, ip6_output prototypes */
#include <netinet6/ip6_var.h>
-#include "missing.h"
/*
* We keep a private variable for the simulation time, but we could
* probably use an existing one ("softticks" in sys/kern/kern_timeout.c)
static void dummynet_flush(void);
static void dummynet_send(struct mbuf *);
void dummynet_drain(void);
-static ip_dn_io_t dummynet_io;
-static void dn_rule_delete(void *);
+static int dummynet_io(struct mbuf **, int , struct ip_fw_args *);
+
+/*
+ * QUEUE_IS_IDLE(q): true when flow queue q can be treated as idle.
+ * All of the following must hold:
+ * 1) it's empty (head == NULL) for at least 1 tick, tested as
+ *    curr_time > idle_time + 1;
+ * 2) it has invalid timestamp, S == F + 1 (WF2Q case);
+ * 3) parent pipe has no 'exhausted' burst: the queue's numbytes plus
+ *    (curr_time - idle_time - 1) * pipe bandwidth has reached the
+ *    pipe's burst value.
+ *
+ * The argument is evaluated several times, so it must not have side
+ * effects. curr_time is taken from the scope where the macro is used.
+ */
+#define QUEUE_IS_IDLE(q) ((q)->head == NULL && (q)->S == (q)->F + 1 && \
+ curr_time > (q)->idle_time + 1 && \
+ ((q)->numbytes + (curr_time - (q)->idle_time - 1) * \
+ (q)->fs->pipe->bandwidth >= (q)->fs->pipe->burst))
/*
* Heap management functions.
* --- end of heap management functions ---
*/
+/*
+ * Dispose of a packet in dummynet. This is an inline function so that,
+ * if we ever need to free extra state associated with a packet, there
+ * is a single central point to do it.
+ *
+ * On __linux__ builds the mbuf is passed to netisr_dispatch() with -1;
+ * otherwise it is released with m_freem().
+ *
+ * Always returns NULL, so a caller can dispose of the packet and clear
+ * its own pointer in a single assignment.
+ */
+static __inline void *dn_free_pkt(struct mbuf *m)
+{
+#ifdef __linux__
+ netisr_dispatch(-1, m); /* -1 drop the packet */
+#else
+ m_freem(m);
+#endif
+ return NULL;
+}
+/*
+ * Dispose of a whole list of packets. Starting at mnext, follow the
+ * m_nextpkt links and hand each packet to dn_free_pkt(). A NULL
+ * argument makes the loop body never run.
+ */
+static __inline void dn_free_pkts(struct mbuf *mnext)
+{
+ struct mbuf *m;
+
+ while ((m = mnext) != NULL) {
+ mnext = m->m_nextpkt; /* read the link before m is disposed of */
+ dn_free_pkt(m);
+ }
+}
+
/*
* Return the mbuf tag holding the dummynet state. As an optimization
* this is assumed to be the first tag on the list. If this turns out
* queue on error hoping next time we are luckier.
*/
} else /* RED needs to know when the queue becomes empty. */
- q->q_time = curr_time;
+ q->idle_time = curr_time;
/*
* If the delay line was empty call transmit_event() now.
struct dn_heap *neh = &(p->not_eligible_heap);
int64_t p_numbytes = p->numbytes;
+ /*
+ * p->numbytes is only 32 bits in FBSD7, but we might need 64 bits.
+ * Use a local variable for the computations, and write back the
+ * result when done, saturating if needed.
+ * The local variable has no impact on performance and helps
+ * reduce diffs between the various branches.
+ */
+
DUMMYNET_LOCK_ASSERT();
if (p->if_name[0] == 0) /* tx clock is simulated */
- /*
- * Since result may not fit into p->numbytes (32bit) we
- * are using 64bit var here.
- */
p_numbytes += (curr_time - p->sched_time) * p->bandwidth;
else { /*
* tx clock is for real,
break;
}
}
- if (sch->elements == 0 && neh->elements == 0 && p_numbytes >= 0 &&
- p->idle_heap.elements > 0) {
+ if (sch->elements == 0 && neh->elements == 0 && p_numbytes >= 0) {
+ p->idle_time = curr_time;
/*
* No traffic and no events scheduled.
* We can get rid of idle-heap.
*/
- int i;
+ if (p->idle_heap.elements > 0) {
+ int i;
- for (i = 0; i < p->idle_heap.elements; i++) {
- struct dn_flow_queue *q = p->idle_heap.p[i].object;
+ for (i = 0; i < p->idle_heap.elements; i++) {
+ struct dn_flow_queue *q;
- q->F = 0;
- q->S = q->F + 1;
+ q = p->idle_heap.p[i].object;
+ q->F = 0;
+ q->S = q->F + 1;
+ }
+ p->sum = 0;
+ p->V = 0;
+ p->idle_heap.elements = 0;
}
- p->sum = 0;
- p->V = 0;
- p->idle_heap.elements = 0;
}
/*
* If we are getting clocks from dummynet (not a real interface) and
*/
}
- /* Fit (adjust if necessary) 64bit result into 32bit variable. */
- if (p_numbytes > INT_MAX)
- p->numbytes = INT_MAX;
- else if (p_numbytes < INT_MIN)
- p->numbytes = INT_MIN;
- else
- p->numbytes = p_numbytes;
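+ /*
+ * Write back p_numbytes. Any 64->32bit adjustment is left to the
+ * assignment itself.
+ * NOTE(review): there is no explicit clamping here; confirm that
+ * p->numbytes is wide enough to hold the 64-bit value.
+ */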
+ p->numbytes = p_numbytes;
/*
* If the delay line was empty call transmit_event() now.
pkt = dn_tag_get(m);
dst = pkt->dn_dir;
}
+
switch (dst) {
case DN_TO_IP_OUT:
ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
case DN_TO_DROP:
/* drop the packet after some time */
-#ifdef __linux__
- netisr_dispatch(-1, m); /* -1 drop the packet */
-#else
- m_freem(m);
-#endif
+ dn_free_pkt(m);
break;
default:
printf("dummynet: bad switch %d!\n", pkt->dn_dir);
- m_freem(m);
+ dn_free_pkt(m);
break;
}
}
fs->last_expired = time_uptime ;
for (i = 0 ; i <= fs->rq_size ; i++) /* last one is overflow */
for (prev=NULL, q = fs->rq[i] ; q != NULL ; )
- if (q->head != NULL || q->S != q->F+1) {
+ if (!QUEUE_IS_IDLE(q)) {
prev = q ;
q = q->next ;
} else { /* entry is idle, expire it */
q->hash_slot = i;
q->next = fs->rq[i];
q->S = q->F + 1; /* hack - mark timestamp as invalid. */
- q->numbytes = io_fast ? fs->pipe->bandwidth : 0;
+ q->numbytes = fs->pipe->burst + (io_fast ? fs->pipe->bandwidth : 0);
fs->rq[i] = q;
fs->rq_elements++;
return (q);
break ; /* found */
/* No match. Check if we can expire the entry */
- if (pipe_expire && q->head == NULL && q->S == q->F+1 ) {
+ if (pipe_expire && QUEUE_IS_IDLE(q)) {
/* entry is idle and not in any heap, expire it */
struct dn_flow_queue *old_q = q ;
* XXX check wraps...
*/
if (q->avg) {
- u_int t = div64(curr_time - q->q_time,
+ u_int t = div64(curr_time - q->idle_time,
fs->lookup_step);
- q->avg = (t >= 0 && t < fs->lookup_depth) ?
+ q->avg = (t < fs->lookup_depth) ?
SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0;
}
}
* Build and enqueue packet + parameters.
*/
pkt->rule = fwa->rule;
+ pkt->rule_id = fwa->rule_id;
+ pkt->chain_id = fwa->chain_id;
pkt->dn_dir = dir;
pkt->ifp = fwa->oif;
if (q->head != m) /* Flow was not idle, we are done. */
goto done;
- if (q->q_time < (uint32_t)curr_time)
- q->numbytes = io_fast ? fs->pipe->bandwidth : 0;
- q->q_time = curr_time;
+ if (is_pipe) { /* Fixed rate queues. */
+ if (q->idle_time < curr_time) {
+ /* Calculate available burst size. */
+ q->numbytes +=
+ (curr_time - q->idle_time - 1) * pipe->bandwidth;
+ if (q->numbytes > pipe->burst)
+ q->numbytes = pipe->burst;
+ if (io_fast)
+ q->numbytes += pipe->bandwidth;
+ }
+ } else { /* WF2Q. */
+ if (pipe->idle_time < curr_time &&
+ pipe->scheduler_heap.elements == 0 &&
+ pipe->not_eligible_heap.elements == 0) {
+ /* Calculate available burst size. */
+ pipe->numbytes +=
+ (curr_time - pipe->idle_time - 1) * pipe->bandwidth;
+ if (pipe->numbytes > 0 && pipe->numbytes > pipe->burst)
+ pipe->numbytes = pipe->burst;
+ if (io_fast)
+ pipe->numbytes += pipe->bandwidth;
+ }
+ pipe->idle_time = curr_time;
+ }
+ /* Necessary for both: fixed rate & WF2Q queues. */
+ q->idle_time = curr_time;
/*
* If we reach this point the flow was previously idle, so we need
heap_extract(&(pipe->idle_heap), q);
q->S = MAX64(q->F, pipe->V);
}
- q->F = div64(q->S + (len << MY_M), fs->weight);
+ q->F = q->S + div64(len << MY_M, fs->weight);
if (pipe->not_eligible_heap.elements == 0 &&
pipe->scheduler_heap.elements == 0)
if (q)
q->drops++;
DUMMYNET_UNLOCK();
- /*
- * set the tag, if present. dn_tag_get cannot fail
- * so we need to check first
- */
- if (m_tag_first(m)) {
- pkt = dn_tag_get(m);
- pkt->dn_dir = DN_TO_DROP;
- }
- dummynet_send(m); /* drop the packet */
- *m0 = NULL;
+ *m0 = dn_free_pkt(m);
return ((fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS);
}
-/*
- * Below, the rt_unref is only needed when (pkt->dn_dir == DN_TO_IP_OUT)
- * Doing this would probably save us the initial bzero of dn_pkt
- */
-#define DN_FREE_PKT(_m) do { \
- m_freem(_m); \
-} while (0)
-
/*
* Dispose all packets and flow_queues on a flow_set.
* If all=1, also remove red lookup table and other storage,
for (i = 0; i <= fs->rq_size; i++) {
for (q = fs->rq[i]; q != NULL; q = qn) {
- struct mbuf *m, *mnext;
-
- mnext = q->head;
- while ((m = mnext) != NULL) {
- mnext = m->m_nextpkt;
- DN_FREE_PKT(m);
- }
+ dn_free_pkts(q->head);
qn = q->next;
free(q, M_DUMMYNET);
}
static void
purge_pipe(struct dn_pipe *pipe)
{
- struct mbuf *m, *mnext;
purge_flow_set( &(pipe->fs), 1 );
- mnext = pipe->head;
- while ((m = mnext) != NULL) {
- mnext = m->m_nextpkt;
- DN_FREE_PKT(m);
- }
+ dn_free_pkts(pipe->head);
heap_free( &(pipe->scheduler_heap) );
heap_free( &(pipe->not_eligible_heap) );
DUMMYNET_UNLOCK();
}
-extern struct ip_fw *ip_fw_default_rule;
-static void
-dn_rule_delete_fs(struct dn_flow_set *fs, void *r)
-{
- int i ;
- struct dn_flow_queue *q ;
- struct mbuf *m ;
-
- for (i = 0 ; i <= fs->rq_size ; i++) /* last one is ovflow */
- for (q = fs->rq[i] ; q ; q = q->next )
- for (m = q->head ; m ; m = m->m_nextpkt ) {
- struct dn_pkt_tag *pkt = dn_tag_get(m) ;
- if (pkt->rule == r)
- pkt->rule = ip_fw_default_rule ;
- }
-}
-
-/*
- * When a firewall rule is deleted, scan all queues and remove the pointer
- * to the rule from matching packets, making them point to the default rule.
- * The pointer is used to reinject packets in case one_pass = 0.
- */
-void
-dn_rule_delete(void *r)
-{
- struct dn_pipe *pipe;
- struct dn_flow_set *fs;
- struct dn_pkt_tag *pkt;
- struct mbuf *m;
- int i;
-
- DUMMYNET_LOCK();
- /*
- * If the rule references a queue (dn_flow_set), then scan
- * the flow set, otherwise scan pipes. Should do either, but doing
- * both does not harm.
- */
- for (i = 0; i < HASHSIZE; i++)
- SLIST_FOREACH(fs, &flowsethash[i], next)
- dn_rule_delete_fs(fs, r);
-
- for (i = 0; i < HASHSIZE; i++)
- SLIST_FOREACH(pipe, &pipehash[i], next) {
- fs = &(pipe->fs);
- dn_rule_delete_fs(fs, r);
- for (m = pipe->head ; m ; m = m->m_nextpkt ) {
- pkt = dn_tag_get(m);
- if (pkt->rule == r)
- pkt->rule = ip_fw_default_rule;
- }
- }
- DUMMYNET_UNLOCK();
-}
-
/*
* setup RED parameters
*/
* qsize = slots/bytes
*/
p->delay = (p->delay * hz) / 1000;
+ /* Scale burst size: bytes -> bits * hz */
+ p->burst *= 8 * hz;
/* We need either a pipe number or a flow_set number. */
if (p->pipe_nr == 0 && pfs->fs_nr == 0)
return (EINVAL);
} else
/* Flush accumulated credit for all queues. */
for (i = 0; i <= pipe->fs.rq_size; i++)
- for (q = pipe->fs.rq[i]; q; q = q->next)
- q->numbytes = io_fast ? p->bandwidth : 0;
+ for (q = pipe->fs.rq[i]; q; q = q->next) {
+ q->numbytes = p->burst +
+ (io_fast ? p->bandwidth : 0);
+ }
pipe->bandwidth = p->bandwidth;
- pipe->numbytes = 0; /* just in case... */
+ pipe->burst = p->burst;
+ pipe->numbytes = pipe->burst + (io_fast ? pipe->bandwidth : 0);
bcopy(p->if_name, pipe->if_name, sizeof(p->if_name));
pipe->ifp = NULL; /* reset interface ptr */
pipe->delay = p->delay;
{
struct dn_flow_set *fs;
struct dn_pipe *pipe;
- struct mbuf *m, *mnext;
int i;
DUMMYNET_LOCK_ASSERT();
for (i = 0; i < HASHSIZE; i++) {
SLIST_FOREACH(pipe, &pipehash[i], next) {
purge_flow_set(&(pipe->fs), 0);
-
- mnext = pipe->head;
- while ((m = mnext) != NULL) {
- mnext = m->m_nextpkt;
- DN_FREE_PKT(m);
- }
+ dn_free_pkts(pipe->head);
pipe->head = pipe->tail = NULL;
}
}
*/
bcopy(pipe, bp, sizeof(*pipe));
pipe_bp->delay = (pipe_bp->delay * 1000) / hz;
+ pipe_bp->burst = div64(pipe_bp->burst, 8 * hz);
/*
* XXX the following is a hack based on ->next being the
* first field in dn_pipe and dn_flow_set. The correct
switch (sopt->sopt_name) {
default :
printf("dummynet: -- unknown option %d", sopt->sopt_name);
- error = EINVAL ;
+ error = EINVAL ;
break ;
case IP_DUMMYNET_GET :
break ;
case IP_DUMMYNET_DEL : /* remove a pipe or queue */
- p = malloc(sizeof(struct dn_pipe_max), M_TEMP, M_WAITOK);
- error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
+ p = malloc(sizeof(struct dn_pipe), M_TEMP, M_WAITOK);
+ error = sooptcopyin(sopt, p, sizeof (struct dn_pipe), sizeof *p);
if (error)
break ;
ip_dn_ctl_ptr = ip_dn_ctl;
ip_dn_io_ptr = dummynet_io;
- ip_dn_ruledel_ptr = dn_rule_delete;
TASK_INIT(&dn_task, 0, dummynet_task, NULL);
dn_tq = taskqueue_create_fast("dummynet", M_NOWAIT,
{
ip_dn_ctl_ptr = NULL;
ip_dn_io_ptr = NULL;
- ip_dn_ruledel_ptr = NULL;
DUMMYNET_LOCK();
callout_stop(&dn_timeout);