X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=ofproto%2Fpinsched.c;h=831afefb2836417850cad30919d585fa137ab417;hb=HEAD;hp=0afd22ff645958861172c51be308349deb6f6f07;hpb=d65349ea28bb67a0062a9b4b60ff97538206373b;p=sliver-openvswitch.git diff --git a/ofproto/pinsched.c b/ofproto/pinsched.c index 0afd22ff6..831afefb2 100644 --- a/ofproto/pinsched.c +++ b/ofproto/pinsched.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009 Nicira Networks. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,37 +16,38 @@ #include #include "pinsched.h" +#include +#include #include +#include #include +#include "flow.h" +#include "hash.h" +#include "hmap.h" #include "ofpbuf.h" #include "openflow/openflow.h" #include "poll-loop.h" -#include "port-array.h" -#include "queue.h" #include "random.h" #include "rconn.h" -#include "status.h" +#include "sat-math.h" #include "timeval.h" +#include "token-bucket.h" #include "vconn.h" +struct pinqueue { + struct hmap_node node; /* In struct pinsched's 'queues' hmap. */ + ofp_port_t port_no; /* Port number. */ + struct list packets; /* Contains "struct ofpbuf"s. */ + int n; /* Number of packets in 'packets'. */ +}; + struct pinsched { - /* Client-supplied parameters. */ - int rate_limit; /* Packets added to bucket per second. */ - int burst_limit; /* Maximum token bucket size, in packets. */ + struct token_bucket token_bucket; /* One queue per physical port. */ - struct port_array queues; /* Array of "struct ovs_queue *". */ + struct hmap queues; /* Contains "struct pinqueue"s. */ int n_queued; /* Sum over queues[*].n. */ - unsigned int last_tx_port; /* Last port checked in round-robin. */ - - /* Token bucket. - * - * It costs 1000 tokens to send a single packet_in message. A single token - * per message would be more straightforward, but this choice lets us avoid - * round-off error in refill_bucket()'s calculation of how many tokens to - * add to the bucket, since no division step is needed. */ - long long int last_fill; /* Time at which we last added tokens. */ - int tokens; /* Current number of tokens. */ + struct pinqueue *next_txq; /* Next pinqueue check in round-robin. */ /* Transmission queue. */ int n_txq; /* No. of packets waiting in rconn for tx. */ @@ -55,41 +56,84 @@ struct pinsched { unsigned long long n_normal; /* # txed w/o rate limit queuing. */ unsigned long long n_limited; /* # queued for rate limiting. */ unsigned long long n_queue_dropped; /* # dropped due to queue overflow. */ - - /* Switch status. */ - struct status_category *ss_cat; }; +static void +advance_txq(struct pinsched *ps) +{ + struct hmap_node *next; + + next = (ps->next_txq + ? hmap_next(&ps->queues, &ps->next_txq->node) + : hmap_first(&ps->queues)); + ps->next_txq = next ? CONTAINER_OF(next, struct pinqueue, node) : NULL; +} + static struct ofpbuf * -dequeue_packet(struct pinsched *ps, struct ovs_queue *q, - unsigned int port_no) +dequeue_packet(struct pinsched *ps, struct pinqueue *q) { - struct ofpbuf *packet = queue_pop_head(q); - if (!q->n) { - free(q); - port_array_set(&ps->queues, port_no, NULL); - } + struct ofpbuf *packet = ofpbuf_from_list(list_pop_front(&q->packets)); + q->n--; ps->n_queued--; return packet; } +static void +adjust_limits(int *rate_limit, int *burst_limit) +{ + if (*rate_limit <= 0) { + *rate_limit = 1000; + } + if (*burst_limit <= 0) { + *burst_limit = *rate_limit / 4; + } + if (*burst_limit < 1) { + *burst_limit = 1; + } +} + +/* Destroys 'q' and removes it from 'ps''s set of queues. + * (The caller must ensure that 'q' is empty.) */ +static void +pinqueue_destroy(struct pinsched *ps, struct pinqueue *q) +{ + hmap_remove(&ps->queues, &q->node); + free(q); +} + +static struct pinqueue * +pinqueue_get(struct pinsched *ps, ofp_port_t port_no) +{ + uint32_t hash = hash_ofp_port(port_no); + struct pinqueue *q; + + HMAP_FOR_EACH_IN_BUCKET (q, node, hash, &ps->queues) { + if (port_no == q->port_no) { + return q; + } + } + + q = xmalloc(sizeof *q); + hmap_insert(&ps->queues, &q->node, hash); + q->port_no = port_no; + list_init(&q->packets); + q->n = 0; + return q; +} + /* Drop a packet from the longest queue in 'ps'. */ static void drop_packet(struct pinsched *ps) { - struct ovs_queue *longest; /* Queue currently selected as longest. */ - int n_longest; /* # of queues of same length as 'longest'. */ - unsigned int longest_port_no; - unsigned int port_no; - struct ovs_queue *q; + struct pinqueue *longest; /* Queue currently selected as longest. */ + int n_longest = 0; /* # of queues of same length as 'longest'. */ + struct pinqueue *q; ps->n_queue_dropped++; - longest = port_array_first(&ps->queues, &port_no); - longest_port_no = port_no; - n_longest = 1; - while ((q = port_array_next(&ps->queues, &port_no)) != NULL) { - if (longest->n < q->n) { + longest = NULL; + HMAP_FOR_EACH (q, node, &ps->queues) { + if (!longest || longest->n < q->n) { longest = q; n_longest = 1; } else if (longest->n == q->n) { @@ -99,36 +143,36 @@ drop_packet(struct pinsched *ps) * distribution (Knuth algorithm 3.4.2R). */ if (!random_range(n_longest)) { longest = q; - longest_port_no = port_no; } } } /* FIXME: do we want to pop the tail instead? */ - ofpbuf_delete(dequeue_packet(ps, longest, longest_port_no)); + ofpbuf_delete(dequeue_packet(ps, longest)); + if (longest->n == 0) { + pinqueue_destroy(ps, longest); + } } /* Remove and return the next packet to transmit (in round-robin order). */ static struct ofpbuf * get_tx_packet(struct pinsched *ps) { - struct ovs_queue *q = port_array_next(&ps->queues, &ps->last_tx_port); - if (!q) { - q = port_array_first(&ps->queues, &ps->last_tx_port); + struct ofpbuf *packet; + struct pinqueue *q; + + if (!ps->next_txq) { + advance_txq(ps); } - return dequeue_packet(ps, q, ps->last_tx_port); -} -/* Add tokens to the bucket based on elapsed time. */ -static void -refill_bucket(struct pinsched *ps) -{ - long long int now = time_msec(); - long long int tokens = (now - ps->last_fill) * ps->rate_limit + ps->tokens; - if (tokens >= 1000) { - ps->last_fill = now; - ps->tokens = MIN(tokens, ps->burst_limit * 1000); + q = ps->next_txq; + packet = dequeue_packet(ps, q); + advance_txq(ps); + if (q->n == 0) { + pinqueue_destroy(ps, q); } + + return packet; } /* Attempts to remove enough tokens from 'ps' to transmit a packet. Returns @@ -137,71 +181,54 @@ refill_bucket(struct pinsched *ps) static bool get_token(struct pinsched *ps) { - if (ps->tokens >= 1000) { - ps->tokens -= 1000; - return true; - } else { - return false; - } + return token_bucket_withdraw(&ps->token_bucket, 1000); } void -pinsched_send(struct pinsched *ps, uint16_t port_no, - struct ofpbuf *packet, pinsched_tx_cb *cb, void *aux) +pinsched_send(struct pinsched *ps, ofp_port_t port_no, + struct ofpbuf *packet, struct list *txq) { + list_init(txq); if (!ps) { - cb(packet, aux); + list_push_back(txq, &packet->list_node); } else if (!ps->n_queued && get_token(ps)) { /* In the common case where we are not constrained by the rate limit, * let the packet take the normal path. */ ps->n_normal++; - cb(packet, aux); + list_push_back(txq, &packet->list_node); } else { /* Otherwise queue it up for the periodic callback to drain out. */ - struct ovs_queue *q; + struct pinqueue *q; - /* We are called with a buffer obtained from dpif_recv() that has much - * more allocated space than actual content most of the time. Since - * we're going to store the packet for some time, free up that + /* We might be called with a buffer obtained from dpif_recv() that has + * much more allocated space than actual content most of the time. + * Since we're going to store the packet for some time, free up that * otherwise wasted space. */ ofpbuf_trim(packet); - if (ps->n_queued >= ps->burst_limit) { + if (ps->n_queued * 1000 >= ps->token_bucket.burst) { drop_packet(ps); } - q = port_array_get(&ps->queues, port_no); - if (!q) { - q = xmalloc(sizeof *q); - queue_init(q); - port_array_set(&ps->queues, port_no, q); - } - queue_push_tail(q, packet); + q = pinqueue_get(ps, port_no); + list_push_back(&q->packets, &packet->list_node); + q->n++; ps->n_queued++; ps->n_limited++; } } -static void -pinsched_status_cb(struct status_reply *sr, void *ps_) -{ - struct pinsched *ps = ps_; - - status_reply_put(sr, "normal=%llu", ps->n_normal); - status_reply_put(sr, "limited=%llu", ps->n_limited); - status_reply_put(sr, "queue-dropped=%llu", ps->n_queue_dropped); -} - void -pinsched_run(struct pinsched *ps, pinsched_tx_cb *cb, void *aux) +pinsched_run(struct pinsched *ps, struct list *txq) { + list_init(txq); if (ps) { int i; /* Drain some packets out of the bucket if possible, but limit the * number of iterations to allow other code to get work done too. */ - refill_bucket(ps); for (i = 0; ps->n_queued && get_token(ps) && i < 50; i++) { - cb(get_tx_packet(ps), aux); + struct ofpbuf *packet = get_tx_packet(ps); + list_push_back(txq, &packet->list_node); } } } @@ -210,39 +237,29 @@ void pinsched_wait(struct pinsched *ps) { if (ps && ps->n_queued) { - if (ps->tokens >= 1000) { - /* We can transmit more packets as soon as we're called again. */ - poll_immediate_wake(); - } else { - /* We have to wait for the bucket to re-fill. We could calculate - * the exact amount of time here for increased smoothness. */ - poll_timer_wait(TIME_UPDATE_INTERVAL / 2); - } + token_bucket_wait(&ps->token_bucket, 1000); } } /* Creates and returns a scheduler for sending packet-in messages. */ struct pinsched * -pinsched_create(int rate_limit, int burst_limit, struct switch_status *ss) +pinsched_create(int rate_limit, int burst_limit) { struct pinsched *ps; - ps = xcalloc(1, sizeof *ps); - port_array_init(&ps->queues); + ps = xzalloc(sizeof *ps); + + adjust_limits(&rate_limit, &burst_limit); + token_bucket_init(&ps->token_bucket, + rate_limit, sat_mul(burst_limit, 1000)); + + hmap_init(&ps->queues); ps->n_queued = 0; - ps->last_tx_port = PORT_ARRAY_SIZE; - ps->last_fill = time_msec(); - ps->tokens = rate_limit * 100; + ps->next_txq = NULL; ps->n_txq = 0; ps->n_normal = 0; ps->n_limited = 0; ps->n_queue_dropped = 0; - pinsched_set_limits(ps, rate_limit, burst_limit); - - if (ss) { - ps->ss_cat = switch_status_register(ss, "rate-limit", - pinsched_status_cb, ps); - } return ps; } @@ -251,34 +268,41 @@ void pinsched_destroy(struct pinsched *ps) { if (ps) { - struct ovs_queue *queue; - unsigned int port_no; + struct pinqueue *q, *next; - PORT_ARRAY_FOR_EACH (queue, &ps->queues, port_no) { - queue_destroy(queue); - free(queue); + HMAP_FOR_EACH_SAFE (q, next, node, &ps->queues) { + hmap_remove(&ps->queues, &q->node); + ofpbuf_list_delete(&q->packets); + free(q); } - port_array_destroy(&ps->queues); - switch_status_unregister(ps->ss_cat); + hmap_destroy(&ps->queues); free(ps); } } void -pinsched_set_limits(struct pinsched *ps, int rate_limit, int burst_limit) +pinsched_get_limits(const struct pinsched *ps, + int *rate_limit, int *burst_limit) { - if (rate_limit <= 0) { - rate_limit = 1000; - } - if (burst_limit <= 0) { - burst_limit = rate_limit / 4; - } - burst_limit = MAX(burst_limit, 1); - burst_limit = MIN(burst_limit, INT_MAX / 1000); + *rate_limit = ps->token_bucket.rate; + *burst_limit = ps->token_bucket.burst / 1000; +} - ps->rate_limit = rate_limit; - ps->burst_limit = burst_limit; +void +pinsched_set_limits(struct pinsched *ps, int rate_limit, int burst_limit) +{ + adjust_limits(&rate_limit, &burst_limit); + token_bucket_set(&ps->token_bucket, + rate_limit, sat_mul(burst_limit, 1000)); while (ps->n_queued > burst_limit) { drop_packet(ps); } } + +/* Returns the number of packets scheduled to be sent eventually by 'ps'. + * Returns 0 if 'ps' is null. */ +unsigned int +pinsched_count_txqlen(const struct pinsched *ps) +{ + return ps ? ps->n_txq : 0; +}