2 * Copyright (c) 2012 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "ofproto-dpif-governor.h"
24 #include "poll-loop.h"
31 VLOG_DEFINE_THIS_MODULE(ofproto_dpif_governor);
33 /* Minimum number of observed packets before setting up a flow.
35 * This value seems OK empirically. */
36 #define FLOW_SETUP_THRESHOLD 5
37 BUILD_ASSERT_DECL(FLOW_SETUP_THRESHOLD > 1);
38 BUILD_ASSERT_DECL(FLOW_SETUP_THRESHOLD < 16);
40 /* Minimum and maximum size of a governor, in bytes. */
41 enum { MIN_SIZE = 16 * 1024 };
42 enum { MAX_SIZE = 256 * 1024 };
43 BUILD_ASSERT_DECL(IS_POW2(MIN_SIZE));
44 BUILD_ASSERT_DECL(IS_POW2(MAX_SIZE));
46 /* Minimum and maximum time to process the number of packets that make up a
47 * given generation. If a generation completes faster than the minimum time,
48 * we double the table size (but no more than MAX_SIZE). If a generation take
49 * more than the maximum time to complete, we halve the table size (but no
50 * smaller than MIN_SIZE). */
51 enum { MIN_ELAPSED = 1000 }; /* In milliseconds. */
52 enum { MAX_ELAPSED = 5000 }; /* In milliseconds. */
54 static void governor_new_generation(struct governor *, unsigned int size);
56 /* Creates and returns a new governor named 'name' (which is used only for log
59 governor_create(const char *name)
61 struct governor *g = xzalloc(sizeof *g);
62 g->name = xstrdup(name);
63 governor_new_generation(g, MIN_SIZE);
69 governor_destroy(struct governor *g)
72 VLOG_INFO("%s: disengaging", g->name);
79 /* Performs periodic maintenance work on 'g'. */
81 governor_run(struct governor *g)
83 if (time_msec() - g->start > MAX_ELAPSED) {
84 if (g->size > MIN_SIZE) {
85 governor_new_generation(g, g->size / 2);
87 /* Don't start a new generation (we'd never go idle). */
92 /* Arranges for the poll loop to wake up when 'g' needs to do some work. */
94 governor_wait(struct governor *g)
96 if (g->size > MIN_SIZE) {
97 poll_timer_wait_until(g->start + MAX_ELAPSED);
101 /* Returns true if 'g' has been doing only a minimal amount of work and thus
102 * the client should consider getting rid of it entirely. */
104 governor_is_idle(struct governor *g)
106 return g->size == MIN_SIZE && time_msec() - g->start > MAX_ELAPSED;
109 /* Tests whether a flow whose hash is 'hash' and for which 'n' packets have
110 * just arrived should be set up in the datapath or just processed on a
111 * packet-by-packet basis. Returns true to set up a datapath flow, false to
112 * process the packets individually.
114 * One would expect 'n' to ordinarily be 1, if batching leads multiple packets
115 * to be processed at a time then it could be greater. */
117 governor_should_install_flow(struct governor *g, uint32_t hash, int n)
119 int old_count, new_count;
125 /* Count these packets and begin a new generation if necessary. */
127 if (g->n_packets >= g->size / 4) {
128 unsigned int new_size;
131 elapsed = time_msec() - g->start;
132 new_size = (elapsed < MIN_ELAPSED && g->size < MAX_SIZE ? g->size * 2
133 : elapsed > MAX_ELAPSED && g->size > MIN_SIZE ? g->size / 2
135 governor_new_generation(g, new_size);
138 /* If we've set up most of the flows we've seen, then we're wasting time
139 * handling most packets one at a time, so in this case instead set up most
140 * flows directly and use the remaining flows as a sample set to adjust our
143 * The definition of "most" is conservative, but the sample size is tuned
144 * based on a few experiments with TCP_CRR mode in netperf. */
145 if (g->n_setups >= g->n_flows - g->n_flows / 16
152 /* Do hash table processing.
154 * Even-numbered hash values use high-order nibbles.
155 * Odd-numbered hash values use low-order nibbles. */
156 e = &g->table[(hash >> 1) & (g->size - 1)];
157 old_count = (hash & 1 ? *e >> 4 : *e & 0x0f);
161 new_count = n + old_count;
162 if (new_count >= FLOW_SETUP_THRESHOLD) {
167 install_flow = false;
169 *e = hash & 1 ? (new_count << 4) | (*e & 0x0f) : (*e & 0xf0) | new_count;
174 /* Starts a new generation in 'g' with a table size of 'size' bytes. 'size'
175 * must be a power of two between MIN_SIZE and MAX_SIZE, inclusive. */
177 governor_new_generation(struct governor *g, unsigned int size)
179 ovs_assert(size >= MIN_SIZE && size <= MAX_SIZE);
180 ovs_assert(is_pow2(size));
182 /* Allocate new table, if necessary. */
183 if (g->size != size) {
185 VLOG_INFO("%s: engaging governor with %u kB hash table",
186 g->name, size / 1024);
188 VLOG_INFO("%s: processed %u packets in %.2f s, "
189 "%s hash table to %u kB "
190 "(%u hashes, %u setups, %u shortcuts)",
191 g->name, g->n_packets,
192 (time_msec() - g->start) / 1000.0,
193 size > g->size ? "enlarging" : "shrinking",
195 g->n_flows, g->n_setups, g->n_shortcuts);
199 g->table = xmalloc(size * sizeof *g->table);
202 VLOG_DBG("%s: processed %u packets in %.2f s with %u kB hash table "
203 "(%u hashes, %u setups, %u shortcuts)",
204 g->name, g->n_packets, (time_msec() - g->start) / 1000.0,
205 size / 1024, g->n_flows, g->n_setups, g->n_shortcuts);
208 /* Clear data for next generation. */
209 memset(g->table, 0, size * sizeof *g->table);
210 g->start = time_msec();