1 /* See the DRL-LICENSE file for this file's software license. */
4 * Implements the Sample and hold accounting mechanism as described in "New
5 * Directions in Traffic Measurement and Accounting" by Cristian Estan and
6 * George Varghese (SIGCOMM 2002).
9 * Randomly sample packets with some probability. For every sampled packet,
10 * if it isn't already in our flow table, add it. Once a flow is in the table,
11 * EVERY packet belonging to the flow subsequently updates the table
17 * Let p be the probability of samping a BYTE.
18 * Let s be the size of a packet.
19 * Then we approximate the probability of samping the entire packet as p * s.
22 #ifndef __SAMPLEHOLD__
23 #define __SAMPLEHOLD__
28 #define FLOW_DELETED 1
31 #define RANDOM_GRANULARITY (1000)
33 /* For simple S&H testing. */
34 #define SAMPLEHOLD_PERCENTAGE (5)
35 #define SAMPLEHOLD_OVERFACTOR (1.25)
37 //#define SAMPLEHOLD_PERCENTAGE (5)
38 //#define SAMPLEHOLD_OVERFACTOR (10)
39 #define SAMPLEHOLD_BONUS_FACTOR (1.05)
41 /** In-table representation of a flow that has been sampled. */
42 typedef struct sampled_flow {
44 /* Flow-specific values. */
46 /** The rate of the flow in the current estimate interval. */
49 /** The total number of bytes this flow has sent during this cleaning
54 /** The total number of bytes this flow had sent during this cleaning interval
55 * as of the last rate estimate update. */
58 /** The time at which this flow was last updated. */
59 struct timeval last_update;
61 /* Identification information. */
63 /** The flow's source IP address. */
66 /** The flow's destination IP address. */
69 /** The flow's source port. */
72 /** The flow's destination port. */
75 /** The flow's protocol. This corresponds to the protocol field of the IP
79 /** Bookkeeping to keep track of the state of the flow in the table. */
85 * The structure in which flows are stored.
87 struct sampled_flow_table {
89 /** Pointer to the common flow information for the identity that owns this
90 * sampled flow table. This is updated with aggregate information. */
91 common_accounting_t *common;
93 /* Table properties. */
95 /** The maximum capacity of the table. */
98 /** The current size of the table. */
101 /** Hash function pointer. */
102 uint32_t (*hash_function)(const key_flow *key);
104 /** Pointer to the array that backs the flow table. */
105 sampled_flow *backing;
107 /** Pointer to the flow in backing with the largest rate. */
108 sampled_flow *largest;
110 /** The probability of sampling a byte. */
113 /** Threshold for keeping things around during clean (bytes). */
118 /** The type sampled_flow_table is really a pointer to a struct
119 * sampled_flow_table. */
120 typedef struct sampled_flow_table *sampled_flow_table;
122 /**** Table API ****/
125 * Creates a new table with the specified maximum byte count, percentage
126 * of byte count to classify as an interesting flow, and an oversampling factor.
128 * Returns the new table or NULL on failure.
130 sampled_flow_table sampled_table_create(uint32_t (*hash_function)(const key_flow *key), uint32_t max_bytes, uint32_t flow_percentage, uint32_t oversampling_factor, common_accounting_t *common);
133 * Destroys the specified table.
135 void sampled_table_destroy(sampled_flow_table table);
138 * Finds the data associated with the specified key.
140 * Returns a pointer to the sampled_flow or NULL if the key is not in the table.
142 sampled_flow *sampled_table_lookup(sampled_flow_table table, const key_flow *key);
145 * This is the function that should be called on every packet. It will first
146 * check to see if the flow is in the table. If so, it updates the flow's
147 * table information. If not, it will probabilistically sample and add the
150 * Returns 1 if, after the call, the flow is in the table. 0 If it is not
153 int sampled_table_sample(sampled_flow_table table, const key_flow *key);
156 * Returns the number of elements in the table.
158 uint32_t sampled_table_size(const sampled_flow_table table);
160 /** Cleans the table by removing "small" flows and relocating the "big" ones
161 * to their best hash locations. */
162 int sampled_table_cleanup(sampled_flow_table table);
165 * Updates the rate information for all flows in the table according to the
166 * specified current time and EWMA weight.
168 void sampled_table_update_flows(sampled_flow_table table, struct timeval now, double ewma_weight);
170 /** Returns the largest flow in the table or NULL if there isn't one. */
171 sampled_flow *sampled_table_largest(sampled_flow_table table);
173 #endif /* __SAMPLEHOLD__ */