ovs-atomic: Add atomic_destroy() and use everywhere it is needed.
[sliver-openvswitch.git] / ofproto / ofproto-dpif-ipfix.c
1 /*
2  * Copyright (c) 2012, 2013 Nicira, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <config.h>
18 #include "ofproto-dpif-ipfix.h"
19 #include <sys/time.h>
20 #include "byte-order.h"
21 #include "collectors.h"
22 #include "flow.h"
23 #include "hash.h"
24 #include "hmap.h"
25 #include "list.h"
26 #include "ofpbuf.h"
27 #include "ofproto.h"
28 #include "packets.h"
29 #include "poll-loop.h"
30 #include "sset.h"
31 #include "util.h"
32 #include "timeval.h"
33 #include "util.h"
34 #include "vlog.h"
35
/* Registers this file as the "ipfix" logging module. */
VLOG_DEFINE_THIS_MODULE(ipfix);

/* Rate limiter passed to the *_RL logging macros in this file. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
/* File-wide lock.  The public entry points in this file are annotated
 * OVS_EXCLUDED(mutex) and take it themselves; helpers annotated
 * OVS_REQUIRES(mutex) expect the caller to hold it. */
static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
40
/* Default port handed to collectors_create() for collector targets.
 * Cf. IETF RFC 5101 Section 10.3.4. */
#define IPFIX_DEFAULT_COLLECTOR_PORT 4739
43
/* State common to every IPFIX exporter: the collectors that messages are
 * sent to, message sequencing, and the cache of flows being aggregated. */
struct dpif_ipfix_exporter {
    struct collectors *collectors;  /* NULL when no collector is set up. */
    uint32_t seq_number;  /* Starts at 1; reset to 1 when cleared. */
    time_t last_template_set_time;  /* TIME_MIN until set elsewhere. */
    struct hmap cache_flow_key_map;  /* ipfix_flow_cache_entry. */
    struct list cache_flow_start_timestamp_list;  /* ipfix_flow_cache_entry. */
    uint32_t cache_active_timeout;  /* In seconds. */
    uint32_t cache_max_flows;
};
53
/* Per-bridge exporter: the common exporter state plus the bridge-level
 * configuration it was set up with. */
struct dpif_ipfix_bridge_exporter {
    struct dpif_ipfix_exporter exporter;
    /* Private clone of the configured options; NULL while unconfigured. */
    struct ofproto_ipfix_bridge_exporter_options *options;
    /* MAX(1, UINT32_MAX / sampling_rate) once configured, otherwise 0. */
    uint32_t probability;
};
59
/* Flow-based exporter: the common exporter state plus the options it was
 * configured with. */
struct dpif_ipfix_flow_exporter {
    struct dpif_ipfix_exporter exporter;
    /* Private clone of the configured options; NULL while unconfigured. */
    struct ofproto_ipfix_flow_exporter_options *options;
};
64
/* Element of 'flow_exporter_map' in struct dpif_ipfix.  Nodes are hashed
 * with hash_int(collector_set_id, 0). */
struct dpif_ipfix_flow_exporter_map_node {
    struct hmap_node node;
    struct dpif_ipfix_flow_exporter exporter;
};
69
/* Top-level IPFIX state: one bridge exporter plus a map of flow exporters.
 * Reference counted through dpif_ipfix_ref() and dpif_ipfix_unref(). */
struct dpif_ipfix {
    struct dpif_ipfix_bridge_exporter bridge_exporter;
    struct hmap flow_exporter_map;  /* dpif_ipfix_flow_exporter_map_node. */
    atomic_int ref_cnt;  /* Set to 1 by dpif_ipfix_create(). */
};
75
/* Value written to the 'version' field of every IPFIX message header. */
#define IPFIX_VERSION 0x000a

/* When using UDP, IPFIX Template Records must be re-sent regularly.
 * The standard default interval is 10 minutes (600 seconds).
 * Cf. IETF RFC 5101 Section 10.3.6. */
#define IPFIX_TEMPLATE_INTERVAL 600
82
/* IPFIX message header, as laid out on the wire (all fields in network
 * byte order).  Cf. IETF RFC 5101 Section 3.1. */
OVS_PACKED(
struct ipfix_header {
    ovs_be16 version;  /* IPFIX_VERSION. */
    ovs_be16 length;  /* Length in bytes including this header. */
    ovs_be32 export_time;  /* Seconds since the epoch. */
    ovs_be32 seq_number;  /* Message sequence number. */
    ovs_be32 obs_domain_id;  /* Observation Domain ID. */
});
BUILD_ASSERT_DECL(sizeof(struct ipfix_header) == 16);
93
/* Set IDs that identify Template Sets and Options Template Sets in the
 * 'set_id' field of struct ipfix_set_header. */
#define IPFIX_SET_ID_TEMPLATE 2
#define IPFIX_SET_ID_OPTION_TEMPLATE 3
96
/* Header that starts every set inside an IPFIX message.
 * Cf. IETF RFC 5101 Section 3.3.2. */
OVS_PACKED(
struct ipfix_set_header {
    ovs_be16 set_id;  /* IPFIX_SET_ID_* or valid template ID for Data Sets. */
    ovs_be16 length;  /* Length of the set in bytes including header. */
});
BUILD_ASSERT_DECL(sizeof(struct ipfix_set_header) == 4);
104
/* Alternatives for templates at each layer.  A template is defined by
 * a combination of one value for each layer. */

/* Layer 2 alternatives.  NUM_IPFIX_PROTO_L2 is the number of
 * alternatives, not an alternative itself. */
enum ipfix_proto_l2 {
    IPFIX_PROTO_L2_ETH = 0,  /* No VLAN. */
    IPFIX_PROTO_L2_VLAN,
    NUM_IPFIX_PROTO_L2
};
/* Layer 3 alternatives.  NUM_IPFIX_PROTO_L3 is the number of
 * alternatives, not an alternative itself. */
enum ipfix_proto_l3 {
    IPFIX_PROTO_L3_UNKNOWN = 0,
    IPFIX_PROTO_L3_IPV4,
    IPFIX_PROTO_L3_IPV6,
    NUM_IPFIX_PROTO_L3
};
/* Layer 4 alternatives.  NUM_IPFIX_PROTO_L4 is the number of
 * alternatives, not an alternative itself. */
enum ipfix_proto_l4 {
    IPFIX_PROTO_L4_UNKNOWN = 0,
    IPFIX_PROTO_L4_TCP_UDP,
    NUM_IPFIX_PROTO_L4
};
123
/* Any Template ID > 255 is usable for Template Records.  Template IDs
 * produced by ipfix_get_template_id() are offsets from this base. */
#define IPFIX_TEMPLATE_ID_MIN 256
126
/* Header that starts every Template Record inside a Template Set.
 * Cf. IETF RFC 5101 Section 3.4.1. */
OVS_PACKED(
struct ipfix_template_record_header {
    ovs_be16 template_id;  /* See ipfix_get_template_id(). */
    ovs_be16 field_count;  /* Number of field specifiers that follow. */
});
BUILD_ASSERT_DECL(sizeof(struct ipfix_template_record_header) == 4);
134
/* One IPFIX_ENTITY_ID_<ENUM> constant, with value ID, for each
 * IPFIX_ENTITY(ENUM, ID, SIZE, NAME) line in ipfix-entities.def. */
enum ipfix_entity_id {
#define IPFIX_ENTITY(ENUM, ID, SIZE, NAME)  IPFIX_ENTITY_ID_##ENUM = ID,
#include "ofproto/ipfix-entities.def"
};
139
/* One IPFIX_ENTITY_SIZE_<ENUM> constant, with value SIZE, for each
 * IPFIX_ENTITY(ENUM, ID, SIZE, NAME) line in ipfix-entities.def. */
enum ipfix_entity_size {
#define IPFIX_ENTITY(ENUM, ID, SIZE, NAME)  IPFIX_ENTITY_SIZE_##ENUM = SIZE,
#include "ofproto/ipfix-entities.def"
};
144
/* One field of a Template Record: which information element it carries
 * and how many bytes its value occupies. */
OVS_PACKED(
struct ipfix_template_field_specifier {
    ovs_be16 element_id;  /* IPFIX_ENTITY_ID_*. */
    ovs_be16 field_length;  /* Length of the field's value, in bytes. */
    /* No Enterprise ID, since only standard element IDs are specified. */
});
BUILD_ASSERT_DECL(sizeof(struct ipfix_template_field_specifier) == 4);
152
/* Part of data record flow key for common metadata and Ethernet entities.
 * The trailing comment on each member names the IPFIX entity it carries. */
OVS_PACKED(
struct ipfix_data_record_flow_key_common {
    ovs_be32 observation_point_id;  /* OBSERVATION_POINT_ID */
    uint8_t source_mac_address[6];  /* SOURCE_MAC_ADDRESS */
    uint8_t destination_mac_address[6];  /* DESTINATION_MAC_ADDRESS */
    ovs_be16 ethernet_type;  /* ETHERNET_TYPE */
    uint8_t ethernet_header_length;  /* ETHERNET_HEADER_LENGTH */
});
BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_flow_key_common) == 19);
163
/* Part of data record flow key for VLAN entities.
 * The trailing comment on each member names the IPFIX entity it carries. */
OVS_PACKED(
struct ipfix_data_record_flow_key_vlan {
    ovs_be16 vlan_id;  /* VLAN_ID */
    ovs_be16 dot1q_vlan_id;  /* DOT1Q_VLAN_ID */
    uint8_t dot1q_priority;  /* DOT1Q_PRIORITY */
});
BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_flow_key_vlan) == 5);
172
/* Part of data record flow key for IP entities, shared by IPv4 and IPv6.
 * The trailing comment on each member names the IPFIX entity it carries. */
/* XXX: Replace IP_TTL with MINIMUM_TTL and MAXIMUM_TTL? */
OVS_PACKED(
struct ipfix_data_record_flow_key_ip {
    uint8_t ip_version;  /* IP_VERSION */
    uint8_t ip_ttl;  /* IP_TTL */
    uint8_t protocol_identifier;  /* PROTOCOL_IDENTIFIER */
    uint8_t ip_diff_serv_code_point;  /* IP_DIFF_SERV_CODE_POINT */
    uint8_t ip_precedence;  /* IP_PRECEDENCE */
    uint8_t ip_class_of_service;  /* IP_CLASS_OF_SERVICE */
});
BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_flow_key_ip) == 6);
185
/* Part of data record flow key for IPv4 entities.
 * The trailing comment on each member names the IPFIX entity it carries. */
OVS_PACKED(
struct ipfix_data_record_flow_key_ipv4 {
    ovs_be32 source_ipv4_address;  /* SOURCE_IPV4_ADDRESS */
    ovs_be32 destination_ipv4_address;  /* DESTINATION_IPV4_ADDRESS */
});
BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_flow_key_ipv4) == 8);
193
/* Part of data record flow key for IPv6 entities.
 * The trailing comment on each member names the IPFIX entity it carries. */
OVS_PACKED(
struct ipfix_data_record_flow_key_ipv6 {
    uint8_t source_ipv6_address[16];  /* SOURCE_IPV6_ADDRESS */
    uint8_t destination_ipv6_address[16];  /* DESTINATION_IPV6_ADDRESS */
    ovs_be32 flow_label_ipv6;  /* FLOW_LABEL_IPV6 */
});
BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_flow_key_ipv6) == 36);
202
/* Part of data record flow key for TCP/UDP entities.
 * The trailing comment on each member names the IPFIX entity it carries. */
OVS_PACKED(
struct ipfix_data_record_flow_key_tcpudp {
    ovs_be16 source_transport_port;  /* SOURCE_TRANSPORT_PORT */
    ovs_be16 destination_transport_port;  /* DESTINATION_TRANSPORT_PORT */
});
BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_flow_key_tcpudp) == 4);
210
/* Values for the 'flow_end_reason' member of
 * struct ipfix_data_record_aggregated_common.
 * Cf. IETF RFC 5102 Section 5.11.3. */
enum ipfix_flow_end_reason {
    IDLE_TIMEOUT = 0x01,
    ACTIVE_TIMEOUT = 0x02,
    END_OF_FLOW_DETECTED = 0x03,
    FORCED_END = 0x04,
    LACK_OF_RESOURCES = 0x05
};
219
/* Part of data record for common aggregated elements.
 * The trailing comment on each member names the IPFIX entity it carries. */
OVS_PACKED(
struct ipfix_data_record_aggregated_common {
    ovs_be32 flow_start_delta_microseconds; /* FLOW_START_DELTA_MICROSECONDS */
    ovs_be32 flow_end_delta_microseconds; /* FLOW_END_DELTA_MICROSECONDS */
    ovs_be64 packet_delta_count;  /* PACKET_DELTA_COUNT */
    ovs_be64 layer2_octet_delta_count;  /* LAYER2_OCTET_DELTA_COUNT */
    uint8_t flow_end_reason;  /* FLOW_END_REASON */
});
BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_aggregated_common) == 25);
230
/* Part of data record for IP aggregated elements.
 * The trailing comment on each member names the IPFIX entity it carries. */
OVS_PACKED(
struct ipfix_data_record_aggregated_ip {
    ovs_be64 octet_delta_sum_of_squares;  /* OCTET_DELTA_SUM_OF_SQUARES */
    ovs_be64 minimum_ip_total_length;  /* MINIMUM_IP_TOTAL_LENGTH */
    ovs_be64 maximum_ip_total_length;  /* MAXIMUM_IP_TOTAL_LENGTH */
});
BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_aggregated_ip) == 24);
239
/* Size in bytes of the largest flow key considered here: common + VLAN +
 * IP + IPv6 + TCP/UDP parts.  (The IPv6 part, at 36 bytes, is the larger
 * of the two layer-3 address parts.) */
#define MAX_FLOW_KEY_LEN                                 \
    (sizeof(struct ipfix_data_record_flow_key_common)    \
     + sizeof(struct ipfix_data_record_flow_key_vlan)    \
     + sizeof(struct ipfix_data_record_flow_key_ip)      \
     + sizeof(struct ipfix_data_record_flow_key_ipv6)    \
     + sizeof(struct ipfix_data_record_flow_key_tcpudp))
246
/* Size in bytes of the largest data record: the largest flow key followed
 * by the common and IP aggregated parts. */
#define MAX_DATA_RECORD_LEN                                 \
    (MAX_FLOW_KEY_LEN                                       \
     + sizeof(struct ipfix_data_record_aggregated_common)   \
     + sizeof(struct ipfix_data_record_aggregated_ip))
251
/* Max length of a data set, in bytes: one set header plus one data
 * record.  To simplify the implementation, each data record is sent in
 * a separate data set, so each data set contains at most one data
 * record. */
#define MAX_DATA_SET_LEN             \
    (sizeof(struct ipfix_set_header) \
     + MAX_DATA_RECORD_LEN)
258
/* Max length of an IPFIX message, in bytes.  Arbitrarily set to
 * accommodate low MTU. */
#define MAX_MESSAGE_LEN 1024
262
263 /* Cache structures. */
264
/* Flow key: identifies one flow in an exporter's cache.  Hashed by
 * ipfix_hash_flow_key() and compared by ipfix_flow_key_equal(). */
struct ipfix_flow_key {
    uint32_t obs_domain_id;
    uint16_t template_id;
    /* Number of bytes of 'flow_key_msg_part' that are hashed and compared. */
    size_t flow_key_msg_part_size;
    /* Storage for at least MAX_FLOW_KEY_LEN bytes, declared as uint64_t
     * words. */
    uint64_t flow_key_msg_part[DIV_ROUND_UP(MAX_FLOW_KEY_LEN, 8)];
};
272
/* Flow cache entry: one flow's key plus its aggregated counters.  The two
 * embedded nodes link the entry into an exporter's 'cache_flow_key_map'
 * and 'cache_flow_start_timestamp_list'. */
struct ipfix_flow_cache_entry {
    struct hmap_node flow_key_map_node;
    struct list cache_flow_start_timestamp_list_node;
    struct ipfix_flow_key flow_key;
    /* Common aggregated elements. */
    uint64_t flow_start_timestamp_usec;
    uint64_t flow_end_timestamp_usec;
    uint64_t packet_delta_count;
    uint64_t layer2_octet_delta_count;
    uint64_t octet_delta_sum_of_squares;  /* 0 if not IP. */
    uint16_t minimum_ip_total_length;  /* 0 if not IP. */
    uint16_t maximum_ip_total_length;  /* 0 if not IP. */
};
287
/* Forward declarations for cache-expiry helpers defined later in the file
 * and used by the exporter setup and teardown code below. */

/* NOTE(review): judging by the callers below, the bool argument selects a
 * forced flush of the cache -- confirm against the definition. */
static void dpif_ipfix_cache_expire(struct dpif_ipfix_exporter *, bool,
                                    const uint64_t, const uint32_t);

static void get_export_time_now(uint64_t *, uint32_t *);

/* Called with 'true' to flush the cache with flow end reason "forced end"
 * and with 'false' to re-check entries after the timeouts have changed. */
static void dpif_ipfix_cache_expire_now(struct dpif_ipfix_exporter *, bool);
294
295 static bool
296 ofproto_ipfix_bridge_exporter_options_equal(
297     const struct ofproto_ipfix_bridge_exporter_options *a,
298     const struct ofproto_ipfix_bridge_exporter_options *b)
299 {
300     return (a->obs_domain_id == b->obs_domain_id
301             && a->obs_point_id == b->obs_point_id
302             && a->sampling_rate == b->sampling_rate
303             && a->cache_active_timeout == b->cache_active_timeout
304             && a->cache_max_flows == b->cache_max_flows
305             && sset_equals(&a->targets, &b->targets));
306 }
307
308 static struct ofproto_ipfix_bridge_exporter_options *
309 ofproto_ipfix_bridge_exporter_options_clone(
310     const struct ofproto_ipfix_bridge_exporter_options *old)
311 {
312     struct ofproto_ipfix_bridge_exporter_options *new =
313         xmemdup(old, sizeof *old);
314     sset_clone(&new->targets, &old->targets);
315     return new;
316 }
317
318 static void
319 ofproto_ipfix_bridge_exporter_options_destroy(
320     struct ofproto_ipfix_bridge_exporter_options *options)
321 {
322     if (options) {
323         sset_destroy(&options->targets);
324         free(options);
325     }
326 }
327
328 static bool
329 ofproto_ipfix_flow_exporter_options_equal(
330     const struct ofproto_ipfix_flow_exporter_options *a,
331     const struct ofproto_ipfix_flow_exporter_options *b)
332 {
333     return (a->collector_set_id == b->collector_set_id
334             && a->cache_active_timeout == b->cache_active_timeout
335             && a->cache_max_flows == b->cache_max_flows
336             && sset_equals(&a->targets, &b->targets));
337 }
338
339 static struct ofproto_ipfix_flow_exporter_options *
340 ofproto_ipfix_flow_exporter_options_clone(
341     const struct ofproto_ipfix_flow_exporter_options *old)
342 {
343     struct ofproto_ipfix_flow_exporter_options *new =
344         xmemdup(old, sizeof *old);
345     sset_clone(&new->targets, &old->targets);
346     return new;
347 }
348
349 static void
350 ofproto_ipfix_flow_exporter_options_destroy(
351     struct ofproto_ipfix_flow_exporter_options *options)
352 {
353     if (options) {
354         sset_destroy(&options->targets);
355         free(options);
356     }
357 }
358
359 static void
360 dpif_ipfix_exporter_init(struct dpif_ipfix_exporter *exporter)
361 {
362     exporter->collectors = NULL;
363     exporter->seq_number = 1;
364     exporter->last_template_set_time = TIME_MIN;
365     hmap_init(&exporter->cache_flow_key_map);
366     list_init(&exporter->cache_flow_start_timestamp_list);
367     exporter->cache_active_timeout = 0;
368     exporter->cache_max_flows = 0;
369 }
370
371 static void
372 dpif_ipfix_exporter_clear(struct dpif_ipfix_exporter *exporter)
373 {
374     /* Flush the cache with flow end reason "forced end." */
375     dpif_ipfix_cache_expire_now(exporter, true);
376
377     collectors_destroy(exporter->collectors);
378     exporter->collectors = NULL;
379     exporter->seq_number = 1;
380     exporter->last_template_set_time = TIME_MIN;
381     exporter->cache_active_timeout = 0;
382     exporter->cache_max_flows = 0;
383 }
384
385 static void
386 dpif_ipfix_exporter_destroy(struct dpif_ipfix_exporter *exporter)
387 {
388     dpif_ipfix_exporter_clear(exporter);
389     hmap_destroy(&exporter->cache_flow_key_map);
390 }
391
392 static bool
393 dpif_ipfix_exporter_set_options(struct dpif_ipfix_exporter *exporter,
394                                 const struct sset *targets,
395                                 const uint32_t cache_active_timeout,
396                                 const uint32_t cache_max_flows)
397 {
398     collectors_destroy(exporter->collectors);
399     collectors_create(targets, IPFIX_DEFAULT_COLLECTOR_PORT,
400                       &exporter->collectors);
401     if (exporter->collectors == NULL) {
402         VLOG_WARN_RL(&rl, "no collectors could be initialized, "
403                      "IPFIX exporter disabled");
404         dpif_ipfix_exporter_clear(exporter);
405         return false;
406     }
407     exporter->cache_active_timeout = cache_active_timeout;
408     exporter->cache_max_flows = cache_max_flows;
409     return true;
410 }
411
412 static void
413 dpif_ipfix_bridge_exporter_init(struct dpif_ipfix_bridge_exporter *exporter)
414 {
415     dpif_ipfix_exporter_init(&exporter->exporter);
416     exporter->options = NULL;
417     exporter->probability = 0;
418 }
419
420 static void
421 dpif_ipfix_bridge_exporter_clear(struct dpif_ipfix_bridge_exporter *exporter)
422 {
423     dpif_ipfix_exporter_clear(&exporter->exporter);
424     ofproto_ipfix_bridge_exporter_options_destroy(exporter->options);
425     exporter->options = NULL;
426     exporter->probability = 0;
427 }
428
429 static void
430 dpif_ipfix_bridge_exporter_destroy(struct dpif_ipfix_bridge_exporter *exporter)
431 {
432     dpif_ipfix_bridge_exporter_clear(exporter);
433     dpif_ipfix_exporter_destroy(&exporter->exporter);
434 }
435
436 static void
437 dpif_ipfix_bridge_exporter_set_options(
438     struct dpif_ipfix_bridge_exporter *exporter,
439     const struct ofproto_ipfix_bridge_exporter_options *options)
440 {
441     bool options_changed;
442
443     if (!options || sset_is_empty(&options->targets)) {
444         /* No point in doing any work if there are no targets. */
445         dpif_ipfix_bridge_exporter_clear(exporter);
446         return;
447     }
448
449     options_changed = (
450         !exporter->options
451         || !ofproto_ipfix_bridge_exporter_options_equal(
452             options, exporter->options));
453
454     /* Configure collectors if options have changed or if we're
455      * shortchanged in collectors (which indicates that opening one or
456      * more of the configured collectors failed, so that we should
457      * retry). */
458     if (options_changed
459         || collectors_count(exporter->exporter.collectors)
460             < sset_count(&options->targets)) {
461         if (!dpif_ipfix_exporter_set_options(
462                 &exporter->exporter, &options->targets,
463                 options->cache_active_timeout, options->cache_max_flows)) {
464             return;
465         }
466     }
467
468     /* Avoid reconfiguring if options didn't change. */
469     if (!options_changed) {
470         return;
471     }
472
473     ofproto_ipfix_bridge_exporter_options_destroy(exporter->options);
474     exporter->options = ofproto_ipfix_bridge_exporter_options_clone(options);
475     exporter->probability =
476         MAX(1, UINT32_MAX / exporter->options->sampling_rate);
477
478     /* Run over the cache as some entries might have expired after
479      * changing the timeouts. */
480     dpif_ipfix_cache_expire_now(&exporter->exporter, false);
481 }
482
483 static struct dpif_ipfix_flow_exporter_map_node*
484 dpif_ipfix_find_flow_exporter_map_node(
485     const struct dpif_ipfix *di, const uint32_t collector_set_id)
486     OVS_REQUIRES(mutex)
487 {
488     struct dpif_ipfix_flow_exporter_map_node *exporter_node;
489
490     HMAP_FOR_EACH_WITH_HASH (exporter_node, node,
491                              hash_int(collector_set_id, 0),
492                              &di->flow_exporter_map) {
493         if (exporter_node->exporter.options->collector_set_id
494             == collector_set_id) {
495             return exporter_node;
496         }
497     }
498
499     return NULL;
500 }
501
502 static void
503 dpif_ipfix_flow_exporter_init(struct dpif_ipfix_flow_exporter *exporter)
504 {
505     dpif_ipfix_exporter_init(&exporter->exporter);
506     exporter->options = NULL;
507 }
508
509 static void
510 dpif_ipfix_flow_exporter_clear(struct dpif_ipfix_flow_exporter *exporter)
511 {
512     dpif_ipfix_exporter_clear(&exporter->exporter);
513     ofproto_ipfix_flow_exporter_options_destroy(exporter->options);
514     exporter->options = NULL;
515 }
516
517 static void
518 dpif_ipfix_flow_exporter_destroy(struct dpif_ipfix_flow_exporter *exporter)
519 {
520     dpif_ipfix_flow_exporter_clear(exporter);
521     dpif_ipfix_exporter_destroy(&exporter->exporter);
522 }
523
524 static bool
525 dpif_ipfix_flow_exporter_set_options(
526     struct dpif_ipfix_flow_exporter *exporter,
527     const struct ofproto_ipfix_flow_exporter_options *options)
528 {
529     bool options_changed;
530
531     if (sset_is_empty(&options->targets)) {
532         /* No point in doing any work if there are no targets. */
533         dpif_ipfix_flow_exporter_clear(exporter);
534         return true;
535     }
536
537     options_changed = (
538         !exporter->options
539         || !ofproto_ipfix_flow_exporter_options_equal(
540             options, exporter->options));
541
542     /* Configure collectors if options have changed or if we're
543      * shortchanged in collectors (which indicates that opening one or
544      * more of the configured collectors failed, so that we should
545      * retry). */
546     if (options_changed
547         || collectors_count(exporter->exporter.collectors)
548             < sset_count(&options->targets)) {
549         if (!dpif_ipfix_exporter_set_options(
550                 &exporter->exporter, &options->targets,
551                 options->cache_active_timeout, options->cache_max_flows)) {
552             return false;
553         }
554     }
555
556     /* Avoid reconfiguring if options didn't change. */
557     if (!options_changed) {
558         return true;
559     }
560
561     ofproto_ipfix_flow_exporter_options_destroy(exporter->options);
562     exporter->options = ofproto_ipfix_flow_exporter_options_clone(options);
563
564     /* Run over the cache as some entries might have expired after
565      * changing the timeouts. */
566     dpif_ipfix_cache_expire_now(&exporter->exporter, false);
567
568     return true;
569 }
570
571 void
572 dpif_ipfix_set_options(
573     struct dpif_ipfix *di,
574     const struct ofproto_ipfix_bridge_exporter_options *bridge_exporter_options,
575     const struct ofproto_ipfix_flow_exporter_options *flow_exporters_options,
576     size_t n_flow_exporters_options) OVS_EXCLUDED(mutex)
577 {
578     int i;
579     struct ofproto_ipfix_flow_exporter_options *options;
580     struct dpif_ipfix_flow_exporter_map_node *node, *next;
581     size_t n_broken_flow_exporters_options = 0;
582
583     ovs_mutex_lock(&mutex);
584     dpif_ipfix_bridge_exporter_set_options(&di->bridge_exporter,
585                                            bridge_exporter_options);
586
587     /* Add new flow exporters and update current flow exporters. */
588     options = (struct ofproto_ipfix_flow_exporter_options *)
589         flow_exporters_options;
590     for (i = 0; i < n_flow_exporters_options; i++) {
591         node = dpif_ipfix_find_flow_exporter_map_node(
592             di, options->collector_set_id);
593         if (!node) {
594             node = xzalloc(sizeof *node);
595             dpif_ipfix_flow_exporter_init(&node->exporter);
596             hmap_insert(&di->flow_exporter_map, &node->node,
597                         hash_int(options->collector_set_id, 0));
598         }
599         if (!dpif_ipfix_flow_exporter_set_options(&node->exporter, options)) {
600             n_broken_flow_exporters_options++;
601         }
602         options++;
603     }
604
605     ovs_assert(hmap_count(&di->flow_exporter_map) >=
606                (n_flow_exporters_options - n_broken_flow_exporters_options));
607
608     /* Remove dropped flow exporters, if any needs to be removed. */
609     if (hmap_count(&di->flow_exporter_map) > n_flow_exporters_options) {
610         HMAP_FOR_EACH_SAFE (node, next, node, &di->flow_exporter_map) {
611             /* This is slow but doesn't take any extra memory, and
612              * this table is not supposed to contain many rows anyway. */
613             options = (struct ofproto_ipfix_flow_exporter_options *)
614                 flow_exporters_options;
615             for (i = 0; i < n_flow_exporters_options; i++) {
616               if (node->exporter.options->collector_set_id
617                   == options->collector_set_id) {
618                   break;
619               }
620               options++;
621             }
622             if (i == n_flow_exporters_options) {  // Not found.
623                 hmap_remove(&di->flow_exporter_map, &node->node);
624                 dpif_ipfix_flow_exporter_destroy(&node->exporter);
625                 free(node);
626             }
627         }
628     }
629
630     ovs_assert(hmap_count(&di->flow_exporter_map) ==
631                (n_flow_exporters_options - n_broken_flow_exporters_options));
632     ovs_mutex_unlock(&mutex);
633 }
634
635 struct dpif_ipfix *
636 dpif_ipfix_create(void)
637 {
638     struct dpif_ipfix *di;
639     di = xzalloc(sizeof *di);
640     dpif_ipfix_bridge_exporter_init(&di->bridge_exporter);
641     hmap_init(&di->flow_exporter_map);
642     atomic_init(&di->ref_cnt, 1);
643     return di;
644 }
645
646 struct dpif_ipfix *
647 dpif_ipfix_ref(const struct dpif_ipfix *di_)
648 {
649     struct dpif_ipfix *di = CONST_CAST(struct dpif_ipfix *, di_);
650     if (di) {
651         int orig;
652         atomic_add(&di->ref_cnt, 1, &orig);
653         ovs_assert(orig > 0);
654     }
655     return di;
656 }
657
658 uint32_t
659 dpif_ipfix_get_bridge_exporter_probability(const struct dpif_ipfix *di)
660     OVS_EXCLUDED(mutex)
661 {
662     uint32_t ret;
663     ovs_mutex_lock(&mutex);
664     ret = di->bridge_exporter.probability;
665     ovs_mutex_unlock(&mutex);
666     return ret;
667 }
668
669 static void
670 dpif_ipfix_clear(struct dpif_ipfix *di) OVS_REQUIRES(mutex)
671 {
672     struct dpif_ipfix_flow_exporter_map_node *exp_node, *exp_next;
673
674     dpif_ipfix_bridge_exporter_clear(&di->bridge_exporter);
675
676     HMAP_FOR_EACH_SAFE (exp_node, exp_next, node, &di->flow_exporter_map) {
677         hmap_remove(&di->flow_exporter_map, &exp_node->node);
678         dpif_ipfix_flow_exporter_destroy(&exp_node->exporter);
679         free(exp_node);
680     }
681 }
682
683 void
684 dpif_ipfix_unref(struct dpif_ipfix *di) OVS_EXCLUDED(mutex)
685 {
686     int orig;
687
688     if (!di) {
689         return;
690     }
691
692     atomic_sub(&di->ref_cnt, 1, &orig);
693     ovs_assert(orig > 0);
694     if (orig == 1) {
695         ovs_mutex_lock(&mutex);
696         dpif_ipfix_clear(di);
697         dpif_ipfix_bridge_exporter_destroy(&di->bridge_exporter);
698         hmap_destroy(&di->flow_exporter_map);
699         atomic_destroy(&di->ref_cnt);
700         free(di);
701         ovs_mutex_unlock(&mutex);
702     }
703 }
704
705 static void
706 ipfix_init_header(uint32_t export_time_sec, uint32_t seq_number,
707                   uint32_t obs_domain_id, struct ofpbuf *msg)
708 {
709     struct ipfix_header *hdr;
710
711     hdr = ofpbuf_put_zeros(msg, sizeof *hdr);
712     hdr->version = htons(IPFIX_VERSION);
713     hdr->length = htons(sizeof *hdr);  /* Updated in ipfix_send_msg. */
714     hdr->export_time = htonl(export_time_sec);
715     hdr->seq_number = htonl(seq_number);
716     hdr->obs_domain_id = htonl(obs_domain_id);
717 }
718
719 static void
720 ipfix_send_msg(const struct collectors *collectors, struct ofpbuf *msg)
721 {
722     struct ipfix_header *hdr;
723
724     /* Adjust the length in the header. */
725     hdr = msg->data;
726     hdr->length = htons(msg->size);
727
728     collectors_send(collectors, msg->data, msg->size);
729     msg->size = 0;
730 }
731
732 static uint16_t
733 ipfix_get_template_id(enum ipfix_proto_l2 l2, enum ipfix_proto_l3 l3,
734                       enum ipfix_proto_l4 l4)
735 {
736     uint16_t template_id;
737     template_id = l2;
738     template_id = template_id * NUM_IPFIX_PROTO_L3 + l3;
739     template_id = template_id * NUM_IPFIX_PROTO_L4 + l4;
740     return IPFIX_TEMPLATE_ID_MIN + template_id;
741 }
742
743 static void
744 ipfix_define_template_entity(enum ipfix_entity_id id,
745                              enum ipfix_entity_size size, struct ofpbuf *msg)
746 {
747     struct ipfix_template_field_specifier *field;
748
749     field = ofpbuf_put_zeros(msg, sizeof *field);
750     field->element_id = htons(id);
751     field->field_length = htons(size);
752 }
753
754 static uint16_t
755 ipfix_define_template_fields(enum ipfix_proto_l2 l2, enum ipfix_proto_l3 l3,
756                              enum ipfix_proto_l4 l4, struct ofpbuf *msg)
757 {
758     uint16_t count = 0;
759
760 #define DEF(ID) \
761     { \
762         ipfix_define_template_entity(IPFIX_ENTITY_ID_##ID, \
763                                      IPFIX_ENTITY_SIZE_##ID, msg); \
764         count++; \
765     }
766
767     /* 1. Flow key. */
768
769     DEF(OBSERVATION_POINT_ID);
770
771     /* Common Ethernet entities. */
772     DEF(SOURCE_MAC_ADDRESS);
773     DEF(DESTINATION_MAC_ADDRESS);
774     DEF(ETHERNET_TYPE);
775     DEF(ETHERNET_HEADER_LENGTH);
776
777     if (l2 == IPFIX_PROTO_L2_VLAN) {
778         DEF(VLAN_ID);
779         DEF(DOT1Q_VLAN_ID);
780         DEF(DOT1Q_PRIORITY);
781     }
782
783     if (l3 != IPFIX_PROTO_L3_UNKNOWN) {
784         DEF(IP_VERSION);
785         DEF(IP_TTL);
786         DEF(PROTOCOL_IDENTIFIER);
787         DEF(IP_DIFF_SERV_CODE_POINT);
788         DEF(IP_PRECEDENCE);
789         DEF(IP_CLASS_OF_SERVICE);
790
791         if (l3 == IPFIX_PROTO_L3_IPV4) {
792             DEF(SOURCE_IPV4_ADDRESS);
793             DEF(DESTINATION_IPV4_ADDRESS);
794         } else {  /* l3 == IPFIX_PROTO_L3_IPV6 */
795             DEF(SOURCE_IPV6_ADDRESS);
796             DEF(DESTINATION_IPV6_ADDRESS);
797             DEF(FLOW_LABEL_IPV6);
798         }
799     }
800
801     if (l4 != IPFIX_PROTO_L4_UNKNOWN) {
802         DEF(SOURCE_TRANSPORT_PORT);
803         DEF(DESTINATION_TRANSPORT_PORT);
804     }
805
806     /* 2. Flow aggregated data. */
807
808     DEF(FLOW_START_DELTA_MICROSECONDS);
809     DEF(FLOW_END_DELTA_MICROSECONDS);
810     DEF(PACKET_DELTA_COUNT);
811     DEF(LAYER2_OCTET_DELTA_COUNT);
812     DEF(FLOW_END_REASON);
813
814     if (l3 != IPFIX_PROTO_L3_UNKNOWN) {
815         DEF(OCTET_DELTA_SUM_OF_SQUARES);
816         DEF(MINIMUM_IP_TOTAL_LENGTH);
817         DEF(MAXIMUM_IP_TOTAL_LENGTH);
818     }
819
820 #undef DEF
821
822     return count;
823 }
824
825 static void
826 ipfix_send_template_msg(struct dpif_ipfix_exporter *exporter,
827                         uint32_t export_time_sec, uint32_t obs_domain_id)
828 {
829     uint64_t msg_stub[DIV_ROUND_UP(MAX_MESSAGE_LEN, 8)];
830     struct ofpbuf msg;
831     size_t set_hdr_offset, tmpl_hdr_offset;
832     struct ipfix_set_header *set_hdr;
833     struct ipfix_template_record_header *tmpl_hdr;
834     uint16_t field_count;
835     enum ipfix_proto_l2 l2;
836     enum ipfix_proto_l3 l3;
837     enum ipfix_proto_l4 l4;
838
839     ofpbuf_use_stub(&msg, msg_stub, sizeof msg_stub);
840
841     ipfix_init_header(export_time_sec, exporter->seq_number, obs_domain_id,
842                       &msg);
843     set_hdr_offset = msg.size;
844
845     /* Add a Template Set. */
846     set_hdr = ofpbuf_put_zeros(&msg, sizeof *set_hdr);
847     set_hdr->set_id = htons(IPFIX_SET_ID_TEMPLATE);
848
849     /* Define one template for each possible combination of
850      * protocols. */
851     for (l2 = 0; l2 < NUM_IPFIX_PROTO_L2; l2++) {
852         for (l3 = 0; l3 < NUM_IPFIX_PROTO_L3; l3++) {
853             for (l4 = 0; l4 < NUM_IPFIX_PROTO_L4; l4++) {
854                 if (l3 == IPFIX_PROTO_L3_UNKNOWN &&
855                     l4 != IPFIX_PROTO_L4_UNKNOWN) {
856                     continue;
857                 }
858                 tmpl_hdr_offset = msg.size;
859                 tmpl_hdr = ofpbuf_put_zeros(&msg, sizeof *tmpl_hdr);
860                 tmpl_hdr->template_id = htons(
861                     ipfix_get_template_id(l2, l3, l4));
862                 field_count = ipfix_define_template_fields(l2, l3, l4, &msg);
863                 tmpl_hdr = (struct ipfix_template_record_header*)
864                     ((uint8_t*)msg.data + tmpl_hdr_offset);
865                 tmpl_hdr->field_count = htons(field_count);
866             }
867         }
868     }
869
870     set_hdr = (struct ipfix_set_header*)((uint8_t*)msg.data + set_hdr_offset);
871     set_hdr->length = htons(msg.size - set_hdr_offset);
872
873     /* XXX: Add Options Template Sets, at least to define a Flow Keys
874      * Option Template. */
875
876     ipfix_send_msg(exporter->collectors, &msg);
877
878     ofpbuf_uninit(&msg);
879 }
880
881 static inline uint32_t
882 ipfix_hash_flow_key(const struct ipfix_flow_key *flow_key, uint32_t basis)
883 {
884     uint32_t hash;
885     hash = hash_int(flow_key->obs_domain_id, basis);
886     hash = hash_int(flow_key->template_id, hash);
887     hash = hash_bytes(flow_key->flow_key_msg_part,
888                       flow_key->flow_key_msg_part_size, hash);
889     return hash;
890 }
891
892 static bool
893 ipfix_flow_key_equal(const struct ipfix_flow_key *a,
894                      const struct ipfix_flow_key *b)
895 {
896     /* The template ID determines the flow key size, so not need to
897      * compare it. */
898     return (a->obs_domain_id == b->obs_domain_id
899             && a->template_id == b->template_id
900             && memcmp(a->flow_key_msg_part, b->flow_key_msg_part,
901                       a->flow_key_msg_part_size) == 0);
902 }
903
904 static struct ipfix_flow_cache_entry*
905 ipfix_cache_find_entry(const struct dpif_ipfix_exporter *exporter,
906                        const struct ipfix_flow_key *flow_key)
907 {
908     struct ipfix_flow_cache_entry *entry;
909
910     HMAP_FOR_EACH_WITH_HASH (entry, flow_key_map_node,
911                              ipfix_hash_flow_key(flow_key, 0),
912                              &exporter->cache_flow_key_map) {
913         if (ipfix_flow_key_equal(&entry->flow_key, flow_key)) {
914             return entry;
915         }
916     }
917
918     return NULL;
919 }
920
921 static bool
922 ipfix_cache_next_timeout_msec(const struct dpif_ipfix_exporter *exporter,
923                               long long int *next_timeout_msec)
924 {
925     struct ipfix_flow_cache_entry *entry;
926
927     LIST_FOR_EACH (entry, cache_flow_start_timestamp_list_node,
928                    &exporter->cache_flow_start_timestamp_list) {
929         *next_timeout_msec = entry->flow_start_timestamp_usec / 1000LL
930             + 1000LL * exporter->cache_active_timeout;
931         return true;
932     }
933
934     return false;
935 }
936
937 static void
938 ipfix_cache_aggregate_entries(struct ipfix_flow_cache_entry *from_entry,
939                               struct ipfix_flow_cache_entry *to_entry)
940 {
941     uint64_t *to_start, *to_end, *from_start, *from_end;
942     uint16_t *to_min_len, *to_max_len, *from_min_len, *from_max_len;
943
944     to_start = &to_entry->flow_start_timestamp_usec;
945     to_end = &to_entry->flow_end_timestamp_usec;
946     from_start = &from_entry->flow_start_timestamp_usec;
947     from_end = &from_entry->flow_end_timestamp_usec;
948
949     if (*to_start > *from_start) {
950         *to_start = *from_start;
951     }
952     if (*to_end < *from_end) {
953         *to_end = *from_end;
954     }
955
956     to_entry->packet_delta_count += from_entry->packet_delta_count;
957     to_entry->layer2_octet_delta_count += from_entry->layer2_octet_delta_count;
958
959     to_entry->octet_delta_sum_of_squares +=
960         from_entry->octet_delta_sum_of_squares;
961
962     to_min_len = &to_entry->minimum_ip_total_length;
963     to_max_len = &to_entry->maximum_ip_total_length;
964     from_min_len = &from_entry->minimum_ip_total_length;
965     from_max_len = &from_entry->maximum_ip_total_length;
966
967     if (!*to_min_len || (*from_min_len && *to_min_len > *from_min_len)) {
968         *to_min_len = *from_min_len;
969     }
970     if (*to_max_len < *from_max_len) {
971         *to_max_len = *from_max_len;
972     }
973 }
974
975 /* Add an entry into a flow cache.  The entry is either aggregated into
976  * an existing entry with the same flow key and free()d, or it is
977  * inserted into the cache. */
978 static void
979 ipfix_cache_update(struct dpif_ipfix_exporter *exporter,
980                    struct ipfix_flow_cache_entry *entry)
981 {
982     struct ipfix_flow_cache_entry *old_entry;
983
984     old_entry = ipfix_cache_find_entry(exporter, &entry->flow_key);
985
986     if (old_entry == NULL) {
987         hmap_insert(&exporter->cache_flow_key_map, &entry->flow_key_map_node,
988                     ipfix_hash_flow_key(&entry->flow_key, 0));
989
990         /* As the latest entry added into the cache, it should
991          * logically have the highest flow_start_timestamp_usec, so
992          * append it at the tail. */
993         list_push_back(&exporter->cache_flow_start_timestamp_list,
994                        &entry->cache_flow_start_timestamp_list_node);
995
996         /* Enforce exporter->cache_max_flows limit. */
997         if (hmap_count(&exporter->cache_flow_key_map)
998             > exporter->cache_max_flows) {
999             dpif_ipfix_cache_expire_now(exporter, false);
1000         }
1001     } else {
1002         ipfix_cache_aggregate_entries(entry, old_entry);
1003         free(entry);
1004     }
1005 }
1006
1007 static void
1008 ipfix_cache_entry_init(struct ipfix_flow_cache_entry *entry,
1009                        struct ofpbuf *packet, const struct flow *flow,
1010                        uint64_t packet_delta_count, uint32_t obs_domain_id,
1011                        uint32_t obs_point_id)
1012 {
1013     struct ipfix_flow_key *flow_key;
1014     struct ofpbuf msg;
1015     enum ipfix_proto_l2 l2;
1016     enum ipfix_proto_l3 l3;
1017     enum ipfix_proto_l4 l4;
1018     uint8_t ethernet_header_length;
1019     uint16_t ethernet_total_length;
1020
1021     flow_key = &entry->flow_key;
1022     ofpbuf_use_stack(&msg, flow_key->flow_key_msg_part,
1023                      sizeof flow_key->flow_key_msg_part);
1024
1025     /* Choose the right template ID matching the protocols in the
1026      * sampled packet. */
1027     l2 = (flow->vlan_tci == 0) ? IPFIX_PROTO_L2_ETH : IPFIX_PROTO_L2_VLAN;
1028
1029     switch(ntohs(flow->dl_type)) {
1030     case ETH_TYPE_IP:
1031         l3 = IPFIX_PROTO_L3_IPV4;
1032         break;
1033     case ETH_TYPE_IPV6:
1034         l3 = IPFIX_PROTO_L3_IPV6;
1035         break;
1036     default:
1037         l3 = IPFIX_PROTO_L3_UNKNOWN;
1038     }
1039
1040     l4 = IPFIX_PROTO_L4_UNKNOWN;
1041     if (l3 != IPFIX_PROTO_L3_UNKNOWN) {
1042         switch(flow->nw_proto) {
1043         case IPPROTO_TCP:  /* TCP */
1044         case IPPROTO_UDP:  /* UDP */
1045             l4 = IPFIX_PROTO_L4_TCP_UDP;
1046             break;
1047         }
1048     }
1049
1050     flow_key->obs_domain_id = obs_domain_id;
1051     flow_key->template_id = ipfix_get_template_id(l2, l3, l4);
1052
1053     /* The fields defined in the ipfix_data_record_* structs and sent
1054      * below must match exactly the templates defined in
1055      * ipfix_define_template_fields. */
1056
1057     ethernet_header_length = (l2 == IPFIX_PROTO_L2_VLAN)
1058         ? VLAN_ETH_HEADER_LEN : ETH_HEADER_LEN;
1059     ethernet_total_length = packet->size;
1060
1061     /* Common Ethernet entities. */
1062     {
1063         struct ipfix_data_record_flow_key_common *data_common;
1064
1065         data_common = ofpbuf_put_zeros(&msg, sizeof *data_common);
1066         data_common->observation_point_id = htonl(obs_point_id);
1067         memcpy(data_common->source_mac_address, flow->dl_src,
1068                sizeof flow->dl_src);
1069         memcpy(data_common->destination_mac_address, flow->dl_dst,
1070                sizeof flow->dl_dst);
1071         data_common->ethernet_type = flow->dl_type;
1072         data_common->ethernet_header_length = ethernet_header_length;
1073     }
1074
1075     if (l2 == IPFIX_PROTO_L2_VLAN) {
1076         struct ipfix_data_record_flow_key_vlan *data_vlan;
1077         uint16_t vlan_id = vlan_tci_to_vid(flow->vlan_tci);
1078         uint8_t priority = vlan_tci_to_pcp(flow->vlan_tci);
1079
1080         data_vlan = ofpbuf_put_zeros(&msg, sizeof *data_vlan);
1081         data_vlan->vlan_id = htons(vlan_id);
1082         data_vlan->dot1q_vlan_id = htons(vlan_id);
1083         data_vlan->dot1q_priority = priority;
1084     }
1085
1086     if (l3 != IPFIX_PROTO_L3_UNKNOWN) {
1087         struct ipfix_data_record_flow_key_ip *data_ip;
1088
1089         data_ip = ofpbuf_put_zeros(&msg, sizeof *data_ip);
1090         data_ip->ip_version = (l3 == IPFIX_PROTO_L3_IPV4) ? 4 : 6;
1091         data_ip->ip_ttl = flow->nw_ttl;
1092         data_ip->protocol_identifier = flow->nw_proto;
1093         data_ip->ip_diff_serv_code_point = flow->nw_tos >> 2;
1094         data_ip->ip_precedence = flow->nw_tos >> 5;
1095         data_ip->ip_class_of_service = flow->nw_tos;
1096
1097         if (l3 == IPFIX_PROTO_L3_IPV4) {
1098             struct ipfix_data_record_flow_key_ipv4 *data_ipv4;
1099             data_ipv4 = ofpbuf_put_zeros(&msg, sizeof *data_ipv4);
1100             data_ipv4->source_ipv4_address = flow->nw_src;
1101             data_ipv4->destination_ipv4_address = flow->nw_dst;
1102         } else {  /* l3 == IPFIX_PROTO_L3_IPV6 */
1103             struct ipfix_data_record_flow_key_ipv6 *data_ipv6;
1104
1105             data_ipv6 = ofpbuf_put_zeros(&msg, sizeof *data_ipv6);
1106             memcpy(data_ipv6->source_ipv6_address, &flow->ipv6_src,
1107                    sizeof flow->ipv6_src);
1108             memcpy(data_ipv6->destination_ipv6_address, &flow->ipv6_dst,
1109                    sizeof flow->ipv6_dst);
1110             data_ipv6->flow_label_ipv6 = flow->ipv6_label;
1111         }
1112     }
1113
1114     if (l4 != IPFIX_PROTO_L4_UNKNOWN) {
1115         struct ipfix_data_record_flow_key_tcpudp *data_tcpudp;
1116
1117         data_tcpudp = ofpbuf_put_zeros(&msg, sizeof *data_tcpudp);
1118         data_tcpudp->source_transport_port = flow->tp_src;
1119         data_tcpudp->destination_transport_port = flow->tp_dst;
1120     }
1121
1122     flow_key->flow_key_msg_part_size = msg.size;
1123
1124     {
1125         struct timeval now;
1126         uint64_t layer2_octet_delta_count;
1127
1128         /* Calculate the total matched octet count by considering as
1129          * an approximation that all matched packets have the same
1130          * length. */
1131         layer2_octet_delta_count = packet_delta_count * ethernet_total_length;
1132
1133         xgettimeofday(&now);
1134         entry->flow_end_timestamp_usec = now.tv_usec + 1000000LL * now.tv_sec;
1135         entry->flow_start_timestamp_usec = entry->flow_end_timestamp_usec;
1136         entry->packet_delta_count = packet_delta_count;
1137         entry->layer2_octet_delta_count = layer2_octet_delta_count;
1138     }
1139
1140     if (l3 != IPFIX_PROTO_L3_UNKNOWN) {
1141         uint16_t ip_total_length =
1142             ethernet_total_length - ethernet_header_length;
1143
1144         entry->octet_delta_sum_of_squares =
1145             packet_delta_count * ip_total_length * ip_total_length;
1146         entry->minimum_ip_total_length = ip_total_length;
1147         entry->maximum_ip_total_length = ip_total_length;
1148     } else {
1149         entry->octet_delta_sum_of_squares = 0;
1150         entry->minimum_ip_total_length = 0;
1151         entry->maximum_ip_total_length = 0;
1152     }
1153 }
1154
1155 /* Send each single data record in its own data set, to simplify the
1156  * implementation by avoiding having to group record by template ID
1157  * before sending. */
1158 static void
1159 ipfix_put_data_set(uint32_t export_time_sec,
1160                    struct ipfix_flow_cache_entry *entry,
1161                    enum ipfix_flow_end_reason flow_end_reason,
1162                    struct ofpbuf *msg)
1163 {
1164     size_t set_hdr_offset;
1165     struct ipfix_set_header *set_hdr;
1166
1167     set_hdr_offset = msg->size;
1168
1169     /* Put a Data Set. */
1170     set_hdr = ofpbuf_put_zeros(msg, sizeof *set_hdr);
1171     set_hdr->set_id = htons(entry->flow_key.template_id);
1172
1173     /* Copy the flow key part of the data record. */
1174
1175     ofpbuf_put(msg, entry->flow_key.flow_key_msg_part,
1176                entry->flow_key.flow_key_msg_part_size);
1177
1178     /* Put the non-key part of the data record. */
1179
1180     {
1181         struct ipfix_data_record_aggregated_common *data_aggregated_common;
1182         uint64_t export_time_usec, flow_start_delta_usec, flow_end_delta_usec;
1183
1184         /* Calculate the negative deltas relative to the export time
1185          * in seconds sent in the header, not the exact export
1186          * time. */
1187         export_time_usec = 1000000LL * export_time_sec;
1188         flow_start_delta_usec = export_time_usec
1189             - entry->flow_start_timestamp_usec;
1190         flow_end_delta_usec = export_time_usec
1191             - entry->flow_end_timestamp_usec;
1192
1193         data_aggregated_common = ofpbuf_put_zeros(
1194             msg, sizeof *data_aggregated_common);
1195         data_aggregated_common->flow_start_delta_microseconds = htonl(
1196             flow_start_delta_usec);
1197         data_aggregated_common->flow_end_delta_microseconds = htonl(
1198             flow_end_delta_usec);
1199         data_aggregated_common->packet_delta_count = htonll(
1200             entry->packet_delta_count);
1201         data_aggregated_common->layer2_octet_delta_count = htonll(
1202             entry->layer2_octet_delta_count);
1203         data_aggregated_common->flow_end_reason = flow_end_reason;
1204     }
1205
1206     if (entry->octet_delta_sum_of_squares) {  /* IP packet. */
1207         struct ipfix_data_record_aggregated_ip *data_aggregated_ip;
1208
1209         data_aggregated_ip = ofpbuf_put_zeros(
1210             msg, sizeof *data_aggregated_ip);
1211         data_aggregated_ip->octet_delta_sum_of_squares = htonll(
1212             entry->octet_delta_sum_of_squares);
1213         data_aggregated_ip->minimum_ip_total_length = htonll(
1214             entry->minimum_ip_total_length);
1215         data_aggregated_ip->maximum_ip_total_length = htonll(
1216             entry->maximum_ip_total_length);
1217     }
1218
1219     set_hdr = (struct ipfix_set_header*)((uint8_t*)msg->data + set_hdr_offset);
1220     set_hdr->length = htons(msg->size - set_hdr_offset);
1221 }
1222
1223 /* Send an IPFIX message with a single data record. */
1224 static void
1225 ipfix_send_data_msg(struct dpif_ipfix_exporter *exporter,
1226                     uint32_t export_time_sec,
1227                     struct ipfix_flow_cache_entry *entry,
1228                     enum ipfix_flow_end_reason flow_end_reason)
1229 {
1230     uint64_t msg_stub[DIV_ROUND_UP(MAX_MESSAGE_LEN, 8)];
1231     struct ofpbuf msg;
1232     ofpbuf_use_stub(&msg, msg_stub, sizeof msg_stub);
1233
1234     ipfix_init_header(export_time_sec, exporter->seq_number++,
1235                       entry->flow_key.obs_domain_id, &msg);
1236     ipfix_put_data_set(export_time_sec, entry, flow_end_reason, &msg);
1237     ipfix_send_msg(exporter->collectors, &msg);
1238
1239     ofpbuf_uninit(&msg);
1240 }
1241
1242 static void
1243 dpif_ipfix_sample(struct dpif_ipfix_exporter *exporter,
1244                   struct ofpbuf *packet, const struct flow *flow,
1245                   uint64_t packet_delta_count, uint32_t obs_domain_id,
1246                   uint32_t obs_point_id)
1247 {
1248     struct ipfix_flow_cache_entry *entry;
1249
1250     /* Create a flow cache entry from the sample. */
1251     entry = xmalloc(sizeof *entry);
1252     ipfix_cache_entry_init(entry, packet, flow, packet_delta_count,
1253                            obs_domain_id, obs_point_id);
1254     ipfix_cache_update(exporter, entry);
1255 }
1256
1257 void
1258 dpif_ipfix_bridge_sample(struct dpif_ipfix *di, struct ofpbuf *packet,
1259                          const struct flow *flow) OVS_EXCLUDED(mutex)
1260 {
1261     uint64_t packet_delta_count;
1262
1263     ovs_mutex_lock(&mutex);
1264     /* Use the sampling probability as an approximation of the number
1265      * of matched packets. */
1266     packet_delta_count = UINT32_MAX / di->bridge_exporter.probability;
1267     dpif_ipfix_sample(&di->bridge_exporter.exporter, packet, flow,
1268                       packet_delta_count,
1269                       di->bridge_exporter.options->obs_domain_id,
1270                       di->bridge_exporter.options->obs_point_id);
1271     ovs_mutex_unlock(&mutex);
1272 }
1273
1274 void
1275 dpif_ipfix_flow_sample(struct dpif_ipfix *di, struct ofpbuf *packet,
1276                        const struct flow *flow, uint32_t collector_set_id,
1277                        uint16_t probability, uint32_t obs_domain_id,
1278                        uint32_t obs_point_id) OVS_EXCLUDED(mutex)
1279 {
1280     struct dpif_ipfix_flow_exporter_map_node *node;
1281     /* Use the sampling probability as an approximation of the number
1282      * of matched packets. */
1283     uint64_t packet_delta_count = USHRT_MAX / probability;
1284
1285     ovs_mutex_lock(&mutex);
1286     node = dpif_ipfix_find_flow_exporter_map_node(di, collector_set_id);
1287     if (node) {
1288         dpif_ipfix_sample(&node->exporter.exporter, packet, flow,
1289                           packet_delta_count, obs_domain_id, obs_point_id);
1290     }
1291     ovs_mutex_unlock(&mutex);
1292 }
1293
1294 static void
1295 dpif_ipfix_cache_expire(struct dpif_ipfix_exporter *exporter,
1296                         bool forced_end, const uint64_t export_time_usec,
1297                         const uint32_t export_time_sec)
1298 {
1299     struct ipfix_flow_cache_entry *entry, *next_entry;
1300     uint64_t max_flow_start_timestamp_usec;
1301     bool template_msg_sent = false;
1302     enum ipfix_flow_end_reason flow_end_reason;
1303
1304     if (list_is_empty(&exporter->cache_flow_start_timestamp_list)) {
1305         return;
1306     }
1307
1308     max_flow_start_timestamp_usec = export_time_usec -
1309         1000000LL * exporter->cache_active_timeout;
1310
1311     LIST_FOR_EACH_SAFE (entry, next_entry, cache_flow_start_timestamp_list_node,
1312                         &exporter->cache_flow_start_timestamp_list) {
1313         if (forced_end) {
1314             flow_end_reason = FORCED_END;
1315         } else if (entry->flow_start_timestamp_usec
1316                    <= max_flow_start_timestamp_usec) {
1317             flow_end_reason = ACTIVE_TIMEOUT;
1318         } else if (hmap_count(&exporter->cache_flow_key_map)
1319                    > exporter->cache_max_flows) {
1320             /* Enforce exporter->cache_max_flows. */
1321             flow_end_reason = LACK_OF_RESOURCES;
1322         } else {
1323             /* Remaining flows haven't expired yet. */
1324             break;
1325         }
1326
1327         list_remove(&entry->cache_flow_start_timestamp_list_node);
1328         hmap_remove(&exporter->cache_flow_key_map,
1329                     &entry->flow_key_map_node);
1330
1331         if (!template_msg_sent
1332             && (exporter->last_template_set_time + IPFIX_TEMPLATE_INTERVAL)
1333                 <= export_time_sec) {
1334             ipfix_send_template_msg(exporter, export_time_sec,
1335                                     entry->flow_key.obs_domain_id);
1336             exporter->last_template_set_time = export_time_sec;
1337             template_msg_sent = true;
1338         }
1339
1340         /* XXX: Group multiple data records for the same obs domain id
1341          * into the same message. */
1342         ipfix_send_data_msg(exporter, export_time_sec, entry, flow_end_reason);
1343         free(entry);
1344     }
1345 }
1346
/* Reads the current time and derives the two export times from it.
 *
 * '*export_time_usec' receives the current time in microseconds.
 * '*export_time_sec' receives the current time in seconds, rounded up to
 * the next whole second unless the microsecond part is exactly 0. */
static void
get_export_time_now(uint64_t *export_time_usec, uint32_t *export_time_sec)
{
    struct timeval now;

    xgettimeofday(&now);

    *export_time_usec = now.tv_usec + 1000000LL * now.tv_sec;

    /* The IPFIX start and end deltas are negative deltas relative to the
     * export time, so the export time in seconds is rounded up rather
     * than truncated. */
    *export_time_sec = now.tv_sec + (now.tv_usec != 0 ? 1 : 0);
}
1364
/* Expires entries from the flow cache of 'exporter' using the current
 * time as the export time.  'forced_end' is passed through to
 * dpif_ipfix_cache_expire(). */
static void
dpif_ipfix_cache_expire_now(struct dpif_ipfix_exporter *exporter,
                            bool forced_end)
{
    uint64_t now_usec;
    uint32_t now_sec;

    get_export_time_now(&now_usec, &now_sec);
    dpif_ipfix_cache_expire(exporter, forced_end, now_usec, now_sec);
}
1376
1377 void
1378 dpif_ipfix_run(struct dpif_ipfix *di) OVS_EXCLUDED(mutex)
1379 {
1380     uint64_t export_time_usec;
1381     uint32_t export_time_sec;
1382     struct dpif_ipfix_flow_exporter_map_node *flow_exporter_node;
1383
1384     ovs_mutex_lock(&mutex);
1385     get_export_time_now(&export_time_usec, &export_time_sec);
1386     if (di->bridge_exporter.probability > 0) {  /* Bridge exporter enabled. */
1387       dpif_ipfix_cache_expire(
1388           &di->bridge_exporter.exporter, false, export_time_usec,
1389           export_time_sec);
1390     }
1391     HMAP_FOR_EACH (flow_exporter_node, node, &di->flow_exporter_map) {
1392         dpif_ipfix_cache_expire(
1393             &flow_exporter_node->exporter.exporter, false, export_time_usec,
1394             export_time_sec);
1395     }
1396     ovs_mutex_unlock(&mutex);
1397 }
1398
1399 void
1400 dpif_ipfix_wait(struct dpif_ipfix *di) OVS_EXCLUDED(mutex)
1401 {
1402     long long int next_timeout_msec = LLONG_MAX;
1403     struct dpif_ipfix_flow_exporter_map_node *flow_exporter_node;
1404
1405     ovs_mutex_lock(&mutex);
1406     if (di->bridge_exporter.probability > 0) {  /* Bridge exporter enabled. */
1407         if (ipfix_cache_next_timeout_msec(
1408                 &di->bridge_exporter.exporter, &next_timeout_msec)) {
1409             poll_timer_wait_until(next_timeout_msec);
1410         }
1411     }
1412     HMAP_FOR_EACH (flow_exporter_node, node, &di->flow_exporter_map) {
1413         if (ipfix_cache_next_timeout_msec(
1414                 &flow_exporter_node->exporter.exporter, &next_timeout_msec)) {
1415             poll_timer_wait_until(next_timeout_msec);
1416         }
1417     }
1418     ovs_mutex_unlock(&mutex);
1419 }