/*
- * Copyright (c) 2008, 2009 Nicira Networks.
+ * Copyright (c) 2008, 2009, 2010, 2011 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
-#include "cfg.h"
+#include "byte-order.h"
+#include "collectors.h"
#include "flow.h"
-#include "netflow.h"
+#include "lib/netflow.h"
#include "ofpbuf.h"
#include "ofproto.h"
+#include "ofproto/netflow.h"
#include "packets.h"
+#include "poll-loop.h"
#include "socket-util.h"
-#include "svec.h"
#include "timeval.h"
#include "util.h"
-#include "xtoxll.h"
-
-#define THIS_MODULE VLM_netflow
#include "vlog.h"
-#define NETFLOW_V5_VERSION 5
-
-/* Every NetFlow v5 message contains the header that follows. This is
- * followed by up to thirty records that describe a terminating flow.
- * We only send a single record per NetFlow message.
- */
-struct netflow_v5_header {
- uint16_t version; /* NetFlow version is 5. */
- uint16_t count; /* Number of records in this message. */
- uint32_t sysuptime; /* System uptime in milliseconds. */
- uint32_t unix_secs; /* Number of seconds since Unix epoch. */
- uint32_t unix_nsecs; /* Number of residual nanoseconds
- after epoch seconds. */
- uint32_t flow_seq; /* Number of flows since sending
- messages began. */
- uint8_t engine_type; /* Engine type. */
- uint8_t engine_id; /* Engine id. */
- uint16_t sampling_interval; /* Set to zero. */
-};
-BUILD_ASSERT_DECL(sizeof(struct netflow_v5_header) == 24);
-
-/* A NetFlow v5 description of a terminating flow. It is preceded by a
- * NetFlow v5 header.
- */
-struct netflow_v5_record {
- uint32_t src_addr; /* Source IP address. */
- uint32_t dst_addr; /* Destination IP address. */
- uint32_t nexthop; /* IP address of next hop. Set to 0. */
- uint16_t input; /* Input interface index. */
- uint16_t output; /* Output interface index. */
- uint32_t packet_count; /* Number of packets. */
- uint32_t byte_count; /* Number of bytes. */
- uint32_t init_time; /* Value of sysuptime on first packet. */
- uint32_t used_time; /* Value of sysuptime on last packet. */
-
- /* The 'src_port' and 'dst_port' identify the source and destination
- * port, respectively, for TCP and UDP. For ICMP, the high-order
- * byte identifies the type and low-order byte identifies the code
- * in the 'dst_port' field. */
- uint16_t src_port;
- uint16_t dst_port;
-
- uint8_t pad1;
- uint8_t tcp_flags; /* Union of seen TCP flags. */
- uint8_t ip_proto; /* IP protocol. */
- uint8_t ip_tos; /* IP TOS value. */
- uint16_t src_as; /* Source AS ID. Set to 0. */
- uint16_t dst_as; /* Destination AS ID. Set to 0. */
- uint8_t src_mask; /* Source mask bits. Set to 0. */
- uint8_t dst_mask; /* Destination mask bits. Set to 0. */
- uint8_t pad[2];
-};
-BUILD_ASSERT_DECL(sizeof(struct netflow_v5_record) == 48);
+VLOG_DEFINE_THIS_MODULE(netflow);
struct netflow {
uint8_t engine_type; /* Value of engine_type to use. */
uint8_t engine_id; /* Value of engine_id to use. */
long long int boot_time; /* Time when netflow_create() was called. */
- int *fds; /* Sockets for NetFlow collectors. */
- size_t n_fds; /* Number of Netflow collectors. */
- bool add_id_to_iface; /* Put the 7 least signficiant bits of
- * 'engine_id' into the most signficant
+ struct collectors *collectors; /* NetFlow collectors. */
+ bool add_id_to_iface; /* Put the 7 least significant bits of
+ * 'engine_id' into the most significant
* bits of the interface fields. */
uint32_t netflow_cnt; /* Flow sequence number for NetFlow. */
struct ofpbuf packet; /* NetFlow packet being accumulated. */
+ long long int active_timeout; /* Timeout for flows that are still active, in ms; 0 disables active timeouts. */
+ long long int next_timeout; /* Next scheduled active timeout, as a time_msec() value. */
+ long long int reconfig_time; /* time_msec() value when we reconfigured the timeouts. */
};
-static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
-
-static int
-open_collector(char *dst)
+void
+netflow_mask_wc(struct flow *flow, struct flow_wildcards *wc)
{
- char *save_ptr = NULL;
- const char *host_name;
- const char *port_string;
- struct sockaddr_in sin;
- int retval;
- int fd;
-
- /* Glibc 2.7 has a bug in strtok_r when compiling with optimization that
- * can cause segfaults here:
- * http://sources.redhat.com/bugzilla/show_bug.cgi?id=5614.
- * Using "::" instead of the obvious ":" works around it. */
- host_name = strtok_r(dst, ":", &save_ptr);
- port_string = strtok_r(NULL, ":", &save_ptr);
- if (!host_name) {
- ovs_error(0, "%s: bad peer name format", dst);
- return -EAFNOSUPPORT;
- }
- if (!port_string) {
- ovs_error(0, "%s: bad port format", dst);
- return -EAFNOSUPPORT;
- }
-
- memset(&sin, 0, sizeof sin);
- sin.sin_family = AF_INET;
- if (lookup_ip(host_name, &sin.sin_addr)) {
- return -ENOENT;
- }
- sin.sin_port = htons(atoi(port_string));
-
- fd = socket(AF_INET, SOCK_DGRAM, 0);
- if (fd < 0) {
- VLOG_ERR("%s: socket: %s", dst, strerror(errno));
- return -errno;
- }
-
- retval = set_nonblocking(fd);
- if (retval) {
- close(fd);
- return -retval;
- }
-
- retval = connect(fd, (struct sockaddr *) &sin, sizeof sin);
- if (retval < 0) {
- int error = errno;
- VLOG_ERR("%s: connect: %s", dst, strerror(error));
- close(fd);
- return -error;
+ if (flow->dl_type != htons(ETH_TYPE_IP)) { /* 'wc' is left untouched unless 'flow' is an IP flow. */
+ return;
}
-
- return fd;
+ memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); /* Set every mask bit of this field and of the four below. */
+ memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
+ memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
+ memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
+ memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
+ wc->masks.nw_tos |= IP_DSCP_MASK; /* For nw_tos only the IP_DSCP_MASK bits are added; other bits keep their value. */
}
-void
-netflow_expire(struct netflow *nf, const struct ofexpired *expired)
+static void
+gen_netflow_rec(struct netflow *nf, struct netflow_flow *nf_flow,
+ struct ofexpired *expired,
+ uint32_t packet_count, uint32_t byte_count)
{
struct netflow_v5_header *nf_hdr;
struct netflow_v5_record *nf_rec;
- struct timeval now;
- /* NetFlow only reports on IP packets. */
- if (expired->flow.dl_type != htons(ETH_TYPE_IP)) {
- return;
- }
+ if (!nf->packet.size) { /* Packet buffer is empty: a fresh header is written before the record. */
+ struct timespec now;
- time_timeval(&now);
+ time_wall_timespec(&now);
- if (!nf->packet.size) {
nf_hdr = ofpbuf_put_zeros(&nf->packet, sizeof *nf_hdr);
nf_hdr->version = htons(NETFLOW_V5_VERSION);
nf_hdr->count = htons(0);
nf_hdr->sysuptime = htonl(time_msec() - nf->boot_time);
nf_hdr->unix_secs = htonl(now.tv_sec);
- nf_hdr->unix_nsecs = htonl(now.tv_usec * 1000);
+ nf_hdr->unix_nsecs = htonl(now.tv_nsec);
nf_hdr->flow_seq = htonl(nf->netflow_cnt++);
nf_hdr->engine_type = nf->engine_type;
nf_hdr->engine_id = nf->engine_id;
nf_rec = ofpbuf_put_zeros(&nf->packet, sizeof *nf_rec);
nf_rec->src_addr = expired->flow.nw_src;
nf_rec->dst_addr = expired->flow.nw_dst;
- nf_rec->nexthop = htons(0);
+ nf_rec->nexthop = htonl(0);
if (nf->add_id_to_iface) {
uint16_t iface = (nf->engine_id & 0x7f) << 9;
- nf_rec->input = htons(iface | (expired->flow.in_port & 0x1ff));
- nf_rec->output = htons(iface);
- printf("input: %x\n", ntohs(nf_rec->input));
+ nf_rec->input = htons(iface
+ | (ofp_to_u16(expired->flow.in_port.ofp_port) & 0x1ff)); /* Port number is limited to the low 9 bits; 'iface' fills the top 7. */
+ nf_rec->output = htons(iface
+ | (ofp_to_u16(nf_flow->output_iface) & 0x1ff));
} else {
- nf_rec->input = htons(expired->flow.in_port);
- nf_rec->output = htons(0);
+ nf_rec->input = htons(ofp_to_u16(expired->flow.in_port.ofp_port));
+ nf_rec->output = htons(ofp_to_u16(nf_flow->output_iface));
}
- nf_rec->packet_count = htonl(MIN(expired->packet_count, UINT32_MAX));
- nf_rec->byte_count = htonl(MIN(expired->byte_count, UINT32_MAX));
- nf_rec->init_time = htonl(expired->created - nf->boot_time);
- nf_rec->used_time = htonl(MAX(expired->created, expired->used)
+ nf_rec->packet_count = htonl(packet_count); /* Counts come from the caller, already sized to 32 bits. */
+ nf_rec->byte_count = htonl(byte_count);
+ nf_rec->init_time = htonl(nf_flow->created - nf->boot_time);
+ nf_rec->used_time = htonl(MAX(nf_flow->created, expired->used)
- nf->boot_time);
- if (expired->flow.nw_proto == IP_TYPE_ICMP) {
+ if (expired->flow.nw_proto == IPPROTO_ICMP) {
/* In NetFlow, the ICMP type and code are concatenated and
* placed in the 'dst_port' field. */
uint8_t type = ntohs(expired->flow.tp_src);
nf_rec->src_port = expired->flow.tp_src;
nf_rec->dst_port = expired->flow.tp_dst;
}
- nf_rec->tcp_flags = expired->tcp_flags;
+ nf_rec->tcp_flags = nf_flow->tcp_flags;
nf_rec->ip_proto = expired->flow.nw_proto;
- nf_rec->ip_tos = expired->ip_tos;
+ nf_rec->ip_tos = expired->flow.nw_tos & IP_DSCP_MASK; /* Only the bits selected by IP_DSCP_MASK are reported. */
- /* NetFlow messages are limited to 30 records. A length of 1400
- * bytes guarantees that the limit is not exceeded. */
- if (nf->packet.size >= 1400) {
+ /* NetFlow messages are limited to 30 records, so once the header's
+ * count reaches 30 the packet is handed to netflow_run(). */
+ if (ntohs(nf_hdr->count) >= 30) {
netflow_run(nf);
}
}
void
-netflow_run(struct netflow *nf)
+netflow_expire(struct netflow *nf, struct netflow_flow *nf_flow,
+ struct ofexpired *expired)
{
- size_t i;
+ uint64_t pkt_delta = expired->packet_count - nf_flow->packet_count_off; /* Packets counted since the offsets were last recorded at the end of this function. */
+ uint64_t byte_delta = expired->byte_count - nf_flow->byte_count_off; /* Likewise for bytes. */
- if (!nf->packet.size) {
+ nf_flow->last_expired += nf->active_timeout; /* Advanced by one active-timeout interval even when nothing is reported below. */
+
+ /* NetFlow only reports on IP packets and we should only report flows
+ * that actually have traffic. */
+ if (expired->flow.dl_type != htons(ETH_TYPE_IP) || pkt_delta == 0) {
return;
}
- for (i = 0; i < nf->n_fds; i++) {
- if (send(nf->fds[i], nf->packet.data, nf->packet.size, 0) == -1) {
- VLOG_WARN_RL(&rl, "netflow message send failed: %s",
- strerror(errno));
+ if ((byte_delta >> 32) <= 175) {
+ /* NetFlow v5 records are limited to 32-bit counters. If we've wrapped
+ * a counter, send as multiple records so we don't lose track of any
+ * traffic. We try to evenly distribute the packet and byte counters,
+ * so that the bytes-per-packet lengths don't look wonky across the
+ * records. */
+ while (byte_delta) {
+ int n_recs = (byte_delta + UINT32_MAX - 1) / UINT32_MAX; /* Records still needed: byte_delta / UINT32_MAX, rounded up. */
+ uint32_t pkt_count = pkt_delta / n_recs;
+ uint32_t byte_count = byte_delta / n_recs;
+
+ gen_netflow_rec(nf, nf_flow, expired, pkt_count, byte_count);
+
+ pkt_delta -= pkt_count;
+ byte_delta -= byte_count;
}
+ } else {
+ /* In 600 seconds, a 10GbE link can theoretically transmit 75 * 10**10
+ * == 175 * 2**32 bytes. The byte counter is bigger than that, so it's
+ * probably a bug--for example, the netdev code uses UINT64_MAX to
+ * report "unknown value", and perhaps that has leaked through to here.
+ *
+ * We wouldn't want to hit the loop above in this case, because it
+ * would try to send up to UINT32_MAX netflow records, which would take
+ * a long time.
+ */
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+
+ VLOG_WARN_RL(&rl, "impossible byte counter %"PRIu64, byte_delta);
}
- nf->packet.size = 0;
+
+ /* Update flow tracking data, so that the next call computes its deltas
+ * from the counts seen now. This is not reached when the early return
+ * above is taken. */
+ nf_flow->created = 0;
+ nf_flow->packet_count_off = expired->packet_count;
+ nf_flow->byte_count_off = expired->byte_count;
+ nf_flow->tcp_flags = 0;
}
-static void
-clear_collectors(struct netflow *nf)
+/* Passes any accumulated NetFlow packet to collectors_send() and then
+ * empties the packet buffer.
+ *
+ * Returns true if it's time to send out a round of NetFlow active timeouts,
+ * false otherwise. When it returns true, 'next_timeout' is moved to 1000 ms
+ * after the current time_msec(). Always returns false while
+ * 'active_timeout' is 0. */
+bool
+netflow_run(struct netflow *nf)
{
- size_t i;
+ if (nf->packet.size) {
+ collectors_send(nf->collectors, nf->packet.data, nf->packet.size);
+ nf->packet.size = 0;
+ }
+
+ if (nf->active_timeout && time_msec() >= nf->next_timeout) {
+ nf->next_timeout = time_msec() + 1000;
+ return true;
+ } else {
+ return false;
+ }
+}
- for (i = 0; i < nf->n_fds; i++) {
- close(nf->fds[i]);
+void
+netflow_wait(struct netflow *nf)
+{ /* Mirrors the two conditions netflow_run() acts on: the active-timeout
+ * deadline and a non-empty packet buffer. */
+ if (nf->active_timeout) {
+ poll_timer_wait_until(nf->next_timeout); /* Presumably wakes the poll loop at 'next_timeout' -- confirm against poll-loop.h. */
+ }
+ if (nf->packet.size) {
+ poll_immediate_wake(); /* A packet is pending; presumably this makes the next poll return at once -- confirm. */
}
- free(nf->fds);
- nf->fds = NULL;
- nf->n_fds = 0;
}
int
-netflow_set_collectors(struct netflow *nf, const struct svec *collectors_)
+netflow_set_options(struct netflow *nf,
+ const struct netflow_options *nf_options)
{
- struct svec collectors;
int error = 0;
- size_t i;
-
- clear_collectors(nf);
-
- svec_clone(&collectors, collectors_);
- svec_sort_unique(&collectors);
-
- nf->fds = xmalloc(sizeof *nf->fds * collectors.n);
- for (i = 0; i < collectors.n; i++) {
- const char *name = collectors.names[i];
- char *tmpname = xstrdup(name);
- int fd = open_collector(tmpname);
- free(tmpname);
- if (fd >= 0) {
- nf->fds[nf->n_fds++] = fd;
- } else {
- VLOG_WARN("couldn't open connection to collector (%s), "
- "ignoring %s\n", strerror(-fd), name);
- if (!error) {
- error = -fd;
- }
- }
+ long long int old_timeout;
+
+ nf->engine_type = nf_options->engine_type;
+ nf->engine_id = nf_options->engine_id;
+ nf->add_id_to_iface = nf_options->add_id_to_iface;
+
+ collectors_destroy(nf->collectors); /* The previous collector set is always dropped before the new one is created. */
+ collectors_create(&nf_options->collectors, 0, &nf->collectors); /* NOTE(review): any result of collectors_create() is unused, so 'error' stays 0 and this function always returns 0 -- confirm that is intended. */
+
+ old_timeout = nf->active_timeout;
+ if (nf_options->active_timeout >= 0) {
+ nf->active_timeout = nf_options->active_timeout;
+ } else {
+ nf->active_timeout = NF_ACTIVE_TIMEOUT_DEFAULT; /* A negative option value selects the default. */
+ }
+ nf->active_timeout *= 1000; /* Stored in ms; presumably the option value is in seconds -- confirm. */
+ if (old_timeout != nf->active_timeout) {
+ nf->reconfig_time = time_msec();
+ nf->next_timeout = time_msec(); /* Due now, so the next netflow_run() reports an active-timeout round if the timeout is nonzero. */
}
- svec_destroy(&collectors);
return error;
}
-void
-netflow_set_engine(struct netflow *nf, uint8_t engine_type,
- uint8_t engine_id, bool add_id_to_iface)
-{
- nf->engine_type = engine_type;
- nf->engine_id = engine_id;
- nf->add_id_to_iface = add_id_to_iface;
-}
-
struct netflow *
netflow_create(void)
{
- struct netflow *nf = xmalloc(sizeof *nf);
+ struct netflow *nf = xzalloc(sizeof *nf); /* NOTE(review): xzalloc presumably zero-fills, which would cover the timeout fields not set in the lines visible here -- confirm. */
nf->engine_type = 0;
nf->engine_id = 0;
nf->boot_time = time_msec();
- nf->fds = NULL;
- nf->n_fds = 0;
+ nf->collectors = NULL; /* No collector set until netflow_set_options() creates one. */
nf->add_id_to_iface = false;
nf->netflow_cnt = 0;
ofpbuf_init(&nf->packet, 1500);
{
if (nf) {
ofpbuf_uninit(&nf->packet);
- clear_collectors(nf);
+ collectors_destroy(nf->collectors); /* NOTE(review): 'collectors' may still be NULL here; presumably collectors_destroy() accepts NULL -- confirm. */
free(nf);
}
}
+
+/* Initializes a new 'nf_flow' given that the caller has already cleared it to
+ * all-zero-bits.
+ *
+ * The body is currently empty and 'nf_flow' is not accessed. */
+void
+netflow_flow_init(struct netflow_flow *nf_flow OVS_UNUSED)
+{
+ /* Nothing to do. */
+}
+
+/* Resets 'nf_flow' to all-zero-bits, except that its 'output_iface' member
+ * keeps the value it had on entry. */
+void
+netflow_flow_clear(struct netflow_flow *nf_flow)
+{
+    /* Saved first because the memset() below wipes the whole structure. */
+    ofp_port_t saved_iface = nf_flow->output_iface;
+
+    memset(nf_flow, 0, sizeof *nf_flow);
+
+    nf_flow->output_iface = saved_iface;
+}
+
+/* Records 'used' as the creation time of 'nf_flow' if it has none yet, and
+ * refreshes 'nf_flow->last_expired' to the current time_msec() unless active
+ * timeouts are in effect and the flow already carries a 'last_expired' value
+ * that is not older than the last timeout reconfiguration.
+ *
+ * 'nf' may be NULL, in which case 'last_expired' is always refreshed. */
+void
+netflow_flow_update_time(struct netflow *nf, struct netflow_flow *nf_flow,
+                         long long int used)
+{
+    if (nf_flow->created == 0) {
+        nf_flow->created = used;
+    }
+
+    /* Leave 'last_expired' alone only when active timeouts are enabled and
+     * the stored value is set and at least as recent as the last
+     * reconfiguration. */
+    if (nf && nf->active_timeout && nf_flow->last_expired
+        && nf->reconfig_time <= nf_flow->last_expired) {
+        return;
+    }
+
+    /* Keep the time updated to prevent a flood of expiration in
+     * the future. */
+    nf_flow->last_expired = time_msec();
+}
+
+/* Adds the bits set in 'tcp_flags' to the TCP flags accumulated in
+ * 'nf_flow'; bits that are already set stay set. */
+void
+netflow_flow_update_flags(struct netflow_flow *nf_flow, uint8_t tcp_flags)
+{
+    nf_flow->tcp_flags = nf_flow->tcp_flags | tcp_flags;
+}
+
+/* Returns true if active timeouts are enabled on 'nf' and more than
+ * 'nf->active_timeout' has passed since 'nf_flow->last_expired', false
+ * otherwise. */
+bool
+netflow_active_timeout_expired(struct netflow *nf, struct netflow_flow *nf_flow)
+{
+    if (!nf->active_timeout) {
+        /* Active timeouts are disabled. */
+        return false;
+    }
+
+    return nf_flow->last_expired + nf->active_timeout < time_msec();
+}