1 /* See the DRL-LICENSE file for this file's software license. */
4 * ulogd output target for DRL: GRD and FPS
6 * Ken Yocum <kyocum@cs.ucsd.edu>
8 * Original shell of this code from ulogd_NETFLOW
9 * Like that code, we keep track of per-slice data rates
10 * out of this slice. Thus we are rate limiting particular slices
11 * across multiple boxes, ensuring that their outbound rate does not
12 * exceed some fixed limit.
16 * Enforcer: linux drop percentage.
18 * This file reads packets from the netlink socket. It updates all
19 * the hashmaps which track how much data has arrived per flow.
20 * It starts two threads for this limiter.
21 * One thread handles periodic estimation.
22 * The other thread handles communication with other limiters.
27 * ulogd_DRL: attach to netlink socket, accept packets. replaces ratelimit.cc
28 * util.c: generic hashing functions, flow comparisons, sundry items.
29 * gossip.c: Recv gossip, send gossip.
30 * peer_comm.c: Thread to listen for updates from other limiters.
31 * estimate.c: Thread to calculate the local limits.
34 * Ken Yocum <kyocum@cs.ucsd.edu>
37 * Some code appropriated from ulogd_NETFLOW:
39 * Mark Huang <mlhuang@cs.princeton.edu>
40 * Copyright (C) 2004-2005 The Trustees of Princeton University
42 * Based on admindump.pl by Mic Bowman and Paul Brett
43 * Copyright (c) 2002 Intel Corporation
47 /* Enable GNU glibc extensions */
53 /* va_start() and friends */
59 /* strstr() and friends */
62 /* dirname() and basename() */
65 /* fork() and wait() */
66 #include <sys/types.h>
70 /* errno and assert() */
77 /* time() and friends */
82 #include <sys/socket.h>
83 #include <netinet/in.h>
84 #include <arpa/inet.h>
86 /* ICMP definitions */
87 #include <netinet/ip.h>
88 #include <netinet/ip_icmp.h>
93 /* pthread_create() */
99 /* Signal definitions - so that we can catch SIGHUP and update config. */
102 #include <ulogd/ulogd.h>
103 #include <ulogd/conffile.h>
105 /* Perhaps useful for files within vservers? */
106 #if !defined(STANDALONE) && HAVE_LIBPROPER
107 #include <proper/prop.h>
111 * Jenkins hash support
112 * lives in raterouter.h
116 #include "raterouter.h"
118 #include "ratetypes.h" /* needs util and pthread.h */
119 #include "calendar.h"
123 * /etc/ulogd.conf configuration options
124 * Add the config options for DRL.
127 static config_entry_t bind_addr = {
130 .type = CONFIG_TYPE_STRING,
131 .options = CONFIG_OPT_NONE,
132 .u = { .string = "AUTO" },
135 static config_entry_t create_htb = {
138 .type = CONFIG_TYPE_INT,
139 .options = CONFIG_OPT_NONE,
143 static config_entry_t enforce_on = {
146 .type = CONFIG_TYPE_INT,
147 .options = CONFIG_OPT_NONE,
151 static config_entry_t partition = {
153 .key = "partition_set",
154 .type = CONFIG_TYPE_INT,
155 .options = CONFIG_OPT_NONE,
156 .u = { .value = 0xfffffff },
159 static config_entry_t sfq_slice = {
162 .type = CONFIG_TYPE_STRING,
163 .options = CONFIG_OPT_NONE,
164 .u = { .string = "NONE" },
167 static config_entry_t netem_slice = {
169 .key = "netem_slice",
170 .type = CONFIG_TYPE_STRING,
171 .options = CONFIG_OPT_NONE,
172 .u = { .string = "ALL" },
175 static config_entry_t netem_loss = {
176 .next = &netem_slice,
178 .type = CONFIG_TYPE_INT,
179 .options = CONFIG_OPT_NONE,
183 static config_entry_t netem_delay = {
185 .key = "netem_delay",
186 .type = CONFIG_TYPE_INT,
187 .options = CONFIG_OPT_NONE,
191 static config_entry_t drl_configfile = {
192 .next = &netem_delay,
193 .key = "drl_configfile",
194 .type = CONFIG_TYPE_STRING,
195 .options = CONFIG_OPT_MANDATORY,
196 .u = { .string = "drl.xml" },
199 /** The administrative bandwidth limit (mbps) for the local node. The node
200 * will not set a limit higher than this, even when distributed capacity is
201 * available. Set to 0 for no limit. */
202 static config_entry_t nodelimit = {
203 .next = &drl_configfile,
205 .type = CONFIG_TYPE_INT,
206 .options = CONFIG_OPT_MANDATORY,
210 /** Determines the verbosity of logging. */
211 static config_entry_t drl_loglevel = {
213 .key = "drl_loglevel",
214 .type = CONFIG_TYPE_INT,
215 .options = CONFIG_OPT_MANDATORY,
216 .u = { .value = LOG_WARN },
219 /** The path of the logfile. */
220 static config_entry_t drl_logfile = {
221 .next = &drl_loglevel,
222 .key = "drl_logfile",
223 .type = CONFIG_TYPE_STRING,
224 .options = CONFIG_OPT_MANDATORY,
225 .u = { .string = "drl_logfile.log" },
228 /** The choice of DRL protocol. */
229 static config_entry_t policy = {
230 .next = &drl_logfile,
232 .type = CONFIG_TYPE_STRING,
233 .options = CONFIG_OPT_MANDATORY,
234 .u = { .string = "GRD" },
237 /** The estimate interval, in milliseconds. */
238 static config_entry_t estintms = {
241 .type = CONFIG_TYPE_INT,
242 .options = CONFIG_OPT_MANDATORY,
243 .u = { .value = 100 },
246 #define config_entries (&estintms)
249 * Debug functionality
/* Expands to four comma-separated expressions: the first four bytes of addr,
 * read in memory order (byte 0 first).  addr must be an lvalue, since its
 * address is taken.  Presumably meant for printing an IPv4 address that is
 * held in network byte order -- confirm against callers. */
#define NIPQUAD(addr)                      \
    ((unsigned char *) &(addr))[0],        \
    ((unsigned char *) &(addr))[1],        \
    ((unsigned char *) &(addr))[2],        \
    ((unsigned char *) &(addr))[3]
/* Expands to four comma-separated expressions: the first four bytes of addr,
 * read in reverse memory order (byte 3 first, byte 0 last).  addr must be an
 * lvalue, since its address is taken. */
#define IPQUAD(addr)                       \
    ((unsigned char *) &(addr))[3],        \
    ((unsigned char *) &(addr))[2],        \
    ((unsigned char *) &(addr))[1],        \
    ((unsigned char *) &(addr))[0]
270 /* Salt for the hash functions */
274 * Hash slice name lookups on context ID.
277 /* Special context IDs */
/* Sentinel context ID.  The negative literal is parenthesized so that it
 * always expands as a single operand. */
#define UNKNOWN_XID (-1)
282 CONNECTION_REFUSED_XID = 65536, /* MAX_S_CONTEXT + 1 */
289 pthread_t estimate_thread;
290 pthread_t signal_thread;
291 pthread_t comm_thread;
292 uint32_t local_ip = 0;
294 extern FILE *logfile;
295 extern uint8_t system_loglevel;
296 extern uint8_t do_enforcement;
298 /* Used to simulate partitions. */
299 int do_partition = 0;
300 int partition_set = 0xfffffff;
304 static inline uint32_t
305 hash_flow(uint8_t protocol, uint32_t src_ip, uint16_t src_port, uint32_t dst_ip, uint16_t dst_port, uint32_t hash_max)
307 unsigned char mybytes[FLOWKEYSIZE];
308 mybytes[0] = protocol;
309 *(uint32_t*)(&(mybytes[1])) = src_ip;
310 *(uint32_t*)(&(mybytes[5])) = dst_ip;
311 *(uint32_t*)(&(mybytes[9])) = (src_port << 16) | dst_port;
312 return jhash(mybytes,FLOWKEYSIZE,salt) & (hash_max - 1);
315 uint32_t sampled_hasher(const key_flow *key) {
316 /* Last arg is UINT_MAX because sampled flow keeps track of its own capacity. */
317 return hash_flow(key->protocol, key->source_ip, key->source_port, key->dest_ip, key->dest_port, UINT_MAX);
320 uint32_t standard_hasher(const key_flow *key) {
321 return hash_flow(key->protocol, key->source_ip, key->source_port, key->dest_ip, key->dest_port, STD_FLOW_HASH_SIZE);
324 uint32_t multiple_hasher(const key_flow *key) {
325 return hash_flow(key->protocol, key->source_ip, key->source_port, key->dest_ip, key->dest_port, MUL_FLOW_HASH_SIZE);
333 /* Interesting keys */
357 #define INTR_IDS (sizeof(intr_ids)/sizeof(intr_ids[0]))
358 static struct intr_id intr_ids[] = {
359 [OOB_TIME_SEC] = { "oob.time.sec", 0 },
360 [OOB_MARK] = { "oob.mark", 0 },
361 [IP_SADDR] = { "ip.saddr", 0 },
362 [IP_DADDR] = { "ip.daddr", 0 },
363 [IP_TOTLEN] = { "ip.totlen", 0 },
364 [IP_PROTOCOL] = { "ip.protocol", 0 },
365 [TCP_SPORT] = { "tcp.sport", 0 },
366 [TCP_DPORT] { "tcp.dport", 0 },
367 [TCP_ACK] = { "tcp.ack", 0 },
368 [TCP_FIN] = { "tcp.fin", 0 },
369 [TCP_SYN] = { "tcp.syn", 0 },
370 [TCP_RST] = { "tcp.rst", 0 },
371 [UDP_SPORT] = { "udp.sport", 0 },
372 [UDP_DPORT] = { "udp.dport", 0 },
373 [ICMP_TYPE] = { "icmp.type", 0 },
374 [ICMP_CODE] = { "icmp.code", 0 },
375 [GRE_FLAG_KEY] = { "gre.flag.key", 0 },
376 [GRE_VERSION] = { "gre.version", 0 },
377 [GRE_KEY] = { "gre.key", 0 },
378 [PPTP_CALLID] = { "pptp.callid", 0 },
/* Yields the value member of the resolved result for interesting-key index x;
 * callers pick the typed field they need (e.g. GET_VALUE(IP_SADDR).ui32). */
#define GET_VALUE(x) (intr_ids[(x)].res->value)
/* Rounds t down to a whole multiple of 86400 (24*60*60, the number of
 * seconds in a day) using integer division. */
#define DATE(t) ((t) / 86400 * 86400)
385 static int _output_drl(ulog_iret_t *res)
388 uint32_t src_ip, dst_ip;
389 uint16_t src_port, dst_port;
396 protocol = GET_VALUE(IP_PROTOCOL).ui8;
397 src_ip = GET_VALUE(IP_SADDR).ui32;
398 dst_ip = GET_VALUE(IP_DADDR).ui32;
399 xid = GET_VALUE(OOB_MARK).ui32;
404 src_port = GET_VALUE(TCP_SPORT).ui16;
405 dst_port = GET_VALUE(TCP_DPORT).ui16;
409 /* netflow had an issue with many udp flows and set
410 * src_port=0 to handle it. We don't.
412 src_port = GET_VALUE(UDP_SPORT).ui16;
415 * traceroutes create a large number of flows in the db
416 * this is a quick hack to catch the most common form
417 * of traceroute (basically we're mapping any UDP packet
418 * in the 33435-33524 range to the "trace" port, 33524 is
419 * 3 packets * nhops (30).
421 dst_port = GET_VALUE(UDP_DPORT).ui16;
422 if (dst_port >= 33435 && dst_port <= 33524)
427 src_port = GET_VALUE(ICMP_TYPE).ui8;
428 dst_port = GET_VALUE(ICMP_CODE).ui8;
431 * We special case some of the ICMP traffic that the kernel
432 * always generates. Since this is attributed to root, it
433 * creates significant "noise" in the output. We want to be
434 * able to quickly see that root is generating traffic.
436 if (xid == ROOT_XID) {
437 if (src_port == ICMP_ECHOREPLY)
438 xid = ICMP_ECHOREPLY_XID;
439 else if (src_port == ICMP_UNREACH)
440 xid = ICMP_UNREACH_XID;
445 if (GET_VALUE(GRE_FLAG_KEY).b) {
446 if (GET_VALUE(GRE_VERSION).ui8 == 1) {
447 /* Get PPTP call ID */
448 src_port = GET_VALUE(PPTP_CALLID).ui16;
450 /* XXX Truncate GRE keys to 16 bits */
451 src_port = (uint16_t) GET_VALUE(GRE_KEY).ui32;
454 /* No key available */
461 /* This is the default key for packets from unsupported protocols */
467 key.protocol = protocol;
468 key.source_ip = src_ip;
469 key.dest_ip = dst_ip;
470 key.source_port = src_port;
471 key.dest_port = dst_port;
472 key.packet_size = GET_VALUE(IP_TOTLEN).ui16;
473 key.packet_time = (time_t) GET_VALUE(OOB_TIME_SEC).ui32;
475 pthread_rwlock_rdlock(&limiter.limiter_lock); /* CLUNK! */
477 leaf = (leaf_t *) map_search(limiter.stable_instance.leaf_map, &xid, sizeof(xid));
479 /* Even if the packet doesn't match any specific xid, it should still
480 * count in the machine-type tables. This catches root (xid == 0) and
481 * unclassified (xid = fff) packets, which don't have map entries. */
483 ident = limiter.stable_instance.last_machine;
485 ident = leaf->parent;
489 pthread_mutex_lock(&ident->table_mutex);
491 /* Update the identity's table. */
492 ident->table_sample_function(ident->table, &key);
494 #ifdef SHADOW_ACCTING
496 /* Update the shadow perfect copy of the accounting table. */
497 standard_table_sample((standard_flow_table) ident->shadow_table, &key);
501 pthread_mutex_unlock(&ident->table_mutex);
503 ident = ident->parent;
506 pthread_rwlock_unlock(&limiter.limiter_lock); /* CLINK! */
/* Get the key IDs for all of the keys we are interested in. */
512 static int get_ids(void)
515 struct intr_id *cur_id;
517 for (i = 0; i < INTR_IDS; i++) {
518 cur_id = &intr_ids[i];
519 cur_id->res = keyh_getres(keyh_getid(cur_id->name));
521 ulogd_log(ULOGD_ERROR,
522 "Cannot resolve keyhash id for %s\n",
530 static void free_identity(identity_t *ident) {
532 free_comm(&ident->comm);
535 ident->table_destroy_function(ident->table);
538 if (ident->loop_action) {
539 ident->loop_action->valid = 0;
542 if (ident->comm_action) {
543 ident->comm_action->valid = 0;
546 pthread_mutex_destroy(&ident->table_mutex);
552 static void free_identity_map(map_handle map) {
553 identity_t *tofree = NULL;
555 map_reset_iterate(map);
556 while ((tofree = (identity_t *) map_next(map))) {
557 free_identity(tofree);
563 static void free_instance(drl_instance_t *instance) {
564 if (instance->leaves)
565 free(instance->leaves);
566 if (instance->leaf_map)
567 free_map(instance->leaf_map, 0);
568 if (instance->ident_map)
569 free_identity_map(instance->ident_map);
570 if (instance->machines)
571 free(instance->machines);
573 free(instance->sets);
575 /* FIXME: Drain the calendar first and free all the entries. */
580 memset(instance, 0, sizeof(drl_instance_t));
583 static void free_failed_config(parsed_configs configs, drl_instance_t *instance) {
585 if (configs.machines)
586 free_ident_list(configs.machines);
588 free_ident_list(configs.sets);
592 free_instance(instance);
595 static identity_t *new_identity(ident_config *config) {
596 identity_t *ident = malloc(sizeof(identity_t));
597 remote_node_t *comm_nodes = malloc(sizeof(remote_node_t)*config->peer_count);
598 ident_peer *peer = config->peers;
605 if (comm_nodes == NULL) {
610 memset(ident, 0, sizeof(identity_t));
611 memset(comm_nodes, 0, config->peer_count * sizeof(remote_node_t));
613 ident->id = config->id;
614 ident->limit = (uint32_t) (((double) config->limit * 1000.0) / 8.0);
615 ident->fixed_ewma_weight = config->fixed_ewma_weight;
616 ident->communication_intervals = config->communication_intervals;
617 ident->mainloop_intervals = config->mainloop_intervals;
618 ident->ewma_weight = pow(ident->fixed_ewma_weight,
619 (limiter.estintms/1000.0) * config->mainloop_intervals);
620 ident->parent = NULL;
621 ident->independent = config->independent;
623 pthread_mutex_init(&ident->table_mutex, NULL);
624 switch (config->accounting) {
627 standard_table_create(standard_hasher, &ident->common);
629 /* Ugly function pointer casting. Makes things sufficiently
630 * generic, though. */
631 ident->table_sample_function =
632 (int (*)(void *, const key_flow *)) standard_table_sample;
633 ident->table_cleanup_function =
634 (int (*)(void *)) standard_table_cleanup;
635 ident->table_update_function =
636 (void (*)(void *, struct timeval, double)) standard_table_update_flows;
637 ident->table_destroy_function =
638 (void (*)(void *)) standard_table_destroy;
643 multiple_table_create(multiple_hasher, MUL_INTERVAL_COUNT, &ident->common);
645 ident->table_sample_function =
646 (int (*)(void *, const key_flow *)) multiple_table_sample;
647 ident->table_cleanup_function =
648 (int (*)(void *)) multiple_table_cleanup;
649 ident->table_update_function =
650 (void (*)(void *, struct timeval, double)) multiple_table_update_flows;
651 ident->table_destroy_function =
652 (void (*)(void *)) multiple_table_destroy;
656 ident->table = sampled_table_create(sampled_hasher,
657 ident->limit * IDENT_CLEAN_INTERVAL,
658 SAMPLEHOLD_PERCENTAGE, SAMPLEHOLD_OVERFACTOR, &ident->common);
660 ident->table_sample_function =
661 (int (*)(void *, const key_flow *)) sampled_table_sample;
662 ident->table_cleanup_function =
663 (int (*)(void *)) sampled_table_cleanup;
664 ident->table_update_function =
665 (void (*)(void *, struct timeval, double)) sampled_table_update_flows;
666 ident->table_destroy_function =
667 (void (*)(void *)) sampled_table_destroy;
671 ident->table = simple_table_create(&ident->common);
673 ident->table_sample_function =
674 (int (*)(void *, const key_flow *)) simple_table_sample;
675 ident->table_cleanup_function =
676 (int (*)(void *)) simple_table_cleanup;
677 ident->table_update_function =
678 (void (*)(void *, struct timeval, double)) simple_table_update_flows;
679 ident->table_destroy_function =
680 (void (*)(void *)) simple_table_destroy;
684 #ifdef SHADOW_ACCTING
686 ident->shadow_table = standard_table_create(standard_hasher, &ident->shadow_common);
688 if (ident->shadow_table == NULL) {
689 ident->table_destroy_function(ident->table);
696 /* Make sure the table was allocated. */
697 if (ident->table == NULL) {
703 comm_nodes[peer_slot].addr = peer->ip;
704 comm_nodes[peer_slot].port = htons(LIMITER_LISTEN_PORT);
709 if (new_comm(&ident->comm, config, comm_nodes)) {
710 printlog(LOG_CRITICAL, "Failed to create communication structure.\n");
714 ident->comm.remote_nodes = comm_nodes;
716 if (!create_htb.u.value) {
717 ident->htb_node = config->htb_node;
718 ident->htb_parent = config->htb_parent;
724 static int validate_htb_exists(int node, int parent) {
725 FILE *pipe = popen("/sbin/tc class show dev eth0", "r");
728 while (fgets(line, 200, pipe) != NULL) {
732 sscanf(line, "class htb 1:%x parent 1:%x prio %s", &n, &p, ignore);
733 if (n == node && p == parent) {
743 /* Determines the validity of the parameters of one ident_config.
748 static int validate_config(ident_config *config) {
749 /* Limit must be a positive integer. */
750 if (config->limit < 1) {
754 /* Commfabric must be a valid choice (COMM_MESH or COMM_GOSSIP). */
755 if (config->commfabric != COMM_MESH &&
756 config->commfabric != COMM_GOSSIP) {
760 /* If commfabric is COMM_GOSSIP, this must be a positive integer. */
761 if (config->commfabric == COMM_GOSSIP && config->branch < 1) {
765 /* Accounting must be a valid choice (ACT_STANDARD, ACT_SAMPLEHOLD,
766 * ACT_SIMPLE, ACT_MULTIPLE). */
767 if (config->accounting != ACT_STANDARD &&
768 config->accounting != ACT_SAMPLEHOLD &&
769 config->accounting != ACT_SIMPLE &&
770 config->accounting != ACT_MULTIPLE) {
774 /* Ewma weight must be greater than or equal to zero. */
775 if (config->fixed_ewma_weight < 0) {
779 if (!create_htb.u.value) {
780 if (config->htb_node < 0 || config->htb_parent < 0) {
781 printlog(LOG_CRITICAL, "When create_htb is disabled in ulogd.conf, an identity must specify the htb_node and htb_parent propertities in its configuration.\n");
785 if (validate_htb_exists(config->htb_node, config->htb_parent)) {
786 printlog(LOG_CRITICAL, "Identity specified htb node %x with parent %x. No such node/parent combo seems to exist!\n", config->htb_node, config->htb_parent);
790 if (config->htb_node > -1 || config->htb_parent > -1) {
791 printlog(LOG_WARN, "htb_node or htb_parent are configured but ignored because we're configured to create our own htb hierarchy.\n");
795 /* Note: Parsing stage requires that each ident has at least one peer. */
802 static int validate_configs(parsed_configs configs, drl_instance_t *instance) {
804 ident_config *mlist = configs.machines;
805 ident_config *slist = configs.sets;
806 ident_config *tmp = NULL;
810 /* ID must be non-zero and unique. */
811 /* This is ugly and hackish, but this function will be called rarely.
812 * I'm tired of trying to be clever. */
814 printlog(LOG_CRITICAL, "Negative ident id: %d (%x) ?\n", mlist->id, mlist->id);
819 if (mlist->id == tmp->id) {
820 printlog(LOG_CRITICAL, "Duplicate ident id: %d (%x)\n", mlist->id, mlist->id);
827 if (mlist->id == tmp->id) {
828 printlog(LOG_CRITICAL, "Duplicate ident id: %d (%x)\n", mlist->id, mlist->id);
834 if (validate_config(mlist)) {
835 printlog(LOG_CRITICAL, "Invalid ident parameters for id: %d (%x)\n", mlist->id, mlist->id);
839 if (mlist->independent) {
840 printlog(LOG_CRITICAL, "Makes no sense to have independent machine node - setting independent to false.\n");
841 mlist->independent = 0;
847 instance->sets = malloc(configs.set_count * sizeof(identity_t *));
848 if (instance->sets == NULL) {
849 printlog(LOG_CRITICAL, "Not enough memory to allocate set identity collection.\n");
853 memset(instance->sets, 0, configs.set_count * sizeof(identity_t *));
854 instance->set_count = configs.set_count;
856 /* For sets, make sure that the hierarchy is valid. */
858 ident_member *members = slist->members;
860 /* ID must be non-zero and unique. */
862 printlog(LOG_CRITICAL, "Negative ident id: %d (%x) ?\n", slist->id, slist->id);
867 if (slist->id == tmp->id) {
868 printlog(LOG_CRITICAL, "Duplicate ident id: %d (%x)\n", slist->id, slist->id);
874 if (validate_config(slist)) {
875 printlog(LOG_CRITICAL, "Invalid ident parameters for id: %d (%x)\n", slist->id, slist->id);
879 /* Allocate an identity_t for this set-type identity. */
880 instance->sets[i] = new_identity(slist);
882 if (instance->sets[i] == NULL) {
883 printlog(LOG_CRITICAL, "Not enough memory to allocate set identity.\n");
887 /* Loop through children and look up each in leaf or ident map
888 * depending on the type of child. Set the child's parent pointer
889 * to the identity we just created above, unless it is already set,
890 * in which case we have an error. */
892 identity_t *child_ident = NULL;
893 leaf_t *child_leaf = NULL;
895 switch (members->type) {
897 child_leaf = map_search(instance->leaf_map, &members->value,
898 sizeof(members->value));
899 if (child_leaf == NULL) {
900 printlog(LOG_CRITICAL, "xid: child leaf not found.\n");
903 if (child_leaf->parent != NULL) {
904 /* Error - This leaf already has a parent. */
905 printlog(LOG_CRITICAL, "xid: child already has a parent.\n");
908 child_leaf->parent = instance->sets[i];
911 child_ident = map_search(instance->ident_map, &members->value,
912 sizeof(members->value));
913 if (child_ident == NULL) {
914 printlog(LOG_CRITICAL, "guid: child identity not found.\n");
917 if (child_ident->parent != NULL) {
918 /* Error - This identity already has a parent. */
919 printlog(LOG_CRITICAL, "guid: child identity already has a parent.\n");
922 child_ident->parent = instance->sets[i];
925 /* Error - shouldn't be possible. */
928 members = members->next;
931 map_insert(instance->ident_map, &instance->sets[i]->id,
932 sizeof(instance->sets[i]->id), instance->sets[i]);
940 static int fill_set_leaf_pointer(drl_instance_t *instance, identity_t *ident) {
942 identity_t *current_ident;
943 leaf_t *current_leaf;
944 leaf_t **leaves = malloc(instance->leaf_count * sizeof(leaf_t *));
945 if (leaves == NULL) {
949 map_reset_iterate(instance->leaf_map);
950 while ((current_leaf = (leaf_t *) map_next(instance->leaf_map))) {
951 current_ident = current_leaf->parent;
952 while (current_ident != NULL && current_ident != instance->last_machine) {
953 if (current_ident == ident) {
954 /* Found the ident we were looking for - add the leaf. */
955 leaves[count] = current_leaf;
959 current_ident = current_ident->parent;
963 ident->leaves = leaves;
964 ident->leaf_count = count;
969 static int init_identities(parsed_configs configs, drl_instance_t *instance) {
971 ident_config *config = configs.machines;
974 instance->cal = malloc(sizeof(struct ident_calendar) * SCHEDLEN);
976 if (instance->cal == NULL) {
980 for (i = 0; i < SCHEDLEN; ++i) {
981 TAILQ_INIT(instance->cal + i);
983 instance->cal_slot = 0;
985 instance->machines = malloc(configs.machine_count * sizeof(drl_instance_t *));
987 if (instance->machines == NULL) {
991 memset(instance->machines, 0, configs.machine_count * sizeof(drl_instance_t *));
992 instance->machine_count = configs.machine_count;
994 /* Allocate and add the machine identities. */
995 for (i = 0; i < configs.machine_count; ++i) {
996 identity_action *loop_action;
997 identity_action *comm_action;
998 instance->machines[i] = new_identity(config);
1000 if (instance->machines[i] == NULL) {
1004 loop_action = malloc(sizeof(identity_action));
1005 comm_action = malloc(sizeof(identity_action));
1007 if (loop_action == NULL || comm_action == NULL) {
1011 /* The first has no parent - it is the root. All others have the
1012 * previous ident as their parent. */
1014 instance->machines[i]->parent = NULL;
1016 instance->machines[i]->parent = instance->machines[i - 1];
1019 instance->last_machine = instance->machines[i];
1021 /* Add the ident to the guid->ident map. */
1022 map_insert(instance->ident_map, &instance->machines[i]->id,
1023 sizeof(instance->machines[i]->id), instance->machines[i]);
1025 config = config->next;
1027 memset(loop_action, 0, sizeof(identity_action));
1028 memset(comm_action, 0, sizeof(identity_action));
1029 loop_action->ident = instance->machines[i];
1030 loop_action->action = ACTION_MAINLOOP;
1031 loop_action->valid = 1;
1032 comm_action->ident = instance->machines[i];
1033 comm_action->action = ACTION_COMMUNICATE;
1034 comm_action->valid = 1;
1036 instance->machines[i]->loop_action = loop_action;
1037 instance->machines[i]->comm_action = comm_action;
1039 TAILQ_INSERT_TAIL(instance->cal + (instance->cal_slot & SCHEDMASK),
1040 loop_action, calendar);
1042 TAILQ_INSERT_TAIL(instance->cal + (instance->cal_slot & SCHEDMASK),
1043 comm_action, calendar);
1045 /* Setup the array of pointers to leaves. This is easy for machines
1046 * because a machine node applies to every leaf. */
1047 instance->machines[i]->leaves =
1048 malloc(instance->leaf_count * sizeof(leaf_t *));
1049 if (instance->machines[i]->leaves == NULL) {
1052 instance->machines[i]->leaf_count = instance->leaf_count;
1053 for (j = 0; j < instance->leaf_count; ++j) {
1054 instance->machines[i]->leaves[j] = &instance->leaves[j];
1058 /* Connect the set subtree to the machines. Any set or leaf without a
1059 * parent will take the last machine as its parent. */
1062 map_reset_iterate(instance->leaf_map);
1063 while ((leaf = (leaf_t *) map_next(instance->leaf_map))) {
1064 if (leaf->parent == NULL) {
1065 leaf->parent = instance->last_machine;
1070 for (i = 0; i < instance->set_count; ++i) {
1071 identity_action *loop_action;
1072 identity_action *comm_action;
1074 if (instance->sets[i]->parent == NULL && instance->sets[i]->independent == 0) {
1075 instance->sets[i]->parent = instance->last_machine;
1078 loop_action = malloc(sizeof(identity_action));
1079 comm_action = malloc(sizeof(identity_action));
1081 if (loop_action == NULL || comm_action == NULL) {
1085 memset(loop_action, 0, sizeof(identity_action));
1086 memset(comm_action, 0, sizeof(identity_action));
1087 loop_action->ident = instance->sets[i];
1088 loop_action->action = ACTION_MAINLOOP;
1089 loop_action->valid = 1;
1090 comm_action->ident = instance->sets[i];
1091 comm_action->action = ACTION_COMMUNICATE;
1092 comm_action->valid = 1;
1094 instance->sets[i]->loop_action = loop_action;
1095 instance->sets[i]->comm_action = comm_action;
1097 TAILQ_INSERT_TAIL(instance->cal + (instance->cal_slot & SCHEDMASK),
1098 loop_action, calendar);
1100 TAILQ_INSERT_TAIL(instance->cal + (instance->cal_slot & SCHEDMASK),
1101 comm_action, calendar);
1103 /* Setup the array of pointers to leaves. This is harder for sets,
1104 * but this doesn't need to be super-efficient because it happens
1105 * rarely and it isn't on the critical path for reconfig(). */
1106 if (fill_set_leaf_pointer(instance, instance->sets[i])) {
1115 static void print_instance(drl_instance_t *instance) {
1116 leaf_t *leaf = NULL;
1117 identity_t *ident = NULL;
1119 if (system_loglevel == LOG_DEBUG) {
1120 map_reset_iterate(instance->leaf_map);
1121 while ((leaf = (leaf_t *) map_next(instance->leaf_map))) {
1122 printf("%x:", leaf->xid);
1123 ident = leaf->parent;
1125 printf("%d:",ident->id);
1126 ident = ident->parent;
1128 printf("Leaf's parent pointer is %p\n", leaf->parent);
1131 printf("instance->last_machine is %p\n", instance->last_machine);
1135 static int assign_htb_hierarchy(drl_instance_t *instance) {
1137 int next_node = 0x100;
1139 /* If we're not going to create our own htb hierarchy (for instance,
1140 * if we're going to let PL's node manager do it for us), then we don't
1141 * want this function to do anything. */
1142 if (!create_htb.u.value) {
1143 printlog(LOG_DEBUG, "Skipping assign_htb_hierarchy becase ulogd.conf's create_htb set to 0.\n");
1147 /* Chain machine nodes under 1:10. */
1148 for (i = 0; i < instance->machine_count; ++i) {
1149 if (instance->machines[i]->parent == NULL) {
1151 instance->machines[i]->htb_parent = 0x10;
1154 instance->machines[i]->htb_parent =
1155 instance->machines[i]->parent->htb_node;
1158 instance->machines[i]->htb_node = next_node;
1164 /* Add set nodes under machine nodes. Iterate backwards to ensure parent is
1166 for (j = (instance->set_count - 1); j >= 0; --j) {
1167 if (instance->sets[j]->parent == NULL) {
1168 /* Independent node - goes under 0x10 away from machine nodes. */
1169 instance->sets[j]->htb_parent = 0x10;
1171 instance->sets[j]->htb_parent = instance->sets[j]->parent->htb_node;
1173 instance->sets[j]->htb_node = next_node;
1181 /* Added this so that I could comment one line and kill off all of the
1182 * command execution. */
1183 static inline int execute_cmd(const char *cmd) {
1187 static inline int add_htb_node(const char *iface, const uint32_t parent_major, const uint32_t parent_minor,
1188 const uint32_t classid_major, const uint32_t classid_minor,
1189 const uint64_t rate, const uint64_t ceil) {
1192 sprintf(cmd, "tc class add dev %s parent %x:%x classid %x:%x htb rate %llubit ceil %llubit",
1193 iface, parent_major, parent_minor, classid_major, classid_minor, rate, ceil);
1194 printlog(LOG_WARN, "INIT: HTB_cmd: %s\n", cmd);
1196 return execute_cmd(cmd);
1199 static inline int add_htb_netem(const char *iface, const uint32_t parent_major,
1200 const uint32_t parent_minor, const uint32_t handle,
1201 const int loss, const int delay) {
1204 sprintf(cmd, "/sbin/tc qdisc del dev %s parent %x:%x handle %x pfifo", iface, parent_major,
1205 parent_minor, handle);
1206 printlog(LOG_DEBUG, "HTB_cmd: %s\n", cmd);
1207 if (execute_cmd(cmd))
1208 printlog(LOG_DEBUG, "HTB_cmd: Previous deletion did not succeed.\n");
1210 sprintf(cmd, "/sbin/tc qdisc replace dev %s parent %x:%x handle %x netem loss %d%% delay %dms",
1211 iface, parent_major, parent_minor, handle, loss, delay);
1212 printlog(LOG_DEBUG, "HTB_cmd: %s\n", cmd);
1213 return execute_cmd(cmd);
1216 static inline int add_htb_sfq(const char *iface, const uint32_t parent_major,
1217 const uint32_t parent_minor, const uint32_t handle,
1218 const int perturb) {
1221 sprintf(cmd, "/sbin/tc qdisc del dev %s parent %x:%x handle %x pfifo", iface, parent_major,
1222 parent_minor, handle);
1223 printlog(LOG_WARN, "HTB_cmd: %s\n", cmd);
1224 if (execute_cmd(cmd))
1225 printlog(LOG_WARN, "HTB_cmd: Previous deletion did not succeed.\n");
1227 sprintf(cmd, "/sbin/tc qdisc replace dev %s parent %x:%x handle %x sfq perturb %d",
1228 iface, parent_major, parent_minor, handle, perturb);
1229 printlog(LOG_WARN, "HTB_cmd: %s\n", cmd);
1230 return execute_cmd(cmd);
1233 static int create_htb_hierarchy(drl_instance_t *instance) {
1236 uint64_t gigabit = 1024 * 1024 * 1024;
1238 /* If we're not going to create our own htb hierarchy (for instance,
1239 * if we're going to let PL's node manager do it for us), then we don't
1240 * want this function to do anything. */
1241 if (!create_htb.u.value) {
1242 printlog(LOG_DEBUG, "Skipping create_htb_hierarchy becase ulogd.conf's create_htb set to 0.\n");
1246 /* Nuke the hierarchy. */
1247 sprintf(cmd, "tc qdisc del dev eth0 root handle 1: htb");
1249 printlog(LOG_DEBUG, "HTB_cmd: %s\n", cmd);
1251 /* Re-initialize the basics. */
1252 sprintf(cmd, "tc qdisc add dev eth0 root handle 1: htb default 1fff");
1253 if (execute_cmd(cmd)) {
1256 printlog(LOG_DEBUG, "HTB_cmd: %s\n", cmd);
1258 if (add_htb_node("eth0", 1, 0, 1, 1, gigabit, gigabit))
1261 /* Add back 1:10. (Nodelimit : kilobits/sec -> bits/second)*/
1262 if (limiter.nodelimit) {
1263 if (add_htb_node("eth0", 1, 1, 1, 0x10, 8, (uint64_t) limiter.nodelimit * 1024))
1266 if (add_htb_node("eth0", 1, 1, 1, 0x10, 8, gigabit))
1271 for (i = 0; i < instance->machine_count; ++i) {
1272 if (add_htb_node("eth0", 1, instance->machines[i]->htb_parent, 1,
1273 instance->machines[i]->htb_node, 8, instance->machines[i]->limit * 1024)) {
1280 /* Add back 1:20. */
1282 if (instance->last_machine == NULL) {
1283 sprintf(cmd, "/sbin/tc class add dev eth0 parent 1:1 classid 1:20 htb rate 8bit ceil 1000mbit");
1285 sprintf(cmd, "/sbin/tc class add dev eth0 parent 1:%x classid 1:20 htb rate 8bit ceil 1000mbit",
1286 instance->last_machine->htb_node);
1289 sprintf(cmd, "/sbin/tc class add dev eth0 parent 1:1 classid 1:20 htb rate 8bit ceil 1000mbit");
1292 if (execute_cmd(cmd)) {
1295 printlog(LOG_DEBUG, "HTB_cmd: %s\n", cmd);
1298 for (j = (instance->set_count - 1); j >= 0; --j) {
1299 if (add_htb_node("eth0", 1, instance->sets[j]->htb_parent, 1,
1300 instance->sets[j]->htb_node, 8, instance->sets[j]->limit * 1024)) {
1305 /* Add leaves. FIXME: Set static sliver limit as ceil here! */
1306 for (k = 0; k < instance->leaf_count; ++k) {
1307 if (instance->leaves[k].parent == NULL) {
1308 if (add_htb_node("eth0", 1, 0x10, 1, (0x1000 | instance->leaves[k].xid), 8, gigabit))
1311 if (add_htb_node("eth0", 1, instance->leaves[k].parent->htb_node, 1, (0x1000 | instance->leaves[k].xid), 8, gigabit))
1315 /* Add exempt node for the leaf under 1:20 as 1:2<xid> */
1316 if (add_htb_node("eth0", 1, 0x20, 1, (0x2000 | instance->leaves[k].xid), 8, gigabit))
1320 /* Add 1:1000 and 1:2000 */
1321 if (instance->last_machine == NULL) {
1322 if (add_htb_node("eth0", 1, 0x10, 1, 0x1000, 8, gigabit))
1325 if (add_htb_node("eth0", 1, instance->last_machine->htb_node, 1, 0x1000, 8, gigabit))
1329 if (add_htb_node("eth0", 1, 0x20, 1, 0x2000, 8, gigabit))
1332 /* Add 1:1fff and 1:2fff */
1333 if (instance->last_machine == NULL) {
1334 if (add_htb_node("eth0", 1, 0x10, 1, 0x1fff, 8, gigabit))
1337 if (add_htb_node("eth0", 1, instance->last_machine->htb_node, 1, 0x1fff, 8, gigabit))
1341 if (add_htb_node("eth0", 1, 0x20, 1, 0x2fff, 8, gigabit))
1344 /* Artificial delay or loss for experimentation. */
1345 if (netem_delay.u.value || netem_loss.u.value) {
1346 if (!strcmp(netem_slice.u.string, "ALL")) {
1347 /* By default, netem applies to all leaves. */
1348 if (add_htb_netem("eth0", 1, 0x1000, 0x1000, netem_loss.u.value, netem_delay.u.value))
1350 if (add_htb_netem("eth0", 1, 0x1fff, 0x1fff, netem_loss.u.value, netem_delay.u.value))
1353 for (k = 0; k < instance->leaf_count; ++k) {
1354 if (add_htb_netem("eth0", 1, (0x1000 | instance->leaves[k].xid),
1355 (0x1000 | instance->leaves[k].xid), netem_loss.u.value, netem_delay.u.value)) {
1359 //FIXME: add exempt delay/loss here on 0x2000 ... ?
1362 /* netem_slice is not the default ALL value. Only apply netem
1363 * to the slice that is set in netem_slice.u.string. */
1366 sscanf(netem_slice.u.string, "%x", &slice_xid);
1368 if (add_htb_netem("eth0", 1, slice_xid, slice_xid, netem_loss.u.value, netem_delay.u.value))
1373 /* Turn on SFQ for experimentation. */
1374 if (strcmp(sfq_slice.u.string, "NONE")) {
1375 if (!strcmp(sfq_slice.u.string, "ALL")) {
1376 if (add_htb_sfq("eth0", 1, 0x1000, 0x1000, 30))
1378 if (add_htb_sfq("eth0", 1, 0x1fff, 0x1fff, 30))
1381 for (k = 0; k < instance->leaf_count; ++k) {
1382 if (add_htb_sfq("eth0", 1, (0x1000 | instance->leaves[k].xid),
1383 (0x1000 | instance->leaves[k].xid), 30)) {
1390 sscanf(sfq_slice.u.string, "%x", &slice_xid);
1392 if (add_htb_sfq("eth0", 1, slice_xid, slice_xid, 30))
/*
 * setup_tc_grd - prepare the eth0 leaf qdiscs for the GRD policy.
 *
 * For every leaf of `instance`, and for the two fixed classes 1:1000 and
 * 1:1fff, a `tc qdisc del ... pfifo` is issued followed by a
 * `tc qdisc replace ... netem loss 0 delay 0ms`.  When netem_delay or
 * netem_loss is configured, the netem parameters are then changed, either
 * on 1:1000, 1:1fff and every leaf (netem_slice == "ALL") or on the single
 * leaf whose xid is parsed from netem_slice.
 *
 * A failing pfifo delete is only logged at LOG_DEBUG; a failing netem
 * command is logged at LOG_CRITICAL.
 *
 * NOTE(review): init_drl() and reconfig() treat a non-zero result as
 * failure - confirm every LOG_CRITICAL path returns non-zero.
 */
1400 static int setup_tc_grd(drl_instance_t *instance) {
1404 for (i = 0; i < instance->leaf_count; ++i) {
1405 /* Delete the old pfifo qdisc that might have been there before. */
/* The class and handle ids are built textually as "1" followed by
 * hex(xid).  NOTE(review): this equals the numeric (0x1000 | xid) ids used
 * by create_htb_hierarchy() only when xid prints as exactly three hex
 * digits - confirm the xid range. */
1406 sprintf(cmd, "/sbin/tc qdisc del dev eth0 parent 1:1%x handle 1%x pfifo",
1407 instance->leaves[i].xid, instance->leaves[i].xid);
1409 if (execute_cmd(cmd)) {
1410 printlog(LOG_DEBUG, "GRD: pfifo qdisc wasn't there!\n");
1413 /* Add the netem qdisc. */
1414 sprintf(cmd, "/sbin/tc qdisc replace dev eth0 parent 1:1%x handle 1%x netem loss 0 delay 0ms",
1415 instance->leaves[i].xid, instance->leaves[i].xid);
1417 if (execute_cmd(cmd)) {
1418 printlog(LOG_CRITICAL, "TC GRD call failed: %s\n", cmd);
1423 /* Do the same for 1000 and 1fff. */
1424 sprintf(cmd, "/sbin/tc qdisc del dev eth0 parent 1:1000 handle 1000 pfifo");
1426 if (execute_cmd(cmd)) {
1427 printlog(LOG_DEBUG, "GRD: pfifo qdisc wasn't there!\n");
1430 /* Add the netem qdisc. */
1431 sprintf(cmd, "/sbin/tc qdisc replace dev eth0 parent 1:1000 handle 1000 netem loss 0 delay 0ms");
1433 if (execute_cmd(cmd)) {
1434 printlog(LOG_CRITICAL, "TC GRD call failed: %s\n", cmd);
1438 sprintf(cmd, "/sbin/tc qdisc del dev eth0 parent 1:1fff handle 1fff pfifo");
1440 if (execute_cmd(cmd)) {
1441 printlog(LOG_DEBUG, "GRD: pfifo qdisc wasn't there!\n");
1444 /* Add the netem qdisc. */
1445 sprintf(cmd, "/sbin/tc qdisc replace dev eth0 parent 1:1fff handle 1fff netem loss 0 delay 0ms");
1447 if (execute_cmd(cmd)) {
1448 printlog(LOG_CRITICAL, "TC GRD call failed: %s\n", cmd);
1452 /* Artificial delay or loss for experimentation. */
1453 if (netem_delay.u.value || netem_loss.u.value) {
1454 if (!strcmp(netem_slice.u.string, "ALL")) {
/* "ALL": apply the configured loss/delay to 1:1000, 1:1fff and then to
 * every leaf, recording the delay in each leaf. */
1455 sprintf(cmd, "/sbin/tc qdisc change dev eth0 parent 1:1000 handle 1000 netem loss %d delay %dms", netem_loss.u.value, netem_delay.u.value);
1456 if (execute_cmd(cmd)) {
1457 printlog(LOG_CRITICAL, "TC GRD call failed: %s\n", cmd);
1461 sprintf(cmd, "/sbin/tc qdisc change dev eth0 parent 1:1fff handle 1fff netem loss %d delay %dms", netem_loss.u.value, netem_delay.u.value);
1462 if (execute_cmd(cmd)) {
1463 printlog(LOG_CRITICAL, "TC GRD call failed: %s\n", cmd);
1467 for (j = 0; j < instance->leaf_count; ++j) {
1468 leaf_t *current = &instance->leaves[j];
1470 current->delay = netem_delay.u.value;
1472 sprintf(cmd, "/sbin/tc qdisc change dev eth0 parent 1:1%x handle 1%x netem loss %d delay %dms", current->xid, current->xid, netem_loss.u.value, netem_delay.u.value);
1474 if (execute_cmd(cmd)) {
1475 printlog(LOG_CRITICAL, "TC GRD call failed: %s\n", cmd);
/* Single-slice case: netem_slice holds the slice xid as a hex string. */
1481 leaf_t *leaf = NULL;
/* NOTE(review): the return value of sscanf() is discarded, so a malformed
 * netem_slice is not detected at this point. */
1483 sscanf(netem_slice.u.string, "%x", &slice_xid);
/* Look the leaf up by xid so that its recorded delay can be updated. */
1485 leaf = (leaf_t *) map_search(instance->leaf_map, &slice_xid, sizeof(slice_xid));
1488 /* Leaf not found - invalid selection. */
/* NOTE(review): this message goes to stdout via printf() rather than to the
 * log via printlog() like the other diagnostics in this function. */
1489 printf("Your experimental setup is incorrect...\n");
1493 leaf->delay = netem_delay.u.value;
1495 sprintf(cmd, "/sbin/tc qdisc change dev eth0 parent 1:1%x handle 1%x netem loss %d delay %dms", slice_xid, slice_xid, netem_loss.u.value, netem_delay.u.value);
1497 if (execute_cmd(cmd)) {
1498 printlog(LOG_CRITICAL, "TC GRD call failed: %s\n", cmd);
1509 * Initialize this limiter with options
1510 * Open UDP socket for peer communication
/*
 * init_drl - one-time initialisation of the global `limiter`.
 *
 * In order: zeroes `limiter`, opens the log file, derives the node limit,
 * initialises hashing and the limiter rwlock, picks the local IP, creates
 * and binds the UDP socket used for peer communication, reads the policy
 * (GRD or FPS, case-insensitive) and the estimate interval (capped at
 * 1000 ms), then - holding the limiter write lock - builds
 * limiter.stable_instance from the eligible leaves and the DRL config file
 * and installs the tc state for the selected policy.  Finally the UDP
 * receive thread is started.
 *
 * NOTE(review): drl_plugin_init() logs "Init failed" around its use of this
 * function - confirm that every failure branch returns non-zero.
 */
1512 static int init_drl(void) {
1513 parsed_configs configs;
1514 struct sockaddr_in server_address;
1516 memset(&limiter, 0, sizeof(limiter_t));
1518 /* Setup logging. */
1519 system_loglevel = (uint8_t) drl_loglevel.u.value;
/* Mode "w" truncates any existing log file at start-up. */
1520 logfile = fopen(drl_logfile.u.string, "w");
1522 if (logfile == NULL) {
1523 printf("Couldn't open logfile - ");
1528 printlog(LOG_CRITICAL, "ulogd_DRL initializing . . .\n");
/* NOTE(review): nodelimit is scaled by 1000000/8 here, whereas
 * create_htb_hierarchy() multiplies limiter.nodelimit by 1024 under a
 * "kilobits/sec -> bits/second" comment - confirm the intended unit. */
1530 limiter.nodelimit = (uint32_t) (((double) nodelimit.u.value * 1000000.0) / 8.0);
1532 init_hashing(); /* for all hash maps */
1534 pthread_rwlock_init(&limiter.limiter_lock,NULL);
1536 /* determine our local IP by iterating through interfaces */
/* strncmp() is non-zero when bind_addr does not start with "AUTO": the
 * configured address is then used verbatim; get_local_ip() is consulted
 * for the "AUTO" case. */
1537 if (strncmp(bind_addr.u.string, "AUTO", 4)) {
1538 limiter.ip = bind_addr.u.string;
1540 limiter.ip = get_local_ip();
1541 if (limiter.ip == NULL) {
1542 printlog(LOG_CRITICAL, "ulogd_DRL unable to aquire local IP address, not registering.\n");
/* NOTE(review): the inet_addr() result is stored without a check for
 * INADDR_NONE, so an unparsable bind_addr is not rejected here. */
1546 limiter.localaddr = inet_addr(limiter.ip);
1547 limiter.port = htons(LIMITER_LISTEN_PORT);
1548 limiter.udp_socket = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
1549 if (limiter.udp_socket < 0) {
1551 printlog(LOG_CRITICAL, "Failed to create UDP socket().\n");
/* Bind the peer-communication socket to the chosen local address/port. */
1555 memset(&server_address, 0, sizeof(server_address));
1556 server_address.sin_family = AF_INET;
1557 server_address.sin_addr.s_addr = limiter.localaddr;
1558 server_address.sin_port = limiter.port;
1560 if (bind(limiter.udp_socket, (struct sockaddr *) &server_address, sizeof(server_address)) < 0) {
1562 printlog(LOG_CRITICAL, "Failed to bind UDP socket.\n");
1566 printlog(LOG_WARN, " POLICY: %s\n",policy.u.string);
1567 if (strcasecmp(policy.u.string,"GRD") == 0) {
1568 limiter.policy = POLICY_GRD;
1569 } else if (strcasecmp(policy.u.string,"FPS") == 0) {
1570 limiter.policy = POLICY_FPS;
1572 printlog(LOG_CRITICAL,
1573 "Unknown DRL policy %s, aborting.\n",policy.u.string);
/* Estimate intervals above one second are not supported; clamp to 1000. */
1577 limiter.estintms = estintms.u.value;
1578 if (limiter.estintms > 1000) {
1579 printlog(LOG_CRITICAL,
1580 "DRL: sorry estimate intervals must be less than 1 second.");
1581 printlog(LOG_CRITICAL,
1582 " Simple source mods will allow larger intervals. Using 1 second.\n");
1583 limiter.estintms = 1000;
1585 printlog(LOG_WARN, " Est interval: %dms\n",limiter.estintms);
1587 /* Acquire the big limiter lock for writing. Prevents pretty much
1588 * anything else from happening while the hierarchy is being changed. */
1589 pthread_rwlock_wrlock(&limiter.limiter_lock);
/* NOTE(review): none of the failure branches below shows a matching
 * pthread_rwlock_unlock(); confirm what lock state callers can expect
 * when initialisation fails. */
1591 limiter.stable_instance.ident_map = allocate_map();
1592 if (limiter.stable_instance.ident_map == NULL) {
1593 printlog(LOG_CRITICAL, "Failed to allocate memory for identity map.\n");
1597 if (get_eligible_leaves(&limiter.stable_instance)) {
1598 printlog(LOG_CRITICAL, "Failed to read eligigle leaves.\n");
/* NOTE(review): unlike reconfig(), `configs` is not zeroed before being
 * handed to parse_drl_config() - confirm the parser initialises it fully. */
1602 if (parse_drl_config(drl_configfile.u.string, &configs)) {
1603 /* Parse error occured. Return non-zero to notify init_drl(). */
1604 printlog(LOG_CRITICAL, "Failed to parse the DRL configuration file (%s).\n",
1605 drl_configfile.u.string);
1609 /* Validate identity hierarchy! */
1610 if (validate_configs(configs, &limiter.stable_instance)) {
1611 /* Clean up everything. */
1612 free_failed_config(configs, &limiter.stable_instance);
1613 printlog(LOG_CRITICAL, "Invalid DRL configuration file (%s).\n",
1614 drl_configfile.u.string);
1618 if (init_identities(configs, &limiter.stable_instance)) {
1619 free_failed_config(configs, &limiter.stable_instance);
1620 printlog(LOG_CRITICAL, "Failed to initialize identities.\n");
1624 /* At this point, we should be done with configs. */
1625 free_ident_list(configs.machines);
1626 free_ident_list(configs.sets);
1628 print_instance(&limiter.stable_instance);
/* Install the kernel-side state for the chosen policy.
 * NOTE(review): confirm which policy maps to which of the steps below. */
1630 switch (limiter.policy) {
1632 if (assign_htb_hierarchy(&limiter.stable_instance)) {
1633 free_instance(&limiter.stable_instance);
1634 printlog(LOG_CRITICAL, "Failed to assign HTB hierarchy.\n");
1638 if (create_htb_hierarchy(&limiter.stable_instance)) {
1639 free_instance(&limiter.stable_instance);
1640 printlog(LOG_CRITICAL, "Failed to create HTB hierarchy.\n");
1646 if (setup_tc_grd(&limiter.stable_instance)) {
1647 free_instance(&limiter.stable_instance);
1648 printlog(LOG_CRITICAL, "Failed to initialize tc calls for GRD.\n");
1657 partition_set = partition.u.value;
1659 pthread_rwlock_unlock(&limiter.limiter_lock);
/* The receive thread is started only after the write lock is released. */
1661 if (pthread_create(&limiter.udp_recv_thread, NULL, limiter_receive_thread, NULL)) {
1662 printlog(LOG_CRITICAL, "Unable to start UDP receive thread.\n");
1666 printlog(LOG_WARN, "ulogd_DRL init finished.\n");
/*
 * reconfig - rebuild the limiter configuration at run time.
 *
 * Builds limiter.new_instance from scratch (identity map, eligible leaves,
 * parsed and validated DRL config file, identities) without holding the
 * limiter lock, then takes the write lock, installs the tc state for the
 * active policy and, on success, frees limiter.stable_instance and copies
 * new_instance over it.  If installing the new tc state fails, the code
 * tries to re-install the state of the old stable instance.
 *
 * NOTE(review): pthread_rwlock_wrlock() is only called after
 * print_instance(), yet the validate_configs() and init_identities()
 * failure branches call pthread_rwlock_unlock().  At those points this
 * function does not hold the lock; unlocking an rwlock the caller does not
 * hold is undefined per POSIX.  Those two unlock calls look like they
 * should be removed.
 */
1671 static void reconfig() {
1672 parsed_configs configs;
1674 printlog(LOG_DEBUG, "--Starting reconfig()--\n");
1677 memset(&configs, 0, sizeof(parsed_configs));
1678 memset(&limiter.new_instance, 0, sizeof(drl_instance_t));
1680 limiter.new_instance.ident_map = allocate_map();
1681 if (limiter.new_instance.ident_map == NULL) {
1682 printlog(LOG_CRITICAL, "Failed to allocate ident_map during reconfig().\n");
1686 if (get_eligible_leaves(&limiter.new_instance)) {
1687 free_failed_config(configs, &limiter.new_instance);
1688 printlog(LOG_CRITICAL, "Failed to read leaves during reconfig().\n");
1692 if (parse_drl_config(drl_configfile.u.string, &configs)) {
1693 free_failed_config(configs, &limiter.new_instance);
1694 printlog(LOG_CRITICAL, "Failed to parse config during reconfig().\n");
1698 if (validate_configs(configs, &limiter.new_instance)) {
1699 free_failed_config(configs, &limiter.new_instance);
1700 printlog(LOG_CRITICAL, "Validation failed during reconfig().\n");
/* NOTE(review): the lock has not been acquired yet at this point. */
1701 pthread_rwlock_unlock(&limiter.limiter_lock);
1705 if (init_identities(configs, &limiter.new_instance)) {
1706 free_failed_config(configs, &limiter.new_instance);
1707 printlog(LOG_CRITICAL, "Initialization failed during reconfig().\n");
/* NOTE(review): the lock has not been acquired yet at this point. */
1708 pthread_rwlock_unlock(&limiter.limiter_lock);
/* The parsed identity lists are no longer needed once identities exist. */
1712 free_ident_list(configs.machines);
1713 free_ident_list(configs.sets);
1715 print_instance(&limiter.new_instance);
/* From here on the write lock is held while the tc state is swapped. */
1718 pthread_rwlock_wrlock(&limiter.limiter_lock);
1720 switch (limiter.policy) {
1722 if (assign_htb_hierarchy(&limiter.new_instance)) {
1723 free_instance(&limiter.new_instance);
1724 printlog(LOG_CRITICAL, "Failed to assign HTB hierarchy during reconfig().\n");
1725 pthread_rwlock_unlock(&limiter.limiter_lock);
1729 if (create_htb_hierarchy(&limiter.new_instance)) {
1730 free_instance(&limiter.new_instance);
1731 printlog(LOG_CRITICAL, "Failed to create HTB hierarchy during reconfig().\n");
1733 /* Re-create old instance. */
1734 if (create_htb_hierarchy(&limiter.stable_instance)) {
1735 /* Error reinstating the old one - big problem. */
1736 printlog(LOG_CRITICAL, "Failed to reinstate HTB hierarchy during reconfig().\n");
1737 printlog(LOG_CRITICAL, "Giving up...\n");
1742 pthread_rwlock_unlock(&limiter.limiter_lock);
/* NOTE(review): unlike the HTB failure path above, no
 * pthread_rwlock_unlock() accompanies the GRD failure handling below -
 * confirm the write lock is not left held when setup_tc_grd() fails. */
1748 if (setup_tc_grd(&limiter.new_instance)) {
1749 free_instance(&limiter.new_instance);
1750 printlog(LOG_CRITICAL, "GRD tc calls failed during reconfig().\n");
1752 /* Try to re-create old instance. */
1753 if (setup_tc_grd(&limiter.stable_instance)) {
1754 printlog(LOG_CRITICAL, "Failed to reinstate old GRD qdiscs during reconfig().\n");
1755 printlog(LOG_CRITICAL, "Giving up...\n");
1763 /* Should be impossible. */
1764 printf("Pigs are flying?\n");
1768 /* Switch over new to stable instance. */
/* The old instance is released first; the struct copy then transfers
 * ownership of everything new_instance points to into stable_instance. */
1769 free_instance(&limiter.stable_instance);
1770 memcpy(&limiter.stable_instance, &limiter.new_instance, sizeof(drl_instance_t));
1772 /* Success! - Unlock */
1773 pthread_rwlock_unlock(&limiter.limiter_lock);
/*
 * stop_enforcement - relax the HTB classes of every machine and set.
 *
 * Issues `tc class change ... htb rate 8bit ceil 100mbit` on eth0 for each
 * machine identity and then for each set identity of `instance`, using the
 * htb_parent/htb_node ids stored in the identity.
 *
 * NOTE(review): the ceiling used here is 100mbit, whereas
 * create_htb_hierarchy() creates class 1:20 with a 1000mbit ceiling -
 * confirm 100mbit is the intended "unlimited" value.
 * NOTE(review): signal_thread_func() calls this without using the result.
 */
1776 static int stop_enforcement(drl_instance_t *instance) {
1780 for (i = 0; i < instance->machine_count; ++i) {
1781 sprintf(cmd, "/sbin/tc class change dev eth0 parent 1:%x classid 1:%x htb rate 8bit ceil 100mbit",
1782 instance->machines[i]->htb_parent,
1783 instance->machines[i]->htb_node);
1785 if (execute_cmd(cmd)) {
/* Same change for every set identity. */
1790 for (i = 0; i < instance->set_count; ++i) {
1791 sprintf(cmd, "/sbin/tc class change dev eth0 parent 1:%x classid 1:%x htb rate 8bit ceil 100mbit",
1792 instance->sets[i]->htb_parent,
1793 instance->sets[i]->htb_node);
1795 if (execute_cmd(cmd)) {
/*
 * signal_thread_func - body of the dedicated signal-handling thread.
 *
 * Blocks SIGHUP, SIGUSR1, SIGUSR2 and SIGRTMAX for this thread, then uses
 * sigwait() to receive signals synchronously and reacts to them:
 *   - SIGRTMAX toggles do_partition (fake network partitions);
 *   - SIGHUP is reported as unexpected;
 *   - one branch toggles enforcement under the limiter write lock, calling
 *     stop_enforcement() when enforcement was on (presumably the SIGUSR1
 *     branch - confirm);
 *   - SIGUSR2 is logged as a request to re-read the XML configuration.
 *
 * `args` is not used by the visible code.
 */
1803 static void *signal_thread_func(void *args) {
1809 sigaddset(&sigs, SIGHUP);
1810 sigaddset(&sigs, SIGUSR1);
1811 sigaddset(&sigs, SIGUSR2);
1812 sigaddset(&sigs, SIGRTMAX);
1813 pthread_sigmask(SIG_BLOCK, &sigs, NULL);
/* Wait set for sigwait().  NOTE(review): SIGHUP is only excluded from it if
 * `sigs` is cleared before these calls - confirm. */
1817 //sigaddset(&sigs, SIGHUP);
1818 sigaddset(&sigs, SIGUSR1);
1819 sigaddset(&sigs, SIGUSR2);
1820 sigaddset(&sigs, SIGRTMAX);
1822 err = sigwait(&sigs, &sig);
1825 printlog(LOG_CRITICAL, "sigwait() returned an error.\n");
1830 if (sig == SIGRTMAX) {
1831 printf("Caught SIGRTMAX - toggling fake partitions.\n");
1832 do_partition = !do_partition;
1838 printlog(LOG_CRITICAL, "Caught SIGHUP in signal_thread_func?!?\n");
1839 printf("Caught SIGHUP in signal_thread_func?!?\n");
/* Enforcement toggle: the write lock keeps other limiter-lock holders out
 * while the tc classes are being changed.
 * NOTE(review): confirm do_enforcement itself is updated next to each of
 * the two log messages below. */
1842 pthread_rwlock_wrlock(&limiter.limiter_lock);
1843 if (do_enforcement) {
1845 stop_enforcement(&limiter.stable_instance);
1846 printlog(LOG_CRITICAL, "--Switching enforcement off.--\n");
1849 printlog(LOG_CRITICAL, "--Switching enforcement on.--\n");
1851 pthread_rwlock_unlock(&limiter.limiter_lock);
1854 printlog(LOG_WARN, "Caught SIGUSR2 - re-reading XML file.\n");
1855 printf("Caught SIGUSR2 - re-reading XML file.\n");
1860 /* Intentionally blank. */
/*
 * drl_plugin_init - init callback registered with ulogd through drl_op.
 *
 * Blocks SIGUSR1, SIGUSR2 and SIGRTMAX in the calling thread, starts the
 * signal-handling thread (signal_thread_func), and starts the estimation
 * thread running handle_estimation() with &limiter as its argument.  When
 * enforce_on is set, "--Switching enforcement on.--" is logged while the
 * limiter write lock is held.
 *
 * Every failure is logged with printlog() and additionally reported on
 * stderr with a pointer to the log file.
 *
 * NOTE(review): the "Init failed" message presumably follows a failed
 * init_drl() call - confirm; also confirm do_enforcement is actually set in
 * the enforce_on branch.
 */
1866 static int drl_plugin_init() {
1867 sigset_t signal_mask;
/* The mask is installed before the pthread_create() calls below, so the
 * signal thread is the one that consumes these signals via sigwait(). */
1869 sigemptyset(&signal_mask);
1870 //sigaddset(&signal_mask, SIGHUP);
1871 sigaddset(&signal_mask, SIGUSR1);
1872 sigaddset(&signal_mask, SIGUSR2);
1873 sigaddset(&signal_mask, SIGRTMAX);
1874 pthread_sigmask(SIG_BLOCK, &signal_mask, NULL);
1876 if (pthread_create(&signal_thread, NULL, &signal_thread_func, NULL) != 0) {
1877 printlog(LOG_CRITICAL, "Failed to create signal handling thread.\n");
1878 fprintf(stderr, "An error has occured starting ulogd_DRL. Refer to your logfile (%s) for additional information.\n", drl_logfile.u.string);
1884 printlog(LOG_CRITICAL, "Init failed. :(\n");
1885 fprintf(stderr, "An error has occured starting ulogd_DRL. Refer to your logfile (%s) for additional information.\n", drl_logfile.u.string);
1890 /* start up the thread that will periodically estimate the
1891 * local rate and set the local limits
1894 if (pthread_create(&estimate_thread, NULL, (void*(*)(void*)) &handle_estimation, &limiter)!=0) {
1895 printlog(LOG_CRITICAL, "Couldn't start estimate thread.\n");
1896 fprintf(stderr, "An error has occured starting ulogd_DRL. Refer to your logfile (%s) for additional information.\n", drl_logfile.u.string);
1900 if (enforce_on.u.value) {
1901 pthread_rwlock_wrlock(&limiter.limiter_lock);
1903 printlog(LOG_CRITICAL, "--Switching enforcement on.--\n");
1904 pthread_rwlock_unlock(&limiter.limiter_lock);
1910 static void drl_signal(int sig) {
1911 if (sig == SIGHUP) {
1912 printf("Caught SIGHUP - reopening DRL log file.\n");
1915 logfile = fopen(drl_logfile.u.string, "a");
1916 printlog(LOG_CRITICAL, "Reopened logfile.\n");
1918 printlog(LOG_WARN, "Caught unexpected signal %d in drl_signal.\n", sig);
/*
 * Output-plugin descriptor for DRL.  _drl_reg_op() passes it to
 * register_output(); it wires up the per-packet output callback
 * (_output_drl), the signal callback (drl_signal) and the init callback
 * (drl_plugin_init).
 */
1922 static ulog_output_t drl_op = {
1924 .output = &_output_drl,
1925 .signal = &drl_signal,
1926 .init = &drl_plugin_init,
/*
 * Tests the amount of time it takes to call reconfig().
 *
 * Calls reconfig() `iterations` times back to back and prints the elapsed
 * wall-clock time as whole seconds plus microseconds.  An earlier
 * measurement noted in this file was about 85ms per iteration.
 *
 * The elapsed time is normalised so that the microsecond part is always in
 * [0, 1000000), and it is printed with %ld because the tv_sec/tv_usec
 * differences are not guaranteed to be of type int.
 */
static void time_reconfig(int iterations) {
    struct timeval start, end;
    long elapsed_sec;
    long elapsed_usec;
    int i;

    gettimeofday(&start, NULL);
    for (i = 0; i < iterations; ++i) {
        reconfig();
    }
    gettimeofday(&end, NULL);

    elapsed_sec = (long) (end.tv_sec - start.tv_sec);
    elapsed_usec = (long) (end.tv_usec - start.tv_usec);
    if (elapsed_usec < 0) {
        /* Borrow one second so the microsecond part is never negative. */
        --elapsed_sec;
        elapsed_usec += 1000000L;
    }

    printf("%d reconfigs() took %ld seconds and %ld microseconds.\n",
           iterations, elapsed_sec, elapsed_usec);
}
1950 /* register output plugin with ulogd */
/* Hands the address of the static drl_op descriptor to register_output(). */
1951 static void _drl_reg_op(void)
1953 ulog_output_t *op = &drl_op;
1954 register_output(op);
1959 /* have the opts parsed */
1960 config_parse_file("DRL", config_entries);
1963 ulogd_log(ULOGD_ERROR, "can't resolve all keyhash id's\n");
1967 /* Seed the hash function */
1968 salt = getpid() ^ time(NULL);