Fixed bugs that Daniel pointed out.
[fprobe-ulog.git] / src / fprobe-ulog.c
index 0d84804..26eac1f 100644 (file)
 /* sig*() */
 #include <signal.h>
 
+/* statfs() */
+
+#include <sys/statfs.h>
+
 #include <libipulog/libipulog.h>
 struct ipulog_handle {
        int fd;
@@ -94,6 +98,7 @@ enum {
        bflag,
        cflag,
        dflag,
+       Dflag,
        eflag,
        Eflag,
        fflag,
@@ -111,6 +116,7 @@ enum {
        Uflag,
        uflag,
        vflag,
+       Wflag,
        Xflag,
 };
 
@@ -149,8 +155,10 @@ extern struct NetFlow NetFlow1;
 extern struct NetFlow NetFlow5;
 extern struct NetFlow NetFlow7;
 
+#define START_VALUE -5
 #define mark_is_tos parms[Mflag].count
 static unsigned scan_interval = 5;
+static int min_free = 0;
 static int frag_lifetime = 30;
 static int inactive_lifetime = 60;
 static int active_lifetime = 300;
@@ -257,7 +265,10 @@ void usage()
                "-y <remote:port>\tAddress of the NetFlow collector\n"
                "-f <writable file>\tFile to write data into\n"
                "-T <n>\tRotate log file every n epochs\n"
-               "-E <[1..60]>\tSize of an epoch in minutes\n",
+               "-W <n>\tSet current epoch to n. Useful when restarting fprobe\n"
+               "-E <[1..60]>\tSize of an epoch in minutes\n"
+               "-D <number of blocks>\tNumber of disk blocks to preserve as free space\n"
+               ,
                VERSION, BULK_QUANTITY_MAX, bulk_quantity, sched_min, sched_max);
        exit(0);
 }
@@ -367,25 +378,75 @@ inline void copy_flow(struct Flow *src, struct Flow *dst)
        dst->flags = src->flags;
 }
 
+/* Restore cur_epoch from /tmp/fprobe_last_epoch, the file update_cur_epoch_file()
+ * writes. cur_epoch is left untouched if the file is missing, unreadable or not a number. */
+void get_cur_epoch() {
+       char snum[7];
+       ssize_t len;
+       int fd = open("/tmp/fprobe_last_epoch",O_RDONLY);
+       if (fd == -1)
+               return;
+       len = read(fd, snum, sizeof(snum)-1); /* leave room for the terminator */
+       close(fd); /* release the descriptor even when read() failed */
+       if (len > 0) {
+               snum[len]='\0';
+               sscanf(snum,"%d",&cur_epoch);
+       }
+}
+
+
+/* Persist epoch id n in /tmp/fprobe_last_epoch so that get_cur_epoch() can pick it up
+ * after a restart. Best effort: failures are logged and otherwise ignored. */
+void update_cur_epoch_file(int n) {
+       char snum[7];
+       int len = snprintf(snum,sizeof(snum),"%d",n);
+       /* O_CREAT needs an explicit mode argument; without it the permissions are indeterminate */
+       int fd = open("/tmp/fprobe_last_epoch",O_WRONLY|O_CREAT|O_TRUNC,0644);
+       if (fd == -1) {
+               my_log(LOG_ERR, "open() failed: /tmp/fprobe_last_epoch.The next restart will resume logging from epoch id 0.");
+               return;
+       }
+       /* snprintf() returns the untruncated length; never write more than snum holds */
+       if (len >= (int)sizeof(snum)) len = sizeof(snum)-1;
+       if (len < 0 || write(fd, snum, len) != len)
+               my_log(LOG_ERR, "write() failed: /tmp/fprobe_last_epoch");
+       close(fd);
+}
+
 unsigned get_log_fd(char *fname, unsigned cur_fd) {
        struct Time now;
        unsigned cur_uptime;
+       /* We check if the amount of space left on the disk < some threshold and start reusing logs, or bail out if that
+        * doesn't solve the problem */
+
+       struct statfs statfs;
        int ret_fd;
        gettime(&now);
        cur_uptime = getuptime_minutes(&now);
 
+       if (fstatfs(cur_fd, &statfs) && cur_fd!=START_VALUE) {
+               my_log(LOG_ERR, "PANIC! Can't stat disk to calculate free blocks");
+       }
+       else {
+               if (min_free && statfs.f_bfree < min_free) 
+                       switch(cur_epoch) {
+                               case 0: /* Uh oh. Our first file filled up all of the free space. Just bail out. */
+                                       my_log(LOG_ERR, "The first epoch filled up all the free space on disk. Bailing out.");
+                                       exit(1);
+                               default:
+                                       my_log(LOG_INFO, "Disk almost full. I'm going to drop data. Max epochs = %d\n",cur_epoch);
+                                       cur_epoch = -1;
+                       }
+       }
+
        /* Epoch length in minutes */
-       if ((cur_uptime - prev_uptime) > epoch_length || cur_fd==-1) {
+       if ((cur_uptime - prev_uptime) > epoch_length || cur_fd<0 || cur_epoch==-1) {
                char nextname[MAX_PATH_LEN];
                int write_fd;
                prev_uptime = cur_uptime;
                cur_epoch = (cur_epoch + 1) % log_epochs;
                close(cur_fd);
                snprintf(nextname,MAX_PATH_LEN,"%s.%d",fname,cur_epoch);
-               if ((write_fd = open(nextname, O_WRONLY|O_CREAT)) < 0) {
-                       fprintf(stderr, "open(): %s (%s)\n", nextname, strerror(errno));
+               if ((write_fd = open(nextname, O_WRONLY|O_CREAT|O_TRUNC)) < 0) {
+                       my_log(LOG_ERR, "open(): %s (%s)\n", nextname, strerror(errno));
                        exit(1);
                }
+               update_cur_epoch_file(cur_epoch);
                ret_fd = write_fd;
        }
        else
@@ -752,8 +813,9 @@ void *emit_thread()
                                                if (netflow->SeqOffset)
                                                        *((uint32_t *) (emit_packet + netflow->SeqOffset)) = htonl(peers[0].seq);
                                                peers[i].write_fd = get_log_fd(peers[i].fname, peers[i].write_fd);
-                                               ret = write(peers[0].write_fd, emit_packet, size);
+                                               ret = write(peers[i].write_fd, emit_packet, size);
                                                if (ret < size) {
+
 #if ((DEBUG) & DEBUG_E) || defined MESSAGES
                                                        my_log(LOG_ERR, "write(to #%d, seq %d, flows %d, size %d) == %d: %s",
                                                                i + 1, peers[i].seq, emit_count, size, ret, strerror(errno));
@@ -766,7 +828,7 @@ void *emit_thread()
                                                                emit_count, i + 1, peers[i].seq);
                                                }
 #endif
-                                               peers[0].seq += emit_count;
+                                               peers[i].seq += emit_count;
 
                                                /* Rate limit */
                                                if (emit_rate_bytes) {
@@ -1212,11 +1274,13 @@ int main(int argc, char **argv)
        }
 
        if (parms[Uflag].count) ulog_gmask = atoi(parms[Uflag].arg);
+       if (parms[Wflag].count) cur_epoch = atoi(parms[Wflag].arg);
        if (parms[Tflag].count) log_epochs = atoi(parms[Tflag].arg);
        if (parms[Eflag].count) epoch_length = atoi(parms[Eflag].arg);
        if (parms[sflag].count) scan_interval = atoi(parms[sflag].arg);
        if (parms[gflag].count) frag_lifetime = atoi(parms[gflag].arg);
        if (parms[dflag].count) inactive_lifetime = atoi(parms[dflag].arg);
+       if (parms[Dflag].count) min_free = atoi(parms[Dflag].arg);
        if (parms[eflag].count) active_lifetime = atoi(parms[eflag].arg);
        if (parms[nflag].count) {
                switch (atoi(parms[nflag].arg)) {
@@ -1397,9 +1461,11 @@ bad_collector:
                if (!(peers[npeers].fname = malloc(strnlen(parms[fflag].arg,MAX_PATH_LEN)))) goto err_malloc;
                strncpy(peers[npeers].fname, parms[fflag].arg, MAX_PATH_LEN);
                
-               peers[npeers].write_fd = -1;
+               peers[npeers].write_fd = START_VALUE;
                peers[npeers].type = PEER_FILE;
                peers[npeers].seq = 0;
+
+               get_cur_epoch();
                npeers++;
        }
        else 
@@ -1422,20 +1488,23 @@ bad_collector:
 
        my_log_open(ident, verbosity, log_dest);
        if (!(log_dest & 2)) {
-               switch (fork()) {
-                       case -1:
-                               fprintf(stderr, "fork(): %s", strerror(errno));
-                               exit(1);
-
-                       case 0:
-                               setsid();
-                               freopen("/dev/null", "r", stdin);
-                               freopen("/dev/null", "w", stdout);
-                               freopen("/dev/null", "w", stderr);
-                               break;
-
-                       default:
-                               exit(0);
+               /* Crash-proofing - Sapan*/
+               while (1) {
+                       int pid=fork();
+                       if (pid==-1) {
+                                       fprintf(stderr, "fork(): %s", strerror(errno));
+                                       exit(1);
+                       }
+                       else if (pid==0) {
+                                       setsid();
+                                       freopen("/dev/null", "r", stdin);
+                                       freopen("/dev/null", "w", stdout);
+                                       freopen("/dev/null", "w", stderr);
+                                       break;
+                       }
+                       else {
+                               while (wait3(NULL,0,NULL) < 1);
+                       }
                }
        } else {
                setvbuf(stdout, (char *)0, _IONBF, 0);