Add bw, dns, and uptime checks.
[myops.git] / web / collect / client / check_bw.py
diff --git a/web/collect/client/check_bw.py b/web/collect/client/check_bw.py
new file mode 100755 (executable)
index 0000000..0f8db34
--- /dev/null
@@ -0,0 +1,193 @@
+#!/usr/bin/python
+
+import commands
+import os
+import sys
+import re
+import socket
+import struct
+import time
+
+#import ctypes
+# TODO: maybe when there's more time; for better readability.
+#class History(Structure):
+#    _fields_ = [ ("version", c_int),
+#                 ("index", c_int),
+#                 ("history", c_float * HISTORY_LENGTH), ]
+
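+# Allocate a fixed amount of space on disk to save persistent state.
+# What to store in this file?
+# slice_history : x,x,x,x,x,...
+# root_history : y,y,y,y,y,y...
+# As implemented below, each history file holds a header (version, index,
+# last_value) followed by HISTORY_LENGTH float samples.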
+
+# Number of samples kept per history file: 30 days' worth if the check
+# runs once an hour.
+HISTORY_LENGTH = 30 * 24
+# struct formats: [0] is the header (int version, int index, float
+# last_value); [1] is the sample array of HISTORY_LENGTH floats.
+# NOTE(review): 'f' is a 4-byte float, so large byte counters are stored
+# with reduced precision -- confirm this is acceptable.
+HISTORY_fmt = ('iif', HISTORY_LENGTH * 'f')
+# Layout version written into every header and checked on read.
+HISTORY_version = 1
+
+
+def get_network_bytes(interface, path='/proc/net/dev'):
+    """
+        Return the (rx_bytes, tx_bytes) counters of `interface` as floats.
+
+        interface -- exact device name to look up, e.g. 'eth0'.
+        path      -- statistics table to parse; defaults to /proc/net/dev.
+
+        Returns None when no line for `interface` is found.
+        Raises IOError if `path` cannot be opened.
+    """
+    f = open(path, 'r')
+    try:
+        for line in f:
+            # Device lines read "<name>: <counters...>"; lines without a
+            # colon (such as the column headers) carry no counters.
+            if ':' not in line:
+                continue
+            (name, counters) = line.split(':', 1)
+            # Compare the whole name: a substring test would let 'eth0'
+            # match the line for 'veth0' or 'eth0.100'.
+            if name.strip() != interface:
+                continue
+            data = counters.split()
+            # Field 0 is received bytes; field 8 is transmitted bytes.
+            return (float(data[0]), float(data[8]))
+    finally:
+        # always release the handle, including on the early return
+        f.close()
+    return None
+
+def read_safe_history(filename):
+    """
+        Load the saved state from `filename`, creating the file if needed.
+
+        Returns (i, last_value, history): the index of the next slot to
+        write, the last raw counter value seen, and a list of
+        HISTORY_LENGTH samples.
+
+        This function guarantees that space is preserved: a file that is
+        missing, too short, or written with a different HISTORY_version is
+        replaced by a freshly initialised, full-size file.
+        If one of the file operations fail, it will throw an exception.
+    """
+    if os.path.exists(filename):
+        # read existing data
+        fd = os.open(filename, os.O_RDONLY)
+        try:
+            a = os.read(fd, os.path.getsize(filename))
+        finally:
+            # close even when the read fails, so the descriptor is not leaked
+            os.close(fd)
+
+        header_size = struct.calcsize(HISTORY_fmt[0])
+        try:
+            (version, i, last_value) = struct.unpack_from(HISTORY_fmt[0], a, 0)
+            history = list(struct.unpack_from(HISTORY_fmt[1], a, header_size))
+            # explicit comparison instead of assert, which `python -O` strips
+            valid = (version == HISTORY_version)
+        except struct.error:
+            # the file is shorter than the expected layout
+            valid = False
+
+        if valid:
+            return (i, last_value, history)
+
+        # TODO: in the future a more clever version migration might be nice.
+        os.remove(filename) # just nuke the old version
+
+    # create for the first time, with empty data
+    (i, last_value, history) = (0, 0.0, [0]*HISTORY_LENGTH)
+    write_safe_history(filename, (i, last_value, history), False)
+    return (i, last_value, history)
+
+def write_safe_history(filename, (i, last_value, history), check_for_file=True):
+    """
+        Overwrite the start of `filename` with the given state.
+
+        filename       -- history file to write.
+        (i, last_value, history) -- next slot index, last raw counter value,
+                          and a list of exactly HISTORY_LENGTH samples.
+        check_for_file -- when true, require that `filename` already exists.
+
+        Returns the number of bytes written.
+        Raises AssertionError when the history length is wrong or a required
+        file is missing; struct.error / OSError propagate from packing and
+        from the file operations.
+    """
+    # length should match, and the file should already exist.
+    # Raised explicitly (rather than with assert) so the checks survive
+    # `python -O`; the exception type is unchanged for callers.
+    if len(history) != HISTORY_LENGTH:
+        raise AssertionError("history must hold %d samples, got %d" %
+                             (HISTORY_LENGTH, len(history)))
+    if check_for_file and not os.path.exists(filename):
+        raise AssertionError("history file %s does not exist" % filename)
+
+    # Pack everything before touching the file, so that a packing error
+    # cannot leave a half-written header behind.
+    payload  = struct.pack(HISTORY_fmt[0], HISTORY_version, i, last_value)
+    payload += struct.pack(HISTORY_fmt[1], *history)
+
+    # open without TRUNC nor APPEND, then seek to beginning to preserve space on disk
+    fd = os.open(filename, os.O_WRONLY|os.O_CREAT)
+    try:
+        os.lseek(fd, 0, 0)
+        ret = os.write(fd, payload)
+    finally:
+        # close even when the write fails, so the descriptor is not leaked
+        os.close(fd)
+    return ret
+
+def add_to_history((i, last_value, history), data):
+    """
+        Fold a new raw counter reading into the history state.
+
+        (i, last_value, history) -- state as returned by read_safe_history.
+        data -- current raw counter value.
+
+        When last_value is positive and `data` exceeds it, the difference is
+        stored in slot `i` of `history` (the list is modified in place) and
+        the index advances, wrapping at HISTORY_LENGTH.  Otherwise nothing is
+        recorded and the index is left alone.
+
+        Returns the new (i, last_value, history), with last_value == data.
+    """
+    # On init last_value is 0, and after a reboot the counter resets so that
+    # data is not greater than last_value: in both cases only last_value is
+    # updated.  Tested explicitly rather than with assert/except, because
+    # asserts are stripped under `python -O`.
+    if last_value > 0.0 and data > last_value:
+        # wrap first so an out-of-range stored index cannot raise IndexError
+        slot = i % HISTORY_LENGTH
+        #print "Recording: %s"% (data-last_value)
+        history[slot] = data-last_value
+        i = (slot + 1) % HISTORY_LENGTH
+
+    last_value = data
+    return (i, last_value, history)
+
+def record_data(filename, data):
+    """
+        Read the state stored in `filename`, fold the reading `data` into
+        it, and write the result back.
+
+        Returns the value returned by write_safe_history.
+    """
+    previous = read_safe_history(filename)
+    updated = add_to_history(previous, data)
+    return write_safe_history(filename, updated)
+
+def get_percentile(filename, percentile):
+    """
+        Return the requested percentile of the samples stored in `filename`.
+
+        filename   -- history file, read through read_safe_history.
+        percentile -- fraction between 0.0 and 1.0, e.g. 0.90.
+
+        Samples equal to 0 are ignored.  Returns 0 when no non-zero sample
+        exists; otherwise returns the selected sample divided by the number
+        of seconds in a day.
+    """
+    (idx, last_value, history) = read_safe_history(filename)
+    summary = history[idx:] + history[:idx]
+    measured = [ x for x in summary if x != 0 ]
+    if len(measured) == 0:
+        return 0
+
+    # convert bytes to bw
+    # NOTE(review): the divisor is seconds per day, while the comment on
+    # HISTORY_LENGTH assumes one sample per hour -- confirm the interval.
+    bw = [ x/(60*60*24.0) for x in measured ]
+    bw.sort()
+    l = len(bw)
+    # clamp the position: percentile == 1.0 would otherwise index one past
+    # the end of the list and raise IndexError
+    pos = max(0, min(int(l*percentile), l-1))
+    pct = bw[pos]
+    #print bw
+
+    return pct
+
+def timed(method):
+    """
+        Decorator that times each call to `method`.
+
+        The decorated function accepts the same arguments as `method` and
+        returns the tuple (result, elapsed), where `elapsed` is the
+        difference between time.time() after and before the call.
+    """
+    # imported here because the file does not import functools at the top
+    import functools
+
+    # wraps() keeps method's __name__ and __doc__ on the wrapper
+    @functools.wraps(method)
+    def timeit(*args, **kw):
+        ts = time.time()
+        result = method(*args, **kw)
+        te = time.time()
+
+        #print '%r (%r, %r) %2.2f sec' % \
+        #      (method.__name__, args, kw, te-ts)
+        return (result, te-ts)
+
+    return timeit
+
+@timed
+def check_dns(ip, protocol='udp'):
+    """
+        Ask the nameserver at `ip` for the A record of www.yahoo.com.
+
+        ip       -- address of the nameserver to query.
+        protocol -- passed to the request as its protocol, 'udp' by default.
+
+        Produces "OK" when the request completes, or "Error: <reason>" when
+        DNS.Base.DNSError is raised.  Because of @timed, callers receive
+        (status, elapsed).
+    """
+    # The DNS package (presumably pydns -- confirm) is not imported at the
+    # top of this file; without this import every call raised NameError.
+    import DNS
+    try:
+        #ip = ip[:-1] + "0"
+        ro = DNS.Request(name="www.yahoo.com", qtype="A", server=ip)
+        # only success or failure matters; the response itself is unused
+        ro.req(protocol=protocol)
+        r = "OK"
+    except DNS.Base.DNSError, e:
+        r = "Error: %s" % e
+    return r
+        
+def get_nameserver_ips(filename):
+    """
+        Collect the IPv4-looking fields found in a resolver or ifcfg file.
+
+        filename -- path to parse.  The parser is chosen by name: a path
+                    containing 'resolv' is split on whitespace; a path
+                    containing 'ifcfg' is split on '=' for lines that
+                    contain 'DNS', with quote characters removed.
+
+        Returns a dict mapping each matching field to 0; the dict is empty
+        when the file does not exist or neither name marker matches.
+    """
+    ip_re = re.compile(r"\d+\.\d+\.\d+\.\d+")
+    ret = {}
+    if not os.path.exists(filename):
+        return ret
+
+    # Read everything up front so the handle is always closed, and so that
+    # both parsers see the full content when both name markers match.
+    f = open(filename, 'r')
+    try:
+        lines = f.readlines()
+    finally:
+        f.close()
+
+    if 'resolv' in filename:
+        for l in lines:
+            for field in l.strip().split():
+                if ip_re.match(field):
+                    ret[field] = 0
+
+    if 'ifcfg' in filename:
+        for l in lines:
+            if 'DNS' not in l:
+                continue
+            for field in l.strip().split('='):
+                field = field.replace('"', '')
+                field = field.replace("'", '')
+                if ip_re.match(field):
+                    ret[field] = 0
+    return ret
+
+def main():
+    """
+        Record the current byte counters and print the 90th percentiles.
+
+        Uses the first of eth0..eth3 for which get_network_bytes returns
+        counters, records the total, received and transmitted values in
+        bw_history.dat, bw_history_rx.dat and bw_history_tx.dat (paths
+        relative to the current directory), then prints the three 0.90
+        percentiles on one line.
+
+        Exits with status 1 when none of the interfaces is found.
+    """
+    t_bytes = None
+    for interface in ['eth0', 'eth1', 'eth2', 'eth3']:
+        t_bytes = get_network_bytes(interface)
+        if t_bytes is not None:
+            break
+    if t_bytes is None:
+        # massive fail.  cannot continue.
+        # say why on stderr so the failure is not silent; stdout stays clean
+        sys.stderr.write("check_bw: no counters found for eth0-eth3\n")
+        sys.exit(1)
+
+    # take diff b/t sum(t_bytes) and last_value
+    record_data("bw_history.dat", sum(t_bytes))
+    record_data("bw_history_rx.dat", t_bytes[0])
+    record_data("bw_history_tx.dat", t_bytes[1])
+
+    print get_percentile("bw_history.dat", 0.90),
+    print get_percentile("bw_history_rx.dat", 0.90),
+    print get_percentile("bw_history_tx.dat", 0.90),
+
+    print ""
+
+
+# Run the check only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()
+
+
+# TODO: comon?
+#url = """http://comon.cs.princeton.edu/status/tabulator.cgi?table=table_nodeviewshort&select='dns1udp>80 && dns2udp>80&&name=="%s"'&format=formatcsv&dumpcols='dns1udp,dns1tcp,dns2udp,dns2tcp'""" % os.popen("hostname").read().strip()