Never miss an old file
[myops.git] / web / collect / client / check_uptime.py
1 #!/usr/bin/python
2
3 import commands
4 import os
5 import sys
6 import re
7 import socket
8 import struct
9 import time
10
11 #import ctypes
12 # TODO: maybe when there's more time; for better readability.
13 #class History(Structure):
14 #    _fields_ = [ ("version", c_int),
15 #                 ("index", c_int),
16 #                 ("history", c_float * HISTORY_LENGTH), ]
17
18 # allocate fixed space on disk to save persistent state.
19 # what to store in this file?
20 # slice_history : x,x,x,x,x,...
21 # root_history : y,y,y,y,y,y...
22
# Number of samples kept: 30 days' worth, assuming one check per hour.
HISTORY_LENGTH = 30 * 24
# struct layouts: [0] header of two ints (version, index), [1] the samples.
HISTORY_fmt = ('ii', HISTORY_LENGTH * 'f')
# Layout version written into the header of every history file.
HISTORY_version = 1
26
27
def get_network_bytes(interface):
    """
        Return the (rx_bytes, tx_bytes) counters of `interface` as floats.

        The counters are read from /proc/net/dev.  Returns None when the
        file has no line for exactly `interface`.  Raises IOError when the
        file cannot be opened.
    """
    dev = open('/proc/net/dev', 'r')
    try:
        for line in dev:
            # An interface line looks like "<name>: <counters...>".  Compare
            # the whole name: a substring test would also pick "veth0" when
            # asked for "eth0".  Lines without ':' are skipped.
            (name, sep, counters) = line.partition(':')
            if not sep or name.strip() != interface:
                continue
            data = counters.split()
            # field 0 is received bytes, field 8 is transmitted bytes
            return (float(data[0]), float(data[8]))
    finally:
        dev.close()
    return None
35
def get_uptime():
    """
        Return the first field of /proc/uptime (the uptime in seconds) as a
        float.

        Returns None when the first line of the file is empty or blank.
        Raises IOError when the file cannot be opened.
    """
    proc = open('/proc/uptime', 'r')
    try:
        fields = proc.readline().split()
    finally:
        proc.close()
    if not fields:
        return None
    return float(fields[0])
41
def read_safe_history(filename):
    """
        Load the history ring buffer stored in `filename`.

        Returns (i, history): the index of the next slot to write and a list
        of HISTORY_LENGTH samples.

        If the file does not exist it is created with an all-zero history.
        If the file is too short for the expected layout, carries a version
        other than HISTORY_version, or stores an index outside
        [0, HISTORY_LENGTH), it is removed and re-created with an all-zero
        history.

        Failures of the file operations themselves are not caught and
        propagate to the caller as exceptions.
    """
    if not os.path.exists(filename):
        # create for the first time, with empty data
        (i, history) = (0, [0]*HISTORY_LENGTH)
        write_safe_history(filename, (i, history), False)
        return (i, history)

    # read existing data; release the descriptor even if the read fails
    fd = os.open(filename, os.O_RDONLY)
    try:
        raw = os.read(fd, os.path.getsize(filename))
    finally:
        os.close(fd)

    valid = False
    try:
        (version, i) = struct.unpack_from(HISTORY_fmt[0], raw, 0)
        history = list(struct.unpack_from(HISTORY_fmt[1], raw,
                                          struct.calcsize(HISTORY_fmt[0])))
        # An explicit check (not an assert) so it still runs under -O.  An
        # out-of-range index would make every later update fail.
        valid = (version == HISTORY_version) and (0 <= i < HISTORY_LENGTH)
    except struct.error:
        # the file is shorter than the expected layout
        pass

    if not valid:
        # TODO: in the future a more clever version migration might be nice.
        os.remove(filename) # just nuke the old version
        # start over with empty data
        (i, history) = (0, [0]*HISTORY_LENGTH)
        write_safe_history(filename, (i, history), False)

    return (i, history)
71
def write_safe_history(filename, state, check_for_file=True):
    """
        Write the history ring buffer to `filename`, overwriting in place.

        state          -- (i, history) pair: the next write index and a list
                          of exactly HISTORY_LENGTH samples.
        check_for_file -- when true, require that `filename` already exists.

        Returns the number of bytes written.  Raises AssertionError when the
        length or existence check fails, struct.error when the data cannot
        be packed, and OSError when a file operation fails.
    """
    (i, history) = state

    # length should match, and the file should already exist
    assert len(history) == HISTORY_LENGTH
    if check_for_file:
        assert os.path.exists(filename)

    # Pack everything before touching the file, so bad data can never leave
    # a new header in front of stale samples.
    payload = (struct.pack(HISTORY_fmt[0], HISTORY_version, i) +
               struct.pack(HISTORY_fmt[1], *history))

    # open without TRUNC nor APPEND, then seek to beginning to preserve space on disk
    fd = os.open(filename, os.O_WRONLY|os.O_CREAT)
    try:
        os.lseek(fd, 0, 0)
        ret = os.write(fd, payload)
    finally:
        os.close(fd)
    return ret
85
def add_to_history(state, data):
    """
        Store `data` in the ring buffer and advance the write index.

        state -- (i, history) pair: the next write index and the sample list.
        data  -- the new sample; only values greater than 0 are recorded.

        Returns the updated (i, history) pair.  `history` is modified in
        place.  The pair is returned unchanged when `data` is not a positive
        number or when `i` does not address a slot of `history`.
    """
    (i, history) = state

    try:
        positive = data > 0.0
    except TypeError:
        # not comparable with a number: treat like "no measurement"
        positive = False
    if not positive:
        # do not record data if data <= 0
        return (i, history)

    try:
        history[i] = data
    except IndexError:
        # index does not point into the buffer; leave everything untouched
        return (i, history)

    # wrap around at the end of the buffer
    return ((i + 1) % len(history), history)
96
def record_data(filename, data):
    """
        Append `data` to the history stored in `filename`.

        Reads the current history, adds the sample, and writes the result
        back.  Returns whatever write_safe_history returns.
    """
    current = read_safe_history(filename)
    updated = add_to_history(current, data)
    return write_safe_history(filename, updated)
100
def get_avg_uptime(filename):
    """
        Return the mean of the recorded samples stored in `filename`.

        Slots holding 0 are ignored.  Returns 0 when no slot holds a
        non-zero value, otherwise a float.
    """
    (idx, history) = read_safe_history(filename)
    # rotate so the slot at the write index comes first
    summary = history[idx:] + history[:idx]
    # A list (not filter()) so that len() works on every Python version.
    measured = [x for x in summary if x != 0]
    if not measured:
        return 0
    return float(sum(measured))/float(len(measured))
108
def timed(method):
    """
        Decorator that measures how long `method` takes.

        The decorated callable accepts the same arguments as `method` and
        returns a pair (result, elapsed), where `elapsed` is the difference
        of two time.time() readings taken around the call.
    """
    # imported here because the module does not import functools itself
    import functools

    # keep the wrapped function's name and docstring on the wrapper
    @functools.wraps(method)
    def timeit(*args, **kw):
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()
        return (result, te-ts)

    return timeit
121
@timed
def check_dns(ip, protocol='udp'):
    """
        Send an A-record query for www.yahoo.com to the nameserver at `ip`.

        ip       -- address of the nameserver to query.
        protocol -- passed through to the request's req() call.

        The undecorated result is "OK" when the request completes, or
        "Error: <reason>" when DNS.Base.DNSError is raised.  Because of
        @timed, callers receive (status, elapsed) instead of the bare status.

        NOTE(review): no `import DNS` is visible in this file; unless the
        name is provided some other way, calling this raises NameError.
    """
    try:
        request = DNS.Request(name="www.yahoo.com", qtype="A", server=ip)
        # only success or failure matters; the answer itself is discarded
        request.req(protocol=protocol)
        status = "OK"
    except DNS.Base.DNSError:
        # sys.exc_info() avoids both "except X, e" and "except X as e", so
        # this parses on every Python version.
        status = "Error: %s" % (sys.exc_info()[1],)
    return status
132         
def get_nameserver_ips(filename):
    """
        Collect the dotted-quad addresses listed in a resolver config file.

        The file name selects how lines are parsed:
          * name contains 'resolv': every whitespace-separated field of
            every line is examined;
          * name contains 'ifcfg': only lines containing 'DNS' are examined,
            split on '=' with single and double quotes removed.
        A field counts as an address when it starts with four dot-separated
        groups of digits.

        Returns a dict mapping each address string to 0.  The dict is empty
        when the file does not exist or nothing matches.
    """
    ip_re = re.compile(r"\d+\.\d+\.\d+\.\d+")
    ret = {}
    if not os.path.exists(filename):
        return ret

    # Read once and close right away; both parsers below then see all lines
    # even when the file name matches both markers.
    f = open(filename, 'r')
    try:
        lines = f.readlines()
    finally:
        f.close()

    if 'resolv' in filename:
        for line in lines:
            for field in line.strip().split():
                if ip_re.match(field) and field not in ret:
                    ret[field] = 0

    if 'ifcfg' in filename:
        for line in lines:
            if 'DNS' not in line:
                continue
            for field in line.strip().split('='):
                field = field.replace('"', '').replace("'", '')
                if ip_re.match(field) and field not in ret:
                    ret[field] = 0
    return ret
157
def main():
    """
        Record the current uptime and print the average of the history.

        The history lives in "uptime_history.dat", resolved against the
        current working directory.  Exits with status 1 when the uptime
        cannot be read.
    """
    history_file = "uptime_history.dat"

    ut = get_uptime()
    if ut is None:
        # massive fail.  cannot continue.
        sys.exit(1)

    record_data(history_file, ut)

    # Writes the value, one space, then a newline: the same bytes the
    # former `print value,` followed by `print ""` produced.
    sys.stdout.write("%s \n" % (get_avg_uptime(history_file),))


if __name__ == "__main__":
    main()
174
175
176 # TODO: comon?
177 #url = """http://comon.cs.princeton.edu/status/tabulator.cgi?table=table_nodeviewshort&select='dns1udp>80 && dns2udp>80&&name=="%s"'&format=formatcsv&dumpcols='dns1udp,dns1tcp,dns2udp,dns2tcp'""" % os.popen("hostname").read().strip()