Never miss an old file
[myops.git] / web / collect / client / check_dns.py
1 #!/usr/bin/python
2
3 # can't probe comon directly from node.
4 # http://comon.cs.princeton.edu/status/tabulator.cgi?table=table_nodeviewshort&select='dns1udp>80 && dns2udp>80&&name=="planetlab-01.cs.princeton.edu"'&format=formatcsv&dumpcols='dns1udp,dns1tcp,dns2udp,dns2tcp'
5
6 import commands
7 import os
8 import re
9 import socket
10 import struct
11 import DNS
12 import time
13 #import ctypes
14 # TODO: maybe when there's more time; for better readability.
15 #class History(Structure):
16 #    _fields_ = [ ("version", c_int),
17 #                 ("index", c_int),
18 #                 ("history", c_float * HISTORY_LENGTH), ]
19
20 # allocate fixed space on disk to save persistent state.
21 # what to store in this file?
22 # slice_history : x,x,x,x,x,...
23 # root_history : y,y,y,y,y,y...
24
# Capacity of the on-disk ring buffer: 30 days of samples when the check
# runs once an hour.
HISTORY_LENGTH = 30 * 24
# struct layouts of the file: a header of two ints (version, next index)
# followed by one float per history slot.
HISTORY_fmt = ('ii', HISTORY_LENGTH * 'f')
# Version stamp stored in the header of every history file.
HISTORY_version = 1
28
def read_safe_history(filename):
    """
        Load the persistent history stored in `filename`.

        Returns a tuple (i, history): `i` is the slot the next sample goes
        into and `history` is a list of HISTORY_LENGTH values.

        If the file does not exist it is created, at full size, with an
        all-zero history.  If it exists but cannot be decoded (truncated
        data, unexpected version, index outside the buffer) it is removed
        and recreated the same way.

        If one of the file operations fails, the OSError is propagated.
    """
    if os.path.exists(filename):
        # read existing data
        fd = os.open(filename, os.O_RDONLY)
        try:
            raw = os.read(fd, os.path.getsize(filename))
        finally:
            # release the descriptor even when the read itself fails
            os.close(fd)

        try:
            (version, i) = struct.unpack_from(HISTORY_fmt[0], raw, 0)
            # explicit checks rather than assert, which `python -O` strips
            if version != HISTORY_version:
                raise ValueError("unsupported history version %r" % (version,))
            if not 0 <= i < HISTORY_LENGTH:
                raise ValueError("history index %r out of range" % (i,))
            history = list(struct.unpack_from(HISTORY_fmt[1], raw,
                                              struct.calcsize(HISTORY_fmt[0])))
            return (i, history)
        except (struct.error, ValueError):
            # TODO: in the future a more clever version migration might be nice.
            os.remove(filename) # just nuke the old version

    # create for the first time, with empty data
    (i, history) = (0, [0]*HISTORY_LENGTH)
    write_safe_history(filename, (i, history), False)
    return (i, history)
58
def write_safe_history(filename, state, check_for_file=True):
    """
        Write a history to `filename`, overwriting it in place.

        `state` is the (i, history) pair as returned by read_safe_history;
        it is taken as one positional argument, exactly as before.
        `history` must hold HISTORY_LENGTH values.  Unless `check_for_file`
        is False, the file must already exist.

        Returns the number of bytes written.  Raises AssertionError when
        one of the preconditions above does not hold, and OSError when a
        file operation fails.
    """
    (i, history) = state

    # length should match, and the file should already exist
    # (raised explicitly so the checks survive `python -O`)
    if len(history) != HISTORY_LENGTH:
        raise AssertionError("history must hold %d entries, got %d"
                             % (HISTORY_LENGTH, len(history)))
    if check_for_file and not os.path.exists(filename):
        raise AssertionError("history file %r does not exist" % (filename,))

    # serialise first: a value that cannot be packed must not leave a
    # freshly created, empty file behind
    payload  = struct.pack(HISTORY_fmt[0], HISTORY_version, i)
    payload += struct.pack(HISTORY_fmt[1], *history)

    # open without TRUNC nor APPEND, then seek to beginning to preserve space on disk
    fd = os.open(filename, os.O_WRONLY|os.O_CREAT)
    try:
        os.lseek(fd, 0, 0)
        return os.write(fd, payload)
    finally:
        # always release the descriptor, also when the write fails
        os.close(fd)
72
def add_to_history(state, data):
    """
        Store `data` in the current slot of the ring buffer and advance.

        `state` is an (i, history) pair, taken as one positional argument:
        `i` is the slot to fill and `history` the list of samples.  The
        list is modified in place.

        Returns (next_i, history), where next_i wraps back to 0 after the
        last slot of `history`.
    """
    (i, history) = state
    history[i] = data
    # wrap on the length of the buffer actually passed in
    return ((i + 1) % len(history), history)
78
def record_status_record(filename, status):
    """
        Append `status` to the history kept in `filename`.

        Reads the current history, stores `status` in the next slot and
        writes the result back.  Returns whatever write_safe_history
        returns.
    """
    updated = add_to_history(read_safe_history(filename), status)
    return write_safe_history(filename, updated)
82
def get_success_ratio(filename):
    """
        Return the share of successful checks stored in `filename`.

        Slots holding 0 are skipped as not measured; of the remaining
        ones, every value greater than 0 counts as a success.  Returns 0
        when nothing has been measured, otherwise a float.
    """
    (_, samples) = read_safe_history(filename)

    # the order of the samples does not matter for a ratio
    measured = [s for s in samples if s != 0]
    if not measured:
        return 0

    succeeded = [s for s in measured if s > 0]
    return float(len(succeeded))/float(len(measured))
92
def timed(method):
    """
        Decorator that measures how long each call of `method` takes.

        The decorated callable accepts the same arguments as `method` and
        returns a tuple (result, seconds), where `seconds` is the
        difference of time.time() taken after and before the call.
        Exceptions raised by `method` propagate unchanged.
    """
    # imported here so the block does not depend on the file's import list
    import functools

    # keep __name__/__doc__ of the wrapped function for debugging output
    @functools.wraps(method)
    def timeit(*args, **kw):
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()
        return (result, te-ts)

    return timeit
105
@timed
def check_dns(ip, protocol='udp'):
    """
        Ask the nameserver at `ip` for the A record of www.yahoo.com.

        `protocol` is handed to the request unchanged ('udp' or 'tcp' in
        this file).  Because of @timed, callers receive a tuple
        (status, seconds); status is "OK" when the request completed and
        "Error: <reason>" when it failed.
    """
    # imported here so the block does not depend on the file's import list
    import sys

    try:
        request = DNS.Request(name="www.yahoo.com", qtype="A", server=ip)
        request.req(protocol=protocol)
    except (DNS.Base.DNSError, socket.error):
        # Socket-level failures (e.g. a refused TCP connection) are reported
        # like DNS errors so one dead server does not abort the whole run.
        # sys.exc_info() avoids the version-specific "except ..., e" syntax.
        return "Error: %s" % sys.exc_info()[1]
    return "OK"
116         
def get_nameserver_ips(filename):
    """
        Collect the IPv4 nameserver addresses configured in `filename`.

        The file name decides how the content is read:
          - a name containing 'resolv' is read as resolv.conf: addresses
            are taken from `nameserver` lines; lines starting with '#'
            or ';' are comments.
          - a name containing 'ifcfg' is read as an ifcfg file: addresses
            are taken from the '='-separated fields of lines mentioning
            'DNS', with quotes removed; lines starting with '#' are
            comments.

        Only fields that are entirely a dotted quad are accepted.

        Returns a dict mapping each address (string) to 0.  The dict is
        empty when the file does not exist.
    """
    ip_re = re.compile(r"\d+\.\d+\.\d+\.\d+$")
    ret = {}
    if not os.path.exists(filename):
        return ret

    # read everything up front so the handle is closed promptly and both
    # parsers below see the complete content
    f = open(filename, 'r')
    try:
        lines = [l.strip() for l in f]
    finally:
        f.close()
    lines = [l for l in lines if l]

    if 'resolv' in filename:
        for l in lines:
            if l[0] in '#;':
                continue
            fields = l.split()
            # other keywords (e.g. sortlist) may carry addresses as well,
            # but those are not nameservers
            if fields[0] != 'nameserver':
                continue
            for field in fields[1:]:
                if ip_re.match(field) and field not in ret:
                    ret[field] = 0

    if 'ifcfg' in filename:
        for l in lines:
            if l[0] == '#' or 'DNS' not in l:
                continue
            for field in l.split('='):
                field = field.replace('"', '').replace("'", '').strip()
                if ip_re.match(field) and field not in ret:
                    ret[field] = 0
    return ret
141
def _record_dns_checks(scope, ips):
    """Probe every server in `ips` over udp and tcp and log each outcome.

    `scope` ('root' or 'slice') and the position of the server in `ips`
    select the history file: dns_history_<scope>_<protocol><n>.dat.
    """
    for i, ip in enumerate(ips.keys()):
        for protocol in ('udp', 'tcp'):
            (s, t) = check_dns(ip, protocol)
            # a failed lookup is stored as -1 instead of its duration
            if "Error" in s:
                t = -1
            record_status_record("dns_history_%s_%s%s.dat" % (scope, protocol, i), t)


def main():
    """
        Probe the configured nameservers and print a one-line summary.

        Checks the servers from the root context and from the
        princeton_comon slice over udp and tcp, records every result in
        the history files, then prints on a single line: whether both
        configurations list the same servers, the udp success ratios of
        the first two root and slice servers, and, if present, the
        DNSFail report fetched from http://localhost:3121.
    """
    # imported here so the block does not depend on the file's import list
    import sys

    root_ips  = get_nameserver_ips('/etc/resolv.conf')
    slice_ips = get_nameserver_ips( '/vservers/princeton_comon/etc/resolv.conf')

    _record_dns_checks('root', root_ips)
    _record_dns_checks('slice', slice_ips)

    if set(root_ips.keys()) == set(slice_ips.keys()):
        fields = ["CONF-ROOT_SLICE-MATCH"]
    else:
        fields = ["CONF-ROOT_SLICE-MISMATCH"]
    # TODO: also compare against the ifcfg nameservers and report
    #       CONF-IFCFG_ROOT-MISMATCH.

    for history_file in ('dns_history_root_udp0.dat',
                         'dns_history_root_udp1.dat',
                         'dns_history_slice_udp0.dat',
                         'dns_history_slice_udp1.dat'):
        fields.append(str(get_success_ratio(history_file)))

    pipe = os.popen("curl -s http://localhost:3121 | grep -a DNSFail")
    try:
        c_dns = pipe.read().strip()
    finally:
        pipe.close()
    if len(c_dns) > 9 and "DNS" in c_dns:
        c_dns = "cm " + c_dns[9:]
    else:
        c_dns = ""
    fields.append(c_dns)

    # The trailing empty field reproduces the separator the former
    # sequence of print statements emitted before the final newline.
    fields.append("")
    sys.stdout.write(" ".join(fields) + "\n")


if __name__ == "__main__":
    main()
189
190
191 # TODO: comon?
192 #url = """http://comon.cs.princeton.edu/status/tabulator.cgi?table=table_nodeviewshort&select='dns1udp>80 && dns2udp>80&&name=="%s"'&format=formatcsv&dumpcols='dns1udp,dns1tcp,dns2udp,dns2tcp'""" % os.popen("hostname").read().strip()