Fix errors. Disable graph plotting until it works.
[tests.git] / node_ssh / nst.py
1 #!/usr/bin/python
2
3 import time, sys, urllib, os, tempfile, random
4 import xmlrpclib
5 from optparse import OptionParser
6 from getpass import getpass
7 from time import sleep
8
def _add_value_option(option_parser, short_flag, long_flag, dest, help_text):
    # Register one option whose argument is stored under `dest`.
    option_parser.add_option(short_flag, long_flag, action="store",
                             dest=dest, help=help_text)


# Command-line options.
parser = OptionParser()
_add_value_option(parser, "-c", "--config", "config", "Path to alternate config file")
_add_value_option(parser, "-x", "--url", "url", "API URL")
_add_value_option(parser, "-s", "--slice", "slice", "Name of slice to use")
_add_value_option(parser, "-n", "--nodes", "nodes", "File that contains a list of nodes to try to access")
_add_value_option(parser, "-k", "--key", "key", "Path to alternate public key")
_add_value_option(parser, "-u", "--user", "user", "API user name")
_add_value_option(parser, "-p", "--password", "password", "API password")
parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                  help="Be verbose (default: %default)")
(options, args) = parser.parse_args()

# A user name given without a password: ask for the password interactively.
if not (options.user is None or options.password is not None):
    try:
        options.password = getpass()
    except (EOFError, KeyboardInterrupt):
        # Prompt was aborted: terminate the prompt line and exit with status 0.
        print("")
        sys.exit(0)
27
class Config:
    """Settings for one run of the node ssh test.

    The settings come from a config file that is executed as Python source;
    its top-level names (NST_API_SERVER, NST_SLICE, NST_KEY_PATH, NST_USER,
    NST_PASSWORD) become attributes of the instance.  Command-line options
    that were given override the corresponding NST_* value.

    Attributes set for the rest of the script:
        api        -- xmlrpclib.Server for NST_API_SERVER
        auth       -- dict with 'Username', 'AuthString' and 'AuthMethod'
        key        -- path of the public key (NST_KEY_PATH)
        slice      -- slice name (NST_SLICE)
        nodes      -- path of the node list file, or None
        verbose    -- value of the --verbose option
        data_path  -- directory the data files are written to
        plots_path -- directory the plots are written to
    """

    def __init__(self, options):
        """Load the config file and apply the overrides found in `options`.

        options -- the optparse values object produced by the option parser.

        Raises IOError when the config file cannot be read.  Errors raised
        by the config file's own code propagate unchanged.
        """
        # if options are specified use them
        # otherwise use options from config file
        if options.config:
            config_file = options.config
        else:
            config_file = '/usr/share/planetlab/tests/nst/nst_config'

        # Only a file that cannot be read is reported as "not found"; a
        # syntax error in the config file should stay visible as such.
        try:
            execfile(config_file, self.__dict__)
        except IOError:
            raise IOError("Could not find nst config in " + config_file)

        if options.url:
            self.NST_API_SERVER = options.url
        if options.slice:
            self.NST_SLICE = options.slice
        if options.key:
            self.NST_KEY_PATH = options.key
        if options.user:
            self.NST_USER = options.user
        if options.password:
            self.NST_PASSWORD = options.password
        # NOTE: without --nodes the node list is always None, so an NST_NODES
        # value from the config file is ignored.
        if options.nodes:
            self.NST_NODES = options.nodes
        else:
            self.NST_NODES = None

        self.api = xmlrpclib.Server(self.NST_API_SERVER)
        self.auth = {
            'Username': self.NST_USER,
            'AuthString': self.NST_PASSWORD,
            'AuthMethod': 'password',
        }
        self.key = self.NST_KEY_PATH
        self.slice = self.NST_SLICE
        self.nodes = self.NST_NODES
        self.verbose = options.verbose

        self.data_path = '/usr/share/planetlab/tests/nst/data/'
        self.plots_path = '/usr/share/planetlab/tests/nst/plots/'
62         
63
64 # get formatted tic string for gnuplot
def getTimeTicString(t1, t2, step):
    """Return the body of a gnuplot `set xtics (...)` list for [t1, t2).

    t1, t2 -- start and end of the range, in seconds since the epoch
    step   -- distance between two tics, in seconds (must be positive)

    The tics are aligned so that one of them would fall on the first full
    local-time hour after t1.  Each tic is rendered as `"HH:MM" <timestamp>`
    and the tics are joined with ", ".  An empty string is returned when no
    tic falls inside the range.

    Raises ValueError when step is not positive.
    """
    if step <= 0:
        # step == 0 would divide by zero, a negative step would never
        # terminate the loop below.
        raise ValueError("step must be positive, got %r" % (step,))

    # Truncate t1 to the start of its hour (local time), then move to the
    # next full hour.
    hour_fields = list(time.localtime(t1))
    hour_fields[4] = 0  # minutes
    hour_fields[5] = 0  # seconds
    # time.mktime() accepts a tuple or struct_time only, not a list.
    next_hour = int(time.mktime(tuple(hour_fields))) + 3600

    # Step back from that hour in whole steps without going below t1.
    backsteps = (next_hour - t1) // step
    thistime = next_hour - backsteps * step

    tics = []
    while thistime < t2:
        label = time.strftime("%H:%M", time.localtime(thistime))
        tics.append("\"%s\" %d" % (label, thistime))
        thistime += step

    return ", ".join(tics)
87
88
89 # count total number of nodes in PlanetLab, according to the api
90 # count total number  of nodes in slice, according to the api 
def count_nodes_by_api(config):
    """Record how many nodes the API reports, in total and for the slice.

    Writes one "<timestamp>\\t<count>" record to the "nodes" data file and
    one to the "nodes_in_slice" data file under config.data_path.  Both
    files are overwritten.  For the 'root' slice every node counts as being
    in the slice.
    """
    node_rows = config.api.GetNodes(config.auth, {}, ['node_id', 'slice_ids'])
    all_nodes_output = "%d\t%d" % (round(time.time()), len(node_rows))

    if config.slice != 'root':
        # keep only the nodes whose slice_ids list contains our slice
        slice_rows = config.api.GetSlices(config.auth, {'name': config.slice}, ['slice_id'])
        slice_id = slice_rows[0]['slice_id']
        member_ids = [row['node_id'] for row in node_rows
                      if slice_id in row['slice_ids']]
        nodes_in_slice_output = "%d\t%d" % (round(time.time()), len(member_ids))
    else:
        nodes_in_slice_output = all_nodes_output

    # write result to datafiles
    records = (("nodes", all_nodes_output),
               ("nodes_in_slice", nodes_in_slice_output))
    for basename, record in records:
        data_file = open(config.data_path + os.sep + basename, 'w')
        data_file.write(record)
        data_file.close()

    if config.verbose:
        print("all node: " + all_nodes_output)
        print("nodes in slice: " + nodes_in_slice_output)
121                 
122
123 # count total number of "good" nodes, according to CoMon
def count_nodes_good_by_comon(config):
    """Append the number of "good" nodes reported by CoMon to "nodes_good".

    The CoMon query returns one node name per line; the number of lines is
    the count.  One "<timestamp>\\t<count>" record is appended to the
    "nodes_good" data file under config.data_path, terminated by a newline
    so that successive samples do not run together on one line.
    """
    comon = urllib.urlopen("http://summer.cs.princeton.edu/status/tabulator.cgi?table=table_nodeviewshort&format=nameonly&select='resptime%20%3E%200%20&&%20((drift%20%3E%201m%20||%20(dns1udp%20%3E%2080%20&&%20dns2udp%20%3E%2080)%20||%20gbfree%20%3C%205%20||%20sshstatus%20%3E%202h)%20==%200)'")
    try:
        good_nodes = comon.readlines()
    finally:
        comon.close()

    comon_output = "%d\t%d" % (round(time.time()), len(good_nodes))
    nodes_good_comon_file_name = config.data_path + os.sep + "nodes_good"
    nodes_good_comon_file = open(nodes_good_comon_file_name, 'a')
    try:
        # The file is opened for append: each sample needs its own line.
        nodes_good_comon_file.write(comon_output + "\n")
    finally:
        nodes_good_comon_file.close()

    if config.verbose:
        print("comon: " + comon_output)
138         
139 # count total number of nodes reachable by ssh
def count_nodes_can_ssh(config):
    """Count the nodes that answer over ssh and record the ones that do not.

    The nodes to try are read from the file named by config.nodes (one
    hostname per line, blank lines ignored) or, when that is not set, are
    all hostnames returned by the API.  A generated bash script loads the
    private key into an ssh-agent and runs `multiquery 'hostname'` against
    the node list; every output line that reports a non-zero byte count is
    taken as one reachable node.

    Data files written under config.data_path (both overwritten):
        dead_nodes    -- "<timestamp>\\t<hostname>\\t<boot_state>\\t<last_updated>"
                         for each node that did not answer
        nodes_can_ssh -- "<timestamp>\\t<count>" of nodes that answered

    A listed hostname that the API does not know is recorded with boot
    state "unknown" and last_updated 0.
    """
    api = config.api
    slice = config.slice
    key = config.key
    verbose = config.verbose
    auth = config.auth
    nodes = config.nodes

    if verbose:
        print("Creating list of nodes to ssh to")

    # create node dict: hostname -> node record
    all_nodes = api.GetNodes(auth, {}, ['hostname', 'boot_state', 'last_updated'])
    node_dict = {}
    for node in all_nodes:
        node_dict[node['hostname']] = node

    # the private key path is the public key path up to ".pub"
    private_key = key.split(".pub")[0]

    # Only files created here are removed afterwards; a node list supplied
    # by the user must survive the run.
    temp_filenames = []
    try:
        # create node list
        if nodes:
            nodes_filename = nodes
            nodes_file = open(nodes_filename, 'r')
            try:
                node_list = [line.strip() for line in nodes_file.readlines()]
            finally:
                nodes_file.close()
            node_list = [hostname for hostname in node_list if hostname]
        else:
            node_list = list(node_dict.keys())
            (nodes_fd, nodes_filename) = tempfile.mkstemp()
            temp_filenames.append(nodes_filename)
            nodes_file = os.fdopen(nodes_fd, 'w')
            try:
                for hostname in node_list:
                    nodes_file.write("%s\n" % hostname)
            finally:
                nodes_file.close()

        # create ssh command
        if verbose:
            print("Attemptng to ssh to nodes in " + nodes_filename)

        ssh_script = """
        export MQ_SLICE="%(slice)s"
        export MQ_NODES="%(nodes_filename)s"

        eval `ssh-agent` >/dev/null 2>&1
        trap "kill $SSH_AGENT_PID" 0
        ssh-add %(private_key)s >/dev/null 2>&1

        multiquery 'hostname' 2>/dev/null |
        grep "bytes" |
        grep -v ": 0 bytes"
        """ % {'slice': slice,
               'nodes_filename': nodes_filename,
               'private_key': private_key}

        (ssh_fd, ssh_filename) = tempfile.mkstemp()
        temp_filenames.append(ssh_filename)
        ssh_file = os.fdopen(ssh_fd, 'w')
        try:
            ssh_file.write(ssh_script)
        finally:
            ssh_file.close()

        ssh_pipe = os.popen("bash %s" % ssh_filename)
        try:
            ssh_results = ssh_pipe.readlines()
        finally:
            ssh_pipe.close()
    finally:
        # remove temp files
        for filename in temp_filenames:
            if os.path.exists(filename):
                os.unlink(filename)

    # each result line starts with "<hostname>:"
    good_nodes = [result.split(':')[0] for result in ssh_results]

    # count number of nodes we can ssh into
    ssh_count = len(good_nodes)

    # determine which nodes are dead
    dead_nodes = sorted(set(node_list).difference(good_nodes))

    # write dead nodes to file
    curr_time = round(time.time())
    dead_node_count_output = "%d\t%d" % (curr_time, len(dead_nodes))
    dead_nodes_file_name = config.data_path + os.sep + "dead_nodes"
    dead_nodes_file = open(dead_nodes_file_name, 'w')
    try:
        for hostname in dead_nodes:
            # hostnames from a user-supplied list may be unknown to the API
            node = node_dict.get(hostname, {})
            boot_state = node.get('boot_state', 'unknown')
            last_updated = node.get('last_updated') or 0
            dead_nodes_file.write("%d\t%s\t%s\t%d\n" %
                                  (curr_time, hostname, boot_state, last_updated))
    finally:
        dead_nodes_file.close()

    # write good node count
    ssh_result_output = "%d\t%d" % (round(time.time()), ssh_count)
    nodes_can_ssh_file_name = config.data_path + os.sep + "nodes_can_ssh"
    nodes_can_ssh_file = open(nodes_can_ssh_file_name, 'w')
    try:
        nodes_can_ssh_file.write(ssh_result_output)
    finally:
        nodes_can_ssh_file.close()

    if verbose:
        print("nodes that can ssh: " + ssh_result_output)
        print("dead nodes: " + dead_node_count_output)
241         
242         
243 # remove all nodes from a slice
def empty_slice(config):
    """Ask the API to remove config.slice from every node it lists."""
    if config.verbose:
        print("Removing %s from all nodes" % config.slice)

    node_ids = []
    for row in config.api.GetNodes(config.auth, {}, ['node_id']):
        node_ids.append(row['node_id'])

    config.api.DeleteSliceFromNodes(config.auth, config.slice, node_ids)
251
252         
253 # add slice to all nodes. 
254 # make sure users key is up to date   
def init_slice(config):
    """Prepare the slice for the test run.

    Checks that the slice exists and that the API user is a member of it,
    makes sure the public key at config.key is registered for that user
    (adding it, or replacing the user's first key, when needed) and adds the
    slice to every node it is not on yet.

    Raises Exception when the slice or the user cannot be found, when the
    user is not in the slice, or when the key file's first line is empty.
    """
    api = config.api
    auth = config.auth
    slice = config.slice
    key_path = config.key
    verbose = config.verbose

    slices = api.GetSlices(auth, [slice],
                           ['slice_id', 'name', 'person_ids', 'node_ids'])
    if not slices:
        raise Exception("No such slice %s" % slice)
    slice = slices[0]

    # make sure user is in slice
    persons = api.GetPersons(auth, auth['Username'],
                             ['person_id', 'email', 'slice_ids', 'key_ids'])
    if not persons:
        raise Exception("No such user %s" % auth['Username'])
    person = persons[0]
    if slice['slice_id'] not in person['slice_ids']:
        raise Exception("%s not in %s slice. Must be added first" %
                        (person['email'], slice['name']))

    # make sure user key is up to date; only the first line of the key
    # file is used
    key_file = open(key_path, 'r')
    try:
        current_key = key_file.readline().strip()
    finally:
        key_file.close()
    if not current_key:
        raise Exception("Key cannot be empty")

    keys = api.GetKeys(auth, person['key_ids'])
    if not keys:
        if verbose:
            print("Adding new key " + key_path)
        api.AddPersonKey(auth, person['person_id'],
                         {'key_type': 'ssh', 'key': current_key})
    elif not [k for k in keys if k['key'] == current_key]:
        # none of the registered keys matches: overwrite the first one
        if verbose:
            print("%s was modified or is new. Updating PLC" % key_path)
        old_key = keys[0]
        api.UpdateKey(auth, old_key['key_id'], {'key': current_key})

    # add slice to all nodes
    if verbose:
        print("Generating list of all nodes ")
    all_nodes = [row['node_id'] for row in
                 api.GetNodes(auth, {}, ['node_id'])]
    if verbose:
        print("Adding %s to all nodes" % slice['name'])

    # only nodes the slice is not on yet are passed to the API
    new_nodes = set(all_nodes).difference(slice['node_ids'])
    api.AddSliceToNodes(auth, slice['slice_id'], list(new_nodes))
306         
307         
308 # create the fill/empty plot
def plot_fill_empty(gnuplot_path="gnuplot"):
    """Render the fill/empty plot to <plots_path>/fill_empty.png.

    gnuplot_path -- gnuplot executable to run (default: "gnuplot")

    Reads the four data files ("nodes", "nodes_in_slice", "nodes_can_ssh",
    "nodes_good") under config.data_path, where `config` is the module-level
    Config instance.  The first and last record of each file determine the
    time range; at most the last `plotlength` seconds are plotted.  Empty
    data files are skipped, and nothing is plotted when no file holds a
    record.
    """
    #ticstep = 3600 # 1 hour
    #plotlength = 36000 # 10 hours
    ticstep = 1800
    plotlength = 10800

    plots_path = config.plots_path

    all_nodes_file_name = config.data_path + os.sep + "nodes"
    nodes_in_slice_file_name = config.data_path + os.sep + "nodes_in_slice"
    nodes_can_ssh_file_name = config.data_path + os.sep + "nodes_can_ssh"
    nodes_good_comon_file_name = config.data_path + os.sep + "nodes_good"

    # earliest first record and latest last record over all data files
    starttime = -1
    stoptime = -1
    for datafilename in (all_nodes_file_name,
                         nodes_in_slice_file_name,
                         nodes_can_ssh_file_name,
                         nodes_good_comon_file_name):
        datafile = open(datafilename, 'r')
        try:
            # Read the whole file instead of seeking 32 bytes back from the
            # end, which fails on files shorter than that.
            records = [line for line in datafile.readlines() if line.strip()]
        finally:
            datafile.close()
        if not records:
            continue

        thisstarttime = int(records[0].split("\t")[0])
        if starttime == -1 or thisstarttime < starttime:
            starttime = thisstarttime
        thisstoptime = int(records[-1].split("\t")[0])
        if stoptime == -1 or thisstoptime > stoptime:
            stoptime = thisstoptime

    if starttime == -1:
        if config.verbose:
            print("No data to plot")
        return

    stopx = stoptime
    startx = max(starttime, stopx - plotlength)
    starttime = startx

    tics = getTimeTicString(starttime, stoptime, ticstep)

    # month name and day of month ("%b %m" printed the month twice)
    startdate = time.strftime("%b %d, %Y - %H:%M", time.localtime(startx))
    stopdate = time.strftime("%H:%M", time.localtime(stopx))

    # The backslash-newline pairs below are consumed by Python, so gnuplot
    # receives the plot command on a single line.
    plot_script = """
        set term png
        set output "%(plots_path)s/fill_empty.png"

        set title "Number of Nodes / Time - %(startdate)s to %(stopdate)s"
        set xlabel "Time"
        set ylabel "Number of Nodes"

        set xtics (%(tics)s)
        set xrange[%(startx)d:%(stopx)d]
        set yrange[0:950]

        plot "%(all_nodes_file_name)s" u 1:2 w lines title "Total Nodes", \
                "%(nodes_in_slice_file_name)s" u 1:2 w lines title "Nodes in Slice", \
                "%(nodes_good_comon_file_name)s" u 1:2 w lines title \
                        "Healthy Nodes (according to CoMon)", \
                "%(nodes_can_ssh_file_name)s" u 1:2 w lines title "Nodes Reachable by SSH"

        """ % {'plots_path': plots_path,
               'startdate': startdate,
               'stopdate': stopdate,
               'tics': tics,
               'startx': startx,
               'stopx': stopx,
               'all_nodes_file_name': all_nodes_file_name,
               'nodes_in_slice_file_name': nodes_in_slice_file_name,
               'nodes_good_comon_file_name': nodes_good_comon_file_name,
               'nodes_can_ssh_file_name': nodes_can_ssh_file_name}

    (tmpfd, tmpfilename) = tempfile.mkstemp()
    try:
        tmpfile = os.fdopen(tmpfd, 'w')
        try:
            tmpfile.write(plot_script)
        finally:
            tmpfile.close()

        os.system("%s %s" % (gnuplot_path, tmpfilename))
    finally:
        if os.path.exists(tmpfilename):
            os.unlink(tmpfilename)
377
378
379
# Script body: prepare the slice, gather the node counts, clean up.
config = Config(options)
sleep_time = 900

if config.slice != 'root':
    # set up slice and add it to nodes
    init_slice(config)

    if config.verbose:
        print("Waiting %(sleep_time)d seconds for nodes to update" % {'sleep_time': sleep_time})
    # give the nodes time to pick up the data
    sleep(sleep_time)
elif config.verbose:
    print("Logging in as root")

# gather data
count_nodes_can_ssh(config)
count_nodes_by_api(config)
count_nodes_good_by_comon(config)

# update plots (disabled)
#plot_fill_empty()
#os.system("cp plots/*.png ~/public_html/planetlab/tests")

# clean up
# NOTE(review): this also runs for the 'root' slice, for which init_slice
# was skipped -- confirm that is intended.
empty_slice(config)
407