Initial checkin of node test app
[tests.git] / node_ssh / nst.py
1 #!/usr/bin/python
2
3 import time, sys, urllib, os, tempfile, random
4 import xmlrpclib
5 from optparse import OptionParser
6 from getpass import getpass
7 from time import sleep
8
# Command-line interface. Every option is optional; the ones that carry a
# value override the matching setting from the config file (see Config).
parser = OptionParser()
parser.add_option(
    "-c", "--config", dest="config", action="store",
    help="Path to alternate config file")
parser.add_option(
    "-x", "--url", dest="url", action="store",
    help="API URL")
parser.add_option(
    "-s", "--slice", dest="slice", action="store",
    help="Name of slice to use")
parser.add_option(
    "-n", "--nodes", dest="nodes", action="store",
    help="File that contains a list of nodes to try to access")
parser.add_option(
    "-k", "--key", dest="key", action="store",
    help="Path to alternate public key")
parser.add_option(
    "-u", "--user", dest="user", action="store",
    help="API user name")
parser.add_option(
    "-p", "--password", dest="password", action="store",
    help="API password")
parser.add_option(
    "-v", "--verbose", dest="verbose", action="store_true",
    help="Be verbose (default: %default)")
options, args = parser.parse_args()

# A user name without a password: prompt for the password interactively.
if options.password is None and options.user is not None:
    try:
        options.password = getpass()
    except (KeyboardInterrupt, EOFError):
        # Prompt was aborted (Ctrl-C / Ctrl-D): finish the prompt line with
        # a newline (Python 2 bare print statement) and leave quietly.
        print
        sys.exit(0)
27
class Config:
    """Runtime settings for the node ssh test.

    The settings come from a config file written in Python syntax, which is
    executed into the instance namespace so that every name it assigns
    (NST_API_SERVER, NST_SLICE, ...) becomes an instance attribute.  Values
    given on the command line then override the ones from the file.

    Attributes derived from the settings:
        api             -- xmlrpclib server proxy for NST_API_SERVER
        auth            -- password authentication struct for API calls
        key             -- NST_KEY_PATH
        slice           -- NST_SLICE
        nodes           -- path given with --nodes, or None
        multiquery_path -- NST_MULTIQUERY_PATH
        verbose         -- the --verbose flag
        data_path       -- directory the counters are written to
        plots_path      -- directory the plots are written to
    """

    DEFAULT_CONFIG_FILE = '/usr/share/planetlab/tests/nst/nst_config'

    # Names that must be defined by the config file or the command line.
    REQUIRED_SETTINGS = ('NST_API_SERVER', 'NST_SLICE', 'NST_KEY_PATH',
                         'NST_USER', 'NST_PASSWORD', 'NST_MULTIQUERY_PATH')

    def __init__(self, options):
        """Load the config file and apply the command-line overrides.

        options -- object with the attributes config, url, slice, key, user,
                   password, nodes and verbose (the parsed command line).

        Raises IOError if the config file does not exist and AttributeError
        if a required setting is defined neither in the file nor on the
        command line.  Errors inside the config file itself propagate
        unchanged so that the real cause stays visible.
        """
        config_file = options.config or self.DEFAULT_CONFIG_FILE

        if not os.path.isfile(config_file):
            raise IOError("Could not find nst config in " + config_file)

        config_fh = open(config_file, 'r')
        try:
            config_source = config_fh.read()
        finally:
            config_fh.close()
        # Same effect as execfile(config_file, self.__dict__).  The extra
        # newline keeps old interpreters happy when the file lacks one.
        exec(compile(config_source + "\n", config_file, 'exec'), self.__dict__)

        # Command-line values win over the config file.
        if options.url: self.NST_API_SERVER = options.url
        if options.slice: self.NST_SLICE = options.slice
        if options.key: self.NST_KEY_PATH = options.key
        if options.user: self.NST_USER = options.user
        if options.password: self.NST_PASSWORD = options.password
        # The node list only ever comes from the command line.
        if options.nodes: self.NST_NODES = options.nodes
        else: self.NST_NODES = None

        for setting in self.REQUIRED_SETTINGS:
            if not hasattr(self, setting):
                raise AttributeError("%s is not set in %s or on the command line"
                                     % (setting, config_file))

        self.api = xmlrpclib.Server(self.NST_API_SERVER)
        self.auth = {
            'Username': self.NST_USER,
            'AuthString': self.NST_PASSWORD,
            'AuthMethod': 'password',
        }
        self.key = self.NST_KEY_PATH
        self.slice = self.NST_SLICE
        self.nodes = self.NST_NODES
        self.multiquery_path = self.NST_MULTIQUERY_PATH
        self.verbose = options.verbose

        self.data_path = '/usr/share/planetlab/tests/nst/data/'
        self.plots_path = '/usr/share/planetlab/tests/nst/plots/'
63         
64
65 # get formatted tic string for gnuplot
def getTimeTicString(t1, t2, step):
    """Return the body of a gnuplot ``set xtics (...)`` list.

    Tics are placed every `step` seconds so that one of them would fall on
    the first full local hour after `t1`; the first tic is the earliest such
    position that is not before `t1`, and tics stop before `t2`.

    t1, t2 -- start and end of the plotted range, in seconds since the epoch
    step   -- distance between tics in seconds, must be positive

    Returns a string such as '"10:00" 1170000000, "10:30" 1170001800', with
    labels in local time, or '' when no tic fits into the range.

    Raises ValueError if `step` is not positive (the loop below would
    otherwise never terminate).
    """
    if step <= 0:
        raise ValueError("step must be a positive number of seconds")

    # Truncate t1 to the start of its local hour, then advance one hour.
    hour_fields = list(time.localtime(t1))
    hour_fields[4] = 0  # tm_min
    hour_fields[5] = 0  # tm_sec
    # mktime wants a tuple/struct_time; a list is rejected by newer Pythons.
    first_hour_time = int(time.mktime(tuple(hour_fields))) + 3600

    # Walk back from that hour in whole steps without passing t1.
    backsteps = (first_hour_time - t1) // step
    tic_time = first_hour_time - backsteps * step

    tics = []
    while tic_time < t2:
        label = time.strftime("%H:%M", time.localtime(tic_time))
        tics.append("\"%s\" %d" % (label, tic_time))
        tic_time += step

    return ", ".join(tics)
88
89
90 # count total number of nodes in PlanetLab, according to the api
91 # count total number  of nodes in slice, according to the api 
def count_nodes_by_api(config):
    """Record how many nodes the API knows about, in total and in the slice.

    Appends one "<unix time>\\t<count>" line to the files "nodes" and
    "nodes_in_slice" below config.data_path.  For the special slice name
    'root' every node counts as being in the slice.

    config -- object providing api, auth, slice, data_path and verbose

    Raises ValueError if config.slice is not known to the API.
    """
    # Keep the full rows: the slice membership test below needs 'slice_ids'.
    node_rows = config.api.GetNodes(config.auth, {}, ['node_id', 'slice_ids'])
    all_nodes_output = "%d\t%d" % (round(time.time()), len(node_rows))

    if config.slice == 'root':
        nodes_in_slice_output = all_nodes_output
    else:
        slices = config.api.GetSlices(config.auth, {'name': config.slice},
                                      ['slice_id'])
        if not slices:
            raise ValueError("No such slice %s" % config.slice)
        slice_id = slices[0]['slice_id']
        nodes_in_slice = [row['node_id'] for row in node_rows
                          if slice_id in row['slice_ids']]
        nodes_in_slice_output = "%d\t%d" % (round(time.time()),
                                             len(nodes_in_slice))

    # Append one line per run, like the other counters: the plot needs the
    # history, not just the latest sample.
    for file_name, record in (("nodes", all_nodes_output),
                              ("nodes_in_slice", nodes_in_slice_output)):
        data_file = open(os.path.join(config.data_path, file_name), 'a')
        try:
            data_file.write(record + "\n")
        finally:
            data_file.close()

    if config.verbose:
        print("all node: " + all_nodes_output)
        print("nodes in slice: " + nodes_in_slice_output)
122                 
123
124 # count total number of "good" nodes, according to CoMon
def count_nodes_good_by_comon(config):
    """Record how many nodes the CoMon query below currently returns.

    The query asks CoMon for the names of nodes that respond and show none
    of the listed problems (clock drift, DNS failures, low disk space, stale
    ssh status).  One "<unix time>\\t<count>" line is appended to the file
    "nodes_good" below config.data_path.

    config -- object providing data_path and verbose
    """
    comon = urllib.urlopen("http://summer.cs.princeton.edu/status/tabulator.cgi?table=table_nodeviewshort&format=nameonly&select='resptime%20%3E%200%20&&%20((drift%20%3E%201m%20||%20(dns1udp%20%3E%2080%20&&%20dns2udp%20%3E%2080)%20||%20gbfree%20%3C%205%20||%20sshstatus%20%3E%202h)%20==%200)'")
    try:
        # One node name per line; blank lines are not nodes.
        good_nodes = [line for line in comon.readlines() if line.strip()]
    finally:
        comon.close()

    comon_output = "%d\t%d" % (round(time.time()), len(good_nodes))

    nodes_good_comon_file = open(os.path.join(config.data_path, "nodes_good"), 'a')
    try:
        # Terminate the record so successive runs do not run together.
        nodes_good_comon_file.write(comon_output + "\n")
    finally:
        nodes_good_comon_file.close()

    if config.verbose:
        print("comon: " + comon_output)
139         
140 # count total number of nodes reachable by ssh
def count_nodes_can_ssh(config):
    """Try to ssh into every node and record which ones answer.

    The node list is read from config.nodes when that is set, otherwise it
    is every hostname returned by the API.  A shell script is generated that
    loads the private key into an ssh-agent and runs ``multiquery
    'hostname'`` against the list; every output line that reports a
    non-zero number of bytes counts as a reachable node.

    Appends to files below config.data_path:
        dead_nodes    -- one "<time>\\t<hostname>\\t<boot_state>\\t<last_updated>"
                         line per node that did not answer
        nodes_can_ssh -- one "<time>\\t<count>" line with the reachable count

    config -- object providing api, auth, slice, key, nodes,
              multiquery_path, data_path and verbose
    """
    verbose = config.verbose

    if verbose:
        print("Creating list of nodes to ssh to")

    # hostname -> node record, used to annotate the dead nodes
    all_nodes = config.api.GetNodes(config.auth, {},
                                    ['hostname', 'boot_state', 'last_updated'])
    node_dict = {}
    for node in all_nodes:
        node_dict[node['hostname']] = node

    temp_files = []
    try:
        if config.nodes:
            nodes_filename = config.nodes
            nodes_file = open(nodes_filename, 'r')
            try:
                node_list = [line.strip() for line in nodes_file if line.strip()]
            finally:
                nodes_file.close()
        else:
            node_list = list(node_dict.keys())
            nodes_fd, nodes_filename = tempfile.mkstemp()
            temp_files.append(nodes_filename)
            nodes_file = os.fdopen(nodes_fd, 'w')
            try:
                nodes_file.writelines([hostname + "\n" for hostname in node_list])
            finally:
                nodes_file.close()

        # The private key is expected next to the public one, without ".pub".
        private_key = config.key
        if private_key.endswith(".pub"):
            private_key = private_key[:-len(".pub")]

        if verbose:
            print("Attemptng to ssh to nodes in " + nodes_filename)

        ssh_fd, ssh_filename = tempfile.mkstemp()
        temp_files.append(ssh_filename)
        ssh_file = os.fdopen(ssh_fd, 'w')
        try:
            ssh_file.write("""
        export PATH=$PATH:%(multiquery_path)s
        export MQ_SLICE="%(slice)s"
        export MQ_NODES="%(nodes_filename)s"

        eval `ssh-agent` >/dev/null 2>&1
        trap "kill $SSH_AGENT_PID" 0
        ssh-add %(private_key)s >/dev/null 2>&1

        multiquery 'hostname' 2>/dev/null |
        grep "bytes" |
        grep -v ": 0 bytes"
        """ % {'multiquery_path': config.multiquery_path,
               'slice': config.slice,
               'nodes_filename': nodes_filename,
               'private_key': private_key})
        finally:
            ssh_file.close()

        # Run the script that was just generated and read its output lines.
        # NOTE(review): the previous code ran "python /tmp/test" and eval'd
        # its output, which looks like a debugging leftover -- confirm.
        ssh_pipe = os.popen("sh %s" % ssh_filename)
        try:
            ssh_result = ssh_pipe.readlines()
        finally:
            ssh_pipe.close()
    finally:
        for temp_name in temp_files:
            if os.path.exists(temp_name):
                os.unlink(temp_name)

    # Each surviving line looks like "<hostname>: <n> bytes".
    good_nodes = []
    for result in ssh_result:
        if result.find("bytes") > -1:
            good_nodes.append(result.split(":")[0].strip())
    ssh_count = len(good_nodes)

    # Everything we tried but got no answer from is considered dead.
    dead_nodes = sorted(set(node_list).difference(good_nodes))

    curr_time = round(time.time())
    dead_node_count_output = "%d\t%d" % (curr_time, len(dead_nodes))
    dead_nodes_file = open(os.path.join(config.data_path, "dead_nodes"), 'a')
    try:
        for hostname in dead_nodes:
            # A hostname from a user-supplied list may be unknown to the API.
            node = node_dict.get(hostname, {})
            boot_state = node.get('boot_state')
            last_updated = node.get('last_updated') or 0
            dead_nodes_file.write("%d\t%s\t%s\t%d\n" %
                                  (curr_time, hostname, boot_state, last_updated))
    finally:
        dead_nodes_file.close()

    ssh_result_output = "%d\t%d" % (round(time.time()), ssh_count)
    nodes_can_ssh_file = open(os.path.join(config.data_path, "nodes_can_ssh"), 'a')
    try:
        # Terminate the record so successive runs do not run together.
        nodes_can_ssh_file.write(ssh_result_output + "\n")
    finally:
        nodes_can_ssh_file.close()

    if verbose:
        print("nodes that can ssh: " + ssh_result_output)
        print("dead nodes: " + dead_node_count_output)
246         
247         
248 # remove all nodes from a slice
def empty_slice(config):
    """Remove the configured slice from every node known to the API.

    config -- object providing api, auth, slice and verbose
    """
    if config.verbose:
        print("Removing %s from all nodes" % config.slice)

    node_rows = config.api.GetNodes(config.auth, {}, ['node_id'])
    all_nodes = [row['node_id'] for row in node_rows]
    # Pass the configured slice name, not the builtin `slice` type.
    config.api.DeleteSliceFromNodes(config.auth, config.slice, all_nodes)
256
257         
258 # add slice to all nodes. 
259 # make sure users key is up to date   
def init_slice(config):
    """Prepare the slice for the test run.

    Checks that the slice exists and that the API user is a member of it,
    makes sure the public key in config.key is registered for that user
    (adding it, or replacing the user's first key when it differs), and
    finally adds the slice to every node known to the API.

    config -- object providing api, auth, slice, key and verbose

    Raises ValueError if the slice or the user does not exist, if the user
    is not in the slice, or if the key file is empty.
    """
    api = config.api
    auth = config.auth
    key_path = config.key

    # make sure slice exists
    slices = api.GetSlices(auth, [config.slice],
                           ['slice_id', 'name', 'person_ids'])
    if not slices:
        raise ValueError("No such slice %s" % config.slice)
    slice_row = slices[0]

    # make sure user is in slice
    persons = api.GetPersons(auth, auth['Username'],
                             ['person_id', 'email', 'slice_ids', 'key_ids'])
    if not persons:
        raise ValueError("No such user %s" % auth['Username'])
    person = persons[0]
    if slice_row['slice_id'] not in person['slice_ids']:
        raise ValueError("%s not in %s slice. Must be added first" %
                         (person['email'], slice_row['name']))

    # make sure user key is up to date
    key_file = open(key_path, 'r')
    try:
        current_key = key_file.readline().strip()
    finally:
        key_file.close()
    if not current_key:
        raise ValueError("Key cannot be empty")

    keys = api.GetKeys(auth, person['key_ids'])
    if not keys:
        if config.verbose:
            print("Adding new key " + key_path)
        api.AddPersonKey(auth, person['person_id'],
                         {'key_type': 'ssh', 'key': current_key})
    elif not [k for k in keys if k['key'] == current_key]:
        # None of the registered keys matches: overwrite the first one.
        if config.verbose:
            print("%s was modified or is new. Updating PLC" % key_path)
        api.UpdateKey(auth, keys[0]['key_id'], {'key': current_key})

    # add slice to all nodes
    if config.verbose:
        print("Adding %s to all nodes" % config.slice)
    all_nodes = [row['node_id'] for row in api.GetNodes(auth, {}, ['node_id'])]
    api.AddSliceToNodes(auth, config.slice, all_nodes)
301         
302         
303 # create the fill/empty plot
def plot_fill_empty(config=None, gnuplot_path='gnuplot'):
    """Render the fill/empty plot from the recorded counters.

    Reads the time series "nodes", "nodes_in_slice", "nodes_can_ssh" and
    "nodes_good" below config.data_path, writes a gnuplot script covering at
    most the last `plotlength` seconds of data and runs gnuplot on it.  The
    image is written to "fill_empty.png" below config.plots_path.

    config       -- object providing data_path and plots_path; defaults to
                    the module-level `config`
    gnuplot_path -- gnuplot executable to run

    Raises ValueError if none of the data files contains a record.
    """
    if config is None:
        config = globals()['config']

    #ticstep = 3600 # 1 hour
    #plotlength = 36000 # 10 hours
    ticstep = 1800
    plotlength = 10800

    all_nodes_file_name = os.path.join(config.data_path, "nodes")
    nodes_in_slice_file_name = os.path.join(config.data_path, "nodes_in_slice")
    nodes_can_ssh_file_name = os.path.join(config.data_path, "nodes_can_ssh")
    nodes_good_comon_file_name = os.path.join(config.data_path, "nodes_good")

    # Find the earliest first record and the latest last record of all files.
    starttime = None
    stoptime = None
    for datafilename in (all_nodes_file_name,
                         nodes_in_slice_file_name,
                         nodes_can_ssh_file_name,
                         nodes_good_comon_file_name):
        first_line = None
        last_line = None
        datafile = open(datafilename, 'r')
        try:
            # Scan the whole file instead of seeking to a fixed offset from
            # the end, which fails on files shorter than that offset.
            for line in datafile:
                if not line.strip():
                    continue
                if first_line is None:
                    first_line = line
                last_line = line
        finally:
            datafile.close()

        if first_line is None:
            continue  # nothing recorded in this file yet

        thisstarttime = int(first_line.split("\t")[0])
        if starttime is None or thisstarttime < starttime:
            starttime = thisstarttime
        thisstoptime = int(last_line.split("\t")[0])
        if stoptime is None or thisstoptime > stoptime:
            stoptime = thisstoptime

    if starttime is None:
        raise ValueError("No data to plot in " + config.data_path)

    # Show at most the last `plotlength` seconds.
    stopx = stoptime
    startx = max(starttime, stopx - plotlength)

    tics = getTimeTicString(startx, stoptime, ticstep)

    # %d is the day of the month (%m would repeat the month as a number).
    startdate = time.strftime("%b %d, %Y - %H:%M", time.localtime(startx))
    stopdate = time.strftime("%H:%M", time.localtime(stopx))

    plot_file_name = os.path.join(config.plots_path, "fill_empty.png")

    tmpfd, tmpfilename = tempfile.mkstemp()
    try:
        tmpfile = os.fdopen(tmpfd, 'w')
        try:
            tmpfile.write("""
        set term png
        set output "%(plot_file_name)s"

        set title "Number of Nodes / Time - %(startdate)s to %(stopdate)s"
        set xlabel "Time"
        set ylabel "Number of Nodes"

        set xtics (%(tics)s)
        set xrange[%(startx)d:%(stopx)d]
        set yrange[0:950]

        plot "%(all_nodes_file_name)s" u 1:2 w lines title "Total Nodes", \
                "%(nodes_in_slice_file_name)s" u 1:2 w lines title "Nodes in Slice", \
                "%(nodes_good_comon_file_name)s" u 1:2 w lines title \
                        "Healthy Nodes (according to CoMon)", \
                "%(nodes_can_ssh_file_name)s" u 1:2 w lines title "Nodes Reachable by SSH"

        """ % {'plot_file_name': plot_file_name,
               'startdate': startdate,
               'stopdate': stopdate,
               'tics': tics,
               'startx': startx,
               'stopx': stopx,
               'all_nodes_file_name': all_nodes_file_name,
               'nodes_in_slice_file_name': nodes_in_slice_file_name,
               'nodes_good_comon_file_name': nodes_good_comon_file_name,
               'nodes_can_ssh_file_name': nodes_can_ssh_file_name})
        finally:
            tmpfile.close()

        os.system("%s %s" % (gnuplot_path, tmpfilename))
    finally:
        if os.path.exists(tmpfilename):
            os.unlink(tmpfilename)
370
371
372
# Script entry point: set up the slice, gather the counters, plot, clean up.
if __name__ == "__main__":
    config = Config(options)

    # Only a slice that this run added to the nodes is removed again.
    slice_added = False

    if config.slice == 'root':
        if config.verbose:
            print("Logging in as root")
    else:
        # set up slice and add it to nodes
        init_slice(config)
        slice_added = True

        # wait 15 mins for nodes to get the data
        sleep(900)

    try:
        # gather data
        count_nodes_can_ssh(config)
        count_nodes_by_api(config)
        count_nodes_good_by_comon(config)

        # update plots
        plot_fill_empty()
        #os.system("cp plots/*.png ~/public_html/planetlab/tests")
    finally:
        # clean up, even when one of the steps above failed
        if slice_added:
            empty_slice(config)
398