--- /dev/null
+#!/usr/bin/python
+
+import time, sys, urllib, os, tempfile, random
+import xmlrpclib
+from optparse import OptionParser
+from getpass import getpass
+from time import sleep
+
+# Command line interface.  Every option except --verbose overrides the
+# setting of the same purpose read from the config file (see Config).
+parser = OptionParser()
+parser.add_option("-c", "--config", action="store", dest="config",
+                  help="Path to alternate config file")
+parser.add_option("-x", "--url", action="store", dest="url",
+                  help="API URL")
+parser.add_option("-s", "--slice", action="store", dest="slice",
+                  help="Name of slice to use")
+parser.add_option("-n", "--nodes", action="store", dest="nodes",
+                  help="File that contains a list of nodes to try to access")
+parser.add_option("-k", "--key", action="store", dest="key",
+                  help="Path to alternate public key")
+parser.add_option("-u", "--user", action="store", dest="user",
+                  help="API user name")
+parser.add_option("-p", "--password", action="store", dest="password",
+                  help="API password")
+# An explicit default is required here: without one optparse expands
+# "%default" to "none" in the help text and leaves options.verbose as None.
+parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
+                  default=False, help="Be verbose (default: %default)")
+(options, args) = parser.parse_args()
+
+# Ask for the API password interactively when a user name was supplied
+# on the command line without one.
+if options.user is not None:
+    if options.password is None:
+        try:
+            options.password = getpass()
+        except (EOFError, KeyboardInterrupt):
+            # The prompt was aborted: finish the prompt line and leave.
+            sys.stdout.write("\n")
+            sys.exit(0)
+
+class Config:
+    """Settings for one run, merged from a config file and the command line.
+
+    The config file is executed as Python and its top-level names become
+    attributes of the instance.  Command line options, when given, replace
+    the corresponding NST_* values.
+    """
+
+    def __init__(self, options):
+        """Load the config file and apply command line overrides.
+
+        options -- parsed command line options; the attributes read are
+                   config, url, slice, key, user, password, nodes, verbose.
+
+        Raises IOError when the config file cannot be read.  Errors raised
+        by the config file's own code propagate unchanged.
+        """
+
+        # if options are specified use them
+        # otherwise use options from config file
+        if options.config:
+            config_file = options.config
+        else:
+            config_file = '/usr/share/planetlab/tests/nst/nst_config'
+
+        # Only a missing/unreadable file is reported as "not found"; any
+        # other failure comes from the file's content and keeps its own
+        # traceback instead of being hidden behind a misleading message.
+        try:
+            execfile(config_file, self.__dict__)
+        except IOError:
+            raise IOError("Could not find nst config in " + config_file)
+
+        if options.url:
+            self.NST_API_SERVER = options.url
+        if options.slice:
+            self.NST_SLICE = options.slice
+        if options.key:
+            self.NST_KEY_PATH = options.key
+        if options.user:
+            self.NST_USER = options.user
+        if options.password:
+            self.NST_PASSWORD = options.password
+        # The node list file only ever comes from the command line.
+        if options.nodes:
+            self.NST_NODES = options.nodes
+        else:
+            self.NST_NODES = None
+
+        # XML-RPC proxy and the password auth structure sent with each call
+        self.api = xmlrpclib.Server(self.NST_API_SERVER)
+        self.auth = {}
+        self.auth['Username'] = self.NST_USER
+        self.auth['AuthString'] = self.NST_PASSWORD
+        self.auth['AuthMethod'] = 'password'
+
+        # short aliases used by the rest of the script
+        self.key = self.NST_KEY_PATH
+        self.slice = self.NST_SLICE
+        self.nodes = self.NST_NODES
+        self.multiquery_path = self.NST_MULTIQUERY_PATH
+        self.verbose = options.verbose
+
+        # fixed locations of the data files and of the generated plots
+        self.data_path = '/usr/share/planetlab/tests/nst/data/'
+        self.plots_path = '/usr/share/planetlab/tests/nst/plots/'
+
+
+# get formatted tic string for gnuplot
+def getTimeTicString(t1, t2, step):
+    """Return the tic list for a gnuplot "set xtics (...)" command.
+
+    Tics are spaced step seconds apart and aligned so that one of them
+    falls on the top of the local-time hour following t1.  The first tic
+    is the earliest aligned position that is not before t1; tics are
+    emitted while they are strictly before t2.
+
+    t1, t2 -- start and end of the range, in seconds since the epoch
+    step   -- distance between tics in seconds, must be positive
+
+    Returns a string of '"HH:MM" <seconds>' entries joined by ", "
+    (empty when no tic falls inside the range).
+    Raises ValueError when step is not positive.
+    """
+    # A zero step would divide by zero, a negative one would never finish.
+    if step <= 0:
+        raise ValueError("step must be a positive number of seconds")
+
+    # Top of the local hour that contains t1 ...
+    hour_start = list(time.localtime(t1))
+    hour_start[4] = 0  # tm_min
+    hour_start[5] = 0  # tm_sec
+    # ... and the hour boundary right after it.
+    next_hour = int(time.mktime(tuple(hour_start))) + 3600
+
+    # Whole steps that fit between t1 and that boundary.  Floor division is
+    # spelled out so a float t1 cannot shift the tics off their alignment.
+    backsteps = int((next_hour - t1) // step)
+    start = next_hour - backsteps * step
+
+    tics = []
+    thistime = start
+    while thistime < t2:
+        label = time.strftime("%H:%M", time.localtime(thistime))
+        tics.append("\"%s\" %d" % (label, thistime))
+        thistime += step
+
+    return ", ".join(tics)
+
+
+# count total number of nodes in PlanetLab, according to the api
+# count total number of nodes in slice, according to the api
+def count_nodes_by_api(config):
+    """Record how many nodes exist and how many carry config.slice.
+
+    Appends one "timestamp<TAB>count" line to the "nodes" file and one to
+    the "nodes_in_slice" file under config.data_path.  For the 'root'
+    slice both counts are the total number of nodes.
+
+    config -- object providing api, auth, slice, data_path and verbose.
+
+    Raises Exception when the API does not know config.slice.
+    """
+
+    # count all nodes; the full rows are kept because the slice filter
+    # below needs each node's slice_ids
+    all_nodes = config.api.GetNodes(config.auth, {}, ['node_id', 'slice_ids'])
+    all_nodes_output = "%d\t%d" % (round(time.time()), len(all_nodes))
+
+    # count all nodes in slice
+    if config.slice == 'root':
+        nodes_in_slice_output = all_nodes_output
+    else:
+        slices = config.api.GetSlices(config.auth, {'name': config.slice},
+                                      ['slice_id'])
+        if not slices:
+            raise Exception("No such slice %s" % config.slice)
+        slice_id = slices[0]['slice_id']
+        nodes_in_slice = [row['node_id'] for row in all_nodes
+                          if slice_id in row['slice_ids']]
+        nodes_in_slice_output = "%d\t%d" % (round(time.time()),
+                                            len(nodes_in_slice))
+
+    # write result to datafiles: one record per line, appended, so the
+    # files build up the time series the plot reads (as the other counters do)
+    all_nodes_file_name = os.path.join(config.data_path, "nodes")
+    all_nodes_file = open(all_nodes_file_name, 'a')
+    try:
+        all_nodes_file.write(all_nodes_output + "\n")
+    finally:
+        all_nodes_file.close()
+
+    nodes_in_slice_file_name = os.path.join(config.data_path, "nodes_in_slice")
+    nodes_in_slice_file = open(nodes_in_slice_file_name, 'a')
+    try:
+        nodes_in_slice_file.write(nodes_in_slice_output + "\n")
+    finally:
+        nodes_in_slice_file.close()
+
+    if config.verbose:
+        print "all node: " + all_nodes_output
+        print "nodes in slice: " + nodes_in_slice_output
+
+
+# count total number of "good" nodes, according to CoMon
+def count_nodes_good_by_comon(config):
+    """Record how many nodes the CoMon query below currently returns.
+
+    The query asks for node names only, so every non-blank line of the
+    response is counted as one node.  Appends one "timestamp<TAB>count"
+    line to the "nodes_good" file under config.data_path.
+
+    config -- object providing data_path and verbose.
+    """
+
+    comon = urllib.urlopen("http://summer.cs.princeton.edu/status/tabulator.cgi?table=table_nodeviewshort&format=nameonly&select='resptime%20%3E%200%20&&%20((drift%20%3E%201m%20||%20(dns1udp%20%3E%2080%20&&%20dns2udp%20%3E%2080)%20||%20gbfree%20%3C%205%20||%20sshstatus%20%3E%202h)%20==%200)'")
+    try:
+        response_lines = comon.readlines()
+    finally:
+        comon.close()
+
+    # blank lines are not node names
+    good_nodes = [line for line in response_lines if line.strip()]
+
+    comon_output = "%d\t%d" % (round(time.time()), len(good_nodes))
+    nodes_good_comon_file_name = os.path.join(config.data_path, "nodes_good")
+    nodes_good_comon_file = open(nodes_good_comon_file_name, 'a')
+    try:
+        # the file is appended to, so each record needs its own line
+        nodes_good_comon_file.write(comon_output + "\n")
+    finally:
+        nodes_good_comon_file.close()
+
+    if config.verbose:
+        print "comon: " + comon_output
+
+# count total number of nodes reachable by ssh
+def count_nodes_can_ssh(config):
+    """Probe nodes over ssh with multiquery and record which ones answer.
+
+    Candidate hostnames come from the file named by config.nodes when it
+    is set, otherwise from every node returned by GetNodes.  A shell
+    script that loads the private key into a fresh ssh-agent and runs
+    "multiquery 'hostname'" is written to a temporary file and executed
+    with /bin/sh.
+
+    Appends one line per unreachable node
+    ("timestamp<TAB>hostname<TAB>boot_state<TAB>last_updated") to the
+    "dead_nodes" file and one "timestamp<TAB>count" line to the
+    "nodes_can_ssh" file, both under config.data_path.
+
+    config -- object providing api, auth, slice, key, nodes,
+              multiquery_path, verbose and data_path.
+    """
+
+    api = config.api
+    auth = config.auth
+    verbose = config.verbose
+    nodes = config.nodes
+
+    if verbose:
+        print "Creating list of nodes to ssh to"
+
+    # create node dict: the API's record of every node, keyed by hostname
+    all_nodes = api.GetNodes(auth, {}, ['hostname', 'boot_state', 'last_updated'])
+    node_dict = {}
+    for node in all_nodes:
+        node_dict[node['hostname']] = node
+
+    # The private key is expected next to the public key, minus ".pub".
+    private_key = config.key
+    if private_key.endswith(".pub"):
+        private_key = private_key[:-len(".pub")]
+
+    temp_files = []
+    try:
+        # create node list
+        if nodes:
+            nodes_filename = nodes
+            nodes_file = open(nodes_filename, 'r')
+            try:
+                node_list = [line.strip() for line in nodes_file]
+            finally:
+                nodes_file.close()
+            # blank lines are not hostnames
+            node_list = [hostname for hostname in node_list if hostname]
+        else:
+            node_list = node_dict.keys()
+            # mkstemp creates the file itself, avoiding the race in mktemp
+            fd, nodes_filename = tempfile.mkstemp()
+            temp_files.append(nodes_filename)
+            nodes_file = os.fdopen(fd, 'w')
+            try:
+                # one hostname per line
+                nodes_file.writelines([hostname + "\n" for hostname in node_list])
+            finally:
+                nodes_file.close()
+
+        # create ssh command
+        if verbose:
+            print "Attemptng to ssh to nodes in " + nodes_filename
+
+        fd, ssh_filename = tempfile.mkstemp()
+        temp_files.append(ssh_filename)
+        ssh_file = os.fdopen(fd, 'w')
+        try:
+            ssh_file.write("""
+export PATH=$PATH:%(multiquery_path)s
+export MQ_SLICE="%(slice)s"
+export MQ_NODES="%(nodes_filename)s"
+
+eval `ssh-agent` >/dev/null 2>&1
+trap "kill $SSH_AGENT_PID" 0
+ssh-add %(private_key)s >/dev/null 2>&1
+
+multiquery 'hostname' 2>/dev/null |
+grep "bytes" |
+grep -v ": 0 bytes"
+""" % {'multiquery_path': config.multiquery_path,
+       'slice': config.slice,
+       'nodes_filename': nodes_filename,
+       'private_key': private_key})
+        finally:
+            ssh_file.close()
+
+        # Run the script that was just generated and read its output
+        # directly; nothing from the child process is eval()'ed.
+        ssh_pipe = os.popen("/bin/sh " + ssh_filename)
+        try:
+            ssh_results = ssh_pipe.readlines()
+        finally:
+            ssh_pipe.close()
+    finally:
+        # remove the temporary node list and script
+        for temp_filename in temp_files:
+            if os.path.exists(temp_filename):
+                os.unlink(temp_filename)
+
+    # create a list of hostname out of results that are not empty
+    # NOTE(review): assumes each surviving output line starts with
+    # "<hostname>:" -- confirm against multiquery's output format.
+    good_nodes = []
+    for result in ssh_results:
+        if result.find("bytes") > -1:
+            hostname = result.split(":")[0].strip()
+            good_nodes.append(hostname)
+
+    # count number of node we can ssh into
+    ssh_count = len(good_nodes)
+
+    # determine which nodes are dead:
+    dead_nodes = set(node_list).difference(good_nodes)
+
+    # write dead nodes to file
+    curr_time = round(time.time())
+    dead_node_count_output = "%d\t%d" % (curr_time, len(dead_nodes))
+    dead_nodes_file_name = os.path.join(config.data_path, "dead_nodes")
+    dead_nodes_file = open(dead_nodes_file_name, 'a')
+    try:
+        for hostname in sorted(dead_nodes):
+            # a hostname from the nodes file may be unknown to the API
+            node = node_dict.get(hostname, {})
+            boot_state = node.get('boot_state', 'unknown')
+            last_updated = node.get('last_updated') or 0
+            dead_nodes_file.write("%d\t%s\t%s\t%d\n" %
+                                  (curr_time, hostname, boot_state, last_updated))
+    finally:
+        dead_nodes_file.close()
+
+    # write good node count, one record per line
+    ssh_result_output = "%d\t%d" % (round(time.time()), ssh_count)
+    nodes_can_ssh_file_name = os.path.join(config.data_path, "nodes_can_ssh")
+    nodes_can_ssh_file = open(nodes_can_ssh_file_name, 'a')
+    try:
+        nodes_can_ssh_file.write(ssh_result_output + "\n")
+    finally:
+        nodes_can_ssh_file.close()
+
+    if verbose:
+        print "nodes that can ssh: " + ssh_result_output
+        print "dead nodes: " + dead_node_count_output
+
+
+# remove all nodes from a slice
+def empty_slice(config):
+    """Remove config.slice from every node the API reports.
+
+    config -- object providing api, auth, slice and verbose.
+    """
+
+    if config.verbose:
+        print "Removing %s from all nodes" % config.slice
+
+    all_nodes = [row['node_id'] for row in
+                 config.api.GetNodes(config.auth, {}, ['node_id'])]
+    # The slice is named by config.slice; the bare name "slice" would be
+    # Python's builtin type, not the slice to remove.
+    config.api.DeleteSliceFromNodes(config.auth, config.slice, all_nodes)
+
+
+# add slice to all nodes.
+# make sure users key is up to date
+def init_slice(config):
+    """Check the slice and user, sync the user's key, add the slice to all nodes.
+
+    Steps, in order:
+      1. config.slice must exist.
+      2. the API user (config.auth['Username']) must be a member of it.
+      3. the first line of the key file config.key is compared with the
+         keys registered for the user; it is added when the user has no
+         keys, or replaces the user's first key when none of them match.
+      4. the slice is added to every node the API reports.
+
+    config -- object providing api, auth, slice, key and verbose.
+
+    Raises Exception when the slice or user is unknown, the user is not
+    in the slice, or the key file's first line is empty.
+    """
+
+    # make sure slice exists
+    slices = config.api.GetSlices(config.auth, [config.slice],
+                                  ['slice_id', 'name', 'person_ids'])
+    if not slices:
+        raise Exception("No such slice %s" % config.slice)
+    slice_info = slices[0]
+
+    # make sure user is in slice
+    persons = config.api.GetPersons(config.auth, config.auth['Username'],
+                                    ['person_id', 'email', 'slice_ids', 'key_ids'])
+    if not persons:
+        raise Exception("No such user %s" % config.auth['Username'])
+    person = persons[0]
+    if slice_info['slice_id'] not in person['slice_ids']:
+        raise Exception("%s not in %s slice. Must be added first" %
+                        (person['email'], slice_info['name']))
+
+    # make sure user key is up to date
+    key_path = config.key
+    key_file = open(key_path, 'r')
+    try:
+        current_key = key_file.readline().strip()
+    finally:
+        key_file.close()
+    if len(current_key) == 0:
+        raise Exception("Key cannot be empty")
+
+    # A user without key ids has no keys to fetch.
+    if person['key_ids']:
+        keys = config.api.GetKeys(config.auth, person['key_ids'])
+    else:
+        keys = []
+
+    if not keys:
+        if config.verbose:
+            print "Adding new key " + key_path
+        config.api.AddPersonKey(config.auth, person['person_id'],
+                                {'key_type': 'ssh', 'key': current_key})
+
+    elif not [k for k in keys if k['key'] == current_key]:
+        if config.verbose:
+            print "%s was modified or is new. Updating PLC" % key_path
+        old_key = keys[0]
+        config.api.UpdateKey(config.auth, old_key['key_id'],
+                             {'key': current_key})
+
+    # add slice to all nodes
+    if config.verbose:
+        print "Adding %s to all nodes" % config.slice
+    all_nodes = [row['node_id'] for row in
+                 config.api.GetNodes(config.auth, {}, ['node_id'])]
+    config.api.AddSliceToNodes(config.auth, config.slice, all_nodes)
+
+
+# create the fill/empty plot
+def plot_fill_empty():
+    """Render fill_empty.png from the four node-count data files.
+
+    Reads the module-level "config" object (data_path, plots_path,
+    verbose).  The plotted window ends at the newest timestamp found in
+    the data files and reaches back at most "plotlength" seconds.  A
+    gnuplot script is written to a temporary file, run, and removed.
+    Does nothing when none of the data files holds a record.
+    """
+    #ticstep = 3600 # 1 hour
+    #plotlength = 36000 # 10 hours
+    ticstep = 1800       # seconds between x tics
+    plotlength = 10800   # longest time span shown, in seconds
+
+    plots_path = config.plots_path
+    # NOTE(review): no gnuplot location is defined anywhere in this file.
+    # A "gnuplot_path" set by the config file is honoured when present,
+    # otherwise gnuplot is looked up on PATH -- confirm the intended source.
+    gnuplot_path = getattr(config, 'gnuplot_path', 'gnuplot')
+
+    all_nodes_file_name = os.path.join(config.data_path, "nodes")
+    nodes_in_slice_file_name = os.path.join(config.data_path, "nodes_in_slice")
+    nodes_can_ssh_file_name = os.path.join(config.data_path, "nodes_can_ssh")
+    nodes_good_comon_file_name = os.path.join(config.data_path, "nodes_good")
+
+    # Earliest first-record time and latest last-record time over all files.
+    starttime = -1
+    stoptime = -1
+    for datafilename in [all_nodes_file_name,
+                         nodes_in_slice_file_name,
+                         nodes_can_ssh_file_name,
+                         nodes_good_comon_file_name]:
+        datafile = open(datafilename, 'r')
+        try:
+            # Reading every line (instead of seeking a fixed distance back
+            # from the end) also works for files shorter than that distance.
+            records = [line for line in datafile if line.strip()]
+        finally:
+            datafile.close()
+        if not records:
+            continue
+        thisstarttime = int(records[0].split("\t")[0])
+        if starttime == -1 or thisstarttime < starttime:
+            starttime = thisstarttime
+        thisstoptime = int(records[-1].split("\t")[0])
+        if stoptime == -1 or thisstoptime > stoptime:
+            stoptime = thisstoptime
+
+    if starttime == -1:
+        if config.verbose:
+            print "No data to plot"
+        return
+
+    stopx = stoptime
+    startx = max(starttime, stopx - plotlength)
+    starttime = startx
+
+    tics = getTimeTicString(starttime, stoptime, ticstep)
+
+    # %d is the day of the month (%m would repeat the month as a number)
+    startdate = time.strftime("%b %d, %Y - %H:%M", time.localtime(startx))
+    stopdate = time.strftime("%H:%M", time.localtime(stopx))
+
+    # mkstemp creates the file itself, avoiding the race in mktemp
+    fd, tmpfilename = tempfile.mkstemp()
+    try:
+        tmpfile = os.fdopen(fd, 'w')
+        try:
+            # The backslash-newline pairs below are consumed by Python, so
+            # gnuplot receives the whole plot command on a single line.
+            tmpfile.write("""
+set term png
+set output "%(plots_path)s/fill_empty.png"
+
+set title "Number of Nodes / Time - %(startdate)s to %(stopdate)s"
+set xlabel "Time"
+set ylabel "Number of Nodes"
+
+set xtics (%(tics)s)
+set xrange[%(startx)d:%(stopx)d]
+set yrange[0:950]
+
+plot "%(all_nodes_file_name)s" u 1:2 w lines title "Total Nodes", \
+"%(nodes_in_slice_file_name)s" u 1:2 w lines title "Nodes in Slice", \
+"%(nodes_good_comon_file_name)s" u 1:2 w lines title \
+"Healthy Nodes (according to CoMon)", \
+"%(nodes_can_ssh_file_name)s" u 1:2 w lines title "Nodes Reachable by SSH"
+
+""" % {'plots_path': plots_path,
+       'startdate': startdate,
+       'stopdate': stopdate,
+       'tics': tics,
+       'startx': startx,
+       'stopx': stopx,
+       'all_nodes_file_name': all_nodes_file_name,
+       'nodes_in_slice_file_name': nodes_in_slice_file_name,
+       'nodes_good_comon_file_name': nodes_good_comon_file_name,
+       'nodes_can_ssh_file_name': nodes_can_ssh_file_name})
+        finally:
+            tmpfile.close()
+
+        os.system("%s %s" % (gnuplot_path, tmpfilename))
+    finally:
+        # the script is removed even when writing or running it fails
+        if os.path.exists(tmpfilename):
+            os.unlink(tmpfilename)
+
+
+
+# Module-level configuration; plot_fill_empty() reads this global.
+config = Config(options)
+
+
+if config.slice == 'root':
+
+    if config.verbose:
+        print "Logging in as root"
+else:
+    # set up slice and add it to nodes
+    init_slice(config)
+
+    # wait 15 mins for nodes to get the data
+    sleep(900)
+
+try:
+    # gather data
+    count_nodes_can_ssh(config)
+    count_nodes_by_api(config)
+    count_nodes_good_by_comon(config)
+
+    # update plots
+    plot_fill_empty()
+    #os.system("cp plots/*.png ~/public_html/planetlab/tests")
+finally:
+    # clean up, even when a gathering step failed, so the slice is not
+    # left behind on the nodes
+    empty_slice(config)
+