From 264368f250e095e2a2bfd781210b86cf972e6117 Mon Sep 17 00:00:00 2001
From: Tony Mack
Date: Fri, 7 Dec 2007 15:06:59 +0000
Subject: [PATCH] Initial checkin of node test app

---
Note: this patch was corrected by hand after it was generated; the blob ids
on the "index" lines still refer to the original file contents.

 node_ssh/Makefile   |  39 +++++
 node_ssh/nst.cron   |   6 +
 node_ssh/nst.py     | 437 ++++++++++++++++++++++++++++++++++++++++++++
 node_ssh/nst_config |  14 ++
 4 files changed, 496 insertions(+)
 create mode 100644 node_ssh/Makefile
 create mode 100644 node_ssh/nst.cron
 create mode 100644 node_ssh/nst.py
 create mode 100644 node_ssh/nst_config

diff --git a/node_ssh/Makefile b/node_ssh/Makefile
new file mode 100644
index 0000000..3d1cac9
--- /dev/null
+++ b/node_ssh/Makefile
@@ -0,0 +1,39 @@
+CODE = nst.py
+
+CONFIG = nst_config
+
+CRON = nst.cron
+
+CROND = /etc/init.d/crond
+
+APPS = nst
+
+datadir := /usr/share
+appdir := planetlab/tests/nst
+
+all: $(APPS)
+
+nst: $(CODE) $(CONFIG) $(CRON) data
+	install -D -m 755 nst.py $(datadir)/$(appdir)/$(CODE)
+	install -D -m 750 nst_config $(datadir)/$(appdir)/$(CONFIG)
+	install -D -m 644 nst.cron /etc/cron.d/$(CRON)
+
+	# create symlink to executable (-f so that a reinstall does not fail)
+	ln -sf $(datadir)/$(appdir)/$(CODE) /usr/bin/nst
+
+	# reload cron
+	$(CROND) reload
+
+
+data:
+	install -d -m 755 $(datadir)/$(appdir)/keys/
+	install -d -m 755 $(datadir)/$(appdir)/plots/
+	install -d -m 755 $(datadir)/$(appdir)/data/
+clean:
+	rm -Rf $(datadir)/$(appdir)
+	rm -rf /etc/cron.d/$(CRON)
+	rm -rf /usr/bin/nst
+
+	$(CROND) reload
+
+.PHONY: all nst clean data
diff --git a/node_ssh/nst.cron b/node_ssh/nst.cron
new file mode 100644
index 0000000..00ee599
--- /dev/null
+++ b/node_ssh/nst.cron
@@ -0,0 +1,6 @@
+NST_PATH = "/usr/share/planetlab/tests/nst"
+
+*/2 * * * * root cd $NST_PATH; python nst.py;
+
+
+
diff --git a/node_ssh/nst.py b/node_ssh/nst.py
new file mode 100644
index 0000000..c262079
--- /dev/null
+++ b/node_ssh/nst.py
@@ -0,0 +1,437 @@
+#!/usr/bin/python
+"""Node ssh test (nst) for PlanetLab.
+
+Adds the test slice to every node, records how many nodes the API knows
+about, how many CoMon reports as healthy and how many answer over ssh,
+then plots the recorded history with gnuplot.
+"""
+
+import time, sys, urllib, os, tempfile, random
+import xmlrpclib
+from optparse import OptionParser
+from getpass import getpass
+from time import sleep
+
+parser = OptionParser()
+parser.add_option("-c", "--config", action="store", dest="config", help="Path to alternate config file")
+parser.add_option("-x", "--url", action="store", dest="url", help = "API URL")
+parser.add_option("-s", "--slice", action="store", dest="slice", help = "Name of slice to use")
+parser.add_option("-n", "--nodes", action="store", dest="nodes", help = "File that contains a list of nodes to try to access")
+parser.add_option("-k", "--key", action="store", dest="key", help = "Path to alternate public key")
+parser.add_option("-u", "--user", action="store", dest="user", help = "API user name")
+parser.add_option("-p", "--password", action="store", dest="password", help = "API password")
+parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False, help="Be verbose (default: %default)")
+(options, args) = parser.parse_args()
+
+# If user is specified but password is not
+if options.user is not None and options.password is None:
+    try:
+        options.password = getpass()
+    except (EOFError, KeyboardInterrupt):
+        print
+        sys.exit(0)
+
+
+class Config:
+    """Settings read from the config file, overridden by command line options."""
+
+    def __init__(self, options):
+        # if options are specified use them
+        # otherwise use options from config file
+        if options.config:
+            config_file = options.config
+        else:
+            config_file = '/usr/share/planetlab/tests/nst/nst_config'
+
+        # the config file is plain python, its NST_* names become attributes
+        try:
+            execfile(config_file, self.__dict__)
+        except IOError:
+            raise Exception("Could not find nst config in " + config_file)
+
+        if options.url: self.NST_API_SERVER = options.url
+        if options.slice: self.NST_SLICE = options.slice
+        if options.key: self.NST_KEY_PATH = options.key
+        if options.user: self.NST_USER = options.user
+        if options.password: self.NST_PASSWORD = options.password
+        if options.nodes: self.NST_NODES = options.nodes
+        else: self.NST_NODES = None
+
+        self.api = xmlrpclib.Server(self.NST_API_SERVER)
+        self.auth = {}
+        self.auth['Username'] = self.NST_USER
+        self.auth['AuthString'] = self.NST_PASSWORD
+        self.auth['AuthMethod'] = 'password'
+        self.key = self.NST_KEY_PATH
+        self.slice = self.NST_SLICE
+        self.nodes = self.NST_NODES
+        self.multiquery_path = self.NST_MULTIQUERY_PATH
+        self.verbose = options.verbose
+
+        self.data_path = '/usr/share/planetlab/tests/nst/data/'
+        self.plots_path = '/usr/share/planetlab/tests/nst/plots/'
+
+
+# append one record (a line) to a data file
+def append_record(file_name, record):
+    data_file = open(file_name, 'a')
+    try:
+        data_file.write(record + "\n")
+    finally:
+        data_file.close()
+
+
+# get formatted tic string for gnuplot
+def getTimeTicString(t1, t2, step):
+    # truncate t1 to the start of its hour ...
+    first_hour = list(time.localtime(t1))
+    if not first_hour[4] == first_hour[5] == 0:
+        first_hour[4] = 0
+        first_hour[5] = 0
+
+    # ... and move on to the start of the following hour
+    first_hour_time = int(time.mktime(tuple(first_hour)))
+    first_hour_time += 3600
+
+    # step back from that hour in whole steps to the first tic at or after t1
+    backsteps = (first_hour_time - t1)
+    backsteps //= step
+    start = first_hour_time - backsteps * step
+
+    tics = []
+    thistime = start
+    while thistime < t2:
+        tics.append("\"%s\" %d" % \
+            (time.strftime("%H:%M", time.localtime(thistime)), thistime))
+        thistime += step
+
+    ticstr = ", ".join(tics)
+    return ticstr
+
+
+# count total number of nodes in PlanetLab, according to the api
+# count total number of nodes in slice, according to the api
+def count_nodes_by_api(config):
+
+    curr_time = round(time.time())
+
+    # count all nodes; keep the whole rows, slice_ids is needed below
+    all_nodes = config.api.GetNodes(config.auth, {}, ['node_id', 'slice_ids'])
+    all_nodes_output = "%d\t%d" % (curr_time, len(all_nodes))
+
+    # count all nodes in slice
+    if config.slice == 'root':
+        nodes_in_slice_output = all_nodes_output
+    else:
+        slice_id = config.api.GetSlices(config.auth, {'name': config.slice}, ['slice_id'])[0]['slice_id']
+        nodes_in_slice = [row['node_id'] for row in \
+                          all_nodes if slice_id in row['slice_ids']]
+        nodes_in_slice_output = "%d\t%d" % (curr_time, len(nodes_in_slice))
+
+    # append results to datafiles, the plot reads them as a history
+    append_record(os.path.join(config.data_path, "nodes"), all_nodes_output)
+    append_record(os.path.join(config.data_path, "nodes_in_slice"), nodes_in_slice_output)
+
+    if config.verbose:
+        print "all node: " + all_nodes_output
+        print "nodes in slice: " + nodes_in_slice_output
+
+
+# count total number of "good" nodes, according to CoMon
+def count_nodes_good_by_comon(config):
+
+    comon = urllib.urlopen("http://summer.cs.princeton.edu/status/tabulator.cgi?table=table_nodeviewshort&format=nameonly&select='resptime%20%3E%200%20&&%20((drift%20%3E%201m%20||%20(dns1udp%20%3E%2080%20&&%20dns2udp%20%3E%2080)%20||%20gbfree%20%3C%205%20||%20sshstatus%20%3E%202h)%20==%200)'")
+    try:
+        # format=nameonly: presumably one hostname per line; skip blank lines
+        good_nodes = [line for line in comon.readlines() if line.strip()]
+    finally:
+        comon.close()
+
+    comon_output = "%d\t%d" % (round(time.time()), len(good_nodes))
+    append_record(os.path.join(config.data_path, "nodes_good"), comon_output)
+
+    if config.verbose:
+        print "comon: " + comon_output
+
+
+# count total number of nodes reachable by ssh
+def count_nodes_can_ssh(config):
+
+    api = config.api
+    verbose = config.verbose
+    auth = config.auth
+    nodes = config.nodes
+
+    if verbose:
+        print "Creating list of nodes to ssh to"
+
+    # create node dict
+    all_nodes = api.GetNodes(auth, {}, ['hostname', 'boot_state', 'last_updated'])
+    node_dict = {}
+    for node in all_nodes:
+        node_dict[node['hostname']] = node
+
+    # temp files created below, removed again once the query has run
+    temp_files = []
+    try:
+        # create node list
+        if nodes:
+            nodes_filename = nodes
+            nodes_file = open(nodes, 'r')
+            try:
+                node_list = [line.strip() for line in nodes_file if line.strip()]
+            finally:
+                nodes_file.close()
+        else:
+            node_list = node_dict.keys()
+            (fd, nodes_filename) = tempfile.mkstemp()
+            temp_files.append(nodes_filename)
+            nodes_file = os.fdopen(fd, 'w')
+            try:
+                nodes_file.writelines([hostname + "\n" for hostname in node_list])
+            finally:
+                nodes_file.close()
+
+        # create ssh command
+        if verbose:
+            print "Attempting to ssh to nodes in " + nodes_filename
+
+        (fd, ssh_filename) = tempfile.mkstemp()
+        temp_files.append(ssh_filename)
+        ssh_file = os.fdopen(fd, 'w')
+        try:
+            ssh_file.write("""
+            export PATH=$PATH:%(multiquery_path)s
+            export MQ_SLICE="%(slice)s"
+            export MQ_NODES="%(nodes_filename)s"
+
+            eval `ssh-agent` >/dev/null 2>&1
+            trap "kill $SSH_AGENT_PID" 0
+            ssh-add %(private_key)s >/dev/null 2>&1
+
+            multiquery 'hostname' 2>/dev/null |
+            grep "bytes" |
+            grep -v ": 0 bytes"
+            """ % {'multiquery_path': config.multiquery_path,
+                   'slice': config.slice,
+                   'nodes_filename': nodes_filename,
+                   'private_key': config.key.split(".pub")[0]})
+        finally:
+            ssh_file.close()
+
+        # run the script that was just written and collect its output lines
+        ssh_results = os.popen("/bin/sh " + ssh_filename).readlines()
+    finally:
+        for temp_file in temp_files:
+            if os.path.exists(temp_file):
+                os.unlink(temp_file)
+
+    # create a list of hostname out of results that are not empty
+    good_nodes = []
+    for result in ssh_results:
+        if result.find("bytes") > -1:
+            result_parts = result.split(":")
+            hostname = result_parts[0].strip()
+            good_nodes.append(hostname)
+
+    # count number of node we can ssh into
+    ssh_count = len(good_nodes)
+
+    # determine which nodes are dead:
+    dead_nodes = set(node_list).difference(good_nodes)
+
+    # write dead nodes to file
+    curr_time = round(time.time())
+    dead_node_count_output = "%d\t%d" % (curr_time, len(dead_nodes))
+    dead_nodes_file_name = os.path.join(config.data_path, "dead_nodes")
+    dead_nodes_file = open(dead_nodes_file_name, 'a')
+    try:
+        for hostname in dead_nodes:
+            # hosts listed in a --nodes file may be unknown to the api
+            node = node_dict.get(hostname, {})
+            boot_state = node.get('boot_state', 'unknown')
+            last_updated = node.get('last_updated') or 0
+            dead_nodes_file.write("%d\t%s\t%s\t%d\n" % \
+                (curr_time, hostname, boot_state, last_updated))
+    finally:
+        dead_nodes_file.close()
+
+    # write good node count
+    ssh_result_output = "%d\t%d" % (curr_time, ssh_count)
+    append_record(os.path.join(config.data_path, "nodes_can_ssh"), ssh_result_output)
+
+    if verbose:
+        print "nodes that can ssh: " + ssh_result_output
+        print "dead nodes: " + dead_node_count_output
+
+
+# remove all nodes from a slice
+def empty_slice(config):
+
+    if config.verbose:
+        print "Removing %s from all nodes" % config.slice
+
+    all_nodes = [row['node_id'] for row in config.api.GetNodes(config.auth, {}, ['node_id'])]
+    config.api.DeleteSliceFromNodes(config.auth, config.slice, all_nodes)
+
+
+# add slice to all nodes.
+# make sure users key is up to date
+def init_slice(config):
+
+    # make sure slice exists
+    slices = config.api.GetSlices(config.auth, [config.slice], \
+                                  ['slice_id', 'name', 'person_ids'])
+    if not slices:
+        raise Exception("No such slice %s" % config.slice)
+    slice_row = slices[0]
+
+    # make sure user is in slice
+    person = config.api.GetPersons(config.auth, [config.auth['Username']], \
+                                   ['person_id', 'email', 'slice_ids', 'key_ids'])[0]
+    if slice_row['slice_id'] not in person['slice_ids']:
+        raise Exception("%s not in %s slice. Must be added first" % \
+                        (person['email'], slice_row['name']))
+
+    # make sure user key is up to date
+    key_file = open(config.key, 'r')
+    try:
+        current_key = key_file.readline().strip()
+    finally:
+        key_file.close()
+    if len(current_key) == 0:
+        raise Exception("Key cannot be empty")
+
+    keys = config.api.GetKeys(config.auth, person['key_ids'])
+    if not keys:
+        if config.verbose:
+            print "Adding new key " + config.key
+        config.api.AddPersonKey(config.auth, person['person_id'], \
+                                {'key_type': 'ssh', 'key': current_key})
+
+    elif current_key not in [k['key'] for k in keys]:
+        if config.verbose:
+            print "%s was modified or is new. Updating PLC" % config.key
+        old_key = keys[0]
+        config.api.UpdateKey(config.auth, old_key['key_id'], \
+                             {'key': current_key})
+
+    # add slice to all nodes
+    if config.verbose:
+        print "Adding %s to all nodes" % config.slice
+    all_nodes = [row['node_id'] for row in \
+                 config.api.GetNodes(config.auth, {}, ['node_id'])]
+    config.api.AddSliceToNodes(config.auth, config.slice, all_nodes)
+
+
+# create the fill/empty plot
+# reads its settings from the module level config object
+def plot_fill_empty():
+    #ticstep = 3600 # 1 hour
+    #plotlength = 36000 # 10 hours
+    ticstep = 1800
+    plotlength = 10800
+
+    plots_path = config.plots_path
+    # NST_GNUPLOT_PATH may be set in the config file, otherwise rely on $PATH
+    gnuplot_path = getattr(config, 'NST_GNUPLOT_PATH', 'gnuplot')
+
+    all_nodes_file_name = os.path.join(config.data_path, "nodes")
+    nodes_in_slice_file_name = os.path.join(config.data_path, "nodes_in_slice")
+    nodes_can_ssh_file_name = os.path.join(config.data_path, "nodes_can_ssh")
+    nodes_good_comon_file_name = os.path.join(config.data_path, "nodes_good")
+
+    # find the time range covered by the data files
+    starttime = -1
+    stoptime = -1
+    for datafilename in [all_nodes_file_name,
+                         nodes_in_slice_file_name,
+                         nodes_can_ssh_file_name,
+                         nodes_good_comon_file_name]:
+        datafile = open(datafilename, 'r')
+        try:
+            lines = [line for line in datafile if line.strip()]
+        finally:
+            datafile.close()
+        if not lines:
+            continue
+        thisstarttime = int(lines[0].split("\t")[0])
+        if starttime == -1 or thisstarttime < starttime:
+            starttime = thisstarttime
+        thisstoptime = int(lines[-1].split("\t")[0])
+        if stoptime == -1 or thisstoptime > stoptime:
+            stoptime = thisstoptime
+
+    if starttime == -1:
+        # no data recorded yet, nothing to plot
+        return
+
+    stopx = stoptime
+    startx = max(starttime, stopx - plotlength)
+    starttime = startx
+
+    tics = getTimeTicString(starttime, stoptime, ticstep)
+
+    startdate = time.strftime("%b %d, %Y - %H:%M", time.localtime(startx))
+    stopdate = time.strftime("%H:%M", time.localtime(stopx))
+
+    (fd, tmpfilename) = tempfile.mkstemp()
+    try:
+        tmpfile = os.fdopen(fd, 'w')
+        try:
+            tmpfile.write("""
+            set term png
+            set output "%(plots_path)s/fill_empty.png"
+
+            set title "Number of Nodes / Time - %(startdate)s to %(stopdate)s"
+            set xlabel "Time"
+            set ylabel "Number of Nodes"
+
+            set xtics (%(tics)s)
+            set xrange[%(startx)d:%(stopx)d]
+            set yrange[0:950]
+
+            plot "%(all_nodes_file_name)s" u 1:2 w lines title "Total Nodes", \
+                 "%(nodes_in_slice_file_name)s" u 1:2 w lines title "Nodes in Slice", \
+                 "%(nodes_good_comon_file_name)s" u 1:2 w lines title \
+                 "Healthy Nodes (according to CoMon)", \
+                 "%(nodes_can_ssh_file_name)s" u 1:2 w lines title "Nodes Reachable by SSH"
+
+            """ % locals())
+        finally:
+            tmpfile.close()
+
+        os.system("%s %s" % (gnuplot_path, tmpfilename))
+    finally:
+        if os.path.exists(tmpfilename):
+            os.unlink(tmpfilename)
+
+
+
+config = Config(options)
+
+
+if config.slice == 'root':
+
+    if config.verbose:
+        print "Logging in as root"
+else:
+    # set up slice and add it to nodes
+    init_slice(config)
+
+    # wait 15 mins for nodes to get the data
+    sleep(900)
+
+# gather data
+count_nodes_can_ssh(config)
+count_nodes_by_api(config)
+count_nodes_good_by_comon(config)
+
+# update plots
+plot_fill_empty()
+#os.system("cp plots/*.png ~/public_html/planetlab/tests")
+
+# clean up: only remove the slice if it was added to the nodes above
+if config.slice != 'root':
+    empty_slice(config)
+
diff --git a/node_ssh/nst_config b/node_ssh/nst_config
new file mode 100644
index 0000000..1f6a881
--- /dev/null
+++ b/node_ssh/nst_config
@@ -0,0 +1,14 @@
+# node_test_ssh config file
+
+NST_API_SERVER='https://www.planet-lab.org/PLCAPI/'
+
+NST_KEY_PATH='/usr/share/planetlab/tests/nst/keys/nts.pub'
+
+NST_MULTIQUERY_PATH='/usr/share/codeploy'
+
+NST_SLICE='pl_nts'
+
+NST_USER='USERNAME'
+
+NST_PASSWORD='PASSWORD'
+
-- 
2.45.2