From 264368f250e095e2a2bfd781210b86cf972e6117 Mon Sep 17 00:00:00 2001
From: Tony Mack <tmack@cs.princeton.edu>
Date: Fri, 7 Dec 2007 15:06:59 +0000
Subject: [PATCH] Initial checkin of node test app

---
 node_ssh/Makefile   |  39 +++++
 node_ssh/nst.cron   |   6 +
 node_ssh/nst.py     | 398 ++++++++++++++++++++++++++++++++++++++++++++
 node_ssh/nst_config |  14 ++
 4 files changed, 457 insertions(+)
 create mode 100644 node_ssh/Makefile
 create mode 100644 node_ssh/nst.cron
 create mode 100644 node_ssh/nst.py
 create mode 100644 node_ssh/nst_config

diff --git a/node_ssh/Makefile b/node_ssh/Makefile
new file mode 100644
index 0000000..3d1cac9
--- /dev/null
+++ b/node_ssh/Makefile
@@ -0,0 +1,39 @@
+CODE = nst.py
+
+CONFIG = nst_config
+
+CRON = nst.cron
+
+CROND = /etc/init.d/crond
+
+APPS = nst
+
+datadir := /usr/share
+appdir := planetlab/tests/nst
+
+all: $(APPS)
+
+# Install the script, its config (holds the API password, so not world
+# readable) and the cron entry (plain data, so not executable).
+nst: $(CODE) $(CONFIG) $(CRON) data
+	install -D -m 755 $(CODE) $(datadir)/$(appdir)/$(CODE)
+	install -D -m 640 $(CONFIG) $(datadir)/$(appdir)/$(CONFIG)
+	install -D -m 644 $(CRON) /etc/cron.d/$(CRON)
+	# create symlink to executable (-f so re-installing does not fail)
+	ln -sf $(datadir)/$(appdir)/$(CODE) /usr/bin/nst
+	$(CROND) reload
+
+# Create the key, plot and data directories used by nst.
+data:
+	install -d -m 755 $(datadir)/$(appdir)/keys/
+	install -d -m 755 $(datadir)/$(appdir)/plots/
+	install -d -m 755 $(datadir)/$(appdir)/data/
+
+# Remove everything the nst target installed.
+clean:
+	rm -rf $(datadir)/$(appdir)
+	rm -f /etc/cron.d/$(CRON)
+	rm -f /usr/bin/nst
+	$(CROND) reload
+
+.PHONY: all nst clean data
diff --git a/node_ssh/nst.cron b/node_ssh/nst.cron
new file mode 100644
index 0000000..00ee599
--- /dev/null
+++ b/node_ssh/nst.cron
@@ -0,0 +1,6 @@
+NST_PATH = "/usr/share/planetlab/tests/nst"
+# /etc/cron.d entries need a user field between the schedule and the command
+*/2 * * * * root cd $NST_PATH; python nst.py
+
+
+
diff --git a/node_ssh/nst.py b/node_ssh/nst.py
new file mode 100644
index 0000000..c262079
--- /dev/null
+++ b/node_ssh/nst.py
@@ -0,0 +1,398 @@
+#!/usr/bin/python
+
+import time, sys, urllib, os, tempfile, random
+import xmlrpclib
+from optparse import OptionParser
+from getpass import getpass
+from time import sleep
+
+parser = OptionParser()
+parser.add_option("-c", "--config", action="store", dest="config", help="Path to alternate config file")
+parser.add_option("-x", "--url", action="store", dest="url", help="API URL")
+parser.add_option("-s", "--slice", action="store", dest="slice", help="Name of slice to use")
+parser.add_option("-n", "--nodes", action="store", dest="nodes", help="File that contains a list of nodes to try to access")
+parser.add_option("-k", "--key", action="store", dest="key", help="Path to alternate public key")
+parser.add_option("-u", "--user", action="store", dest="user", help="API user name")
+parser.add_option("-p", "--password", action="store", dest="password", help="API password")
+parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False, help="Be verbose (default: %default)")
+(options, args) = parser.parse_args()
+
+# If user is specified but password is not, prompt for the password
+if options.user is not None and options.password is None:
+    try:
+        options.password = getpass()
+    except (EOFError, KeyboardInterrupt):
+        print
+        sys.exit(0)
+
+class Config:
+    """Settings for one run: the config file overridden by command line options."""
+
+    def __init__(self, options):
+        # if options are specified use them
+        # otherwise use options from config file
+        if options.config:
+            config_file = options.config
+        else:
+            config_file = '/usr/share/planetlab/tests/nst/nst_config'
+
+        # the config file is python code defining the NST_* names
+        try:
+            execfile(config_file, self.__dict__)
+        except IOError:
+            raise Exception("Could not find nst config in " + config_file)
+
+        if options.url: self.NST_API_SERVER = options.url
+        if options.slice: self.NST_SLICE = options.slice
+        if options.key: self.NST_KEY_PATH = options.key
+        if options.user: self.NST_USER = options.user
+        if options.password: self.NST_PASSWORD = options.password
+        self.NST_NODES = options.nodes or None
+
+        self.api = xmlrpclib.Server(self.NST_API_SERVER)
+        self.auth = {'Username': self.NST_USER,
+                     'AuthString': self.NST_PASSWORD,
+                     'AuthMethod': 'password'}
+        self.key = self.NST_KEY_PATH
+        self.slice = self.NST_SLICE
+        self.nodes = self.NST_NODES
+        self.multiquery_path = self.NST_MULTIQUERY_PATH
+        self.verbose = options.verbose
+        self.data_path = '/usr/share/planetlab/tests/nst/data/'
+        self.plots_path = '/usr/share/planetlab/tests/nst/plots/'
+	
+
+# get formatted tic string for gnuplot
+def getTimeTicString(t1, t2, step):
+    """Return gnuplot xtics ('"HH:MM" epoch', comma separated) for [t1, t2).
+
+    Tics are step seconds apart and aligned to the first full hour after t1.
+    """
+    # truncate t1 to the start of its (local time) hour
+    first_hour = list(time.localtime(t1))
+    first_hour[4] = 0
+    first_hour[5] = 0
+    # mktime() only accepts a tuple or struct_time, never a list
+    next_hour_time = int(time.mktime(tuple(first_hour))) + 3600
+    # walk back from the hour mark in whole steps without passing t1
+    backsteps = (next_hour_time - t1) // step
+    start = next_hour_time - backsteps * step
+
+    tics = []
+    thistime = start
+    while thistime < t2:
+        label = time.strftime("%H:%M", time.localtime(thistime))
+        tics.append("\"%s\" %d" % (label, thistime))
+        thistime += step
+    return ", ".join(tics)
+
+
+# count total number of nodes in PlanetLab, according to the api
+# count total number  of nodes in slice, according to the api 
+def count_nodes_by_api(config):
+    """Record how many nodes PLC knows about, in total and in config.slice.
+
+    Appends one "timestamp<TAB>count" line to each of the "nodes" and
+    "nodes_in_slice" files under config.data_path.
+    """
+    now = round(time.time())
+    # keep the full rows: slice_ids is needed below to filter by slice
+    all_nodes = config.api.GetNodes(config.auth, {}, ['node_id', 'slice_ids'])
+    all_nodes_output = "%d\t%d" % (now, len(all_nodes))
+
+    # count all nodes in slice
+    if config.slice == 'root':
+        nodes_in_slice_output = all_nodes_output
+    else:
+        slice_id = config.api.GetSlices(config.auth, {'name': config.slice}, ['slice_id'])[0]['slice_id']
+        nodes_in_slice = [row['node_id'] for row in all_nodes
+                          if slice_id in row['slice_ids']]
+        nodes_in_slice_output = "%d\t%d" % (now, len(nodes_in_slice))
+
+    # append, one record per line, so the plot gets a history to draw
+    for name, output in (("nodes", all_nodes_output),
+                         ("nodes_in_slice", nodes_in_slice_output)):
+        data_file = open(config.data_path + os.sep + name, 'a')
+        data_file.write(output + "\n")
+        data_file.close()
+
+    if config.verbose:
+        print "all node: " + all_nodes_output
+        print "nodes in slice: " + nodes_in_slice_output
+ 		
+
+# count total number of "good" nodes, according to CoMon
+def count_nodes_good_by_comon(config):
+    """Append "timestamp<TAB>count" of the nodes CoMon reports as good to "nodes_good"."""
+    comon = urllib.urlopen("http://summer.cs.princeton.edu/status/tabulator.cgi?table=table_nodeviewshort&format=nameonly&select='resptime%20%3E%200%20&&%20((drift%20%3E%201m%20||%20(dns1udp%20%3E%2080%20&&%20dns2udp%20%3E%2080)%20||%20gbfree%20%3C%205%20||%20sshstatus%20%3E%202h)%20==%200)'")
+    try:
+        good_nodes = comon.readlines()
+    finally:
+        comon.close()
+    comon_output = "%d\t%d" % (round(time.time()), len(good_nodes))
+    nodes_good_comon_file = open(config.data_path + os.sep + "nodes_good", 'a')
+    nodes_good_comon_file.write(comon_output + "\n")
+    nodes_good_comon_file.close()
+
+    if config.verbose:
+        print "comon: " + comon_output
+	
+# count total number of nodes reachable by ssh
+def count_nodes_can_ssh(config):
+    """Try to ssh into every node and record which ones answer.
+
+    The node list is read from config.nodes (one hostname per line) when
+    set, otherwise every node known to the API is tried.  Appends a
+    "timestamp<TAB>count" line to "nodes_can_ssh" and one line per
+    unreachable node to "dead_nodes" under config.data_path.
+    """
+    api = config.api
+    slice = config.slice
+    key = config.key
+    verbose = config.verbose
+    auth = config.auth
+    nodes = config.nodes
+    multiquery_path = config.multiquery_path
+
+    if verbose:
+        print "Creating list of nodes to ssh to"
+
+    # create node dict
+    all_nodes = api.GetNodes(auth, {}, ['hostname', 'boot_state', 'last_updated'])
+    node_dict = {}
+    for node in all_nodes:
+        node_dict[node['hostname']] = node
+
+    # create node list
+    nodes_tmp_filename = None
+    if nodes:
+        nodes_filename = nodes
+        nodes_file = open(nodes_filename, 'r')
+        node_list = [line.strip() for line in nodes_file if line.strip()]
+        nodes_file.close()
+    else:
+        node_list = node_dict.keys()
+        # mkstemp creates the file itself, unlike the race-prone mktemp
+        (nodes_fd, nodes_tmp_filename) = tempfile.mkstemp()
+        nodes_filename = nodes_tmp_filename
+        nodes_file = os.fdopen(nodes_fd, 'w')
+        nodes_file.writelines([hostname + "\n" for hostname in node_list])
+        nodes_file.close()
+
+    private_key = key.split(".pub")[0]
+
+    # create ssh command
+    if verbose:
+        print "Attemptng to ssh to nodes in " + nodes_filename
+
+    (ssh_fd, ssh_filename) = tempfile.mkstemp()
+    ssh_file = os.fdopen(ssh_fd, 'w')
+    ssh_file.write("""
+    export PATH=$PATH:%(multiquery_path)s
+    export MQ_SLICE="%(slice)s"
+    export MQ_NODES="%(nodes_filename)s"
+
+    eval `ssh-agent` >/dev/null 2>&1
+    trap "kill $SSH_AGENT_PID" 0
+    ssh-add %(private_key)s >/dev/null 2>&1
+
+    multiquery 'hostname' 2>/dev/null |
+    grep "bytes" |
+    grep -v ": 0 bytes"
+    """ % locals())
+    ssh_file.close()
+
+    # run the script that was just written and read its output line by line
+    try:
+        ssh_result = os.popen("sh " + ssh_filename).readlines()
+    finally:
+        os.unlink(ssh_filename)
+        if nodes_tmp_filename:
+            os.unlink(nodes_tmp_filename)
+
+    # create a list of hostname out of results that are not empty
+    good_nodes = []
+    for result in ssh_result:
+        if result.find("bytes") > -1:
+            good_nodes.append(result.split(":")[0].strip())
+
+    # count number of nodes we can ssh into
+    ssh_count = len(good_nodes)
+
+    # determine which nodes are dead
+    dead_nodes = set(node_list).difference(good_nodes)
+
+    # write dead nodes to file
+    curr_time = round(time.time())
+    dead_node_count_output = "%d\t%d" % (curr_time, len(dead_nodes))
+    dead_nodes_file = open(config.data_path + os.sep + "dead_nodes", 'a')
+    for hostname in dead_nodes:
+        # hosts from a user supplied list may be unknown to the API
+        node = node_dict.get(hostname, {})
+        boot_state = node.get('boot_state')
+        last_updated = node.get('last_updated') or 0
+        dead_nodes_file.write("%(curr_time)d\t%(hostname)s\t%(boot_state)s\t%(last_updated)d\n" % locals())
+    dead_nodes_file.close()
+
+    # write good node count
+    ssh_result_output = "%d\t%d" % (round(time.time()), ssh_count)
+    nodes_can_ssh_file = open(config.data_path + os.sep + "nodes_can_ssh", 'a')
+    nodes_can_ssh_file.write(ssh_result_output + "\n")
+    nodes_can_ssh_file.close()
+
+    if verbose:
+        print "nodes that can ssh: " + ssh_result_output
+        print "dead nodes: " + dead_node_count_output
+	
+	
+# remove all nodes from a slice
+def empty_slice(config):
+    """Remove config.slice from every node the API knows about."""
+    if config.verbose:
+        print "Removing %s from all nodes" % config.slice
+
+    all_nodes = [row['node_id'] for row in config.api.GetNodes(config.auth, {}, ['node_id'])]
+    config.api.DeleteSliceFromNodes(config.auth, config.slice, all_nodes)
+
+	
+# add slice to all nodes. 
+# make sure users key is up to date   
+def init_slice(config):
+    """Check slice, membership and key, then add config.slice to all nodes; raises Exception if a check fails."""
+    # make sure slice exists
+    slices = config.api.GetSlices(config.auth, [config.slice],
+                                  ['slice_id', 'name', 'person_ids'])
+    if not slices:
+        raise Exception("No such slice %s" % config.slice)
+    slice = slices[0]
+
+    # make sure user is in slice
+    person = config.api.GetPersons(config.auth, config.auth['Username'],
+                                   ['person_id', 'email', 'slice_ids', 'key_ids'])[0]
+    if slice['slice_id'] not in person['slice_ids']:
+        raise Exception("%s not in %s slice. Must be added first" %
+                        (person['email'], slice['name']))
+
+    # make sure user key is up to date
+    key_file = open(config.key, 'r')
+    current_key = key_file.readline().strip()
+    key_file.close()
+    if len(current_key) == 0:
+        raise Exception("Key cannot be empty")
+
+    keys = config.api.GetKeys(config.auth, person['key_ids'])
+    if not keys:
+        if config.verbose:
+            print "Adding new key " + config.key
+        config.api.AddPersonKey(config.auth, person['person_id'],
+                                {'key_type': 'ssh', 'key': current_key})
+    elif not [k for k in keys if k['key'] == current_key]:
+        if config.verbose:
+            print "%s was modified or is new. Updating PLC" % config.key
+        old_key = keys[0]
+        config.api.UpdateKey(config.auth, old_key['key_id'],
+                             {'key': current_key})
+
+    # add slice to all nodes
+    if config.verbose:
+        print "Adding %s to all nodes" % config.slice
+    all_nodes = [row['node_id'] for row in config.api.GetNodes(config.auth, {}, ['node_id'])]
+    config.api.AddSliceToNodes(config.auth, config.slice, all_nodes)
+	
+	
+# create the fill/empty plot
+def plot_fill_empty():
+    """Plot the recorded node counts over time to fill_empty.png with gnuplot, using the module level config."""
+    ticstep = 1800
+    plotlength = 10800
+
+    plots_path = config.plots_path
+    gnuplot_path = 'gnuplot'  # NOTE(review): not configurable; assumes gnuplot is on PATH
+
+    all_nodes_file_name = config.data_path + os.sep + "nodes"
+    nodes_in_slice_file_name = config.data_path + os.sep + "nodes_in_slice"
+    nodes_can_ssh_file_name = config.data_path + os.sep + "nodes_can_ssh"
+    nodes_good_comon_file_name = config.data_path + os.sep + "nodes_good"
+
+    starttime = -1
+    stoptime = -1
+    for datafilename in [all_nodes_file_name, nodes_in_slice_file_name,
+                         nodes_can_ssh_file_name, nodes_good_comon_file_name]:
+        datafile = open(datafilename, 'r')
+        # only the first and last record are needed, for their timestamps
+        records = [line for line in datafile if line.strip()]
+        datafile.close()
+        if not records:
+            continue
+        thisstarttime = int(records[0].split("\t")[0])
+        if starttime == -1 or thisstarttime < starttime:
+            starttime = thisstarttime
+        thisstoptime = int(records[-1].split("\t")[0])
+        if stoptime == -1 or thisstoptime > stoptime:
+            stoptime = thisstoptime
+
+    stopx = stoptime
+    startx = max(starttime, stopx - plotlength)
+    starttime = startx
+
+    tics = getTimeTicString(starttime, stoptime, ticstep)
+
+    # %d is the day of the month; %m would repeat the month as a number
+    startdate = time.strftime("%b %d, %Y - %H:%M", time.localtime(startx))
+    stopdate = time.strftime("%H:%M", time.localtime(stopx))
+
+    (tmpfd, tmpfilename) = tempfile.mkstemp()
+    tmpfile = os.fdopen(tmpfd, 'w')
+    tmpfile.write("""
+    set term png
+    set output "%(plots_path)s/fill_empty.png"
+
+    set title "Number of Nodes / Time - %(startdate)s to %(stopdate)s"
+    set xlabel "Time"
+    set ylabel "Number of Nodes"
+
+    set xtics (%(tics)s)
+    set xrange[%(startx)d:%(stopx)d]
+    set yrange[0:950]
+
+    plot "%(all_nodes_file_name)s" u 1:2 w lines title "Total Nodes", \
+        "%(nodes_in_slice_file_name)s" u 1:2 w lines title "Nodes in Slice", \
+        "%(nodes_good_comon_file_name)s" u 1:2 w lines title \
+            "Healthy Nodes (according to CoMon)", \
+        "%(nodes_can_ssh_file_name)s" u 1:2 w lines title "Nodes Reachable by SSH"
+
+    """ % locals())
+    tmpfile.close()
+    os.system("%s %s" % (gnuplot_path, tmpfilename))
+
+    if os.path.exists(tmpfilename):
+        os.unlink(tmpfilename)
+
+
+
+config = Config(options)
+
+
+# Nothing is set up for the 'root' slice; any other slice is first
+# initialised and then given time to reach the nodes.
+if config.slice != 'root':
+    # set up slice and add it to nodes
+    init_slice(config)
+
+    # wait 15 mins for nodes to get the data
+    sleep(15 * 60)
+elif config.verbose:
+    print "Logging in as root"
+
+# gather data
+count_nodes_can_ssh(config)
+count_nodes_by_api(config)
+count_nodes_good_by_comon(config)
+
+# update plots
+plot_fill_empty()
+#os.system("cp plots/*.png ~/public_html/planetlab/tests")
+
+# clean up
+empty_slice(config)
+
diff --git a/node_ssh/nst_config b/node_ssh/nst_config
new file mode 100644
index 0000000..1f6a881
--- /dev/null
+++ b/node_ssh/nst_config
@@ -0,0 +1,14 @@
+# node_test_ssh config file (python syntax, read by nst.py)
+# URL of the PLC API server
+NST_API_SERVER='https://www.planet-lab.org/PLCAPI/'
+# public key to register; the private key is the same path without .pub
+NST_KEY_PATH='/usr/share/planetlab/tests/nst/keys/nts.pub'
+# directory added to PATH so that the multiquery command is found
+NST_MULTIQUERY_PATH='/usr/share/codeploy'
+# slice that is added to the nodes for the test
+NST_SLICE='pl_nts'
+# API user name
+NST_USER='USERNAME'
+# API password
+NST_PASSWORD='PASSWORD'
+
-- 
2.47.0