From c4906d8b47549fc8aca2d6bde100efe6f9d8bf8d Mon Sep 17 00:00:00 2001
From: Stephen Soltesz
Date: Fri, 29 Jun 2007 12:38:36 +0000
Subject: [PATCH] + blacklist.py -- manages a node blacklist on which no
 actions should ever be taken
 + bootcds.py -- collects bootcd information from debug state nodes
 + bwlimit.py -- fetch all nodes with broken bwlimits.
 + dumpact.py -- pretty print the act_all.pkl db generated by monitor.py
 + getnodekey.py -- generate a known_hosts file based on the ssh_rsa_key
 field of the PLC node db.
 + printpdb.py -- another pretty printer for pickle files.
 + soltesz.py -- utility functions for pickles, config, etc.

---
Review notes (this section is ignored by `git am`; nothing here is committed):

 * Layout: this copy of the patch had lost its line breaks and indentation.
   One '+' line per added source line has been restored and the counts match
   every hunk header. Indentation is reconstructed from control flow and
   written as 4 spaces; the original width / tabs could not be recovered, and
   whitespace runs inside string literals may have been collapsed too.
 * bootcds.py: `opts = soltesz.ssh_options` aliases the module-level dict, so
   setting opts['ConnectTimeout'] = '60' also changes the default options
   used by every later `soltesz.SSH('root', host)` call.
 * bwlimit.py: the `for nw_id in host['nodenetwork_ids']` loop never uses
   nw_id; each iteration passes the whole id list to plc.getNodeNetworks().
 * soltesz.py, MyTimer: end() and diff() assign `self.end`, which replaces
   the end() method on that instance; a second end() call would fail.
 * soltesz.py, SSH.run/runE: `cmd` is interpolated into a single-quoted shell
   string; a command containing a single quote will break the quoting.

 blacklist.py  |  55 ++++++++++++++++
 bootcds.py    |  47 ++++++++++++++
 bwlimit.py    |  37 +++++++++++
 dumpact.py    |  52 +++++++++++++++
 getnodekey.py |  37 +++++++++++
 printpdb.py   |   9 +++
 soltesz.py    | 170 ++++++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 407 insertions(+)
 create mode 100755 blacklist.py
 create mode 100755 bootcds.py
 create mode 100755 bwlimit.py
 create mode 100755 dumpact.py
 create mode 100644 getnodekey.py
 create mode 100755 printpdb.py
 create mode 100644 soltesz.py

diff --git a/blacklist.py b/blacklist.py
new file mode 100755
index 0000000..11e1cfc
--- /dev/null
+++ b/blacklist.py
@@ -0,0 +1,55 @@
+#!/usr/bin/python
+
+import os
+import sys
+import string
+import time
+import soltesz
+import plc
+import getopt
+
+def usage():
+    print "blacklist.py --delete="
+
+def main():
+
+    try:
+        longopts = ["delete=", "help"]
+        (opts, argv) = getopt.getopt(sys.argv[1:], "d:h", longopts)
+    except getopt.GetoptError, err:
+        print "Error: " + err.msg
+        sys.exit(1)
+
+    l_blacklist = soltesz.if_cached_else(1, "l_blacklist", lambda : [])
+
+    for (opt, optval) in opts:
+        if opt in ["-d", "--delete"]:
+            i = int(optval)
+            del l_blacklist[i]
+        else:
+            usage()
+            sys.exit(0)
+
+    i_cnt = 0
+    for i in l_blacklist:
+        print i_cnt, " ", i
+        i_cnt += 1
+
+    while 1:
+        line = sys.stdin.readline()
+        if not line:
+            break
+        line = line.strip()
+        if not line in l_blacklist:
+            l_blacklist.append(line)
+
+    print "Total %d nodes in blacklist" % (len(l_blacklist))
+    soltesz.dbDump("l_blacklist")
+
+if __name__ == '__main__':
+    import os
+    #try:
+    main()
+    #except Exception, error:
+    # print "Exception %s" % error
+    # sys.exit(0)
diff --git a/bootcds.py b/bootcds.py
new file mode 100755
index 0000000..a7189ca
--- /dev/null
+++ b/bootcds.py
@@ -0,0 +1,47 @@
+#!/usr/bin/python
+
+import os
+import sys
+import string
+import time
+import soltesz
+import plc
+
+bootcds = {}
+
+def main():
+    global bootcds
+
+    l_nodes = plc.getNodes()
+    d_nodes = {}
+    for host in l_nodes:
+        h = host['hostname']
+        d_nodes[h] = host
+
+    bootcds = soltesz.if_cached_else(1, "bootcds", lambda : {})
+    for host in d_nodes:
+        if not host in bootcds:
+            ssh = soltesz.SSH('root', host)
+            val = ssh.runE("F=/mnt/cdrom/bootme/ID;G=/usr/bootme/ID; if [ -f $F ] ; then cat $F ; else cat $G ; fi")
+            print "%s == %s" % (host, val)
+            bootcds[host] = val
+        elif "timed out" in bootcds[host]:
+            # Call again with a longer timeout!
+            opts = soltesz.ssh_options
+            opts['ConnectTimeout'] = '60'
+            ssh = soltesz.SSH('root', host, opts)
+            val = ssh.runE("F=/mnt/cdrom/bootme/ID;G=/usr/bootme/ID; if [ -f $F ] ; then cat $F ; else cat $G ; fi")
+            print "TO: %s == %s" % (host, val)
+            bootcds[host] = val
+
+
+    soltesz.dbDump("bootcds", bootcds)
+
+if __name__ == '__main__':
+    import os
+    try:
+        main()
+    except Exception:
+        print "Saving data... exitting."
+        soltesz.dbDump("bootcds", bootcds)
+        sys.exit(0)
diff --git a/bwlimit.py b/bwlimit.py
new file mode 100755
index 0000000..09d3167
--- /dev/null
+++ b/bwlimit.py
@@ -0,0 +1,37 @@
+#!/usr/bin/python
+
+import os
+import sys
+import string
+import time
+import soltesz
+import plc
+
+bwlimit = {}
+
+def main():
+    global bwlimit
+
+    l_nodes = plc.getNodes()
+    d_nodes = {}
+    for host in l_nodes:
+        h = host['hostname']
+        d_nodes[h] = host
+
+    for h in d_nodes:
+        host = d_nodes[h]
+        for nw_id in host['nodenetwork_ids']:
+            l_nw = plc.getNodeNetworks({'nodenetwork_id': host['nodenetwork_ids']})
+            bwlimit[h] = []
+            for nw in l_nw:
+                if nw['bwlimit'] != None and nw['bwlimit'] < 500000:
+                    bwlimit[h].append(nw['bwlimit'])
+            if len(bwlimit[h]) == 0:
+                del bwlimit[h]
+
+    for host in bwlimit:
+        print "%s %s" % (host, bwlimit[host])
+
+
+if __name__ == '__main__':
+    main()
diff --git a/dumpact.py b/dumpact.py
new file mode 100755
index 0000000..6a9c3b6
--- /dev/null
+++ b/dumpact.py
@@ -0,0 +1,52 @@
+#!/usr/bin/python
+
+# Read in the act_* databases and print out a human readable version
+
+import sys
+import time
+import getopt
+import soltesz
+
+def main():
+
+    act_all = soltesz.dbLoad(sys.argv[1])
+    plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb")
+    s_nodenames = ""
+    sickdb = {}
+
+    sorted_keys = act_all.keys()
+    sorted_keys.sort()
+    for nodename in sorted_keys:
+        diag_nodelist = act_all[nodename]
+        lb = plcdb_hn2lb[nodename]
+        if lb not in sickdb:
+            sickdb[lb] = {}
+        sickdb[lb][nodename] = diag_nodelist
+
+    sorted_keys = sickdb.keys()
+    sorted_keys.sort()
+    for loginbase in sorted_keys:
+        nodedict = sickdb[loginbase]
+        sort_nodekeys = nodedict.keys()
+        sort_nodekeys.sort()
+        print "%s :" % loginbase
+        for nodename in sort_nodekeys:
+            if len(act_all[nodename]) == 0:
+                print "%20s : %-40s has no events" % (loginbase, nodename)
+            else:
+                l_ev = act_all[nodename]
+                print " %s" % nodename
+                for diag_node in l_ev:
+                    #s_time=time.strftime("%Y/%m/%d %H:%M:%S",time.gmtime(ev[1]))
+                    keys = diag_node.keys()
+                    keys.sort()
+                    for k in keys:
+                        if "message" not in k and "msg" not in k:
+                            print "\t'%s' : %s" % (k, diag_node[k])
+                    print "\t--"
+
+    print s_nodenames
+
+
+if __name__ == '__main__':
+    main()
diff --git a/getnodekey.py b/getnodekey.py
new file mode 100644
index 0000000..78d9ce6
--- /dev/null
+++ b/getnodekey.py
@@ -0,0 +1,37 @@
+#!/usr/bin/python
+
+import os
+import sys
+import string
+import time
+import soltesz
+import plc
+
+def main():
+
+    l_nodes = [ 'planetlab4.inf.ethz.ch', 'planetlab-1.di.fc.ul.pt',
+                'planetlab2.singaren.net.sg', 'planetlab2.nbgisp.com',
+                'planetlab1.koganei.wide.ad.jp', 'planetlab2.koganei.wide.ad.jp',
+                'planetlab1.citadel.edu', 'pl2.ucs.indiana.edu',
+                'plab1.engr.sjsu.edu', 'plab2.engr.sjsu.edu',
+                'planetlab1.iin-bit.com.cn', 'planetlab1.cs.virginia.edu',
+                'planetlab1.info.ucl.ac.be', 'node-1.mcgillplanetlab.org', ]
+    d_nodes = {}
+    for host in l_nodes:
+        n = plc.getNodes({'hostname' : host})
+        d_nodes[host] = n
+        #print n
+
+    for host in d_nodes:
+        ssh = soltesz.SSH('root', host)
+        val = ssh.runE("grep NODE_KEY /tmp/planet.cnf")
+        print "%s == %s" % (host, val)
+
+
+if __name__ == '__main__':
+    import os
+    try:
+        main()
+    except Exception, error:
+        print "Exception %s" % error
+        sys.exit(0)
diff --git a/printpdb.py b/printpdb.py
new file mode 100755
index 0000000..a916a05
--- /dev/null
+++ b/printpdb.py
@@ -0,0 +1,9 @@
+#!/usr/bin/python
+
+import pprint
+import sys
+import soltesz
+
+pp = pprint.PrettyPrinter(indent=4)
+o = soltesz.dbLoad(sys.argv[1])
+pp.pprint(o)
diff --git a/soltesz.py b/soltesz.py
new file mode 100644
index 0000000..24412c8
--- /dev/null
+++ b/soltesz.py
@@ -0,0 +1,170 @@
+import os
+import sys
+import pickle
+import inspect
+import shutil
+from config import config
+config = config()
+
+DEBUG= 0
+PICKLE_PATH="pdb"
+
+def dbLoad(name):
+    return SPickle().load(name)
+
+def dbExists(name):
+    #if self.config.debug:
+    # name = "debug.%s" % name
+    return SPickle().exists(name)
+
+def dbDump(name, obj=None):
+    # depth of the dump is 2 now, since we're redirecting to '.dump'
+    return SPickle().dump(name, obj, 2)
+
+def if_cached_else(cond, name, function):
+    s = SPickle()
+    if (cond and s.exists(name)) or \
+       (cond and config.debug and s.exists("debug.%s" % name)):
+        o = s.load(name)
+    else:
+        o = function()
+        if cond:
+            s.dump(name, o) # cache the object using 'name'
+    return o
+
+class SPickle:
+    def __init__(self):
+        self.config = config
+
+    def if_cached_else(self, cond, name, function):
+        if cond and self.exists(name):
+            o = self.load(name)
+        else:
+            o = function()
+            if cond:
+                self.dump(name, o) # cache the object using 'name'
+        return o
+
+    def __file(self, name):
+        return "%s/%s.pkl" % (PICKLE_PATH, name)
+
+    def exists(self, name):
+        return os.path.exists(self.__file(name))
+
+    def load(self, name):
+        """
+        In debug mode, we should fail if neither file exists.
+            if the debug file exists, reset name
+            elif the original file exists, make a copy, reset name
+            else neither exist, raise an error
+        Otherwise, it's normal mode, if the file doesn't exist, raise error
+        Load the file
+        """
+
+        if self.config.debug:
+            if self.exists("debug.%s" % name):
+                name = "debug.%s" % name
+            elif self.exists(name):
+                debugname = "debug.%s" % name
+                if not self.exists(debugname):
+                    shutil.copyfile(self.__file(name), self.__file(debugname))
+                name = debugname
+            else: # neither exist
+                raise Exception, "No such pickle based on %s" % self.__file(name)
+        else:
+            if not self.exists(name):
+                raise Exception, "No such file %s" % name
+
+        print "loading %s" % self.__file(name)
+        f = open(self.__file(name), 'r')
+        o = pickle.load(f)
+        f.close()
+        return o
+
+
+    # use the environment to extract the data associated with the local
+    # variable 'name'
+    def dump(self, name, obj=None, depth=1):
+        if obj == None:
+            o = inspect.getouterframes(inspect.currentframe())
+            up1 = o[depth][0] # get the frame one prior to (up from) this frame
+            argvals = inspect.getargvalues(up1)
+            # TODO: check that 'name' is a local variable; otherwise this would fail.
+            obj = argvals[3][name] # extract the local variable name 'name'
+        if not os.path.isdir("%s/" % PICKLE_PATH):
+            os.mkdir("%s" % PICKLE_PATH)
+        if self.config.debug:
+            name = "debug.%s" % name
+        f = open(self.__file(name), 'w')
+        pickle.dump(obj, f)
+        f.close()
+        return
+
+
+ssh_options = { 'StrictHostKeyChecking':'no',
+                'BatchMode':'yes',
+                'PasswordAuthentication':'no',
+                'ConnectTimeout':'20'}
+
+class SSH:
+    def __init__(self, user, host, options = ssh_options):
+        self.options = options
+        self.user = user
+        self.host = host
+        return
+
+    def __options_to_str(self):
+        options = ""
+        for o,v in self.options.iteritems():
+            options = options + "-o %s=%s " % (o,v)
+        return options
+
+    def run(self, cmd):
+        cmd = "ssh %s %s@%s '%s'" % (self.__options_to_str(),
+                                     self.user, self.host, cmd)
+        if ( DEBUG == 1 ):
+            print cmd,
+        (f_in, f_out, f_err) = os.popen3(cmd)
+        value = f_out.read()
+        if value == "":
+            raise Exception, f_err.read()
+        if ( DEBUG == 1 ):
+            print " == %s" % value
+        f_out.close()
+        f_in.close()
+        f_err.close()
+        return value
+
+    def runE(self, cmd):
+        cmd = "ssh %s %s@%s '%s'" % (self.__options_to_str(),
+                                     self.user, self.host, cmd)
+        if ( DEBUG == 1 ):
+            print cmd,
+        (f_in, f_out, f_err) = os.popen3(cmd)
+
+        value = f_out.read()
+        if value == "": # An error has occured
+            value = f_err.read()
+
+        if ( DEBUG == 1 ):
+            print " == %s" % value
+        f_out.close()
+        f_in.close()
+        f_err.close()
+        return value.strip()
+
+import time
+class MyTimer:
+    def __init__(self):
+        self.start = time.time()
+
+    def end(self):
+        self.end = time.time()
+        t = self.end-self.start
+        return t
+
+    def diff(self):
+        self.end = time.time()
+        t = self.end-self.start
+        self.start = self.end
+        return t
-- 
2.43.0