+ blacklist.py -- manages a node blacklist on which no actions should ever be
authorStephen Soltesz <soltesz@cs.princeton.edu>
Fri, 29 Jun 2007 12:38:36 +0000 (12:38 +0000)
committerStephen Soltesz <soltesz@cs.princeton.edu>
Fri, 29 Jun 2007 12:38:36 +0000 (12:38 +0000)
taken
+ bootcds.py -- collects bootcd information from debug state nodes
+ bwlimit.py -- fetch all nodes with broken bwlimits.
+ dumpact.py -- pretty print the act_all.pkl db generated by monitor.py
+ getnodekey.py -- generate a known_hosts file based on the ssh_rsa_key field
of the PLC node db.
+ printpdb.py -- another pretty printer for pickle files.
+ soltesz.py -- utility functions for pickles, config, etc.

blacklist.py [new file with mode: 0755]
bootcds.py [new file with mode: 0755]
bwlimit.py [new file with mode: 0755]
dumpact.py [new file with mode: 0755]
getnodekey.py [new file with mode: 0644]
printpdb.py [new file with mode: 0755]
soltesz.py [new file with mode: 0644]

diff --git a/blacklist.py b/blacklist.py
new file mode 100755 (executable)
index 0000000..11e1cfc
--- /dev/null
@@ -0,0 +1,55 @@
+#!/usr/bin/python
+
+import os
+import sys
+import string
+import time
+import soltesz
+import plc
+import getopt
+
+def usage():
+       """Print the command-line synopsis of blacklist.py to stdout."""
+       print "blacklist.py --delete=<i>" 
+
+def main():
+       """Maintain the persistent node blacklist kept in the "l_blacklist" pickle.
+
+       Steps, in order:
+         1. parse the command line (-d <i> / --delete=<i>; any other accepted
+            option, i.e. -h / --help, prints the usage and exits 0),
+         2. load the cached blacklist, or start from an empty list,
+         3. delete the requested entries by list index,
+         4. print the current entries together with their indexes,
+         5. read hostnames from stdin, one per line, until EOF and append
+            those that are not already present,
+         6. write the list back with soltesz.dbDump.
+
+       Exits with status 1 on an unknown option or an unusable --delete index.
+       """
+       try:
+               longopts = ["delete=", "help"]
+               (opts, argv) = getopt.getopt(sys.argv[1:], "d:h", longopts)
+       except getopt.GetoptError, err:
+               print "Error: " + err.msg
+               sys.exit(1)
+
+       l_blacklist = soltesz.if_cached_else(1, "l_blacklist", lambda : [])
+
+       for (opt, optval) in opts:
+               if opt in ["-d", "--delete"]:
+                       # A non-numeric or out-of-range index used to abort with a
+                       # raw traceback; report it as a usage error instead.
+                       try:
+                               del l_blacklist[int(optval)]
+                       except (ValueError, IndexError):
+                               print "Error: no blacklist entry at index '%s'" % optval
+                               sys.exit(1)
+               else:
+                       usage()
+                       sys.exit(0)
+
+       for (i_cnt, node) in enumerate(l_blacklist):
+               print i_cnt, " ", node
+
+       while 1:
+               line = sys.stdin.readline()
+               if not line:
+                       break
+               line = line.strip()
+               # Skip blank lines so the empty string is never stored as a node.
+               if line and line not in l_blacklist:
+                       l_blacklist.append(line)
+
+       print "Total %d nodes in blacklist" % (len(l_blacklist))
+       # Pass the list explicitly instead of relying on dbDump looking up the
+       # caller's local variable by name.
+       soltesz.dbDump("l_blacklist", l_blacklist)
+       
+if __name__ == '__main__':
+       # Run the tool only when executed as a script. Errors are deliberately
+       # not caught here, so a failure shows its traceback.
+       main()
diff --git a/bootcds.py b/bootcds.py
new file mode 100755 (executable)
index 0000000..a7189ca
--- /dev/null
@@ -0,0 +1,47 @@
+#!/usr/bin/python
+
+import os
+import sys
+import string
+import time
+import soltesz
+import plc
+
+# hostname -> output of the boot CD ID query. Kept at module level so that the
+# __main__ exception handler can still save what was collected so far.
+bootcds = {}
+
+def main():
+       """Collect the boot CD ID string of every node known to PLC.
+
+       Results live in the module-level 'bootcds' dict (hostname -> command
+       output), which is loaded from and saved to the "bootcds" pickle. Hosts
+       that already have an entry are skipped, except those whose recorded
+       value contains "timed out": they are queried again with a 60 second
+       ConnectTimeout.
+       """
+       global bootcds
+
+       # Print the ID file from the mounted CD if it exists, else the one
+       # under /usr/bootme.
+       id_cmd = "F=/mnt/cdrom/bootme/ID;G=/usr/bootme/ID; if [ -f $F ] ; then cat $F ; else cat $G ; fi"
+
+       l_nodes = plc.getNodes()
+       d_nodes = {}
+       for host in l_nodes:
+               h = host['hostname']
+               d_nodes[h] = host
+
+       bootcds = soltesz.if_cached_else(1, "bootcds", lambda : {})
+       for host in d_nodes:
+               if not host in bootcds:
+                       ssh = soltesz.SSH('root', host)
+                       val = ssh.runE(id_cmd)
+                       print "%s == %s" % (host, val)
+                       bootcds[host] = val
+               elif "timed out" in bootcds[host]:
+                       # Call again with a longer timeout! Work on a copy: changing
+                       # soltesz.ssh_options in place would also raise the timeout
+                       # of every SSH object created afterwards.
+                       opts = dict(soltesz.ssh_options)
+                       opts['ConnectTimeout'] = '60'
+                       ssh = soltesz.SSH('root', host, opts)
+                       val = ssh.runE(id_cmd)
+                       print "TO: %s == %s" % (host, val)
+                       bootcds[host] = val
+
+       soltesz.dbDump("bootcds", bootcds)
+       
+if __name__ == '__main__':
+       try:
+               main()
+       except Exception, error:
+               # Keep whatever was collected before the failure, but say what
+               # went wrong instead of discarding the exception silently.
+               print "Exception %s" % error
+               print "Saving data... exitting."
+               soltesz.dbDump("bootcds", bootcds)
+               sys.exit(0)
diff --git a/bwlimit.py b/bwlimit.py
new file mode 100755 (executable)
index 0000000..09d3167
--- /dev/null
@@ -0,0 +1,37 @@
+#!/usr/bin/python
+
+import os
+import sys
+import string
+import time
+import soltesz
+import plc
+
+# hostname -> list of that node's bwlimit values that are below 500000
+bwlimit = {}
+
+def main():
+       """Print every node that has a network with a low bandwidth limit.
+
+       For each node returned by plc.getNodes(), the node's networks are
+       fetched and every 'bwlimit' that is set and below 500000 is recorded in
+       the module-level 'bwlimit' dict (hostname -> list of limits). Nodes
+       without such a limit get no entry. One "<hostname> <limits>" line is
+       printed per recorded node.
+       """
+       global bwlimit
+
+       l_nodes = plc.getNodes()
+       d_nodes = {}
+       for host in l_nodes:
+               h = host['hostname']
+               d_nodes[h] = host
+
+       for h in d_nodes:
+               nw_ids = d_nodes[h]['nodenetwork_ids']
+               if not nw_ids:
+                       continue
+               # The query is given the node's full id list, so one call per
+               # node is enough; it used to be repeated once per network id.
+               l_nw = plc.getNodeNetworks({'nodenetwork_id': nw_ids})
+               limits = []
+               for nw in l_nw:
+                       if nw['bwlimit'] is not None and nw['bwlimit'] < 500000:
+                               limits.append(nw['bwlimit'])
+               if limits:
+                       bwlimit[h] = limits
+
+       for host in bwlimit:
+               print "%s %s" % (host, bwlimit[host])
+                       
+       
+if __name__ == '__main__':
+       # Run the report only when executed as a script.
+       main()
diff --git a/dumpact.py b/dumpact.py
new file mode 100755 (executable)
index 0000000..6a9c3b6
--- /dev/null
@@ -0,0 +1,52 @@
+#!/usr/bin/python
+
+# Read in the act_* databases and print out a human readable version
+
+import sys
+import time
+import getopt
+import soltesz
+
+def main():
+       """Pretty-print an act_* pickle, grouped by login base.
+
+       sys.argv[1] names the pickle to load through soltesz.dbLoad; the
+       "plcdb_hn2lb" pickle maps each node name to its login base. Login bases
+       and node names are printed in sorted order. For every entry recorded
+       for a node, its key/value pairs are printed in key order, leaving out
+       keys that contain "message" or "msg".
+       """
+       act_all = soltesz.dbLoad(sys.argv[1])
+       plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb")
+       s_nodenames = ""
+
+       # Group the per-node entries by login base.
+       by_site = {}
+       for hostname in sorted(act_all.keys()):
+               entries = act_all[hostname]
+               site = plcdb_hn2lb[hostname]
+               by_site.setdefault(site, {})[hostname] = entries
+
+       for loginbase in sorted(by_site.keys()):
+               hostnames = sorted(by_site[loginbase].keys())
+               print "%s :" % loginbase
+               for hostname in hostnames:
+                       entries = act_all[hostname]
+                       if len(entries) == 0:
+                               print "%20s : %-40s has no events" % (loginbase, hostname)
+                               continue
+                       print "    %s" % hostname
+                       for entry in entries:
+                               for field in sorted(entry.keys()):
+                                       if "message" in field or "msg" in field:
+                                               continue
+                                       print "\t'%s' : %s" % (field, entry[field])
+                               print "\t--"
+
+       print s_nodenames
+
+       
+if __name__ == '__main__':
+       # Run the dump only when executed as a script.
+       main()
diff --git a/getnodekey.py b/getnodekey.py
new file mode 100644 (file)
index 0000000..78d9ce6
--- /dev/null
@@ -0,0 +1,37 @@
+#!/usr/bin/python
+
+import os
+import sys
+import string
+import time
+import soltesz
+import plc
+
+def main():
+       """Print the NODE_KEY line of /tmp/planet.cnf for a fixed list of nodes.
+
+       Each host in the hard-coded list is first looked up with plc.getNodes,
+       then contacted over ssh as root; one "<host> == <output>" line is
+       printed per host.
+       """
+
+       l_nodes = [ 'planetlab4.inf.ethz.ch', 'planetlab-1.di.fc.ul.pt',
+             'planetlab2.singaren.net.sg', 'planetlab2.nbgisp.com',
+             'planetlab1.koganei.wide.ad.jp', 'planetlab2.koganei.wide.ad.jp',
+             'planetlab1.citadel.edu', 'pl2.ucs.indiana.edu',
+             'plab1.engr.sjsu.edu', 'plab2.engr.sjsu.edu',
+             'planetlab1.iin-bit.com.cn', 'planetlab1.cs.virginia.edu',
+             'planetlab1.info.ucl.ac.be', 'node-1.mcgillplanetlab.org', ]
+       d_nodes = {}
+       for host in l_nodes:
+               # NOTE(review): the lookup result is stored but never read below;
+               # only the dict keys (the hostnames) are used.
+               n = plc.getNodes({'hostname' : host})
+               d_nodes[host] = n
+               #print n
+
+       # Hosts are visited in dict iteration order, not in list order.
+       for host in d_nodes:
+               ssh = soltesz.SSH('root', host)
+               val = ssh.runE("grep NODE_KEY /tmp/planet.cnf")
+               print "%s == %s" % (host, val)
+
+       
+if __name__ == '__main__':
+       # NOTE(review): 'os' is already imported at the top of this script.
+       import os
+       try:
+               main()
+       except Exception, error:
+               # Any failure is reduced to a one-line message; the exit status
+               # is still 0.
+               print "Exception %s" % error
+               sys.exit(0)
diff --git a/printpdb.py b/printpdb.py
new file mode 100755 (executable)
index 0000000..a916a05
--- /dev/null
@@ -0,0 +1,9 @@
+#!/usr/bin/python
+
+import pprint
+import sys
+import soltesz
+
+# Load the pickle named by the first command-line argument and pretty-print
+# it with an indent of 4.
+pp = pprint.PrettyPrinter(indent=4) 
+o = soltesz.dbLoad(sys.argv[1])
+pp.pprint(o) 
diff --git a/soltesz.py b/soltesz.py
new file mode 100644 (file)
index 0000000..24412c8
--- /dev/null
@@ -0,0 +1,170 @@
+import os
+import sys
+import pickle
+import inspect
+import shutil
+from config import config
+config = config()
+
+DEBUG= 0
+PICKLE_PATH="pdb"
+
+def dbLoad(name):
+       """Load and return the object pickled under 'name' (see SPickle.load)."""
+       store = SPickle()
+       return store.load(name)
+
+def dbExists(name):
+       """Tell whether the pickle file for 'name' exists (no debug-name mapping)."""
+       store = SPickle()
+       return store.exists(name)
+
+def dbDump(name, obj=None):
+       """Pickle 'obj' under 'name' through SPickle.dump.
+
+       When 'obj' is omitted (None), SPickle.dump looks up a variable called
+       'name' in the frame of this function's caller and pickles that instead.
+       """
+       # depth of the dump is 2 now, since we're redirecting to '.dump'
+       return SPickle().dump(name, obj, 2)
+
+def if_cached_else(cond, name, function):
+       """Return the object cached under 'name', computing it when necessary.
+
+       With a true 'cond', an existing pickle for 'name' (or, when
+       config.debug is set, for "debug.<name>") is loaded and returned.
+       Otherwise 'function' is called without arguments to produce the object,
+       which is then pickled under 'name' only if 'cond' is true.
+       """
+       store = SPickle()
+       cached = cond and (store.exists(name) or
+                          (config.debug and store.exists("debug.%s" % name)))
+       if cached:
+               return store.load(name)
+
+       result = function()
+       if cond:
+               store.dump(name, result) # cache the object using 'name'
+       return result
+
+class SPickle:
+       """Minimal pickle store: one "<PICKLE_PATH>/<name>.pkl" file per object.
+
+       When config.debug is set, load() and dump() work on "debug.<name>"
+       files instead, so the original pickles are not overwritten.
+       """
+
+       def __init__(self):
+               self.config = config
+
+       def if_cached_else(self, cond, name, function):
+               """Return the object cached under 'name', else compute and cache it.
+
+               Follows the module-level if_cached_else(): with a true 'cond', an
+               existing pickle for 'name' (or for "debug.<name>" in debug mode)
+               is loaded; otherwise 'function' is called and its result is
+               pickled under 'name' only if 'cond' is true.
+               """
+               if cond and (self.exists(name) or
+                            (self.config.debug and self.exists("debug.%s" % name))):
+                       return self.load(name)
+               o = function()
+               if cond:
+                       self.dump(name, o)      # cache the object using 'name'
+               return o
+
+       def __file(self, name):
+               """Return the path of the pickle file used for 'name'."""
+               return "%s/%s.pkl" % (PICKLE_PATH, name)
+
+       def exists(self, name):
+               """Tell whether the pickle file for exactly 'name' exists."""
+               return os.path.exists(self.__file(name))
+
+       def load(self, name):
+               """
+               Load and return the object pickled under 'name'.
+
+               In debug mode, we should fail if neither file exists.
+                       if the debug file exists, load it
+                       elif the original file exists, copy it to the debug name
+                               and load the copy
+                       else neither exist, raise an error
+               Otherwise, it's normal mode, if the file doesn't exist, raise error
+               """
+               if self.config.debug:
+                       debugname = "debug.%s" % name
+                       if not self.exists(debugname):
+                               if not self.exists(name):       # neither exist
+                                       raise Exception("No such pickle based on %s" % self.__file(name))
+                               shutil.copyfile(self.__file(name), self.__file(debugname))
+                       name = debugname
+               elif not self.exists(name):
+                       raise Exception("No such file %s" % name)
+
+               print "loading %s" % self.__file(name)
+               # NOTE: unpickling can execute arbitrary code; only load pickle
+               # files that this tool wrote itself.
+               f = open(self.__file(name), 'r')
+               try:
+                       return pickle.load(f)
+               finally:
+                       # Close the file even when unpickling fails.
+                       f.close()
+
+       def dump(self, name, obj=None, depth=1):
+               """Pickle 'obj' under 'name' (under "debug.<name>" in debug mode).
+
+               If 'obj' is None, the object is taken from the variable called
+               'name' in the frame 'depth' levels above this one (depth=1 is the
+               direct caller). A KeyError is raised if no such variable exists
+               there. The PICKLE_PATH directory is created when missing.
+               """
+               if obj is None:
+                       # use the environment to extract the data associated with
+                       # the local variable 'name'
+                       o = inspect.getouterframes(inspect.currentframe())
+                       up1 = o[depth][0] # the frame 'depth' levels up from this frame
+                       argvals = inspect.getargvalues(up1)
+                       # TODO: check that 'name' is a local variable; otherwise this would fail.
+                       obj = argvals[3][name] # extract the local variable name 'name'
+               if not os.path.isdir("%s/" % PICKLE_PATH):
+                       os.mkdir("%s" % PICKLE_PATH)
+               if self.config.debug:
+                       name = "debug.%s" % name
+               f = open(self.__file(name), 'w')
+               try:
+                       pickle.dump(obj, f)
+               finally:
+                       # Close the file even when pickling fails.
+                       f.close()
+               return
+
+
+# Default options of the SSH class; each entry is passed to the ssh command
+# as "-o <key>=<value>".
+ssh_options = { 'StrictHostKeyChecking':'no', 
+                               'BatchMode':'yes', 
+                               'PasswordAuthentication':'no',
+                               'ConnectTimeout':'20'}
+
+class SSH:
+       """Run shell commands on a remote host through the 'ssh' command."""
+
+       def __init__(self, user, host, options = ssh_options):
+               """Remember the login 'user', the 'host' and the ssh 'options' dict."""
+               # Copy the dict so that changes made through this instance never
+               # leak into the shared module-level defaults.
+               self.options = dict(options)
+               self.user = user
+               self.host = host
+               return
+
+       def __options_to_str(self):
+               """Render self.options as a string of "-o key=value " arguments."""
+               return "".join(["-o %s=%s " % (o,v) for o,v in self.options.iteritems()])
+
+       def __execute(self, cmd):
+               """Run 'cmd' on the remote host and return (stdout, stderr) text.
+
+               stderr is only read (otherwise "") when stdout came back empty.
+               All three pipes are closed before returning, also on errors.
+               """
+               # NOTE(security): the command line is assembled as a shell string
+               # and 'cmd' is wrapped in single quotes, so a command containing a
+               # single quote breaks the quoting. Only pass trusted values.
+               cmd = "ssh %s %s@%s '%s'" % (self.__options_to_str(),
+                                            self.user, self.host, cmd)
+               if ( DEBUG == 1 ):
+                       print cmd,
+               (f_in, f_out, f_err) = os.popen3(cmd)
+               try:
+                       out = f_out.read()
+                       err = ""
+                       if out == "":
+                               err = f_err.read()
+                       return (out, err)
+               finally:
+                       f_out.close()
+                       f_in.close()
+                       f_err.close()
+
+       def run(self, cmd):
+               """Run 'cmd' remotely and return its stdout unchanged.
+
+               Raises Exception carrying the stderr text when the command wrote
+               nothing to stdout.
+               """
+               (value, err) = self.__execute(cmd)
+               if value == "":
+                       raise Exception(err)
+               if ( DEBUG == 1 ):
+                       print " == %s" % value
+               return value
+
+       def runE(self, cmd):
+               """Run 'cmd' remotely and return its stripped stdout.
+
+               When stdout is empty, the stripped stderr text is returned
+               instead; no exception is raised for that case.
+               """
+               (value, err) = self.__execute(cmd)
+               if value == "": # An error has occured
+                       value = err
+               if ( DEBUG == 1 ):
+                       print " == %s" % value
+               return value.strip()
+               
+import time
+class MyTimer:
+       """Simple stopwatch based on time.time()."""
+
+       def __init__(self):
+               self.start = time.time()
+               # Time of the latest end()/diff() call. It has its own name
+               # because assigning to self.end would replace the end() method on
+               # the instance and make any later end() call fail.
+               self.stop = None
+
+       def end(self):
+               """Return the seconds elapsed since the start (or the last diff())."""
+               self.stop = time.time()
+               return self.stop - self.start
+
+       def diff(self):
+               """Return the seconds elapsed since the start (or the last diff())
+               and restart the measurement from now."""
+               self.stop = time.time()
+               t = self.stop - self.start
+               self.start = self.stop
+               return t