add scrappy statistics-gathering scripts
author Stephen Soltesz <soltesz@cs.princeton.edu>
Fri, 12 Jun 2009 16:27:07 +0000 (16:27 +0000)
committer Stephen Soltesz <soltesz@cs.princeton.edu>
Fri, 12 Jun 2009 16:27:07 +0000 (16:27 +0000)
17 files changed:
statistics/aggregate-nm.py [new file with mode: 0755]
statistics/aggregate.py [new file with mode: 0755]
statistics/aggregatehistory.py [new file with mode: 0755]
statistics/correspondence.py [new file with mode: 0644]
statistics/harvestrt.py [new file with mode: 0755]
statistics/monitorstats.py [new file with mode: 0644]
statistics/nodebad.py [new file with mode: 0755]
statistics/nodecommon.py [new file with mode: 0644]
statistics/nodediff-graph-better.py [new file with mode: 0755]
statistics/nodediff-graph.py [new file with mode: 0644]
statistics/nodediff-length.py [new file with mode: 0755]
statistics/nodediff.py [new file with mode: 0755]
statistics/nodequeryold.py [new file with mode: 0755]
statistics/parserpms.py [new file with mode: 0755]
statistics/rtsurvey.py [new file with mode: 0755]
statistics/sliceavg.py [new file with mode: 0755]
statistics/usedtickets.py [new file with mode: 0755]

diff --git a/statistics/aggregate-nm.py b/statistics/aggregate-nm.py
new file mode 100755 (executable)
index 0000000..70a8574
--- /dev/null
@@ -0,0 +1,103 @@
+#!/usr/bin/python
+
+from monitor.wrapper import plc
+api = plc.getAuthAPI()
+
+from monitor import database
+import time
+from datetime import datetime, timedelta
+import calendar
+
+import sys
+from monitor.model import *
+
+from monitorstats import *
+
+
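+# Aggregate per-node availability: for each day's findbad snapshot, count how
+# often the chosen --field equals --value for nodes in the given boot --state,
+# then print the up/total fraction for every node that was not always up.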
+def main():
+       from monitor import parser as parsermodule
+
+       parser = parsermodule.getParser()
+       parser.set_defaults(node=None, aggname='aggregatenm', archivedir='archive-pdb', field='nm', value='Y', fromtime=None, load=False, state='BOOT')
+       parser.add_option("", "--node", dest="node", metavar="nodename.edu", 
+                                               help="A single node name to add to the nodegroup")
+       parser.add_option("", "--archivedir", dest="archivedir", metavar="dirname",
+                                               help="Directory containing the archived findbad pickles.")
+       parser.add_option("", "--aggname", dest="aggname", metavar="filename",
+                                               help="Pickle file name for the aggregate output.")
+       parser.add_option("", "--field", dest="field", metavar="key",
+                                               help="Which record field to extract from all files.")
+       parser.add_option("", "--value", dest="value", metavar="val",
+                                               help="Which value to look for in field.")
+       parser.add_option("", "--state", dest="state", metavar="key",
+                                               help="Which boot state to accept.")
+       parser.add_option("", "--load", action="store_true",
+                                               help="load aggregatenm rather than recreate it.")
+       parser.add_option("", "--fromtime", dest="fromtime", metavar="YYYY-MM-DD",
+                                               help="Specify a starting date from which to begin the query.")
+       config = parsermodule.parse_args(parser)
+
+       archive = get_archive(config.archivedir)
+       agg = {}
+
+       if config.fromtime:
+               begin = config.fromtime
+       else:
+               begin = "2008-09-28"
+
+       d = datetime_fromstr(begin)
+       tdelta = timedelta(1)
+       verbose = 1
+
+       if not config.load:
+               while True:
+                       file = get_filefromglob(d, "production.findbad", config.archivedir)
+                       print archive.path
+                       fb = archive.load(file)
+                       try:
+                               print "nodes: ", len(fb['nodes'])
+                               state_count=0
+                               for node in fb['nodes']:
+                                       fb_nodeinfo  = fb['nodes'][node]['values']
+                                       time = d.strftime("%Y-%m-%d")
+
+                                       if type(fb_nodeinfo) == type([]):
+                                               continue
+
+                                       if fb_nodeinfo['state'] != config.state:
+                                               continue
+                                       state_count += 1
+
+                                       if node not in agg:
+                                               agg[node] = { 'total' : 0, 'up' : 0}
+
+                                       agg[node]['total'] += 1
+                                       if fb_nodeinfo[config.field] == config.value:
+                                               agg[node]['up'] += 1
+                               print "%s nodes in state %s" % ( state_count, config.state )
+
+                               del fb
+                               verbose = 0
+                       except SystemExit:
+                               sys.exit(1)
+                       except KeyboardInterrupt:
+                               sys.exit(1)
+                       except:
+                               import traceback; traceback.print_exc()
+                               print d.strftime("%Y-%m-%d"), "No record"
+
+                       d = d + tdelta
+                       if d > datetime.now(): break
+       else:
+               agg = database.dbLoad(config.aggname)
+       
+       for node in agg:
+               if agg[node]['total'] > 0:
+                       if agg[node]['up'] != agg[node]['total']:
+                               print "%s %s" %  (node, float(agg[node]['up']) / float(agg[node]['total']))
+
+       database.dbDump(config.aggname, agg)
+
+if __name__ == "__main__":
+       main()
diff --git a/statistics/aggregate.py b/statistics/aggregate.py
new file mode 100755 (executable)
index 0000000..371a2b8
--- /dev/null
@@ -0,0 +1,39 @@
+#!/usr/bin/python
+
+
+from monitor import database
+import time
+import sys
+
+actall = database.dbLoad("act_all_080825")
+agg = database.dbLoad("aggregatehistory")
+
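+# Cross-reference the action log with the aggregated state history: for every
+# 'noop' action after 2007-11-06, print the BOOT transitions that were
+# recorded after the action took place.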
+for node in actall.keys():
+       for record in actall[node]:
+               if 'date_created' in record:
+                       t = record['date_created']
+               elif 'time' in record:
+                       t = record['time']
+               else:
+                       continue
+
+               acttime = time.strftime("%Y-%m-%d", time.localtime(t)) 
+
+               if acttime > '2007-11-06':
+                       if 'noop' in record['action']:
+                               if node in agg:
+                                       for ntime,state in agg[node]:
+                                               if state == 'BOOT':
+                                                       if ntime > acttime:
+                                                               if type(record['action']) == type([]):
+                                                                       action = record['action'][0]
+                                                               else:
+                                                                       action = record['action']
+                                                               print acttime, action, ntime, state, node
+
+                               #print time.strftime("%Y-%m-%d", time.localtime(t)), record['action'], node
+
+#for node in agg:
+#      for ntime,state in agg[node]:
+#              if state == 'BOOT':
+#                      print ntime, state, node
diff --git a/statistics/aggregatehistory.py b/statistics/aggregatehistory.py
new file mode 100755 (executable)
index 0000000..588d24c
--- /dev/null
@@ -0,0 +1,133 @@
+#!/usr/bin/python
+
+import plc
+api = plc.getAuthAPI()
+
+import database
+import reboot
+import time
+from datetime import datetime, timedelta
+import calendar
+
+import sys
+from monitor.model import *
+from nodecommon import *
+
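+# Locate the day's archived pickle under archive-pdb and return its basename
+# without the .pkl suffix.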
+def get_filefromglob(d, str):
+       import os
+       import glob
+       # TODO: This is awful.
+       path = "archive-pdb"
+       archive = database.SPickle(path)
+       glob_str = "%s*.%s.pkl" % (d.strftime("%Y-%m-%d"), str)
+       os.chdir(path)
+       #print glob_str
+       file = glob.glob(glob_str)[0]
+       #print "loading %s" % file
+       os.chdir("..")
+       return file[:-4]
+       #fb = archive.load(file[:-4])
+
+
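+# Print a one-line summary of a findbad node record, colorized by boot state.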
+def fb_print_nodeinfo(fbnode, verbose, date=None):
+       if verbose: print "              state |  ssh  |  pcu  | bootcd | category | kernel"
+       if 'checked' in fbnode:
+               print "%11.11s " % diff_time(fbnode['checked']),
+       else:
+               if date: print date,
+               else: print "Unknown",
+               
+       if fbnode['bootcd']:
+               fbnode['bootcd'] = fbnode['bootcd'].split()[-1]
+       else:
+               fbnode['bootcd'] = "unknown"
+       fbnode['state'] = color_boot_state(get_current_state(fbnode))
+       if len(fbnode['kernel'].split()) >= 3:
+               fbnode['kernel'] = fbnode['kernel'].split()[2]
+       print "    %(state)5s | %(ssh)5.5s | %(pcu)5.5s | %(bootcd)6.6s | %(category)8.8s | %(kernel)s" % fbnode
+
+def pcu_print_info(pcuinfo, hostname):
+       print "   Checked: ",
+       if 'checked' in pcuinfo:
+               print "%11.11s " % diff_time(pcuinfo['checked'])
+       else:
+               print "Unknown"
+
+       print "\t            user   |          password | port | hostname "
+       print "\t %17s | %17s | %4s | %30s | %s" % \
+               (pcuinfo['username'], pcuinfo['password'], 
+                pcuinfo[hostname], reboot.pcu_name(pcuinfo), pcuinfo['model'])
+
+       if 'portstatus' in pcuinfo and pcuinfo['portstatus'] != {}:
+               if pcuinfo['portstatus']['22'] == "open":
+                       print "\t ssh -o PasswordAuthentication=yes -o PubkeyAuthentication=no %s@%s" % (pcuinfo['username'], reboot.pcu_name(pcuinfo))
+               if pcuinfo['portstatus']['23'] == "open":
+                       print "\t telnet %s" % (reboot.pcu_name(pcuinfo))
+               if pcuinfo['portstatus']['80'] == "open" or \
+                       pcuinfo['portstatus']['443'] == "open":
+                       print "\t http://%s" % (reboot.pcu_name(pcuinfo))
+               if pcuinfo['portstatus']['443'] == "open":
+                       print "\t racadm.py -r %s -u %s -p '%s'" % (pcuinfo['ip'], pcuinfo['username'], pcuinfo['password'])
+                       print "\t cmdhttps/locfg.pl -s %s -f iloxml/Reset_Server.xml -u %s -p '%s' | grep MESSAGE" % \
+                               (reboot.pcu_name(pcuinfo), pcuinfo['username'], pcuinfo['password'])
+
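+# agg maps hostname -> list of (date, state) pairs; a new pair is appended
+# only when the state differs from the last one recorded.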
+agg = {}
+
+def main():
+       import parser as parsermodule
+
+       parser = parsermodule.getParser()
+       parser.set_defaults(node=None, fields='state', fromtime=None)
+       parser.add_option("", "--node", dest="node", metavar="nodename.edu", 
+                                               help="A single node name to add to the nodegroup")
+       parser.add_option("", "--fields", dest="fields", metavar="key",
+                                               help="Which record field to extract from all files.")
+       parser.add_option("", "--fromtime", dest="fromtime", metavar="YYYY-MM-DD",
+                                               help="Specify a starting date from which to begin the query.")
+       config = parsermodule.parse_args(parser)
+
+       path = "archive-pdb"
+       archive = database.SPickle(path)
+
+       if config.fromtime:
+               begin = config.fromtime
+       else:
+               begin = "2007-11-06"
+
+       d = datetime_fromstr(begin)
+       tdelta = timedelta(1)
+       verbose = 1
+
+       while True:
+               try:
+                       file = get_filefromglob(d, "production.findbad")
+                       fb = archive.load(file)
+                       for node in fb['nodes']:
+                               fb_nodeinfo  = fb['nodes'][node]['values']
+                               state = fb_nodeinfo['state']
+                               time = d.strftime("%Y-%m-%d")
+                               if node not in agg:
+                                       agg[node] = []
+                               if len(agg[node]) == 0:
+                                       agg[node].append((time, state))
+                               else:
+                                       oldtime = agg[node][-1][0]
+                                       oldstate = agg[node][-1][1]
+                                       if oldstate != state:
+                                               agg[node].append((time, state))
+                       del fb
+                       verbose = 0
+               except KeyboardInterrupt:
+                       sys.exit(1)
+               except:
+                       #import traceback; print traceback.print_exc()
+                       print d.strftime("%Y-%m-%d"), "No record"
+
+               d = d + tdelta
+               if d > datetime.now(): break
+       
+       database.dbDump("aggregatehistory", agg)
+
+if __name__ == "__main__":
+       main()
diff --git a/statistics/correspondence.py b/statistics/correspondence.py
new file mode 100644 (file)
index 0000000..db9ad7b
--- /dev/null
@@ -0,0 +1,33 @@
+import time
+import datetime
+
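+# Round a datetime down to the start of its bin; the default bin is one week,
+# expressed in seconds.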
+def dt_mod_range(dt, bin_secs=(60*60*24*7)):
+       t_stamp = time.mktime(dt.timetuple())
+       t_stamp -= (t_stamp % bin_secs)
+       dt_ret = datetime.datetime.fromtimestamp(t_stamp)
+       return dt_ret
+
+SUPPORT = 3
+MONITOR = 22
+
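+# Bin every 'Correspond' transaction on Monitor-queue tickets into weekly
+# buckets.  NOTE: 'tickets' is assumed to be loaded elsewhere; it is not
+# defined in this file.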
+weekly_bin = {}
+c = 0
+for ticket in tickets.keys():
+       if tickets[ticket]['queue'] != MONITOR: continue
+       for t in tickets[ticket]['transactions']:
+               if t['type'] == 'Correspond':
+                       #print t['datecreated'], t['field'], t['oldvalue'], t['type'], t['newvalue'], t['subject']
+                       k = dt_mod_range(t['datecreated'])
+                       s_key = k.strftime("%Y-%m-%d")
+                       if s_key not in weekly_bin: weekly_bin[s_key] = 0
+                       
+                       weekly_bin[s_key] += 1
+                       
+               #       c += 1
+               #if c > 100 : break;
+       #break;
+
+dates = weekly_bin.keys()
+dates.sort()
+for t in dates:
+       print t, ",", weekly_bin[t]
+
diff --git a/statistics/harvestrt.py b/statistics/harvestrt.py
new file mode 100755 (executable)
index 0000000..f3940e0
--- /dev/null
@@ -0,0 +1,46 @@
+#!/usr/bin/python
+
+import os
+import time
+from datetime import datetime, timedelta
+import sys
+
+def popen(cmdstr):
+       f = os.popen(cmdstr)
+       ret = f.read()
+       return ret
+
+def datetime_fromstr(str):
+       if '-' in str:
+               try:
+                       tup = time.strptime(str, "%Y-%m-%d")
+               except:
+                       tup = time.strptime(str, "%Y-%m-%d-%H:%M")
+       elif '/' in str:
+               tup = time.strptime(str, "%m/%d/%Y")
+       else:
+               tup = time.strptime(str, "%m/%d/%Y")
+       ret = datetime.fromtimestamp(time.mktime(tup))
+       return ret
+
+
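+# Usage: harvestrt.py <queue> <start-date> <iterations>
+# For each successive 30-day window, count the tickets in <queue> whose
+# 'Told' timestamp falls inside the window, using the rt command-line client.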
+def main():
+       queue = sys.argv[1]
+       d1 = datetime_fromstr(sys.argv[2])
+       iterations = int(sys.argv[3])
+       i = 0
+       while i < iterations:
+               d1_s = d1.strftime("%Y-%m-%d")
+               d2 = d1 + timedelta(30)
+               d2_s = d2.strftime("%Y-%m-%d")
+               query = "Queue='%s' and " % queue 
+               query = query + "Told > '%s' and Told < '%s'" % (d1_s, d2_s)
+               cmd = """rt ls -t ticket "%s" | grep -v "No matching" | wc -l  """ % query
+               print cmd
+               ret = popen(cmd)
+               print d1_s, ",", ret[:-1]
+               d1=d2
+               i += 1
+
+if __name__ == "__main__":
+       main()
diff --git a/statistics/monitorstats.py b/statistics/monitorstats.py
new file mode 100644 (file)
index 0000000..8fc24d5
--- /dev/null
@@ -0,0 +1,80 @@
+
+from monitor import database
+from datetime import datetime, timedelta
+import os
+import glob
+import time
+
+from monitor import config
+
+def datetime_fromstr(str):
+       if '-' in str:
+               try:
+                       tup = time.strptime(str, "%Y-%m-%d")
+               except:
+                       tup = time.strptime(str, "%Y-%m-%d-%H:%M")
+       elif '/' in str:
+               tup = time.strptime(str, "%m/%d/%Y")
+       else:
+               tup = time.strptime(str, "%m/%d/%Y")
+       ret = datetime.fromtimestamp(time.mktime(tup))
+       return ret
+
+def get_filefromglob(d, str, path="archive-pdb", returnlist=False):
+       # TODO: This is awful.
+       startpath = os.getcwd()
+       os.chdir(config.MONITOR_SCRIPT_ROOT + "/" + path)
+
+       #archive = database.SPickle(path)
+       glob_str = "%s*.%s.pkl" % (d.strftime("%Y-%m-%d"), str)
+       fg_list = [ x[:-4] for x in glob.glob(glob_str) ]
+
+       os.chdir(startpath)
+
+       if returnlist:
+               return sorted(fg_list)
+       else:
+               return fg_list[0]
+
+def get_archive(path):
+       full_path = config.MONITOR_SCRIPT_ROOT + "/" + path
+       return database.SPickle(full_path)
+       
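+# Emit a Google Chart API URL for a line chart with an error band: each data
+# row is (value, high-delta, low-delta), values are shifted down by 'offset',
+# and the chm=F marker appears to fill the band between the low/high series.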
+def print_graph(data, begin, end, xaxis, offset=500, window=100):
+       s1=[]
+       s2=[]
+       s3=[]
+       for row in data:
+               s1.append(row[0])
+               s2.append(row[1])
+               s3.append(row[2])
+       
+       delta=offset
+       s1 = map(lambda x: x-delta, s1)
+       rlow= zip(s1,s3)
+       rhigh = zip(s1,s2)
+       diff_low  = map(lambda x: x[0]-x[1], rlow)
+       diff_high = map(lambda x: x[0]+x[1], rhigh)
+       s1 = map(lambda x: str(x), s1)
+       diff_low = map(lambda x: str(x), diff_low)
+       diff_high = map(lambda x: str(x), diff_high)
+       print s1
+       print diff_low
+       print diff_high
+       print "http://chart.apis.google.com/chart?cht=lc&chds=0,100&chxt=x,y&chxl=0:%s1:|500|550|600&chs=700x200&chm=F,aaaaaa,1,-1,2&chd=t1:%s" % (xaxis, ",".join(s1) + "|" + ",".join(diff_low) + "|" + ",".join(s1) + "|" + ",".join(s1) +"|" + ",".join(diff_high) )
+
+def get_xaxis(list, width=16, wide=False):
+       # 3 for odd
+       # 4 for even
+       # 5 for wide odd
+       # 6 for wide even
+       list_len = len(list)
+       if list_len == 0: return "||"
+
+       is_even = list_len % 2 == 0
+       #if is_even:
+       #       xaxis = "|" + list[0][:width] + "|" + list[-1][:width] + "|"
+       #else:
+       xaxis = "|" + list[0][:width] + "|" + list[list_len/2 + 1][:width] + "|" + list[-1][:width] + "|"
+       return xaxis
+
diff --git a/statistics/nodebad.py b/statistics/nodebad.py
new file mode 100755 (executable)
index 0000000..eec69be
--- /dev/null
@@ -0,0 +1,152 @@
+#!/usr/bin/python
+
+import os
+import sys
+import string
+import time
+
+
+from monitor import database
+from nodequeryold import verify,query_to_dict,node_select
+from monitor.common import *
+
+from monitor.wrapper import plc
+api = plc.getAuthAPI()
+from monitor.model import *
+
+round = 1
+externalState = {'round': round, 'nodes': {}}
+count = 0
+
+def main(config):
+       global externalState
+       externalState = database.if_cached_else(1, config.dbname, lambda : externalState) 
+       if config.increment:
+               # update global round number to force refreshes across all nodes
+               externalState['round'] += 1
+
+       #l_nodes = syncplcdb.create_plcdb()
+       l_plcnodes = database.dbLoad("l_plcnodes")
+
+       l_nodes = get_nodeset(config)
+       #if config.node:
+       #       l_nodes = [config.node]
+       ##else:
+       #       l_nodes = [node['hostname'] for node in l_plcnodes]
+       
+       checkAndRecordState(l_nodes, l_plcnodes)
+
+def checkAndRecordState(l_nodes, l_plcnodes):
+       global externalState
+       global count
+       global_round = externalState['round']
+
+       for nodename in l_nodes:
+               if nodename not in externalState['nodes']:
+                       externalState['nodes'][nodename] = {'round': 0, 'values': []}
+
+               node_round   = externalState['nodes'][nodename]['round']
+               if node_round < global_round:
+                       # do work
+                       values = collectStatusAndState(nodename, l_plcnodes)
+                       global_round = externalState['round']
+                       externalState['nodes'][nodename]['values'] = values
+                       externalState['nodes'][nodename]['round'] = global_round
+               else:
+                       count += 1
+
+               if count % 20 == 0:
+                       database.dbDump(config.dbname, externalState)
+
+       database.dbDump(config.dbname, externalState)
+
+fb = database.dbLoad('findbad')
+
+def getnodesup(nodelist):
+       up = 0
+       for node in nodelist:
+               if node['hostname'] in fb['nodes'].keys():
+                       try:
+                               if fb['nodes'][node['hostname']]['values']['state'] == "BOOT":
+                                       up = up + 1
+                       except:
+                               pass
+       return up
+
+def get(fb, path):
+       indexes = path.split("/")
+       values = fb
+       for index in indexes:
+               if index in values:
+                       values = values[index]
+               else:
+                       return None
+       return values
+
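+# Record one node's status in PersistFlags: BOOT maps to 'good', DEBUG maps to
+# the PLC boot_state, and anything else to 'down'; last_changed is reset
+# whenever the status flips.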
+def collectStatusAndState(nodename, l_plcnodes):
+       global count
+
+       d_node = None
+       for node in l_plcnodes:
+               if node['hostname'] == nodename:
+                       d_node = node
+                       break
+       if not d_node:
+               return None
+
+       pf = PersistFlags(nodename, 1, db='node_persistflags')
+
+       if not pf.checkattr('last_changed'):
+               pf.last_changed = time.time()
+               
+       pf.last_checked = time.time()
+
+       if not pf.checkattr('status'):
+               pf.status = "unknown"
+
+       state_path     = "nodes/" + nodename + "/values/state"
+       bootstate_path = "nodes/" + nodename + "/values/plcnode/boot_state"
+
+       if get(fb, state_path) == "BOOT":
+               if pf.status != "good": pf.last_changed = time.time()
+               pf.status = "good"
+       elif get(fb, state_path)  == "DEBUG":
+               bs = get(fb, bootstate_path)
+               if pf.status != bs: pf.last_changed = time.time()
+               pf.status = bs
+       else:
+               if pf.status != "down": pf.last_changed = time.time()
+               pf.status = "down"
+
+       count += 1
+       print "%d %35s %s since(%s)" % (count, nodename, pf.status, diff_time(pf.last_changed))
+       # updated by other modules
+       #pf.enabled = 
+       #pf.suspended = 
+
+       pf.save()
+
+       return True
+
+if __name__ == '__main__':
+       import parser as parsermodule
+       parser = parsermodule.getParser(['nodesets'])
+       parser.set_defaults(filename=None, node=None, nodeselect=False, nodegroup=None, 
+                                               increment=False, dbname="nodebad", cachenodes=False)
+       
+       parser.add_option("", "--dbname", dest="dbname", metavar="FILE", 
+                                               help="Specify the name of the database to which the information is saved")
+       parser.add_option("-i", "--increment", action="store_true", dest="increment", 
+                                               help="Increment round number to force refresh or retry")
+       parser = parsermodule.getParser(['defaults'], parser)
+       config = parsermodule.parse_args(parser)
+
+       try:
+               main(config)
+       except Exception, err:
+               import traceback
+               traceback.print_exc()
+               print "Exception: %s" % err
+               print "Saving data... exiting."
+               database.dbDump(config.dbname, externalState)
+               sys.exit(0)
diff --git a/statistics/nodecommon.py b/statistics/nodecommon.py
new file mode 100644 (file)
index 0000000..042f80f
--- /dev/null
@@ -0,0 +1,200 @@
+
+import struct
+import time
+from monitor.util import file
+from monitor.wrapper import plc
+from datetime import datetime 
+from monitor import database
+esc = struct.pack('i', 27)
+RED    = esc + "[1;31m"
+GREEN  = esc + "[1;32m"
+YELLOW = esc + "[1;33m"
+BLUE   = esc + "[1;34m"
+LIGHTBLUE      = esc + "[1;36m"
+NORMAL  = esc + "[0;39m"
+
+def red(str):
+       return RED + str + NORMAL
+
+def yellow(str):
+       return YELLOW + str + NORMAL
+
+def green(str):
+       return GREEN + str + NORMAL
+
+def lightblue(str):
+       return LIGHTBLUE + str + NORMAL
+
+def blue(str):
+       return BLUE + str + NORMAL
+
+def get_current_state(fbnode):
+       if 'state' in fbnode:
+               state = fbnode['state']
+       else:
+               state = "none"
+       l = state.lower()
+       if l == "debug": l = 'dbg '
+       return l
+
+def color_pcu_state(fbnode):
+       import reboot
+
+       if 'plcnode' in fbnode and 'pcu_ids' in fbnode['plcnode'] and len(fbnode['plcnode']['pcu_ids']) > 0 :
+               values = reboot.get_pcu_values(fbnode['plcnode']['pcu_ids'][0])
+               if values == None:
+                       return fbnode['pcu']
+       else:
+               if 'pcu' not in fbnode:
+                       return 'NOPCU'
+               else:
+                       return fbnode['pcu']
+
+       if 'reboot' in values:
+               rb = values['reboot']
+               if rb == 0 or rb == "0":
+                       return fbnode['pcu'] + "OK  "
+                       #return green(fbnode['pcu'])
+               elif "NetDown" == rb  or "Not_Run" == rb:
+                       return fbnode['pcu'] + "DOWN"
+                       #return yellow(fbnode['pcu'])
+               else:
+                       return fbnode['pcu'] + "BAD "
+                       #return red(fbnode['pcu'])
+       else:
+               #return red(fbnode['pcu'])
+               return fbnode['pcu'] + "BAD "
+
+def color_boot_state(l):
+       if    l == "dbg": return yellow("debg")
+       elif  l == "dbg ": return yellow("debg")
+       elif  l == "diag": return lightblue(l)
+       elif  l == "disable": return red("dsbl")
+       elif  l == "down": return red(l)
+       elif  l == "boot": return green(l)
+       elif  l == "rins": return blue(l)
+       else:
+               return l
+
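+# Render a unix timestamp as a coarse human-readable age ("5 min ago",
+# "2 days ago", ...); with abstime=False the argument is treated as a
+# difference in seconds rather than an absolute time.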
+def diff_time(timestamp, abstime=True):
+       import math
+       now = time.time()
+       if timestamp == None:
+               return "unknown"
+       if abstime:
+               diff = now - timestamp
+       else:
+               diff = timestamp
+       # return the number of seconds as a difference from current time.
+       t_str = ""
+       if diff < 60: # sec in min.
+               t = diff / 1
+               t_str = "%s sec ago" % int(math.ceil(t))
+       elif diff < 60*60: # sec in hour
+               t = diff / (60)
+               t_str = "%s min ago" % int(math.ceil(t))
+       elif diff < 60*60*24: # sec in day
+               t = diff / (60*60)
+               t_str = "%s hrs ago" % int(math.ceil(t))
+       elif diff < 60*60*24*14: # sec in week
+               t = diff / (60*60*24)
+               t_str = "%s days ago" % int(math.ceil(t))
+       elif diff <= 60*60*24*30: # approx sec in month
+               t = diff / (60*60*24*7)
+               t_str = "%s wks ago" % int(math.ceil(t))
+       elif diff > 60*60*24*30: # approx sec in month
+               t = diff / (60*60*24*30)
+               t_str = "%s mnths ago" % int(t)
+       return t_str
+
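+# Walk a nested dict along a '/'-separated path, e.g.
+# getvalue(fb, "nodes/<hostname>/values/state"); returns None on a missing key.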
+def getvalue(fb, path):
+    indexes = path.split("/")
+    values = fb
+    for index in indexes:
+        if index in values:
+            values = values[index]
+        else:
+            return None
+    return values
+
+def nodegroup_display(node, fb, conf=None):
+       from unified_model import PersistFlags
+       if node['hostname'] not in fb['nodes']:
+               node['current'] = 'none'
+               return ""
+       node['current'] = get_current_state(fb['nodes'][node['hostname']]['values'])
+
+       if fb['nodes'][node['hostname']]['values'] == []:
+               return ""
+
+       s = fb['nodes'][node['hostname']]['values']['kernel'].split()
+       if len(s) >=3:
+               node['kernel'] = s[2]
+       else:
+               node['kernel'] = fb['nodes'][node['hostname']]['values']['kernel']
+               
+       if '2.6' not in node['kernel']: node['kernel'] = ""
+       if conf and not conf.nocolor:
+           node['boot_state']  = color_boot_state(node['boot_state'])
+           node['current']     = color_boot_state(node['current'])
+       #node['boot_state']     = node['boot_state']
+       #node['current']        = node['current']
+       node['pcu'] = fb['nodes'][node['hostname']]['values']['pcu']
+       node['lastupdate'] = diff_time(node['last_contact'])
+       pf = PersistFlags(node['hostname'], 1, db='node_persistflags')
+       try:
+               node['lc'] = diff_time(pf.last_changed)
+       except:
+               node['lc'] = "err"
+       ut = fb['nodes'][node['hostname']]['values']['comonstats']['uptime']
+       if ut != "null":
+               ut = diff_time(float(fb['nodes'][node['hostname']]['values']['comonstats']['uptime']), False)
+       node['uptime'] = ut
+
+       return "%(hostname)-42s %(boot_state)8s %(current)5s %(pcu)6s %(key)10.10s... %(kernel)35.35s %(lastupdate)12s, %(lc)s, %(uptime)s" % node
+
+def datetime_fromstr(str):
+       if '-' in str:
+               try:
+                       tup = time.strptime(str, "%Y-%m-%d")
+               except:
+                       tup = time.strptime(str, "%Y-%m-%d-%H:%M")
+       elif '/' in str:
+               tup = time.strptime(str, "%m/%d/%Y")
+       else:
+               tup = time.strptime(str, "%m/%d/%Y")
+       ret = datetime.fromtimestamp(time.mktime(tup))
+       return ret
+
+def get_nodeset(config):
+       """
+               Given the config values passed in, return the set of hostnames that it
+               evaluates to.
+       """
+       api = plc.getAuthAPI()
+       l_nodes = database.dbLoad("l_plcnodes")
+
+       if config.nodelist:
+               f_nodes = util.file.getListFromFile(config.nodelist)
+               l_nodes = filter(lambda x: x['hostname'] in f_nodes, l_nodes)
+       elif config.node:
+               f_nodes = [config.node]
+               l_nodes = filter(lambda x: x['hostname'] in f_nodes, l_nodes)
+       elif config.nodegroup:
+               ng = api.GetNodeGroups({'name' : config.nodegroup})
+               l_nodes = api.GetNodes(ng[0]['node_ids'], ['hostname'])
+       elif config.site:
+               site = api.GetSites(config.site)
+               l_nodes = api.GetNodes(site[0]['node_ids'], ['hostname'])
+               
+       l_nodes = [node['hostname'] for node in l_nodes]
+
+       # perform this query after the above options, so that the filter above
+       # does not break.
+       if config.nodeselect:
+               fb = database.dbLoad("findbad")
+               l_nodes = node_select(config.nodeselect, fb['nodes'].keys(), fb)
+
+       return l_nodes
+       
diff --git a/statistics/nodediff-graph-better.py b/statistics/nodediff-graph-better.py
new file mode 100755 (executable)
index 0000000..68e14e1
--- /dev/null
@@ -0,0 +1,138 @@
+#!/usr/bin/python
+
+
+from monitor import config
+from monitor.wrapper import plc
+from monitor import parser as parsermodule
+#from monitor.model import *
+from monitorstats import *
+from monitor import database
+
+import sys
+import time
+import calendar
+from datetime import datetime, timedelta
+
+from nodequeryold import verify,query_to_dict,node_select
+
+api = plc.getAuthAPI()
+
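+# Load one findbad snapshot and return the hostnames matching the select query.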
+def nodes_from_time(arch, file, select=None):
+       fb = arch.load(file)
+
+       nodelist = fb['nodes'].keys()
+       nodelist = node_select(select, nodelist, fb)
+       return nodelist
+
+def print_nodelist(nodelist, file):
+       for node in nodelist:
+               if file:
+                       print >>file, node
+               else:
+                       print node
+       
+
+def main():
+       parser = parsermodule.getParser()
+       parser.set_defaults(archivedir='archive-pdb', begin=None, end=None, printnodes=False, select=None, select2=None)
+       parser.add_option("", "--archivedir", dest="archivedir", metavar="dirname",
+                                               help="Directory containing the archived findbad pickles.")
+       parser.add_option("", "--select", dest="select", metavar="key",
+                                               help="Node selection query.")
+       parser.add_option("", "--select2", dest="select2", metavar="key",
+                                               help="Second node selection query, counted alongside the first.")
+       parser.add_option("", "--print", dest="printnodes", action="store_true",
+                                               help="print the nodes that have come up or down.")
+       parser.add_option("", "--begin", dest="begin", metavar="YYYY-MM-DD",
+                                               help="Specify a starting date from which to begin the query.")
+       parser.add_option("", "--end", dest="end", metavar="YYYY-MM-DD",
+                                               help="Specify an ending date at which the queries end.")
+       config = parsermodule.parse_args(parser)
+       archive = get_archive(config.archivedir)
+
+       if not config.begin or not config.end:
+               print parsermodule.usage(parser)
+               sys.exit(1)
+
+       tdelta = timedelta(1)
+       d_s1 = datetime_fromstr(config.begin)
+       d_s2 = datetime_fromstr(config.begin) + tdelta
+       d_end = datetime_fromstr(config.end)
+
+       print d_s1
+       print d_s2
+       print d_end
+
+       data = []
+       while d_end > d_s2:
+
+               try:
+                       f_s1 = get_filefromglob(d_s1, "production.findbad", config.archivedir)
+                       f_s2 = get_filefromglob(d_s2, "production.findbad", config.archivedir)
+               except:
+                       timestr = d_s2.strftime("%Y-%m-%d")
+                       print timestr, ",", 0, ",", 0
+                       d_s1 = d_s2
+                       d_s2 = d_s1 + tdelta
+                       continue
+
+               s1 = set(nodes_from_time(archive, f_s1, config.select))
+               s2 = set(nodes_from_time(archive, f_s2, config.select))
+               s3 = set(nodes_from_time(archive, f_s2, config.select2))
+
+
+               timestr = d_s2.strftime("%Y-%m-%d")
+               print timestr, ",", len(s2),",",  len(s3)
+               if not config.printnodes:
+               #       f_up = open("up-%s" % timestr, 'w')
+               #       f_down = open("dn-%s" % timestr, 'w')
+                       f_up = None
+                       f_down = None
+                       pass
+               else:
+                       print "%s nodes up" % len(s2-s1)
+                       print "Nodes s2 minus s1: len(s2-s1) = %s" % len(s2-s1)
+                       f_up = None
+                       f_down = None
+
+               #print_nodelist(s2-s1, f_up)
+
+               if config.printnodes:
+                       print ""
+                       print "%s nodes down" % len(s1-s2)
+                       print "Nodes s1 minus s2: len(s1-s2) = %s" % len(s1-s2)
+
+               #print_nodelist(s1-s2, f_down)
+               if not config.printnodes:
+                       if f_up: f_up.close()
+                       if f_down: f_down.close()
+
+               d_s1 = d_s2
+               d_s2 = d_s1 + tdelta
+       
+       s1=[]
+       s2=[]
+       s3=[]
+       for row in data:
+               s1.append(row[0])
+               s2.append(row[1])
+               s3.append(row[2])
+       
+       s1 = map(lambda x: x-500, s1)
+       rlow= zip(s1,s3)
+       rhigh = zip(s1,s2)
+       diff_low  = map(lambda x: x[0]-x[1], rlow)
+       diff_high = map(lambda x: x[0]+x[1], rhigh)
+       s1 = map(lambda x: str(x), s1)
+       diff_low = map(lambda x: str(x), diff_low)
+       diff_high = map(lambda x: str(x), diff_high)
+       print s1
+       print diff_low
+       print diff_high
+       print "http://chart.apis.google.com/chart?cht=lc&chds=40,100&chxt=x,y&chxl=0:|Oct|Nov|Dec|Jan|Feb|1:|540|580|600&chs=700x200&chm=F,aaaaaa,1,-1,2&chd=t1:%s" % ",".join(s1) + "|" + ",".join(diff_low) + "|" + ",".join(s1) + "|" + ",".join(s1) +"|" + ",".join(diff_high)
+
+# takes two arguments as dates, comparing the number of up nodes from one and
+# the other.
+
+if __name__ == "__main__":
+       main()
diff --git a/statistics/nodediff-graph.py b/statistics/nodediff-graph.py
new file mode 100644 (file)
index 0000000..e5f9d39
--- /dev/null
@@ -0,0 +1,100 @@
+#!/usr/bin/python
+
+
+from monitor import config
+from monitor.wrapper import plc
+from monitor import parser as parsermodule
+from monitor.model import *
+from monitorstats import *
+from monitor import database
+
+import sys
+import time
+import calendar
+from datetime import datetime, timedelta
+
+from nodequeryold import verify,query_to_dict,node_select
+
+api = plc.getAuthAPI()
+
+def nodes_from_time(arch, file):
+       fb = arch.load(file)
+
+       nodelist = fb['nodes'].keys()
+       nodelist = node_select(config.select, nodelist, fb)
+       return nodelist
+       
+
+def main():
+       parser = parsermodule.getParser()
+       parser.set_defaults(archivedir='archive-pdb', begin=None, end=None, select=None)
+       parser.add_option("", "--archivedir", dest="archivedir", metavar="dirname",
+                                               help="Directory containing the archived findbad pickles.")
+       parser.add_option("", "--select", dest="select", metavar="key",
+                                               help="Node selection query.")
+       parser.add_option("", "--begin", dest="begin", metavar="YYYY-MM-DD",
+                                               help="Specify a starting date from which to begin the query.")
+       parser.add_option("", "--end", dest="end", metavar="YYYY-MM-DD",
+                                               help="Specify an ending date at which the queries end.")
+       config = parsermodule.parse_args(parser)
+       archive = get_archive(config.archivedir)
+
+       if not config.begin or not config.end:
+               print parsermodule.usage(parser)
+               sys.exit(1)
+
+       tdelta = timedelta(1)
+       d_s1 = datetime_fromstr(config.begin)
+       d_s2 = datetime_fromstr(config.begin) + tdelta
+       d_end   = datetime_fromstr(config.end)
+
+       data = []
+       while d_end > d_s2:
+
+               f_s1 = get_filefromglob(d_s1, "production.findbad", config.archivedir)
+               f_s2   = get_filefromglob(d_s2, "production.findbad", config.archivedir)
+
+               s1 = set(nodes_from_time(archive, f_s1))
+               s2 = set(nodes_from_time(archive, f_s2))
+
+               print "[ %s, %s, %s ]," % ( len(s2), len(s2-s1), len(s1-s2) )
+               data.append( [ len(s2), len(s2-s1), len(s1-s2)] )
+
+               #print "len s2 : ", len(s2)
+               #print "len s1 : ", len(s1)
+               #print "%s nodes up" % len(s2-s1)
+               #print "Nodes s2 minus s1: len(s2-s1) = %s" % len(s2-s1)
+               #for node in s2 - s1: print node
+               #print ""
+               #print "%s nodes down" % len(s1-s2)
+               #print "Nodes s1 minus s2: len(s1-s2) = %s" % len(s1-s2)
+       #       for node in s1 - s2: print node
+               d_s1 = d_s2
+               d_s2 = d_s1 + tdelta
+       
+       s1=[]
+       s2=[]
+       s3=[]
+       for row in data:
+               s1.append(row[0])
+               s2.append(row[1])
+               s3.append(row[2])
+       
+       s1 = map(lambda x: x-500, s1)
+       rlow= zip(s1,s3)
+       rhigh = zip(s1,s2)
+       diff_low  = map(lambda x: x[0]-x[1], rlow)
+       diff_high = map(lambda x: x[0]+x[1], rhigh)
+       s1 = map(lambda x: str(x), s1)
+       diff_low = map(lambda x: str(x), diff_low)
+       diff_high = map(lambda x: str(x), diff_high)
+       print s1
+       print diff_low
+       print diff_high
+       print "http://chart.apis.google.com/chart?cht=lc&chds=40,100&chxt=x,y&chxl=0:|Oct|Nov|Dec|Jan|Feb|1:|540|580|600&chs=700x200&chm=F,aaaaaa,1,-1,2&chd=t1:%s" % ",".join(s1) + "|" + ",".join(diff_low) + "|" + ",".join(s1) + "|" + ",".join(s1) +"|" + ",".join(diff_high)
+
+# takes two arguments as dates, comparing the number of up nodes from one and
+# the other.
+
+if __name__ == "__main__":
+       main()
diff --git a/statistics/nodediff-length.py b/statistics/nodediff-length.py
new file mode 100755 (executable)
index 0000000..f2e468f
--- /dev/null
@@ -0,0 +1,107 @@
+#!/usr/bin/python
+
+
+from monitor import config
+from monitor.wrapper import plc
+from monitor import parser as parsermodule
+from monitor.model import *
+from monitorstats import *
+from monitor import database
+
+import sys
+import time
+import calendar
+from datetime import datetime, timedelta
+
+from nodequeryold import verify,query_to_dict,node_select
+
+api = plc.getAuthAPI()
+
+def nodes_from_time(arch, file):
+       fb = arch.load(file)
+
+       nodelist = fb['nodes'].keys()
+       nodelist = node_select(config.select, nodelist, fb)
+       return nodelist
+
+def print_nodelist(nodelist, file):
+       for node in nodelist:
+               if file:
+                       print >>file, node
+               else:
+                       print node
+       
+
+def main():
+       parser = parsermodule.getParser()
+       parser.set_defaults(archivedir='archive-pdb', begin=None, end=None, printnodes=False, select=None)
+       parser.add_option("", "--archivedir", dest="archivedir", metavar="dirname",
+                                               help="Directory containing the archived findbad pickles.")
+       parser.add_option("", "--select", dest="select", metavar="key",
+                                               help="Node selection query.")
+       parser.add_option("", "--print", dest="printnodes", action="store_true",
+                                               help="print the nodes that have come up or down.")
+       parser.add_option("", "--begin", dest="begin", metavar="YYYY-MM-DD",
+                                               help="Specify a starting date from which to begin the query.")
+       parser.add_option("", "--end", dest="end", metavar="YYYY-MM-DD",
+                                               help="Specify an ending date at which the queries end.")
+       config = parsermodule.parse_args(parser)
+       archive = get_archive(config.archivedir)
+
+       if not config.begin or not config.end:
+               print parsermodule.usage(parser)
+               sys.exit(1)
+
+       tdelta = timedelta(1)
+       d_s1 = datetime_fromstr(config.begin)
+       d_s2 = datetime_fromstr(config.end)
+       d_end = d_s2
+
+       print d_s1
+       print d_s2
+       print d_end
+
+       data = []
+       while d_end >= d_s2:
+
+               f_s1 = get_filefromglob(d_s1, "production.findbad", config.archivedir)
+               f_s2 = get_filefromglob(d_s2, "production.findbad", config.archivedir)
+
+               s1 = set(nodes_from_time(archive, f_s1))
+               s2 = set(nodes_from_time(archive, f_s2))
+
+               print "[ %s, %s, %s ]," % ( len(s2), len(s2-s1), len(s1-s2) )
+               data.append( [ len(s2), len(s2-s1), len(s1-s2)] )
+
+               #print "len s2 : ", len(s2)
+               #print "len s1 : ", len(s1)
+               timestr = d_s2.strftime("%Y-%m-%d")
+               if not config.printnodes:
+                       f_up = open("up-%s" % timestr, 'w')
+                       f_down = open("dn-%s" % timestr, 'w')
+               else:
+                       print "%s nodes up" % len(s2-s1)
+                       print "Nodes s2 minus s1: len(s2-s1) = %s" % len(s2-s1)
+                       f_up = None
+                       f_down = None
+
+               print_nodelist(s2-s1, f_up)
+
+               if config.printnodes:
+                       print ""
+                       print "%s nodes down" % len(s1-s2)
+                       print "Nodes s1 minus s2: len(s1-s2) = %s" % len(s1-s2)
+
+               print_nodelist(s1-s2, f_down)
+               if not config.printnodes:
+                       f_up.close()
+                       f_down.close()
+
+               d_s1 = d_s2
+               d_s2 = d_s1 + tdelta
+       
+# takes two arguments as dates, comparing the number of up nodes from one and
+# the other.
+
+if __name__ == "__main__":
+       main()
diff --git a/statistics/nodediff.py b/statistics/nodediff.py
new file mode 100755 (executable)
index 0000000..7e6674d
--- /dev/null
@@ -0,0 +1,128 @@
+#!/usr/bin/python
+
+
+from monitor import config
+from monitor.wrapper import plc
+from monitor import parser as parsermodule
+from monitor.model import *
+from monitorstats import *
+from monitor import database
+
+import sys
+import time
+import calendar
+from datetime import datetime, timedelta
+
+from nodequeryold import verify,query_to_dict,node_select
+
+api = plc.getAuthAPI()
+
+def nodes_from_time(arch, file):
+       fb = arch.load(file)
+
+       nodelist = fb['nodes'].keys()
+       nodelist = node_select(config.select, nodelist, fb)
+       del fb
+       return nodelist
+
+def print_nodelist(nodelist, file):
+       for node in nodelist:
+               if file:
+                       print >>file, node
+               else:
+                       print node
+       
+def main():
+       parser = parsermodule.getParser()
+       parser.set_defaults(archivedir='archive-pdb', begin=None, end=None, 
+                                               sequential=False, printnodes=False, select=None)
+
+       parser.add_option("", "--archivedir", dest="archivedir", metavar="dirname",
+                                               help="Directory containing the archived findbad pickles.")
+       parser.add_option("", "--select", dest="select", metavar="key",
+                                               help="Node selection query.")
+       parser.add_option("", "--sequential", dest="sequential", action="store_true",
+                                               help="Compare EVERY timestep between begin and end.")
+       parser.add_option("", "--print", dest="printnodes", action="store_true",
+                                               help="print the nodes that have come up or down.")
+       parser.add_option("", "--begin", dest="begin", metavar="YYYY-MM-DD",
+                                               help="Specify a starting date from which to begin the query.")
+       parser.add_option("", "--end", dest="end", metavar="YYYY-MM-DD",
+                                               help="Specify an ending date at which the queries end.")
+
+       config = parsermodule.parse_args(parser)
+       archive = get_archive(config.archivedir)
+
+       if not config.begin or not config.end:
+               print parsermodule.usage(parser)
+               sys.exit(1)
+
+       tdelta = timedelta(1)
+       d_s1 = datetime_fromstr(config.begin)
+       d_s2 = datetime_fromstr(config.begin) + tdelta
+       d_end = datetime_fromstr(config.end)
+
+       print d_s1
+       print d_s2
+       print d_end
+
+       file_list = []
+       # the iterations are day-based
+       while d_end > d_s2:
+               f_s1 = get_filefromglob(d_s1, "production.findbad", config.archivedir, True)
+               if not config.sequential:
+                       file_list.append(f_s1)
+               else:
+                       file_list += f_s1
+                       
+               d_s1 = d_s2
+               d_s2 = d_s1 + tdelta
+               
+       print file_list
+       file_list = file_list[4:]
+
+       xaxis = get_xaxis(file_list)
+
+       data = []
+       f_s1 = None
+       f_s2 = None
+       i = 0
+       for file in file_list:
+
+               i+=1
+               f_s2 = file
+               if f_s1 is None:
+                       f_s1 = f_s2
+                       continue
+
+               s1 = set(nodes_from_time(archive, f_s1))
+               s2 = set(nodes_from_time(archive, f_s2))
+
+               print f_s1
+               print "[ %s, %s, %s ]," % ( len(s2), len(s2-s1), len(s1-s2) )
+               data.append( [ len(s2), len(s2-s1), len(s1-s2)] )
+
+               print "%s nodes up" % len(s2-s1)
+               print "Nodes s2 minus s1: len(s2-s1) = %s" % len(s2-s1)
+               f_up = None
+               f_down = None
+
+               if config.printnodes:
+                       print_nodelist(s2-s1, f_up)
+
+               print ""
+               print "%s nodes down" % len(s1-s2)
+               print "Nodes s1 minus s2: len(s1-s2) = %s" % len(s1-s2)
+
+               if config.printnodes:
+                       print_nodelist(s1-s2, f_down)
+
+               f_s1 = f_s2
+               f_s2 = None
+
+       print_graph(data, config.begin, config.end, xaxis)
+# takes two arguments as dates, comparing the number of up nodes from one and
+# the other.
+
+if __name__ == "__main__":
+       main()
diff --git a/statistics/nodequeryold.py b/statistics/nodequeryold.py
new file mode 100755 (executable)
index 0000000..a2aba4b
--- /dev/null
@@ -0,0 +1,216 @@
+#!/usr/bin/python
+
+import sys
+from nodecommon import *
+import glob
+import os
+from monitor.util import file
+
+import time
+import re
+
+#fb = {}
+fb = None
+fbpcu = None
+
+class NoKeyException(Exception): pass
+
+def fb_print_nodeinfo(fbnode, hostname, fields=None):
+       fbnode['hostname'] = hostname
+       fbnode['checked'] = diff_time(fbnode['checked'])
+       if fbnode['bootcd']:
+               fbnode['bootcd'] = fbnode['bootcd'].split()[-1]
+       else:
+               fbnode['bootcd'] = "unknown"
+       fbnode['pcu'] = color_pcu_state(fbnode)
+
+       if not fields:
+               if 'ERROR' in fbnode['category']:
+                       fbnode['kernel'] = ""
+               else:
+                       fbnode['kernel'] = fbnode['kernel'].split()[2]
+               fbnode['boot_state'] = fbnode['plcnode']['boot_state']
+
+               try:
+                       if len(fbnode['nodegroups']) > 0:
+                               fbnode['category'] = fbnode['nodegroups'][0]
+               except:
+                       #print "ERROR!!!!!!!!!!!!!!!!!!!!!"
+                       pass
+
+               print "%(hostname)-45s | %(checked)11.11s | %(boot_state)5.5s| %(state)8.8s | %(ssh)5.5s | %(pcu)6.6s | %(bootcd)6.6s | %(category)8.8s | %(kernel)s" % fbnode
+       else:
+               format = ""
+               for f in fields:
+                       format += "%%(%s)s " % f
+               print format % fbnode
+
+def get(fb, path):
+    indexes = path.split("/")
+    values = fb
+    for index in indexes:
+        if index in values:
+            values = values[index]
+        else:
+            raise NoKeyException(index)
+    return values
+
+def verifyType(constraints, data):
+       """
+               constraints is a list of dicts: the key/value pairs within each
+               dict are ANDed together, and the dicts themselves are ORed.
+               # [ {...:...}==AND , ... , ... , ] == OR
+       """
+       con_or_true = False
+       for con in constraints:
+               #print "con: %s" % con
+               if len(con.keys()) == 0:
+                       con_and_true = False
+               else:
+                       con_and_true = True
+
+               for key in con.keys():
+                       #print "looking at key: %s" % key
+                       if data is None:
+                               con_and_true = False
+                               break
+
+                       try:
+                               get(data,key)
+                               o = con[key]
+                               if o.name() == "Match":
+                                       if get(data,key) is not None:
+                                               value_re = re.compile(o.value)
+                                               con_and_true = con_and_true & (value_re.search(get(data,key)) is not None)
+                                       else:
+                                               con_and_true = False
+                               elif o.name() == "ListMatch":
+                                       if get(data,key) is not None:
+                                               match = False
+                                               for listitem in get(data,key):
+                                                       value_re = re.compile(o.value)
+                                                       if value_re.search(listitem) is not None:
+                                                               match = True
+                                                               break
+                                               con_and_true = con_and_true & match
+                                       else:
+                                               con_and_true = False
+                               elif o.name() == "Is":
+                                       con_and_true = con_and_true & (get(data,key) == o.value)
+                               elif o.name() == "FilledIn":
+                                       con_and_true = con_and_true & (len(get(data,key)) > 0)
+                               elif o.name() == "PortOpen":
+                                       if get(data,key) is not None:
+                                               v = get(data,key)
+                                               con_and_true = con_and_true & (v[str(o.value)] == "open")
+                                       else:
+                                               con_and_true = False
+                               else:
+                                       value_re = re.compile(o.value)
+                                       con_and_true = con_and_true & (value_re.search(get(data,key)) is not None)
+
+                       except NoKeyException, key:
+                               print "missing key %s" % key,
+                               pass
+                               #print "missing key %s" % key
+                               #con_and_true = False
+
+               con_or_true = con_or_true | con_and_true
+
+       return con_or_true
+
+def verify(constraints, data):
+       """
+               constraints is a list of dicts: the key/value pairs within each
+               dict are ANDed together, and the dicts themselves are ORed.
+               # [ {...:...}==AND , ... , ... , ] == OR
+       """
+       con_or_true = False
+       for con in constraints:
+               #print "con: %s" % con
+               if len(con.keys()) == 0:
+                       con_and_true = False
+               else:
+                       con_and_true = True
+
+               for key in con.keys():
+                       #print "looking at key: %s" % key
+                       if key in data: 
+                               value_re = re.compile(con[key])
+                               if type([]) == type(data[key]):
+                                       local_or_true = False
+                                       for val in data[key]:
+                                               local_or_true = local_or_true | (value_re.search(val) is not None)
+                                       con_and_true = con_and_true & local_or_true
+                               else:
+                                       con_and_true = con_and_true & (value_re.search(data[key]) is not None)
+                       elif key not in data:
+                               print "missing key %s" % key,
+                               pass
+                               #print "missing key %s" % key
+                               #con_and_true = False
+
+               con_or_true = con_or_true | con_and_true
+
+       return con_or_true
+
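+# Parse a query string into OR-of-AND form, e.g.
+#   "state=BOOT&&ssh=open||state=DOWN"
+# becomes [{'state': 'BOOT', 'ssh': 'open'}, {'state': 'DOWN'}].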
+def query_to_dict(query):
+       
+       ad = []
+
+       or_queries = query.split('||')
+       for or_query in or_queries:
+               and_queries = or_query.split('&&')
+
+               d = {}
+
+               for and_query in and_queries:
+                       (key, value) = and_query.split('=')
+                       d[key] = value
+
+               ad.append(d)
+       
+       return ad
+
+def pcu_in(fbdata):
+       if 'plcnode' in fbdata:
+               if 'pcu_ids' in fbdata['plcnode']:
+                       if len(fbdata['plcnode']['pcu_ids']) > 0:
+                               return True
+       return False
+
+def node_select(str_query, nodelist=None, fbdb=None):
+       global fb
+
+       hostnames = []
+       if str_query is None: return hostnames
+
+       #print str_query
+       dict_query = query_to_dict(str_query)
+       #print dict_query
+
+       if fbdb is not None:
+               fb = fbdb
+
+       for node in fb['nodes'].keys():
+               if nodelist is not None: 
+                       if node not in nodelist: continue
+       
+               fb_nodeinfo  = fb['nodes'][node]['values']
+
+		if fb_nodeinfo == []:
+			# this node's record has lost its values; skip it
+			continue
+               #fb_nodeinfo['pcu'] = color_pcu_state(fb_nodeinfo)
+               fb_nodeinfo['hostname'] = node
+               if 'plcnode' in fb_nodeinfo:
+                       fb_nodeinfo.update(fb_nodeinfo['plcnode'])
+
+               if verify(dict_query, fb_nodeinfo):
+                       #print node #fb_nodeinfo
+                       hostnames.append(node)
+               else:
+                       #print "NO MATCH", node
+                       pass
+       
+       return hostnames
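+# A usage sketch (hypothetical query string):
+#   node_select("boot_state=boot&&hostname=edu", fbdb=fb)
+# returns every hostname in fb['nodes'] whose merged record satisfies
+# the query according to verify() above.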
+
diff --git a/statistics/parserpms.py b/statistics/parserpms.py
new file mode 100755 (executable)
index 0000000..6ca34b2
--- /dev/null
@@ -0,0 +1,25 @@
+#!/usr/bin/python
+
+import sys
+import os
+import md5
+
+def list_to_md5(strlist):
+       digest = md5.new()
+       for f in strlist:
+               digest.update(f)
+
+       return digest.hexdigest()
+
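+# Each stdin line is assumed to look like "<tag> <hostname> <rpm> <rpm> ...":
+# fields[1] is taken as the host and the rest as its rpm list, so hosts
+# with identical rpm sets print identical md5 sums.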
+while True:
+       line = sys.stdin.readline()
+       if not line:
+               break
+       line = line.strip()
+       fields = line.split()
+       host = fields[1]
+       rpms = fields[2:]
+       rpms.sort()
+	if len(rpms) != 0:
+		checksum = list_to_md5(rpms)
+		print checksum, host
diff --git a/statistics/rtsurvey.py b/statistics/rtsurvey.py
new file mode 100755 (executable)
index 0000000..2f2babd
--- /dev/null
@@ -0,0 +1,226 @@
+#!/usr/bin/python
+
+import os, sys, shutil
+import MySQLdb
+import string
+
+import re
+
+import time 
+from datetime import datetime
+
+from monitor import config
+from monitor import database
+
+def convert_time(time_str):
+	# accepts "YYYY-MM-DD HH:MM:SS", "YYYY-MM-DD-HH:MM", or "MM/DD/YYYY"
+	if '-' in time_str:
+		try:
+			tup = time.strptime(time_str, "%Y-%m-%d %H:%M:%S")
+		except ValueError:
+			tup = time.strptime(time_str, "%Y-%m-%d-%H:%M")
+	else:
+		# the '/'-separated and fallback cases share the same format
+		tup = time.strptime(time_str, "%m/%d/%Y")
+	d_ret = datetime.fromtimestamp(time.mktime(tup))
+	return d_ret
+
+def open_rt_db():
+
+       try:
+               rt_db = MySQLdb.connect(host=config.RT_DB_HOST,
+                                                               user=config.RT_DB_USER,
+                                                               passwd=config.RT_DB_PASSWORD,
+                                                               db=config.RT_DB_NAME)
+       except Exception, err:
+               print "Failed to connect to RT database: %s" %err
+               return -1
+
+       return rt_db
+
+def fetch_from_db(db, sql):
+       try:
+               # create a 'cursor' (required by MySQLdb)
+               c = db.cursor()
+               c.execute(sql)
+       except Exception, err:
+               print "Could not execute RT query %s" %err
+               return -1
+
+       # fetch all rows (list of lists)
+       raw = c.fetchall()
+       return raw
+       
+
+def get_rt_tickets():
+       print "open db connection"
+       db = open_rt_db()
+	if db == -1:
+		return {}
+
+       sql = """SELECT tk.id, tk.Queue, tr.Type, tr.Field, tr.OldValue, tr.NewValue, 
+                    tr.Created, at.id, at.Subject, at.Content
+             FROM Tickets as tk, Transactions as tr 
+             LEFT OUTER JOIN Attachments as at ON tr.id=at.TransactionId 
+             WHERE (tk.Queue=3 OR tk.Queue=22) AND tk.id=tr.ObjectId AND tk.id>10000  """
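+	# Pull every transaction (plus any attachment subject/content) for
+	# tickets with id > 10000; queues 3 and 22 are assumed to be the
+	# support-related queues of interest here.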
+
+       print "run query"
+       raw = fetch_from_db(db, sql)
+       if raw == -1:
+               return raw
+       
+       tickets = {}
+       subject_map = {}
+       def parse_ticket(x):
+               ticket_id = int(x[0])
+               queue = int(x[1])
+               trtype = str(x[2])
+               field = x[3]
+               oldvalue = x[4]
+               newvalue = x[5]
+               datecreated = x[6]              # already a datetime object
+               attachmentid = x[7]
+               subject = x[8]
+               content = x[9]
+
+               if ticket_id not in tickets:
+                       print "found new ticket_id", ticket_id
+                       tickets[ticket_id] = {'queue' : queue,
+                                                                 'transactions' : [] }
+
+               if subject != "":
+                       subject_map[ticket_id] = subject
+               elif ticket_id in subject_map:
+                       subject = subject_map[ticket_id]
+               else:
+                       # subject == "" and no record in subject_map yet
+                       # should probably put on a queue to be processed later.
+                       print "no subject for %s" % ticket_id
+
+               transaction = {
+                                       'type' : trtype,
+                                       'field' : field,
+                                       'oldvalue' : oldvalue,
+                                       'newvalue' : newvalue,
+                                       'datecreated' : datecreated,
+                                       'attachmentid' : attachmentid,
+                                       'subject' : subject,
+                                       'content' : content,
+                                               }
+               tickets[ticket_id]['transactions'].append(transaction)
+               
+
+	print "parse data"
+	# parse_ticket() populates tickets/subject_map via side effects
+	for row in raw:
+		parse_ticket(row)
+
+       db.close()
+
+
+       return tickets
+
+
+# flow chart:
+#              classify:
+#                      for each ticket
+#                              classify into category
+#                              remove from ticket set, add to classified-set
+#              
+#              add new search patterns, 
+#              re-run classify algorithm
+
+re_map = [
+       #('mom', {'pattern' : '.*pl_mom.*'}),
+       #('technical-support', {'pattern' : '.*PlanetLab node.* down'}),
+       #('technical-support', {'pattern' : 'Node .* was stopped by'}),  # and opened
+       #('technical-support', {'pattern' : 'bootcd|BootCD|bootCD|boot cd|boot CD|booting'}),
+       #('technical-support', {'pattern' : '.* failed to authenticate'}),
+       #('technical-support', {'pattern' : '.* fails to boot'}),
+       #('technical-support', {'pattern' : '.* fail.* to boot'}),
+       #('technical-support', {'pattern' : '.* failed to authenticate'}),
+       #('technical-support', {'pattern' : 'curl (60)|.* CA certificates.*|peer certificate.*authenticated'}),
+       #('technical-support', {'pattern' : '(usb|USB).*(key|Disk|stick|boot|help|problem|trouble)'}), 
+       #('complaint', {'pattern' : '.*omplaint|.*attack'}),
+       #('complaint', {'pattern' : '.* stop .*'}), # and subject
+	#('spam', {}),
+       #('user-support', {'pattern' : '(R|r)egistration|(R|r)egister'}),
+       #('user-support', {'pattern' : 'password reset|reset password'}),
+       ('user-support', {'pattern' : 'New PI account registration from'}),
+       #('other', {}),
+]
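+# New categories can be added by appending (name, {'pattern': ...}) pairs,
+# e.g. (hypothetical): ('node-down', {'pattern' : 'node .* (is|went) down'}).
+# sort_tickets() compiles each pattern lazily on first use.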
+
+def sort_tickets(tickets, re_map):
+
+       ticket_count = len(tickets.keys())
+       marked_subject = 0
+       marked_content = 0
+       for ticket_id in sorted(tickets.keys()):
+               for i,(name, pattern) in enumerate(re_map):
+                       if 'compile' not in pattern:
+                               pattern['compile'] = re.compile(pattern['pattern'])
+                       pat = pattern['compile']
+                       for transaction in tickets[ticket_id]['transactions']:
+
+                               try:
+                                       if transaction['subject'] and re.match(pat, transaction['subject']):
+                                               print "ticket %s matches pattern %s: %s" % (ticket_id, 
+                                                               pattern['pattern'], transaction['subject'])
+                                               marked_subject += 1
+                                               break
+                                       if transaction['content'] and re.match(pat, transaction['content']):
+                                               print "ticket %s matches pattern %s: %s" % (ticket_id, 
+                                                               pattern['pattern'], transaction['subject'])
+                                               #if transaction['subject'] == "":
+                                               #       print transaction
+                                               marked_content += 1
+                                               break
+				except Exception:
+					import traceback
+					traceback.print_exc()
+                                       print transaction
+                                       print ticket_id
+                                       print pattern
+                                       sys.exit(1)
+
+	print "total tickets:", ticket_count
+	print "matched by subject:", marked_subject
+	print "matched by content:", marked_content
+	print "unmatched:", ticket_count - marked_subject - marked_content
+
+def main():
+       from optparse import OptionParser
+       parser = OptionParser()
+
+       parser.set_defaults(runsql=False,)
+
+       parser.add_option("", "--runsql", dest="runsql", action="store_true",
+                                               help="Whether to collect data from the MySQL server before "+
+                                                       "caching it, or to just use the previously collected data.")
+
+	(options, args) = parser.parse_args()
+       if len(sys.argv) == 1:
+               parser.print_help()
+               sys.exit(1)
+
+       for i,(name, pattern) in enumerate(re_map):
+               print i, name
+
+	if options.runsql:
+               tickets = get_rt_tickets()
+               database.dbDump("survey_tickets", tickets)
+       else:
+               print "loading"
+               tickets = database.dbLoad("survey_tickets")
+       print tickets[42171]['transactions'][0]
+
+       sort_tickets(tickets, re_map)
+
+       # for each ticket id
+       #       scan for known keywords and sort into classes
+       #       record assigned class
+
+       # review all tickets that remain
+
+if __name__ == '__main__':
+       main()
diff --git a/statistics/sliceavg.py b/statistics/sliceavg.py
new file mode 100755 (executable)
index 0000000..739814f
--- /dev/null
@@ -0,0 +1,45 @@
+#!/usr/bin/python
+
+import os
+import sys
+
+from monitor.wrapper import plc
+
+api = plc.cacheapi
+api.AuthCheck()
+
+# for each site, find total number of assigned slivers
+# find median, high, low, average
+
+site_list = []
+
+for site in api.GetSites({'peer_id': None}):
+       sl = api.GetSlices(site['slice_ids'])
+       sliver_cnt = 0
+       for slice in sl:
+               sliver_cnt += len(slice['node_ids'])
+       val = (site['login_base'], sliver_cnt)
+       site_list.append(val)
+       #print val
+
+print "------------------------------------------"
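+# sort descending by sliver count, so site_list[0] is the busiest site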
+site_list.sort(lambda x,y: cmp(y[1], x[1]))
+totals = 0
+use_count = 0
+for i in site_list:
+       if i[1] != 0: 
+               print "%10s %s" % i
+               use_count += 1
+       totals += i[1]
+
+site_avg = totals/len(site_list)
+
+print "high: %s %s" % site_list[0]
+print "low: %s %s" % site_list[-1]
+print "median: %s %s" % site_list[len(site_list)/2]
+print "used median: %s %s" % site_list[use_count/2]
+print "all avg: %s" % site_avg
+print "used avg: %s" % (totals/use_count)
+print "totals: %s" % totals 
+print "use_count: %s" % use_count
+print "site_count: %s" % len(site_list)
diff --git a/statistics/usedtickets.py b/statistics/usedtickets.py
new file mode 100755 (executable)
index 0000000..b3ab662
--- /dev/null
@@ -0,0 +1,21 @@
+#!/usr/bin/python
+
+from monitor.model import *
+
+sql = database.dbLoad("idTickets")
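+# 'idTickets' is assumed to map RT ticket ids (as strings) to records
+# carrying at least a 'status' field, as the lookups below expect.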
+for db in ["monitor", "pcu", "bootcd", "hardware", "unknown", 
+                 "suspect", "baddns", "nodenet", "nodeid"]:
+       db = "%s_persistmessages" % db
+       #print db
+       try:
+               pm = database.dbLoad(db)
+       except:
+               continue
+	for host in pm.keys():
+		m = pm[host]
+		tid = str(m.ticket_id)
+		if m.ticket_id > 0:
+			if tid in sql:
+				print "%s %6s %s" % (m.ticket_id, sql[tid]['status'], host)
+			else:
+				print "%s closed %s" % (m.ticket_id, host)