From 8814d16dc60e8027c9a30963b47dd77b54efcdbf Mon Sep 17 00:00:00 2001 From: Stephen Soltesz Date: Fri, 12 Jun 2009 16:27:07 +0000 Subject: [PATCH] add scrappy statistics gathering scripts --- statistics/aggregate-nm.py | 103 +++++++++++++ statistics/aggregate.py | 39 +++++ statistics/aggregatehistory.py | 133 ++++++++++++++++ statistics/correspondence.py | 33 ++++ statistics/harvestrt.py | 46 ++++++ statistics/monitorstats.py | 80 ++++++++++ statistics/nodebad.py | 152 +++++++++++++++++++ statistics/nodecommon.py | 200 ++++++++++++++++++++++++ statistics/nodediff-graph-better.py | 138 +++++++++++++++++ statistics/nodediff-graph.py | 100 ++++++++++++ statistics/nodediff-length.py | 107 +++++++++++++ statistics/nodediff.py | 128 ++++++++++++++++ statistics/nodequeryold.py | 216 ++++++++++++++++++++++++++ statistics/parserpms.py | 25 +++ statistics/rtsurvey.py | 226 ++++++++++++++++++++++++++++ statistics/sliceavg.py | 45 ++++++ statistics/usedtickets.py | 21 +++ 17 files changed, 1792 insertions(+) create mode 100755 statistics/aggregate-nm.py create mode 100755 statistics/aggregate.py create mode 100755 statistics/aggregatehistory.py create mode 100644 statistics/correspondence.py create mode 100755 statistics/harvestrt.py create mode 100644 statistics/monitorstats.py create mode 100755 statistics/nodebad.py create mode 100644 statistics/nodecommon.py create mode 100755 statistics/nodediff-graph-better.py create mode 100644 statistics/nodediff-graph.py create mode 100755 statistics/nodediff-length.py create mode 100755 statistics/nodediff.py create mode 100755 statistics/nodequeryold.py create mode 100755 statistics/parserpms.py create mode 100755 statistics/rtsurvey.py create mode 100755 statistics/sliceavg.py create mode 100755 statistics/usedtickets.py diff --git a/statistics/aggregate-nm.py b/statistics/aggregate-nm.py new file mode 100755 index 0000000..70a8574 --- /dev/null +++ b/statistics/aggregate-nm.py @@ -0,0 +1,103 @@ +#!/usr/bin/python + +from monitor.wrapper import plc +api = plc.getAuthAPI() + +from monitor import database +import time +from datetime import datetime, timedelta +import calendar + +import sys +import time +from monitor.model import * + +from monitorstats import * + + +def main(): + from monitor import parser as parsermodule + + parser = parsermodule.getParser() + parser.set_defaults(node=None, aggname='aggregatenm', archivedir='archive-pdb', field='nm', value='Y', fromtime=None, load=False, state='BOOT') + parser.add_option("", "--node", dest="node", metavar="nodename.edu", + help="A single node name to add to the nodegroup") + parser.add_option("", "--archivedir", dest="archivedir", metavar="filename", + help="Pickle file aggregate output.") + parser.add_option("", "--aggname", dest="aggname", metavar="filename", + help="Pickle file aggregate output.") + parser.add_option("", "--field", dest="field", metavar="key", + help="Which record field to extract from all files.") + parser.add_option("", "--value", dest="value", metavar="val", + help="Which value to look for in field.") + parser.add_option("", "--state", dest="state", metavar="key", + help="Which boot state to accept.") + parser.add_option("", "--load", action="store_true", + help="load aggregatenm rather than recreate it.") + parser.add_option("", "--fromtime", dest="fromtime", metavar="YYYY-MM-DD", + help="Specify a starting date from which to begin the query.") + config = parsermodule.parse_args(parser) + + archive = get_archive(config.archivedir) + agg = {} + + if config.fromtime: + begin = 
config.fromtime + else: + begin = "2008-09-28" + + d = datetime_fromstr(begin) + tdelta = timedelta(1) + verbose = 1 + + if not config.load: + while True: + file = get_filefromglob(d, "production.findbad", config.archivedir) + print archive.path + fb = archive.load(file) + try: + print "nodes: ", len(fb['nodes']) + state_count=0 + for node in fb['nodes']: + fb_nodeinfo = fb['nodes'][node]['values'] + time = d.strftime("%Y-%m-%d") + + if type(fb_nodeinfo) == type([]): + continue + + if fb_nodeinfo['state'] != config.state: + continue + state_count += 1 + + if node not in agg: + agg[node] = { 'total' : 0, 'up' : 0} + + agg[node]['total'] += 1 + if fb_nodeinfo[config.field] == config.value: + agg[node]['up'] += 1 + print "%s nodes in state %s" % ( state_count, config.state ) + + del fb + verbose = 0 + except SystemExit: + sys.exit(1) + except KeyboardInterrupt: + sys.exit(1) + except: + import traceback; print traceback.print_exc() + print d.strftime("%Y-%m-%d"), "No record" + + d = d + tdelta + if d > datetime.now(): break + else: + agg = database.dbLoad(config.aggname) + + for node in agg: + if agg[node]['total'] > 0: + if agg[node]['up'] != agg[node]['total']: + print "%s %s" % (node, float(agg[node]['up']) / float(agg[node]['total'])) + + database.dbDump(config.aggname, agg) + +if __name__ == "__main__": + main() diff --git a/statistics/aggregate.py b/statistics/aggregate.py new file mode 100755 index 0000000..371a2b8 --- /dev/null +++ b/statistics/aggregate.py @@ -0,0 +1,39 @@ +#!/usr/bin/python + + +from monitor import database +import time +import sys + +actall = database.dbLoad("act_all_080825") +agg = database.dbLoad("aggregatehistory") + +for node in actall.keys(): + for record in actall[node]: + if 'date_created' in record: + t = record['date_created'] + elif 'time' in record: + t = record['time'] + else: + continue + + acttime = time.strftime("%Y-%m-%d", time.localtime(t)) + + if acttime > '2007-11-06': + if 'noop' in record['action']: + if node in agg: + for ntime,state in agg[node]: + if state == 'BOOT': + if ntime > acttime: + if type(record['action']) == type([]): + action = record['action'][0] + else: + action = record['action'] + print acttime, action, ntime, state, node + + #print time.strftime("%Y-%m-%d", time.localtime(t)), record['action'], node + +#for node in agg: +# for ntime,state in agg[node]: +# if state == 'BOOT': +# print ntime, state, node diff --git a/statistics/aggregatehistory.py b/statistics/aggregatehistory.py new file mode 100755 index 0000000..588d24c --- /dev/null +++ b/statistics/aggregatehistory.py @@ -0,0 +1,133 @@ +#!/usr/bin/python + +import plc +api = plc.getAuthAPI() + +import database +import reboot +import time +from datetime import datetime, timedelta +import calendar + +import sys +import time +from monitor.model import * +from nodecommon import * + +def get_filefromglob(d, str): + import os + import glob + # TODO: This is aweful. 
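+ # chdir into archive-pdb, take the first pickle matching
+ # "<YYYY-MM-DD>*.<str>.pkl", and return its basename without the ".pkl"
+ # extension so database.SPickle.load() can reload it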
+ path = "archive-pdb" + archive = database.SPickle(path) + glob_str = "%s*.%s.pkl" % (d.strftime("%Y-%m-%d"), str) + os.chdir(path) + #print glob_str + file = glob.glob(glob_str)[0] + #print "loading %s" % file + os.chdir("..") + return file[:-4] + #fb = archive.load(file[:-4]) + + +def fb_print_nodeinfo(fbnode, verbose, date=None): + if verbose: print " state | ssh | pcu | bootcd | category | kernel" + if 'checked' in fbnode: + print "%11.11s " % diff_time(fbnode['checked']), + else: + if date: print date, + else: print "Unknown", + + if fbnode['bootcd']: + fbnode['bootcd'] = fbnode['bootcd'].split()[-1] + else: + fbnode['bootcd'] = "unknown" + fbnode['state'] = color_boot_state(get_current_state(fbnode)) + if len(fbnode['kernel'].split()) >= 3: + fbnode['kernel'] = fbnode['kernel'].split()[2] + print " %(state)5s | %(ssh)5.5s | %(pcu)5.5s | %(bootcd)6.6s | %(category)8.8s | %(kernel)s" % fbnode + +def pcu_print_info(pcuinfo, hostname): + print " Checked: ", + if 'checked' in pcuinfo: + print "%11.11s " % diff_time(pcuinfo['checked']) + else: + print "Unknown" + + print "\t user | password | port | hostname " + print "\t %17s | %17s | %4s | %30s | %s" % \ + (pcuinfo['username'], pcuinfo['password'], + pcuinfo[hostname], reboot.pcu_name(pcuinfo), pcuinfo['model']) + + if 'portstatus' in pcuinfo and pcuinfo['portstatus'] != {}: + if pcuinfo['portstatus']['22'] == "open": + print "\t ssh -o PasswordAuthentication=yes -o PubkeyAuthentication=no %s@%s" % (pcuinfo['username'], reboot.pcu_name(pcuinfo)) + if pcuinfo['portstatus']['23'] == "open": + print "\t telnet %s" % (reboot.pcu_name(pcuinfo)) + if pcuinfo['portstatus']['80'] == "open" or \ + pcuinfo['portstatus']['443'] == "open": + print "\t http://%s" % (reboot.pcu_name(pcuinfo)) + if pcuinfo['portstatus']['443'] == "open": + print "\t racadm.py -r %s -u %s -p '%s'" % (pcuinfo['ip'], pcuinfo['username'], pcuinfo['password']) + print "\t cmdhttps/locfg.pl -s %s -f iloxml/Reset_Server.xml -u %s -p '%s' | grep MESSAGE" % \ + (reboot.pcu_name(pcuinfo), pcuinfo['username'], pcuinfo['password']) + +agg = {} + +def main(): + import parser as parsermodule + + parser = parsermodule.getParser() + parser.set_defaults(node=None, fields='state', fromtime=None) + parser.add_option("", "--node", dest="node", metavar="nodename.edu", + help="A single node name to add to the nodegroup") + parser.add_option("", "--fields", dest="fields", metavar="key", + help="Which record field to extract from all files.") + parser.add_option("", "--fromtime", dest="fromtime", metavar="YYYY-MM-DD", + help="Specify a starting date from which to begin the query.") + config = parsermodule.parse_args(parser) + + path = "archive-pdb" + archive = database.SPickle(path) + + if config.fromtime: + begin = config.fromtime + else: + begin = "2007-11-06" + + d = datetime_fromstr(begin) + tdelta = timedelta(1) + verbose = 1 + + while True: + try: + file = get_filefromglob(d, "production.findbad") + fb = archive.load(file) + for node in fb['nodes']: + fb_nodeinfo = fb['nodes'][node]['values'] + state = fb_nodeinfo['state'] + time = d.strftime("%Y-%m-%d") + if node not in agg: + agg[node] = [] + if len(agg[node]) == 0: + agg[node].append((time, state)) + else: + oldtime = agg[node][-1][0] + oldstate = agg[node][-1][1] + if oldstate != state: + agg[node].append((time, state)) + del fb + verbose = 0 + except KeyboardInterrupt: + sys.exit(1) + except: + #import traceback; print traceback.print_exc() + print d.strftime("%Y-%m-%d"), "No record" + + d = d + tdelta + if d > datetime.now(): 
break + + database.dbDump("aggregatehistory", agg) + +if __name__ == "__main__": + main() diff --git a/statistics/correspondence.py b/statistics/correspondence.py new file mode 100644 index 0000000..db9ad7b --- /dev/null +++ b/statistics/correspondence.py @@ -0,0 +1,33 @@ + + +def dt_mod_range(dt, range=(60*60*24*7)): + t_stamp = time.mktime(dt.timetuple()) + t_stamp -= (t_stamp % range) + dt_ret = datetime.datetime.fromtimestamp(t_stamp) + return dt_ret + +SUPPORT =3 +MONITOR =22 + +weekly_bin = {} +c = 0 +for ticket in tickets.keys(): + if tickets[ticket]['queue'] != MONITOR: continue + for t in tickets[ticket]['transactions']: + if t['type'] == 'Correspond': + #print t['datecreated'], t['field'], t['oldvalue'], t['type'], t['newvalue'], t['subject'] + k = dt_mod_range(t['datecreated']) + s_key = k.strftime("%Y-%m-%d") + if s_key not in weekly_bin: weekly_bin[s_key] = 0 + + weekly_bin[s_key] += 1 + + # c += 1 + #if c > 100 : break; + #break; + +dates = weekly_bin.keys() +dates.sort() +for t in dates: + print t, ",", weekly_bin[t] + diff --git a/statistics/harvestrt.py b/statistics/harvestrt.py new file mode 100755 index 0000000..f3940e0 --- /dev/null +++ b/statistics/harvestrt.py @@ -0,0 +1,46 @@ +#!/usr/bin/python + +import os +import time +from datetime import datetime, timedelta +import sys + +def popen(cmdstr): + f = os.popen(cmdstr) + ret = f.read() + return ret + +def datetime_fromstr(str): + if '-' in str: + try: + tup = time.strptime(str, "%Y-%m-%d") + except: + tup = time.strptime(str, "%Y-%m-%d-%H:%M") + elif '/' in str: + tup = time.strptime(str, "%m/%d/%Y") + else: + tup = time.strptime(str, "%m/%d/%Y") + ret = datetime.fromtimestamp(time.mktime(tup)) + return ret + + +def main(): + queue = sys.argv[1] + d1 = datetime_fromstr(sys.argv[2]) + iterations = int(sys.argv[3]) + i = 0 + while i < iterations: + d1_s = d1.strftime("%Y-%m-%d") + d2 = d1 + timedelta(30) + d2_s = d2.strftime("%Y-%m-%d") + query = "Queue='%s' and " % queue + query = query + "Told > '%s' and Told < '%s'" % (d1_s, d2_s) + cmd = """rt ls -t ticket "%s" | grep -v "No matching" | wc -l """ % query + print cmd + ret = popen(cmd) + print d1_s, ",", ret[:-1] + d1=d2 + i += 1 + +if __name__ == "__main__": + main() diff --git a/statistics/monitorstats.py b/statistics/monitorstats.py new file mode 100644 index 0000000..8fc24d5 --- /dev/null +++ b/statistics/monitorstats.py @@ -0,0 +1,80 @@ + +from monitor import database +from datetime import datetime, timedelta +import os +import glob +import time + +from monitor import config + +def datetime_fromstr(str): + if '-' in str: + try: + tup = time.strptime(str, "%Y-%m-%d") + except: + tup = time.strptime(str, "%Y-%m-%d-%H:%M") + elif '/' in str: + tup = time.strptime(str, "%m/%d/%Y") + else: + tup = time.strptime(str, "%m/%d/%Y") + ret = datetime.fromtimestamp(time.mktime(tup)) + return ret + +def get_filefromglob(d, str, path="archive-pdb", returnlist=False): + # TODO: This is aweful. 
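+ # remember the caller's cwd, glob for "<YYYY-MM-DD>*.<str>.pkl" under
+ # MONITOR_SCRIPT_ROOT/<path>, restore the cwd, and return either the first
+ # match or the sorted list of matches (".pkl" extension stripped)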
+ startpath = os.getcwd() + os.chdir(config.MONITOR_SCRIPT_ROOT + "/" + path) + + #archive = database.SPickle(path) + glob_str = "%s*.%s.pkl" % (d.strftime("%Y-%m-%d"), str) + fg_list = [ x[:-4] for x in glob.glob(glob_str) ] + + os.chdir(startpath) + + if returnlist: + return sorted(fg_list) + else: + return fg_list[0] + +def get_archive(path): + full_path = config.MONITOR_SCRIPT_ROOT + "/" + path + return database.SPickle(full_path) + +def print_graph(data, begin, end, xaxis, offset=500, window=100): + s1=[] + s2=[] + s3=[] + for row in data: + s1.append(row[0]) + s2.append(row[1]) + s3.append(row[2]) + + delta=offset + s1 = map(lambda x: x-delta, s1) + rlow= zip(s1,s3) + rhigh = zip(s1,s2) + diff_low = map(lambda x: x[0]-x[1], rlow) + diff_high = map(lambda x: x[0]+x[1], rhigh) + s1 = map(lambda x: str(x), s1) + diff_low = map(lambda x: str(x), diff_low) + diff_high = map(lambda x: str(x), diff_high) + print s1 + print diff_low + print diff_high + print "http://chart.apis.google.com/chart?cht=lc&chds=0,100&chxt=x,y&chxl=0:%s1:|500|550|600&chs=700x200&chm=F,aaaaaa,1,-1,2&chd=t1:%s" % (xaxis, ",".join(s1) + "|" + ",".join(diff_low) + "|" + ",".join(s1) + "|" + ",".join(s1) +"|" + ",".join(diff_high) ) + +def get_xaxis(list, width=16, wide=False): + # 3 for odd + # 4 for even + # 5 for wide odd + # 6 for wide even + list_len = len(list) + if list_len == 0: return "||" + + is_even = list_len % 2 == 0 + #if is_even: + # xaxis = "|" + list[0][:width] + "|" + list[-1][:width] + "|" + #else: + xaxis = "|" + list[0][:width] + "|" + list[list_len/2 + 1][:width] + "|" + list[-1][:width] + "|" + return xaxis + diff --git a/statistics/nodebad.py b/statistics/nodebad.py new file mode 100755 index 0000000..eec69be --- /dev/null +++ b/statistics/nodebad.py @@ -0,0 +1,152 @@ +#!/usr/bin/python + +import os +import sys +import string +import time + + +from monitor import database +from nodequeryold import verify,query_to_dict,node_select +from monitor.common import * + +from monitor.wrapper import plc +api = plc.getAuthAPI() +from monitor.model import * + +round = 1 +externalState = {'round': round, 'nodes': {}} +count = 0 + +def main(config): + global externalState + externalState = database.if_cached_else(1, config.dbname, lambda : externalState) + if config.increment: + # update global round number to force refreshes across all nodes + externalState['round'] += 1 + + #l_nodes = syncplcdb.create_plcdb() + l_plcnodes = database.dbLoad("l_plcnodes") + + l_nodes = get_nodeset(config) + #if config.node: + # l_nodes = [config.node] + ##else: + # l_nodes = [node['hostname'] for node in l_plcnodes] + + checkAndRecordState(l_nodes, l_plcnodes) + +def checkAndRecordState(l_nodes, l_plcnodes): + global externalState + global count + global_round = externalState['round'] + + for nodename in l_nodes: + if nodename not in externalState['nodes']: + externalState['nodes'][nodename] = {'round': 0, 'values': []} + + node_round = externalState['nodes'][nodename]['round'] + if node_round < global_round: + # do work + values = collectStatusAndState(nodename, l_plcnodes) + global_round = externalState['round'] + externalState['nodes'][nodename]['values'] = values + externalState['nodes'][nodename]['round'] = global_round + else: + count += 1 + + if count % 20 == 0: + database.dbDump(config.dbname, externalState) + + database.dbDump(config.dbname, externalState) + +fb = database.dbLoad('findbad') + +def getnodesup(nodelist): + up = 0 + for node in nodelist: + if node['hostname'] in fb['nodes'].keys(): + try: + if 
fb['nodes'][node['hostname']]['values']['state'] == "BOOT": + up = up + 1 + except: + pass + return up + +def get(fb, path): + indexes = path.split("/") + values = fb + for index in indexes: + if index in values: + values = values[index] + else: + return None + return values + +def collectStatusAndState(nodename, l_plcnodes): + global count + + d_node = None + for node in l_plcnodes: + if node['hostname'] == nodename: + d_node = node + break + if not d_node: + return None + + pf = PersistFlags(nodename, 1, db='node_persistflags') + + if not pf.checkattr('last_changed'): + pf.last_changed = time.time() + + pf.last_checked = time.time() + + if not pf.checkattr('status'): + pf.status = "unknown" + + state_path = "nodes/" + nodename + "/values/state" + bootstate_path = "nodes/" + nodename + "/values/plcnode/boot_state" + + if get(fb, state_path) == "BOOT": + if pf.status != "good": pf.last_changed = time.time() + pf.status = "good" + elif get(fb, state_path) == "DEBUG": + bs = get(fb, bootstate_path) + if pf.status != bs: pf.last_changed = time.time() + pf.status = bs + else: + if pf.status != "down": pf.last_changed = time.time() + pf.status = "down" + + count += 1 + print "%d %35s %s since(%s)" % (count, nodename, pf.status, diff_time(pf.last_changed)) + # updated by other modules + #pf.enabled = + #pf.suspended = + + pf.save() + + return True + +if __name__ == '__main__': + import parser as parsermodule + parser = parsermodule.getParser(['nodesets']) + parser.set_defaults(filename=None, node=None, nodeselect=False, nodegroup=None, + increment=False, dbname="nodebad", cachenodes=False) + + parser.add_option("", "--dbname", dest="dbname", metavar="FILE", + help="Specify the name of the database to which the information is saved") + parser.add_option("-i", "--increment", action="store_true", dest="increment", + help="Increment round number to force refresh or retry") + parser = parsermodule.getParser(['defaults'], parser) + config = parsermodule.parse_args(parser) + + try: + main(config) + except Exception, err: + import traceback + print traceback.print_exc() + print "Exception: %s" % err + print "Saving data... exitting." 
+ database.dbDump(config.dbname, externalState) + sys.exit(0) diff --git a/statistics/nodecommon.py b/statistics/nodecommon.py new file mode 100644 index 0000000..042f80f --- /dev/null +++ b/statistics/nodecommon.py @@ -0,0 +1,200 @@ + +import struct +import time +from monitor.util import file +from monitor.wrapper import plc +from datetime import datetime +from monitor import database +esc = struct.pack('i', 27) +RED = esc + "[1;31m" +GREEN = esc + "[1;32m" +YELLOW = esc + "[1;33m" +BLUE = esc + "[1;34m" +LIGHTBLUE = esc + "[1;36m" +NORMAL = esc + "[0;39m" + +def red(str): + return RED + str + NORMAL + +def yellow(str): + return YELLOW + str + NORMAL + +def green(str): + return GREEN + str + NORMAL + +def lightblue(str): + return LIGHTBLUE + str + NORMAL + +def blue(str): + return BLUE + str + NORMAL + +def get_current_state(fbnode): + if 'state' in fbnode: + state = fbnode['state'] + else: + state = "none" + l = state.lower() + if l == "debug": l = 'dbg ' + return l + +def color_pcu_state(fbnode): + import reboot + + if 'plcnode' in fbnode and 'pcu_ids' in fbnode['plcnode'] and len(fbnode['plcnode']['pcu_ids']) > 0 : + values = reboot.get_pcu_values(fbnode['plcnode']['pcu_ids'][0]) + if values == None: + return fbnode['pcu'] + else: + if 'pcu' not in fbnode: + return 'NOPCU' + else: + return fbnode['pcu'] + + if 'reboot' in values: + rb = values['reboot'] + if rb == 0 or rb == "0": + return fbnode['pcu'] + "OK " + #return fbnode['pcu'] + "OK " + #return green(fbnode['pcu']) + elif "NetDown" == rb or "Not_Run" == rb: + return fbnode['pcu'] + "DOWN" + #return yellow(fbnode['pcu']) + else: + return fbnode['pcu'] + "BAD " + #return red(fbnode['pcu']) + else: + #return red(fbnode['pcu']) + return fbnode['pcu'] + "BAD " + +def color_boot_state(l): + if l == "dbg": return yellow("debg") + elif l == "dbg ": return yellow("debg") + elif l == "diag": return lightblue(l) + elif l == "disable": return red("dsbl") + elif l == "down": return red(l) + elif l == "boot": return green(l) + elif l == "rins": return blue(l) + else: + return l + +def diff_time(timestamp, abstime=True): + import math + now = time.time() + if timestamp == None: + return "unknown" + if abstime: + diff = now - timestamp + else: + diff = timestamp + # return the number of seconds as a difference from current time. + t_str = "" + if diff < 60: # sec in min. 
+ t = diff / 1 + t_str = "%s sec ago" % int(math.ceil(t)) + elif diff < 60*60: # sec in hour + t = diff / (60) + t_str = "%s min ago" % int(math.ceil(t)) + elif diff < 60*60*24: # sec in day + t = diff / (60*60) + t_str = "%s hrs ago" % int(math.ceil(t)) + elif diff < 60*60*24*14: # sec in week + t = diff / (60*60*24) + t_str = "%s days ago" % int(math.ceil(t)) + elif diff <= 60*60*24*30: # approx sec in month + t = diff / (60*60*24*7) + t_str = "%s wks ago" % int(math.ceil(t)) + elif diff > 60*60*24*30: # approx sec in month + t = diff / (60*60*24*30) + t_str = "%s mnths ago" % int(t) + return t_str + +def getvalue(fb, path): + indexes = path.split("/") + values = fb + for index in indexes: + if index in values: + values = values[index] + else: + return None + return values + +def nodegroup_display(node, fb, conf=None): + from unified_model import PersistFlags + if node['hostname'] in fb['nodes']: + node['current'] = get_current_state(fb['nodes'][node['hostname']]['values']) + else: + node['current'] = 'none' + + if fb['nodes'][node['hostname']]['values'] == []: + return "" + + s = fb['nodes'][node['hostname']]['values']['kernel'].split() + if len(s) >=3: + node['kernel'] = s[2] + else: + node['kernel'] = fb['nodes'][node['hostname']]['values']['kernel'] + + if '2.6' not in node['kernel']: node['kernel'] = "" + if conf and not conf.nocolor: + node['boot_state'] = color_boot_state(node['boot_state']) + node['current'] = color_boot_state(node['current']) + #node['boot_state'] = node['boot_state'] + #node['current'] = node['current'] + node['pcu'] = fb['nodes'][node['hostname']]['values']['pcu'] + node['lastupdate'] = diff_time(node['last_contact']) + pf = PersistFlags(node['hostname'], 1, db='node_persistflags') + try: + node['lc'] = diff_time(pf.last_changed) + except: + node['lc'] = "err" + ut = fb['nodes'][node['hostname']]['values']['comonstats']['uptime'] + if ut != "null": + ut = diff_time(float(fb['nodes'][node['hostname']]['values']['comonstats']['uptime']), False) + node['uptime'] = ut + + return "%(hostname)-42s %(boot_state)8s %(current)5s %(pcu)6s %(key)10.10s... %(kernel)35.35s %(lastupdate)12s, %(lc)s, %(uptime)s" % node + +def datetime_fromstr(str): + if '-' in str: + try: + tup = time.strptime(str, "%Y-%m-%d") + except: + tup = time.strptime(str, "%Y-%m-%d-%H:%M") + elif '/' in str: + tup = time.strptime(str, "%m/%d/%Y") + else: + tup = time.strptime(str, "%m/%d/%Y") + ret = datetime.fromtimestamp(time.mktime(tup)) + return ret + +def get_nodeset(config): + """ + Given the config values passed in, return the set of hostnames that it + evaluates to. + """ + api = plc.getAuthAPI() + l_nodes = database.dbLoad("l_plcnodes") + + if config.nodelist: + f_nodes = util.file.getListFromFile(config.nodelist) + l_nodes = filter(lambda x: x['hostname'] in f_nodes, l_nodes) + elif config.node: + f_nodes = [config.node] + l_nodes = filter(lambda x: x['hostname'] in f_nodes, l_nodes) + elif config.nodegroup: + ng = api.GetNodeGroups({'name' : config.nodegroup}) + l_nodes = api.GetNodes(ng[0]['node_ids'], ['hostname']) + elif config.site: + site = api.GetSites(config.site) + l_nodes = api.GetNodes(site[0]['node_ids'], ['hostname']) + + l_nodes = [node['hostname'] for node in l_nodes] + + # perform this query after the above options, so that the filter above + # does not break. 
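+ # node_select() (from nodequeryold) matches each hostname's findbad record
+ # against a "key=regex" query string such as "state=BOOT&&kernel=2.6"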
+ if config.nodeselect: + fb = database.dbLoad("findbad") + l_nodes = node_select(config.nodeselect, fb['nodes'].keys(), fb) + + return l_nodes + diff --git a/statistics/nodediff-graph-better.py b/statistics/nodediff-graph-better.py new file mode 100755 index 0000000..68e14e1 --- /dev/null +++ b/statistics/nodediff-graph-better.py @@ -0,0 +1,138 @@ +#!/usr/bin/python + + +from monitor import config +from monitor.wrapper import plc +from monitor import parser as parsermodule +#from monitor.model import * +from monitorstats import * +from monitor import database + +import sys +import time +import calendar +from datetime import datetime, timedelta + +from nodequeryold import verify,query_to_dict,node_select + +api = plc.getAuthAPI() + +def nodes_from_time(arch, file, select=None): + fb = arch.load(file) + + nodelist = fb['nodes'].keys() + nodelist = node_select(select, nodelist, fb) + return nodelist + +def print_nodelist(nodelist, file): + for node in nodelist: + if file: + print >>file, node + else: + print node + + +def main(): + parser = parsermodule.getParser() + parser.set_defaults(archivedir='archive-pdb', begin=None, end=None, printnodes=False, select=None, select2=None) + parser.add_option("", "--archivedir", dest="archivedir", metavar="filename", + help="Pickle file aggregate output.") + parser.add_option("", "--select", dest="select", metavar="key", + help="Select .") + parser.add_option("", "--select2", dest="select2", metavar="key", + help="Select .") + parser.add_option("", "--print", dest="printnodes", action="store_true", + help="print the nodes that have come up or down.") + parser.add_option("", "--begin", dest="begin", metavar="YYYY-MM-DD", + help="Specify a starting date from which to begin the query.") + parser.add_option("", "--end", dest="end", metavar="YYYY-MM-DD", + help="Specify a ending date at which queries end.") + config = parsermodule.parse_args(parser) + archive = get_archive(config.archivedir) + + if not config.begin or not config.end: + print parsermodule.usage(parser) + sys.exit(1) + + tdelta = timedelta(1) + d_s1 = datetime_fromstr(config.begin) + d_s2 = datetime_fromstr(config.begin) + tdelta + d_end = datetime_fromstr(config.end) + + print d_s1 + print d_s2 + print d_end + + data = [] + while d_end > d_s2: + + try: + f_s1 = get_filefromglob(d_s1, "production.findbad", config.archivedir) + f_s2 = get_filefromglob(d_s2, "production.findbad", config.archivedir) + except: + timestr = d_s2.strftime("%Y-%m-%d") + print timestr, ",", 0, ",", 0 + d_s1 = d_s2 + d_s2 = d_s1 + tdelta + continue + + s1 = set(nodes_from_time(archive, f_s1, config.select)) + s2 = set(nodes_from_time(archive, f_s2, config.select)) + s3 = set(nodes_from_time(archive, f_s2, config.select2)) + + + timestr = d_s2.strftime("%Y-%m-%d") + print timestr, ",", len(s2),",", len(s3) + if not config.printnodes: + # f_up = open("up-%s" % timestr, 'w') + # f_down = open("dn-%s" % timestr, 'w') + f_up = None + f_down = None + pass + else: + print "%s nodes up" % len(s2-s1) + print "Nodes s2 minus s1: len(s2-s1) = %s" % len(s2-s1) + f_up = None + f_down = None + + #print_nodelist(s2-s1, f_up) + + if config.printnodes: + print "" + print "%s nodes down" % len(s1-s2) + print "Nodes s1 minus s2: len(s1-s2) = %s" % len(s1-s2) + + #print_nodelist(s1-s2, f_down) + if not config.printnodes: + if f_up: f_up.close() + if f_up: f_down.close() + + d_s1 = d_s2 + d_s2 = d_s1 + tdelta + + s1=[] + s2=[] + s3=[] + for row in data: + s1.append(row[0]) + s2.append(row[1]) + s3.append(row[2]) + + s1 = map(lambda x: 
x-500, s1) + rlow= zip(s1,s3) + rhigh = zip(s1,s2) + diff_low = map(lambda x: x[0]-x[1], rlow) + diff_high = map(lambda x: x[0]+x[1], rhigh) + s1 = map(lambda x: str(x), s1) + diff_low = map(lambda x: str(x), diff_low) + diff_high = map(lambda x: str(x), diff_high) + print s1 + print diff_low + print diff_high + print "http://chart.apis.google.com/chart?cht=lc&chds=40,100&chxt=x,y&chxl=0:|Oct|Nov|Dec|Jan|Feb|1:|540|580|600&chs=700x200&chm=F,aaaaaa,1,-1,2&chd=t1:%s" % ",".join(s1) + "|" + ",".join(diff_low) + "|" + ",".join(s1) + "|" + ",".join(s1) +"|" + ",".join(diff_high) + +# takes two arguments as dates, comparing the number of up nodes from one and +# the other. + +if __name__ == "__main__": + main() diff --git a/statistics/nodediff-graph.py b/statistics/nodediff-graph.py new file mode 100644 index 0000000..e5f9d39 --- /dev/null +++ b/statistics/nodediff-graph.py @@ -0,0 +1,100 @@ +#!/usr/bin/python + + +from monitor import config +from monitor.wrapper import plc +from monitor import parser as parsermodule +from monitor.model import * +from monitorstats import * +from monitor import database + +import sys +import time +import calendar +from datetime import datetime, timedelta + +from nodequeryold import verify,query_to_dict,node_select + +api = plc.getAuthAPI() + +def nodes_from_time(arch, file): + fb = arch.load(file) + + nodelist = fb['nodes'].keys() + nodelist = node_select(config.select, nodelist, fb) + return nodelist + + +def main(): + parser = parsermodule.getParser() + parser.set_defaults(archivedir='archive-pdb', begin=None, end=None, select=None) + parser.add_option("", "--archivedir", dest="archivedir", metavar="filename", + help="Pickle file aggregate output.") + parser.add_option("", "--select", dest="select", metavar="key", + help="Select .") + parser.add_option("", "--begin", dest="begin", metavar="YYYY-MM-DD", + help="Specify a starting date from which to begin the query.") + parser.add_option("", "--end", dest="end", metavar="YYYY-MM-DD", + help="Specify a ending date at which queries end.") + config = parsermodule.parse_args(parser) + archive = get_archive(config.archivedir) + + if not config.begin or not config.end: + print parsermodule.usage(parser) + sys.exit(1) + + tdelta = timedelta(1) + d_s1 = datetime_fromstr(config.begin) + d_s2 = datetime_fromstr(config.begin) + tdelta + d_end = datetime_fromstr(config.end) + + data = [] + while d_end > d_s2: + + f_s1 = get_filefromglob(d_s1, "production.findbad", config.archivedir) + f_s2 = get_filefromglob(d_s2, "production.findbad", config.archivedir) + + s1 = set(nodes_from_time(archive, f_s1)) + s2 = set(nodes_from_time(archive, f_s2)) + + print "[ %s, %s, %s ]," % ( len(s2), len(s2-s1), len(s1-s2) ) + data.append( [ len(s2), len(s2-s1), len(s1-s2)] ) + + #print "len s2 : ", len(s2) + #print "len s1 : ", len(s1) + #print "%s nodes up" % len(s2-s1) + #print "Nodes s2 minus s1: len(s2-s1) = %s" % len(s2-s1) + #for node in s2 - s1: print node + #print "" + #print "%s nodes down" % len(s1-s2) + #print "Nodes s1 minus s2: len(s1-s2) = %s" % len(s1-s2) + # for node in s1 - s2: print node + d_s1 = d_s2 + d_s2 = d_s1 + tdelta + + s1=[] + s2=[] + s3=[] + for row in data: + s1.append(row[0]) + s2.append(row[1]) + s3.append(row[2]) + + s1 = map(lambda x: x-500, s1) + rlow= zip(s1,s3) + rhigh = zip(s1,s2) + diff_low = map(lambda x: x[0]-x[1], rlow) + diff_high = map(lambda x: x[0]+x[1], rhigh) + s1 = map(lambda x: str(x), s1) + diff_low = map(lambda x: str(x), diff_low) + diff_high = map(lambda x: str(x), diff_high) + print s1 + 
print diff_low + print diff_high + print "http://chart.apis.google.com/chart?cht=lc&chds=40,100&chxt=x,y&chxl=0:|Oct|Nov|Dec|Jan|Feb|1:|540|580|600&chs=700x200&chm=F,aaaaaa,1,-1,2&chd=t1:%s" % ",".join(s1) + "|" + ",".join(diff_low) + "|" + ",".join(s1) + "|" + ",".join(s1) +"|" + ",".join(diff_high) + +# takes two arguments as dates, comparing the number of up nodes from one and +# the other. + +if __name__ == "__main__": + main() diff --git a/statistics/nodediff-length.py b/statistics/nodediff-length.py new file mode 100755 index 0000000..f2e468f --- /dev/null +++ b/statistics/nodediff-length.py @@ -0,0 +1,107 @@ +#!/usr/bin/python + + +from monitor import config +from monitor.wrapper import plc +from monitor import parser as parsermodule +from monitor.model import * +from monitorstats import * +from monitor import database + +import sys +import time +import calendar +from datetime import datetime, timedelta + +from nodequeryold import verify,query_to_dict,node_select + +api = plc.getAuthAPI() + +def nodes_from_time(arch, file): + fb = arch.load(file) + + nodelist = fb['nodes'].keys() + nodelist = node_select(config.select, nodelist, fb) + return nodelist + +def print_nodelist(nodelist, file): + for node in nodelist: + if file: + print >>file, node + else: + print node + + +def main(): + parser = parsermodule.getParser() + parser.set_defaults(archivedir='archive-pdb', begin=None, end=None, printnodes=False, select=None) + parser.add_option("", "--archivedir", dest="archivedir", metavar="filename", + help="Pickle file aggregate output.") + parser.add_option("", "--select", dest="select", metavar="key", + help="Select .") + parser.add_option("", "--print", dest="printnodes", action="store_true", + help="print the nodes that have come up or down.") + parser.add_option("", "--begin", dest="begin", metavar="YYYY-MM-DD", + help="Specify a starting date from which to begin the query.") + parser.add_option("", "--end", dest="end", metavar="YYYY-MM-DD", + help="Specify a ending date at which queries end.") + config = parsermodule.parse_args(parser) + archive = get_archive(config.archivedir) + + if not config.begin or not config.end: + print parsermodule.usage(parser) + sys.exit(1) + + tdelta = timedelta(1) + d_s1 = datetime_fromstr(config.begin) + d_s2 = datetime_fromstr(config.end) + d_end = d_s2 + + print d_s1 + print d_s2 + print d_end + + data = [] + while d_end >= d_s2: + + f_s1 = get_filefromglob(d_s1, "production.findbad", config.archivedir) + f_s2 = get_filefromglob(d_s2, "production.findbad", config.archivedir) + + s1 = set(nodes_from_time(archive, f_s1)) + s2 = set(nodes_from_time(archive, f_s2)) + + print "[ %s, %s, %s ]," % ( len(s2), len(s2-s1), len(s1-s2) ) + data.append( [ len(s2), len(s2-s1), len(s1-s2)] ) + + #print "len s2 : ", len(s2) + #print "len s1 : ", len(s1) + timestr = d_s2.strftime("%Y-%m-%d") + if not config.printnodes: + f_up = open("up-%s" % timestr, 'w') + f_down = open("dn-%s" % timestr, 'w') + else: + print "%s nodes up" % len(s2-s1) + print "Nodes s2 minus s1: len(s2-s1) = %s" % len(s2-s1) + f_up = None + f_down = None + + print_nodelist(s2-s1, f_up) + + if config.printnodes: + print "" + print "%s nodes down" % len(s1-s2) + print "Nodes s1 minus s2: len(s1-s2) = %s" % len(s1-s2) + + print_nodelist(s1-s2, f_down) + if not config.printnodes: + f_up.close() + f_down.close() + + d_s1 = d_s2 + d_s2 = d_s1 + tdelta + +# takes two arguments as dates, comparing the number of up nodes from one and +# the other. 
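+#
+# The up/down counts are plain set differences between the two findbad
+# snapshots: hostnames selected in the later snapshot but not the earlier one
+# came up, and the reverse went down. Restated with this script's own helpers
+# (a sketch only, not executed here):
+#
+#   s1 = set(nodes_from_time(archive, f_s1))   # --begin snapshot
+#   s2 = set(nodes_from_time(archive, f_s2))   # --end snapshot
+#   came_up, went_down = s2 - s1, s1 - s2
+#   print "%d up, %d down" % (len(came_up), len(went_down))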
+ +if __name__ == "__main__": + main() diff --git a/statistics/nodediff.py b/statistics/nodediff.py new file mode 100755 index 0000000..7e6674d --- /dev/null +++ b/statistics/nodediff.py @@ -0,0 +1,128 @@ +#!/usr/bin/python + + +from monitor import config +from monitor.wrapper import plc +from monitor import parser as parsermodule +from monitor.model import * +from monitorstats import * +from monitor import database + +import sys +import time +import calendar +from datetime import datetime, timedelta + +from nodequeryold import verify,query_to_dict,node_select + +api = plc.getAuthAPI() + +def nodes_from_time(arch, file): + fb = arch.load(file) + + nodelist = fb['nodes'].keys() + nodelist = node_select(config.select, nodelist, fb) + del fb + return nodelist + +def print_nodelist(nodelist, file): + for node in nodelist: + if file: + print >>file, node + else: + print node + +def main(): + parser = parsermodule.getParser() + parser.set_defaults(archivedir='archive-pdb', begin=None, end=None, + sequential=False, printnodes=False, select=None) + + parser.add_option("", "--archivedir", dest="archivedir", metavar="filename", + help="Pickle file aggregate output.") + parser.add_option("", "--select", dest="select", metavar="key", + help="Select .") + parser.add_option("", "--sequential", dest="sequential", action="store_true", + help="Compare EVERY timestep between begin and end .") + parser.add_option("", "--print", dest="printnodes", action="store_true", + help="print the nodes that have come up or down.") + parser.add_option("", "--begin", dest="begin", metavar="YYYY-MM-DD", + help="Specify a starting date from which to begin the query.") + parser.add_option("", "--end", dest="end", metavar="YYYY-MM-DD", + help="Specify a ending date at which queries end.") + + config = parsermodule.parse_args(parser) + archive = get_archive(config.archivedir) + + if not config.begin or not config.end: + print parsermodule.usage(parser) + sys.exit(1) + + tdelta = timedelta(1) + d_s1 = datetime_fromstr(config.begin) + d_s2 = datetime_fromstr(config.begin) + tdelta + d_end = datetime_fromstr(config.end) + + print d_s1 + print d_s2 + print d_end + + file_list = [] + # then the iterations are day-based. + while d_end > d_s2: + f_s1 = get_filefromglob(d_s1, "production.findbad", config.archivedir, True) + if not config.sequential: + file_list.append(f_s1) + else: + file_list += f_s1 + + d_s1 = d_s2 + d_s2 = d_s1 + tdelta + + print file_list + file_list = file_list[4:] + + xaxis = get_xaxis(file_list) + + data = [] + f_s1 = None + f_s2 = None + i = 0 + for file in file_list: + + i+=1 + f_s2 = file + if f_s1 is None: + f_s1 = f_s2 + continue + + s1 = set(nodes_from_time(archive, f_s1)) + s2 = set(nodes_from_time(archive, f_s2)) + + print f_s1 + print "[ %s, %s, %s ]," % ( len(s2), len(s2-s1), len(s1-s2) ) + data.append( [ len(s2), len(s2-s1), len(s1-s2)] ) + + print "%s nodes up" % len(s2-s1) + print "Nodes s2 minus s1: len(s2-s1) = %s" % len(s2-s1) + f_up = None + f_down = None + + if config.printnodes: + print_nodelist(s2-s1, f_up) + + print "" + print "%s nodes down" % len(s1-s2) + print "Nodes s1 minus s2: len(s1-s2) = %s" % len(s1-s2) + + if config.printnodes: + print_nodelist(s1-s2, f_down) + + f_s1 = f_s2 + f_s2 = None + + print_graph(data, config.begin, config.end, xaxis) +# takes two arguments as dates, comparing the number of up nodes from one and +# the other. 
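+#
+# print_graph() (from monitorstats) turns the per-day [total, up, down] rows
+# into a Google Chart line-chart URL: the totals are shifted down by 'offset'
+# (500 by default), a low band subtracts the down counts, a high band adds the
+# up counts, and the five series are comma-joined and glued together with "|"
+# inside the chart's "chd=t1:" parameter. Roughly, with made-up sample numbers:
+#
+#   total, up, down = [550, 560], [5, 12], [3, 1]
+#   base = [t - 500 for t in total]
+#   low  = [b - d for b, d in zip(base, down)]
+#   high = [b + u for b, u in zip(base, up)]
+#   chd  = "|".join(",".join(map(str, s)) for s in (base, low, base, base, high))
+#   url  = "http://chart.apis.google.com/chart?cht=lc&chds=0,100&chd=t1:" + chd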
+ +if __name__ == "__main__": + main() diff --git a/statistics/nodequeryold.py b/statistics/nodequeryold.py new file mode 100755 index 0000000..a2aba4b --- /dev/null +++ b/statistics/nodequeryold.py @@ -0,0 +1,216 @@ +#!/usr/bin/python + +import sys +from nodecommon import * +import glob +import os +from monitor.util import file + +import time +import re + +#fb = {} +fb = None +fbpcu = None + +class NoKeyException(Exception): pass + +def fb_print_nodeinfo(fbnode, hostname, fields=None): + fbnode['hostname'] = hostname + fbnode['checked'] = diff_time(fbnode['checked']) + if fbnode['bootcd']: + fbnode['bootcd'] = fbnode['bootcd'].split()[-1] + else: + fbnode['bootcd'] = "unknown" + fbnode['pcu'] = color_pcu_state(fbnode) + + if not fields: + if 'ERROR' in fbnode['category']: + fbnode['kernel'] = "" + else: + fbnode['kernel'] = fbnode['kernel'].split()[2] + fbnode['boot_state'] = fbnode['plcnode']['boot_state'] + + try: + if len(fbnode['nodegroups']) > 0: + fbnode['category'] = fbnode['nodegroups'][0] + except: + #print "ERROR!!!!!!!!!!!!!!!!!!!!!" + pass + + print "%(hostname)-45s | %(checked)11.11s | %(boot_state)5.5s| %(state)8.8s | %(ssh)5.5s | %(pcu)6.6s | %(bootcd)6.6s | %(category)8.8s | %(kernel)s" % fbnode + else: + format = "" + for f in fields: + format += "%%(%s)s " % f + print format % fbnode + +def get(fb, path): + indexes = path.split("/") + values = fb + for index in indexes: + if index in values: + values = values[index] + else: + raise NoKeyException(index) + return values + +def verifyType(constraints, data): + """ + constraints is a list of key, value pairs. + # [ {... : ...}==AND , ... , ... , ] == OR + """ + con_or_true = False + for con in constraints: + #print "con: %s" % con + if len(con.keys()) == 0: + con_and_true = False + else: + con_and_true = True + + for key in con.keys(): + #print "looking at key: %s" % key + if data is None: + con_and_true = False + break + + try: + get(data,key) + o = con[key] + if o.name() == "Match": + if get(data,key) is not None: + value_re = re.compile(o.value) + con_and_true = con_and_true & (value_re.search(get(data,key)) is not None) + else: + con_and_true = False + elif o.name() == "ListMatch": + if get(data,key) is not None: + match = False + for listitem in get(data,key): + value_re = re.compile(o.value) + if value_re.search(listitem) is not None: + match = True + break + con_and_true = con_and_true & match + else: + con_and_true = False + elif o.name() == "Is": + con_and_true = con_and_true & (get(data,key) == o.value) + elif o.name() == "FilledIn": + con_and_true = con_and_true & (len(get(data,key)) > 0) + elif o.name() == "PortOpen": + if get(data,key) is not None: + v = get(data,key) + con_and_true = con_and_true & (v[str(o.value)] == "open") + else: + con_and_true = False + else: + value_re = re.compile(o.value) + con_and_true = con_and_true & (value_re.search(get(data,key)) is not None) + + except NoKeyException, key: + print "missing key %s" % key, + pass + #print "missing key %s" % key + #con_and_true = False + + con_or_true = con_or_true | con_and_true + + return con_or_true + +def verify(constraints, data): + """ + constraints is a list of key, value pairs. + # [ {... : ...}==AND , ... , ... 
, ] == OR + """ + con_or_true = False + for con in constraints: + #print "con: %s" % con + if len(con.keys()) == 0: + con_and_true = False + else: + con_and_true = True + + for key in con.keys(): + #print "looking at key: %s" % key + if key in data: + value_re = re.compile(con[key]) + if type([]) == type(data[key]): + local_or_true = False + for val in data[key]: + local_or_true = local_or_true | (value_re.search(val) is not None) + con_and_true = con_and_true & local_or_true + else: + con_and_true = con_and_true & (value_re.search(data[key]) is not None) + elif key not in data: + print "missing key %s" % key, + pass + #print "missing key %s" % key + #con_and_true = False + + con_or_true = con_or_true | con_and_true + + return con_or_true + +def query_to_dict(query): + + ad = [] + + or_queries = query.split('||') + for or_query in or_queries: + and_queries = or_query.split('&&') + + d = {} + + for and_query in and_queries: + (key, value) = and_query.split('=') + d[key] = value + + ad.append(d) + + return ad + +def pcu_in(fbdata): + if 'plcnode' in fbdata: + if 'pcu_ids' in fbdata['plcnode']: + if len(fbdata['plcnode']['pcu_ids']) > 0: + return True + return False + +def node_select(str_query, nodelist=None, fbdb=None): + global fb + + hostnames = [] + if str_query is None: return hostnames + + #print str_query + dict_query = query_to_dict(str_query) + #print dict_query + + if fbdb is not None: + fb = fbdb + + for node in fb['nodes'].keys(): + if nodelist is not None: + if node not in nodelist: continue + + fb_nodeinfo = fb['nodes'][node]['values'] + + if fb_nodeinfo == []: + #print node, "has lost values" + continue + #sys.exit(1) + #fb_nodeinfo['pcu'] = color_pcu_state(fb_nodeinfo) + fb_nodeinfo['hostname'] = node + if 'plcnode' in fb_nodeinfo: + fb_nodeinfo.update(fb_nodeinfo['plcnode']) + + if verify(dict_query, fb_nodeinfo): + #print node #fb_nodeinfo + hostnames.append(node) + else: + #print "NO MATCH", node + pass + + return hostnames + diff --git a/statistics/parserpms.py b/statistics/parserpms.py new file mode 100755 index 0000000..6ca34b2 --- /dev/null +++ b/statistics/parserpms.py @@ -0,0 +1,25 @@ +#!/usr/bin/python + +import sys +import os +import md5 + +def list_to_md5(strlist): + digest = md5.new() + for f in strlist: + digest.update(f) + + return digest.hexdigest() + +while True: + line = sys.stdin.readline() + if not line: + break + line = line.strip() + fields = line.split() + host = fields[1] + rpms = fields[2:] + rpms.sort() + if len(rpms) != 0: + sum = list_to_md5(rpms) + print sum, host diff --git a/statistics/rtsurvey.py b/statistics/rtsurvey.py new file mode 100755 index 0000000..2f2babd --- /dev/null +++ b/statistics/rtsurvey.py @@ -0,0 +1,226 @@ +#!/usr/bin/python + +import os, sys, shutil +import MySQLdb +import string + +import re + +import time +from datetime import datetime + +from monitor import config +from monitor import database + +def convert_time(time_str): + if '-' in str: + try: + tup = time.strptime(str, "%Y-%m-%d %H:%M:%S") + except: + tup = time.strptime(str, "%Y-%m-%d-%H:%M") + elif '/' in str: + tup = time.strptime(str, "%m/%d/%Y") + else: + tup = time.strptime(str, "%m/%d/%Y") + d_ret = datetime.fromtimestamp(time.mktime(tup)) + return d_ret + +def open_rt_db(): + + try: + rt_db = MySQLdb.connect(host=config.RT_DB_HOST, + user=config.RT_DB_USER, + passwd=config.RT_DB_PASSWORD, + db=config.RT_DB_NAME) + except Exception, err: + print "Failed to connect to RT database: %s" %err + return -1 + + return rt_db + +def fetch_from_db(db, sql): + try: + # 
create a 'cursor' (required by MySQLdb) + c = db.cursor() + c.execute(sql) + except Exception, err: + print "Could not execute RT query %s" %err + return -1 + + # fetch all rows (list of lists) + raw = c.fetchall() + return raw + + +def get_rt_tickets(): + print "open db connection" + db = open_rt_db() + if db == -1: + return "" + + sql = """SELECT tk.id, tk.Queue, tr.Type, tr.Field, tr.OldValue, tr.NewValue, + tr.Created, at.id, at.Subject, at.Content + FROM Tickets as tk, Transactions as tr + LEFT OUTER JOIN Attachments as at ON tr.id=at.TransactionId + WHERE (tk.Queue=3 OR tk.Queue=22) AND tk.id=tr.ObjectId AND tk.id>10000 """ + + print "run query" + raw = fetch_from_db(db, sql) + if raw == -1: + return raw + + tickets = {} + subject_map = {} + def parse_ticket(x): + ticket_id = int(x[0]) + queue = int(x[1]) + trtype = str(x[2]) + field = x[3] + oldvalue = x[4] + newvalue = x[5] + datecreated = x[6] # already a datetime object + attachmentid = x[7] + subject = x[8] + content = x[9] + + if ticket_id not in tickets: + print "found new ticket_id", ticket_id + tickets[ticket_id] = {'queue' : queue, + 'transactions' : [] } + + if subject != "": + subject_map[ticket_id] = subject + elif ticket_id in subject_map: + subject = subject_map[ticket_id] + else: + # subject == "" and no record in subject_map yet + # should probably put on a queue to be processed later. + print "no subject for %s" % ticket_id + + transaction = { + 'type' : trtype, + 'field' : field, + 'oldvalue' : oldvalue, + 'newvalue' : newvalue, + 'datecreated' : datecreated, + 'attachmentid' : attachmentid, + 'subject' : subject, + 'content' : content, + } + tickets[ticket_id]['transactions'].append(transaction) + + + print "sort data" + list = map(parse_ticket, raw) + + # map(lambda x: { "email":str(x[4]), "lastupdated":str(x[5]), "owner":str(x[7]), }, raw) + + db.close() + + + return tickets + + +# flow chart: +# classify: +# for each ticket +# classify into category +# remove from ticket set, add to classified-set +# +# add new search patterns, +# re-run classify algorithm + +re_map = [ + #('mom', {'pattern' : '.*pl_mom.*'}), + #('technical-support', {'pattern' : '.*PlanetLab node.* down'}), + #('technical-support', {'pattern' : 'Node .* was stopped by'}), # and opened + #('technical-support', {'pattern' : 'bootcd|BootCD|bootCD|boot cd|boot CD|booting'}), + #('technical-support', {'pattern' : '.* failed to authenticate'}), + #('technical-support', {'pattern' : '.* fails to boot'}), + #('technical-support', {'pattern' : '.* fail.* to boot'}), + #('technical-support', {'pattern' : '.* failed to authenticate'}), + #('technical-support', {'pattern' : 'curl (60)|.* CA certificates.*|peer certificate.*authenticated'}), + #('technical-support', {'pattern' : '(usb|USB).*(key|Disk|stick|boot|help|problem|trouble)'}), + #('complaint', {'pattern' : '.*omplaint|.*attack'}), + #('complaint', {'pattern' : '.* stop .*'}), # and subject + #('spam', {}),j + #('user-support', {'pattern' : '(R|r)egistration|(R|r)egister'}), + #('user-support', {'pattern' : 'password reset|reset password'}), + ('user-support', {'pattern' : 'New PI account registration from'}), + #('other', {}), +] + +def sort_tickets(tickets, re_map): + + ticket_count = len(tickets.keys()) + marked_subject = 0 + marked_content = 0 + for ticket_id in sorted(tickets.keys()): + for i,(name, pattern) in enumerate(re_map): + if 'compile' not in pattern: + pattern['compile'] = re.compile(pattern['pattern']) + pat = pattern['compile'] + for transaction in 
tickets[ticket_id]['transactions']: + + try: + if transaction['subject'] and re.match(pat, transaction['subject']): + print "ticket %s matches pattern %s: %s" % (ticket_id, + pattern['pattern'], transaction['subject']) + marked_subject += 1 + break + if transaction['content'] and re.match(pat, transaction['content']): + print "ticket %s matches pattern %s: %s" % (ticket_id, + pattern['pattern'], transaction['subject']) + #if transaction['subject'] == "": + # print transaction + marked_content += 1 + break + except: + import traceback + print traceback.print_exc() + print transaction + print ticket_id + print pattern + sys.exit(1) + + print ticket_count + print marked_subject + print marked_content + print ticket_count - marked_content - marked_content + +def main(): + from optparse import OptionParser + parser = OptionParser() + + parser.set_defaults(runsql=False,) + + parser.add_option("", "--runsql", dest="runsql", action="store_true", + help="Whether to collect data from the MySQL server before "+ + "caching it, or to just use the previously collected data.") + + (config, args) = parser.parse_args() + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + + for i,(name, pattern) in enumerate(re_map): + print i, name + + if config.runsql: + tickets = get_rt_tickets() + database.dbDump("survey_tickets", tickets) + else: + print "loading" + tickets = database.dbLoad("survey_tickets") + print tickets[42171]['transactions'][0] + + sort_tickets(tickets, re_map) + + # for each ticket id + # scan for known keywords and sort into classes + # record assigned class + + # review all tickets that remain + +if __name__ == '__main__': + main() diff --git a/statistics/sliceavg.py b/statistics/sliceavg.py new file mode 100755 index 0000000..739814f --- /dev/null +++ b/statistics/sliceavg.py @@ -0,0 +1,45 @@ +#!/usr/bin/python + +import os +import sys + +from monitor.wrapper import plc + +api = plc.cacheapi +api.AuthCheck() + +# for each site, find total number of assigned slivers +# find median, high, low, average + +site_list = [] + +for site in api.GetSites({'peer_id': None}): + sl = api.GetSlices(site['slice_ids']) + sliver_cnt = 0 + for slice in sl: + sliver_cnt += len(slice['node_ids']) + val = (site['login_base'], sliver_cnt) + site_list.append(val) + #print val + +print "------------------------------------------" +site_list.sort(lambda x,y: cmp(y[1], x[1])) +totals = 0 +use_count = 0 +for i in site_list: + if i[1] != 0: + print "%10s %s" % i + use_count += 1 + totals += i[1] + +site_avg = totals/len(site_list) + +print "high: %s %s" % site_list[0] +print "low: %s %s" % site_list[-1] +print "median: %s %s" % site_list[len(site_list)/2] +print "used median: %s %s" % site_list[use_count/2] +print "all avg: %s" % site_avg +print "used avg: %s" % (totals/use_count) +print "totals: %s" % totals +print "use_count: %s" % use_count +print "site_count: %s" % len(site_list) diff --git a/statistics/usedtickets.py b/statistics/usedtickets.py new file mode 100755 index 0000000..b3ab662 --- /dev/null +++ b/statistics/usedtickets.py @@ -0,0 +1,21 @@ +#!/usr/bin/python + +from monitor.model import * + +sql = database.dbLoad("idTickets") +for db in ["monitor", "pcu", "bootcd", "hardware", "unknown", + "suspect", "baddns", "nodenet", "nodeid"]: + db = "%s_persistmessages" % db + #print db + try: + pm = database.dbLoad(db) + except: + continue + for host in pm.keys(): + m = pm[host] + id = str(m.ticket_id) + if m.ticket_id > 0: + if id in sql: + print "%s %6s %s" % (m.ticket_id, sql[id]['status'], host) + 
else: + print "%s closed %s" % ( m.ticket_id, host) -- 2.43.0
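
rtsurvey.py classifies RT tickets by walking an ordered list of (category, pattern) regexes and marking a ticket on the first transaction whose subject or content matches (the script itself uses re.match). The sketch below restates that loop against a small in-memory ticket dict; the ticket ids, subjects, and the second pattern are made up for illustration, and re.search is used instead of re.match for simplicity.

    import re

    re_map = [
        ('user-support', {'pattern': 'New PI account registration from'}),
        ('technical-support', {'pattern': 'fail.* to boot|BootCD'}),
    ]

    tickets = {
        101: {'queue': 3,  'transactions': [{'subject': 'New PI account registration from example.edu', 'content': ''}]},
        102: {'queue': 22, 'transactions': [{'subject': 'planetlab1 fails to boot', 'content': 'BootCD is several years old'}]},
    }

    def classify(tickets, re_map):
        # first matching (category, pattern) wins; unmatched tickets are skipped
        classes = {}
        for ticket_id in sorted(tickets.keys()):
            for name, pattern in re_map:
                pat = re.compile(pattern['pattern'])
                for t in tickets[ticket_id]['transactions']:
                    if pat.search(t['subject'] or '') or pat.search(t['content'] or ''):
                        classes[ticket_id] = name
                        break
                if ticket_id in classes:
                    break
        return classes

    print classify(tickets, re_map)   # {101: 'user-support', 102: 'technical-support'}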