+#!/usr/bin/python
+
+import sys, os, time, bz2
+
+if len(sys.argv) != 2 :
+ print 'usage: bz2comonlogfile'
+ sys.exit()
+
+filename = sys.argv[1]
+start_time = time.time()
+if not ('dump_comon_' in filename and filename.endswith('bz2')) :
+ print 'not a comon log file:'
+ sys.exit()
+
+def str_to_ts(date_str, format="%Y-%m-%d"):
+ ts = time.mktime(time.strptime(date_str, format))
+ return ts
+
+
+# .../dump_cotop_20080101 -> 2008-01-01
+indx = filename.rfind('dump_comon_') + len('dump_comon') + 1
+date = filename[indx:indx+8]
+date = date[0:4] + '-' + date[4:6] + '-' + date[6:8]
+ts = str_to_ts(date)
+
+# read in bz2 log file
+bzobj = bz2.BZ2File(filename, mode = 'r')
+lines = bzobj.readlines()
+
+last_time = 0
+entry = {}
+processed_tags = set()
+
+# keep track of malformed entries
+n_badTS = 0
+n_ambigTS = 0
+n_NA = {}
+
+important_tags = ['Start:', 'Name:', 'RespTime:']
+
+def get_field(table, *args):
+ pos = table
+ l = len(args)
+ for i,v in enumerate(args[:-1]):
+ if v not in pos:
+ pos[v] = {}
+ pos = pos[v]
+ v = args[-1]
+ if args[-1] not in pos:
+ pos[v] = 0
+ return pos[v]
+
+def set_field(table, *args):
+ pos = table
+ l = len(args)
+ #get_field(table, *args[0:-1])
+ for i,v in enumerate(args[:-2]):
+ pos = pos[v]
+ pos[args[-2]] = args[-1]
+
+def isValid(entry):
+ # check important_tags
+ for t in important_tags:
+ if t not in entry:
+ #print "bad entry", entry
+ return False
+
+ try:
+ if 'Uptime:' in entry:
+ float(entry['Uptime:'])
+ except:
+ #print "uptime fault"
+ return False
+
+ return True
+
+hs = {} # HOST SUMMARY
+
+# Process log
+for line in lines :
+ line = line.strip()
+ if line == '' :
+ #Process timestamp
+ try :
+ this_time = int(entry['Start:'][0])
+ fmtime = time.strftime('%D %T', time.localtime(this_time))
+ ambigTS = this_time < last_time
+ if ambigTS :
+ n_ambigTS += 1
+ else :
+ last_time = this_time
+ #outcsv.write('%d,%s' % (this_time, ambigTS))
+ except KeyError :
+ continue
+ except :
+ n_badTS += 1
+ entry = {}
+ processed_tags = set()
+ continue
+ #Process other fields
+ #try :
+
+
+ if True:
+
+ if not isValid(entry):
+ entry = {}
+ processed_tags = set()
+ continue
+
+ h = entry['Name:']
+
+ if h not in hs:
+ get_field(hs,h,'offline')
+ get_field(hs,h,'online')
+ get_field(hs,h,'uptime')
+
+ try:
+ if len(entry['RespTime:'].split()) > 1:
+ set_field(hs,h,'offline', get_field(hs,h,'offline') + 1)
+ else:
+ set_field(hs,h,'online', get_field(hs,h,'online') + 1)
+ set_field(hs,h,'uptime', max(get_field(hs,h,'uptime'),entry['Uptime:']) )
+ except:
+ #print "except resptime"
+ continue
+
+
+ #except KeyError :
+ ## print "key error! on hostname: %s" % h
+ # continue
+
+ entry = {}
+ processed_tags = set()
+ else :
+ words = line.split()
+ tag = words[0]
+ if tag in processed_tags : # start over, since a tag is repeating
+ entry = {}
+ processed_tags = set()
+ entry[tag] = " ".join(words[1:len(words)])
+ processed_tags.add(tag)
+
+# finished processing log file
+
+# clean up memory objs
+#outcsv.close()
+bzobj.close()
+lines = ''
+
+online = 0
+offline = 0
+uptimes = []
+#print "number of hosts:%s" % len(hs)
+for h in hs:
+ if hs[h]['uptime'] > 0: uptimes.append(float(hs[h]['uptime']))
+ if hs[h]['online'] > hs[h]['offline']:
+ online += 1
+ else:
+ offline += 1
+
+l = len(uptimes)
+uptimes.sort()
+end_time = time.time()
+print date, ts, online+offline, online, offline, uptimes[0], uptimes[l/4], uptimes[l/2], uptimes[l/2+l/4], uptimes[-1], end_time-start_time, filename