(no commit message)
authorStephen Soltesz <soltesz@cs.princeton.edu>
Thu, 11 Feb 2010 20:12:52 +0000 (20:12 +0000)
committerStephen Soltesz <soltesz@cs.princeton.edu>
Thu, 11 Feb 2010 20:12:52 +0000 (20:12 +0000)
statistics/harvest_tt_resolve.py [new file with mode: 0755]
statistics/harvest_tt_respond.py [new file with mode: 0755]

diff --git a/statistics/harvest_tt_resolve.py b/statistics/harvest_tt_resolve.py
new file mode 100755 (executable)
index 0000000..bebff32
--- /dev/null
@@ -0,0 +1,110 @@
+#!/usr/bin/python
+
+# Collect statistics from the myops db on node downtimes.
+# For every node that goes down we need:
+#      * node down time
+#      * node reboot time
+#      * node notice time
+#      * node up time
+
+# Then, for each node, order the events by time:
+#      for each event sequence, extract sub-sequences like:
+#              down ... up
+#      for each such sub-sequence, extract:
+#              the time between down and up
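+#
+# For example (hypothetical data), the code below pairs each 'down' record
+# with the nearest preceding 'offline' record, so a status history of
+#      online, offline, down, down, online, offline, down
+# yields the (offline, down) index pairs (1, 3) and (5, 6); the harvested
+# duration for each pair is last_checked[down] - last_checked[offline].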
+
+from monitor.database.info.model import *
+from math import *
+import sys
+from datetime import datetime
+
+def find_next(history_list, from_index, node_status):
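+       # Search backwards from from_index for the most recent record whose
+       # status equals node_status.  Index 0 doubles as the "not found"
+       # sentinel, so entry 0 itself is never examined.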
+       list_len = len(history_list)
+       for i in range(min(list_len-1, from_index), 0, -1):
+               v = history_list[i]
+               if v.status == node_status:
+                       return i
+
+       return 0
+
+def diff_time(t1, t2):
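+       # total number of seconds in the timedelta t1 - t2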
+       d = t1-t2
+       return d.days*60*60*24 + d.seconds
+
+times = []
+frequency = {}
+count = 0
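+# Walk each node's status history, pairing every 'down' record with the
+# nearest preceding 'offline' record, and collect the elapsed seconds
+# between them in 'times'.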
+for index,node in enumerate(HistoryNodeRecord.query.all()):
+       frequency[node.hostname] = 0
+
+
+       #if index > 3: sys.exit(1)
+
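+       # debug: dump the full status history for one specific node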
+       if node.hostname == 'planetlab-02.kyushu.jgn2.jp':
+               for h in node.versions:
+                       print h.last_checked, h.status
+
+       #print "--"
+       pairs = []
+       i = len(node.versions)
+       ret = find_next(node.versions, i, 'online')
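+       # ret == 0: no 'online' record was found anywhere in this node's
+       # history, so record a single interval from its most recent check
+       # until now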
+       if ret == 0:
+               print node.hostname
+               print node.status
+               print node.last_checked
+               print node.last_changed
+               #if count > 3: sys.exit(1)
+               count += 1
+               pairs.append((datetime.now(), node.versions[-1].last_checked))
+       else:
+
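+               # otherwise walk backwards through the history, pairing each
+               # 'down' record with the nearest earlier 'offline' record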
+               while i > 0:
+                       i = find_next(node.versions, i, 'down')
+                       i2 = find_next(node.versions, i, 'offline')
+                       if i == 0 and i2 == 0:
+                               break
+                       h1 = node.versions[i]
+                       #print i, h1.last_checked, h1.status
+                       h2 = node.versions[i2]
+                       #print i2, h2.last_checked, h2.status
+                       i = i2
+                       pairs.append((h1.last_checked,h2.last_checked))
+                       frequency[node.hostname] += 1
+
+       # list of all times
+
+       for p in pairs:
+               times.append(diff_time(p[0],p[1]))
+
+# frequency: how many down/offline pairs each node contributed
+def flip_key(hash):
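+       # invert a {key: value} mapping into {value: [keys]}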
+       fk = {}
+       for key in hash.keys():
+               if hash[key] not in fk:
+                       fk[hash[key]] = []
+               fk[hash[key]].append(key)
+       return fk
+
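+# report, for each pair count, how many nodes (and which ones) had that count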
+freq = flip_key(frequency)
+freq_list = freq.keys()
+freq_list.sort()
+for f in freq_list:
+       print f, len(freq[f]), freq[f]
+
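+# histogram: bin the collected durations (converted to days) into half-day
+# buckets and print a cumulative count per bucket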
+times.sort()
+bins = {}
+for i in range(0,200,1):
+       step = i/2.0
+       bins[step] = []
+       
+for t in times:
+       # convert seconds to days and drop the value into its half-day bin
+       t = t/60.0/60.0/24.0
+       b = floor(t*2)/2.0
+       if b not in bins:
+               bins[b] = []
+       bins[b].append(t)
+
+keys = bins.keys()
+keys.sort()
+total = 0
+for k in keys:
+       total += len(bins[k])
+       print k, len(bins[k]), total
diff --git a/statistics/harvest_tt_respond.py b/statistics/harvest_tt_respond.py
new file mode 100755 (executable)
index 0000000..190900a
--- /dev/null
@@ -0,0 +1,76 @@
+#!/usr/bin/python
+
+# Collect statistics from the myops db on site response times.
+# For every site that goes down we need the time between the 'notice'
+# action and the subsequent 'pause_penalty' action.
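+#
+# For example (hypothetical data), an action sequence such as
+#      notice, notice, pause_penalty, notice, pause_penalty
+# yields the (notice, pause_penalty) index pairs (0, 2) and (3, 4); the
+# harvested interval is the difference of their date_created timestamps.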
+
+from monitor.database.info.model import *
+from math import *
+import sys
+
+def find_next(history_list, from_index, status):
+       # Search forwards from from_index for the next action whose
+       # action_type contains the given status string.  The list length
+       # doubles as the "not found" sentinel.
+       list_len = len(history_list)
+       for i in range(max(0, from_index), list_len, 1):
+               v = history_list[i]
+               if status in v.action_type:
+                       return i
+
+       return list_len
+
+def diff_time(t1, t2):
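+       # total number of seconds in the timedelta t1 - t2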
+       d = t1-t2
+       return d.days*60*60*24 + d.seconds
+
+
+times = []
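+# Walk each site's actions in chronological order, pairing every 'notice'
+# action with the next 'pause_penalty' action.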
+for index,site in enumerate(HistorySiteRecord.query.all()):
+
+       acts = ActionRecord.query.filter_by(loginbase=site.loginbase).order_by(ActionRecord.date_created.asc()).all()
+       act_len = len(acts)
+       print site.loginbase, act_len
+
+       #for a in acts:
+       #       print a.date_created, a.loginbase, a.action_type
+       i=0
+       pairs = []
+       while i < act_len:
+               i = find_next(acts, i, 'notice')
+               i2= find_next(acts, i, 'pause_penalty')
+               print i, i2
+               if i == act_len or i2 == act_len:
+                       break
+
+               print i, i2
+               a1 = acts[i]
+               print a1.date_created, a1.loginbase, a1.action_type
+               a2 = acts[i2]
+               print a2.date_created, a2.loginbase, a2.action_type
+               # advance past the matched pause_penalty so it is not re-matched
+               i = i2 + 1
+               pairs.append((a1,a2))
+       
+       # list of all times
+       for p in pairs:
+               if diff_time(p[1].date_created,p[0].date_created) < 0:
+                       print "error: negative notice -> pause_penalty interval for", site.loginbase
+                       sys.exit(1)
+               times.append(diff_time(p[1].date_created,p[0].date_created))
+
+
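+# histogram: bin the notice -> pause_penalty intervals (converted to days)
+# into half-day buckets and print a cumulative count per bucket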
+times.sort()
+bins = {}
+for i in range(0,200,1):
+       step = i/2.0
+       bins[step] = []
+       
+for t in times:
+       # convert seconds to days and drop the value into its half-day bin
+       t = t/60.0/60.0/24.0
+       b = floor(t*2)/2.0
+       if b not in bins:
+               bins[b] = []
+       bins[b].append(t)
+
+keys = bins.keys()
+keys.sort()
+total = 0
+for k in keys:
+       total += len(bins[k])
+       print k, len(bins[k]), total