Added commands for interacting with Google's spreadsheets:
author Stephen Soltesz <soltesz@cs.princeton.edu>
Fri, 27 May 2011 22:40:15 +0000 (22:40 +0000)
committer Stephen Soltesz <soltesz@cs.princeton.edu>
Fri, 27 May 2011 22:40:15 +0000 (22:40 +0000)
    statistics/add-google-record.py
    statistics/get-records.py
Added commands for parsing the RT database and generating figures.
Added a command for dumping the monitor DB to CSV (dump_db_m3_raw.py).
Minor fixes.
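
Intended usage of the new statistics commands (a sketch, mirroring the
commented-out invocation added to automate-default.sh and the comments in
rt_s3_overhead.r; the --email/--password values are placeholders):

    # get-records.py prints "--labels=... --values=..." for nodes or sites,
    # which add-google-record.py records in the named Google spreadsheet.
    statistics/add-google-record.py --email <email> --password <password> \
        --database MonitorStats --sheet NodeHistory \
        `statistics/get-records.py nodes`

    # RT ticket statistics (queue 3): cache raw transactions, flatten to CSV,
    # then render the figure (run from statistics/, e.g. with Rscript; the R
    # script sources functions.r and reads the CSV by name).
    ./rt_s1_raw_dump.py --runsql
    ./rt_s2_parse_raw.py 3 > rt_data_2004-2011.csv
    Rscript rt_s3_overhead.r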

12 files changed:
Monitor.spec
automate-default.sh
commands/nodebad.py
monitor/common.py
monitor/reboot.py
statistics/add-google-record.py
statistics/dump_db_m3_raw.py [new file with mode: 0755]
statistics/dump_subset.py [new file with mode: 0755]
statistics/get-records.py [new file with mode: 0755]
statistics/rt_s1_raw_dump.py [new file with mode: 0755]
statistics/rt_s2_parse_raw.py [new file with mode: 0755]
statistics/rt_s3_overhead.r [new file with mode: 0644]

index 658c07a..d2970bb 100644 (file)
@@ -133,7 +133,7 @@ The server side includes all python modules and scripts needed to fully
 operate, track, and interact with any third-party monitoring software, such
 as Zabbix DB.
 
-####################################### RunlevelAgent
+######################################## RunlevelAgent
 %package runlevelagent
 summary: the RunlevelAgent that reports node runlevels
 group: applications/system
@@ -168,7 +168,7 @@ install -d $RPM_BUILD_ROOT/%{python_sitearch}/monitor
 # plc.d scripts
 install -D -m 644 monitor.functions $RPM_BUILD_ROOT/%{_sysconfdir}/plc.d/monitor.functions
 install -D -m 755 monitor-server.init $RPM_BUILD_ROOT/%{_sysconfdir}/plc.d/monitor
-install -D -m 755 zabbix/monitor-zabbix.init $RPM_BUILD_ROOT/%{_sysconfdir}/plc.d/zabbix
+#install -D -m 755 zabbix/monitor-zabbix.init $RPM_BUILD_ROOT/%{_sysconfdir}/plc.d/zabbix
 
 install -D -m 755 nagios/monitor-nagios.init $RPM_BUILD_ROOT/%{_sysconfdir}/plc.d/monitor-nagios
 install -D -m 644 nagios/monitor-nagios.cron $RPM_BUILD_ROOT/%{_sysconfdir}/cron.d/monitor-nagios.cron
@@ -233,13 +233,12 @@ rm -rf $RPM_BUILD_ROOT
 /usr/share/%{name}/RunlevelAgent.py*
 /usr/share/%{name}/automate-default.sh
 /usr/share/%{name}/monitor-default.conf
-/usr/share/%{name}/monitor-runlevelagent.init
 /usr/share/%{name}/monitor-server.cron
 /usr/share/%{name}/monitor-server.init
 /usr/share/%{name}/monitor.functions
 /usr/share/%{name}/setup.py*
 /usr/share/%{name}/threadpool.py*
-/usr/share/%{name}/zabbix.spec
+#/usr/share/%{name}/zabbix.spec
 
 /usr/share/%{name}/bootcd
 /usr/share/%{name}/commands
@@ -259,7 +258,7 @@ rm -rf $RPM_BUILD_ROOT
 %{_sysconfdir}/cron.d/monitor-server.cron
 %{_sysconfdir}/plc.d/monitor
 %{_sysconfdir}/plc.d/monitor.functions
-%{_sysconfdir}/plc.d/zabbix
+#%{_sysconfdir}/plc.d/zabbix
 %{_sysconfdir}/httpd/conf.d
 %{python_sitearch}
 
index db53fd4..d691c66 100755 (executable)
@@ -27,6 +27,7 @@ API=$(${MONITOR_SCRIPT_ROOT}/tools/testapi.py)
 if [ "$API" != "ok" ] ; then 
        # NOTE: Do not try to run any commands if the API is obviously broken.
        echo "API IS DOWN : "`date`
+    send_mail "API IS DOWN: canceled monitor run for `date`" "have a nice day..."
        exit 1
 fi
 
@@ -91,9 +92,25 @@ ps ax | grep BatchMode | grep -v grep | awk '{print $1}' | xargs -r kill || :
 ps ax | grep locfg | grep -v grep | awk '{print $1}' | xargs -r kill || :
 
 
+${MONITOR_SCRIPT_ROOT}/commands/repair.py $DATE || :
 ${MONITOR_SCRIPT_ROOT}/commands/policy.py $DATE || :
 curl -s 'http://summer.cs.princeton.edu/status/tabulator.cgi?table=table_nodeview&formatcsv' > /var/lib/monitor/comon/$DATE.comon.csv || :
 
+#${MONITOR_SCRIPT_ROOT}/statistics/add-google-record.py --email email  --password password --database MonitorStats --sheet NodeHistory `${MONITOR_SCRIPT_ROOT}/statistics/get-records.py nodes`
+#${MONITOR_SCRIPT_ROOT}/statistics/add-google-record.py --email email --password password --database MonitorStats --sheet SiteHistory `${MONITOR_SCRIPT_ROOT}/statistics/get-records.py sites`
+
 cp ${MONITOR_SCRIPT_ROOT}/monitor.log ${MONITOR_ARCHIVE_ROOT}/`date +%F-%H:%M`.monitor.log
 service plc restart monitor || :
 rm -f $MONITOR_PID
+
+D=`date +%F-%H:%M`
+
+# NOTE: check log for major sections.
+wc=`grep -E "^(findbad|findbadpcu|nodebad|pcubad|sitebad|apply-policy)$" ${MONITOR_SCRIPT_ROOT}/monitor.log | wc -l`
+if [[ $wc -ge 6 ]] ; then 
+    send_mail "A:finished monitor run for $SD at $D" "Thank you..."
+else
+    send_mail "ERROR finished monitor run for $SD at $D" "Missing some sections:
+        $(grep -E "findbad|findbadpcu|nodebad|pcubad|sitebad|apply-policy" ${MONITOR_SCRIPT_ROOT}/monitor.log)"
+fi
+
index d1b2d35..5fd34ea 100755 (executable)
@@ -32,6 +32,16 @@ def main2(config):
     
     checkAndRecordState(l_nodes, l_plcnodes)
 
+def get_uptime(uptime_str):
+    up = 0
+    if len(uptime_str) > 0:
+        try:
+            up = float(uptime_str.split()[0])
+            print "uptime: %s" % up
+        except (ValueError, IndexError):
+            up = 0
+    return up
+
 # Node states:
 
 def check_node_state(rec, node):
@@ -95,9 +105,15 @@ def check_node_state(rec, node):
             node.last_changed = datetime.now()
 
     if node_state == 'BOOT' and node.status != 'online' and node.status != 'good':
-        print "changed status from %s to online" % node.status
-        node.status = 'online'
-        node.last_changed = datetime.now()
+        old_status = node.status
+        uptime = get_uptime(rec.uptime)
+        if uptime > (60*60*24):
+            node.status = 'good'
+            node.last_changed = datetime.now() - timedelta(0,uptime)
+        else:
+            node.status = 'online'
+            node.last_changed = datetime.now()
+        print "changed status from %s to %s" % (old_status, node.status)
 
     #################################################################
     # Switch temporary hystersis states into their 'firm' states.
index 5cf8151..ae32156 100644 (file)
@@ -283,13 +283,13 @@ def found_within(recent_actions, action_type, within):
        return False
        
 
-class Time:
-    @classmethod
-    def dt_to_ts(cls, dt):
-        t = time.mktime(dt.timetuple())
-        return t
-
-    @classmethod
-    def ts_to_dt(cls, ts):
-        d = datetime.fromtimestamp(ts)
-        return d
+#class Time:
+#    @classmethod
+#    def dt_to_ts(cls, dt):
+#        t = time.mktime(dt.timetuple())
+#        return t
+#
+#    @classmethod
+#    def ts_to_dt(cls, ts):
+#        d = datetime.fromtimestamp(ts)
+#        return d
index cd49a59..46d667b 100755 (executable)
@@ -9,7 +9,7 @@ import xml, xmlrpclib
 import errno, time, traceback
 import urllib2
 import urllib
-import threading, popen2
+import threading
 import array, struct
 import base64
 from subprocess import PIPE, Popen
index ae0d313..31f1a89 100755 (executable)
@@ -76,9 +76,9 @@ def main():
                         values=None,
                         valuelist=None,
                         update=None,
-                                               email=None,
-                                               password=None,
-                                               prefix="",
+                        email=None,
+                        password=None,
+                        prefix="",
                         create=False)
     parser.add_option("", "--email", dest="email", help="")
     parser.add_option("", "--password", dest="password", help="")
@@ -92,9 +92,9 @@ def main():
     parser.add_option("", "--prefix", dest="prefix", help="add a prefix to numeric headers")
 
     (config, args) = parser.parse_args()
-    #if len(sys.argv) == 1:
-    #    parser.print_help()
-    #    sys.exit(1)
+    if len(sys.argv) == 1:
+        parser.print_help()
+        sys.exit(1)
 
     email = config.email
     password = config.password
@@ -107,10 +107,11 @@ def main():
 
     if config.labels:
         config.labels = config.labels.split(',')
-       config.labels = [config.labels[0] ] + [ config.prefix + l for l in config.labels[1:] ]
+    config.labels = [config.labels[0] ] + [ config.prefix + l for l in config.labels[1:] ]
 
     data_list = []
     if config.values:
+        config.values = config.values.replace("+", " ")
         config.values = config.values.split(',')
         data_list = [dict(zip(config.labels, config.values))]
         print data_list
diff --git a/statistics/dump_db_m3_raw.py b/statistics/dump_db_m3_raw.py
new file mode 100755 (executable)
index 0000000..477c269
--- /dev/null
@@ -0,0 +1,125 @@
+#!/usr/bin/python
+
+from monitor.database.info.model import *
+import sys
+from math import *
+from monitor.generic import *
+from datetime import datetime, timedelta
+import time
+import string
+
+def list_to_str(list):
+    ret = []
+    for l in list:
+        if type(l) == type([]):
+            ret.append(" ".join([str(i) for i in l]))
+        else:
+            s = str(l)
+            s = s.translate(string.maketrans(",\n\r\"", ";||'"))
+            ret.append(s)
+    return ret
+
+def add_if_not_present(d, add_fields=None):
+    if type(d) == type({}):
+        key_list = d.keys()
+        for k in add_fields.keys():
+            if k not in key_list:
+                d[k] = add_fields[k]
+            else:
+                add_if_not_present(d[k], add_fields[k])
+    return
+
+def dict_to_list(d, add_fields=None, ignore_fields=None):
+    """ return a list of header names from a nested dict 
+        { 'a' : 1, 'b' : { 'c':2, 'd' : 3}} 
+        would return:
+        [ 'a', 'b_c', 'b_d' ]
+    """
+    k_list = []
+    d_list = []
+    if add_fields: add_if_not_present(d, add_fields)
+    for k in d.keys():
+        if type(d[k]) == type({}):
+            (z_kl, z_dl) = dict_to_list(d[k])
+            for i,zk in enumerate(map(lambda x: "%s_%s" % (k,x), z_kl)):
+                if ignore_fields is None or zk not in ignore_fields:
+                    k_list.append(zk) 
+                    d_list.append(z_dl[i]) 
+        else:
+            if ignore_fields is None or k not in ignore_fields:
+                k_list.append(k)
+                d_list.append(d[k])
+    r = zip(k_list, list_to_str(d_list))
+    r.sort(lambda x,y: cmp(x[0], y[0]))
+    return ([ i[0] for i in r ], [ i[1] for i in r ])
+
+
+if len(sys.argv) > 1 and sys.argv[1] == "--action":
+    args = sys.argv[2:]
+    find_action = True
+else:
+    if len(sys.argv) > 1:
+        since_time = Time.str_to_dt(sys.argv[1], "%Y-%m-%d")
+        skip = int(sys.argv[2])
+        args = sys.argv[3:]
+    else:
+        args = sys.argv[1:]; since_time = datetime(1970, 1, 1); skip = 0  # defaults: dump everything
+
+    find_action = False
+
+first_time = True
+index = 0
+t1=t2=0
+if find_action:
+    a = ActionRecord.query.all()
+    print >>sys.stderr, len(a)
+    for node in a:
+
+        print >>sys.stderr, index, node.hostname, t2-t1
+        index += 1
+        t1 = time.time()
+
+        d = node.__dict__
+        (k,l) = dict_to_list(d)
+        if first_time:
+            print "timestamp_unix,%s" % ",".join(k[1:])
+            first_time = False
+
+        print "%s,%s" % (Time.dt_to_ts(d['date_created']), ",".join(l[1:]))
+        t2=time.time()
+
+else:
+    ignore_fields = ['plc_node_stats_nodenetwork_ids', 'port_status_806', 'port_status_22', 'port_status_80' ]
+    add_fields = {'plc_node_stats' : { 'last_boot' : 0, 'last_pcu_confirmation' : 0, 'last_pcu_reboot' : 0, 'last_download' : 0, 'run_level': 0, }}
+    for node in FindbadNodeRecord.query.all():
+
+        print >>sys.stderr, index, node.hostname, t2-t1
+        index += 1
+        t1 = time.time()
+        if index > skip :
+            for v in node.versions:
+
+                d = v.__dict__
+                (k,l) = dict_to_list(d, add_fields=add_fields, ignore_fields=ignore_fields)
+                if not first_time:
+                    if cmp(k, k_last) != 0:
+                        print >>sys.stderr, "mismatching key lists"
+                        print >>sys.stderr, k
+                        print >>sys.stderr, k_last
+                        for i in zip(k,k_last):
+                            print >>sys.stderr, i
+                        print >>sys.stderr, set(k) - set(k_last)
+                        print >>sys.stderr, set(k_last) - set(k)
+                        #sys.exit(1)
+                        continue
+
+                if d['timestamp'] > since_time:
+                    if first_time:
+                        print "timestamp_unix,%s" % ",".join(k[1:])
+                        first_time = False
+
+
+                    print "%s,%s" % (Time.dt_to_ts(d['timestamp']), ",".join(l[1:]))
+
+                k_last = k
+        t2=time.time()
diff --git a/statistics/dump_subset.py b/statistics/dump_subset.py
new file mode 100755 (executable)
index 0000000..ccb1659
--- /dev/null
@@ -0,0 +1,111 @@
+#!/usr/bin/python
+
+import sys
+import os
+
+def recent(q, val, length):
+    return [val] + q[:length-1]
+
+def add(d, path, v):
+    if path not in d:
+        d[path] = []
+    d[path] = recent(d[path], v, 6)
+    #d[path].append(v)
+
+def stateof(l):
+    if len(set(l)) == 1:
+        return l[0]
+    else:
+        return "BOOT"
+
+def in_hard_state(l):
+    if len(set(l)) == 1:
+        return True
+    else:
+        return False
+
+def main():
+    from optparse import OptionParser
+    parser = OptionParser()
+    
+    parser.set_defaults(database="",
+                        sheet="",
+                        fields="date_checked,timestamp_unix,hostname,uptime,kernel_version,observed_status",
+                        values=None,
+                        valuelist=None,
+                        update=None,
+                        fieldpositions=None,
+                        showfieldpositions=False,
+                        create=False)
+    parser.add_option("", "--database", dest="database", help="")
+    parser.add_option("", "--create", dest="create", action="store_true", help="")
+    parser.add_option("", "--sheet",  dest="sheet", help="")
+    parser.add_option("", "--values", dest="values", help="")
+    parser.add_option("", "--valuelist", dest="valuelist", help="")
+    parser.add_option("", "--update", dest="update", help="")
+    parser.add_option("", "--fields", dest="fields", help="")
+    parser.add_option("", "--fieldpositions", dest="fieldpositions", help="")
+    parser.add_option("", "--showfieldpositions", dest="showfieldpositions", action="store_true", help="")
+    (config, args) = parser.parse_args()
+
+    if config.fields:
+        config.fields = config.fields.split(',')
+
+    if config.fieldpositions:
+        config.fieldpositions = [ int(x) for x in config.fieldpositions.split(',') ]
+
+    first = True
+    #for f in config.fields:
+    #    print "%s,"%f,
+    #print ""
+
+    while True:
+        line = sys.stdin.readline()
+        if not line:
+            break
+        line = line.strip()
+        # NOTE assumes ts are ordered.
+        #try:
+        if True:
+            fields = line.split(',')
+            if first:
+                headers = [ f for f in fields ]
+                #for i,f in enumerate(headers):
+                #    print i, headers[i]
+                first=False
+                if config.showfieldpositions:
+                    for f in config.fields:
+                        i = headers.index(f)
+                        print i,
+                    print ""
+
+
+        #except:
+        #    print >>sys.stderr, "EXCEPTION:", line
+        #    sys.exit(1)
+        #for i,f in enumerate(fields):
+            #print i, headers
+            #print i, f
+        #    print i, headers[i], f
+
+        for pos,f in enumerate(config.fields):
+
+            if config.fieldpositions:
+                i = config.fieldpositions[pos]
+            else:
+                try:
+                    i = headers.index(f)
+                except:
+                    print "could not find field: %s" % f
+                    sys.exit(1)
+
+            try:
+                v = fields[i]
+            except:
+                continue
+            
+            print "%s," % v,
+        print ""
+
+if __name__ == "__main__":
+    main()
diff --git a/statistics/get-records.py b/statistics/get-records.py
new file mode 100755 (executable)
index 0000000..b6a9ecc
--- /dev/null
@@ -0,0 +1,60 @@
+#!/usr/bin/python
+
+from monitor.database.info.model import *
+import time
+import sys
+
+sumdata = {}
+sumdata['nodes'] = {}
+sumdata['sites'] = {}
+sumdata['pcus'] = {}
+
+def summarize(query, type):
+       for o in query:
+               if o.status not in sumdata[type]:
+                       sumdata[type][o.status] = 0
+               sumdata[type][o.status] += 1
+
+time_str = time.strftime("%m/%d/%y+%H:%M")
+
+if len(sys.argv) == 1:
+       print "For use in conjunction with add-google-record.py"
+       print "Usage: %s <nodes|sites>" % sys.argv[0]
+       sys.exit(1)
+
+elif sys.argv[1] == "sites":
+
+       site_type_list = ['date', 'good', 'offline', 'down', 'online', 'new']
+
+       for k in site_type_list:
+               sumdata['sites'][k]=0 
+
+       fbquery = HistorySiteRecord.query.all()
+       summarize(fbquery, 'sites')
+       sumdata['sites']['date'] = time_str
+       for f in sumdata['sites']:
+               sumdata['sites'][f] = str(sumdata['sites'][f])
+
+       l = ",".join(site_type_list)
+       v = ",".join([ sumdata['sites'][k] for k in site_type_list ])
+       print "--labels=%s --values=%s" % ( l, v )
+
+elif sys.argv[1] == "nodes":
+
+       node_type_list = ['date', 'good', 'offline', 'down', 'online', 'disabled', 'failboot', 'safeboot']
+       for k in node_type_list:
+               sumdata['nodes'][k]=0 
+       fbquery = HistoryNodeRecord.query.all()
+       summarize(fbquery, 'nodes')
+       sumdata['nodes']['date'] = time_str
+       for f in sumdata['nodes']:
+               sumdata['nodes'][f] = str(sumdata['nodes'][f])
+
+       l = ",".join(node_type_list)
+       v = ",".join([ sumdata['nodes'][k] for k in node_type_list ])
+       print "--labels=%s --values=%s" % ( l, v )
+
+
+#row.content
+#row.Push()
+#row.Pull()
diff --git a/statistics/rt_s1_raw_dump.py b/statistics/rt_s1_raw_dump.py
new file mode 100755 (executable)
index 0000000..da8c1fb
--- /dev/null
@@ -0,0 +1,229 @@
+#!/usr/bin/python
+
+import os, sys, shutil
+import MySQLdb
+import string
+
+import re
+
+import time 
+from datetime import datetime
+
+from monitor import config
+from monitor import database
+
+def convert_time(time_str):
+       if '-' in time_str:
+               try:
+                       tup = time.strptime(time_str, "%Y-%m-%d %H:%M:%S")
+               except ValueError:
+                       tup = time.strptime(time_str, "%Y-%m-%d-%H:%M")
+       elif '/' in time_str:
+               tup = time.strptime(time_str, "%m/%d/%Y")
+       else:
+               tup = time.strptime(time_str, "%m/%d/%Y")
+       d_ret = datetime.fromtimestamp(time.mktime(tup))
+       return d_ret
+
+def open_rt_db():
+
+       try:
+               rt_db = MySQLdb.connect(host=config.RT_DB_HOST,
+                                                               user=config.RT_DB_USER,
+                                                               passwd=config.RT_DB_PASSWORD,
+                                                               db=config.RT_DB_NAME)
+       except Exception, err:
+               print "Failed to connect to RT database: %s" %err
+               return -1
+
+       return rt_db
+
+def fetch_from_db(db, sql):
+       try:
+               # create a 'cursor' (required by MySQLdb)
+               c = db.cursor()
+               c.execute(sql)
+       except Exception, err:
+               print "Could not execute RT query %s" %err
+               return -1
+
+       # fetch all rows (list of lists)
+       raw = c.fetchall()
+       return raw
+       
+
+def get_rt_tickets(date):
+       print "open db connection"
+       db = open_rt_db()
+       if db == -1:
+               return ""
+
+       if date is None:
+               date_select = ""
+       else:
+               #date_select = "AND tr.Created >= DATE_SUB(CURDATE(),INTERVAL %s DAY)" % date
+               date_select = "AND tr.Created >= STR_TO_DATE('%s', '%%Y-%%m-%%d')" % date
+
+       sql = """SELECT tk.id, tk.Queue, tr.Type, tr.Field, tr.OldValue, tr.NewValue, 
+                                       tr.Created, at.id, at.Subject, at.Content, us.Name
+                        FROM Tickets as tk, Transactions as tr, Users as us
+                        LEFT OUTER JOIN Attachments as at ON tr.id=at.TransactionId 
+                        WHERE (tk.Queue=3 OR tk.Queue=22) AND tk.id=tr.ObjectId AND tk.id>0 AND
+                        us.id=tr.Creator %s""" % date_select
+                        #WHERE (tk.Queue=3 OR tk.Queue=22) AND tk.id=tr.ObjectId AND tk.id>12506 AND
+                        # WHERE (tk.Queue=22) AND tk.id=tr.ObjectId AND tk.id>40800 AND 
+                        #WHERE (tk.Queue=22) AND tk.id=tr.ObjectId AND tk.id>39896 AND tk.id<42241 AND ## (oct15th2008) 
+                        #WHERE (tk.Queue=22) AND tk.id=tr.ObjectId AND tk.id>40800 AND ## (1st3months)
+                        #WHERE (tk.Queue=3 OR tk.Queue=22) AND tk.id=tr.ObjectId AND tk.id>12506  # 12506 jan-1-2006
+
+       print sql
+       print "run query"
+       raw = fetch_from_db(db, sql)
+       if raw == -1:
+               return raw
+       
+       tickets = {}
+       subject_map = {}
+       def parse_ticket(x):
+               ticket_id = int(x[0])
+               queue = int(x[1])
+               trtype = str(x[2])
+               field = x[3]
+               oldvalue = x[4]
+               newvalue = x[5]
+               datecreated = x[6]              # already a datetime object
+               attachmentid = x[7]
+               subject = x[8]
+               content = x[9]
+               creator = x[10]
+
+               if ticket_id not in tickets:
+                       print "found new ticket_id", ticket_id
+                       tickets[ticket_id] = {'queue' : queue,
+                                                                 'transactions' : [] }
+
+               if subject != "":
+                       subject_map[ticket_id] = subject
+               elif ticket_id in subject_map:
+                       subject = subject_map[ticket_id]
+               else:
+                       # subject == "" and no record in subject_map yet
+                       # should probably put on a queue to be processed later.
+                       print "no subject for %s" % ticket_id
+
+               transaction = {
+                                       'type' : trtype,
+                                       'field' : field,
+                                       'oldvalue' : oldvalue,
+                                       'newvalue' : newvalue,
+                                       'datecreated' : datecreated,
+                                       'attachmentid' : attachmentid,
+                                       'creator' : creator,
+                                       'subject' : subject,
+                                       'content' : content,
+                                               }
+               tickets[ticket_id]['transactions'].append(transaction)
+               
+
+       print "sort data"
+       list = map(parse_ticket, raw)
+
+       db.close()
+
+
+       return tickets
+
+
+# flow chart:
+#              classify:
+#                      for each ticket
+#                              classify into category
+#                              remove from ticket set, add to classified-set
+#              
+#              add new search patterns, 
+#              re-run classify algorithm
+
+re_map = [
+       #('mom', {'pattern' : '.*pl_mom.*'}),
+       #('technical-support', {'pattern' : '.*PlanetLab node.* down'}),
+       #('technical-support', {'pattern' : 'Node .* was stopped by'}),  # and opened
+       #('technical-support', {'pattern' : 'bootcd|BootCD|bootCD|boot cd|boot CD|booting'}),
+       #('technical-support', {'pattern' : '.* failed to authenticate'}),
+       #('technical-support', {'pattern' : '.* fails to boot'}),
+       #('technical-support', {'pattern' : '.* fail.* to boot'}),
+       #('technical-support', {'pattern' : '.* failed to authenticate'}),
+       #('technical-support', {'pattern' : 'curl (60)|.* CA certificates.*|peer certificate.*authenticated'}),
+       #('technical-support', {'pattern' : '(usb|USB).*(key|Disk|stick|boot|help|problem|trouble)'}), 
+       #('complaint', {'pattern' : '.*omplaint|.*attack'}),
+       #('complaint', {'pattern' : '.* stop .*'}), # and subject
+       #('spam', {}),
+       #('user-support', {'pattern' : '(R|r)egistration|(R|r)egister'}),
+       #('user-support', {'pattern' : 'password reset|reset password'}),
+       ('user-support', {'pattern' : 'New PI account registration from'}),
+       #('other', {}),
+]
+
+def sort_tickets(tickets, re_map):
+
+       ticket_count = len(tickets.keys())
+       marked_subject = 0
+       marked_content = 0
+       for ticket_id in sorted(tickets.keys()):
+               for i,(name, pattern) in enumerate(re_map):
+                       if 'compile' not in pattern:
+                               pattern['compile'] = re.compile(pattern['pattern'])
+                       pat = pattern['compile']
+                       for transaction in tickets[ticket_id]['transactions']:
+
+                               try:
+                                       if transaction['subject'] and re.match(pat, transaction['subject']):
+                                               print "ticket %s matches pattern %s: %s" % (ticket_id, 
+                                                               pattern['pattern'], transaction['subject'])
+                                               marked_subject += 1
+                                               break
+                                       if transaction['content'] and re.match(pat, transaction['content']):
+                                               print "ticket %s matches pattern %s: %s" % (ticket_id, 
+                                                               pattern['pattern'], transaction['subject'])
+                                               #if transaction['subject'] == "":
+                                               #       print transaction
+                                               marked_content += 1
+                                               break
+                               except:
+                                       import traceback
+                                       print traceback.print_exc()
+                                       print transaction
+                                       print ticket_id
+                                       print pattern
+                                       sys.exit(1)
+
+       print ticket_count
+       print marked_subject
+       print marked_content
+       print ticket_count - marked_subject - marked_content
+
+def main():
+       from optparse import OptionParser
+       parser = OptionParser()
+
+       parser.set_defaults(runsql=False,date=None)
+
+       parser.add_option("", "--runsql", dest="runsql", action="store_true",
+                                               help="Whether to collect data from the MySQL server before "+
+                                                       "caching it, or to just use the previously collected data.")
+       parser.add_option("", "--date", dest="date", 
+                                               help="Query all transactions after the given date.")
+
+       (config, args) = parser.parse_args()
+       if len(sys.argv) == 1:
+               parser.print_help()
+               sys.exit(1)
+
+       if config.runsql:
+               tickets = get_rt_tickets(config.date)
+               database.dbDump("survey_tickets", tickets)
+       else:
+               print "loading"
+               tickets = database.dbLoad("survey_tickets")
+
+if __name__ == '__main__':
+       main()
diff --git a/statistics/rt_s2_parse_raw.py b/statistics/rt_s2_parse_raw.py
new file mode 100755 (executable)
index 0000000..006b17c
--- /dev/null
@@ -0,0 +1,54 @@
+#!/usr/bin/python
+
+from datetime import datetime
+
+from monitor import config
+from monitor import database
+from monitor.common import Time
+import sys 
+
+def main():
+    tickets = database.dbLoad("survey_tickets")
+
+#if True:
+    #f = open('rt_monitor_data.csv','w')
+    queue = int(sys.argv[1])
+    exclude = ['monitor']
+
+    f = sys.stdout
+    print >>f, "ticket_id,s1,s2,start,lastreply,replies,complete,creator"
+    for t in sorted(tickets.keys()):
+        ticket_id = 0
+        start = datetime(2004,1,1)
+        lastreply = datetime.now()
+        resolved = 0
+        complete = 0
+        replies = 1
+        creator = ''
+        if tickets[t]['queue'] != queue: continue
+        for tr in tickets[t]['transactions']:
+            # create - ticketid,creator, datecreated, 
+            # correspond - creator, datecreated, content
+            # status - newvalue = resolved
+            if tr['type'] == 'Create':
+                start = tr['datecreated']
+                creator = tr['creator']
+                if complete==0: complete = 1
+            elif tr['type'] == 'Correspond':
+                if tr['creator'] not in exclude:
+                    lastreply = tr['datecreated']
+                    replies += 1
+                    if complete == 1: complete = 2
+
+            elif tr['type'] == 'Status' and tr['newvalue'] == 'resolved':
+                resolved = 1
+                if complete == 2: complete = 3
+
+        if replies < 1100:
+            if complete in [2,3]: complete = 1
+            else: complete = 0
+            print >>f, "%s,%s,%s,%s,%s,%s,%s,%s" % (t, start.strftime('%Y-%m-%d'), lastreply.strftime('%Y-%m-%d'), Time.dt_to_ts(start), Time.dt_to_ts(lastreply), replies, complete, creator)
+    f.close()
+
+if __name__ == '__main__':
+       main()
diff --git a/statistics/rt_s3_overhead.r b/statistics/rt_s3_overhead.r
new file mode 100644 (file)
index 0000000..682b6a1
--- /dev/null
@@ -0,0 +1,193 @@
+source("functions.r");
+
+
+
+median_time_to_resolve_window <- function (t, tg, window)
+{
+    hbreaks<-tg$week_ts
+
+    xx<-NULL;
+    yy<-NULL;
+    yy_sd_high<-NULL;
+    yy_sd_low<-NULL;
+    date_index <- NULL;
+    q_list <- NULL;
+
+    x<-seq(-20,20,0.01)
+
+    for ( i in seq(1,length(hbreaks)-window-1) )
+    {
+        print (sprintf("round %s of %s", i, length(hbreaks)-window-1))
+        # get range from t
+        t_sub <- t[which(t$start > hbreaks[i] & t$start<= hbreaks[i+window]),]
+        if ( length(t_sub$start) <= 1 )  { next }
+        # take log, then sn.mle -> h
+        d <- (t_sub$lastreply - t_sub$start)/(60*60)    # hours
+        d <- log(d)                                     # log(hours)
+            # sn.mle
+        print (sprintf("length: %s", length(d)))
+        q<-quantile(d)
+        print(q)
+
+        date_index <- c(date_index, round(i+window/2))
+
+        xx<- c(xx, hbreaks[round(i+window/2)])
+        q_list <- rbind(q_list, q)
+
+    }
+    return (cbind(xx,q_list))
+}
+
+available_nodes <- function (ns, from, to, type, fmt="%b")
+{
+    # find 'type' range of days
+    dates <-seq(as.Date(from), as.Date(to), type)
+    months <- format(dates, fmt)
+    hbreaks<-unclass(as.POSIXct(dates))
+
+    xx<-NULL;
+    yy<-NULL;
+
+    for ( i in seq(1,length(hbreaks)-1) )
+    {
+        # get range from ns
+        ns_sub <- ns[which(ns$date > hbreaks[i] & ns$date <= hbreaks[i+1] & ns$status == 'BOOT'),]
+        nodes <- length(ns_sub$date)
+
+        xx<- c(xx, hbreaks[i])
+        yy<- c(yy, nodes)
+
+    }
+    m<- months[1:length(months)-1]
+    return (rbind(xx,yy,m))
+}
+
+
+open_tickets <- function (t, tg)
+{
+    xx<-NULL;
+    yy<-NULL;
+
+    hbreaks<-tg$day_ts
+
+    for ( i in seq(1,length(hbreaks)-1) )
+    {
+        # identify any tickets with a start time in range, lastreply in range
+        # or where both start is less and lastreply is greater than the range
+        t_sub <- t[which( (t$start < hbreaks[i] & t$lastreply > hbreaks[i+1]) | 
+                          (t$start > hbreaks[i] & t$start <= hbreaks[i+1]) | 
+                          (t$lastreply > hbreaks[i] & t$lastreply <= hbreaks[i+1]) ),]
+        tickets <- length(t_sub$start)
+
+        xx<- c(xx, hbreaks[i])
+        yy<- c(yy, tickets)
+    }
+    return (rbind(xx,yy))
+}
+
+online_nodes <- function (fb)
+{
+    breaks <- unique(fb$timestamp)
+    n<-NULL
+    o<-NULL
+    x<-NULL
+    for (i in seq(1,length(breaks)) )
+    {
+        ts <- breaks[i]
+        sub <- fb[which(fb$timestamp == ts),]
+        node_count   <- length(unique(sub$hostname))
+        online_count <- length(unique(sub$hostname[which(sub$state=='BOOT')]))
+        x<-c(x,ts)
+        n<-c(n,node_count)
+        o<-c(o,online_count)
+    }
+    print(length(x))
+    print(length(n))
+    print(length(o))
+    return (rbind(x,n,o))
+}
+
+#####
+
+# system("rt_s1_raw_dump.py --runsql");
+# system("rt_s2_parse_raw.py 3 > rt_data.csv");
+# t <- read.csv('rt_data_2004-2011.csv', sep=',', header=TRUE)
+#t <- read.csv(, sep=',', header=TRUE)
+
+draw_rt_data <- function (input_filename, output_filename, start_date, end_date, draw=TRUE, one=FALSE)
+{
+    t <- read.csv(input_filename, sep=',', header=TRUE)
+    t2 <- t[which(t$complete == 1),]
+
+    tg <- time_graph_setup(start_date, end_date) 
+    ot <- open_tickets(t2, tg)
+
+    if ( draw == TRUE ) {
+        start_image(output_filename, width=600, height=400)
+    }
+    if ( one == TRUE )
+    {
+        par(mfrow=c(1,1))
+        par(mai=c(0.8,1,0.4,0.1))
+    } else {
+        par(mfrow=c(2,1))
+        par(mai=c(0,1,0.3,0.1))
+    }
+
+    x1<-as.numeric(ot[1,])
+    y1<-as.numeric(ot[2,])
+
+    a_ot<-lowess_smooth(x1, y1)
+
+    plot(x1, y1, col='grey80', type='l', axes=F, 
+        ylab="a) Open Tickets (tickets/day)", xlab="Date",
+        ylim=c(0,120)) # , ylim=c(0,260))
+    lines(a_ot$x, round(a_ot$y), col='black')
+
+    axis(2, las=1)
+    if ( one == TRUE ) {
+        axis(1, labels=tg$month_str, at=tg$month_ts, cex.axis=0.7)
+        axis(1, labels=tg$year_str, at=tg$year_ts, cex.axis=0.7, line=1, lwd=0)
+    }
+
+
+    abline(h=15, lty=3, col='grey80')
+    abline(h=25, lty=3, col='grey80')
+    abline(h=40, lty=3, col='grey80')
+
+    plc_releases(120)
+    if ( one == FALSE )
+    {
+        par(mai=c(1,1,0.1,0.1))
+        for ( s in c(5) ) 
+        {
+            d <- median_time_to_resolve_window(t2, tg, s) # "2004/1/1", "2011/1/28", s, "%b")
+            plot(d[,1], exp(as.numeric(d[,5]))/24, type='l', lty=1, xlab="",
+                    axes=F, ylim=c(0.01, 15), ylab="b) Resolution Time by", col='black',
+                    xlim=c(min(x1), max(x1)))
+            mtext("Quartile (days)", 2, 2)
+            lines(d[,1], exp(as.numeric(d[,4]))/24, lty=1, col='grey50')
+            lines(d[,1], exp(as.numeric(d[,3]))/24, lty=1, col='grey75')
+            axis(1, labels=tg$month_str, at=tg$month_ts, cex.axis=0.7)
+            axis(1, labels=tg$year_str, at=tg$year_ts, cex.axis=0.7, line=1, lwd=0)
+            axis(2, labels=c(0,1,4,7,14), at=c(0,1,4,7,14), las=1)
+            m<-round(max(exp(as.numeric(d[,4]))/24), 2)
+        }
+
+        abline(h=1, lty=3, col='grey80')
+        abline(h=4, lty=3, col='grey80')
+        abline(h=7, lty=3, col='grey80')
+
+        planetlab_releases(15)
+    }
+
+    if ( draw == TRUE ) {
+        end_image()
+    }
+}
+
+#system("./rt_s2_parse_raw.py 3 > rt_data_2004-2011.csv");
+draw_rt_data('rt_data_2004-2011.csv', "rt_operator_support_2004-2011.png", "2004/1/1", "2011/6/1", TRUE, TRUE)
+#draw_rt_data('rt_data_monitor_2004-2011.csv',"rt_operator_monitor_2004-2011.png", "2004/1/1", "2011/4/1")
+
+#draw_rt_data('short_support_20110101.csv',"rt_short_2011.png", "2010/11/1", "2011/4/1", FALSE)