Minor updates:
author Stephen Soltesz <soltesz@cs.princeton.edu>
Wed, 31 Aug 2011 20:11:21 +0000 (20:11 +0000)
committer Stephen Soltesz <soltesz@cs.princeton.edu>
Wed, 31 Aug 2011 20:11:21 +0000 (20:11 +0000)
pcuinfo is a simple db query
enable reboot command
add an fprobe repair to bootman.py
account for new pcu type in monitorweb/controllers

commands/pcuinfo.py
commands/reboot.py
monitor/bootman.py
monitor/reboot.py
web/MonitorWeb/monitorweb/controllers.py

index c9d1e90..108f86c 100755 (executable)
@@ -1,8 +1,8 @@
 #!/usr/bin/python
 
-import database
-import plc
-import parser as parsermodule
+#import database
+from monitor.wrapper import plccache
+import monitor.parser as parsermodule
 import sys
 from reboot import pcu_name, get_pcu_values
 
@@ -12,43 +12,20 @@ def print_dict(dict):
        for key in dict.keys():
                print "%30s : %s" % (key, dict[key])
 
-parser = parsermodule.getParser()
-parser.set_defaults(withpcu=False,
-                                       refresh=False)
-parser.add_option("-f", "--nodelist",dest="filename",default="", metavar="FILE",
-                                 help="Provide the input file for the downnode list")
-parser.add_option("", "--refresh", action="store_true", dest="refresh",
-                                       help="Refresh the cached values")
-
-config = parsermodule.parse_args(parser)
-
-if not config.run:
-       k = config.__dict__.keys()
-       k.sort()
-       for o in k:
-               print o, "=", config.__dict__[o]
-       print "Add --run to actually perform the command"
-       sys.exit(1)
-
-pculist = plccache.l_pcus # database.if_cached_else_refresh(1, 
-                                                 #     config.refresh, 
-                                                 #     "pculist", 
-                                                 #     lambda : plc.GetPCUs())
+pculist = plccache.l_pcus 
 for pcu in pculist:
-       #print pcu
-       #sys.exit(1)
-       if pcu['model'] == None:
+       if 'model' in pcu and pcu['model'] == None:
                continue
 
-       if True: # pcu['model'].find("APC AP79xx/Masterswitch") >= 0:
+       if True: 
                host = pcu_name(pcu)
                values = get_pcu_values(pcu['pcu_id'])
-               if 'portstatus' not in values:
-                       portstatus = ""
-               else:
-                       if values['reboot'] == 0 or (not isinstance(values['reboot'],int) and values['reboot'].find("error") >= 0):
-                               portstatus = "22:%(22)s 23:%(23)s" % values['portstatus']
-               if values['reboot'] == 0:
-                       print "%6d %20s %50s %s" % (pcu['pcu_id'], pcu['password'], "%s@%s" % (pcu['username'], host), portstatus)
+               #if 'port_status' not in values:
+               #       portstatus = ""
+               #else:
+               #       if values['reboot_trial_status'] == 0 or (not isinstance(values['reboot_trial_status'],int) and values['reboot_trial_status'].find("error") >= 0):
+               #               portstatus = "22:%(22)s 23:%(23)s" % values['port_status']
+               #if 'reboot_trial_status' in values and (values['reboot_trial_status'] == 0 or values['reboot_trial_status'] == "0"):
+               print "%6d: %10s %20s %50s reboot:%s" % (pcu['pcu_id'], pcu['model'], pcu['password'], "%s@%s" % (pcu['username'], host), values['reboot_trial_status'])
 
 #database.dbDump("pculist", pculist, 'php')
index 4963900..d5ce318 100755 (executable)
@@ -32,7 +32,7 @@ def main():
                print err
 
 if __name__ == '__main__':
-       #main()
+       main()
        f = open("/tmp/rebootlog", 'a')
        f.write("reboot %s %s\n" % (time.time(), " ".join(sys.argv[1:])))
        f.close()
index 2070e00..9754218 100755 (executable)
@@ -129,6 +129,21 @@ class NodeConnection:
                                print key, " == ", bm.VARS[key]
                else:
                        print "   Unable to read Node Configuration"
+
+       def fprobe_repair_node(self):
+               # When fprobe data gets too much, it fills the root partition and
+               # fails to boot
+               c = self.c
+               self.c.modules.sys.path.append("/tmp/source/")
+
+               # NOTE: assume that the root fs is already mounted...
+               if self.c.modules.os.path.exists('/tmp/mnt/sysimg/var/local/fprobe'):
+                       print "CLEARING FPROBE DATA on %s" % self.node
+                       self.c.modules.os.chdir('/tmp/mnt/sysimg/var/local/fprobe')
+                       cmd = """ ls -lrt . | awk '{if (i<NR/2 && $9) {print "rm "$9;i=i+1;}}' | sh """
+                       self.c.modules.os.system(cmd)
+               else:
+                       print "COULD NOT CLEAR FPROBE DATA on %s" % self.node
                
        def fsck_repair_node(self):
                c = self.c
@@ -682,10 +697,13 @@ def restore_basic(sitehist, hostname, config=None, forced_action=None):
                args['saveact'] = True
                args['ccemail'] = True
 
-               sitehist.sendMessage('unknownsequence_notice', **args)
+               if 'nospace' in s:
+                       # NOTE: sequence is unknown and contains nospace, so try the
+                       # fprobe repair trick first.
+                       conn.fprobe_repair_node()
 
+               sitehist.sendMessage('unknownsequence_notice', **args)
                conn.restart_bootmanager('boot')
-
                bootman_action = "restart_bootmanager"
 
                # NOTE: Do not set the pflags value for this sequence if it's unknown.
index 46d667b..7dda1ae 100755 (executable)
@@ -43,7 +43,7 @@ verbose = 1
 
 def get_pcu_values(pcu_id):
        from monitor.database.info.model import FindbadPCURecord
-       print "pcuid: %s" % pcu_id
+       #print "pcuid: %s" % pcu_id
        try:
                pcurec = FindbadPCURecord.get_latest_by(plc_pcuid=pcu_id)
                if pcurec:
index 68df2eb..0f919ab 100644 (file)
@@ -12,6 +12,7 @@ from monitor.database.info.model import *
 #from monitor.database.zabbixapi.model import *
 from monitor_xmlrpc import MonitorXmlrpcServer
 from controllers_local import LocalExtensions
+from pcucontrol.reboot import pcu_name
 
 from monitor import util
 from monitor import reboot
@@ -148,7 +149,7 @@ def format_ports(data, pcumodel=None):
 def format_pcu_shortstatus(pcu):
        status = "error"
        if pcu:
-               if pcu.reboot_trial_status == str(0):
+               if pcu.reboot_trial_status == str(0) or pcu.reboot_trial_status == "Test: No error":
                        status = "Ok"
                elif pcu.reboot_trial_status == "NetDown" or pcu.reboot_trial_status == "Not_Run":
                        status = pcu.reboot_trial_status
@@ -170,6 +171,7 @@ def prep_pcu_for_display(pcu):
 
        agg.ports = format_ports(pcu.port_status, pcu.plc_pcu_stats['model'])
        agg.status = format_pcu_shortstatus(pcu)
+       agg.pcu_name = pcu_name(pcu.plc_pcu_stats)
 
        #print pcu.entry_complete
        agg.entry_complete_str = pcu.entry_complete
@@ -604,6 +606,12 @@ class Root(controllers.RootController, MonitorXmlrpcServer, LocalExtensions):
                                for pcuid_key in pcus:
                                        pcuquery += [pcus[pcuid_key]]
 
+               #for a in nodequery:
+               #       print type(a.node)
+               #       print type(a.node.hostname)
+               nodequery.sort(lambda a,b: cmp(a.node.hostname,b.node.hostname))
+               pcuquery.sort(lambda a,b: cmp(a.pcu_name,b.pcu_name))
+
                actionlist_widget = ActionListWidget(template='monitorweb.templates.actionlist_template')
                return dict(sitequery=sitequery, pcuquery=pcuquery, nodequery=nodequery, actions=actions_list, actionlist_widget=actionlist_widget, since=since, exceptions=exceptions)