added sitelist option for find* scripts.
author Stephen Soltesz <soltesz@cs.princeton.edu>
Thu, 12 Feb 2009 21:19:36 +0000 (21:19 +0000)
committer Stephen Soltesz <soltesz@cs.princeton.edu>
Thu, 12 Feb 2009 21:19:36 +0000 (21:19 +0000)
clarified email message in emailZabbix.

automate/fetch.py
findbad.py
findbadpcu.py
monitor/database/zabbixapi/emailZabbix.py
monitor/parser.py
pcucontrol/reboot.py
sitebad.py
web/MonitorWeb/monitorweb/controllers.py

index 91b5715..8d01986 100755 (executable)
@@ -6,7 +6,7 @@ import os
 from glob import glob
 
 import vxargs
-import parser as parsermodule
+from monitor import parser as parsermodule
 from automate import *
 
 def build_vx_args(shell_cmd):
index 78ba65e..9b348bb 100755 (executable)
@@ -122,6 +122,13 @@ def main():
        elif config.site:
                site = api.GetSites(config.site)
                l_nodes = api.GetNodes(site[0]['node_ids'], ['hostname'])
+       elif config.sitelist:
+               site_list = config.sitelist.split(',')
+               sites = api.GetSites(site_list)
+               node_ids = []
+               for s in sites:
+                       node_ids += s['node_ids']
+               l_nodes = api.GetNodes(node_ids, ['hostname'])
                
        l_nodes = [node['hostname'] for node in l_nodes]
 
@@ -172,5 +179,3 @@ if __name__ == '__main__':
                print "Saving data... exitting."
                sys.exit(0)
        print "sleeping"
-       #print "final commit"
-       #time.sleep(10)
index 2d27f79..815a77e 100755 (executable)
@@ -91,9 +91,9 @@ def main():
                                                                                        if_new_set={'round' : global_round})
 
        global_round = fbsync.round
+       api = plc.getAuthAPI()
 
        if config.site is not None:
-               api = plc.getAuthAPI()
                site = api.GetSites(config.site)
                l_nodes = api.GetNodes(site[0]['node_ids'], ['pcu_ids'])
                pcus = []
@@ -101,6 +101,21 @@ def main():
                        pcus += node['pcu_ids']
                # clear out dups.
                l_pcus = [pcu for pcu in sets.Set(pcus)]
+       elif config.sitelist:
+               site_list = config.sitelist.split(',')
+
+               sites = api.GetSites(site_list)
+               node_ids = []
+               for s in sites:
+                       node_ids += s['node_ids']
+
+               l_nodes = api.GetNodes(node_ids, ['pcu_ids'])
+               pcus = []
+               for node in l_nodes:
+                       pcus += node['pcu_ids']
+               # clear out dups.
+               l_pcus = [pcu for pcu in sets.Set(pcus)]
+
        elif config.pcuselect is not None:
                n, pcus = pcu_select(config.pcuselect)
                print pcus
index 9f41da3..01f1b8f 100644 (file)
@@ -123,15 +123,17 @@ You can acknowledge this notice by visiting the link below or by letting us know
 Thank you for your help,
     -- PlanetLab Central (%(support_email)s)
 """
-       node_discovered_subject = """Discovered {HOSTNAME} and online"""
+       node_discovered_subject = """Discovered host {HOSTNAME} w/ ip {IPADDRESS} and online"""
        node_discovered = """
 Thank you for registering and installing this machine:
 
-    {HOSTNAME}
+    hostname {HOSTNAME} : ip {IPADDRESS}
 
 Our monitoring infrastructure has detected it, and in the future it will alert you in the event of unexpected downtime.
 
-       {TRIGGER.NAME}: {STATUS}
+       Discovered : Online
+       {TRIGGER.URL}
+
 Thank you for your help,
     -- PlanetLab Central (%(support_email)s)
 """
index e3365ee..ac26cd6 100644 (file)
@@ -86,11 +86,13 @@ def parseSetNodeSets(parser=None):
        if parser == None:
                parser = OptionParser()
        
-       parser.set_defaults(node=None, site=None, nodelist=None, nodeselect=None, nodegroup=None)
+       parser.set_defaults(node=None, site=None, sitelist=None, nodelist=None, nodeselect=None, nodegroup=None)
        parser.add_option("", "--node", dest="node", metavar="hostname", 
                                                help="Provide a single node to operate on")
        parser.add_option("", "--site", dest="site", metavar="site name",
                                                help="Specify a single site to operate on")
+       parser.add_option("", "--sitelist", dest="sitelist", metavar="site name",
+                                               help="Specify a list of sites, separated by ','")
        parser.add_option("", "--nodegroup", dest="nodegroup", metavar="GroupName", 
                                                help="Provide the nodegroup for the list of nodes.")
        parser.add_option("", "--nodelist", dest="nodelist", metavar="FILE", 
index 035aa01..9d171a2 100755 (executable)
@@ -471,6 +471,8 @@ def reboot_test_new(nodename, values, verbose, dryrun):
                # TODO: how to handle the weird, georgetown pcus, the drac faults, and ilo faults
        except ExceptionPort, err:
                rb_ret = str(err)
+       except NameError, err:
+               rb_ret = str(err)
 
        return rb_ret
 
@@ -503,3 +505,6 @@ def main():
 if __name__ == '__main__':
        logger = logging.getLogger("monitor")
        main()
+       f = open("/tmp/rebootlog", 'a')
+       f.write("reboot %s\n" % sys.argv)
+       f.close()
index 5132233..f8524f0 100755 (executable)
@@ -19,14 +19,19 @@ from nodequery import verify,query_to_dict,node_select
 from monitor.model import *
 
 api = plc.getAuthAPI()
+def main():
+       main2(config)
 
-def main(config):
+def main2(config):
 
        l_nodes = plccache.l_nodes
        l_plcsites = plccache.l_sites
 
        if config.site:
                l_sites = [config.site]
+       elif config.sitelist:
+               site_list = config.sitelist.split(',')
+               l_sites = site_list
        else:
                l_sites = [site['login_base'] for site in l_plcsites]
        
@@ -108,13 +113,13 @@ if __name__ == '__main__':
 
        parser.add_option("", "--site", dest="site", metavar="login_base", 
                                                help="Provide a single site to operate on")
-       parser.add_option("", "--sitelist", dest="sitelist", metavar="file.list", 
-                                               help="Provide a list of files to operate on")
+       parser.add_option("", "--sitelist", dest="sitelist", 
+                                               help="Provide a list of sites separated by ','")
 
        config = parsermodule.parse_args(parser)
 
        try:
-               main(config)
+               main2(config)
        except Exception, err:
                import traceback
                print traceback.print_exc()
index f4832ba..bb0580b 100644 (file)
@@ -203,26 +203,32 @@ class Root(controllers.RootController):
        
        def nodeaction_handler(self, tg_exceptions=None):
                """Handle any kind of error."""
-               refurl = request.headers.get("Referer",link("pcu"))
-               print refurl
-
-               # TODO: do this more intelligently...
-               uri_fields = urllib.splitquery(refurl)
-               if uri_fields[1] is not None:
-                       val = query_to_dict(uri_fields[1])
-                       if 'pcuid' in val:
-                               pcuid = val['pcuid']
-                       elif 'hostname' in val:
-                               pcuid = FindbadNodeRecord.get_latest_by(hostname=val['hostname']).first().plc_pcuid
+
+               if 'pcuid' in request.params:
+                       pcuid = request.params['pcuid']
                else:
-                       pcuid=None
+                       refurl = request.headers.get("Referer",link("pcu"))
+                       print refurl
+
+                       # TODO: do this more intelligently...
+                       uri_fields = urllib.splitquery(refurl)
+                       if uri_fields[1] is not None:
+                               val = query_to_dict(uri_fields[1])
+                               if 'pcuid' in val:
+                                       pcuid = val['pcuid']
+                               elif 'hostname' in val:
+                                       pcuid = FindbadNodeRecord.get_latest_by(hostname=val['hostname']).first().plc_pcuid
+                               else:
+                                       pcuid=None
+                       else:
+                               pcuid=None
 
                cherry_trail = cherrypy._cputil.get_object_trail()
                for i in cherry_trail:
                        print "trail: ", i
 
                print pcuid
-               return self.pcuview(pcuid, **dict(exceptions=tg_exceptions))
+               return self.pcuview(None, pcuid, **dict(exceptions=tg_exceptions))
 
        def nodeaction(self, **data):
                for item in data.keys():
@@ -247,7 +253,7 @@ class Root(controllers.RootController):
                        print "REBOOT: %s" % hostname
                        ret = reboot.reboot_str(str(hostname))
                        print ret
-                       if ret: raise RuntimeError("Error using PCU: " + ret)
+                       if ret: raise RuntimeError("Error using PCU: " + str(ret))
                        flash("Reboot appeared to work.  All at most 5 minutes.  Run ExternalScan to check current status.")
 
                elif action == "ExternalScan":