added bootmanager log for monitor-collected log files to be referenced by web
author Stephen Soltesz <soltesz@cs.princeton.edu>
Fri, 19 Jun 2009 01:44:06 +0000 (01:44 +0000)
committer Stephen Soltesz <soltesz@cs.princeton.edu>
Fri, 19 Jun 2009 01:44:06 +0000 (01:44 +0000)
added keyboard interrupt exception to nodequery

addressed root cause of IntegrityErrors from lousy code in controllers.py:
attributes are no longer assigned on the record objects themselves.
this patch addresses functionality; still need to clean up for clarity.

adjusted templates to use new aggregate object of nodes, pcus, and
'pre-processed' values

updated emailTxt to use just 'pattern' in www node urls

added session.flush() to bootman.py to write out ActionsRecords

13 files changed:
Monitor.spec
automate-default.sh
monitor-server.cron
monitor-server.init
monitor/bootman.py
monitor/wrapper/emailTxt.py
monitor/wrapper/plccache.py
nodequery.py
web/MonitorWeb/monitorweb/controllers.py
web/MonitorWeb/monitorweb/templates/node_template.kid
web/MonitorWeb/monitorweb/templates/nodelist.kid
web/MonitorWeb/monitorweb/templates/pculist.kid
web/MonitorWeb/monitorweb/templates/pcuview.kid

index c28b7de..c5578a6 100644 (file)
@@ -6,7 +6,7 @@
 
 %define name monitor
 %define version 3.0
-%define taglevel 16
+%define taglevel 15
 
 %define release %{taglevel}%{?pldistro:.%{pldistro}}%{?date:.%{date}}
 %global python_sitearch        %( python -c "from distutils.sysconfig import get_python_lib; print get_python_lib(1)" )
@@ -137,6 +137,7 @@ install -d $RPM_BUILD_ROOT/data/var/lib/%{name}/archive-pdb
 install -d $RPM_BUILD_ROOT/var/lib/%{name}
 install -d $RPM_BUILD_ROOT/var/lib/%{name}/archive-pdb
 install -d $RPM_BUILD_ROOT/var/www/cgi-bin/monitor/
+install -d $RPM_BUILD_ROOT/var/www/html/monitorlog/
 
 install -D -m 755 monitor-server.init $RPM_BUILD_ROOT/%{_sysconfdir}/plc.d/monitor
 
@@ -146,6 +147,7 @@ rsync -a --exclude www --exclude archive-pdb --exclude .svn --exclude CVS \
 
 echo " * Installing web pages"
 rsync -a www/ $RPM_BUILD_ROOT/var/www/cgi-bin/monitor/
+rsync -a log/ $RPM_BUILD_ROOT/var/www/html/monitorlog/
 
 echo " * Installing cron job for automated polling"
 install -D -m 644 monitor-server.cron $RPM_BUILD_ROOT/%{_sysconfdir}/cron.d/monitor-server.cron
@@ -225,6 +227,8 @@ easy_install --build-directory /var/tmp -UZ Elixir
 
 # crazy openssl libs for racadm binary
 ln -s /lib/libssl.so.0.9.8b /usr/lib/libssl.so.2
+mkdir /usr/share/monitor/.ssh
+chmod 700 /usr/share/monitor/.ssh
 
 if grep 'pam_loginuid.so' /etc/pam.d/crond ; then
     sed -i -e 's/^session    required   pam_loginuid.so/#session    required   pam_loginuid.so/g' /etc/pam.d/crond
@@ -262,12 +266,6 @@ chkconfig --add monitor-runlevelagent
 chkconfig monitor-runlevelagent on
 
 %changelog
-* Wed Jun 17 2009 Stephen Soltesz <soltesz@cs.princeton.edu> - Monitor-3.0-16
-- Added Rpyc from 1.0 branch.
-- add pcuhistory
-- add setup-agent for password protected keys.
-- other minor improvements.
-
 * Wed Jun 17 2009 Stephen Soltesz <soltesz@cs.princeton.edu> - Monitor-3.0-15
 - automate install
 - auto-close tickets
index 958d578..66a42d9 100755 (executable)
@@ -75,6 +75,7 @@ ps ax | grep locfg | grep -v grep | awk '{print $1}' | xargs -r kill || :
 ${MONITOR_SCRIPT_ROOT}/policy.py $DATE
 ${MONITOR_SCRIPT_ROOT}/checksync.py $DATE || :
 service plc restart monitor
+curl -s 'http://summer.cs.princeton.edu/status/tabulator.cgi?table=table_nodeview&formatcsv' > /var/lib/monitor/comon/$DATE.comon.csv
 
 cp ${MONITOR_SCRIPT_ROOT}/monitor.log ${MONITOR_ARCHIVE_ROOT}/`date +%F-%H:%M`.monitor.log
 rm -f $MONITOR_PID
index 4d02a82..73f0fdf 100644 (file)
@@ -2,5 +2,5 @@
 # automated actions for debug nodes.
 
 01 * * * * root /usr/share/monitor/automate-default.sh 2>&1 > /usr/share/monitor/monitor.log
-30 * * * * root /etc/plc.d/monitor sync 2>&1 >> /var/log/monitorsync.log
+#30 * * * * root /etc/plc.d/monitor sync 2>&1 >> /var/log/monitorsync.log
 
index 5ffe974..4b5bbb0 100644 (file)
@@ -192,6 +192,7 @@ API_AUTH_PASSWORD=${PLC_MONITOR_DBPASSWORD}
 MONITOR_SCRIPT_ROOT=${MONITORPATH}
 MONITOR_DATA_ROOT=/var/lib/monitor
 MONITOR_ARCHIVE_ROOT=/var/lib/monitor/archive-pdb
+MONITOR_BOOTMANAGER_LOG=/var/www/html/monitorlog
 
 MONITOR_HOSTNAME=${PLC_MONITOR_HOST}
 MONITOR_IP=${PLC_MONITOR_IP}
index 2afbbf6..9fe28af 100755 (executable)
@@ -68,18 +68,34 @@ class NodeConnection:
                return "unknown"
 
        def get_dmesg(self):
+               t_stamp = time.strftime("%Y-%m-%d-%H:%M")
                self.c.modules.os.system("dmesg > /var/log/dmesg.bm.log")
-               download(self.c, "/var/log/dmesg.bm.log", "log/dmesg.%s.log" % self.node)
-               log = open("log/dmesg.%s.log" % self.node, 'r')
+               download(self.c, "/var/log/dmesg.bm.log", "%s/history/%s-dmesg.%s.log" % (config.MONITOR_BOOTMANAGER_LOG, t_stamp, self.node))
+               os.system("cp %s/history/%s-dmesg.%s.log %s/dmesg.%s.log" % (config.MONITOR_BOOTMANAGER_LOG, t_stamp, self.node, config.MONITOR_BOOTMANAGER_LOG, self.node))
+               log = open("%s/dmesg.%s.log" % (config.MONITOR_BOOTMANAGER_LOG, self.node), 'r')
                return log
 
        def get_bootmanager_log(self):
-               download(self.c, "/tmp/bm.log", "log/bm.%s.log.gz" % self.node)
-               #os.system("zcat log/bm.%s.log.gz > log/bm.%s.log" % (self.node, self.node))
-               os.system("cp log/bm.%s.log.gz log/bm.%s.log" % (self.node, self.node))
-               log = open("log/bm.%s.log" % self.node, 'r')
+               t_stamp = time.strftime("%Y-%m-%d-%H:%M")
+               download(self.c, "/tmp/bm.log", "%s/history/%s-bm.%s.log" % (config.MONITOR_BOOTMANAGER_LOG, t_stamp, self.node))
+               os.system("cp %s/history/%s-bm.%s.log %s/bm.%s.log" % (config.MONITOR_BOOTMANAGER_LOG, t_stamp, self.node, config.MONITOR_BOOTMANAGER_LOG, self.node))
+               log = open("%s/bm.%s.log" % (config.MONITOR_BOOTMANAGER_LOG, self.node), 'r')
                return log
 
+
+#      def get_dmesg(self):
+#              self.c.modules.os.system("dmesg > /var/log/dmesg.bm.log")
+#              download(self.c, "/var/log/dmesg.bm.log", "log/dmesg.%s.log" % self.node)
+#              log = open("log/dmesg.%s.log" % self.node, 'r')
+#              return log
+#
+#      def get_bootmanager_log(self):
+#              download(self.c, "/tmp/bm.log", "log/bm.%s.log.gz" % self.node)
+#              #os.system("zcat log/bm.%s.log.gz > log/bm.%s.log" % (self.node, self.node))
+#              os.system("cp log/bm.%s.log.gz log/bm.%s.log" % (self.node, self.node))
+#              log = open("log/bm.%s.log" % self.node, 'r')
+#              return log
+
        def dump_plconf_file(self):
                c = self.c
                self.c.modules.sys.path.append("/tmp/source/")
@@ -627,8 +643,12 @@ class DebugInterface:
 
                return sequence
                
-
 def restore(sitehist, hostname, config=None, forced_action=None):
+       ret = restore_basic(sitehist, hostname, config, forced_action)
+       session.flush()
+       return ret
+
+def restore_basic(sitehist, hostname, config=None, forced_action=None):
 
        # NOTE: Nothing works if the bootcd is REALLY old.
        #       So, this is the first step.
@@ -650,16 +670,8 @@ def restore(sitehist, hostname, config=None, forced_action=None):
 
        debugnode = DebugInterface(hostname)
        conn = debugnode.getConnection()
-       #print "conn: %s" % conn
-       #print "trying to use conn after returning it."
-       #print conn.c.modules.sys.path
-       #print conn.c.modules.os.path.exists('/tmp/source')
        if type(conn) == type(False): return False
 
-       #if forced_action == "reboot":
-       #       conn.restart_node('reinstall')
-       #       return True
-
        boot_state = conn.get_boot_state()
        if boot_state != "debug":
                print "... %s in %s state: skipping..." % (hostname , boot_state)
@@ -689,7 +701,7 @@ def restore(sitehist, hostname, config=None, forced_action=None):
                        print "...Should investigate.  Skipping node."
                        # TODO: send message related to these errors.
 
-                       if not found_within(recent_actions, 'newbootcd_notice', 3):
+                       if not found_within(recent_actions, 'baddisk_notice', 3):
 
                                log=conn.get_dmesg().read()
                                sitehist.sendMessage('baddisk_notice', hostname=hostname, log=log)
index 77e8576..9b7b456 100644 (file)
@@ -487,7 +487,7 @@ Thank you for your help,
                                           """
 While trying to automatically recover this machine:
 
-    http://www.planet-lab.org/db/nodes/index.php?nodepattern=%(hostname)s
+    http://www.planet-lab.org/db/nodes/index.php?pattern=%(hostname)s
 
 We encountered an unknown situation.  Please re-code to handle, or manually intervene to repair this host.
 
@@ -517,7 +517,7 @@ It is essential that the AMT feature be configured to enable PlanetLab staff to
     Configure the DC7800 AMT feature  : https://www.planet-lab.org/AMT
     Add a PCU to your site            : https://www.planet-lab.org/db/sites/pcu.php
        Associate your node with the PCU  : Follow the 'My Site' link
-       Finally, download the Boot Image  : https://www.planet-lab.org/db/nodes/index.php?nodepattern=%(hostname)s
+       Finally, download the Boot Image  : https://www.planet-lab.org/db/nodes/index.php?pattern=%(hostname)s
        Burn Boot Image to media & Reboot your node
 
 You can confirm that your machine's PCU is correctly configured by visiting the AMT
@@ -573,7 +573,7 @@ Thank you for your help,
                                           """
 While trying to automatically recover this machine:
 
-    http://www.planet-lab.org/db/nodes/index.php?nodepattern=%(hostname)s
+    http://www.planet-lab.org/db/nodes/index.php?pattern=%(hostname)s
 
 We encountered an failed hardware requirement.  Please look at the log below to determine the exact nature of the failure, either Disk, CPU, Network, or Mimial RAM was not satisfied.
 
@@ -637,7 +637,7 @@ Thank you for your help,
 
 As part of PlanetLab node monitoring, we noticed that %(hostname)s has a network configuration error related to DNS or hostname lookups.  Often this can happen either due local configuraiton changes, or a misconfiguration of the node's DNS servers.  To resolve the issue we require your assistance.  All that is needed is to visit:
 
-       https://www.planet-lab.org/db/nodes/index.php?nodepattern=%(hostname)s
+       https://www.planet-lab.org/db/nodes/index.php?pattern=%(hostname)s
 
 Find the primary node network entry and confirm that the settings are correct.  
 
@@ -658,7 +658,7 @@ BootManager.log output follows:
        nodeconfig_notice=(""" Please Update Configuration file for PlanetLab node %(hostname)s""", 
 """As part of PlanetLab node monitoring, we noticed %(hostname)s has an out-dated plnode.txt file with no NODE_ID or a mis-matched HOSTNAME.  This can happen either due to an initial configuration failure at your site, with information entered into our database, or after a software upgrade.  To resolve the issue we require your assistance.  All that is needed is to visit:
 
-       https://www.planet-lab.org/db/nodes/index.php?nodepattern=%(hostname)s
+       https://www.planet-lab.org/db/nodes/index.php?pattern=%(hostname)s
 
 Then, select, "Download -> Download plnode.txt file for %(hostname)s" menu.  This will generate a new configuration file for your node.  Copy this file to the appropriate read-only media, either floppy or USB stick, and reboot the machine.
 
index fea4c72..ac23f1b 100755 (executable)
@@ -63,8 +63,8 @@ plcdb_id2lb = None
 
 def init():
        import traceback
-       print "IMPORTING PLCCACHE: ",
-       traceback.print_stack()
+       #print "IMPORTING PLCCACHE: ",
+       #traceback.print_stack()
        global l_sites
        global l_nodes
        global l_pcus
@@ -73,14 +73,15 @@ def init():
        global plcdb_id2lb
        print "initing plccache"
 
+       print "collecting plcsites"
        dbsites = PlcSite.query.all()
        l_sites = [ s.plc_site_stats for s in dbsites ]
 
-       print "plcnode"
+       print "collecting plcnodes"
        dbnodes = PlcNode.query.all()
        l_nodes = [ s.plc_node_stats for s in dbnodes ]
 
-       print "plcpcu"
+       print "collecting plcpcus"
        dbpcus = PlcPCU2.query.all()
        l_pcus = []
        for s in dbpcus:
@@ -90,11 +91,10 @@ def init():
                                  'model', 'password', 'ports']:
                        pcu[k] = getattr(s, k)
                l_pcus.append(pcu)
-       #l_pcus = [ s.plc_pcu_stats for s in dbpcus ]
 
-       print "dsites_from_lsites"
+       print "building id2lb"
        (d_sites,id2lb) = dsites_from_lsites(l_sites)
-       print "dsn_from_dsln"
+       print "building lb2hn"
        (plcdb, hn2lb, lb2hn) = dsn_from_dsln(d_sites, id2lb, l_nodes)
 
        plcdb_hn2lb = hn2lb
index 738e58d..2fe0e4a 100755 (executable)
@@ -317,6 +317,9 @@ def node_select(str_query, nodelist=None, fb=None):
                        fb_noderec = None
                        #fb_noderec = FindbadNodeRecord.query.filter(FindbadNodeRecord.hostname==node).order_by(FindbadNodeRecord.date_checked.desc()).first()
                        fb_noderec = FindbadNodeRecord.get_latest_by(hostname=node)
+               except KeyboardInterrupt:
+                       print "Exiting at user request: Ctrl-C"
+                       sys.exit(1)
                except:
                        print traceback.print_exc()
                        continue
@@ -328,7 +331,6 @@ def node_select(str_query, nodelist=None, fb=None):
                        #if 'plcnode' in fb_nodeinfo:
                        #       fb_nodeinfo.update(fb_nodeinfo['plcnode'])
 
-                       #if verifyDBrecord(dict_query, fb_nodeinfo):
                        if verify(dict_query, fb_nodeinfo):
                                #print fb_nodeinfo.keys()
                                #print node #fb_nodeinfo
@@ -414,6 +416,9 @@ def main():
                        fb_noderec = FindbadNodeRecord.get_latest_by(hostname=node) 
                        if not fb_noderec: continue
                        fb_nodeinfo = fb_noderec.to_dict()
+               except KeyboardInterrupt:
+                       print "Exiting at user request: Ctrl-C"
+                       sys.exit(1)
                except:
                        print traceback.print_exc()
                        continue
index aad1f21..dba81e9 100644 (file)
@@ -22,6 +22,9 @@ from monitor.wrapper.plccache import plcdb_hn2lb as site_hn2lb
 from monitorweb.templates.links import *
 
 
+# make it easier to group objects without invoking the elixir auto-write feature.
+class aggregate: pass
+
 
 def query_to_dict(query):
        """ take a url query string and chop it up """
@@ -76,79 +79,85 @@ def format_pcu_shortstatus(pcu):
        return status
 
 def prep_pcu_for_display(pcu):
+       agg = aggregate()
+       agg.pcu = pcu 
                
        try:
-               pcu.loginbase = PlcSite.query.get(pcu.plc_pcu_stats['site_id']).plc_site_stats['login_base']
+               agg.loginbase = PlcSite.query.get(pcu.plc_pcu_stats['site_id']).plc_site_stats['login_base']
        except:
-               pcu.loginbase = "unknown"
+               agg.loginbase = "unknown"
 
-       pcu.ports = format_ports(pcu.port_status, pcu.plc_pcu_stats['model'])
-       pcu.status = format_pcu_shortstatus(pcu)
+       agg.ports = format_ports(pcu.port_status, pcu.plc_pcu_stats['model'])
+       agg.status = format_pcu_shortstatus(pcu)
 
        #print pcu.entry_complete
-       pcu.entry_complete_str = pcu.entry_complete
+       agg.entry_complete_str = pcu.entry_complete
        #pcu.entry_complete_str += "".join([ f[0] for f in pcu.entry_complete.split() ])
        if pcu.dns_status == "NOHOSTNAME":
-               pcu.dns_short_status = 'NoHost'
+               agg.dns_short_status = 'NoHost'
        elif pcu.dns_status == "DNS-OK":
-               pcu.dns_short_status = 'Ok'
+               agg.dns_short_status = 'Ok'
        elif pcu.dns_status == "DNS-NOENTRY":
-               pcu.dns_short_status = 'NoEntry'
+               agg.dns_short_status = 'NoEntry'
        elif pcu.dns_status == "NO-DNS-OR-IP":
-               pcu.dns_short_status = 'NoHostOrIP'
+               agg.dns_short_status = 'NoHostOrIP'
        elif pcu.dns_status == "DNS-MISMATCH":
-               pcu.dns_short_status = 'Mismatch'
+               agg.dns_short_status = 'Mismatch'
+       return agg
 
 class NodeWidget(widgets.Widget):
        pass
 
 def prep_node_for_display(node):
+       agg = aggregate()
+       agg.node = node
+
        if node.plc_pcuid:
                pcu = FindbadPCURecord.get_latest_by(plc_pcuid=node.plc_pcuid)
                if pcu:
-                       node.pcu_status = pcu.reboot_trial_status
-                       node.pcu_short_status = format_pcu_shortstatus(pcu)
-                       node.pcu = pcu
-                       prep_pcu_for_display(node.pcu)
+                       agg.pcu_status = pcu.reboot_trial_status
+                       agg.pcu_short_status = format_pcu_shortstatus(pcu)
+                       agg.pcu = prep_pcu_for_display(pcu)
                else:
-                       node.pcu_short_status = "none"
-                       node.pcu_status = "nodata"
-                       node.pcu = None
+                       agg.pcu_short_status = "none"
+                       agg.pcu_status = "nodata"
+                       agg.pcu = None
 
        else:
-               node.pcu_status = "nopcu"
-               node.pcu_short_status = "none"
-               node.pcu = None
+               agg.pcu_status = "nopcu"
+               agg.pcu_short_status = "none"
+               agg.pcu = None
 
 
        if node.kernel_version:
-               node.kernel = node.kernel_version.split()[2]
+               agg.kernel = node.kernel_version.split()[2]
        else:
-               node.kernel = ""
+               agg.kernel = ""
 
        try:
-               node.loginbase = PlcSite.query.get(node.plc_node_stats['site_id']).plc_site_stats['login_base']
+               agg.loginbase = PlcSite.query.get(node.plc_node_stats['site_id']).plc_site_stats['login_base']
        except:
-               node.loginbase = "unknown"
+               agg.loginbase = "unknown"
 
-       if node.loginbase:
-               node.site = HistorySiteRecord.by_loginbase(node.loginbase)
-               if node.site is None:
+       if agg.loginbase:
+               agg.site = HistorySiteRecord.by_loginbase(agg.loginbase)
+               if agg.site is None:
                        # TODO: need a cleaner fix for this...
-                       node.site = HistorySiteRecord.by_loginbase("pl")
-                        if not node.site:
-                                node.site = HistorySiteRecord.by_loginbase("ple")
-                       
+                       agg.site = HistorySiteRecord.by_loginbase("pl")
+                       if not agg.site:
+                               agg.site = HistorySiteRecord.by_loginbase("ple")
 
-       node.history = HistoryNodeRecord.by_hostname(node.hostname)
+       agg.history = HistoryNodeRecord.by_hostname(node.hostname)
 
-       node.ports = format_ports(node.port_status)
+       agg.ports = format_ports(node.port_status)
 
        try:
                exists = node.plc_node_stats['last_contact']
        except:
+               # TODO: this should not assign to the fb object!
                node.plc_node_stats = {'last_contact' : None}
-
+       
+       return agg
 
 
 class Root(controllers.RootController, MonitorXmlrpcServer):
@@ -162,10 +171,10 @@ class Root(controllers.RootController, MonitorXmlrpcServer):
        def nodeview(self, hostname=None):
                nodequery=[]
                if hostname:
-                        node = FindbadNodeRecord.get_latest_by(hostname=hostname)
-                        # NOTE: reformat some fields.
-                        prep_node_for_display(node)
-                        nodequery += [node]
+                       node = FindbadNodeRecord.get_latest_by(hostname=hostname)
+                       # NOTE: reformat some fields.
+                       agg = prep_node_for_display(node)
+                       nodequery += [agg]
 
                return self.pcuview(None, None, hostname) # dict(nodequery=nodequery)
 
@@ -181,41 +190,41 @@ class Root(controllers.RootController, MonitorXmlrpcServer):
                                                'neverboot' : 0, 'pending' : 0, 'all' : 0, None : 0}
                for node in fbquery:
                        # NOTE: reformat some fields.
-                       prep_node_for_display(node)
+                       agg = prep_node_for_display(node)
 
                        #node.history.status
                        #print node.hostname
 
-                       if not node.history:
+                       if not agg.history:
                                continue
 
-                       if node.history.status in ['down', 'offline']:
+                       if agg.history.status in ['down', 'offline']:
                                if node.plc_node_stats and node.plc_node_stats['last_contact'] != None:
                                        filtercount['down'] += 1
                                else:
                                        filtercount['neverboot'] += 1
-                       elif node.history.status in ['good', 'online']:
+                       elif agg.history.status in ['good', 'online']:
                                filtercount['boot'] += 1
-                       elif node.history.status in ['debug', 'monitordebug']:
+                       elif agg.history.status in ['debug', 'monitordebug']:
                                filtercount['debug'] += 1
                        else:
-                                # TODO: need a better fix. filtercount
-                                # doesn't maps to GetBootStates() on
-                                # 4.3 so this one fails quite often.
-                                if filtercount.has_key(node.history.status):
-                                        filtercount[node.history.status] += 1
+                               # TODO: need a better fix. filtercount
+                               # doesn't maps to GetBootStates() on
+                               # 4.3 so this one fails quite often.
+                               if filtercount.has_key(agg.history.status):
+                                       filtercount[agg.history.status] += 1
                                
 
                        # NOTE: apply filter
                        if filter == "neverboot":
                                if not node.plc_node_stats or node.plc_node_stats['last_contact'] == None:
-                                       query.append(node)
+                                       query.append(agg)
                        elif filter == "all":
-                               query.append(node)
-                       elif filter == node.history.status:
-                               query.append(node)
+                               query.append(agg)
+                       elif filter == agg.history.status:
+                               query.append(agg)
                        elif filter == 'boot':
-                               query.append(node)
+                               query.append(agg)
 
                                
                widget = NodeWidget(template='monitorweb.templates.node_template')
@@ -321,12 +330,12 @@ class Root(controllers.RootController, MonitorXmlrpcServer):
                        pcus = {}
                        for node in FindbadNodeRecord.query.filter_by(loginbase=loginbase):
                                        # NOTE: reformat some fields.
-                                       prep_node_for_display(node)
-                                       nodequery += [node]
-                                       if node.plc_pcuid:      # not None
-                                               pcu = FindbadPCURecord.get_latest_by(plc_pcuid=node.plc_pcuid)
-                                               prep_pcu_for_display(pcu)
-                                               pcus[node.plc_pcuid] = pcu
+                                       agg = prep_node_for_display(node)
+                                       nodequery += [agg]
+                                       if agg.pcu: #.pcu.plc_pcuid:    # not None
+                                               #pcu = FindbadPCURecord.get_latest_by(plc_pcuid=agg.plc_pcuid)
+                                               #prep_pcu_for_display(pcu)
+                                               pcus[agg.pcu.pcu.plc_pcuid] = agg.pcu
 
                        for pcuid_key in pcus:
                                pcuquery += [pcus[pcuid_key]]
@@ -335,10 +344,10 @@ class Root(controllers.RootController, MonitorXmlrpcServer):
                        print "pcuid: %s" % pcuid
                        pcu = FindbadPCURecord.get_latest_by(plc_pcuid=pcuid)
                        # NOTE: count filter
-                       prep_pcu_for_display(pcu)
-                       pcuquery += [pcu]
+                       aggpcu = prep_pcu_for_display(pcu)
+                       pcuquery += [aggpcu]
                        if 'site_id' in pcu.plc_pcu_stats:
-                               sitequery = [HistorySiteRecord.by_loginbase(pcu.loginbase)]
+                               sitequery = [HistorySiteRecord.by_loginbase(aggpcu.loginbase)]
                                
                        if 'nodenames' in pcu.plc_pcu_stats:
                                for nodename in pcu.plc_pcu_stats['nodenames']: 
@@ -347,19 +356,19 @@ class Root(controllers.RootController, MonitorXmlrpcServer):
                                        print "%s" % node.port_status
                                        print "%s" % node.to_dict()
                                        if node:
-                                               prep_node_for_display(node)
-                                               nodequery += [node]
+                                               agg = prep_node_for_display(node)
+                                               nodequery += [agg]
 
                if hostname and pcuid is None:
                                node = FindbadNodeRecord.get_latest_by(hostname=hostname)
                                # NOTE: reformat some fields.
-                               prep_node_for_display(node)
-                               sitequery = [node.site]
-                               nodequery += [node]
-                               if node.plc_pcuid:      # not None
-                                       pcu = FindbadPCURecord.get_latest_by(plc_pcuid=node.plc_pcuid)
-                                       prep_pcu_for_display(pcu)
-                                       pcuquery += [pcu]
+                               agg = prep_node_for_display(node)
+                               sitequery = [agg.site]
+                               nodequery += [agg]
+                               if agg.plc_pcuid:       # not None
+                                       #pcu = FindbadPCURecord.get_latest_by(plc_pcuid=node.plc_pcuid)
+                                       #prep_pcu_for_display(pcu)
+                                       pcuquery += [agg.pcu]
                        
                return dict(sitequery=sitequery, pcuquery=pcuquery, nodequery=nodequery, actions=actions, exceptions=exceptions)
 
@@ -429,22 +438,22 @@ class Root(controllers.RootController, MonitorXmlrpcServer):
                        else:
                                filtercount['pending'] += 1
 
-                       prep_pcu_for_display(node)
+                       pcuagg = prep_pcu_for_display(node)
 
                        # NOTE: apply filter
                        if filter == "all":
-                               query.append(node)
+                               query.append(pcuagg)
                        elif filter == "ok" and node.reboot_trial_status == str(0):
-                               query.append(node)
+                               query.append(pcuagg)
                        elif filter == node.reboot_trial_status:
-                               query.append(node)
+                               query.append(pcuagg)
                        elif filter == "pending":
                                # TODO: look in message logs...
                                if node.reboot_trial_status != str(0) and \
                                        node.reboot_trial_status != 'NetDown' and \
                                        node.reboot_trial_status != 'Not_Run':
 
-                                       query.append(node)
+                                       query.append(pcuagg)
                                
                return dict(query=query, fc=filtercount)
 
@@ -455,8 +464,8 @@ class Root(controllers.RootController, MonitorXmlrpcServer):
                nodequery = []
                for node in FindbadNodeRecord.query.filter_by(loginbase=loginbase):
                        # NOTE: reformat some fields.
-                       prep_node_for_display(node)
-                       nodequery += [node]
+                       agg = prep_node_for_display(node)
+                       nodequery += [agg]
                return dict(sitequery=sitequery, nodequery=nodequery, fc={})
 
        @expose(template="monitorweb.templates.sitelist")
index 795fdde..3e1583a 100644 (file)
@@ -18,29 +18,29 @@ from links import *
                <th>last_contact</th>
        </span>
        <span py:if="node is not None">
-                <td py:content="node.plc_node_stats['node_id']">node_id</td>
+                <td py:content="node.node.plc_node_stats['node_id']">node_id</td>
                <td nowrap="true">
-                 <a target="_top" href="${link('pcuview', hostname=node.hostname)}" py:content="node.hostname">your.host.org</a></td>
+                 <a target="_top" href="${link('pcuview', hostname=node.node.hostname)}" py:content="node.node.hostname">your.host.org</a></td>
                <td>
-                 <a href="${link('pcuview', loginbase=node.loginbase)}">${node.loginbase}</a>
+                 <a href="${link('pcuview', loginbase=node.node.loginbase)}">${node.node.loginbase}</a>
                </td>
-                <td py:content="node.ping_status">ping</td>
-                <td py:content="node.ssh_status">ssh</td>
-                <td py:content="node.plc_node_stats['boot_state']">boot</td>
+                <td py:content="node.node.ping_status">ping</td>
+                <td py:content="node.node.ssh_status">ssh</td>
+                <td py:content="node.node.plc_node_stats['boot_state']">boot</td>
                <td width="20%" nowrap='true' align='center' id="status-${node.pcu_short_status}">
                  <div id="links">
                    <a class="info" py:if="'error' in node.pcu_short_status" 
-                      href="${link('pcuview', pcuid=node.plc_pcuid)}">
-                     Error<span><pre>${node.pcu.reboot_trial_status}</pre></span></a>
+                      href="${link('pcuview', pcuid=node.pcu.pcu.plc_pcuid)}">
+                     Error<span><pre>${node.pcu.pcu.reboot_trial_status}</pre></span></a>
                    <a py:if="'error' not in node.pcu_short_status and 'none' not in node.pcu_short_status" 
-                      href="${link('pcuview', pcuid=node.plc_pcuid)}"
+                      href="${link('pcuview', pcuid=node.pcu.pcu.plc_pcuid)}"
                       py:content="node.pcu_short_status">Reboot Status</a>
                    <span py:if="'none' in node.pcu_short_status" 
                          py:content="node.pcu_short_status">Reboot Status</span>
                  </div>
                </td>
                <td nowrap="true" py:content="node.kernel"></td>
-               <td nowrap="true" py:content="node.bootcd_version"></td>
-               <td  id="node-${node.observed_status}" py:content="diff_time(node.plc_node_stats['last_contact'])"></td>
+               <td nowrap="true" py:content="node.node.bootcd_version"></td>
+               <td  id="node-${node.node.observed_status}" py:content="diff_time(node.node.plc_node_stats['last_contact'])"></td>
        </span>
 </span>
index 2c3cd00..29a09fe 100644 (file)
@@ -17,8 +17,8 @@ from links import *
   </script>
 
   <center>
-  <b py:content="'BOOT: %d' % len([node for node in query if node.observed_status == 'BOOT'])"></b> | 
-  <b py:content="'DOWN: %d' % len([node for node in query if node.observed_status == 'DOWN'])"></b><br/>
+  <b py:content="'BOOT: %d' % len([agg for agg in query if agg.node.observed_status == 'BOOT'])"></b> | 
+  <b py:content="'DOWN: %d' % len([agg for agg in query if agg.node.observed_status == 'DOWN'])"></b><br/>
   </center>
 
 <table id="nodelist" cellpadding="0" border="0" class="plekit_table sortable-onload-2 colstyle-alt no-arrow paginationcallback-nodelist_paginator max-pages-10 paginate-25">
index 6671c70..2d06887 100644 (file)
@@ -58,14 +58,14 @@ from links import *
       <td nowrap='true'>
        <div class='oneline'>
          <a class='left' href="${link('pcuview', loginbase=node.loginbase)}">${node.loginbase}</a>
-         <a class='right' href="${plc_site_uri_id(node.plc_pcu_stats['site_id'])}">
+         <a class='right' href="${plc_site_uri_id(node.pcu.plc_pcu_stats['site_id'])}">
            <img style='display: inline' border='0' src="static/images/extlink.gif" align='right'/></a>
        </div>
       </td>
       <td nowrap='true'>
        <div class='oneline'>
-         <a class='left' href="${link('pcuview', pcuid=node.plc_pcuid)}">${pcu_name(node.plc_pcu_stats)}</a>
-         <a class='right' href="${plc_pcu_uri_id(node.plc_pcu_stats['pcu_id'])}">
+         <a class='left' href="${link('pcuview', pcuid=node.pcu.plc_pcuid)}">${pcu_name(node.pcu.plc_pcu_stats)}</a>
+         <a class='right' href="${plc_pcu_uri_id(node.pcu.plc_pcu_stats['pcu_id'])}">
            <img style='display: inline' border='0' src="static/images/extlink.gif" align='right'/></a>
        </div>
       </td>
@@ -76,15 +76,15 @@ from links import *
       <td width="20%" nowrap='true' align='center' id="status-${node.status}">
        <div id="links">
          <a class="info" py:if="'error' in node.status" 
-            href="${link('pcuview', pcuid=node.plc_pcuid)}">
-           Error<span><pre>${node.reboot_trial_status}</pre></span></a>
+            href="${link('pcuview', pcuid=node.pcu.plc_pcuid)}">
+           Error<span><pre>${node.pcu.reboot_trial_status}</pre></span></a>
          <a py:if="'error' not in node.status" 
-            href="${link('pcuview', pcuid=node.plc_pcuid)}"
+            href="${link('pcuview', pcuid=node.pcu.plc_pcuid)}"
             py:content="node.status">Reboot Status</a>
        </div>
       </td>
-      <td py:content="node.plc_pcu_stats['model']"></td>
-      <td py:content="len(node.plc_pcu_stats['node_ids'])"></td>
+      <td py:content="node.pcu.plc_pcu_stats['model']"></td>
+      <td py:content="len(node.pcu.plc_pcu_stats['node_ids'])"></td>
     </tr>
   </tbody>  
 </table>
index 6e241d5..b3aa16d 100644 (file)
@@ -55,23 +55,23 @@ from links import *
                                </tr>
                        </thead>
                        <tbody>
-                               <tr py:for="i,pcu in enumerate(pcuquery)" class="${i%2 and 'odd' or 'even'}" >
+                               <tr py:for="i,agg in enumerate(pcuquery)" class="${i%2 and 'odd' or 'even'}" >
                                        <td></td>
-                                       <td><a href="pcuhistory?pcu_id=${pcu.plc_pcuid}">history</a></td>
+                                       <td><a href="pcuhistory?pcu_id=${agg.pcu.plc_pcuid}">history</a></td>
                                        <td nowrap="true" >
-                                               <a class="ext-link" href="${plc_pcu_uri_id(pcu.plc_pcu_stats['pcu_id'])}">
-                                                       <span class="icon">${pcu_name(pcu.plc_pcu_stats)}</span>
+                                               <a class="ext-link" href="${plc_pcu_uri_id(agg.pcu.plc_pcu_stats['pcu_id'])}">
+                                                       <span class="icon">${pcu_name(agg.pcu.plc_pcu_stats)}</span>
                                                </a>
                                        </td>
-                                       <td py:content="pcu.entry_complete"></td>
-                                       <td id="dns-${pcu.dns_status}" py:content="pcu.dns_status"></td>
+                                       <td py:content="agg.pcu.entry_complete"></td>
+                                       <td nowrap='true' id="dns-${agg.pcu.dns_status}" py:content="agg.pcu.dns_status"></td>
                                        <td nowrap='true'>
-                                               <span py:for="port,state in pcu.ports" 
+                                               <span py:for="port,state in agg.ports" 
                                                id="port${state}" py:content="'%s, ' % port">80</span>
                                        </td>
-                                       <td width="40" id="status-${pcu.status}"><pre class="results" py:content="pcu.reboot_trial_status"></pre></td>
-                                       <td py:content="pcu.plc_pcu_stats['model']"></td>
-                                       <td py:content="len(pcu.plc_pcu_stats['node_ids'])"></td>
+                                       <td width="40" id="status-${agg.status}"><pre class="results" py:content="agg.pcu.reboot_trial_status"></pre></td>
+                                       <td py:content="agg.pcu.plc_pcu_stats['model']"></td>
+                                       <td py:content="len(agg.pcu.plc_pcu_stats['node_ids'])"></td>
                                </tr>
                        </tbody>
                </table>
@@ -154,31 +154,31 @@ from links import *
                                </tr>
                        </thead>
                        <tbody>
-                               <tr py:for="i,node in enumerate(nodequery)" class="${i%2 and 'odd' or 'even'}" >
+                               <tr py:for="i,agg in enumerate(nodequery)" class="${i%2 and 'odd' or 'even'}" >
                                        <td></td>
-                                       <td><a href="nodehistory?hostname=${node.hostname}">history</a></td>
-                                       <td id="node-${node.observed_status}" nowrap="true" >
-                                               <a class="ext-link" href="${plc_node_uri_id(node.plc_node_stats['node_id'])}">
-                                                       <span class="icon">${node.hostname}</span></a>
+                                       <td><a href="nodehistory?hostname=${agg.node.hostname}">history</a></td>
+                                       <td id="node-${agg.node.observed_status}" nowrap="true" >
+                                               <a class="ext-link" href="${plc_node_uri_id(agg.node.plc_node_stats['node_id'])}">
+                                                       <span class="icon">${agg.node.hostname}</span></a>
                                        </td>
-                                       <td py:content="diff_time(node.plc_node_stats['last_contact'])"></td>
-                                       <td py:content="diff_time(mktime(node.date_checked.timetuple()))"></td>
+                                       <td py:content="diff_time(agg.node.plc_node_stats['last_contact'])"></td>
+                                       <td py:content="diff_time(mktime(agg.node.date_checked.timetuple()))"></td>
                                        <td>
-                                               <span py:for="port,state in node.ports" 
+                                               <span py:for="port,state in agg.ports" 
                                                id="port${state}" py:content="'%s, ' % port">80</span>
                                        </td>
                                        <td>
                                                <!-- TODO: add some values/code to authenticate the operation.  -->
-                                               <!--form action="${link('pcuview', hostname=node.hostname)}" name="externalscan${i}" method='post'>
-                                               <input type='hidden' name='hostname' value='${node.hostname}'/> 
+                                               <!--form action="${link('pcuview', hostname=agg.node.hostname)}" name="externalscan${i}" method='post'>
+                                               <input type='hidden' name='hostname' value='${agg.node.hostname}'/> 
                                                <input type='hidden' name='type' value='ExternalScan' /> 
                                                </form>
                                                <a onclick='document.externalscan${i}.submit();' href="javascript: void(1);">ExternalScan</a-->
                                        </td>
                                        <td>
                                                <!-- TODO: add some values/code to authenticate the operation.  -->
-                                               <!--form action="${link('pcuview', hostname=node.hostname)}" name="internalscan${i}" method='post'>
-                                               <input type='hidden' name='hostname' value='${node.hostname}'/> 
+                                               <!--form action="${link('pcuview', hostname=agg.node.hostname)}" name="internalscan${i}" method='post'>
+                                               <input type='hidden' name='hostname' value='${agg.node.hostname}'/> 
                                                <input type='hidden' name='type' value='InternalScan' /> 
                                                </form>
                                                <a onclick='javascript: document.internalscan${i}.submit();' href="javascript: void(1);">InternalScan</a-->
@@ -186,7 +186,7 @@ from links import *
                                        <td py:if="len(pcuquery) > 0">
                                                <!-- TODO: add some values/code to authenticate the operation.  -->
                                                <!--form action="${link('pcuview', pcuid=pcu.plc_pcuid)}" name="reboot${i}" method='post'>
-                                               <input type='hidden' name='hostname' value='${node.hostname}'/> 
+                                               <input type='hidden' name='hostname' value='${agg.node.hostname}'/> 
                                                <input type='hidden' name='type' value='Reboot' /> 
                                                </form>
                                                <a onclick='javascript: document.reboot${i}.submit();' href="javascript: void(1);">Reboot</a-->
@@ -231,7 +231,11 @@ from links import *
                                        <td py:content="act.action_type"></td>
                                        <td><a class="ext-link" href="${plc_mail_uri(act.message_id)}">
                                                        <span py:if="act.message_id != 0" class="icon">${act.message_id}</span></a></td>
-                                       <td><pre py:content="act.error_string"></pre></td>
+                                       <td py:if="'bootmanager' in act.action_type">
+                                               <a href="/monitorlog/bm.${act.hostname}.log">latest bm log</a>
+                                       </td>
+                                       <td py:if="'bootmanager' not in act.action_type">
+                                               <pre py:content="act.error_string"></pre></td>
                                </tr>
                        </tbody>
                </table>