add email notice for 'failed twice' nodes
[monitor.git] / monitor / scanapi.py
index 667c504..af7fcd4 100644 (file)
@@ -63,7 +63,7 @@ def get_nodes(node_ids):
                l_node = plc.getNodes(node_ids, ['hostname', 'last_contact', 'node_id', 'ports'])
        except:
                try:
-                       plc_nodes = plccache.l_plcnodes
+                       plc_nodes = plccache.l_nodes
                        for n in plc_nodes:
                                if n['node_id'] in node_ids:
                                        l_node.append(n)
@@ -204,17 +204,19 @@ class ScanNodeInternal(ScanInterface):
                                                echo '  "bmlog":"'`ls /tmp/bm.log`'",'
                                                echo '  "bootcd_version":"'`cat /mnt/cdrom/bootme/ID`'",'
                                                echo '  "nm_status":"'`ps ax | grep nm.py | grep -v grep`'",'
-                                               echo '  "fs_status":"'`touch /var/log/monitor 2>&1 ; if [ -d /vservers/ ] ; then touch /vservers/monitor.log 2>&1 ; fi ; grep proc /proc/mounts | grep ro,`'",'
                                                echo '  "dns_status":"'`host boot.planet-lab.org 2>&1`'",'
                                                echo '  "princeton_comon_dir":"'`ls -d /vservers/princeton_comon`'",'
+                                               echo '  "uptime":"'`uptime`'",'
 
                                                ID=`grep princeton_comon /etc/passwd | awk -F : '{if ( $3 > 500 ) { print $3}}'` 
                                                echo '  "princeton_comon_running":"'`ls -d /proc/virtual/$ID`'",'
                                                echo '  "princeton_comon_procs":"'`vps ax | grep $ID | grep -v grep | wc -l`'",'
-                                               echo '  "rpm_version":"'`rpm -q NodeManager`'",'
+                                               echo '  "fs_status":"'`grep proc /proc/mounts | grep ro, ; if [ -x /usr/bin/timeout.pl ] ; then timeout.pl 20 touch /var/log/monitor 2>&1 ; if [ -d /vservers/ ] ; then timeout.pl 20 touch /vservers/monitor.log 2>&1  ; fi ; fi`'",'
+                                               echo '  "rpm_version":"'`if [ -x /usr/bin/timeout.pl ] ; then timeout.pl 30 rpm -q NodeManager ; fi`'",'
+                                               echo '  "rpm_versions":"'`if [ -x /usr/bin/timeout.pl ] ; then timeout.pl 45 rpm -q -a ; fi`'",'
                                                echo "}"
 EOF                            """)
-                                       
+
                                        values['ssh_error'] = errval
                                        if len(oval) > 0:
                                                #print "OVAL: %s" % oval
@@ -225,8 +227,10 @@ EOF                                """)
                                                values.update({'kernel_version': "", 'bmlog' : "", 'bootcd_version' : '', 
                                                                                'nm_status' : '', 
                                                                                'fs_status' : '',
+                                                                               'uptime' : '',
                                                                                'dns_status' : '',
                                                                                'rpm_version' : '',
+                                                                               'rpm_versions' : '',
                                                                                'princeton_comon_dir' : "", 
                                                                                'princeton_comon_running' : "", 
                                                                                'princeton_comon_procs' : "", 'ssh_portused' : None})
@@ -234,7 +238,11 @@ EOF                                """)
                                print traceback.print_exc()
                                sys.exit(1)
 
+                       values['fs_status'] = ""
+                       print "ALLVERSIONS: %s %s" % (nodename, values['rpm_versions'])
+
                        print "RPMVERSION: %s %s" % (nodename, values['rpm_version'])
+                       print "UPTIME: %s %s" % (nodename, values['uptime'])
                        ### RUN SSH ######################
                        b_getbootcd_id = True
 
@@ -335,16 +343,7 @@ EOF                                """)
                                                                                'memsize'  : 'null'}
                        # include output value
                        ### GET PLC NODE ######################
-                       plc_lock.acquire()
-                       d_node = None
-                       try:
-                               d_node = plccache.GetNodeByName(nodename)
-                               #d_node = plc.getNodes({'hostname': nodename}, ['pcu_ids', 'site_id', 
-                               #                                               'date_created', 'last_updated', 
-                               #                                               'last_contact', 'boot_state', 'nodegroup_ids'])[0]
-                       except:
-                               traceback.print_exc()
-                       plc_lock.release()
+                       d_node = plccache.GetNodeByName(nodename)
                        values['plc_node_stats'] = d_node
 
                        ##### NMAP  ###################
@@ -486,7 +485,7 @@ class ScanPCU(ScanInterface):
                                                values['dns_status'] = "DNS-OK"
                                        else:
                                                values['dns_status'] = "DNS-MISMATCH"
-                                               continue_probe = False
+                                               values['plc_pcu_stats']['hostname'] = values['plc_pcu_stats']['ip']
 
                                except Exception, err:
                                        values['dns_status'] = "DNS-NOENTRY"