X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=monitor%2Fscanapi.py;h=5928c5abf63ed8a62d8b86275ea73c95d9ab917a;hb=035a846d8617889c01cae12bc6d64eb7c48b64bd;hp=3e95ef21be40975c90e3a59592243b372b37a508;hpb=d8c4f261680cbc9cb2708cf12d97202716120dc7;p=monitor.git diff --git a/monitor/scanapi.py b/monitor/scanapi.py index 3e95ef2..5928c5a 100644 --- a/monitor/scanapi.py +++ b/monitor/scanapi.py @@ -11,8 +11,7 @@ import threading import socket from pcucontrol import reboot -from monitor import util -from monitor.util import command +from pcucontrol.util import command from monitor import config from monitor.database.info.model import * @@ -20,9 +19,8 @@ from monitor.database.info.model import * from monitor.sources import comon from monitor.wrapper import plc, plccache -from nodequery import verify,query_to_dict,node_select import traceback -from nodecommon import nmap_port_status +from monitor.common import nmap_port_status, email_exception COMON_COTOPURL= "http://summer.cs.princeton.edu/status/tabulator.cgi?" + \ "table=table_nodeview&" + \ @@ -65,7 +63,7 @@ def get_nodes(node_ids): l_node = plc.getNodes(node_ids, ['hostname', 'last_contact', 'node_id', 'ports']) except: try: - plc_nodes = plccache.l_plcnodes + plc_nodes = plccache.l_nodes for n in plc_nodes: if n['node_id'] in node_ids: l_node.append(n) @@ -114,7 +112,7 @@ class ScanInterface(object): syncclass = None primarykey = 'hostname' - def __init__(self, round): + def __init__(self, round=1): self.round = round self.count = 1 @@ -135,40 +133,44 @@ class ScanInterface(object): try: if values is None: return - - fbnodesync = self.syncclass.findby_or_create( - if_new_set={'round' : self.round}, + + if self.syncclass: + fbnodesync = self.syncclass.findby_or_create( + #if_new_set={'round' : self.round}, **{ self.primarykey : nodename}) # NOTE: This code will either add a new record for the new self.round, # OR it will find the previous value, and update it with new information. # The data that is 'lost' is not that important, b/c older # history still exists. fbrec = self.recordclass.findby_or_create( - **{'round':self.round, self.primarykey:nodename}) + **{ self.primarykey:nodename}) fbrec.set( **values ) fbrec.flush() - fbnodesync.round = self.round - fbnodesync.flush() + if self.syncclass: + fbnodesync.round = self.round + fbnodesync.flush() print "%d %s %s" % (self.count, nodename, values) self.count += 1 except: print "ERROR:" + email_exception(str(nodename)) print traceback.print_exc() pass class ScanNodeInternal(ScanInterface): recordclass = FindbadNodeRecord - syncclass = FindbadNodeRecordSync + #syncclass = FindbadNodeRecordSync + syncclass = None primarykey = 'hostname' def collectNMAP(self, nodename, cohash): #### RUN NMAP ############################### values = {} - nmap = util.command.CMD() + nmap = command.CMD() print "nmap -oG - -P0 -p22,80,806 %s | grep Host:" % nodename (oval,eval) = nmap.run_noexcept("nmap -oG - -P0 -p22,80,806 %s | grep Host:" % nodename) # NOTE: an empty / error value for oval, will still work. @@ -195,6 +197,7 @@ class ScanNodeInternal(ScanInterface): try: for port in [22, 806]: ssh = command.SSH('root', nodename, port) + #echo ' "fs_status":"'`touch /var/log/monitor 2>&1 ; if [ -d /vservers/ ] ; then touch /vservers/monitor.log 2>&1 ; fi ; grep proc /proc/mounts | grep ro,`'",' (oval, errval) = ssh.run_noexcept2(""" <<\EOF echo "{" @@ -202,15 +205,16 @@ class ScanNodeInternal(ScanInterface): echo ' "bmlog":"'`ls /tmp/bm.log`'",' echo ' "bootcd_version":"'`cat /mnt/cdrom/bootme/ID`'",' echo ' "nm_status":"'`ps ax | grep nm.py | grep -v grep`'",' - echo ' "fs_status":"'`touch /var/log/monitor 2>&1`'",' echo ' "dns_status":"'`host boot.planet-lab.org 2>&1`'",' echo ' "princeton_comon_dir":"'`ls -d /vservers/princeton_comon`'",' ID=`grep princeton_comon /etc/passwd | awk -F : '{if ( $3 > 500 ) { print $3}}'` echo ' "princeton_comon_running":"'`ls -d /proc/virtual/$ID`'",' echo ' "princeton_comon_procs":"'`vps ax | grep $ID | grep -v grep | wc -l`'",' + echo ' "rpm_version":"'`rpm -q NodeManager`'",' + echo ' "rpm_versions":"'`rpm -q -a`'",' echo "}" - EOF """) +EOF """) values['ssh_error'] = errval if len(oval) > 0: @@ -223,6 +227,8 @@ class ScanNodeInternal(ScanInterface): 'nm_status' : '', 'fs_status' : '', 'dns_status' : '', + 'rpm_version' : '', + 'rpm_versions' : '', 'princeton_comon_dir' : "", 'princeton_comon_running' : "", 'princeton_comon_procs' : "", 'ssh_portused' : None}) @@ -230,6 +236,10 @@ class ScanNodeInternal(ScanInterface): print traceback.print_exc() sys.exit(1) + values['fs_status'] = "" + print "ALLVERSIONS: %s %s" % (nodename, values['rpm_versions']) + + print "RPMVERSION: %s %s" % (nodename, values['rpm_version']) ### RUN SSH ###################### b_getbootcd_id = True @@ -333,16 +343,17 @@ class ScanNodeInternal(ScanInterface): plc_lock.acquire() d_node = None try: - d_node = plc.getNodes({'hostname': nodename}, ['pcu_ids', 'site_id', - 'date_created', 'last_updated', - 'last_contact', 'boot_state', 'nodegroup_ids'])[0] + d_node = plccache.GetNodeByName(nodename) + #d_node = plc.getNodes({'hostname': nodename}, ['pcu_ids', 'site_id', + # 'date_created', 'last_updated', + # 'last_contact', 'boot_state', 'nodegroup_ids'])[0] except: traceback.print_exc() plc_lock.release() values['plc_node_stats'] = d_node ##### NMAP ################### - (n, v) = collectNMAP(nodename, None) + (n, v) = self.collectNMAP(nodename, None) values.update(v) ### GET PLC PCU ###################### @@ -362,8 +373,9 @@ class ScanNodeInternal(ScanInterface): d_site = None values['loginbase'] = "" try: - d_site = plc.getSites({'site_id': site_id}, - ['max_slices', 'slice_ids', 'node_ids', 'login_base'])[0] + d_site = plccache.GetSitesById([ site_id ])[0] + #d_site = plc.getSites({'site_id': site_id}, + # ['max_slices', 'slice_ids', 'node_ids', 'login_base'])[0] values['loginbase'] = d_site['login_base'] except: traceback.print_exc() @@ -376,11 +388,10 @@ class ScanNodeInternal(ScanInterface): return (nodename, values) - def internalprobe(hostname): - fbsync = FindbadNodeRecordSync.findby_or_create(hostname="global", - if_new_set={'round' : 1}) - scannode = ScanNodeInternal(fbsync.round) + #fbsync = FindbadNodeRecordSync.findby_or_create(hostname="global", + # if_new_set={'round' : 1}) + scannode = ScanNodeInternal() # fbsync.round) try: (nodename, values) = scannode.collectInternal(hostname, {}) scannode.record(None, (nodename, values)) @@ -391,9 +402,9 @@ def internalprobe(hostname): return False def externalprobe(hostname): - fbsync = FindbadNodeRecordSync.findby_or_create(hostname="global", - if_new_set={'round' : 1}) - scannode = ScanNodeInternal(fbsync.round) + #fbsync = FindbadNodeRecordSync.findby_or_create(hostname="global", + # if_new_set={'round' : 1}) + scannode = ScanNodeInternal() # fbsync.round) try: (nodename, values) = scannode.collectNMAP(hostname, {}) scannode.record(None, (nodename, values)) @@ -405,7 +416,7 @@ def externalprobe(hostname): class ScanPCU(ScanInterface): recordclass = FindbadPCURecord - syncclass = FindbadPCURecordSync + syncclass = None primarykey = 'plc_pcuid' def collectInternal(self, pcuname, cohash): @@ -434,7 +445,7 @@ class ScanPCU(ScanInterface): #### RUN NMAP ############################### if continue_probe: - nmap = util.command.CMD() + nmap = command.CMD() print "nmap -oG - -P0 -p22,23,80,443,5869,9100,16992 %s | grep Host:" % reboot.pcu_name(values['plc_pcu_stats']) (oval,eval) = nmap.run_noexcept("nmap -oG - -P0 -p22,23,80,443,5869,9100,16992 %s | grep Host:" % reboot.pcu_name(values['plc_pcu_stats'])) # NOTE: an empty / error value for oval, will still work. @@ -480,7 +491,7 @@ class ScanPCU(ScanInterface): values['dns_status'] = "DNS-OK" else: values['dns_status'] = "DNS-MISMATCH" - continue_probe = False + values['plc_pcu_stats']['hostname'] = values['plc_pcu_stats']['ip'] except Exception, err: values['dns_status'] = "DNS-NOENTRY" @@ -496,7 +507,7 @@ class ScanPCU(ScanInterface): ###### DRY RUN ############################ - if 'node_ids' in values['plc_pcu_stats'] and \ + if continue_probe and 'node_ids' in values['plc_pcu_stats'] and \ len(values['plc_pcu_stats']['node_ids']) > 0: rb_ret = reboot.reboot_test_new(values['plc_pcu_stats']['nodenames'][0], values, 1, True) @@ -512,7 +523,8 @@ class ScanPCU(ScanInterface): print "____________________________________" errors['traceback'] = traceback.format_exc() print errors['traceback'] - values['reboot_trial_status'] = errors['traceback'] + values['reboot_trial_status'] = str(errors['traceback']) + print values values['entry_complete']=" ".join(values['entry_complete'])