X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=monitor%2Fscanapi.py;h=af7fcd430b4e3373eaf2647556012409a5bed4da;hb=cccbcf3069da9b6fe5e5817b5370bf1339bf42eb;hp=4a00eef4f56e9801cdb2c6520e8ca67fc94305c3;hpb=4d67defe979b409f82bbee2168bfe90ffc184867;p=monitor.git diff --git a/monitor/scanapi.py b/monitor/scanapi.py index 4a00eef..af7fcd4 100644 --- a/monitor/scanapi.py +++ b/monitor/scanapi.py @@ -11,8 +11,7 @@ import threading import socket from pcucontrol import reboot -from monitor import util -from monitor.util import command +from pcucontrol.util import command from monitor import config from monitor.database.info.model import * @@ -20,9 +19,8 @@ from monitor.database.info.model import * from monitor.sources import comon from monitor.wrapper import plc, plccache -from nodequery import verify,query_to_dict,node_select import traceback -from nodecommon import nmap_port_status +from monitor.common import nmap_port_status, email_exception COMON_COTOPURL= "http://summer.cs.princeton.edu/status/tabulator.cgi?" + \ "table=table_nodeview&" + \ @@ -65,7 +63,7 @@ def get_nodes(node_ids): l_node = plc.getNodes(node_ids, ['hostname', 'last_contact', 'node_id', 'ports']) except: try: - plc_nodes = plccache.l_plcnodes + plc_nodes = plccache.l_nodes for n in plc_nodes: if n['node_id'] in node_ids: l_node.append(n) @@ -114,7 +112,7 @@ class ScanInterface(object): syncclass = None primarykey = 'hostname' - def __init__(self, round): + def __init__(self, round=1): self.round = round self.count = 1 @@ -135,40 +133,44 @@ class ScanInterface(object): try: if values is None: return - - fbnodesync = self.syncclass.findby_or_create( - if_new_set={'round' : self.round}, + + if self.syncclass: + fbnodesync = self.syncclass.findby_or_create( + #if_new_set={'round' : self.round}, **{ self.primarykey : nodename}) # NOTE: This code will either add a new record for the new self.round, # OR it will find the previous value, and update it with new information. # The data that is 'lost' is not that important, b/c older # history still exists. fbrec = self.recordclass.findby_or_create( - **{'round':self.round, self.primarykey:nodename}) + **{ self.primarykey:nodename}) fbrec.set( **values ) fbrec.flush() - fbnodesync.round = self.round - fbnodesync.flush() + if self.syncclass: + fbnodesync.round = self.round + fbnodesync.flush() print "%d %s %s" % (self.count, nodename, values) self.count += 1 except: print "ERROR:" + email_exception(str(nodename)) print traceback.print_exc() pass class ScanNodeInternal(ScanInterface): recordclass = FindbadNodeRecord - syncclass = FindbadNodeRecordSync + #syncclass = FindbadNodeRecordSync + syncclass = None primarykey = 'hostname' def collectNMAP(self, nodename, cohash): #### RUN NMAP ############################### values = {} - nmap = util.command.CMD() + nmap = command.CMD() print "nmap -oG - -P0 -p22,80,806 %s | grep Host:" % nodename (oval,eval) = nmap.run_noexcept("nmap -oG - -P0 -p22,80,806 %s | grep Host:" % nodename) # NOTE: an empty / error value for oval, will still work. @@ -202,16 +204,19 @@ class ScanNodeInternal(ScanInterface): echo ' "bmlog":"'`ls /tmp/bm.log`'",' echo ' "bootcd_version":"'`cat /mnt/cdrom/bootme/ID`'",' echo ' "nm_status":"'`ps ax | grep nm.py | grep -v grep`'",' - echo ' "fs_status":"'`touch /var/log/monitor 2>&1`'",' echo ' "dns_status":"'`host boot.planet-lab.org 2>&1`'",' echo ' "princeton_comon_dir":"'`ls -d /vservers/princeton_comon`'",' + echo ' "uptime":"'`uptime`'",' ID=`grep princeton_comon /etc/passwd | awk -F : '{if ( $3 > 500 ) { print $3}}'` echo ' "princeton_comon_running":"'`ls -d /proc/virtual/$ID`'",' echo ' "princeton_comon_procs":"'`vps ax | grep $ID | grep -v grep | wc -l`'",' + echo ' "fs_status":"'`grep proc /proc/mounts | grep ro, ; if [ -x /usr/bin/timeout.pl ] ; then timeout.pl 20 touch /var/log/monitor 2>&1 ; if [ -d /vservers/ ] ; then timeout.pl 20 touch /vservers/monitor.log 2>&1 ; fi ; fi`'",' + echo ' "rpm_version":"'`if [ -x /usr/bin/timeout.pl ] ; then timeout.pl 30 rpm -q NodeManager ; fi`'",' + echo ' "rpm_versions":"'`if [ -x /usr/bin/timeout.pl ] ; then timeout.pl 45 rpm -q -a ; fi`'",' echo "}" - EOF """) - +EOF """) + values['ssh_error'] = errval if len(oval) > 0: #print "OVAL: %s" % oval @@ -222,7 +227,10 @@ class ScanNodeInternal(ScanInterface): values.update({'kernel_version': "", 'bmlog' : "", 'bootcd_version' : '', 'nm_status' : '', 'fs_status' : '', + 'uptime' : '', 'dns_status' : '', + 'rpm_version' : '', + 'rpm_versions' : '', 'princeton_comon_dir' : "", 'princeton_comon_running' : "", 'princeton_comon_procs' : "", 'ssh_portused' : None}) @@ -230,6 +238,11 @@ class ScanNodeInternal(ScanInterface): print traceback.print_exc() sys.exit(1) + values['fs_status'] = "" + print "ALLVERSIONS: %s %s" % (nodename, values['rpm_versions']) + + print "RPMVERSION: %s %s" % (nodename, values['rpm_version']) + print "UPTIME: %s %s" % (nodename, values['uptime']) ### RUN SSH ###################### b_getbootcd_id = True @@ -330,15 +343,7 @@ class ScanNodeInternal(ScanInterface): 'memsize' : 'null'} # include output value ### GET PLC NODE ###################### - plc_lock.acquire() - d_node = None - try: - d_node = plc.getNodes({'hostname': nodename}, ['pcu_ids', 'site_id', - 'date_created', 'last_updated', - 'last_contact', 'boot_state', 'nodegroup_ids'])[0] - except: - traceback.print_exc() - plc_lock.release() + d_node = plccache.GetNodeByName(nodename) values['plc_node_stats'] = d_node ##### NMAP ################### @@ -362,8 +367,9 @@ class ScanNodeInternal(ScanInterface): d_site = None values['loginbase'] = "" try: - d_site = plc.getSites({'site_id': site_id}, - ['max_slices', 'slice_ids', 'node_ids', 'login_base'])[0] + d_site = plccache.GetSitesById([ site_id ])[0] + #d_site = plc.getSites({'site_id': site_id}, + # ['max_slices', 'slice_ids', 'node_ids', 'login_base'])[0] values['loginbase'] = d_site['login_base'] except: traceback.print_exc() @@ -377,9 +383,9 @@ class ScanNodeInternal(ScanInterface): return (nodename, values) def internalprobe(hostname): - fbsync = FindbadNodeRecordSync.findby_or_create(hostname="global", - if_new_set={'round' : 1}) - scannode = ScanNodeInternal(fbsync.round) + #fbsync = FindbadNodeRecordSync.findby_or_create(hostname="global", + # if_new_set={'round' : 1}) + scannode = ScanNodeInternal() # fbsync.round) try: (nodename, values) = scannode.collectInternal(hostname, {}) scannode.record(None, (nodename, values)) @@ -390,9 +396,9 @@ def internalprobe(hostname): return False def externalprobe(hostname): - fbsync = FindbadNodeRecordSync.findby_or_create(hostname="global", - if_new_set={'round' : 1}) - scannode = ScanNodeInternal(fbsync.round) + #fbsync = FindbadNodeRecordSync.findby_or_create(hostname="global", + # if_new_set={'round' : 1}) + scannode = ScanNodeInternal() # fbsync.round) try: (nodename, values) = scannode.collectNMAP(hostname, {}) scannode.record(None, (nodename, values)) @@ -404,7 +410,7 @@ def externalprobe(hostname): class ScanPCU(ScanInterface): recordclass = FindbadPCURecord - syncclass = FindbadPCURecordSync + syncclass = None primarykey = 'plc_pcuid' def collectInternal(self, pcuname, cohash): @@ -433,7 +439,7 @@ class ScanPCU(ScanInterface): #### RUN NMAP ############################### if continue_probe: - nmap = util.command.CMD() + nmap = command.CMD() print "nmap -oG - -P0 -p22,23,80,443,5869,9100,16992 %s | grep Host:" % reboot.pcu_name(values['plc_pcu_stats']) (oval,eval) = nmap.run_noexcept("nmap -oG - -P0 -p22,23,80,443,5869,9100,16992 %s | grep Host:" % reboot.pcu_name(values['plc_pcu_stats'])) # NOTE: an empty / error value for oval, will still work. @@ -479,7 +485,7 @@ class ScanPCU(ScanInterface): values['dns_status'] = "DNS-OK" else: values['dns_status'] = "DNS-MISMATCH" - continue_probe = False + values['plc_pcu_stats']['hostname'] = values['plc_pcu_stats']['ip'] except Exception, err: values['dns_status'] = "DNS-NOENTRY" @@ -495,7 +501,7 @@ class ScanPCU(ScanInterface): ###### DRY RUN ############################ - if 'node_ids' in values['plc_pcu_stats'] and \ + if continue_probe and 'node_ids' in values['plc_pcu_stats'] and \ len(values['plc_pcu_stats']['node_ids']) > 0: rb_ret = reboot.reboot_test_new(values['plc_pcu_stats']['nodenames'][0], values, 1, True) @@ -511,7 +517,8 @@ class ScanPCU(ScanInterface): print "____________________________________" errors['traceback'] = traceback.format_exc() print errors['traceback'] - values['reboot_trial_status'] = errors['traceback'] + values['reboot_trial_status'] = str(errors['traceback']) + print values values['entry_complete']=" ".join(values['entry_complete'])