from monitor import util
from monitor.util import command
from monitor import config
-from monitor.database import FindbadNodeRecordSync, FindbadNodeRecord
+
+from monitor.database.info.model import FindbadNodeRecordSync, FindbadNodeRecord, session
+
from monitor.sources import comon
-from monitor.wrapper import plc
+from monitor.wrapper import plc, plccache
-import syncplcdb
from nodequery import verify,query_to_dict,node_select
import traceback
-print "starting sqlfindbad.py"
+#print "starting sqlfindbad.py"
# QUERY all nodes.
COMON_COTOPURL= "http://summer.cs.princeton.edu/status/tabulator.cgi?" + \
"table=table_nodeview&" + \
echo ' "princeton_comon":"'`ls -d /vservers/princeton_comon`'",'
ID=`grep princeton_comon /etc/passwd | awk -F : '{if ( $3 > 500 ) { print $3}}'`
-
echo ' "princeton_comon_running":"'`ls -d /proc/virtual/$ID`'",'
echo ' "princeton_comon_procs":"'`vps ax | grep $ID | grep -v grep | wc -l`'",'
echo "}"
oval = values['kernel']
if "2.6.17" in oval or "2.6.2" in oval:
values['ssh'] = 'SSH'
- values['category'] = 'ALPHA'
+ values['category'] = 'PROD'
if "bm.log" in values['bmlog']:
values['state'] = 'DEBUG'
else:
values['state'] = 'BOOT'
elif "2.6.12" in oval or "2.6.10" in oval:
values['ssh'] = 'SSH'
- values['category'] = 'PROD'
+ values['category'] = 'OLDPROD'
if "bm.log" in values['bmlog']:
values['state'] = 'DEBUG'
else:
fbnodesync = FindbadNodeRecordSync.findby_or_create(hostname=nodename,
if_new_set={'round' : global_round})
- fbrec = FindbadNodeRecord(
- date_checked=datetime.fromtimestamp(values['date_checked']),
- hostname=nodename,
- loginbase=values['loginbase'],
- kernel_version=values['kernel'],
- bootcd_version=values['bootcd'],
- nm_status=values['nm'],
- fs_status=values['readonlyfs'],
- dns_status=values['dns'],
- princeton_comon_dir=values['princeton_comon'],
- princeton_comon_running=values['princeton_comon_running'],
- princeton_comon_procs=values['princeton_comon_procs'],
- plc_node_stats = values['plcnode'],
- plc_site_stats = values['plcsite'],
- plc_pcuid = values['pcu'],
- comon_stats = values['comonstats'],
- ping_status = (values['ping'] == "PING"),
- ssh_portused = values['sshport'],
- ssh_status = (values['ssh'] == "SSH"),
- ssh_error = values['ssherror'],
- observed_status = values['state'],
- )
+ # NOTE: This code will either add a new record for the new global_round,
+ # OR it will find the previous value, and update it
+ # with new information.
+ # The data that is 'lost' is not that important, b/c older
+ # history still exists.
+ fbrec = FindbadNodeRecord.findby_or_create(
+ round=global_round,
+ hostname=nodename)
+ before = fbrec.to_dict()
+ print "BEFORE, ", before
+ fbrec.flush()
+ time.sleep(2)
+ print "Setting VALUES"
+ fbrec.set( date_checked=datetime.fromtimestamp(values['date_checked']),
+ loginbase=values['loginbase'],
+ kernel_version=values['kernel'],
+ bootcd_version=values['bootcd'],
+ nm_status=values['nm'],
+ fs_status=values['readonlyfs'],
+ dns_status=values['dns'],
+ princeton_comon_dir=values['princeton_comon'],
+ princeton_comon_running=values['princeton_comon_running'],
+ princeton_comon_procs=values['princeton_comon_procs'],
+ plc_node_stats = values['plcnode'],
+ plc_site_stats = values['plcsite'],
+ plc_pcuid = values['pcu'],
+ comon_stats = values['comonstats'],
+ ping_status = (values['ping'] == "PING"),
+ ssh_portused = values['sshport'],
+ ssh_status = (values['ssh'] == "SSH"),
+ ssh_error = values['ssherror'],
+ observed_status = values['state'],
+ observed_category = values['category'])
+ after = fbrec.to_dict()
+ print "AFTER , ", after
+
+ for v in before.keys():
+ if before[v] == after[v]:
+ print "SAME FOR KEY %s" % v
+ print "%s : %s\t%s" % ( v, before[v], after[v] )
+
+ fbrec.flush()
fbnodesync.round = global_round
+ fbnodesync.flush()
+ fbsync.flush()
count += 1
print "%d %s %s" % (count, nodename, values)
for i in result:
print "Result: %s" % i
+def probe(hostname):
+	"""Run a one-off findbad check of a single node.
+
+	Collects ping/ssh status for `hostname`, records it in the
+	FindbadNodeRecord tables, and flushes the DB session.
+	Returns True on success, False if anything went wrong (the
+	traceback is printed for diagnosis; presumably this is called
+	interactively/from tools, so best-effort is intended -- confirm).
+	"""
+	try:
+		(nodename, values) = collectPingAndSSH(hostname, {})
+		recordPingAndSSH(None, (nodename, values))
+		session.flush()
+		return True
+	except:
+		# traceback.print_exc() writes the traceback itself and
+		# returns None; wrapping it in `print` emitted a stray
+		# "None" line after every traceback.
+		traceback.print_exc()
+		return False
+
def checkAndRecordState(l_nodes, cohash):
global global_round
# CREATE all the work requests
for nodename in l_nodes:
fbnodesync = FindbadNodeRecordSync.findby_or_create(hostname=nodename, if_new_set={'round':0})
-
node_round = fbnodesync.round
+ fbnodesync.flush()
+
if node_round < global_round:
# recreate node stats when refreshed
#print "%s" % nodename
print FindbadNodeRecordSync.query.count()
print FindbadNodeRecord.query.count()
+ session.flush()
def main():
global global_round
global_round += 1
fbsync.round = global_round
+ fbsync.flush()
+
cotop = comon.Comon()
# lastcotop measures whether cotop is actually running. this is a better
# metric than sshstatus, or other values from CoMon
cotop_url = COMON_COTOPURL
# history information for all nodes
- #cohash = {}
- cohash = cotop.coget(cotop_url)
- l_nodes = syncplcdb.create_plcdb()
+ cohash = {}
+ #cohash = cotop.coget(cotop_url)
+ l_nodes = plccache.l_nodes
if config.nodelist:
f_nodes = util.file.getListFromFile(config.nodelist)
l_nodes = filter(lambda x: x['hostname'] in f_nodes, l_nodes)