git://git.onelab.eu
/
monitor.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
add default /etc/monitor.conf to monitor-server.init
[monitor.git]
/
findbad.py
diff --git a/findbad.py b/findbad.py
index c08fbc8..1e412bc 100755 (executable)
--- a/findbad.py
+++ b/findbad.py
@@ -13,9 +13,8 @@
from monitor.util import command
from monitor import config
from monitor.database import FindbadNodeRecordSync, FindbadNodeRecord
from monitor.sources import comon
from monitor import config
from monitor.database import FindbadNodeRecordSync, FindbadNodeRecord
from monitor.sources import comon
-from monitor.wrapper import plc
+from monitor.wrapper import plc, plccache
-import syncplcdb
from nodequery import verify,query_to_dict,node_select
import traceback
from nodequery import verify,query_to_dict,node_select
import traceback
@@ -63,7 +62,6 @@
def collectPingAndSSH(nodename, cohash):
echo ' "princeton_comon":"'`ls -d /vservers/princeton_comon`'",'
ID=`grep princeton_comon /etc/passwd | awk -F : '{if ( $3 > 500 ) { print $3}}'`
echo ' "princeton_comon":"'`ls -d /vservers/princeton_comon`'",'
ID=`grep princeton_comon /etc/passwd | awk -F : '{if ( $3 > 500 ) { print $3}}'`
-
echo ' "princeton_comon_running":"'`ls -d /proc/virtual/$ID`'",'
echo ' "princeton_comon_procs":"'`vps ax | grep $ID | grep -v grep | wc -l`'",'
echo "}"
echo ' "princeton_comon_running":"'`ls -d /proc/virtual/$ID`'",'
echo ' "princeton_comon_procs":"'`vps ax | grep $ID | grep -v grep | wc -l`'",'
echo "}"
@@ -97,14 +95,14 @@
EOF """)
oval = values['kernel']
if "2.6.17" in oval or "2.6.2" in oval:
values['ssh'] = 'SSH'
oval = values['kernel']
if "2.6.17" in oval or "2.6.2" in oval:
values['ssh'] = 'SSH'
- values['category'] = 'ALPHA'
+ values['category'] = 'PROD'
if "bm.log" in values['bmlog']:
values['state'] = 'DEBUG'
else:
values['state'] = 'BOOT'
elif "2.6.12" in oval or "2.6.10" in oval:
values['ssh'] = 'SSH'
if "bm.log" in values['bmlog']:
values['state'] = 'DEBUG'
else:
values['state'] = 'BOOT'
elif "2.6.12" in oval or "2.6.10" in oval:
values['ssh'] = 'SSH'
- values['category'] = 'PROD'
+ values['category'] = 'OLDPROD'
if "bm.log" in values['bmlog']:
values['state'] = 'DEBUG'
else:
if "bm.log" in values['bmlog']:
values['state'] = 'DEBUG'
else:
@@ -256,6 +254,7 @@
def recordPingAndSSH(request, result):
fbrec = FindbadNodeRecord(
date_checked=datetime.fromtimestamp(values['date_checked']),
fbrec = FindbadNodeRecord(
date_checked=datetime.fromtimestamp(values['date_checked']),
+ round=global_round,
hostname=nodename,
loginbase=values['loginbase'],
kernel_version=values['kernel'],
hostname=nodename,
loginbase=values['loginbase'],
kernel_version=values['kernel'],
@@ -275,6 +274,7 @@
def recordPingAndSSH(request, result):
ssh_status = (values['ssh'] == "SSH"),
ssh_error = values['ssherror'],
observed_status = values['state'],
ssh_status = (values['ssh'] == "SSH"),
ssh_error = values['ssherror'],
observed_status = values['state'],
+ observed_category = values['category'],
)
fbnodesync.round = global_round
)
fbnodesync.round = global_round
@@ -354,7 +354,7 @@
def main():
# history information for all nodes
#cohash = {}
cohash = cotop.coget(cotop_url)
# history information for all nodes
#cohash = {}
cohash = cotop.coget(cotop_url)
- l_nodes = syncplcdb.create_plcdb()
+ l_nodes = plccache.l_nodes
if config.nodelist:
f_nodes = util.file.getListFromFile(config.nodelist)
l_nodes = filter(lambda x: x['hostname'] in f_nodes, l_nodes)
if config.nodelist:
f_nodes = util.file.getListFromFile(config.nodelist)
l_nodes = filter(lambda x: x['hostname'] in f_nodes, l_nodes)