git://git.onelab.eu
/
monitor.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
f4c3225
)
change to make findall run over other things.
author
Stephen Soltesz
<soltesz@cs.princeton.edu>
Tue, 10 Mar 2009 20:14:29 +0000
(20:14 +0000)
committer
Stephen Soltesz
<soltesz@cs.princeton.edu>
Tue, 10 Mar 2009 20:14:29 +0000
(20:14 +0000)
automate-default.sh
patch
|
blob
|
history
diff --git a/automate-default.sh b/automate-default.sh
index 046c1ac..6a948a5 100755 (executable)
--- a/automate-default.sh
+++ b/automate-default.sh
@@ -61,30 +61,18 @@
fi
source ${MONITOR_SCRIPT_ROOT}/agent.sh
source ${MONITOR_SCRIPT_ROOT}/agent.sh
-echo "Performing Findbad Nodes"
+echo "Performing FindAll Nodes"
#########################
# 1. FINDBAD NODES
#########################
# 1. FINDBAD NODES
-${MONITOR_SCRIPT_ROOT}/findbad.py --increment $DATE || :
+${MONITOR_SCRIPT_ROOT}/findall.py --increment $DATE || :
ps ax | grep BatchMode | grep -v grep | awk '{print $1}' | xargs -r kill || :
ps ax | grep BatchMode | grep -v grep | awk '{print $1}' | xargs -r kill || :
-
-echo "Performing Findbad PCUs"
-#########################
-# 2. FINDBAD PCUS
-${MONITOR_SCRIPT_ROOT}/findbadpcu.py --increment $DATE || :
# clean up stray 'locfg' processes that hang around inappropriately...
ps ax | grep locfg | grep -v grep | awk '{print $1}' | xargs -r kill || :
# clean up stray 'locfg' processes that hang around inappropriately...
ps ax | grep locfg | grep -v grep | awk '{print $1}' | xargs -r kill || :
-echo "Performing uptime changes for sites, nodes, and pcus"
-########################
-# 3. record last-changed for sites, nodes and pcus.
-${MONITOR_SCRIPT_ROOT}/sitebad.py || :
-${MONITOR_SCRIPT_ROOT}/nodebad.py || :
-${MONITOR_SCRIPT_ROOT}/pcubad.py || :
-
echo "Archiving pkl files"
#########################
# Archive pkl files.
echo "Archiving pkl files"
#########################
# Archive pkl files.
-for f in findbad act_all findbadpcus l_plcnodes site_persistflags node_persistflags pcu_persistflags ; do
+for f in act_all l_plcnodes site_persistflags node_persistflags pcu_persistflags ; do
if [ -f ${MONITOR_DATA_ROOT}/production.$f.pkl ] ; then
cp ${MONITOR_DATA_ROOT}/production.$f.pkl ${MONITOR_ARCHIVE_ROOT}/`date +%F-%H:%M`.production.$f.pkl
else
if [ -f ${MONITOR_DATA_ROOT}/production.$f.pkl ] ; then
cp ${MONITOR_DATA_ROOT}/production.$f.pkl ${MONITOR_ARCHIVE_ROOT}/`date +%F-%H:%M`.production.$f.pkl
else
@@ -92,7 +80,6 @@ for f in findbad act_all findbadpcus l_plcnodes site_persistflags node_persistfl
fi
done
fi
done
-#echo "Running grouprins on all dbg nodes"
############################
# 5. Check if there are any nodes in dbg state. Clean up afterward.
#${MONITOR_SCRIPT_ROOT}/grouprins.py --mail=1 --reboot --nodeselect 'state=DOWN&&boot_state=(boot|rins|dbg|diag)' --stopselect "state=BOOT" || :
############################
# 5. Check if there are any nodes in dbg state. Clean up afterward.
#${MONITOR_SCRIPT_ROOT}/grouprins.py --mail=1 --reboot --nodeselect 'state=DOWN&&boot_state=(boot|rins|dbg|diag)' --stopselect "state=BOOT" || :