cd ${MONITOR_SCRIPT_ROOT}
set -e
DATE=`date +%Y-%m-%d-%T`
-MONITOR_PID="$HOME/monitor/SKIP"
+MONITOR_PID="${MONITOR_SCRIPT_ROOT}/SKIP"
echo "Performing API test"
API=$(./testapi.py)
fi
+set +e
AGENT=`ps ax | grep ssh-agent | grep -v grep`
+set -e
if [ -z "$AGENT" ] ; then
echo "starting ssh agent"
# if no agent is running, set it up.
# clean up stray 'locfg' processes that hang around inappropriately...
ps ax | grep locfg | grep -v grep | awk '{print $1}' | xargs kill || :
-echo "Generating web data"
+#echo "Generating web data"
# badcsv.txt
-${MONITOR_SCRIPT_ROOT}/printbadcsv.py | grep -v loading | tr -d ' ' > badcsv.txt
-cp badcsv.txt /plc/data/var/www/html/monitor/
-${MONITOR_SCRIPT_ROOT}/showlatlon.py | head -9 | awk 'BEGIN {print "<table>"} { print "<tr><td>", $0, "</td></tr>"} END{print "</table>"}' | sed -e 's\|\</td><td>\g' > /plc/data/var/www/html/monitor/regions.html
+#${MONITOR_SCRIPT_ROOT}/printbadcsv.py | grep -v loading | tr -d ' ' > badcsv.txt
+#cp badcsv.txt /plc/data/var/www/html/monitor/
+#${MONITOR_SCRIPT_ROOT}/showlatlon.py | head -9 | awk 'BEGIN {print "<table>"} { print "<tr><td>", $0, "</td></tr>"} END{print "</table>"}' | sed -e 's\|\</td><td>\g' > /plc/data/var/www/html/monitor/regions.html
echo "Performing uptime changes for sites, nodes, and pcus"
########################
${MONITOR_SCRIPT_ROOT}/pkl2php.py -i act_all -o act_all
${MONITOR_SCRIPT_ROOT}/pkl2php.py -i plcdb_hn2lb -o plcdb_hn2lb
${MONITOR_SCRIPT_ROOT}/pkl2php.py -i findbad -o findbadnodes
-${MONITOR_SCRIPT_ROOT}/pkl2php.py -i ad_dbTickets -o ad_dbTickets
-${MONITOR_SCRIPT_ROOT}/pkl2php.py -i idTickets -o idTickets
+#${MONITOR_SCRIPT_ROOT}/pkl2php.py -i ad_dbTickets -o ad_dbTickets
+#${MONITOR_SCRIPT_ROOT}/pkl2php.py -i idTickets -o idTickets
echo "Archiving pkl files"
#########################
echo "Running grouprins on all dbg nodes"
############################
# 5. Check if there are any nodes in dbg state. Clean up afterward.
-${MONITOR_SCRIPT_ROOT}/grouprins.py --mail=1 \
- --nodeselect 'state=DEBUG&&boot_state=(rins|dbg|boot)' \
- --stopselect 'state=BOOT&&kernel=2.6.22.19-vs2.3.0.34.9.planetlab' \
- --reboot || :
-${MONITOR_SCRIPT_ROOT}/findbad.py --increment --cachenodes --debug=0 --dbname="findbad" --nodeselect 'state=DEBUG&&boot_state=dbg||state=DEBUG&&boot_state=boot' || :
-
-echo "Collecting RT database dump"
-##########################
-# 6. cache the RT db locally.
-python ${MONITOR_SCRIPT_ROOT}/rt.py
+${MONITOR_SCRIPT_ROOT}/grouprins.py --mail=1 --reboot --nodeselect 'state=DOWN&&boot_state=(boot|rins|dbg|diag)' --stopselect "state=BOOT" || :
+${MONITOR_SCRIPT_ROOT}/grouprins.py --mail=1 --reboot --nodeselect 'state=DEBUG&&boot_state=(rins|dbg|boot)' --stopselect 'state=BOOT' || :
+cp ${MONITOR_SCRIPT_ROOT}/monitor.log ${MONITOR_ARCHIVE_ROOT}/`date +%F-%H:%M`.monitor.log
rm -f $MONITOR_PID
import database
import time
import mailer
-from unified_model import cmpCategoryVal
import sys
import emailTxt
import string
diag.save()
else:
- print "NOT sending email : %s %s" % (config.mail, record.data['rt'])
+ print "NOT sending email : %s" % config.mail
return
#from config import options as config
options = Options()
- update_section(options, 'commandline', True)
- update_section(options, 'monitorconfig')
+ try:
+ update_section(options, 'commandline', True)
+ except:
+ pass
+ try:
+ update_section(options, 'monitorconfig')
+ except:
+ pass
#for i in dir(config):
# if "__" not in i:
input_text = "Subject: %s\n"
input_text += "Requestor: %s\n"% FROM
input_text += "id: ticket/new\n"
- input_text += "Queue: Monitor\n"
+ input_text += "Queue: %s\n" % config.RT_QUEUE
for recipient in to:
input_text += "AdminCc: %s\n" % recipient
input_text += "Text: %s"
RT_WEB_USER=
RT_WEB_PASSWORD=
RT_WEB_DEBUG=0
+RT_QUEUE=
# PLC admin account
API_SERVER=https://boot.planet-lab.org/PLCAPI/
MONITOR_HOSTNAME=monitor.planet-lab.org
MONITOR_SCRIPT_ROOT=/usr/share/monitor-server
MONITOR_DATA_ROOT=/var/lib/monitor-server
-MONITOR_ARCHIVE_ROOT=/usr/share/monitor-server/archive-pdb
+MONITOR_ARCHIVE_ROOT=/var/lib/monitor-server/archive-pdb
email=monitor@another-lab.org
def fb_print_nodeinfo(fbnode):
pf = PersistFlags(fbnode['hostname'], 1, db='node_persistflags')
- fbnode['last_change'] = diff_time(pf.last_changed)
+ try:
+ fbnode['last_change'] = diff_time(pf.last_changed)
+ except:
+ fbnode['last_change'] = diff_time(time.time())
print " Checked: ",
if 'checked' in fbnode:
print "%11.11s " % diff_time(fbnode['checked'])
# get site details.
s = api.GetSites(loginbase)[0]
# get people at site
- p = api.GetPersons(s['person_ids'])[0]
+ p = api.GetPersons(s['person_ids'])
# pull out those with the right role.
emails = [ person['email'] for person in filter(lambda x: 'tech' in x['roles'], p) ]
return emails
# get site details.
s = api.GetSites(loginbase)[0]
# get people at site
- p = api.GetPersons(s['person_ids'])[0]
+ p = api.GetPersons(s['person_ids'])
# pull out those with the right role.
emails = [ person['email'] for person in filter(lambda x: 'pi' in x['roles'], p) ]
return emails
print str
keys = categories.keys()
- for cat in ['BOOT-ALPHA', 'BOOT-PROD', 'BOOT-OLDBOOTCD', 'DEBUG-ALPHA',
- 'DEBUG-PROD', 'DEBUG-OLDBOOTCD', 'DOWN-ERROR']:
+ for cat in ['BOOT-PROD', 'BOOT-OLDPROD', 'BOOT-OLDBOOTCD', 'DEBUG-PROD',
+ 'DEBUG-OLDPROD', 'DEBUG-OLDBOOTCD', 'DOWN-ERROR']:
if cat not in keys:
categories[cat] = 0
keys = categories.keys()
- for cat in ['BOOT-ALPHA', 'BOOT-PROD', 'BOOT-OLDBOOTCD', 'DEBUG-ALPHA',
- 'DEBUG-PROD', 'DEBUG-OLDBOOTCD', 'DOWN-ERROR']:
+ for cat in ['BOOT-PROD', 'BOOT-OLDPROD', 'BOOT-OLDBOOTCD', 'DEBUG-PROD',
+ 'DEBUG-OLDPROD', 'DEBUG-OLDBOOTCD', 'DOWN-ERROR']:
if cat in keys:
print "%d," % categories[cat],
print ""
raise Exception("No index %s or %s in map" % (v1, v2))
def cmpCategoryVal(v1, v2):
- map = array_to_priority_map([ None, 'PROD', 'ALPHA', 'OLDBOOTCD', 'UNKNOWN', 'FORCED', 'ERROR', ])
+ # Terrible hack to manage migration to no more 'ALPHA' states.
+ if v1 == 'ALPHA': v1 = "PROD"
+ if v2 == 'ALPHA': v2 = "PROD"
+ #map = array_to_priority_map([ None, 'PROD', 'ALPHA', 'OLDBOOTCD', 'UNKNOWN', 'FORCED', 'ERROR', ])
+ map = array_to_priority_map([ None, 'ALPHA', 'PROD', 'OLDBOOTCD', 'UNKNOWN', 'FORCED', 'ERROR', ])
return cmpValMap(v1,v2,map)
if ADMIN & roles:
contacts += [config.email]
if TECH & roles:
- contacts += [TECHEMAIL % self.loginbase]
+ #contacts += [TECHEMAIL % self.loginbase]
+ contacts += plc.getTechEmails(loginbase)
if PI & roles:
- contacts += [PIEMAIL % self.loginbase]
+ #contacts += [PIEMAIL % self.loginbase]
+ contacts += plc.getSliceUserEmails(loginbase)
if USER & roles:
+ contacts += plc.getSliceUserEmails(loginbase)
slices = plc.slices(self.loginbase)
if len(slices) >= 1:
- for slice in slices:
- contacts += [SLICEMAIL % slice]
+ #for slice in slices:
+ # contacts += [SLICEMAIL % slice]
print "SLIC: %20s : %d slices" % (self.loginbase, len(slices))
else:
print "SLIC: %20s : 0 slices" % self.loginbase
return cmpMap(l1,l2,'state', map)
def cmpCategoryVal(v1, v2):
- map = array_to_priority_map([ None, 'ALPHA', 'PROD', 'OLDBOOTCD', 'UNKNOWN', 'FORCED', 'ERROR', ])
+ map = array_to_priority_map([ None, 'ALPHA', 'PROD', 'OLDPROD', 'OLDBOOTCD', 'UNKNOWN', 'FORCED', 'ERROR', ])
return cmpValMap(v1,v2,map)
def cmpCategory(l1, l2):
- map = array_to_priority_map([ 'ALPHA', 'PROD', 'OLDBOOTCD', 'UNKNOWN', 'ERROR', ])
+ map = array_to_priority_map([ 'ALPHA', 'PROD', 'OLDPROD', 'OLDBOOTCD', 'UNKNOWN', 'ERROR', ])
return cmpMap(l1,l2,'category', map)
def cmpPCU(l1, l2):
function plc_site_link($site_name)
{
- return "https://www.planet-lab.org/db/sites/index.php?site_pattern=" . $site_name;
+ return "https://" . MONITOR_HOSTNAME . "/db/sites/index.php?site_pattern=" . $site_name;
}
function pcu_link($pcu)
{
- return "https://www.planet-lab.org/db/sites/pcu.php?id=" . $pcu['pcu_id'];
+ return "https://" . MONITOR_HOSTNAME . "/db/sites/pcu.php?id=" . $pcu['pcu_id'];
}
function pcu_site($pcu)