- remove unused monitor-client init script
- fix UP/DOWN summary on nodes page
- make nodes page display all nodes by default
- add boot_server field
- add myops_ssh_key to the keychain
- use ext_consortium_id to distinguish pending sites
Setting tag Monitor-3.0-23
--This line, and those below, will be ignored--
Please write a changelog for this new tag in the section above
DIFF=========
Index: monitor.cron
===================================================================
--- monitor.cron (.../tags/Monitor-3.0-22) (révision 15357)
+++ monitor.cron (.../trunk) (révision 15357)
@@ -1,5 +0,0 @@
-# Runs once a day at 12pm to fetch the monitor account keys in case it was
-# inaccessible previously due to a network outage.
-
-0 12 * * * root /etc/init.d/monitor start > /dev/null 2>&1
-
Index: monitor-client.init
===================================================================
--- monitor-client.init (.../tags/Monitor-3.0-22) (révision 15357)
+++ monitor-client.init (.../trunk) (révision 15357)
@@ -1,53 +0,0 @@
-#!/bin/bash
-#
-# monitor Enables the monitor account by setting up the ssh key from the enabled PLC.
-#
-# Load before nm, vcached, and vservers, vserver-reference
-# chkconfig: 3 59 80
-# description: Fetch monitor ssh key to enable access to machine via monitor immediately.
-#
-# Stephen Soltesz <soltesz@cs.princeton.edu>
-# Copyright (C) 2008 The Trustees of Princeton University
-#
-# $Id$
-#
-
-case "$1" in
- start|restart|reload)
- ;;
- stop|status)
- exit 0
- ;;
- *)
- echo $"Usage: $0 {start|stop|restart|status}"
- exit 1
- ;;
-esac
-
-#
-# NOTE: This user is not used by monitor for the moment so better not create it.
-#
-
-# # NOTE: If user already exists, this just exists with status 9. I think it's
-# # ok to simply let this command check and error out.
-# # Parse PLC configuration
-# if [ -r /etc/planetlab/plc_config ] ; then
-# . /etc/planetlab/plc_config
-# else
-# PLC_NAME="PlanetLab"
-# PLC_SLICE_PREFIX="pl"
-# PLC_BOOT_HOST="boot.planet-lab.org"
-# fi
-
-# USER="${PLC_SLICE_PREFIX}_monitor"
-# /usr/sbin/useradd -p "" -m $USER &> /dev/null || :
-
-# if [ ! -d /home/$USER/.ssh ] ; then
-# mkdir /home/$USER/.ssh
-# chmod 700 /home/$USER/.ssh
-# chown $USER.$USER /home/$USER/.ssh
-# fi
-
-# URL="http://${PLC_BOOT_HOST}/PlanetLabConf/keys.php?$USER"
-# curl -s "$URL" > /home/$USER/.ssh/authorized_keys
-# chown $USER.$USER /home/$USER/.ssh/authorized_keys
Index: nodequery.py
===================================================================
--- nodequery.py (.../tags/Monitor-3.0-22) (révision 15357)
+++ nodequery.py (.../trunk) (révision 15357)
@@ -36,6 +36,8 @@
fbnode['bootcd_version'] = fbnode['bootcd_version'].split()[-1]
else:
fbnode['bootcd_version'] = "unknown"
+ if not fbnode['boot_server']:
+ fbnode['boot_server'] = "unknown"
fbnode['pcu'] = color_pcu_state(fbnode)
if not fields:
@@ -58,7 +60,7 @@
#print "ERROR!!!!!!!!!!!!!!!!!!!!!"
pass
- print "%(hostname)-45s | %(date_checked)11.11s | %(boot_state)5.5s| %(observed_status)8.8s | %(ssh_status)5.5s | %(pcu)6.6s | %(bootcd_version)6.6s | %(kernel_version)s" % fbnode
+ print "%(hostname)-45s | %(date_checked)11.11s | %(boot_state)5.5s| %(observed_status)8.8s | %(ssh_status)5.5s | %(pcu)6.6s | %(bootcd_version)6.6s | %(boot_server)s | %(kernel_version)s" % fbnode
else:
format = ""
for f in fields:
Index: web/MonitorWeb/monitorweb/controllers.py
===================================================================
--- web/MonitorWeb/monitorweb/controllers.py (.../tags/Monitor-3.0-22) (révision 15357)
+++ web/MonitorWeb/monitorweb/controllers.py (.../trunk) (révision 15357)
@@ -53,6 +53,7 @@
external_dns_status = widgets.CheckBox(label="Hostname Resolves?")
kernel_version = widgets.CheckBox(label="Kernel")
bootcd_version = widgets.CheckBox(label="BootCD")
+ boot_server = widgets.CheckBox(label="Boot Server")
observed_status = widgets.CheckBox(label="Observed Status")
uptime = widgets.CheckBox(label="Uptime")
traceroute = widgets.CheckBox(label="Traceroute")
Index: web/MonitorWeb/monitorweb/templates/nodescanhistory.kid
===================================================================
--- web/MonitorWeb/monitorweb/templates/nodescanhistory.kid (.../tags/Monitor-3.0-22) (révision 15357)
+++ web/MonitorWeb/monitorweb/templates/nodescanhistory.kid (.../trunk) (révision 15357)
@@ -62,6 +62,7 @@
<th class="sortable plekit_table">Stat</th>
<th class="sortable plekit_table">kernel</th>
<th class="sortable plekit_table">BootCD</th>
+ <th class="sortable plekit_table">Boot Server</th>
<th class="sortable plekit_table">Last_contact</th>
</tr>
</thead>
@@ -76,6 +77,7 @@
<td py:content="node.node.plc_node_stats['boot_state']">boot</td>
<td nowrap="true" py:content="node.kernel"></td>
<td nowrap="true" py:content="node.node.bootcd_version"></td>
+ <td nowrap="true" py:content="node.node.boot_server"></td>
<td id="node-${node.node.observed_status}" py:content="diff_time(node.node.plc_node_stats['last_contact'])"></td>
</span>
</tr>
Index: web/MonitorWeb/monitorweb/templates/node_template.kid
===================================================================
--- web/MonitorWeb/monitorweb/templates/node_template.kid (.../tags/Monitor-3.0-22) (révision 15357)
+++ web/MonitorWeb/monitorweb/templates/node_template.kid (.../trunk) (révision 15357)
@@ -15,6 +15,7 @@
<th>pcu</th>
<th>kernel</th>
<th>BootCD</th>
+ <th>Boot Server</th>
<th>last_contact</th>
</span>
<span py:if="node is not None">
@@ -41,6 +42,7 @@
</td>
<td nowrap="true" py:content="node.kernel"></td>
<td nowrap="true" py:content="node.node.bootcd_version"></td>
+ <td nowrap="true" py:content="node.node.boot_server"></td>
<td id="node-${node.node.observed_status}" py:content="diff_time(node.node.plc_node_stats['last_contact'])"></td>
</span>
</span>
Index: web/MonitorWeb/monitorweb/templates/nodelist.kid
===================================================================
--- web/MonitorWeb/monitorweb/templates/nodelist.kid (.../tags/Monitor-3.0-22) (révision 15357)
+++ web/MonitorWeb/monitorweb/templates/nodelist.kid (.../trunk) (révision 15357)
@@ -17,8 +17,8 @@
</script>
<center>
- <b py:content="'BOOT: %d' % len([agg for agg in query if agg.node.observed_status == 'BOOT'])"></b> |
- <b py:content="'DOWN: %d' % len([agg for agg in query if agg.node.observed_status == 'DOWN'])"></b><br/>
+ <b py:content="'UP: %d' % len([agg for agg in query if agg.node.status in ('online', 'good')])"></b> |
+ <b py:content="'DOWN: %d' % len([agg for agg in query if agg.node.status not in ('online', 'good')])"></b><br/>
</center>
<table id="nodelist" cellpadding="0" border="0" class="plekit_table sortable-onload-2 colstyle-alt no-arrow paginationcallback-nodelist_paginator max-pages-10 paginate-25">
@@ -58,6 +58,7 @@
<th class="sortable plekit_table">pcu</th>
<th class="sortable plekit_table">kernel</th>
<th class="sortable plekit_table">BootCD</th>
+ <th class="sortable plekit_table">Boot Server</th>
<th class="sortable-sortLastContact plekit_table">Last_contact</th>
</tr>
</thead>
Index: web/MonitorWeb/monitorweb/templates/nodefast.kid
===================================================================
--- web/MonitorWeb/monitorweb/templates/nodefast.kid (.../tags/Monitor-3.0-22) (révision 15357)
+++ web/MonitorWeb/monitorweb/templates/nodefast.kid (.../trunk) (révision 15357)
@@ -17,16 +17,16 @@
</script>
<center>
- <b py:content="'BOOT: %d' % len([agg for agg in query if agg.node.status == 'good'])"></b> |
- <b py:content="'DOWN: %d' % len([agg for agg in query if agg.node.status == 'down'])"></b><br/>
+ <b py:content="'UP: %d' % len([agg for agg in query if agg.node.status in ('online', 'good')])"></b> |
+ <b py:content="'DOWN: %d' % len([agg for agg in query if agg.node.status not in ('online', 'good')])"></b><br/>
</center>
-<table id="nodelist" cellpadding="0" border="0" class="plekit_table sortable-onload-2 colstyle-alt no-arrow paginationcallback-nodelist_paginator max-pages-10 paginate-25">
+<table id="nodelist" cellpadding="0" border="0" class="plekit_table sortable-onload-2 colstyle-alt no-arrow paginationcallback-nodelist_paginator max-pages-10 paginate-999">
<thead>
<tr class='pagesize_area'><td class='pagesize_area' colspan='10'>
<form class='pagesize' action='satisfy_xhtml_validator'><fieldset>
- <input class='pagesize_input' type='text' id="nodelist_pagesize" value='25'
+ <input class='pagesize_input' type='text' id="nodelist_pagesize" value='999'
onkeyup='plekit_pagesize_set("nodelist","nodelist_pagesize", 25);'
size='3' maxlength='3' />
<label class='pagesize_label'> items/page </label>
Index: web/MonitorWeb/dev.cfg
===================================================================
--- web/MonitorWeb/dev.cfg (.../tags/Monitor-3.0-22) (révision 15357)
+++ web/MonitorWeb/dev.cfg (.../trunk) (révision 15357)
@@ -31,7 +31,7 @@
autoreload.package="monitorweb"
-server.socket_host="monitor.planet-lab.org"
+server.socket_host="www.planet-lab.eu"
server.socket_port=8082
#server.webpath="/monitor/"
#base_url_filter.on = False
Index: upgrade/monitor-server-3.0-22.sql
===================================================================
--- upgrade/monitor-server-3.0-22.sql (.../tags/Monitor-3.0-22) (révision 0)
+++ upgrade/monitor-server-3.0-22.sql (.../trunk) (révision 15357)
@@ -0,0 +1,5 @@
+-- If there's an existing database, these commands will upgrade it to the
+-- current version
+
+ALTER TABLE findbadnoderecord ADD COLUMN boot_server varchar DEFAULT NULL;
+ALTER TABLE findbadnoderecord_history ADD COLUMN boot_server varchar DEFAULT NULL;
Index: Monitor.spec
===================================================================
--- Monitor.spec (.../tags/Monitor-3.0-22) (révision 15357)
+++ Monitor.spec (.../trunk) (révision 15357)
@@ -129,8 +129,8 @@
%install
rm -rf $RPM_BUILD_ROOT
#################### CLIENT
-install -D -m 755 monitor-client.init $RPM_BUILD_ROOT/%{_initrddir}/monitor
-install -D -m 644 monitor.cron $RPM_BUILD_ROOT/%{_sysconfdir}/cron.d/monitor
+#install -D -m 755 monitor-client.init $RPM_BUILD_ROOT/%{_initrddir}/monitor
+#install -D -m 644 monitor.cron $RPM_BUILD_ROOT/%{_sysconfdir}/cron.d/monitor
install -D -m 755 timeout.pl $RPM_BUILD_ROOT/usr/bin/timeout.pl
@@ -208,8 +208,8 @@
%files client
%defattr(-,root,root)
-%{_initrddir}/monitor
-%{_sysconfdir}/cron.d/monitor
+#%{_initrddir}/monitor
+#%{_sysconfdir}/cron.d/monitor
/usr/bin/timeout.pl
%files pcucontrol
Index: monitor/wrapper/plc.py
===================================================================
--- monitor/wrapper/plc.py (.../tags/Monitor-3.0-22) (révision 15357)
+++ monitor/wrapper/plc.py (.../trunk) (révision 15357)
@@ -14,6 +14,11 @@
import traceback
from monitor import database
+# note: this needs to be consistent with the value in PLEWWW/planetlab/includes/plc_functions.php
+PENDING_CONSORTIUM_ID = 0
+# not used in monitor
+#APPROVED_CONSORTIUM_ID = 999999
+
try:
from monitor import config
debug = config.debug
@@ -116,12 +121,12 @@
except:
print "Call %s FAILED: Using old cached data" % cachename
load_old_cache = True
-
+
if load_old_cache:
values = database.dbLoad(cachename)
else:
database.dbDump(cachename, values)
-
+
return values
else:
values = database.dbLoad(cachename)
@@ -324,6 +329,22 @@
#'last_updated', 'peer_node_id', 'ssh_rsa_key' ])
return nodes
+
+# Check if the site is a pending site that needs to be approved.
+def isPendingSite(loginbase):
+ api = xmlrpclib.Server(auth.server, verbose=False)
+ try:
+ site = api.GetSites(auth.auth, loginbase)[0]
+ except Exception, exc:
+ login.info("ERROR: No site %s" % loginbase)
+ return False
+
+ if not site['enabled'] and site['ext_consortium_id'] == PENDING_CONSORTIUM_ID:
+ return True
+
+ return False
+
+
'''
Sets boot state of a node.
'''
@@ -400,6 +421,7 @@
def enableSlices(nodename):
api = xmlrpclib.Server(auth.server, verbose=False, allow_none=True)
+
for slice in slices(siteId(nodename)):
logger.info("Enabling slices %s" % slice)
try:
@@ -417,6 +439,7 @@
logger.info("enableSlices: %s" % exc)
print "exception: %s" % exc
+
#I'm commenting this because this really should be a manual process.
#'''
#Enable suspended site slices.
@@ -428,6 +451,12 @@
# api.SliceAttributeAdd(auth.auth, slice, "plc_slice_state", {"state" : "suspended"})
#
def enableSiteSliceCreation(loginbase):
+ if isPendingSite(loginbase):
+ msg = "INFO: enableSiteSliceCreation: Pending Site (%s)" % loginbase
+ print msg
+ logger.info(msg)
+ return
+
api = xmlrpclib.Server(auth.server, verbose=False, allow_none=True)
try:
logger.info("Enabling slice creation for site %s" % loginbase)
@@ -442,10 +471,7 @@
api = xmlrpclib.Server(auth.server, verbose=False, allow_none=True)
try:
loginbase = siteId(nodename)
- logger.info("Enabling slice creation for site %s" % loginbase)
- if not debug:
- logger.info("\tcalling UpdateSite(%s, enabled=True)" % loginbase)
- api.UpdateSite(auth.auth, loginbase, {'enabled': True})
+ enableSiteSliceCreation(loginbase)
except Exception, exc:
print "ERROR: enableSliceCreation: %s" % exc
logger.info("ERROR: enableSliceCreation: %s" % exc)
@@ -453,13 +479,20 @@
'''
Removes site's ability to create slices. Returns previous max_slices
'''
-def removeSiteSliceCreation(sitename):
- print "removeSiteSliceCreation(%s)" % sitename
+def removeSiteSliceCreation(loginbase):
+ print "removeSiteSliceCreation(%s)" % loginbase
+
+ if isPendingSite(loginbase):
+ msg = "INFO: removeSiteSliceCreation: Pending Site (%s)" % loginbase
+ print msg
+ logger.info(msg)
+ return
+
api = xmlrpclib.Server(auth.server, verbose=False)
try:
- logger.info("Removing slice creation for site %s" % sitename)
+ logger.info("Removing slice creation for site %s" % loginbase)
if not debug:
- api.UpdateSite(auth.auth, sitename, {'enabled': False})
+ api.UpdateSite(auth.auth, loginbase, {'enabled': False})
except Exception, exc:
logger.info("removeSiteSliceCreation: %s" % exc)
@@ -471,12 +504,7 @@
api = xmlrpclib.Server(auth.server, verbose=False)
try:
loginbase = siteId(nodename)
- #numslices = api.GetSites(auth.auth, {"login_base": loginbase},
- # ["max_slices"])[0]['max_slices']
- logger.info("Removing slice creation for site %s" % loginbase)
- if not debug:
- #api.UpdateSite(auth.auth, loginbase, {'max_slices': 0})
- api.UpdateSite(auth.auth, loginbase, {'enabled': False})
+ removeSiteSliceCreation(loginbase)
except Exception, exc:
logger.info("removeSliceCreation: %s" % exc)
Index: monitor/database/info/findbad.py
===================================================================
--- monitor/database/info/findbad.py (.../tags/Monitor-3.0-22) (révision 15357)
+++ monitor/database/info/findbad.py (.../trunk) (révision 15357)
@@ -38,6 +38,7 @@
# INTERNAL
kernel_version = Field(String,default=None)
bootcd_version = Field(String,default=None)
+ boot_server = Field(String,default=None)
nm_status = Field(String,default=None)
fs_status = Field(String,default=None)
iptables_status = Field(String,default=None)
Index: monitor/scanapi.py
===================================================================
--- monitor/scanapi.py (.../tags/Monitor-3.0-22) (révision 15357)
+++ monitor/scanapi.py (.../trunk) (révision 15357)
@@ -237,6 +237,7 @@
echo ' "kernel_version":"'`uname -a`'",'
echo ' "bmlog":"'`ls /tmp/bm.log`'",'
echo ' "bootcd_version":"'`cat /mnt/cdrom/bootme/ID`'",'
+ echo ' "boot_server":"'`cat /mnt/cdrom/bootme/BOOTSERVER`'",'
echo ' "nm_status":"'`ps ax | grep nm.py | grep -v grep`'",'
echo ' "dns_status":"'`host boot.planet-lab.org 2>&1`'",'
echo ' "iptables_status":"'`iptables -t mangle -nL | awk '$1~/^[A-Z]+$/ {modules[$1]=1;}END{for (k in modules) {if (k) printf "%s ",k;}}'`'",'
@@ -260,6 +261,7 @@
break
else:
values.update({'kernel_version': "", 'bmlog' : "", 'bootcd_version' : '',
+ 'boot_server' : '',
'nm_status' : '',
'fs_status' : '',
'uptime' : '',
Index: automate-default.sh
===================================================================
--- automate-default.sh (.../tags/Monitor-3.0-22) (révision 15357)
+++ automate-default.sh (.../trunk) (révision 15357)
@@ -56,6 +56,7 @@
# if no agent is running, set it up.
ssh-agent > ${MONITOR_SCRIPT_ROOT}/agent.sh
source ${MONITOR_SCRIPT_ROOT}/agent.sh
+ ssh-add /etc/planetlab/myops_ssh_key.rsa
ssh-add /etc/planetlab/debug_ssh_key.rsa
ssh-add /etc/planetlab/root_ssh_key.rsa
fi
%define name monitor
%define version 3.0
-%define taglevel 22
+%define taglevel 23
%define release %{taglevel}%{?pldistro:.%{pldistro}}%{?date:.%{date}}
%global python_sitearch %( python -c "from distutils.sysconfig import get_python_lib; print get_python_lib(1)" )
%changelog
+* Mon Oct 19 2009 Baris Metin <Talip-Baris.Metin@sophia.inria.fr> - Monitor-3.0-23
+- - remove monitor-client.cron
+- - remove unused monitor-client init script
+- - fix UP/DOWN summary on nodes page.
+- - make node page display all nodes by default
+- - add boot_server field
+- - add myops_ssh_key to the keychain
+- - use ext_consortium_id to distinguish pending sites.
+
* Fri Oct 09 2009 Baris Metin <Talip-Baris.Metin@sophia.inria.fr> - Monitor-3.0-22
- show/hide advance query form.
%{zabbix_webdir}
%changelog
+* Mon Oct 19 2009 Baris Metin <Talip-Baris.Metin@sophia.inria.fr> - Monitor-3.0-23
+- - remove monitor-client.cron
+- - remove unused monitor-client init script
+- - fix UP/DOWN summary on nodes page.
+- - make node page display all nodes by default
+- - add boot_server field
+- - add myops_ssh_key to the keychain
+- - use ext_consortium_id to distinguish pending sites.
+
* Fri Oct 09 2009 Baris Metin <Talip-Baris.Metin@sophia.inria.fr> - Monitor-3.0-22
- show/hide advance query form.
%define module_current_branch 2.0
-%define taglevel 22
+%define taglevel 23
%define version 3.0