#!/usr/bin/python
import os.path
import re
import subprocess
import sys
from optparse import OptionParser
class BuildBoxes:
    """Probe, or reboot, the build / plc / qemu / testmaster boxes over ssh.

    Every remote action is an ``ssh root@<box> <command>`` subprocess.  By
    default the boxes are only probed (running processes, vservers, tracker
    files, disk space); with ``-r`` the boxes are rebooted and the tracker
    files are removed instead.  ``self.options`` is only set by main(), so
    the handle_* methods must not be called before the command line is parsed.
    """

    # short box names are qualified with this DNS domain, see fqdn()
    domain = 'pl.sophia.inria.fr'
    testmaster = 'testmaster'
    build_boxes = ["mirror", "liquid", "reed", "velvet"]
    plc_boxes = ["testplc"]
    qemu_boxes = \
        ["qemu64-%d" % i for i in range(1, 4)] + \
        ["qemu32-%d" % i for i in range(1, 6)]
    test_boxes = plc_boxes + qemu_boxes
    testmaster_boxes = [testmaster]

    # common prefix of every ssh invocation (3 seconds connection timeout)
    ssh_command = ['ssh', '-o', 'ConnectTimeout=3']

    # a vserver name that ends with vplc<digits>
    vplc_matcher = re.compile(r".*(vplc[0-9]+$)")
    # a ps line that mentions vnode<digits>
    vnode_matcher = re.compile(r".*(vnode[0-9]+)")
    # width of the left-hand column in the listings
    margin_format = "%-14s"

    def __init__(self):
        # dummy defaults, main() overwrites them from the command line
        self.boxes = []
        self.do_tracker_qemus = False
        self.do_tracker_plcs = False

    # ---------------------------------------------------------------- naming
    def fqdn(self, box):
        """Return the short name *box* qualified with the class domain."""
        return "%s.%s" % (box, self.domain)

    @staticmethod
    def root(box):
        """Return the ssh target for the root account on *box*."""
        return "root@%s" % box

    @staticmethod
    def ssh(box):
        """Return the argv prefix that opens an ssh session as root on *box*."""
        return BuildBoxes.ssh_command + [BuildBoxes.root(box)]

    # ------------------------------------------------------------ formatting
    def header(self, message):
        """Print *message* as a section header and flush stdout.

        The flush keeps our own output ordered with respect to the output
        of the subprocesses, which write to the same stdout.
        """
        print("=============== %s" % (message,))
        sys.stdout.flush()

    def margin(self, string):
        """Return *string* left-justified in the margin column."""
        return self.margin_format % string

    def outline(self, string):
        """Return *string* surrounded with '==' markers."""
        return '== %s ==' % string

    def margin_outline(self, string):
        """Return the outlined *string*, padded to the margin column."""
        return self.margin(self.outline(string))

    # ------------------------------------------------------ running commands
    def run(self, argv, message, trash_err=False):
        """Run *argv* and return its exit status.

        In dry-run mode the command is only printed and 0 is returned.
        Otherwise *message*, when not empty, is printed as a header first.
        With *trash_err* the command's stderr is sent to the null device.
        """
        if self.options.dry_run:
            print("DRY_RUN: " + " ".join(argv))
            return 0
        if message:
            self.header(message)
        if not trash_err:
            return subprocess.call(argv)
        # the context manager closes the null device once the command is done
        with open(os.devnull, 'w') as devnull:
            return subprocess.call(argv, stderr=devnull)

    def run_ssh(self, box, argv, message, trash_err=False):
        """Run *argv* on *box* through ssh; warn on a non-zero exit status.

        Returns the exit status, as run() does.
        """
        result = self.run(self.ssh(box) + argv, message, trash_err)
        if result != 0:
            print("WARNING: failed to run %s on %s" % (" ".join(argv), box))
        return result

    @staticmethod
    def _capture(argv, stderr):
        """Run *argv* and return what it wrote on stdout, as text."""
        # universal_newlines gives text rather than bytes, so that callers
        # can split the result on '\n' whatever the interpreter version
        process = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=stderr,
                                   universal_newlines=True)
        return process.communicate()[0]

    def backquote(self, argv, trash_err=False):
        """Run *argv* and return its stdout; dry-run mode is NOT honoured.

        With *trash_err* the command's stderr is sent to the null device.
        """
        if not trash_err:
            return self._capture(argv, None)
        with open(os.devnull, 'w') as devnull:
            return self._capture(argv, devnull)

    def backquote_ssh(self, box, argv, trash_err=False):
        """Run *argv* on *box* through ssh and return its stdout.

        The link is probed first with a plain ``hostname``; when that yields
        nothing the box is reported as unreachable and '' is returned.
        """
        hostname = self.backquote(self.ssh(box) + ["hostname"], trash_err=True)
        if not hostname:
            print("%s unreachable" % self.root(box))
            return ''
        # same ssh options (connection timeout) as for the probe
        return self.backquote(self.ssh(box) + argv, trash_err)

    def reboot(self, box):
        """Reboot *box* (honours dry-run mode)."""
        command = self.ssh(box) + ['shutdown', '-r', 'now']
        self.run(command, "Rebooting %s" % box)

    # -------------------------------------------------------------- trackers
    @staticmethod
    def _plc_tracker_label(tracker):
        """Return the plc name from a '<hostname>@<buildname>-<plcname>' line.

        Raises ValueError when the line does not have that shape.
        """
        _, buildname = tracker.split('@')
        _, plcname = buildname.rsplit('-', 1)
        return plcname

    @staticmethod
    def _qemu_tracker_label(tracker):
        """Return the short node name from '<hostname>@<buildname>@<nodename>'.

        Raises ValueError when the line does not have that shape.
        """
        _, _, nodename = tracker.split('@')
        return nodename.split('.')[0]

    def _handle_tracker(self, filename, label_of):
        """Remove (reboot mode) or list (probe mode) a tracker file.

        *filename* is looked up on the testmaster box, relative to the
        directory the ssh session starts in.  *label_of* maps one tracker
        line to the label shown in the margin, and raises ValueError on a
        line it cannot parse; such lines are listed with an empty margin.
        """
        box = self.fqdn(self.testmaster)
        if not self.options.probe:
            self.run_ssh(box, ["rm", "-rf", filename],
                         "Cleaning up %s on %s" % (filename, box))
            return
        self.header("++++++++++ Inspecting %s on %s" % (filename, box))
        trackers = self.backquote_ssh(box, ["cat", filename])
        for tracker in trackers.split('\n'):
            if not tracker:
                continue
            tracker = tracker.strip()
            try:
                label = self.margin_outline(label_of(tracker))
            except ValueError:
                label = self.margin("")
            print("%s %s" % (label, tracker))

    def handle_tracker_plcs(self):
        """Remove or list the 'tracker-plcs' file of the testmaster box."""
        self._handle_tracker("tracker-plcs", self._plc_tracker_label)

    def handle_tracker_qemus(self):
        """Remove or list the 'tracker-qemus' file of the testmaster box."""
        self._handle_tracker("tracker-qemus", self._qemu_tracker_label)

    # ------------------------------------------------------------ build boxes
    def handle_build_box(self, box):
        """Reboot *box*, or in probe mode show its uptime and build processes."""
        if not self.options.probe:
            self.reboot(box)
            return
        # NOTE: backquote_ssh ignores dry-run, so uptime is fetched either way
        uptime = self.backquote_ssh(box, ['uptime'], True).strip()

        command = ['pgrep', 'build']
        if self.options.dry_run:
            self.run_ssh(box, command, None)
            return
        pids = self.backquote_ssh(box, command, True)
        if not pids:
            self.header('No build process on %s (%s)' % (box, uptime))
            return
        command = ['ps', '-o', 'pid,command'] + \
            [pid for pid in pids.split("\n") if pid]
        self.run_ssh(box, command,
                     "Active build processes on %s (%s)" % (box, uptime), True)

    # -------------------------------------------------------------- plc boxes
    def vplcname(self, vservername):
        """Return the trailing 'vplc<digits>' of *vservername*, or ''."""
        match = self.vplc_matcher.match(vservername)
        # group(1) is the captured string; groups() would be a tuple
        return match.group(1) if match else ""

    def _vserver_names(self, box):
        """Return a dict mapping context id to vserver name on *box*.

        Raises ValueError on an output line that is not '<path>:<xid>'.
        """
        # the extra /dev/null is presumably there so that grep prefixes each
        # match with its file name even when a single context file exists
        map_command = ['grep', '.', '/etc/vservers/*/context', '/dev/null']
        context_map = self.backquote_ssh(box, map_command)
        # at this point we have a set of lines like
        # /etc/vservers/2010.01.20--k27-f12-32-vplc03/context:40144
        ctx_dict = {}
        for map_line in context_map.split("\n"):
            if not map_line:
                continue
            path, xid = map_line.split(':')
            ctx_dict[xid] = os.path.basename(os.path.dirname(path))
        return ctx_dict

    def handle_plc_box(self, box):
        """Reboot *box*, or in probe mode list its running vservers.

        Each vserver-stat line is completed with the full vserver name,
        because vserver-stat truncates names.  When anything goes wrong in
        that process, the raw vserver-stat output is shown instead.
        """
        if not self.options.probe:
            self.reboot(box)
            return
        command = ['vserver-stat']
        if self.options.dry_run:
            self.run_ssh(box, command, "Active vservers on %s" % box)
            return
        try:
            self.header("vserver map on %s" % box)
            ctx_dict = self._vserver_names(box)
            vserver_stat = self.backquote_ssh(box, command)
            for vserver_line in vserver_stat.split("\n"):
                if not vserver_line:
                    continue
                context = vserver_line.split()[0]
                if context == "CTX":
                    # column titles
                    print("%s %s" % (self.margin(""), vserver_line))
                    continue
                longname = ctx_dict[context]
                label = self.margin_outline(self.vplcname(longname))
                print("%s %s [=%s]" % (label, vserver_line, longname))
        except Exception:
            # deliberate best effort: any parsing problem ends up here
            self.run_ssh(box, command,
                         "Fine-grained method failed - fallback to plain vserver-stat")

    # ------------------------------------------------------------- qemu boxes
    def vnodename(self, ps_line):
        """Return the last 'vnode<digits>' found in *ps_line*, or ''."""
        match = self.vnode_matcher.match(ps_line)
        # group(1) is the captured string; groups() would be a tuple
        return match.group(1) if match else ""

    def _ps_lines(self, box, pids):
        """Return the ``ps -o pid,command`` lines for *pids* on *box*.

        *pids* is the raw pgrep output, one pid per line.  Empty lines and
        lines that contain 'PID' (the column titles) are left out.
        """
        command = ['ps', '-o', 'pid,command'] + \
            [pid for pid in pids.split("\n") if pid]
        output = self.backquote_ssh(box, command)
        return [line for line in output.split("\n")
                if line and 'PID' not in line]

    def handle_qemu_box(self, box):
        """Reboot *box*, or in probe mode list its qemu processes.

        The header also tells whether lsmod reports a module whose name
        starts with 'kqemu'.
        """
        if not self.options.probe:
            self.reboot(box)
            return
        modules = self.backquote_ssh(box, ['lsmod']).split('\n')
        if any(module.startswith('kqemu') for module in modules):
            kqemu_msg = 'kqemu OK'
        else:
            kqemu_msg = '*NO kqemu MODULE LOADED*'

        command = ['pgrep', 'qemu']
        if self.options.dry_run:
            self.run_ssh(box, command, None)
            return
        pids = self.backquote_ssh(box, command)
        if not pids:
            self.header('No qemu process on %s' % box)
            return
        self.header("Active qemu processes on %s (%s)" % (box, kqemu_msg))
        for ps_line in self._ps_lines(box, pids):
            print("%s %s" % (self.margin_outline(self.vnodename(ps_line)), ps_line))

    # -------------------------------------------------------------- testmaster
    def testmaster_buildname(self, ps_line):
        """Return the build name found in a run_log ps line, or ''.

        The output of ps -o pid,command is expected to look like
        '<pid> bash <buildname>/run_log'; '' is returned for any line whose
        third field is not exactly '<buildname>/<command>'.
        """
        chunks = ps_line.split()
        if len(chunks) < 3:
            return ""
        parts = chunks[2].split('/')
        if len(parts) != 2:
            return ""
        return parts[0]

    def handle_testmaster_box(self, box):
        """List the run_log processes on *box*.

        Unlike the other handlers this one never reboots the box, whatever
        the probe option says.
        """
        command = ['pgrep', 'run_log']
        if self.options.dry_run:
            self.run_ssh(box, command, None)
            return
        pids = self.backquote_ssh(box, command)
        if not pids:
            self.header('No run_log process on %s' % box)
            return
        self.header("Active run_log processes on %s" % (box))
        for ps_line in self._ps_lines(box, pids):
            print("%s %s" % (self.margin_outline(self.testmaster_buildname(ps_line)),
                             ps_line))

    # ---------------------------------------------------------------- dispatch
    def handle_box(self, box, type):
        """Handle *box* if, and only if, it belongs to the *type* category.

        *type* is one of "qemu", "plc", "testmaster" or "build".  A box that
        is in none of the qemu / plc / testmaster lists counts as a build
        box.  main() calls this once per category, so that the output comes
        out grouped by category.
        """
        if box in self.qemu_boxes:
            if type == "qemu":
                self.handle_qemu_box(self.fqdn(box))
        elif box in self.plc_boxes:
            if type == "plc":
                self.handle_plc_box(self.fqdn(box))
        elif box in self.testmaster_boxes:
            if type == 'testmaster':
                self.handle_testmaster_box(self.fqdn(box))
        elif type == "build":
            self.handle_build_box(self.fqdn(box))

    def handle_disk(self, box):
        """Show ``df -h`` for *box* and return the exit status."""
        box = self.fqdn(box)
        return self.run_ssh(box, ["df", "-h"], "Disk space on %s" % box)

    # -------------------------------------------------------------------- main
    def _select_targets(self, args):
        """Return (boxes, do_tracker_plcs, do_tracker_qemus) for the options.

        Explicit hostnames win over every selection option; the options are
        then considered in the order -a, -b, -q, -p, -m, -t, and the default
        is the test boxes together with both trackers.
        """
        options = self.options
        if args:
            # if hostnames are specified, let's stay on the safe side
            # and don't reset trackers
            return args, False, False
        if options.all_boxes:
            return self.test_boxes + self.build_boxes, True, True
        if options.build_only:
            return self.build_boxes, False, False
        if options.qemu_only:
            return self.qemu_boxes, False, True
        if options.plc_only:
            return self.plc_boxes, True, False
        if options.testmaster_only:
            return self.testmaster_boxes, False, False
        if options.trackers_only:
            return [], True, True
        # default
        return self.test_boxes, True, True

    def main(self):
        """Parse the command line and handle the selected boxes."""
        usage = """%prog [options] [hostname..(s)]
Default is to act on test boxes only"""
        parser = OptionParser(usage=usage)
        parser.add_option("-n", "--dry-run", action="store_true", dest="dry_run", default=False,
                          help="Dry run")
        parser.add_option("-r", "--reboot", action="store_false", dest="probe", default=True,
                          help="Actually reset/reboot stuff instead of just probing it")
        # no need for -p = probe, as this is the default
        parser.add_option("-p", "--plc", action="store_true", dest="plc_only", default=False,
                          help="Acts on the plc box only")

        parser.add_option("-a", "--all", action="store_true", dest="all_boxes", default=False,
                          help="Acts on build and test boxes")
        parser.add_option("-b", "--build", action="store_true", dest="build_only", default=False,
                          help="Acts on build boxes only")
        parser.add_option("-q", "--qemu", action="store_true", dest="qemu_only", default=False,
                          help="Only acts on the qemu boxes")
        parser.add_option("-t", "--trackers", action="store_true", dest="trackers_only", default=False,
                          help="Only wipes trackers")
        parser.add_option("-m", "--master", action="store_true", dest="testmaster_only", default=False,
                          help="Display the testmaster status")
        parser.add_option("-d", "--disk", action="store_true", dest="show_disk", default=False,
                          help="Only inspects disk status")
        (self.options, args) = parser.parse_args()

        (self.boxes,
         self.do_tracker_plcs,
         self.do_tracker_qemus) = self._select_targets(args)

        if self.options.show_disk:
            for box in self.boxes:
                self.handle_disk(box)
            return

        # ALL OTHERS
        for box in self.boxes:
            self.handle_box(box, "build")
        # TESTMASTER
        for box in self.boxes:
            self.handle_box(box, "testmaster")
        # PLCS
        if self.do_tracker_plcs:
            self.handle_tracker_plcs()
        for box in self.boxes:
            self.handle_box(box, "plc")
        # QEMU
        if self.do_tracker_qemus:
            self.handle_tracker_qemus()
        for box in self.boxes:
            self.handle_box(box, "qemu")
if __name__ == "__main__":
    # script entry point: parse the command line and act on the selected boxes
    BuildBoxes().main()