from TestSsh import TestSsh
from TestMapper import TestMapper
+# too painful to propagate this cleanly
+verbose=None
+
def header (message,banner=True):
if not message: return
if banner: print "===============",
self.test_ssh().run("shutdown -r now",message="Rebooting %s"%self.hostname,
dry_run=options.dry_run)
+ def hostname_fedora (self,virt=None):
+ result = "%s {"%self.hostname
+ if virt: result += "%s-"%virt
+ result += "%s"%self.fedora()
+ # too painful to propagate this cleanly
+ global verbose
+ if verbose:
+ result += "-%s" % self.uname()
+ result += "}"
+ return result
+
+ separator = "===composite==="
+
+ # probe the ssh link
+ # take this chance to gather useful stuff
+ def probe (self):
+ # try it only once
+ if self._probed is not None: return self._probed
+ composite_command = [ ]
+ composite_command += [ "hostname" ]
+ composite_command += [ ";" , "echo", Box.separator , ";" ]
+ composite_command += [ "uptime" ]
+ composite_command += [ ";" , "echo", Box.separator , ";" ]
+ composite_command += [ "uname", "-r"]
+ composite_command += [ ";" , "echo", Box.separator , ";" ]
+ composite_command += [ "cat" , "/etc/fedora-release" ]
+
+ # due to colons and all, this is going wrong on the local box (typically testmaster)
+ # I am reluctant to change TestSsh as it might break all over the place, so
+ if self.test_ssh().is_local():
+ probe_argv = [ "bash", "-c", " ".join (composite_command) ]
+ else:
+ probe_argv=self.test_ssh().actual_argv(composite_command)
+ composite=self.backquote ( probe_argv, trash_err=True )
+ self._hostname = self._uptime = self._uname = self._fedora = "** Unknown **"
+ if not composite:
+ print "root@%s unreachable"%self.hostname
+ self._probed=''
+ else:
+ try:
+ pieces = composite.split(Box.separator)
+ pieces = [ x.strip() for x in pieces ]
+ [self._hostname, self._uptime, self._uname, self._fedora] = pieces
+ # customize
+ self._uptime = ', '.join([ x.strip() for x in self._uptime.split(',')[2:]])
+ self._fedora = self._fedora.replace("Fedora release ","f").split(" ")[0]
+ except:
+ import traceback
+ print 'BEG issue with pieces',pieces
+ traceback.print_exc()
+ print 'END issue with pieces',pieces
+ self._probed=self._hostname
+ return self._probed
+
+ # use argv=['bash','-c',"the command line"]
def uptime(self):
+ self.probe()
if hasattr(self,'_uptime') and self._uptime: return self._uptime
- return '*undef* uptime'
- def sense_uptime (self):
- command=['uptime']
- self._uptime=self.backquote_ssh(command,trash_err=True).strip()
- if not self._uptime: self._uptime='unreachable'
+ return '*unprobed* uptime'
+ def uname(self):
+ self.probe()
+ if hasattr(self,'_uname') and self._uname: return self._uname
+ return '*unprobed* uname'
+ def fedora(self):
+ self.probe()
+ if hasattr(self,'_fedora') and self._fedora: return self._fedora
+ return '*unprobed* fedora'
def run(self,argv,message=None,trash_err=False,dry_run=False):
if dry_run:
result= subprocess.Popen(argv,stdout=subprocess.PIPE,stderr=file('/dev/null','w')).communicate()[0]
return result
- def probe (self):
- if self._probed is not None: return self._probed
- # first probe the ssh link
- probe_argv=self.test_ssh().actual_argv(['hostname'])
- self._probed=self.backquote ( probe_argv, trash_err=True )
- if not self._probed: print "root@%s unreachable"%self.hostname
- return self._probed
-
- # use argv=['bash','-c',"the command line"]
# if you have any shell-expanded arguments like *
# and if there's any chance the command is addressed to the local host
def backquote_ssh (self, argv, trash_err=False):
def list(self, verbose=False):
if not self.build_instances:
- header ('No build process on %s (%s)'%(self.hostname,self.uptime()))
+ header ('No build process on %s (%s)'%(self.hostname_fedora(),self.uptime()))
else:
- header ("Builds on %s (%s)"%(self.hostname,self.uptime()))
+ header ("Builds on %s (%s)"%(self.hostname_fedora(),self.uptime()))
for b in self.build_instances:
header (b.line(),banner=False)
if not options.soft:
Box.reboot(self,options)
else:
+ self.soft_reboot (options)
+
+class BuildVsBox (BuildBox):
+ def soft_reboot (self, options):
command=['pkill','vbuild']
self.run_ssh(command,"Terminating vbuild processes",dry_run=options.dry_run)
# inspect box and find currently running builds
matcher=re.compile("\s*(?P<pid>[0-9]+).*-[bo]\s+(?P<buildname>[^\s]+)(\s|\Z)")
- matcher_building_vm=re.compile("\s*(?P<pid>[0-9]+).*init-vserver.*\s+(?P<buildname>[^\s]+)\s*\Z")
+ matcher_building_vm=re.compile("\s*(?P<pid>[0-9]+).*initvm.*\s+(?P<buildname>[^\s]+)\s*\Z")
def sense(self, options):
- print 'bb',
- self.sense_uptime()
+ print 'vb',
pids=self.backquote_ssh(['pgrep','vbuild'],trash_err=True)
if not pids: return
command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid]
ps_lines=self.backquote_ssh (command).split('\n')
for line in ps_lines:
if not line.strip() or line.find('PID')>=0: continue
- m=BuildBox.matcher.match(line)
+ m=BuildVsBox.matcher.match(line)
if m:
date=time.strftime('%Y-%m-%d',time.localtime(time.time()))
buildname=m.group('buildname').replace('@DATE@',date)
self.add_build (buildname,m.group('pid'))
continue
- m=BuildBox.matcher_building_vm.match(line)
+ m=BuildVsBox.matcher_building_vm.match(line)
if m:
# buildname is expanded here
self.add_build (buildname,m.group('pid'))
continue
- header('BuildBox.sense: command %r returned line that failed to match'%command)
+ header('BuildVsBox.sense: command %r returned line that failed to match'%command)
header(">>%s<<"%line)
+class BuildLxcBox (BuildBox):
+ def soft_reboot (self, options):
+ command=['pkill','lbuild']
+ self.run_ssh(command,"Terminating vbuild processes",dry_run=options.dry_run)
+
+ # inspect box and find currently running builds
+ def sense(self, options):
+ print 'xb (Substrate.BuildLxcBox.sense - NIY)',
+
############################################################
class PlcInstance:
def __init__ (self, plcbox):
for p in self.plc_instances:
header (p.line(),banner=False)
- def get_uname(self):
- self._uname=self.backquote_ssh(['uname','-r']).strip()
-
- # expecting sense () to have filled self._uname
- def uname(self):
- if hasattr(self,'_uname') and self._uname: return self._uname
- return '*undef* uname'
-
+# we do not use this at INRIA any more
class PlcVsBox (PlcBox):
def add_vserver (self,vservername,ctxid):
self.plc_instances.append(PlcVsInstance(self,vservername,ctxid))
def line(self):
- msg="%s [max=%d,free=%d, VS-based] (%s)"%(self.hostname, self.max_plcs,self.free_slots(),self.uname())
+ msg="%s [max=%d,free=%d] (%s)"%(self.hostname_fedora(virt="vs"), self.max_plcs,self.free_slots(),self.uptime())
return msg
def plc_instance_by_vservername (self, vservername):
def sense (self, options):
print 'vp',
- self.get_uname()
# try to find fullname (vserver_stat truncates to a ridiculously short name)
# fetch the contexts for all vservers on that box
map_command=['grep','.','/etc/vservers/*/context','/dev/null',]
# a line describing the box
def line(self):
- msg="%s [max=%d,free=%d, LXC-based] (%s)"%(self.hostname, self.max_plcs,self.free_slots(),self.uname())
- return msg
+ return "%s [max=%d,free=%d] (%s)"%(self.hostname_fedora(virt="lxc"),
+ self.max_plcs,self.free_slots(),
+ self.uptime(),
+ )
def plc_instance_by_lxcname (self, lxcname):
for p in self.plc_instances:
# sense is expected to fill self.plc_instances with PlcLxcInstance's
# to describe the currently running VM's
- # as well as to call self.get_uname() once
def sense (self, options):
print "xp",
- self.get_uname()
command="rsync lxc-driver.sh %s:/root"%self.hostname
commands.getstatusoutput(command)
command=['/root/lxc-driver.sh','-c','sense_all']
pid=lxc_line.split(";")[1]
timestamp=lxc_line.split(";")[2]
self.add_lxc(lxcname,pid)
- timestamp=int(timestamp)
+ try: timestamp=int(timestamp)
+ except: timestamp=0
p=self.plc_instance_by_lxcname(lxcname)
if not p:
print 'WARNING zombie plc',self.hostname,lxcname
self.qemu_instances.append(dummy)
def line (self):
- msg="%s [max=%d,free=%d] (%s)"%(self.hostname, self.max_qemus,self.free_slots(),self.driver())
- return msg
+ return "%s [max=%d,free=%d] (%s) %s"%(
+ self.hostname_fedora(virt="qemu"), self.max_qemus,self.free_slots(),
+ self.uptime(),self.driver())
def list(self, verbose=False):
if not self.qemu_instances:
- header ('No qemu process on %s'%(self.line()))
+ header ('No qemu on %s'%(self.line()))
else:
- header ("Active qemu processes on %s"%(self.line()))
+ header ("Qemus on %s"%(self.line()))
self.qemu_instances.sort(timestamp_sort)
for q in self.qemu_instances:
header (q.line(),banner=False)
self._driver='kqemu module loaded'
# kvm might be loaded without kvm_intel (we don't have AMD)
elif module.find('kvm_intel')==0:
- self._driver='kvm_intel module loaded'
+ self._driver='kvm_intel OK'
########## find out running pids
pids=self.backquote_ssh(['pgrep','qemu'])
if not pids: return
header(">>%s<<"%line)
########## retrieve alive instances and map to build
live_builds=[]
- command=['grep','.','*/*/qemu.pid','/dev/null']
+ command=['grep','.','/vservers/*/*/qemu.pid','/dev/null']
pid_lines=self.backquote_ssh(command,trash_err=True).split('\n')
for pid_line in pid_lines:
if not pid_line.strip(): continue
# expect <build>/<nodename>/qemu.pid:<pid>pid
try:
- (buildname,nodename,tail)=pid_line.split('/')
+ (_,__,buildname,nodename,tail)=pid_line.split('/')
(_,pid)=tail.split(':')
q=self.qemu_instance_by_pid (pid)
if not q: continue
# retrieve timestamps
if not live_builds: return
command= ['grep','.']
- command += ['%s/*/timestamp'%b for b in live_builds]
+ command += ['/vservers/%s/*/timestamp'%b for b in live_builds]
command += ['/dev/null']
ts_lines=self.backquote_ssh(command,trash_err=True).split('\n')
for ts_line in ts_lines:
if not ts_line.strip(): continue
# expect <build>/<nodename>/timestamp:<timestamp>
try:
- (buildname,nodename,tail)=ts_line.split('/')
+ (_,__,buildname,nodename,tail)=ts_line.split('/')
nodename=nodename.replace('qemu-','')
(_,timestamp)=tail.split(':')
timestamp=int(timestamp)
else: msg += " !!!pids=%s!!!"%self.pids
msg += " @%s"%self.pretty_timestamp()
if self.broken_steps:
- msg += " [BROKEN=" + " ".join( [ "%s@%s"%(s,i) for (i,s) in self.broken_steps ] ) + "]"
+ # sometimes we have an empty plcindex
+ msg += " [BROKEN=" + " ".join( [ "%s@%s"%(s,i) if i else s for (i,s) in self.broken_steps ] ) + "]"
return msg
class TestBox (Box):
self.test_instances.append(i)
i.set_broken(plcindex, step)
- matcher_proc=re.compile (".*/proc/(?P<pid>[0-9]+)/cwd.*/vservers/(?P<buildname>[^/]+)$")
- matcher_grep=re.compile ("/vservers/(?P<buildname>[^/]+)/logs/trace.*:TRACE:\s*(?P<plcindex>[0-9]+).*step=(?P<step>\S+).*")
+ matcher_proc=re.compile (".*/proc/(?P<pid>[0-9]+)/cwd.*/root/(?P<buildname>[^/]+)$")
+ matcher_grep=re.compile ("/root/(?P<buildname>[^/]+)/logs/trace.*:TRACE:\s*(?P<plcindex>[0-9]+).*step=(?P<step>\S+).*")
+ matcher_grep_missing=re.compile ("grep: /root/(?P<buildname>[^/]+)/logs/trace: No such file or directory")
def sense (self, options):
print 'tm',
- self.sense_uptime()
self.starting_ips=[x for x in self.backquote_ssh(['cat',Starting.location], trash_err=True).strip().split('\n') if x]
# scan timestamps on all tests
# this is likely to not invoke ssh so we need to be a bit smarter to get * expanded
# xxx would make sense above too
- command=['bash','-c',"grep . /vservers/*/timestamp /dev/null"]
+ command=['bash','-c',"grep . /root/*/timestamp /dev/null"]
ts_lines=self.backquote_ssh(command,trash_err=True).split('\n')
for ts_line in ts_lines:
if not ts_line.strip(): continue
- # expect /vservers/<buildname>/timestamp:<timestamp>
+ # expect /root/<buildname>/timestamp:<timestamp>
try:
(ts_file,timestamp)=ts_line.split(':')
ts_file=os.path.dirname(ts_file)
t=self.add_timestamp(buildname,timestamp)
except: print 'WARNING, could not parse ts line',ts_line
- command=['bash','-c',"grep KO /vservers/*/logs/trace-* /dev/null" ]
+ # let's try to be robust here -- tests that fail very early like e.g.
+ # "Cannot make space for a PLC instance: vplc IP pool exhausted", that occurs as part of provision
+ # will result in a 'trace' symlink to a nonexistent 'trace-<>.txt' because no step has gone through
+ # simple 'trace' should exist though as it is created by run_log
+ command=['bash','-c',"grep KO /root/*/logs/trace /dev/null 2>&1" ]
trace_lines=self.backquote_ssh (command).split('\n')
for line in trace_lines:
if not line.strip(): continue
+ m=TestBox.matcher_grep_missing.match(line)
+ if m:
+ buildname=m.group('buildname')
+ self.add_broken(buildname,'','NO STEP DONE')
+ continue
m=TestBox.matcher_grep.match(line)
if m:
buildname=m.group('buildname')
def line (self):
- return "%s (%s)"%(self.hostname,self.uptime())
+ return self.hostname_fedora()
def list (self, verbose=False):
# verbose shows all tests
if verbose:
instances = self.test_instances
- msg="knwown tests"
+ msg="tests"
else:
instances = [ i for i in self.test_instances if i.is_running() ]
- msg="known running tests"
+ msg="running tests"
if not instances:
header ("No %s on %s"%(msg,self.line()))
self.options.reboot=False
self.options.soft=False
self.test_box = TestBox (self.test_box_spec())
- self.build_boxes = [ BuildBox(h) for h in self.build_boxes_spec() ]
- # for compat with older LocalSubstrate
- try:
- self.plc_vs_boxes = [ PlcVsBox (h,m) for (h,m) in self.plc_vs_boxes_spec ()]
- self.plc_lxc_boxes = [ PlcLxcBox (h,m) for (h,m) in self.plc_lxc_boxes_spec ()]
- except:
- self.plc_vs_boxes = [ PlcVsBox (h,m) for (h,m) in self.plc_boxes_spec ()]
- self.plc_lxc_boxes = [ ]
+ self.build_vs_boxes = [ BuildVsBox(h) for h in self.build_vs_boxes_spec() ]
+ self.build_lxc_boxes = [ BuildLxcBox(h) for h in self.build_lxc_boxes_spec() ]
+ self.plc_vs_boxes = [ PlcVsBox (h,m) for (h,m) in self.plc_vs_boxes_spec ()]
+ self.plc_lxc_boxes = [ PlcLxcBox (h,m) for (h,m) in self.plc_lxc_boxes_spec ()]
self.qemu_boxes = [ QemuBox (h,m) for (h,m) in self.qemu_boxes_spec ()]
self._sensed=False
# which plc boxes are we interested in ?
def rescope (self, plcs_on_vs, plcs_on_lxc):
+ self.build_boxes = self.build_vs_boxes + self.build_lxc_boxes
self.plc_boxes=[]
if plcs_on_vs: self.plc_boxes += self.plc_vs_boxes
if plcs_on_lxc: self.plc_boxes += self.plc_lxc_boxes
#################### show results for interactive mode
def get_box (self,boxname):
for b in self.build_boxes + self.plc_boxes + self.qemu_boxes + [self.test_box] :
- if b.shortname()==boxname:
- return b
+ if b.shortname()==boxname: return b
+ try:
+ if b.shortname()==boxname.split('.')[0]: return b
+ except: pass
print "Could not find box %s"%boxname
return None
if self.options.qemus: boxes += self.qemu_boxes
if self.options.all: boxes += self.all_boxes
+ global verbose
+ verbose=self.options.verbose
# default scope is -b -p -q -t
if not boxes:
boxes = self.build_boxes + self.plc_boxes + self.qemu_boxes + [self.test_box]