# where to send notifications upon load_starting
self.substrate=substrate
- def list (self):
+ def list (self, verbose=False):
for i in self.pool_items: print i.line()
def line (self):
return
self.build_instances.append(BuildInstance(buildname, pid, self))
- def list(self):
+ def list(self, verbose=False):
if not self.build_instances:
header ('No build process on %s (%s)'%(self.hostname,self.uptime()))
else:
# inspect box and find currently running builds
matcher=re.compile("\s*(?P<pid>[0-9]+).*-[bo]\s+(?P<buildname>[^\s]+)(\s|\Z)")
- matcher_building_vm=re.compile("\s*(?P<pid>[0-9]+).*init-vserver.*-i\s+eth.\s+(?P<buildname>[^\s]+)\s*\Z")
+ matcher_building_vm=re.compile("\s*(?P<pid>[0-9]+).*init-vserver.*\s+(?P<buildname>[^\s]+)\s*\Z")
def sense(self, options):
print 'bb',
self.sense_uptime()
class PlcLxcInstance (PlcInstance):
# does lxc have a context id of any kind ?
- def __init__ (self, plcbox, lxcname):
+ def __init__ (self, plcbox, lxcname, pid):
PlcInstance.__init__(self, plcbox)
self.lxcname = lxcname
+ self.pid = pid
- def kill (self):
- print "TODO lxc PlcLxcInstance.kill ..."
+ def vplcname (self):
+ return self.lxcname.split('-')[-1]
+ def buildname (self):
+ return self.lxcname.rsplit('-',2)[0]
def line (self):
- return "TODO lxc PlcLxcInstance.line with lxcname=%s"%(self.lxcname)
+ msg="== %s =="%(self.vplcname())
+ msg += " [=%s]"%self.lxcname
+ if self.pid==-1: msg+=" not (yet?) running"
+ else: msg+=" (pid=%s)"%self.pid
+ if self.timestamp: msg += " @ %s"%self.pretty_timestamp()
+ else: msg += " *unknown timestamp*"
+ return msg
+
+ def kill (self):
+ command="rsync lxc-driver.sh %s:/root"%self.plc_box.hostname
+ commands.getstatusoutput(command)
+ msg="lxc container stopping %s on %s"%(self.lxcname,self.plc_box.hostname)
+ self.plc_box.run_ssh(['/root/lxc-driver.sh','-c','stop_lxc','-n',self.lxcname],msg)
+ self.plc_box.forget(self)
##########
class PlcBox (Box):
def reboot (self, options):
if not options.soft:
- self.reboot(options)
+ Box.reboot(self,options)
else:
self.soft_reboot (options)
- def list(self):
+ def list(self, verbose=False):
if not self.plc_instances:
header ('No plc running on %s'%(self.line()))
else:
self.plc_instances.append(PlcVsInstance(self,vservername,ctxid))
def line(self):
- msg="%s [max=%d,%d free, VS-based] (%s)"%(self.hostname, self.max_plcs,self.free_slots(),self.uname())
+ msg="%s [max=%d,free=%d, VS-based] (%s)"%(self.hostname, self.max_plcs,self.free_slots(),self.uname())
return msg
def plc_instance_by_vservername (self, vservername):
return None
def soft_reboot (self, options):
- self.run_ssh(['service','util-vserver','stop'],"Stopping all running vservers",
+ self.run_ssh(['service','util-vserver','stop'],"Stopping all running vservers on %s"%(self.hostname,),
dry_run=options.dry_run)
def sense (self, options):
class PlcLxcBox (PlcBox):
+ def add_lxc (self,lxcname,pid):
+ for plc in self.plc_instances:
+ if plc.lxcname==lxcname:
+ header("WARNING, duplicate myplc %s running on %s"%\
+ (lxcname,self.hostname),banner=False)
+ return
+ self.plc_instances.append(PlcLxcInstance(self,lxcname,pid))
+
+
# a line describing the box
def line(self):
- msg="%s [max=%d,%d free, LXC-based] (%s)"%(self.hostname, self.max_plcs,self.free_slots(),self.uname())
+ msg="%s [max=%d,free=%d, LXC-based] (%s)"%(self.hostname, self.max_plcs,self.free_slots(),self.uname())
return msg
-
+
+ def plc_instance_by_lxcname (self, lxcname):
+ for p in self.plc_instances:
+ if p.lxcname==lxcname: return p
+ return None
+
# essentially shutdown all running containers
def soft_reboot (self, options):
- print "TODO lxc PlcLxcBox.soft_reboot"
+ command="rsync lxc-driver.sh %s:/root"%self.hostname
+ commands.getstatusoutput(command)
+ self.run_ssh(['/root/lxc-driver.sh','-c','stop_all'],"Stopping all running lxc containers on %s"%(self.hostname,),
+ dry_run=options.dry_run)
+
# sense is expected to fill self.plc_instances with PlcLxcInstance's
# to describe the currently running VM's
# as well as to call self.get_uname() once
def sense (self, options):
- print "xp (todo:PlcLxcBox.sense)",
+ print "xp",
self.get_uname()
-
+ command="rsync lxc-driver.sh %s:/root"%self.hostname
+ commands.getstatusoutput(command)
+ command=['/root/lxc-driver.sh','-c','sense_all']
+ lxc_stat = self.backquote_ssh (command)
+ for lxc_line in lxc_stat.split("\n"):
+ if not lxc_line: continue
+ lxcname=lxc_line.split(";")[0]
+ pid=lxc_line.split(";")[1]
+ timestamp=lxc_line.split(";")[2]
+ self.add_lxc(lxcname,pid)
+ timestamp=int(timestamp)
+ p=self.plc_instance_by_lxcname(lxcname)
+ if not p:
+ print 'WARNING zombie plc',self.hostname,lxcname
+ print '... was expecting',lxcname,'in',[i.lxcname for i in self.plc_instances]
+ continue
+ p.set_timestamp(timestamp)
############################################################
class QemuInstance:
self.qemu_instances.append(dummy)
def line (self):
- msg="%s [max=%d,%d free] (%s)"%(self.hostname, self.max_qemus,self.free_slots(),self.driver())
+ msg="%s [max=%d,free=%d] (%s)"%(self.hostname, self.max_qemus,self.free_slots(),self.driver())
return msg
- def list(self):
+ def list(self, verbose=False):
if not self.qemu_instances:
header ('No qemu process on %s'%(self.line()))
else:
def reboot (self, options):
if not options.soft:
- self.reboot(options)
+ Box.reboot(self,options)
else:
self.run_ssh(['pkill','qemu'],"Killing qemu instances",
dry_run=options.dry_run)
def sense(self, options):
print 'qn',
modules=self.backquote_ssh(['lsmod']).split('\n')
- self._driver='*NO kqemu/kmv_intel MODULE LOADED*'
+ self._driver='*NO kqemu/kvm_intel MODULE LOADED*'
for module in modules:
if module.find('kqemu')==0:
self._driver='kqemu module loaded'
- # kvm might be loaded without vkm_intel (we dont have AMD)
+ # kvm might be loaded without kvm_intel (we dont have AMD)
elif module.find('kvm_intel')==0:
self._driver='kvm_intel module loaded'
########## find out running pids
header(">>%s<<"%line)
########## retrieve alive instances and map to build
live_builds=[]
- command=['grep','.','*/*/qemu.pid','/dev/null']
+ command=['grep','.','/vservers/*/*/qemu.pid','/dev/null']
pid_lines=self.backquote_ssh(command,trash_err=True).split('\n')
for pid_line in pid_lines:
if not pid_line.strip(): continue
# expect <build>/<nodename>/qemu.pid:<pid>pid
try:
- (buildname,nodename,tail)=pid_line.split('/')
+ (_,__,buildname,nodename,tail)=pid_line.split('/')
(_,pid)=tail.split(':')
q=self.qemu_instance_by_pid (pid)
if not q: continue
# retrieve timestamps
if not live_builds: return
command= ['grep','.']
- command += ['%s/*/timestamp'%b for b in live_builds]
+ command += ['/vservers/%s/*/timestamp'%b for b in live_builds]
command += ['/dev/null']
ts_lines=self.backquote_ssh(command,trash_err=True).split('\n')
for ts_line in ts_lines:
if not ts_line.strip(): continue
# expect <build>/<nodename>/timestamp:<timestamp>
try:
- (buildname,nodename,tail)=ts_line.split('/')
+ (_,__,buildname,nodename,tail)=ts_line.split('/')
nodename=nodename.replace('qemu-','')
(_,timestamp)=tail.split(':')
timestamp=int(timestamp)
def set_now (self): self.timestamp=int(time.time())
def pretty_timestamp (self): return time.strftime("%Y-%m-%d:%H-%M",time.localtime(self.timestamp))
+ def is_running (self): return len(self.pids) != 0
def add_pid (self,pid):
self.pids.append(pid)
else: msg += " !!!pids=%s!!!"%self.pids
msg += " @%s"%self.pretty_timestamp()
if self.broken_steps:
- msg += " [BROKEN=" + " ".join( [ "%s@%s"%(s,i) for (i,s) in self.broken_steps ] ) + "]"
+ # sometimes we have an empty plcindex
+ msg += " [BROKEN=" + " ".join( [ "%s@%s"%(s,i) if i else s for (i,s) in self.broken_steps ] ) + "]"
return msg
class TestBox (Box):
matcher_proc=re.compile (".*/proc/(?P<pid>[0-9]+)/cwd.*/root/(?P<buildname>[^/]+)$")
matcher_grep=re.compile ("/root/(?P<buildname>[^/]+)/logs/trace.*:TRACE:\s*(?P<plcindex>[0-9]+).*step=(?P<step>\S+).*")
+ matcher_grep_missing=re.compile ("grep: /root/(?P<buildname>[^/]+)/logs/trace: No such file or directory")
def sense (self, options):
print 'tm',
self.sense_uptime()
t=self.add_timestamp(buildname,timestamp)
except: print 'WARNING, could not parse ts line',ts_line
- command=['bash','-c',"grep KO /root/*/logs/trace-* /dev/null" ]
+ # let's try to be robust here -- tests that fail very early, e.g. with
+ # "Cannot make space for a PLC instance: vplc IP pool exhausted", which occurs as part of provision,
+ # will result in a 'trace' symlink to a nonexistent 'trace-<>.txt' because no step has gone through;
+ # the plain 'trace' should exist though, as it is created by run_log
+ command=['bash','-c',"grep KO /root/*/logs/trace /dev/null 2>&1" ]
trace_lines=self.backquote_ssh (command).split('\n')
for line in trace_lines:
if not line.strip(): continue
+ m=TestBox.matcher_grep_missing.match(line)
+ if m:
+ buildname=m.group('buildname')
+ self.add_broken(buildname,'','NO STEP DONE')
+ continue
m=TestBox.matcher_grep.match(line)
if m:
buildname=m.group('buildname')
def line (self):
return "%s (%s)"%(self.hostname,self.uptime())
- def list (self):
- if not self.test_instances:
- header ("No known tests on %s"%self.line())
+ def list (self, verbose=False):
+ # verbose shows all tests
+ if verbose:
+ instances = self.test_instances
+ msg="knwown tests"
+ else:
+ instances = [ i for i in self.test_instances if i.is_running() ]
+ msg="known running tests"
+
+ if not instances:
+ header ("No %s on %s"%(msg,self.line()))
else:
- header ("Known tests on %s"%self.line())
- self.test_instances.sort(timestamp_sort)
- for i in self.test_instances: print i.line()
+ header ("%s on %s"%(msg,self.line()))
+ instances.sort(timestamp_sort)
+ for i in instances: print i.line()
+ # show 'starting' regardless of verbose
if self.starting_ips:
header ("Starting IP addresses on %s"%self.line())
self.starting_ips.sort()
for starting in self.starting_ips: print starting
+ else:
+ header ("Empty 'starting' on %s"%self.line())
############################################################
class Options: pass
self.rescope (plcs_on_vs=plcs_on_vs, plcs_on_lxc=plcs_on_lxc)
- def rescope(self, plcs_on_vs, plcs_on_lxc):
+ # which plc boxes are we interested in ?
+ def rescope (self, plcs_on_vs, plcs_on_lxc):
self.plc_boxes=[]
if plcs_on_vs: self.plc_boxes += self.plc_vs_boxes
if plcs_on_lxc: self.plc_boxes += self.plc_lxc_boxes
self.default_boxes = self.plc_boxes + self.qemu_boxes
self.all_boxes = self.build_boxes + [ self.test_box ] + self.plc_boxes + self.qemu_boxes
+ def summary_line (self):
+ msg = "["
+ msg += " %d vp"%len(self.plc_vs_boxes)
+ msg += " %d xp"%len(self.plc_lxc_boxes)
+ msg += " %d tried plc boxes"%len(self.plc_boxes)
+ msg += "]"
+ return msg
+
def fqdn (self, hostname):
if hostname.find('.')<0: return "%s.%s"%(hostname,self.domain())
return hostname
self._sensed=True
return True
- def list (self):
+ def list (self, verbose=False):
for b in self.default_boxes:
b.list()
except:
msg=""
if not plc_boxname: msg += " PLC boxes are full"
- if not vplc_hostname: msg += " vplc IP pool exhausted"
- raise Exception,"Could not make space for a PLC instance:"+msg
+ if not vplc_hostname: msg += " vplc IP pool exhausted"
+ msg += " %s"%self.summary_line()
+ raise Exception,"Cannot make space for a PLC instance:"+msg
freed_plc_boxname=plc_instance_to_kill.plc_box.hostname
freed_vplc_hostname=plc_instance_to_kill.vplcname()
message='killing oldest plc instance = %s on %s'%(plc_instance_to_kill.line(),
msg=""
if not qemu_boxname: msg += " QEMU boxes are full"
if not vnode_hostname: msg += " vnode IP pool exhausted"
- raise Exception,"Could not make space for a QEMU instance:"+msg
+ msg += " %s"%self.summary_line()
+ raise Exception,"Cannot make space for a QEMU instance:"+msg
freed_qemu_boxname=qemu_instance_to_kill.qemu_box.hostname
freed_vnode_hostname=short_hostname(qemu_instance_to_kill.nodename)
# kill it
nodemap={'host_box':qemu_boxname,
'node_fields:hostname':vnode_fqdn,
'interface_fields:ip':ip,
+ 'ipaddress_fields:ip_addr':ip,
'interface_fields:mac':mac,
}
nodemap.update(self.network_settings())
for box in box_or_names:
if not isinstance(box,Box): box=self.get_box(box)
if not box: continue
- box.list()
+ box.list(self.options.verbose)
def reboot_boxes(self,box_or_names):
for box in box_or_names:
box.reboot(self.options)
####################
- # can be run as a utility to manage the local infrastructure
+ # can be run as a utility to probe/display/manage the local infrastructure
def main (self):
parser=OptionParser()
parser.add_option ('-r',"--reboot",action='store_true',dest='reboot',default=False,
if self.options.qemus: boxes += self.qemu_boxes
if self.options.all: boxes += self.all_boxes
- # default scope is -b -p -q
+ # default scope is -b -p -q -t
if not boxes:
- boxes = self.build_boxes + self.plc_boxes + self.qemu_boxes
+ boxes = self.build_boxes + self.plc_boxes + self.qemu_boxes + [self.test_box]
if self.options.reboot: self.reboot_boxes (boxes)
else: self.list_boxes (boxes)