fix
[tests.git] / system / Substrate.py
index e912133..53b0c89 100644 (file)
@@ -150,7 +150,7 @@ class Pool:
         # where to send notifications upon load_starting
         self.substrate=substrate
 
-    def list (self):
+    def list (self, verbose=False):
         for i in self.pool_items: print i.line()
 
     def line (self):
@@ -336,7 +336,7 @@ class BuildBox (Box):
                 return
         self.build_instances.append(BuildInstance(buildname, pid, self))
 
-    def list(self):
+    def list(self, verbose=False):
         if not self.build_instances: 
             header ('No build process on %s (%s)'%(self.hostname,self.uptime()))
         else:
@@ -346,16 +346,16 @@ class BuildBox (Box):
 
     def reboot (self, options):
         if not options.soft:
-            self.reboot(options)
+            Box.reboot(self,options)
         else:
             command=['pkill','vbuild']
             self.run_ssh(command,"Terminating vbuild processes",dry_run=options.dry_run)
 
     # inspect box and find currently running builds
     matcher=re.compile("\s*(?P<pid>[0-9]+).*-[bo]\s+(?P<buildname>[^\s]+)(\s|\Z)")
-    matcher_building_vm=re.compile("\s*(?P<pid>[0-9]+).*init-vserver.*-i\s+eth.\s+(?P<buildname>[^\s]+)\s*\Z")
+    matcher_building_vm=re.compile("\s*(?P<pid>[0-9]+).*init-vserver.*\s+(?P<buildname>[^\s]+)\s*\Z")
     def sense(self, options):
-        print 'b',
+        print 'bb',
         self.sense_uptime()
         pids=self.backquote_ssh(['pgrep','vbuild'],trash_err=True)
         if not pids: return
@@ -415,15 +415,31 @@ class PlcVsInstance (PlcInstance):
 
 class PlcLxcInstance (PlcInstance):
     # does lxc have a context id of any kind ?
-    def __init__ (self, plcbox, lxcname):
+    def __init__ (self, plcbox, lxcname, pid):
         PlcInstance.__init__(self, plcbox)
         self.lxcname = lxcname
+       self.pid = pid
 
-    def kill (self):
-        print "TODO PlcLxcInstance.kill"
+    def vplcname (self):
+        return self.lxcname.split('-')[-1]
+    def buildname (self):
+        return self.lxcname.rsplit('-',2)[0]
 
     def line (self):
-        return "TODO PlcLxcInstance.line"
+        msg="== %s =="%(self.vplcname())
+        msg += " [=%s]"%self.lxcname
+        if self.pid==-1:  msg+=" not (yet?) running"
+        else:              msg+=" (pid=%s)"%self.pid
+        if self.timestamp: msg += " @ %s"%self.pretty_timestamp()
+        else:              msg += " *unknown timestamp*"
+        return msg
+
+    def kill (self):
+        command="rsync lxc-driver.sh  %s:/root"%self.plc_box.hostname
+       commands.getstatusoutput(command)
+       msg="lxc container stopping %s on %s"%(self.lxcname,self.plc_box.hostname)
+       self.plc_box.run_ssh(['/root/lxc-driver.sh','-c','stop_lxc','-n',self.lxcname],msg)
+        self.plc_box.forget(self)
 
 ##########
 class PlcBox (Box):
@@ -432,18 +448,8 @@ class PlcBox (Box):
         self.plc_instances=[]
         self.max_plcs=max_plcs
 
-class PlcVsBox (PlcBox):
-
-    def add_vserver (self,vservername,ctxid):
-        for plc in self.plc_instances:
-            if plc.vservername==vservername: 
-                header("WARNING, duplicate myplc %s running on %s"%\
-                           (vservername,self.hostname),banner=False)
-                return
-        self.plc_instances.append(PlcVsInstance(self,vservername,ctxid))
-    
-    def forget (self, plc_instance):
-        self.plc_instances.remove(plc_instance)
+    def free_slots (self):
+        return self.max_plcs - len(self.plc_instances)
 
     # fill one slot even though this one is not started yet
     def add_dummy (self, plcname):
@@ -451,41 +457,58 @@ class PlcVsBox (PlcBox):
         dummy.set_now()
         self.plc_instances.append(dummy)
 
-    def line(self): 
-        msg="%s [max=%d,%d free] (%s)"%(self.hostname, self.max_plcs,self.free_slots(),self.uname())
-        return msg
-        
-    def list(self):
+    def forget (self, plc_instance):
+        self.plc_instances.remove(plc_instance)
+
+    def reboot (self, options):
+        if not options.soft:
+            Box.reboot(self,options)
+        else:
+            self.soft_reboot (options)
+
+    def list(self, verbose=False):
         if not self.plc_instances: 
-            header ('No vserver running on %s'%(self.line()))
+            header ('No plc running on %s'%(self.line()))
         else:
             header ("Active plc VMs on %s"%self.line())
             self.plc_instances.sort(timestamp_sort)
             for p in self.plc_instances: 
                 header (p.line(),banner=False)
 
-    def free_slots (self):
-        return self.max_plcs - len(self.plc_instances)
+    def get_uname(self):
+        self._uname=self.backquote_ssh(['uname','-r']).strip()
 
+    # expecting sense () to have filled self._uname
     def uname(self):
         if hasattr(self,'_uname') and self._uname: return self._uname
         return '*undef* uname'
 
+class PlcVsBox (PlcBox):
+
+    def add_vserver (self,vservername,ctxid):
+        for plc in self.plc_instances:
+            if plc.vservername==vservername: 
+                header("WARNING, duplicate myplc %s running on %s"%\
+                           (vservername,self.hostname),banner=False)
+                return
+        self.plc_instances.append(PlcVsInstance(self,vservername,ctxid))
+    
+    def line(self): 
+        msg="%s [max=%d,free=%d, VS-based] (%s)"%(self.hostname, self.max_plcs,self.free_slots(),self.uname())
+        return msg
+        
     def plc_instance_by_vservername (self, vservername):
         for p in self.plc_instances:
             if p.vservername==vservername: return p
         return None
 
-    def reboot (self, options):
-        if not options.soft:
-            self.reboot(options)
-        else:
-            self.run_ssh(['service','util-vserver','stop'],"Stopping all running vservers",
-                         dry_run=options.dry_run)
+    def soft_reboot (self, options):
+        self.run_ssh(['service','util-vserver','stop'],"Stopping all running vservers on %s"%(self.hostname,),
+                     dry_run=options.dry_run)
 
     def sense (self, options):
-        print 'p',
-        self._uname=self.backquote_ssh(['uname','-r']).strip()
+        print 'vp',
+        self.get_uname()
         # try to find fullname (vserver_stat truncates to a ridiculously short name)
         # fetch the contexts for all vservers on that box
         map_command=['grep','.','/etc/vservers/*/context','/dev/null',]
@@ -536,21 +559,56 @@ class PlcVsBox (PlcBox):
 
 class PlcLxcBox (PlcBox):
 
-    def add_dummy (self, plcname):
-        print "TODO PlcLxcBox.add_dummy"
+    def add_lxc (self,lxcname,pid):
+        for plc in self.plc_instances:
+            if plc.lxcname==lxcname:
+                header("WARNING, duplicate myplc %s running on %s"%\
+                           (lxcname,self.hostname),banner=False)
+                return
+        self.plc_instances.append(PlcLxcInstance(self,lxcname,pid))    
 
-    def free_slots (self):
-        print "TODO PlcLxcBox.free_slots"
 
-    def list (self):
-        print "TODO PlcLxcBox.list"
+    # a line describing the box
+    def line(self): 
+        msg="%s [max=%d,free=%d, LXC-based] (%s)"%(self.hostname, self.max_plcs,self.free_slots(),self.uname())
+        return msg
+    
+    def plc_instance_by_lxcname (self, lxcname):
+        for p in self.plc_instances:
+            if p.lxcname==lxcname: return p
+        return None
+    
+    # essentially shutdown all running containers
+    def soft_reboot (self, options):
+        command="rsync lxc-driver.sh  %s:/root"%self.hostname
+        commands.getstatusoutput(command)
+       self.run_ssh(['/root/lxc-driver.sh','-c','stop_all'],"Stopping all running lxc containers on %s"%(self.hostname,),
+                     dry_run=options.dry_run)
 
-    def reboot (self, options):
-        print "TODO PlcLxcBox.reboot"
 
+    # sense is expected to fill self.plc_instances with PlcLxcInstance's 
+    # to describe the currently running VM's
+    # as well as to call  self.get_uname() once
     def sense (self, options):
-        print "TODO PlcLxcBox.sense"
-
+        print "xp",
+        self.get_uname()
+       command="rsync lxc-driver.sh  %s:/root"%self.hostname
+        commands.getstatusoutput(command)
+       command=['/root/lxc-driver.sh','-c','sense_all']
+        lxc_stat = self.backquote_ssh (command)
+       for lxc_line in lxc_stat.split("\n"):
+            if not lxc_line: continue
+            lxcname=lxc_line.split(";")[0]
+           pid=lxc_line.split(";")[1]
+           timestamp=lxc_line.split(";")[2]
+            self.add_lxc(lxcname,pid)
+            timestamp=int(timestamp)
+            p=self.plc_instance_by_lxcname(lxcname)
+            if not p:
+                print 'WARNING zombie plc',self.hostname,lxcname
+                print '... was expecting',lxcname,'in',[i.lxcname for i in self.plc_instances]
+                continue
+            p.set_timestamp(timestamp)
 
 ############################################################
 class QemuInstance: 
@@ -609,10 +667,10 @@ class QemuBox (Box):
         self.qemu_instances.append(dummy)
 
     def line (self):
-        msg="%s [max=%d,%d free] (%s)"%(self.hostname, self.max_qemus,self.free_slots(),self.driver())
+        msg="%s [max=%d,free=%d] (%s)"%(self.hostname, self.max_qemus,self.free_slots(),self.driver())
         return msg
 
-    def list(self):
+    def list(self, verbose=False):
         if not self.qemu_instances: 
             header ('No qemu process on %s'%(self.line()))
         else:
@@ -641,20 +699,20 @@ class QemuBox (Box):
 
     def reboot (self, options):
         if not options.soft:
-            self.reboot(options)
+            Box.reboot(self,options)
         else:
             self.run_ssh(['pkill','qemu'],"Killing qemu instances",
                          dry_run=options.dry_run)
 
     matcher=re.compile("\s*(?P<pid>[0-9]+).*-cdrom\s+(?P<nodename>[^\s]+)\.iso")
     def sense(self, options):
-        print 'q',
+        print 'qn',
         modules=self.backquote_ssh(['lsmod']).split('\n')
-        self._driver='*NO kqemu/kmv_intel MODULE LOADED*'
+        self._driver='*NO kqemu/kvm_intel MODULE LOADED*'
         for module in modules:
             if module.find('kqemu')==0:
                 self._driver='kqemu module loaded'
-            # kvm might be loaded without vkm_intel (we dont have AMD)
+            # kvm might be loaded without kvm_intel (we dont have AMD)
             elif module.find('kvm_intel')==0:
                 self._driver='kvm_intel module loaded'
         ########## find out running pids
@@ -672,13 +730,13 @@ class QemuBox (Box):
             header(">>%s<<"%line)
         ########## retrieve alive instances and map to build
         live_builds=[]
-        command=['grep','.','*/*/qemu.pid','/dev/null']
+        command=['grep','.','/vservers/*/*/qemu.pid','/dev/null']
         pid_lines=self.backquote_ssh(command,trash_err=True).split('\n')
         for pid_line in pid_lines:
             if not pid_line.strip(): continue
-            # expect <build>/<nodename>/qemu.pid:<pid>pid
+            # expect /vservers/<build>/<nodename>/qemu.pid:<pid>
             try:
-                (buildname,nodename,tail)=pid_line.split('/')
+                (_,__,buildname,nodename,tail)=pid_line.split('/')
                 (_,pid)=tail.split(':')
                 q=self.qemu_instance_by_pid (pid)
                 if not q: continue
@@ -688,14 +746,14 @@ class QemuBox (Box):
         # retrieve timestamps
         if not live_builds: return
         command=   ['grep','.']
-        command += ['%s/*/timestamp'%b for b in live_builds]
+        command += ['/vservers/%s/*/timestamp'%b for b in live_builds]
         command += ['/dev/null']
         ts_lines=self.backquote_ssh(command,trash_err=True).split('\n')
         for ts_line in ts_lines:
             if not ts_line.strip(): continue
-            # expect <build>/<nodename>/timestamp:<timestamp>
+            # expect /vservers/<build>/<nodename>/timestamp:<timestamp>
             try:
-                (buildname,nodename,tail)=ts_line.split('/')
+                (_,__,buildname,nodename,tail)=ts_line.split('/')
                 nodename=nodename.replace('qemu-','')
                 (_,timestamp)=tail.split(':')
                 timestamp=int(timestamp)
@@ -724,6 +782,7 @@ class TestInstance:
     def set_now (self): self.timestamp=int(time.time())
     def pretty_timestamp (self): return time.strftime("%Y-%m-%d:%H-%M",time.localtime(self.timestamp))
 
+    def is_running (self): return len(self.pids) != 0
 
     def add_pid (self,pid):
         self.pids.append(pid)
@@ -789,7 +848,7 @@ class TestBox (Box):
     matcher_proc=re.compile (".*/proc/(?P<pid>[0-9]+)/cwd.*/root/(?P<buildname>[^/]+)$")
     matcher_grep=re.compile ("/root/(?P<buildname>[^/]+)/logs/trace.*:TRACE:\s*(?P<plcindex>[0-9]+).*step=(?P<step>\S+).*")
     def sense (self, options):
-        print 't',
+        print 'tm',
         self.sense_uptime()
         self.starting_ips=[x for x in self.backquote_ssh(['cat',Starting.location], trash_err=True).strip().split('\n') if x]
 
@@ -842,24 +901,35 @@ class TestBox (Box):
     def line (self):
         return "%s (%s)"%(self.hostname,self.uptime())
 
-    def list (self):
-        if not self.test_instances:
-            header ("No known tests on %s"%self.line())
+    def list (self, verbose=False):
+        # verbose shows all known tests; otherwise only those for which is_running() holds
+        if verbose:
+            instances = self.test_instances
+            msg="known tests"
         else:
-            header ("Known tests on %s"%self.line())
-            self.test_instances.sort(timestamp_sort)
-            for i in self.test_instances: print i.line()
+            instances = [ i for i in self.test_instances if i.is_running() ]
+            msg="known running tests"
+
+        if not instances:
+            header ("No %s on %s"%(msg,self.line()))
+        else:
+            header ("%s on %s"%(msg,self.line()))
+            instances.sort(timestamp_sort)
+            for i in instances: print i.line()
+        # show 'starting' regardless of verbose
         if self.starting_ips:
             header ("Starting IP addresses on %s"%self.line())
             self.starting_ips.sort()
             for starting in self.starting_ips: print starting
+        else:
+            header ("Empty 'starting' on %s"%self.line())
 
 ############################################################
 class Options: pass
 
 class Substrate:
 
-    def __init__ (self, use_plc_vs_boxes=True, use_plc_lxc_boxes=False):
+    def __init__ (self, plcs_on_vs=True, plcs_on_lxc=False):
         self.options=Options()
         self.options.dry_run=False
         self.options.verbose=False
@@ -867,23 +937,37 @@ class Substrate:
         self.options.soft=False
         self.test_box = TestBox (self.test_box_spec())
         self.build_boxes = [ BuildBox(h) for h in self.build_boxes_spec() ]
-        self.plc_vs_boxes = [ PlcVsBox (h,m) for (h,m) in self.plc_vs_boxes_spec ()]
-        self.plc_lxc_boxes = [ PlcLxcBox (h,m) for (h,m) in self.plc_lxc_boxes_spec ()]
+        # for compat with older LocalSubstrate
+        try:
+            self.plc_vs_boxes = [ PlcVsBox (h,m) for (h,m) in self.plc_vs_boxes_spec ()]
+            self.plc_lxc_boxes = [ PlcLxcBox (h,m) for (h,m) in self.plc_lxc_boxes_spec ()]
+        except:
+            self.plc_vs_boxes = [ PlcVsBox (h,m) for (h,m) in self.plc_boxes_spec ()]
+            self.plc_lxc_boxes = [ ]
         self.qemu_boxes = [ QemuBox (h,m) for (h,m) in self.qemu_boxes_spec ()]
         self._sensed=False
 
         self.vplc_pool = Pool (self.vplc_ips(),"for vplcs",self)
         self.vnode_pool = Pool (self.vnode_ips(),"for vnodes",self)
         
-        self.rescope (use_plc_vs_boxes, use_plc_lxc_boxes)
+        self.rescope (plcs_on_vs=plcs_on_vs, plcs_on_lxc=plcs_on_lxc)
 
-    def rescope(self, plcs_on_vs, plcs_on_lxc):
+    # which plc boxes are we interested in ?
+    def rescope (self, plcs_on_vs, plcs_on_lxc):
         self.plc_boxes=[]
         if plcs_on_vs: self.plc_boxes += self.plc_vs_boxes
         if plcs_on_lxc: self.plc_boxes += self.plc_lxc_boxes
         self.default_boxes = self.plc_boxes + self.qemu_boxes
         self.all_boxes = self.build_boxes + [ self.test_box ] + self.plc_boxes + self.qemu_boxes
 
+    def summary_line (self):
+        msg  = "["
+        msg += " %d vp"%len(self.plc_vs_boxes)
+        msg += " %d xp"%len(self.plc_lxc_boxes)
+        msg += " %d tried plc boxes"%len(self.plc_boxes)
+        msg += "]"
+        return msg
+
     def fqdn (self, hostname):
         if hostname.find('.')<0: return "%s.%s"%(hostname,self.domain())
         return hostname
@@ -897,7 +981,7 @@ class Substrate:
         self._sensed=True
         return True
 
-    def list (self):
+    def list (self, verbose=False):
         for b in self.default_boxes:
-            b.list()
+            b.list(verbose)  # forward verbosity; every box list() accepts it
 
@@ -982,8 +1066,9 @@ class Substrate:
                 except:
                     msg=""
                     if not plc_boxname: msg += " PLC boxes are full"
-                    if not vplc_hostname: msg += " vplc IP pool exhausted" 
-                    raise Exception,"Could not make space for a PLC instance:"+msg
+                    if not vplc_hostname: msg += " vplc IP pool exhausted"
+                    msg += " %s"%self.summary_line()
+                    raise Exception,"Cannot make space for a PLC instance:"+msg
                 freed_plc_boxname=plc_instance_to_kill.plc_box.hostname
                 freed_vplc_hostname=plc_instance_to_kill.vplcname()
                 message='killing oldest plc instance = %s on %s'%(plc_instance_to_kill.line(),
@@ -1077,7 +1162,8 @@ class Substrate:
                         msg=""
                         if not qemu_boxname: msg += " QEMU boxes are full"
                         if not vnode_hostname: msg += " vnode IP pool exhausted" 
-                        raise Exception,"Could not make space for a QEMU instance:"+msg
+                        msg += " %s"%self.summary_line()
+                        raise Exception,"Cannot make space for a QEMU instance:"+msg
                     freed_qemu_boxname=qemu_instance_to_kill.qemu_box.hostname
                     freed_vnode_hostname=short_hostname(qemu_instance_to_kill.nodename)
                     # kill it
@@ -1100,6 +1186,7 @@ class Substrate:
             nodemap={'host_box':qemu_boxname,
                      'node_fields:hostname':vnode_fqdn,
                      'interface_fields:ip':ip, 
+                     'ipaddress_fields:ip_addr':ip, 
                      'interface_fields:mac':mac,
                      }
             nodemap.update(self.network_settings())
@@ -1143,7 +1230,7 @@ class Substrate:
         for box in box_or_names:
             if not isinstance(box,Box): box=self.get_box(box)
             if not box: continue
-            box.list()
+            box.list(self.options.verbose)
 
     def reboot_boxes(self,box_or_names):
         for box in box_or_names:
@@ -1152,7 +1239,7 @@ class Substrate:
             box.reboot(self.options)
 
     ####################
-    # can be run as a utility to manage the local infrastructure
+    # can be run as a utility to probe/display/manage the local infrastructure
     def main (self):
         parser=OptionParser()
         parser.add_option ('-r',"--reboot",action='store_true',dest='reboot',default=False,
@@ -1165,8 +1252,6 @@ class Substrate:
                            help='add build boxes')
         parser.add_option ('-p',"--plc",action='store_true',dest='plcs',default=False,
                            help='add plc boxes')
-        parser.add_option ('-X', "--lxc",action='store_true',dest='plcs_use_lxc',
-                           help='use lxc-enabled plc boxes instead of vs-enabled ones')
         parser.add_option ('-q',"--qemu",action='store_true',dest='qemus',default=False,
                            help='add qemu boxes') 
         parser.add_option ('-a',"--all",action='store_true',dest='all',default=False,
@@ -1177,8 +1262,7 @@ class Substrate:
                            help='dry run mode')
         (self.options,args)=parser.parse_args()
 
-        if self.options.plcs_use_lxc:
-            self.rescope (plcs_on_vs=False, plcs_on_lxc=True)
+        self.rescope (plcs_on_vs=True, plcs_on_lxc=True)
 
         boxes=args
         if self.options.testbox: boxes += [self.test_box]
@@ -1187,9 +1271,9 @@ class Substrate:
         if self.options.qemus: boxes += self.qemu_boxes
         if self.options.all: boxes += self.all_boxes
         
-        # default scope is -b -p -q
+        # default scope is -b -p -q -t
         if not boxes:
-            boxes = self.build_boxes + self.plc_boxes + self.qemu_boxes
+            boxes = self.build_boxes + self.plc_boxes + self.qemu_boxes + [self.test_box]
 
         if self.options.reboot: self.reboot_boxes (boxes)
         else:                   self.list_boxes (boxes)