cleanup unused/old slice.rspec
[tests.git] / system / Substrate.py
index 6441620..9387579 100644 (file)
@@ -69,6 +69,37 @@ def timestamp_sort(o1,o2): return o1.timestamp-o2.timestamp
 
 def short_hostname (hostname):
     return hostname.split('.')[0]
+
+####################
+# the place where other test instances advertise their not-yet-started
+# instances, which would otherwise go undetected through sensing
+class Starting:
+    # registry of starting instances, one 'vname@bname' line each, in a file shared by test instances
+    location='/root/starting'
+    def __init__ (self):
+        self.tuples=[]
+    # (re)read the file into self.tuples; an unreadable file counts as empty, lines without exactly one '@' are skipped
+    def load (self):
+        try:    lines=open(Starting.location).readlines()
+        except IOError: lines=[]
+        self.tuples=[t for t in [line.strip().split('@') for line in lines] if len(t)==2]
+    def vnames (self) :     # always re-reads the file first
+        self.load()
+        return [ x for (x,_) in self.tuples ]
+    # record vname as starting on box bname, unless vname is already listed
+    def add (self, vname, bname):
+        if not vname in self.vnames():
+            f=open(Starting.location,'a')
+            f.write("%s@%s\n"%(vname,bname))
+            f.close()
+    # drop the entry for vname, rewriting the file from the entries just read (skipped lines are not written back)
+    def delete_vname (self, vname):
+        if vname in self.vnames():
+            f=open(Starting.location,'w')
+            for (v,b) in self.tuples: 
+                if v != vname: f.write("%s@%s\n"%(v,b))
+            f.close()
+    
 ####################
 # pool class
 # allows to pick an available IP among a pool
@@ -113,9 +144,11 @@ class PoolItem:
 
 class Pool:
 
-    def __init__ (self, tuples,message):
+    def __init__ (self, tuples,message, substrate):
         self.pool_items= [ PoolItem (hostname,userdata) for (hostname,userdata) in tuples ] 
         self.message=message
+        # where to send notifications upon load_starting
+        self.substrate=substrate
 
     def list (self):
         for i in self.pool_items: print i.line()
@@ -150,40 +183,34 @@ class Pool:
                 return (i.hostname,i.userdata)
         return None
 
-    # the place were other test instances tell about their not-yet-started
-    # instances, that go undetected through sensing
-    starting='/root/starting'
-    def add_starting (self, name):
-        try:    items=[line.strip() for line in file(Pool.starting).readlines()]
-        except: items=[]
-        if not name in items:
-            file(Pool.starting,'a').write(name+'\n')
+    ####################
+    # we have a starting instance of our own
+    def add_starting (self, vname, bname):
+        Starting().add(vname,bname)
         for i in self.pool_items:
-            if i.hostname==name: i.status='mine'
-            
-    # we load this after actual sensing; 
+            if i.hostname==vname: i.status='mine'
+
+    # load the starting instances from the common file
+    # pool items still sensed as 'free' that are listed there get marked 'starting'
+    # return the list of (vname,bname) that were marked that way, i.e. not ours
     def load_starting (self):
-        try:    items=[line.strip() for line in file(Pool.starting).readlines()]
-        except: items=[]
-        for i in self.pool_items:
-            if i.hostname in items:
-                if i.status=='free' : i.status='starting'
+        starting=Starting()
+        starting.load()
+        new_tuples=[]
+        for (v,b) in starting.tuples:
+            for i in self.pool_items:
+                if i.hostname==v and i.status=='free':
+                    i.status='starting'
+                    new_tuples.append( (v,b,) )
+        return new_tuples
 
     def release_my_starting (self):
         for i in self.pool_items:
-            if i.status=='mine': 
-                self.del_starting(i.hostname)
+            if i.status=='mine':
+                Starting().delete_vname (i.hostname)
                 i.status=None
 
-    def del_starting (self, name):
-        try:    items=[line.strip() for line in file(Pool.starting).readlines()]
-        except: items=[]
-        if name in items:
-            f=file(Pool.starting,'w')
-            for item in items: 
-                if item != name: f.write(item+'\n')
-            f.close()
-    
+
     ##########
     def _sense (self):
         for item in self.pool_items:
@@ -201,10 +228,10 @@ class Pool:
         print 'Sensing IP pool',self.message,
         self._sense()
         print 'Done'
-        self.load_starting()
+        for (vname,bname) in self.load_starting():
+            self.substrate.add_starting_dummy (bname, vname)
         print 'After starting: IP pool'
         print self.line()
-
     # OS-dependent ping option (support for macos, for convenience)
     ping_timeout_option = None
     # returns True when a given hostname/ip responds to ping
@@ -226,6 +253,7 @@ class Pool:
 class Box:
     def __init__ (self,hostname):
         self.hostname=hostname
+        self._probed=None
     def shortname (self):
         return short_hostname(self.hostname)
     def test_ssh (self): return TestSsh(self.hostname,username='root',unknown_host=False)
@@ -261,21 +289,27 @@ class Box:
         return result
 
     def backquote (self, argv, trash_err=False):
+        # print 'running backquote',argv
         if not trash_err:
             result= subprocess.Popen(argv,stdout=subprocess.PIPE).communicate()[0]
         else:
             result= subprocess.Popen(argv,stdout=subprocess.PIPE,stderr=file('/dev/null','w')).communicate()[0]
         return result
 
-    def backquote_ssh (self, argv, trash_err=False):
+    def probe (self):
+        if self._probed is not None: return self._probed
         # first probe the ssh link
         probe_argv=self.test_ssh().actual_argv(['hostname'])
-        hostname=self.backquote ( probe_argv, trash_err=True )
-        if not hostname:
-            print "root@%s unreachable"%self.hostname
-            return ''
-        else:
-            return self.backquote( self.test_ssh().actual_argv(argv), trash_err)
+        self._probed=self.backquote ( probe_argv, trash_err=True )
+        if not self._probed: print "root@%s unreachable"%self.hostname
+        return self._probed
+
+    # use argv=['bash','-c',"the command line"]
+    # if you have any shell-expanded arguments like *
+    # and if there's any chance the command is addressed to the local host
+    def backquote_ssh (self, argv, trash_err=False):
+        if not self.probe(): return ''
+        return self.backquote( self.test_ssh().actual_argv(argv), trash_err)
 
 ############################################################
 class BuildInstance:
@@ -319,7 +353,7 @@ class BuildBox (Box):
 
     # inspect box and find currently running builds
     matcher=re.compile("\s*(?P<pid>[0-9]+).*-[bo]\s+(?P<buildname>[^\s]+)(\s|\Z)")
-    def sense(self,options):
+    def sense(self, options):
         print 'b',
         self.sense_uptime()
         pids=self.backquote_ssh(['pgrep','vbuild'],trash_err=True)
@@ -444,9 +478,11 @@ class PlcBox (Box):
             if not vserver_line: continue
             context=vserver_line.split()[0]
             if context=="CTX": continue
-            longname=ctx_dict[context]
-            self.add_vserver(longname,context)
-#            print self.margin_outline(self.vplcname(longname)),"%(vserver_line)s [=%(longname)s]"%locals()
+            try:
+                longname=ctx_dict[context]
+                self.add_vserver(longname,context)
+            except:
+                print 'WARNING: found ctx %s in vserver_stat but was unable to figure a corresp. vserver'%context
 
         # scan timestamps 
         running_vsnames = [ i.vservername for i in self.plc_instances ]
@@ -604,6 +640,7 @@ class QemuBox (Box):
                 live_builds.append(buildname)
             except: print 'WARNING, could not parse pid line',pid_line
         # retrieve timestamps
+        if not live_builds: return
         command=   ['grep','.']
         command += ['%s/*/timestamp'%b for b in live_builds]
         command += ['/dev/null']
@@ -627,24 +664,38 @@ class QemuBox (Box):
 
 ####################
 class TestInstance:
-    def __init__ (self, pid, buildname):
-        self.pids=[pid]
+    def __init__ (self, buildname, pid=0):      # pid=0 means no process is attached yet
+        self.pids=[]                            # pids attached to this build, see add_pid
+        if pid!=0: self.pids.append(pid)
         self.buildname=buildname
         # latest trace line
         self.trace=''
         # has a KO test
         self.broken_steps=[]
+        self.timestamp = 0                      # epoch seconds; stays 0 until set_timestamp or set_now
+
+    def set_timestamp (self,timestamp): self.timestamp=timestamp
+    def set_now (self): self.timestamp=int(time.time())
+    def pretty_timestamp (self): return time.strftime("%Y-%m-%d:%H-%M",time.localtime(self.timestamp))
+
+
     def add_pid (self,pid):
         self.pids.append(pid)
     def set_broken (self,plcindex, step): 
         self.broken_steps.append ( (plcindex, step,) )
 
     def line (self):
-        msg = " == %s =="%self.buildname
-        msg += " (pid=%s)"%(self.pids)
+        double='=='
+        if self.pids: double='*'+double[1]
+        if self.broken_steps: double=double[0]+'B'
+        msg = " %s %s =="%(double,self.buildname)
+        if not self.pids:       pass
+        elif len(self.pids)==1: msg += " (pid=%s)"%self.pids[0]
+        else:                   msg += " !!!pids=%s!!!"%self.pids
+        msg += " @%s"%self.pretty_timestamp()
         if self.broken_steps:
-            msg += "\n BROKEN IN STEPS "
-            for (i,s) in self.broken_steps: msg += "step=%s,plc=%s"%(s,i)
+            msg += "\n BROKEN IN STEPS"
+            for (i,s) in self.broken_steps: msg += " %s@%s"%(s,i)
         return msg
 
 class TestBox (Box):
@@ -657,27 +708,74 @@ class TestBox (Box):
         # can't reboot a vserver VM
         self.run_ssh (['pkill','run_log'],"Terminating current runs",
                       dry_run=options.dry_run)
-        self.run_ssh (['rm','-f','/root/starting'],"Cleaning /root/starting",
+        self.run_ssh (['rm','-f',Starting.location],"Cleaning %s"%Starting.location,
                       dry_run=options.dry_run)
 
     def get_test (self, buildname):
         for i in self.test_instances:
             if i.buildname==buildname: return i
 
-    def add_test (self, pid, buildname):
+    # we scan ALL remaining test results, even the ones not running
+    def add_timestamp (self, buildname, timestamp):
         i=self.get_test(buildname)
-        if i:
-            print "WARNING: 2 concurrent tests run on same build %s"%buildname
-            i.add_pid (pid)
+        if i:   
+            i.set_timestamp(timestamp)
+        else:   
+            i=TestInstance(buildname,0)
+            i.set_timestamp(timestamp)
+            self.test_instances.append(i)
+
+    def add_running_test (self, pid, buildname):
+        i=self.get_test(buildname)
+        if not i:
+            self.test_instances.append (TestInstance (buildname,pid))
             return
-        self.test_instances.append (TestInstance (pid,buildname))
+        if i.pids:
+            print "WARNING: 2 concurrent tests run on same build %s"%buildname
+        i.add_pid (pid)
+
+    def add_broken (self, buildname, plcindex, step):
+        i=self.get_test(buildname)
+        if not i:
+            i=TestInstance(buildname)
+            self.test_instances.append(i)
+        i.set_broken(plcindex, step)
 
     matcher_proc=re.compile (".*/proc/(?P<pid>[0-9]+)/cwd.*/root/(?P<buildname>[^/]+)$")
-    matcher_grep=re.compile ("/root/(?P<buildname>[^/]+)/logs/trace:TRACE:\s*(?P<plcindex>[0-9]+).*step=(?P<step>\S+).*")
+    matcher_grep=re.compile ("/root/(?P<buildname>[^/]+)/logs/trace.*:TRACE:\s*(?P<plcindex>[0-9]+).*step=(?P<step>\S+).*")
     def sense (self, options):
         print 't',
         self.sense_uptime()
-        self.starting_ips=self.backquote_ssh(['cat','/root/starting']).strip().split('\n')
+        self.starting_ips=[x for x in self.backquote_ssh(['cat',Starting.location], trash_err=True).strip().split('\n') if x]
+
+        # scan timestamps on all tests
+        # this is likely to not invoke ssh so we need to be a bit smarter to get * expanded
+        # xxx would make sense above too
+        command=['bash','-c',"grep . /root/*/timestamp /dev/null"]
+        ts_lines=self.backquote_ssh(command,trash_err=True).split('\n')
+        for ts_line in ts_lines:
+            if not ts_line.strip(): continue
+            # expect /root/<buildname>/timestamp:<timestamp>
+            try:
+                (ts_file,timestamp)=ts_line.split(':')
+                ts_file=os.path.dirname(ts_file)
+                buildname=os.path.basename(ts_file)
+                timestamp=int(timestamp)
+                t=self.add_timestamp(buildname,timestamp)
+            except:  print 'WARNING, could not parse ts line',ts_line
+
+        command=['bash','-c',"grep KO /root/*/logs/trace* /dev/null" ]
+        trace_lines=self.backquote_ssh (command).split('\n')
+        for line in trace_lines:
+            if not line.strip(): continue
+            m=TestBox.matcher_grep.match(line)
+            if m: 
+                buildname=m.group('buildname')
+                plcindex=m.group('plcindex')
+                step=m.group('step')
+                self.add_broken(buildname,plcindex, step)
+            else: header("command %r returned line that failed to match\n%s"%(command,line))
+
         pids = self.backquote_ssh (['pgrep','run_log'],trash_err=True)
         if not pids: return
         command=['ls','-ld'] + ["/proc/%s/cwd"%pid for pid in pids.split("\n") if pid]
@@ -688,47 +786,24 @@ class TestBox (Box):
             if m: 
                 pid=m.group('pid')
                 buildname=m.group('buildname')
-                self.add_test(pid, buildname)
+                self.add_running_test(pid, buildname)
             else: header("command %r returned line that failed to match\n%s"%(command,line))
-        buildnames=[i.buildname for i in self.test_instances]
-        if not buildnames: return
-
-# messy - tail has different output if one or several args
-#        command=['tail','-n','1'] + [ "/root/%s/logs/trace"%b for b in buildnames ]
-#        trace_lines=self.backquote_ssh (command).split('\n\n')
-#        header('TAIL')
-#        for line in trace_lines:
-#            if not line.strip(): continue
-#            print 'line [[[%s]]]'%line
-        command=['grep','KO'] + [ "/root/%s/logs/trace"%b for b in buildnames ] + [ "/dev/null" ]
-        trace_lines=self.backquote_ssh (command).split('\n')
-        for line in trace_lines:
-            if not line.strip(): continue
-            m=TestBox.matcher_grep.match(line)
-            if m: 
-                buildname=m.group('buildname')
-                plcindex=m.group('plcindex')
-                step=m.group('step')
-                self.get_test(buildname).set_broken (plcindex, step)
-            else: header("command %r returned line that failed to match\n%s"%(command,line))
-            
         
         
     def line (self):
         return "%s (%s)"%(self.hostname,self.uptime())
 
     def list (self):
-        if not self.starting_ips:
-            header ("No starting IP addresses on %s"%self.line())
-        else:
-            header ("IP addresses currently starting up on %s"%self.line())
-            self.starting_ips.sort()
-            for starting in self.starting_ips: print starting
         if not self.test_instances:
-            header ("No running tests on %s"%self.line())
+            header ("No known tests on %s"%self.line())
         else:
-            header ("Running tests on %s"%self.line())
+            header ("Known tests on %s"%self.line())
+            self.test_instances.sort(timestamp_sort)
             for i in self.test_instances: print i.line()
+        if self.starting_ips:
+            header ("Starting IP addresses on %s"%self.line())
+            self.starting_ips.sort()
+            for starting in self.starting_ips: print starting
 
 ############################################################
 class Options: pass
@@ -745,11 +820,12 @@ class Substrate:
         self.build_boxes = [ BuildBox(h) for h in self.build_boxes_spec() ]
         self.plc_boxes = [ PlcBox (h,m) for (h,m) in self.plc_boxes_spec ()]
         self.qemu_boxes = [ QemuBox (h,m) for (h,m) in self.qemu_boxes_spec ()]
-        self.all_boxes = self.plc_boxes + self.qemu_boxes
+        self.default_boxes = self.plc_boxes + self.qemu_boxes
+        self.all_boxes = self.build_boxes + [ self.test_box ] + self.plc_boxes + self.qemu_boxes
         self._sensed=False
 
-        self.vplc_pool = Pool (self.vplc_ips(),"for vplcs")
-        self.vnode_pool = Pool (self.vnode_ips(),"for vnodes")
+        self.vplc_pool = Pool (self.vplc_ips(),"for vplcs",self)
+        self.vnode_pool = Pool (self.vnode_ips(),"for vnodes",self)
 
     def fqdn (self, hostname):
         if hostname.find('.')<0: return "%s.%s"%(hostname,self.domain())
@@ -759,19 +835,28 @@ class Substrate:
     def sense (self,force=False):
         if self._sensed and not force: return False
         print 'Sensing local substrate...',
-        for b in self.all_boxes: b.sense(self.options)
+        for b in self.default_boxes: b.sense(self.options)
         print 'Done'
         self._sensed=True
         return True
 
+    def list (self):    # call list() on each default box, i.e. the plc boxes then the qemu boxes
+        for b in self.default_boxes:
+            b.list()
+
     def add_dummy_plc (self, plc_boxname, plcname):
         for pb in self.plc_boxes:
             if pb.hostname==plc_boxname:
                 pb.add_dummy(plcname)
+                return True
     def add_dummy_qemu (self, qemu_boxname, qemuname):
         for qb in self.qemu_boxes:
             if qb.hostname==qemu_boxname:
                 qb.add_dummy(qemuname)
+                return True
+
+    def add_starting_dummy (self, bname, vname):    # bname is looked up among plc boxes first, then qemu boxes
+        return self.add_dummy_plc (bname, vname) or self.add_dummy_qemu (bname, vname)    # True once a box whose hostname is bname took the dummy, else None
 
     ########## 
     def provision (self,plcs,options):
@@ -782,6 +867,7 @@ class Substrate:
             plcs = [ self.provision_qemus (plc,options) for plc in plcs ]
             # update the SFA spec accordingly
             plcs = [ self.localize_sfa_rspec(plc,options) for plc in plcs ]
+            self.list()
             return plcs
         except Exception, e:
             print '* Could not provision this test on current substrate','--',e,'--','exiting'
@@ -810,7 +896,7 @@ class Substrate:
             plc_boxname = options.ips_bplc.pop()
             vplc_hostname=options.ips_vplc.pop()
         else:
-            if self.sense(): self.list_all()
+            if self.sense(): self.list()
             plc_boxname=None
             vplc_hostname=None
             # try to find an available IP 
@@ -858,7 +944,7 @@ class Substrate:
         # 
         self.add_dummy_plc(plc_boxname,plc['name'])
         vplc_ip = self.vplc_pool.get_ip(vplc_hostname)
-        self.vplc_pool.add_starting(vplc_hostname)
+        self.vplc_pool.add_starting(vplc_hostname, plc_boxname)
 
         #### compute a helpful vserver name
         # remove domain in hostname
@@ -905,7 +991,7 @@ class Substrate:
                 qemu_boxname=options.ips_bnode.pop()
                 vnode_hostname=options.ips_vnode.pop()
             else:
-                if self.sense(): self.list_all()
+                if self.sense(): self.list()
                 qemu_boxname=None
                 vnode_hostname=None
                 # try to find an available IP 
@@ -948,10 +1034,10 @@ class Substrate:
                         vnode_hostname=freed_vnode_hostname
                         self.vnode_pool.set_mine(vnode_hostname)
 
-            self.add_dummy_qemu (qemu_boxname,nodename)
+            self.add_dummy_qemu (qemu_boxname,vnode_hostname)
             mac=self.vnode_pool.retrieve_userdata(vnode_hostname)
             ip=self.vnode_pool.get_ip (vnode_hostname)
-            self.vnode_pool.add_starting(vnode_hostname)
+            self.vnode_pool.add_starting(vnode_hostname,qemu_boxname)
 
             vnode_fqdn = self.fqdn(vnode_hostname)
             nodemap={'host_box':qemu_boxname,
@@ -974,9 +1060,6 @@ class Substrate:
         plc['sfa']['SFA_SM_HOST'] = plc['PLC_DB_HOST']
         plc['sfa']['SFA_PLC_DB_HOST'] = plc['PLC_DB_HOST']
         plc['sfa']['SFA_PLC_URL'] = 'https://' + plc['PLC_API_HOST'] + ':443/PLCAPI/' 
-        for site in plc['sites']:
-            for node in site['nodes']:
-                plc['sfa']['sfa_slice_rspec']['part4'] = node['node_fields']['hostname']
        return plc
 
     #################### release:
@@ -993,23 +1076,23 @@ class Substrate:
         print "Could not find box %s"%boxname
         return None
 
-    def list_boxes(self,boxes):
+    def list_boxes(self,box_or_names):
         print 'Sensing',
-        for box in boxes:
-            b=self.get_box(box)
-            if not b: continue
-            b.sense(self.options)
+        for box in box_or_names:
+            if not isinstance(box,Box): box=self.get_box(box)
+            if not box: continue
+            box.sense(self.options)
         print 'Done'
-        for box in boxes:
-            b=self.get_box(box)
-            if not b: continue
-            b.list()
+        for box in box_or_names:
+            if not isinstance(box,Box): box=self.get_box(box)
+            if not box: continue
+            box.list()
 
-    def reboot_boxes(self,boxes):
-        for box in boxes:
-            b=self.get_box(box)
-            if not b: continue
-            b.reboot(self.options)
+    def reboot_boxes(self,box_or_names):
+        for box in box_or_names:
+            if not isinstance(box,Box): box=self.get_box(box)
+            if not box: continue
+            box.reboot(self.options)
 
     ####################
     # can be run as a utility to manage the local infrastructure
@@ -1027,6 +1110,8 @@ class Substrate:
                            help='add plc boxes')
         parser.add_option ('-q',"--qemu",action='store_true',dest='qemus',default=False,
                            help='add qemu boxes') 
+        parser.add_option ('-a',"--all",action='store_true',dest='all',default=False,
+                           help='address all known  boxes, like -b -t -p -q')
         parser.add_option ('-v',"--verbose",action='store_true',dest='verbose',default=False,
                            help='verbose mode')
         parser.add_option ('-n',"--dry_run",action='store_true',dest='dry_run',default=False,
@@ -1034,17 +1119,15 @@ class Substrate:
         (self.options,args)=parser.parse_args()
 
         boxes=args
-        if self.options.testbox: boxes += [self.test_box.hostname]
-        if self.options.builds: boxes += [b.hostname for b in self.build_boxes]
-        if self.options.plcs: boxes += [b.hostname for b in self.plc_boxes]
-        if self.options.qemus: boxes += [b.hostname for b in self.qemu_boxes]
-        boxes=list(set(boxes))
+        if self.options.testbox: boxes += [self.test_box]
+        if self.options.builds: boxes += self.build_boxes
+        if self.options.plcs: boxes += self.plc_boxes
+        if self.options.qemus: boxes += self.qemu_boxes
+        if self.options.all: boxes += self.all_boxes
         
-        # default scope
+        # default scope is -b -p -q
         if not boxes:
-            boxes = [ b.hostname for b in \
-                          self.build_boxes + [ self.test_box ] + \
-                          self.plc_boxes + self.qemu_boxes ]
+            boxes = self.build_boxes + self.plc_boxes + self.qemu_boxes
 
         if self.options.reboot: self.reboot_boxes (boxes)
         else:                   self.list_boxes (boxes)