first cherrypy skeleton for the onelab sso
[infrastructure.git] / scripts / manage-infrastructure.py
index 81d3d21..58fbbf4 100755 (executable)
@@ -5,11 +5,11 @@ import re
 import subprocess
 from optparse import OptionParser
 
-class BuildBoxes:
+class Infrastructure:
 
     # everything in the onelab.eu domain
     domain = 'pl.sophia.inria.fr'
-    build_boxes = [ "mirror", "liquid", "reed", "velvet", ]
+    build_boxes = [ "devel", "liquid", "reed", "velvet", ]
     plc_boxes = [ "testplc" ]
     testmaster = 'testmaster'
     testmaster_boxes = [ testmaster ]
@@ -29,7 +29,7 @@ class BuildBoxes:
     def refresh_cache (self):
         retrieved= \
             self.backquote_ssh(self.fqdn(self.testmaster),['LocalTestResources.py'],trash_err=True)
-        remove="."+BuildBoxes.domain
+        remove="."+Infrastructure.domain
         retrieved = [ x.replace(remove,"").strip() for x in retrieved.split()]
         self.qemu_boxes = retrieved
         cache=self.cache_file()
@@ -53,7 +53,7 @@ class BuildBoxes:
 
     @staticmethod
     def ssh(box):
-        return BuildBoxes.ssh_command + [ BuildBoxes.root(box) ]
+        return Infrastructure.ssh_command + [ Infrastructure.root(box) ]
 
     def header (self,message):
         print "===============",message
@@ -96,6 +96,16 @@ class BuildBoxes:
         command=['ssh',self.root(box),'shutdown','-r','now']
         self.run (command,"Rebooting %s"%box)
 
+    def handle_starting (self):
+        box = self.fqdn (self.testmaster)
+        filename="starting"
+        if not self.options.probe:
+            command=["rm","-rf",filename]
+            self.run_ssh(box,command,"Cleaning up %s on %s"%(filename,box))
+        else:
+            read_command = ["cat",filename]
+            self.run_ssh(box,read_command,"++++++++++ Inspecting %s on %s"%(filename,box))
+
     def handle_tracker_plcs (self):
         box = self.fqdn (self.testmaster)
         filename="tracker-plcs"
@@ -110,8 +120,7 @@ class BuildBoxes:
                 if not tracker: continue
                 try:
                     tracker=tracker.strip()
-                    [hostname,buildname]=tracker.split('@')
-                    [left,plcname]=buildname.rsplit('-',1)
+                    (hostname,buildname,plcname)=tracker.split('@')
                     print self.margin_outline(plcname),tracker
                 except:
                     print self.margin(""),tracker
@@ -190,6 +199,21 @@ class BuildBoxes:
     def margin_outline (self, string): return self.margin(self.outline(string))
 
     def handle_plc_box (self,box):
+# initial approach was to first scan vserver-stat, but it's not needed
+        if not self.options.probe:
+#            # remove mark for all running servers to avoid resurrection
+#            if vserver_names:
+#                bash="; ".join( [ "rm -f /etc/vservers/%s/apps/init/mark"%vs for vs in vserver_names ] )
+#                stop_command=['bash','-c',"'" + bash + "'"]
+#                self.run_ssh(box,stop_command,"Removing mark on running vservers on %s"%box)
+            # just trash all marks 
+            stop_command=['rm','-rf','/etc/vservers/*/apps/init/mark']
+            self.run_ssh(box,stop_command,"Removing all vserver marks on %s"%box)
+            if not self.options.soft:
+                self.reboot(box)
+            else:
+                self.run_ssh(box,['service','util-vserver','stop'],"Stopping all running vservers")
+            return
         # even for rebooting we need to scan vserver-stat to stop the vservers properly
         vserver_names=[]
         command=['vserver-stat']
@@ -219,13 +243,6 @@ class BuildBoxes:
             longname=ctx_dict[context]
             vserver_names.append(longname)
             print self.margin_outline(self.vplcname(longname)),"%(vserver_line)s [=%(longname)s]"%locals()
-        if not self.options.probe:
-            # remove mark for all running servers to avoid resurrection
-            if vserver_names:
-                bash="; ".join( [ "rm -f /etc/vservers/%s/apps/init/mark"%vs for vs in vserver_names ] )
-                stop_command=['bash','-c',"'" + bash + "'"]
-                self.run_ssh(box,stop_command,"Removing mark on running vservers on %s"%box)
-            self.reboot(box)
 
     vnode_matcher = re.compile(".*(vnode[0-9]+)")
     def vnodename (self, ps_line):
@@ -235,14 +252,20 @@ class BuildBoxes:
 
     def handle_qemu_box (self,box):
         if not self.options.probe:
-            self.reboot(box)
+            if not self.options.soft:
+                self.reboot(box)
+            else:
+                self.run_ssh(box,['pkill','qemu'],"Killing qemu instances")
         else:
             command=['lsmod']
             modules=self.backquote_ssh(box,command).split('\n')
-            kqemu_msg='*NO kqemu MODULE LOADED*'
+            kqemu_msg='*NO kqemu/kmv_intel MODULE LOADED*'
             for module in modules:
                 if module.find('kqemu')==0:
-                    kqemu_msg='kqemu OK'
+                    kqemu_msg='kqemu module loaded'
+                # kvm might be loaded without kvm_intel (we don't have AMD)
+                elif module.find('kvm_intel')==0:
+                    kqemu_msg='kvm_intel module loaded'
             
             command=['pgrep','qemu']
             if self.options.dry_run:
@@ -250,7 +273,7 @@ class BuildBoxes:
             else:
                 pids=self.backquote_ssh(box,command)
                 if not pids:
-                    self.header ('No qemu process on %s'%box)
+                    self.header ('No qemu process on %s (%s)'%(box,kqemu_msg))
                 else:
                     self.header ("Active qemu processes on %s (%s)"%(box,kqemu_msg))
                     command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid]
@@ -311,16 +334,19 @@ Default is to act on test boxes only"""
                            help="Dry run")
         parser.add_option ("-r","--reboot", action="store_false",dest="probe",default=True,
                            help="Actually reset/reboot stuff instead of just probing it")
+        parser.add_option ("-s","--soft",action="store_true",dest="soft",default=False,
+                           help="Soft reset instead of hard reboot of the boxes")
         # no need for -p = probe, as this is the default
         parser.add_option ("-p","--plc", action="store_true",dest="plc_only",default=False,
                            help="Acts on the plc box only")
 
+        parser.add_option ("-e","--deep",action="store_true", dest="deep", default=False,
+                           help="on build boxes, shows vbuild processes in vservers as well; signif. slower")
+
         parser.add_option ("-a","--all",action="store_true",dest="all_boxes",default=False,
                            help="Acts on build and test boxes")
         parser.add_option ("-b","--build",action="store_true",dest="build_only",default=False,
                            help="Acts on build boxes only")
-        parser.add_option ("-e","--deep",action="store_true", dest="deep", default=False,
-                           help="on build boxes, shows vbuild processes in vservers as well; signif. slower")
         parser.add_option ("-q","--qemu",action="store_true",dest="qemu_only",default=False,
                            help="Only acts on the qemu boxes")
         parser.add_option ("-t","--trackers",action="store_true",dest="trackers_only",default=False,
@@ -360,7 +386,6 @@ Default is to act on test boxes only"""
         elif self.options.plc_only:
             self.boxes=self.plc_boxes
             self.do_tracker_plcs = True
-            self.do_tracker_plcs = False
             self.do_tracker_qemus = False
         elif self.options.testmaster_only:
             self.boxes=self.testmaster_boxes
@@ -381,7 +406,9 @@ Default is to act on test boxes only"""
             return
 
         # PLCS
-        if self.do_tracker_plcs:self.handle_tracker_plcs ()
+        if self.do_tracker_plcs:
+            self.handle_tracker_plcs ()
+            self.handle_starting ()
         for box in self.boxes:  self.handle_box (box,"plc")
         # QEMU
         if self.do_tracker_qemus:self.handle_tracker_qemus ()
@@ -392,4 +419,4 @@ Default is to act on test boxes only"""
         for box in self.boxes:  self.handle_box (box,"testmaster")
 
 if __name__ == "__main__":
-    BuildBoxes().main()
+    Infrastructure().main()