first cherrypy skeleton for the onelab sso

[infrastructure.git] / scripts / manage-infrastructure.py
diff --git a/scripts/manage-infrastructure.py b/scripts/manage-infrastructure.py

index 81d3d21..58fbbf4 100755 (executable)
--- a/scripts/manage-infrastructure.py
+++ b/scripts/manage-infrastructure.py
@@ -5,11 +5,11 @@ import re
  import subprocess
  from optparse import OptionParser
  
-class BuildBoxes:
+class Infrastructure:
  
      # everything in the onelab.eu domain
      domain = 'pl.sophia.inria.fr'
-    build_boxes = [ "mirror", "liquid", "reed", "velvet", ]
+    build_boxes = [ "devel", "liquid", "reed", "velvet", ]
      plc_boxes = [ "testplc" ]
      testmaster = 'testmaster'
      testmaster_boxes = [ testmaster ]
@@ -29,7 +29,7 @@ class BuildBoxes:
      def refresh_cache (self):
          retrieved= \
              self.backquote_ssh(self.fqdn(self.testmaster),['LocalTestResources.py'],trash_err=True)
-        remove="."+BuildBoxes.domain
+        remove="."+Infrastructure.domain
          retrieved = [ x.replace(remove,"").strip() for x in retrieved.split()]
          self.qemu_boxes = retrieved
          cache=self.cache_file()
@@ -53,7 +53,7 @@ class BuildBoxes:
  
      @staticmethod
      def ssh(box):
-        return BuildBoxes.ssh_command + [ BuildBoxes.root(box) ]
+        return Infrastructure.ssh_command + [ Infrastructure.root(box) ]
  
      def header (self,message):
          print "===============",message
@@ -96,6 +96,16 @@ class BuildBoxes:
          command=['ssh',self.root(box),'shutdown','-r','now']
          self.run (command,"Rebooting %s"%box)
  
+    def handle_starting (self):
+        box = self.fqdn (self.testmaster)
+        filename="starting"
+        if not self.options.probe:
+            command=["rm","-rf",filename]
+            self.run_ssh(box,command,"Cleaning up %s on %s"%(filename,box))
+        else:
+            read_command = ["cat",filename]
+            self.run_ssh(box,read_command,"++++++++++ Inspecting %s on %s"%(filename,box))
+
      def handle_tracker_plcs (self):
          box = self.fqdn (self.testmaster)
          filename="tracker-plcs"
@@ -110,8 +120,7 @@ class BuildBoxes:
                  if not tracker: continue
                  try:
                      tracker=tracker.strip()
-                    [hostname,buildname]=tracker.split('@')
-                    [left,plcname]=buildname.rsplit('-',1)
+                    (hostname,buildname,plcname)=tracker.split('@')
                      print self.margin_outline(plcname),tracker
                  except:
                      print self.margin(""),tracker
@@ -190,6 +199,21 @@ class BuildBoxes:
      def margin_outline (self, string): return self.margin(self.outline(string))
  
      def handle_plc_box (self,box):
+# initial approach was to first scan vserver-stat, but it's not needed
+        if not self.options.probe:
+#            # remove mark for all running servers to avoid resurrection
+#            if vserver_names:
+#                bash="; ".join( [ "rm -f /etc/vservers/%s/apps/init/mark"%vs for vs in vserver_names ] )
+#                stop_command=['bash','-c',"'" + bash + "'"]
+#                self.run_ssh(box,stop_command,"Removing mark on running vservers on %s"%box)
+            # just trash all marks 
+            stop_command=['rm','-rf','/etc/vservers/*/apps/init/mark']
+            self.run_ssh(box,stop_command,"Removing all vserver marks on %s"%box)
+            if not self.options.soft:
+                self.reboot(box)
+            else:
+                self.run_ssh(box,['service','util-vserver','stop'],"Stopping all running vservers")
+            return
          # even for rebooting we need to scan vserver-stat to stop the vservers properly
          vserver_names=[]
          command=['vserver-stat']
@@ -219,13 +243,6 @@ class BuildBoxes:
              longname=ctx_dict[context]
              vserver_names.append(longname)
              print self.margin_outline(self.vplcname(longname)),"%(vserver_line)s [=%(longname)s]"%locals()
-        if not self.options.probe:
-            # remove mark for all running servers to avoid resurrection
-            if vserver_names:
-                bash="; ".join( [ "rm -f /etc/vservers/%s/apps/init/mark"%vs for vs in vserver_names ] )
-                stop_command=['bash','-c',"'" + bash + "'"]
-                self.run_ssh(box,stop_command,"Removing mark on running vservers on %s"%box)
-            self.reboot(box)
  
      vnode_matcher = re.compile(".*(vnode[0-9]+)")
      def vnodename (self, ps_line):
@@ -235,14 +252,20 @@ class BuildBoxes:
  
      def handle_qemu_box (self,box):
          if not self.options.probe:
-            self.reboot(box)
+            if not self.options.soft:
+                self.reboot(box)
+            else:
+                self.run_ssh(box,['pkill','qemu'],"Killing qemu instances")
          else:
              command=['lsmod']
              modules=self.backquote_ssh(box,command).split('\n')
-            kqemu_msg='*NO kqemu MODULE LOADED*'
+            kqemu_msg='*NO kqemu/kmv_intel MODULE LOADED*'
              for module in modules:
                  if module.find('kqemu')==0:
-                    kqemu_msg='kqemu OK'
+                    kqemu_msg='kqemu module loaded'
+                # kvm might be loaded without kvm_intel (we don't have AMD)
+                elif module.find('kvm_intel')==0:
+                    kqemu_msg='kvm_intel module loaded'
              
              command=['pgrep','qemu']
              if self.options.dry_run:
@@ -250,7 +273,7 @@ class BuildBoxes:
              else:
                  pids=self.backquote_ssh(box,command)
                  if not pids:
-                    self.header ('No qemu process on %s'%box)
+                    self.header ('No qemu process on %s (%s)'%(box,kqemu_msg))
                  else:
                      self.header ("Active qemu processes on %s (%s)"%(box,kqemu_msg))
                      command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid]
@@ -311,16 +334,19 @@ Default is to act on test boxes only"""
                             help="Dry run")
          parser.add_option ("-r","--reboot", action="store_false",dest="probe",default=True,
                             help="Actually reset/reboot stuff instead of just probing it")
+        parser.add_option ("-s","--soft",action="store_true",dest="soft",default=False,
+                           help="Soft reset instead of hard reboot of the boxes")
          # no need for -p = probe, as this is the default
          parser.add_option ("-p","--plc", action="store_true",dest="plc_only",default=False,
                             help="Acts on the plc box only")
  
+        parser.add_option ("-e","--deep",action="store_true", dest="deep", default=False,
+                           help="on build boxes, shows vbuild processes in vservers as well; signif. slower")
+
          parser.add_option ("-a","--all",action="store_true",dest="all_boxes",default=False,
                             help="Acts on build and test boxes")
          parser.add_option ("-b","--build",action="store_true",dest="build_only",default=False,
                             help="Acts on build boxes only")
-        parser.add_option ("-e","--deep",action="store_true", dest="deep", default=False,
-                           help="on build boxes, shows vbuild processes in vservers as well; signif. slower")
          parser.add_option ("-q","--qemu",action="store_true",dest="qemu_only",default=False,
                             help="Only acts on the qemu boxes")
          parser.add_option ("-t","--trackers",action="store_true",dest="trackers_only",default=False,
@@ -360,7 +386,6 @@ Default is to act on test boxes only"""
          elif self.options.plc_only:
              self.boxes=self.plc_boxes
              self.do_tracker_plcs = True
-            self.do_tracker_plcs = False
              self.do_tracker_qemus = False
          elif self.options.testmaster_only:
              self.boxes=self.testmaster_boxes
@@ -381,7 +406,9 @@ Default is to act on test boxes only"""
              return
  
          # PLCS
-        if self.do_tracker_plcs:self.handle_tracker_plcs ()
+        if self.do_tracker_plcs:
+            self.handle_tracker_plcs ()
+            self.handle_starting ()
          for box in self.boxes:  self.handle_box (box,"plc")
          # QEMU
          if self.do_tracker_qemus:self.handle_tracker_qemus ()
@@ -392,4 +419,4 @@ Default is to act on test boxes only"""
          for box in self.boxes:  self.handle_box (box,"testmaster")
  
  if __name__ == "__main__":
-    BuildBoxes().main()
+    Infrastructure().main()