add --soft option to trash stuff without rebooting the phy boxes
[infrastructure.git] / scripts / manage-infrastructure.py
index 685c964..ccc381f 100755 (executable)
@@ -190,39 +190,50 @@ class BuildBoxes:
     def margin_outline (self, string): return self.margin(self.outline(string))
 
     def handle_plc_box (self,box):
+# initial approach was to first scan vserver-stat, but it's not needed
         if not self.options.probe:
-            self.reboot(box)
-        else:
-            command=['vserver-stat']
-            if self.options.dry_run:
-                self.run_ssh(box,command,"Active vservers on %s"%box)
+#            # remove mark for all running servers to avoid resurrection
+#            if vserver_names:
+#                bash="; ".join( [ "rm -f /etc/vservers/%s/apps/init/mark"%vs for vs in vserver_names ] )
+#                stop_command=['bash','-c',"'" + bash + "'"]
+#                self.run_ssh(box,stop_command,"Removing mark on running vservers on %s"%box)
+            # just trash all marks 
+            stop_command=['rm','-rf','/etc/vservers/*/apps/init/mark']
+            self.run_ssh(box,stop_command,"Removing all vserver marks on %s"%box)
+            if not self.options.soft:
+                self.reboot(box)
             else:
-                # try to find fullname (vserver_stat truncates to a ridiculously short name)
-                try:
-                    self.header ("vserver map on %s"%box)
-                    # fetch the contexts for all vservers on that box
-                    map_command=['grep','.','/etc/vservers/*/context','/dev/null',]
-                    context_map=self.backquote_ssh (box,map_command)
-                    # at this point we have a set of lines like
-                    # /etc/vservers/2010.01.20--k27-f12-32-vplc03/context:40144
-                    ctx_dict={}
-                    for map_line in context_map.split("\n"):
-                        if not map_line: continue
-                        [path,xid] = map_line.split(':')
-                        ctx_dict[xid]=os.path.basename(os.path.dirname(path))
-                    # at this point ctx_id maps context id to vservername
-
-                    vserver_stat = self.backquote_ssh (box,command)
-                    for vserver_line in vserver_stat.split("\n"):
-                        if not vserver_line: continue
-                        context=vserver_line.split()[0]
-                        if context=="CTX": 
-                            print self.margin(""),vserver_line
-                            continue
-                        longname=ctx_dict[context]
-                        print self.margin_outline(self.vplcname(longname)),"%(vserver_line)s [=%(longname)s]"%locals()
-                except:
-                    self.run_ssh(box,command,"Fine-grained method failed - fallback to plain vserver-stat")
+                self.run_ssh(box,['service','util-vserver','stop'],"Stopping all running vservers")
+            return
+        # probe mode only (the reset/reboot branch above has already returned): scan vserver-stat to list the running vservers
+        vserver_names=[]
+        command=['vserver-stat']
+        if self.options.dry_run:
+            self.run_ssh(box,command,"Active vservers on %s"%box)
+        # try to find fullname (vserver_stat truncates to a ridiculously short name)
+        self.header ("vserver map on %s"%box)
+        # fetch the contexts for all vservers on that box
+        map_command=['grep','.','/etc/vservers/*/context','/dev/null',]
+        context_map=self.backquote_ssh (box,map_command)
+        # at this point we have a set of lines like
+        # /etc/vservers/2010.01.20--k27-f12-32-vplc03/context:40144
+        ctx_dict={}
+        for map_line in context_map.split("\n"):
+            if not map_line: continue
+            [path,xid] = map_line.split(':')
+            ctx_dict[xid]=os.path.basename(os.path.dirname(path))
+        # at this point ctx_dict maps each context id (xid) to its full vserver name
+
+        vserver_stat = self.backquote_ssh (box,command)
+        for vserver_line in vserver_stat.split("\n"):
+            if not vserver_line: continue
+            context=vserver_line.split()[0]
+            if context=="CTX": 
+                print self.margin(""),vserver_line
+                continue
+            longname=ctx_dict[context]
+            vserver_names.append(longname)
+            print self.margin_outline(self.vplcname(longname)),"%(vserver_line)s [=%(longname)s]"%locals()
 
     vnode_matcher = re.compile(".*(vnode[0-9]+)")
     def vnodename (self, ps_line):
@@ -232,14 +243,19 @@ class BuildBoxes:
 
     def handle_qemu_box (self,box):
         if not self.options.probe:
-            self.reboot(box)
+            if not self.options.soft:
+                self.reboot(box)
+            else:
+                self.run_ssh(box,['pkill','qemu'],"Killing qemu instances")
         else:
             command=['lsmod']
             modules=self.backquote_ssh(box,command).split('\n')
-            kqemu_msg='*NO kqemu MODULE LOADED*'
+            kqemu_msg='*NO kqemu/kmv MODULE LOADED*'
             for module in modules:
                 if module.find('kqemu')==0:
-                    kqemu_msg='kqemu OK'
+                    kqemu_msg='kqemu module loaded'
+                elif module.find('kvm')==0:
+                    kqemu_msg='kvm module loaded'
             
             command=['pgrep','qemu']
             if self.options.dry_run:
@@ -247,7 +263,7 @@ class BuildBoxes:
             else:
                 pids=self.backquote_ssh(box,command)
                 if not pids:
-                    self.header ('No qemu process on %s'%box)
+                    self.header ('No qemu process on %s (%s)'%(box,kqemu_msg))
                 else:
                     self.header ("Active qemu processes on %s (%s)"%(box,kqemu_msg))
                     command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid]
@@ -308,16 +324,19 @@ Default is to act on test boxes only"""
                            help="Dry run")
         parser.add_option ("-r","--reboot", action="store_false",dest="probe",default=True,
                            help="Actually reset/reboot stuff instead of just probing it")
+        parser.add_option ("-s","--soft",action="store_true",dest="soft",default=False,
+                           help="Soft reset instead of hard reboot of the boxes")
         # no need for -p = probe, as this is the default
         parser.add_option ("-p","--plc", action="store_true",dest="plc_only",default=False,
                            help="Acts on the plc box only")
 
+        parser.add_option ("-e","--deep",action="store_true", dest="deep", default=False,
+                           help="on build boxes, shows vbuild processes in vservers as well; signif. slower")
+
         parser.add_option ("-a","--all",action="store_true",dest="all_boxes",default=False,
                            help="Acts on build and test boxes")
         parser.add_option ("-b","--build",action="store_true",dest="build_only",default=False,
                            help="Acts on build boxes only")
-        parser.add_option ("-e","--deep",action="store_true", dest="deep", default=False,
-                           help="on build boxes, shows vbuild processes in vservers as well; signif. slower")
         parser.add_option ("-q","--qemu",action="store_true",dest="qemu_only",default=False,
                            help="Only acts on the qemu boxes")
         parser.add_option ("-t","--trackers",action="store_true",dest="trackers_only",default=False,