From 585d2b338d57c1012035547a310923f332351ea2 Mon Sep 17 00:00:00 2001 From: thierry Date: Fri, 26 Feb 2010 09:12:21 +0000 Subject: [PATCH] outline boxes that are down --- scripts/manage-infrastructure.py | 88 ++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 34 deletions(-) diff --git a/scripts/manage-infrastructure.py b/scripts/manage-infrastructure.py index 2138cae..bdd434e 100755 --- a/scripts/manage-infrastructure.py +++ b/scripts/manage-infrastructure.py @@ -25,9 +25,15 @@ class BuildBoxes: def fqdn (self, box): return "%s.%s"%(box,self.domain) + + ssh_command=['ssh','-o','ConnectTimeout=3'] @staticmethod def root (box): return "root@%s"%box + @staticmethod + def ssh(box): + return BuildBoxes.ssh_command + [ BuildBoxes.root(box) ] + def header (self,message): print "===============",message @@ -35,21 +41,35 @@ class BuildBoxes: if self.options.dry_run: print 'DRY_RUN:', print " ".join(argv) + return 0 else: if message: self.header(message) if not trash_err: - subprocess.call(argv) + return subprocess.call(argv) else: - subprocess.call(argv,stderr=file('/dev/null','w')) + return subprocess.call(argv,stderr=file('/dev/null','w')) + def run_ssh (self, box, argv, message, trash_err=False): + result=self.run (self.ssh(box) + argv, message, trash_err) + if result!=0: + print "WARNING: failed to run %s on %s"%(" ".join(argv),box) + return result + def backquote (self, argv, trash_err=False): if not trash_err: return subprocess.Popen(argv,stdout=subprocess.PIPE).communicate()[0] else: - null = open('/dev/null','w') - result = subprocess.Popen(argv,stdout=subprocess.PIPE,stderr=null).communicate()[0] - null.close() - return result + return subprocess.Popen(argv,stdout=subprocess.PIPE,stderr=file('/dev/null','w')).communicate()[0] + + def backquote_ssh (self, box, argv, trash_err=False): +# print 'BACKQUOTE_SSH [%s] %s'%(box,' '.join(argv)) + # first probe the ssh link + hostname=self.backquote ( self.ssh(box) + [ "hostname"], trash_err=True ) + if not hostname: + print "%s unreachable"%self.root(box) + return '' + else: + return self.backquote( ['ssh',self.root(box)] + argv, trash_err) def reboot (self,box): command=['ssh',self.root(box),'shutdown','-r','now'] @@ -59,12 +79,12 @@ class BuildBoxes: box = self.fqdn (self.testmaster) filename="tracker-plcs" if not self.options.probe: - command=['ssh',self.root(box),"rm","-rf",filename] - self.run(command,"Cleaning up %s on %s"%(filename,box)) + command=["rm","-rf",filename] + self.run_ssh(box,command,"Cleaning up %s on %s"%(filename,box)) else: self.header ("++++++++++ Inspecting %s on %s"%(filename,box)) - read_command = ['ssh',self.root(box),"cat",filename] - trackers=self.backquote(read_command) + read_command = ["cat",filename] + trackers=self.backquote_ssh(box,read_command) for tracker in trackers.split('\n'): if not tracker: continue try: @@ -79,12 +99,12 @@ class BuildBoxes: box = self.fqdn (self.testmaster) filename="tracker-qemus" if not self.options.probe: - command=['ssh',self.root(box),"rm","-rf",filename] - self.run(command,"Cleaning up %s on %s"%(filename,box)) + command=["rm","-rf",filename] + self.run_ssh(box,command,"Cleaning up %s on %s"%(filename,box)) else: self.header ("++++++++++ Inspecting %s on %s"%(filename,box)) - read_command = ['ssh',self.root(box),"cat",filename] - trackers=self.backquote(read_command) + read_command = ["cat",filename] + trackers=self.backquote_ssh(box,read_command) for tracker in trackers.split('\n'): if not tracker: continue try: @@ -99,19 +119,19 @@ class BuildBoxes: if not self.options.probe: self.reboot(box) else: - command=['ssh',self.root(box),'uptime'] - uptime=self.backquote(command,True).strip() + command=['uptime'] + uptime=self.backquote_ssh(box,command,True).strip() - command=['ssh',self.root(box),'pgrep','build'] + command=['pgrep','build'] if self.options.dry_run: - self.run(command,None) + self.run_ssh(box,command,None) else: - pids=self.backquote(command,True) + pids=self.backquote_ssh(box,command,True) if not pids: self.header ('No build process on %s (%s)'%(box,uptime)) else: - command=['ssh',self.root(box),'ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid] - self.run(command,"Active build processes on %s (%s)"%(box,uptime),True) + command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid] + self.run_ssh(box,command,"Active build processes on %s (%s)"%(box,uptime),True) vplc_matcher = re.compile(".*(vplc[0-9]+$)") def vplcname (self, vservername): @@ -128,16 +148,16 @@ class BuildBoxes: if not self.options.probe: self.reboot(box) else: - command=['ssh',self.root(box),'vserver-stat'] + command=['vserver-stat'] if self.options.dry_run: - self.run(command,"Active vservers on %s"%box) + self.run_ssh(box,command,"Active vservers on %s"%box) else: # try to find fullname (vserver_stat truncates to a ridiculously short name) try: self.header ("vserver map on %s"%box) # fetch the contexts for all vservers on that box - map_command=['ssh',self.root(box),'grep','.','/etc/vservers/*/context','/dev/null',] - context_map=self.backquote (map_command) + map_command=['grep','.','/etc/vservers/*/context','/dev/null',] + context_map=self.backquote_ssh (box,map_command) # at this point we have a set of lines like # /etc/vservers/2010.01.20--k27-f12-32-vplc03/context:40144 ctx_dict={} @@ -147,7 +167,7 @@ class BuildBoxes: ctx_dict[xid]=os.path.basename(os.path.dirname(path)) # at this point ctx_id maps context id to vservername - vserver_stat = self.backquote (command) + vserver_stat = self.backquote_ssh (box,command) for vserver_line in vserver_stat.split("\n"): if not vserver_line: continue context=vserver_line.split()[0] @@ -157,7 +177,7 @@ class BuildBoxes: longname=ctx_dict[context] print self.margin_outline(self.vplcname(longname)),"%(vserver_line)s [=%(longname)s]"%locals() except: - self.run(command,"Fine-grained method failed - fallback to plain vserver-stat") + self.run_ssh(box,command,"Fine-grained method failed - fallback to plain vserver-stat") vnode_matcher = re.compile(".*(vnode[0-9]+)") def vnodename (self, ps_line): @@ -170,24 +190,24 @@ class BuildBoxes: if not self.options.probe: self.reboot(box) else: - command=['ssh',self.root(box),'lsmod'] - modules=self.backquote(command).split('\n') + command=['lsmod'] + modules=self.backquote_ssh(box,command).split('\n') kqemu_msg='*NO kqemu MODULE LOADED*' for module in modules: if module.find('kqemu')==0: kqemu_msg='kqemu OK' - command=['ssh',self.root(box),'pgrep','qemu'] + command=['pgrep','qemu'] if self.options.dry_run: - self.run(command,None) + self.run_ssh(box,command,None) else: - pids=self.backquote(command) + pids=self.backquote_ssh(box,command) if not pids: self.header ('No qemu process on %s'%box) else: self.header ("Active qemu processes on %s (%s)"%(box,kqemu_msg)) - command=['ssh',self.root(box),'ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid] - ps_lines = self.backquote (command).split("\n") + command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid] + ps_lines = self.backquote_ssh (box,command).split("\n") for ps_line in ps_lines: if not ps_line or ps_line.find('PID') >=0 : continue print self.margin_outline(self.vnodename(ps_line)), ps_line -- 2.47.0