6 from optparse import OptionParser
10 # short box names below are qualified with this domain (no longer onelab.eu)
11 domain = 'pl.sophia.inria.fr'
12 testmaster = 'testmaster'
13 build_boxes = [ "mirror", "liquid", "reed", "velvet", ]
14 plc_boxes = [ "testplc" ]
15 # qemu32-5 is officially dead
17 [ "qemu64-%d"%i for i in range (1,4) ] + \
18 [ "qemu32-%d"%i for i in range (1,5) ]
19 test_boxes = plc_boxes + qemu_boxes
20 testmaster_boxes = [ testmaster ]
25 self.do_tracker_qemus = False
26 self.do_tracker_plcs = False
29 return "%s.%s"%(box,self.domain)
31 ssh_command=['ssh','-o','ConnectTimeout=3']
def root(box):
    """Return the ssh login string for root on *box*, i.e. ``root@<box>``.

    Takes no ``self``; other code in this file calls it both as
    ``BuildBoxes.root(box)`` and as ``self.root(box)``.
    """
    login = "root@%s" % box
    return login
37 return BuildBoxes.ssh_command + [ BuildBoxes.root(box) ]
def header(self, message):
    """Print *message* on stdout as a banner line behind a row of '=' signs.

    :param message: text shown after the '=' prefix; rendered with ``%s``.
    """
    # A single parenthesised argument prints the same line under Python 2's
    # print statement and Python 3's print function, whereas the previous
    # two-operand print statement is a SyntaxError on Python 3.
    print("=============== %s" % (message,))
43 def run (self,argv,message, trash_err=False):
44 if self.options.dry_run:
49 if message: self.header(message)
51 return subprocess.call(argv)
53 return subprocess.call(argv,stderr=file('/dev/null','w'))
55 def run_ssh (self, box, argv, message, trash_err=False):
56 result=self.run (self.ssh(box) + argv, message, trash_err)
58 print "WARNING: failed to run %s on %s"%(" ".join(argv),box)
61 def backquote (self, argv, trash_err=False):
63 return subprocess.Popen(argv,stdout=subprocess.PIPE).communicate()[0]
65 return subprocess.Popen(argv,stdout=subprocess.PIPE,stderr=file('/dev/null','w')).communicate()[0]
67 def backquote_ssh (self, box, argv, trash_err=False):
68 # first probe the ssh link
69 hostname=self.backquote ( self.ssh(box) + [ "hostname"], trash_err=True )
71 print "%s unreachable"%self.root(box)
74 return self.backquote( ['ssh',self.root(box)] + argv, trash_err)
def reboot(self, box):
    """Ask *box* to reboot by running ``shutdown -r now`` as root over ssh.

    The command is handed to ``self.run`` together with a
    ``"Rebooting <box>"`` message; the status returned by ``self.run`` is
    not propagated.

    :param box: host name, passed to ``self.ssh`` to build the ssh prefix.
    """
    # Build the prefix with self.ssh() instead of a bare ['ssh', root@box]:
    # this keeps the ConnectTimeout option used by the other remote calls,
    # so an unreachable box does not stall the whole run.
    command = self.ssh(box) + ['shutdown', '-r', 'now']
    self.run(command, "Rebooting %s" % box)
80 def handle_tracker_plcs (self):
81 box = self.fqdn (self.testmaster)
82 filename="tracker-plcs"
83 if not self.options.probe:
84 command=["rm","-rf",filename]
85 self.run_ssh(box,command,"Cleaning up %s on %s"%(filename,box))
87 self.header ("++++++++++ Inspecting %s on %s"%(filename,box))
88 read_command = ["cat",filename]
89 trackers=self.backquote_ssh(box,read_command)
90 for tracker in trackers.split('\n'):
91 if not tracker: continue
93 tracker=tracker.strip()
94 [hostname,buildname]=tracker.split('@')
95 [left,plcname]=buildname.rsplit('-',1)
96 print self.margin_outline(plcname),tracker
98 print self.margin(""),tracker
100 def handle_tracker_qemus (self):
101 box = self.fqdn (self.testmaster)
102 filename="tracker-qemus"
103 if not self.options.probe:
104 command=["rm","-rf",filename]
105 self.run_ssh(box,command,"Cleaning up %s on %s"%(filename,box))
107 self.header ("++++++++++ Inspecting %s on %s"%(filename,box))
108 read_command = ["cat",filename]
109 trackers=self.backquote_ssh(box,read_command)
110 for tracker in trackers.split('\n'):
111 if not tracker: continue
113 tracker=tracker.strip()
114 [hostname,buildname,nodename]=tracker.split('@')
115 nodename=nodename.split('.')[0]
116 print self.margin_outline(nodename),tracker
118 print self.margin(""),tracker
120 def handle_build_box (self,box):
121 if not self.options.probe:
125 uptime=self.backquote_ssh(box,command,True).strip()
127 command=['pgrep','build']
128 if self.options.dry_run:
129 self.run_ssh(box,command,None)
131 pids=self.backquote_ssh(box,command,True)
133 self.header ('No build process on %s (%s)'%(box,uptime))
135 command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid]
136 self.run_ssh(box,command,"Active build processes on %s (%s)"%(box,uptime),True)
138 # this one is more accurate as it locates processes in the vservers as well
139 # but it's so sloooowww
140 def handle_build_box_deep (self,box):
141 if not self.options.probe:
145 uptime=self.backquote_ssh(box,command,True).strip()
148 if self.options.dry_run:
149 self.run_ssh(box,command,None)
151 # simulate grep vbuild
152 vps_lines=[ line for line in self.backquote_ssh(box,command,True).split("\n")
153 if line.find('vbuild') >= 0]
154 pids=[ line.split()[0] for line in vps_lines ]
156 self.header ('No build process on %s (%s)'%(box,uptime))
158 command=['vps','-o','pid,command'] + pids
159 self.run_ssh(box,command,"Active build processes on %s (%s)"%(box,uptime),True)
# matches names that end in 'vplc' followed by digits; the group captures that suffix
vplc_matcher = re.compile(".*(vplc[0-9]+$)")
def vplcname(self, vservername):
    """Extract the trailing ``vplcNN`` token from a vserver name.

    :param vservername: full vserver name, e.g.
        ``2010.01.20--k27-f12-32-vplc03``.
    :returns: the captured suffix as a string (``vplc03`` for the example
        above), or None when the name does not end in ``vplc`` + digits.
    """
    match = self.vplc_matcher.match(vservername)
    if match is None:
        return None
    # group(1) is the captured text itself; groups(0) used to hand back a
    # 1-tuple such as ('vplc03',) instead of the name.
    return match.group(1)
# left-justify in a 14-character column; longer text is not truncated
margin_format="%-14s"
def margin(self, string):
    """Return *string* rendered through ``self.margin_format``."""
    padded = self.margin_format % string
    return padded
def outline(self, string):
    """Return *string* framed as ``== <string> ==``."""
    framed = '== %s ==' % string
    return framed
def margin_outline(self, string):
    """Frame *string* with ``self.outline`` and pass the result to ``self.margin``."""
    framed = self.outline(string)
    return self.margin(framed)
173 def handle_plc_box (self,box):
174 if not self.options.probe:
177 command=['vserver-stat']
178 if self.options.dry_run:
179 self.run_ssh(box,command,"Active vservers on %s"%box)
181 # try to find fullname (vserver_stat truncates to a ridiculously short name)
183 self.header ("vserver map on %s"%box)
184 # fetch the contexts for all vservers on that box
185 map_command=['grep','.','/etc/vservers/*/context','/dev/null',]
186 context_map=self.backquote_ssh (box,map_command)
187 # at this point we have a set of lines like
188 # /etc/vservers/2010.01.20--k27-f12-32-vplc03/context:40144
190 for map_line in context_map.split("\n"):
191 if not map_line: continue
192 [path,xid] = map_line.split(':')
193 ctx_dict[xid]=os.path.basename(os.path.dirname(path))
194 # at this point ctx_dict maps context id to vservername
196 vserver_stat = self.backquote_ssh (box,command)
197 for vserver_line in vserver_stat.split("\n"):
198 if not vserver_line: continue
199 context=vserver_line.split()[0]
201 print self.margin(""),vserver_line
203 longname=ctx_dict[context]
204 print self.margin_outline(self.vplcname(longname)),"%(vserver_line)s [=%(longname)s]"%locals()
206 self.run_ssh(box,command,"Fine-grained method failed - fallback to plain vserver-stat")
# matches a line containing 'vnode' followed by digits; the group captures that token
vnode_matcher = re.compile(".*(vnode[0-9]+)")
def vnodename(self, ps_line):
    """Extract a ``vnodeNN`` token from one line of ``ps`` output.

    The leading ``.*`` in the pattern is greedy, so when the line contains
    several such tokens the last one is returned.

    :param ps_line: a single line of text to search.
    :returns: the captured token as a string (e.g. ``vnode03``), or None
        when the line contains no ``vnode`` + digits.
    """
    match = self.vnode_matcher.match(ps_line)
    if match is None:
        return None
    # group(1) is the captured text itself; groups(0) used to hand back a
    # 1-tuple such as ('vnode03',) instead of the name.
    return match.group(1)
214 def handle_qemu_box (self,box):
215 if not self.options.probe:
219 modules=self.backquote_ssh(box,command).split('\n')
220 kqemu_msg='*NO kqemu MODULE LOADED*'
221 for module in modules:
222 if module.find('kqemu')==0:
225 command=['pgrep','qemu']
226 if self.options.dry_run:
227 self.run_ssh(box,command,None)
229 pids=self.backquote_ssh(box,command)
231 self.header ('No qemu process on %s'%box)
233 self.header ("Active qemu processes on %s (%s)"%(box,kqemu_msg))
234 command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid]
235 ps_lines = self.backquote_ssh (box,command).split("\n")
236 for ps_line in ps_lines:
237 if not ps_line or ps_line.find('PID') >=0 : continue
238 print self.margin_outline(self.vnodename(ps_line)), ps_line
240 # the output of ps -o pid,command gives us <pid> bash <buildname>/run_log
241 def testmaster_buildname (self, ps_line):
242 chunks=ps_line.split()
244 [buildname,command]=path.split('/')
247 def handle_testmaster_box (self, box):
248 if not self.options.probe:
251 command=['pgrep','run_log']
252 if self.options.dry_run:
253 self.run_ssh(box,command,None)
255 pids=self.backquote_ssh(box,command)
257 self.header ('No run_log process on %s'%box)
259 self.header ("Active run_log processes on %s"%(box))
260 command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid]
261 ps_lines = self.backquote_ssh (box,command).split("\n")
262 for ps_line in ps_lines:
263 if not ps_line or ps_line.find('PID') >=0 : continue
264 print self.margin_outline(self.testmaster_buildname(ps_line)), ps_line
267 def handle_box(self,box,type):
268 if box in self.qemu_boxes:
269 if type=="qemu": self.handle_qemu_box(self.fqdn(box))
270 elif box in self.plc_boxes:
271 if type=="plc": self.handle_plc_box(self.fqdn(box))
272 elif box in self.testmaster_boxes:
273 if type=='testmaster': self.handle_testmaster_box(self.fqdn(box))
275 if self.options.deep:
276 self.handle_build_box_deep(self.fqdn(box))
278 self.handle_build_box(self.fqdn(box))
280 def handle_disk (self,box):
282 return self.run_ssh(box,["df","-h",],"Disk space on %s"%box)
285 usage="""%prog [options] [hostname..(s)]
286 Default is to act on test boxes only"""
287 parser = OptionParser (usage=usage)
288 parser.add_option ("-n","--dry-run",action="store_true",dest="dry_run",default=False,
290 parser.add_option ("-r","--reboot", action="store_false",dest="probe",default=True,
291 help="Actually reset/reboot stuff instead of just probing it")
292 # no need for -p = probe, as this is the default
293 parser.add_option ("-p","--plc", action="store_true",dest="plc_only",default=False,
294 help="Acts on the plc box only")
296 parser.add_option ("-a","--all",action="store_true",dest="all_boxes",default=False,
297 help="Acts on build and test boxes")
298 parser.add_option ("-b","--build",action="store_true",dest="build_only",default=False,
299 help="Acts on build boxes only")
300 parser.add_option ("-e","--deep",action="store_true", dest="deep", default=False,
301 help="on build boxes, shows vbuild processes in vservers as well; signif. slower")
302 parser.add_option ("-q","--qemu",action="store_true",dest="qemu_only",default=False,
303 help="Only acts on the qemu boxes")
304 parser.add_option ("-t","--trackers",action="store_true",dest="trackers_only",default=False,
305 help="Only wipes trackers")
306 parser.add_option ("-m","--master",action="store_true",dest="testmaster_only",default=False,
307 help="Display the testmaster status")
308 parser.add_option ("-d","--disk",action="store_true",dest="show_disk",default=False,
309 help="Only inspects disk status")
311 (self.options,args) = parser.parse_args()
313 # use given hostnames if provided
316 # if hostnames are specified, let's stay on the safe side and don't reset trackers
317 self.do_tracker_plcs = False
318 self.do_tracker_qemus = False
319 elif self.options.all_boxes:
320 self.boxes=self.test_boxes + self.build_boxes + self.testmaster_boxes
321 self.do_tracker_plcs = True
322 self.do_tracker_qemus = True
323 elif self.options.build_only:
324 self.boxes=self.build_boxes
325 self.do_tracker_plcs = False
326 self.do_tracker_qemus = False
327 elif self.options.qemu_only:
328 self.boxes=self.qemu_boxes
329 self.do_tracker_plcs = False
330 self.do_tracker_qemus = True
331 elif self.options.plc_only:
332 self.boxes=self.plc_boxes
333 self.do_tracker_plcs = True
334 self.do_tracker_qemus = False
335 elif self.options.testmaster_only:
336 self.boxes=self.testmaster_boxes
337 self.do_tracker_plcs = False
338 self.do_tracker_qemus = False
339 elif self.options.trackers_only:
341 self.do_tracker_plcs = True
342 self.do_tracker_qemus = True
345 self.boxes = self.test_boxes
346 self.do_tracker_plcs = True
347 self.do_tracker_qemus = True
349 if self.options.show_disk:
350 for box in self.boxes: self.handle_disk(box)
354 if self.do_tracker_plcs:self.handle_tracker_plcs ()
355 for box in self.boxes: self.handle_box (box,"plc")
357 if self.do_tracker_qemus:self.handle_tracker_qemus ()
358 for box in self.boxes: self.handle_box (box,"qemu")
360 for box in self.boxes: self.handle_box (box,"build")
362 for box in self.boxes: self.handle_box (box,"testmaster")
364 if __name__ == "__main__":