2 # Thierry Parmentelat <thierry.parmentelat@inria.fr>
3 # Copyright (C) 2010 INRIA
5 # #################### history
7 # see also Substrate.readme
9 # This is a complete rewrite of TestResources/Tracker/Pool
10 # we don't use trackers anymore and just probe/sense the running
11 # boxes to figure out where we are
12 # in order to implement some fairness in the round-robin allocation scheme
13 # we need an indication of the 'age' of each running entity,
14 # hence the 'timestamp-*' steps in TestPlc
16 # this should be much more flexible:
17 # * supports several plc boxes
18 # * supports several qemu guests per host
19 # * no need to worry about tracker being in sync or not
21 # #################### howto use
23 # each site is to write its own LocalSubstrate.py,
24 # (see e.g. LocalSubstrate.inria.py)
25 # LocalSubstrate.py is expected to be in /root on the testmaster box
28 # . the vserver-capable boxes used for hosting myplcs
29 # . and their admissible load (max # of myplcs)
30 # . the pool of DNS-names and IP-addresses available for myplcs
32 # . the kvm-qemu capable boxes to host qemu instances
# . and their admissible load (max # of qemus)
34 # . the pool of DNS-names and IP-addresses available for nodes
36 # #################### implem. note
38 # this model relies on 'sensing' the substrate,
39 # i.e. probing all the boxes for their running instances of vservers and qemu
40 # this is how we get rid of tracker inconsistencies
41 # however there is a 'black hole' between the time where a given address is
42 # allocated and when it actually gets used/pingable
43 # this is why we still need a shared knowledge among running tests
44 # in a file named /root/starting
45 # this is connected to the Pool class
47 # ####################
56 from optparse import OptionParser
59 from TestSsh import TestSsh
60 from TestMapper import TestMapper
62 def header (message,banner=True):
63 if not message: return
64 if banner: print "===============",
def timestamp_sort(o1, o2):
    """cmp-style comparator: orders objects by ascending .timestamp."""
    delta = o1.timestamp - o2.timestamp
    return delta
def short_hostname(hostname):
    """Return the leading dot-separated label (e.g. 'box' for 'box.inria.fr')."""
    return hostname.partition('.')[0]
74 # allows to pick an available IP among a pool
75 # input is expressed as a list of tuples (hostname,ip,user_data)
76 # that can be searched iteratively for a free slot
78 # pool = [ (hostname1,user_data1),
79 # (hostname2,user_data2),
80 # (hostname3,user_data2),
81 # (hostname4,user_data4) ]
82 # assuming that ip1 and ip3 are taken (pingable), then we'd get
84 # pool.next_free() -> entry2
85 # pool.next_free() -> entry4
86 # pool.next_free() -> None
87 # that is, even if ip2 is not busy/pingable when the second next_free() is issued
90 def __init__ (self,hostname,userdata):
91 self.hostname=hostname
92 self.userdata=userdata
93 # slot holds 'busy' or 'free' or 'mine' or 'starting' or None
94 # 'mine' is for our own stuff, 'starting' from the concurrent tests
99 return "Pooled %s (%s) -> %s"%(self.hostname,self.userdata, self.status)
102 if self.status==None: return '?'
103 elif self.status=='busy': return '+'
104 elif self.status=='free': return '-'
105 elif self.status=='mine': return 'M'
106 elif self.status=='starting': return 'S'
109 if self.ip: return self.ip
110 ip=socket.gethostbyname(self.hostname)
116 def __init__ (self, tuples,message):
117 self.pool_items= [ PoolItem (hostname,userdata) for (hostname,userdata) in tuples ]
121 for i in self.pool_items: print i.line()
125 for i in self.pool_items: line += ' ' + i.char()
128 def _item (self, hostname):
129 for i in self.pool_items:
130 if i.hostname==hostname: return i
131 raise Exception ("Could not locate hostname %s in pool %s"%(hostname,self.message))
def retrieve_userdata(self, hostname):
    """Return the user_data recorded for hostname in this pool."""
    item = self._item(hostname)
    return item.userdata
def get_ip(self, hostname):
    """Resolve hostname to an IP address.

    First try the pool item's own get_ip(); if the hostname is not in
    the pool (or its lookup fails), fall back to a direct DNS query.
    """
    try:
        return self._item(hostname).get_ip()
    except Exception:
        # narrowed from a bare 'except:' so SystemExit/KeyboardInterrupt propagate
        return socket.gethostbyname(hostname)
140 def set_mine (self, hostname):
142 self._item(hostname).status='mine'
144 print 'WARNING: host %s not found in IP pool %s'%(hostname,self.message)
146 def next_free (self):
147 for i in self.pool_items:
148 if i.status == 'free':
150 return (i.hostname,i.userdata)
# the place where other test instances tell about their not-yet-started
154 # instances, that go undetected through sensing
155 starting='/root/starting'
156 def add_starting (self, name):
157 try: items=[line.strip() for line in file(Pool.starting).readlines()]
159 if not name in items:
160 file(Pool.starting,'a').write(name+'\n')
161 for i in self.pool_items:
162 if i.hostname==name: i.status='mine'
164 # we load this after actual sensing;
165 def load_starting (self):
166 try: items=[line.strip() for line in file(Pool.starting).readlines()]
168 for i in self.pool_items:
169 if i.hostname in items:
170 if i.status=='free' : i.status='starting'
172 def release_my_starting (self):
173 for i in self.pool_items:
175 self.del_starting(i.hostname)
178 def del_starting (self, name):
179 try: items=[line.strip() for line in file(Pool.starting).readlines()]
182 f=file(Pool.starting,'w')
184 if item != name: f.write(item+'\n')
189 for item in self.pool_items:
190 if item.status is not None:
193 if self.check_ping (item.hostname):
201 print 'Sensing IP pool',self.message,
205 print 'After starting: IP pool'
208 # OS-dependent ping option (support for macos, for convenience)
209 ping_timeout_option = None
210 # returns True when a given hostname/ip responds to ping
211 def check_ping (self,hostname):
212 if not Pool.ping_timeout_option:
213 (status,osname) = commands.getstatusoutput("uname -s")
215 raise Exception, "TestPool: Cannot figure your OS name"
216 if osname == "Linux":
217 Pool.ping_timeout_option="-w"
218 elif osname == "Darwin":
219 Pool.ping_timeout_option="-t"
221 command="ping -c 1 %s 1 %s"%(Pool.ping_timeout_option,hostname)
222 (status,output) = commands.getstatusoutput(command)
227 def __init__ (self,hostname):
228 self.hostname=hostname
def shortname(self):
    """This box's hostname stripped of its domain part."""
    full = self.hostname
    return short_hostname(full)
def test_ssh(self):
    """A root TestSsh handle on this box, with host-key checking relaxed."""
    return TestSsh(self.hostname, username='root', unknown_host=False)
def reboot(self, options):
    """Hard-reboot this box over ssh (honors options.dry_run)."""
    ssh = self.test_ssh()
    ssh.run("shutdown -r now",
            message="Rebooting %s" % self.hostname,
            dry_run=options.dry_run)
237 if hasattr(self,'_uptime') and self._uptime: return self._uptime
238 return '*undef* uptime'
239 def sense_uptime (self):
241 self._uptime=self.backquote_ssh(command,trash_err=True).strip()
242 if not self._uptime: self._uptime='unreachable'
244 def run(self,argv,message=None,trash_err=False,dry_run=False):
252 return subprocess.call(argv)
254 return subprocess.call(argv,stderr=file('/dev/null','w'))
256 def run_ssh (self, argv, message, trash_err=False, dry_run=False):
257 ssh_argv = self.test_ssh().actual_argv(argv)
258 result=self.run (ssh_argv, message, trash_err, dry_run=dry_run)
260 print "WARNING: failed to run %s on %s"%(" ".join(argv),self.hostname)
263 def backquote (self, argv, trash_err=False):
265 result= subprocess.Popen(argv,stdout=subprocess.PIPE).communicate()[0]
267 result= subprocess.Popen(argv,stdout=subprocess.PIPE,stderr=file('/dev/null','w')).communicate()[0]
270 def backquote_ssh (self, argv, trash_err=False):
271 # first probe the ssh link
272 probe_argv=self.test_ssh().actual_argv(['hostname'])
273 hostname=self.backquote ( probe_argv, trash_err=True )
275 print "root@%s unreachable"%self.hostname
278 return self.backquote( self.test_ssh().actual_argv(argv), trash_err)
280 ############################################################
282 def __init__ (self, buildname, pid, buildbox):
283 self.buildname=buildname
284 self.buildbox=buildbox
def add_pid(self, pid):
    """Record one more pid as belonging to this build."""
    self.pids += [pid]
291 return "== %s == (pids=%r)"%(self.buildname,self.pids)
293 class BuildBox (Box):
def __init__(self, hostname):
    """A build box is a plain Box that additionally tracks its vbuild runs."""
    Box.__init__(self, hostname)
    self.build_instances = []
298 def add_build (self,buildname,pid):
299 for build in self.build_instances:
300 if build.buildname==buildname:
303 self.build_instances.append(BuildInstance(buildname, pid, self))
306 if not self.build_instances:
307 header ('No build process on %s (%s)'%(self.hostname,self.uptime()))
309 header ("Builds on %s (%s)"%(self.hostname,self.uptime()))
310 for b in self.build_instances:
311 header (b.line(),banner=False)
313 def reboot (self, options):
317 command=['pkill','vbuild']
318 self.run_ssh(command,"Terminating vbuild processes",dry_run=options.dry_run)
320 # inspect box and find currently running builds
321 matcher=re.compile("\s*(?P<pid>[0-9]+).*-[bo]\s+(?P<buildname>[^\s]+)(\s|\Z)")
322 def sense(self,options):
325 pids=self.backquote_ssh(['pgrep','vbuild'],trash_err=True)
327 command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid]
328 ps_lines=self.backquote_ssh (command).split('\n')
329 for line in ps_lines:
330 if not line.strip() or line.find('PID')>=0: continue
331 m=BuildBox.matcher.match(line)
333 date=time.strftime('%Y-%m-%d',time.localtime(time.time()))
334 buildname=m.group('buildname').replace('@DATE@',date)
335 self.add_build (buildname,m.group('pid'))
336 else: header('command %r returned line that failed to match'%command)
338 ############################################################
340 def __init__ (self, vservername, ctxid, plcbox):
341 self.vservername=vservername
347 def set_timestamp (self,timestamp): self.timestamp=timestamp
348 def set_now (self): self.timestamp=int(time.time())
349 def pretty_timestamp (self): return time.strftime("%Y-%m-%d:%H-%M",time.localtime(self.timestamp))
352 return self.vservername.split('-')[-1]
353 def buildname (self):
354 return self.vservername.rsplit('-',2)[0]
357 msg="== %s =="%(self.vplcname())
358 msg += " [=%s]"%self.vservername
359 if self.ctxid==0: msg+=" not (yet?) running"
360 else: msg+=" (ctx=%s)"%self.ctxid
361 if self.timestamp: msg += " @ %s"%self.pretty_timestamp()
362 else: msg += " *unknown timestamp*"
366 msg="vserver stopping %s on %s"%(self.vservername,self.plc_box.hostname)
367 self.plc_box.run_ssh(['vserver',self.vservername,'stop'],msg)
368 self.plc_box.forget(self)
371 def __init__ (self, hostname, max_plcs):
372 Box.__init__(self,hostname)
373 self.plc_instances=[]
374 self.max_plcs=max_plcs
376 def add_vserver (self,vservername,ctxid):
377 for plc in self.plc_instances:
378 if plc.vservername==vservername:
379 header("WARNING, duplicate myplc %s running on %s"%\
380 (vservername,self.hostname),banner=False)
382 self.plc_instances.append(PlcInstance(vservername,ctxid,self))
def forget(self, plc_instance):
    """Stop tracking a plc instance (typically right after it was killed)."""
    self.plc_instances.remove(plc_instance)
387 # fill one slot even though this one is not started yet
388 def add_dummy (self, plcname):
389 dummy=PlcInstance('dummy_'+plcname,0,self)
391 self.plc_instances.append(dummy)
394 msg="%s [max=%d,%d free] (%s)"%(self.hostname, self.max_plcs,self.free_spots(),self.uname())
398 if not self.plc_instances:
399 header ('No vserver running on %s'%(self.line()))
401 header ("Active plc VMs on %s"%self.line())
402 self.plc_instances.sort(timestamp_sort)
403 for p in self.plc_instances:
404 header (p.line(),banner=False)
def free_spots(self):
    """How many more myplc instances this box can still accept."""
    used = len(self.plc_instances)
    return self.max_plcs - used
410 if hasattr(self,'_uname') and self._uname: return self._uname
411 return '*undef* uname'
def plc_instance_by_vservername(self, vservername):
    """Return the tracked PlcInstance named vservername, or None if unknown."""
    matches = (p for p in self.plc_instances if p.vservername == vservername)
    return next(matches, None)
418 def reboot (self, options):
422 self.run_ssh(['service','util-vserver','stop'],"Stopping all running vservers",
423 dry_run=options.dry_run)
425 def sense (self, options):
427 self._uname=self.backquote_ssh(['uname','-r']).strip()
428 # try to find fullname (vserver_stat truncates to a ridiculously short name)
429 # fetch the contexts for all vservers on that box
430 map_command=['grep','.','/etc/vservers/*/context','/dev/null',]
431 context_map=self.backquote_ssh (map_command)
432 # at this point we have a set of lines like
433 # /etc/vservers/2010.01.20--k27-f12-32-vplc03/context:40144
435 for map_line in context_map.split("\n"):
436 if not map_line: continue
437 [path,xid] = map_line.split(':')
438 ctx_dict[xid]=os.path.basename(os.path.dirname(path))
439 # at this point ctx_id maps context id to vservername
441 command=['vserver-stat']
442 vserver_stat = self.backquote_ssh (command)
443 for vserver_line in vserver_stat.split("\n"):
444 if not vserver_line: continue
445 context=vserver_line.split()[0]
446 if context=="CTX": continue
447 longname=ctx_dict[context]
448 self.add_vserver(longname,context)
449 # print self.margin_outline(self.vplcname(longname)),"%(vserver_line)s [=%(longname)s]"%locals()
452 running_vsnames = [ i.vservername for i in self.plc_instances ]
453 command= ['grep','.']
454 command += ['/vservers/%s.timestamp'%vs for vs in running_vsnames]
455 command += ['/dev/null']
456 ts_lines=self.backquote_ssh(command,trash_err=True).split('\n')
457 for ts_line in ts_lines:
458 if not ts_line.strip(): continue
459 # expect /vservers/<vservername>.timestamp:<timestamp>
461 (ts_file,timestamp)=ts_line.split(':')
462 ts_file=os.path.basename(ts_file)
463 (vservername,_)=os.path.splitext(ts_file)
464 timestamp=int(timestamp)
465 p=self.plc_instance_by_vservername(vservername)
467 print 'WARNING zombie plc',self.hostname,ts_line
468 print '... was expecting',vservername,'in',[i.vservername for i in self.plc_instances]
470 p.set_timestamp(timestamp)
471 except: print 'WARNING, could not parse ts line',ts_line
476 ############################################################
478 def __init__ (self, nodename, pid, qemubox):
479 self.nodename=nodename
481 self.qemu_box=qemubox
486 def set_buildname (self,buildname): self.buildname=buildname
487 def set_timestamp (self,timestamp): self.timestamp=timestamp
488 def set_now (self): self.timestamp=int(time.time())
489 def pretty_timestamp (self): return time.strftime("%Y-%m-%d:%H-%M",time.localtime(self.timestamp))
492 msg = "== %s =="%(short_hostname(self.nodename))
493 msg += " [=%s]"%self.buildname
494 if self.pid: msg += " (pid=%s)"%self.pid
495 else: msg += " not (yet?) running"
496 if self.timestamp: msg += " @ %s"%self.pretty_timestamp()
497 else: msg += " *unknown timestamp*"
502 print "cannot kill qemu %s with pid==0"%self.nodename
504 msg="Killing qemu %s with pid=%s on box %s"%(self.nodename,self.pid,self.qemu_box.hostname)
505 self.qemu_box.run_ssh(['kill',"%s"%self.pid],msg)
506 self.qemu_box.forget(self)
510 def __init__ (self, hostname, max_qemus):
511 Box.__init__(self,hostname)
512 self.qemu_instances=[]
513 self.max_qemus=max_qemus
515 def add_node (self,nodename,pid):
516 for qemu in self.qemu_instances:
517 if qemu.nodename==nodename:
518 header("WARNING, duplicate qemu %s running on %s"%\
519 (nodename,self.hostname), banner=False)
521 self.qemu_instances.append(QemuInstance(nodename,pid,self))
def forget(self, qemu_instance):
    """Stop tracking a qemu instance (typically right after it was killed)."""
    self.qemu_instances.remove(qemu_instance)
526 # fill one slot even though this one is not started yet
527 def add_dummy (self, nodename):
528 dummy=QemuInstance('dummy_'+nodename,0,self)
530 self.qemu_instances.append(dummy)
533 msg="%s [max=%d,%d free] (%s)"%(self.hostname, self.max_qemus,self.free_spots(),self.driver())
537 if not self.qemu_instances:
538 header ('No qemu process on %s'%(self.line()))
540 header ("Active qemu processes on %s"%(self.line()))
541 self.qemu_instances.sort(timestamp_sort)
542 for q in self.qemu_instances:
543 header (q.line(),banner=False)
def free_spots(self):
    """How many more qemu instances this box can still accept."""
    used = len(self.qemu_instances)
    return self.max_qemus - used
549 if hasattr(self,'_driver') and self._driver: return self._driver
550 return '*undef* driver'
def qemu_instance_by_pid(self, pid):
    """Return the tracked QemuInstance with this pid, or None if unknown."""
    candidates = (q for q in self.qemu_instances if q.pid == pid)
    return next(candidates, None)
557 def qemu_instance_by_nodename_buildname (self,nodename,buildname):
558 for q in self.qemu_instances:
559 if q.nodename==nodename and q.buildname==buildname:
563 def reboot (self, options):
567 self.run_ssh(['pkill','qemu'],"Killing qemu instances",
568 dry_run=options.dry_run)
570 matcher=re.compile("\s*(?P<pid>[0-9]+).*-cdrom\s+(?P<nodename>[^\s]+)\.iso")
571 def sense(self, options):
573 modules=self.backquote_ssh(['lsmod']).split('\n')
574 self._driver='*NO kqemu/kmv_intel MODULE LOADED*'
575 for module in modules:
576 if module.find('kqemu')==0:
577 self._driver='kqemu module loaded'
578 # kvm might be loaded without vkm_intel (we dont have AMD)
579 elif module.find('kvm_intel')==0:
580 self._driver='kvm_intel module loaded'
581 ########## find out running pids
582 pids=self.backquote_ssh(['pgrep','qemu'])
584 command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid]
585 ps_lines = self.backquote_ssh (command).split("\n")
586 for line in ps_lines:
587 if not line.strip() or line.find('PID') >=0 : continue
588 m=QemuBox.matcher.match(line)
589 if m: self.add_node (m.group('nodename'),m.group('pid'))
590 else: header('command %r returned line that failed to match'%command)
591 ########## retrieve alive instances and map to build
593 command=['grep','.','*/*/qemu.pid','/dev/null']
594 pid_lines=self.backquote_ssh(command,trash_err=True).split('\n')
595 for pid_line in pid_lines:
596 if not pid_line.strip(): continue
597 # expect <build>/<nodename>/qemu.pid:<pid>pid
599 (buildname,nodename,tail)=pid_line.split('/')
600 (_,pid)=tail.split(':')
601 q=self.qemu_instance_by_pid (pid)
603 q.set_buildname(buildname)
604 live_builds.append(buildname)
605 except: print 'WARNING, could not parse pid line',pid_line
606 # retrieve timestamps
607 command= ['grep','.']
608 command += ['%s/*/timestamp'%b for b in live_builds]
609 command += ['/dev/null']
610 ts_lines=self.backquote_ssh(command,trash_err=True).split('\n')
611 for ts_line in ts_lines:
612 if not ts_line.strip(): continue
613 # expect <build>/<nodename>/timestamp:<timestamp>
615 (buildname,nodename,tail)=ts_line.split('/')
616 nodename=nodename.replace('qemu-','')
617 (_,timestamp)=tail.split(':')
618 timestamp=int(timestamp)
619 q=self.qemu_instance_by_nodename_buildname(nodename,buildname)
621 print 'WARNING zombie qemu',self.hostname,ts_line
622 print '... was expecting (',short_hostname(nodename),buildname,') in',\
623 [ (short_hostname(i.nodename),i.buildname) for i in self.qemu_instances ]
625 q.set_timestamp(timestamp)
626 except: print 'WARNING, could not parse ts line',ts_line
630 def __init__ (self, pid, buildname):
632 self.buildname=buildname
def add_pid(self, pid):
    """Record an additional pid for this test run."""
    self.pids += [pid]
def set_broken(self, plcindex, step):
    """Mark step as failed for the plc at plcindex in this run."""
    self.broken_steps += [(plcindex, step)]
643 msg = " == %s =="%self.buildname
644 msg += " (pid=%s)"%(self.pids)
645 if self.broken_steps:
646 msg += "\n BROKEN IN STEPS "
647 for (i,s) in self.broken_steps: msg += "step=%s,plc=%s"%(s,i)
651 def __init__ (self,hostname):
652 Box.__init__(self,hostname)
654 self.test_instances=[]
656 def reboot (self, options):
657 # can't reboot a vserver VM
658 self.run_ssh (['pkill','run_log'],"Terminating current runs",
659 dry_run=options.dry_run)
660 self.run_ssh (['rm','-f','/root/starting'],"Cleaning /root/starting",
661 dry_run=options.dry_run)
def get_test(self, buildname):
    """Return the TestInstance for buildname, or None if none is tracked."""
    hits = (i for i in self.test_instances if i.buildname == buildname)
    return next(hits, None)
667 def add_test (self, pid, buildname):
668 i=self.get_test(buildname)
670 print "WARNING: 2 concurrent tests run on same build %s"%buildname
673 self.test_instances.append (TestInstance (pid,buildname))
675 matcher_proc=re.compile (".*/proc/(?P<pid>[0-9]+)/cwd.*/root/(?P<buildname>[^/]+)$")
676 matcher_grep=re.compile ("/root/(?P<buildname>[^/]+)/logs/trace:TRACE:\s*(?P<plcindex>[0-9]+).*step=(?P<step>\S+).*")
677 def sense (self, options):
680 self.starting_ips=self.backquote_ssh(['cat','/root/starting']).strip().split('\n')
681 pids = self.backquote_ssh (['pgrep','run_log'],trash_err=True)
683 command=['ls','-ld'] + ["/proc/%s/cwd"%pid for pid in pids.split("\n") if pid]
684 ps_lines=self.backquote_ssh (command).split('\n')
685 for line in ps_lines:
686 if not line.strip(): continue
687 m=TestBox.matcher_proc.match(line)
690 buildname=m.group('buildname')
691 self.add_test(pid, buildname)
692 else: header("command %r returned line that failed to match\n%s"%(command,line))
693 buildnames=[i.buildname for i in self.test_instances]
694 if not buildnames: return
696 # messy - tail has different output if one or several args
697 # command=['tail','-n','1'] + [ "/root/%s/logs/trace"%b for b in buildnames ]
698 # trace_lines=self.backquote_ssh (command).split('\n\n')
700 # for line in trace_lines:
701 # if not line.strip(): continue
702 # print 'line [[[%s]]]'%line
703 command=['grep','KO'] + [ "/root/%s/logs/trace"%b for b in buildnames ] + [ "/dev/null" ]
704 trace_lines=self.backquote_ssh (command).split('\n')
705 for line in trace_lines:
706 if not line.strip(): continue
707 m=TestBox.matcher_grep.match(line)
709 buildname=m.group('buildname')
710 plcindex=m.group('plcindex')
712 self.get_test(buildname).set_broken (plcindex, step)
713 else: header("command %r returned line that failed to match\n%s"%(command,line))
718 return "%s (%s)"%(self.hostname,self.uptime())
721 if not self.starting_ips:
722 header ("No starting IP addresses on %s"%self.line())
724 header ("IP addresses currently starting up on %s"%self.line())
725 self.starting_ips.sort()
726 for starting in self.starting_ips: print starting
727 if not self.test_instances:
728 header ("No running tests on %s"%self.line())
730 header ("Running tests on %s"%self.line())
731 for i in self.test_instances: print i.line()
733 ############################################################
739 self.options=Options()
740 self.options.dry_run=False
741 self.options.verbose=False
742 self.options.reboot=False
743 self.options.soft=False
744 self.test_box = TestBox (self.test_box_spec())
745 self.build_boxes = [ BuildBox(h) for h in self.build_boxes_spec() ]
746 self.plc_boxes = [ PlcBox (h,m) for (h,m) in self.plc_boxes_spec ()]
747 self.qemu_boxes = [ QemuBox (h,m) for (h,m) in self.qemu_boxes_spec ()]
748 self.all_boxes = self.plc_boxes + self.qemu_boxes
751 self.vplc_pool = Pool (self.vplc_ips(),"for vplcs")
752 self.vnode_pool = Pool (self.vnode_ips(),"for vnodes")
754 def fqdn (self, hostname):
755 if hostname.find('.')<0: return "%s.%s"%(hostname,self.domain())
758 # return True if actual sensing takes place
759 def sense (self,force=False):
760 if self._sensed and not force: return False
761 print 'Sensing local substrate...',
762 for b in self.all_boxes: b.sense(self.options)
def add_dummy_plc(self, plc_boxname, plcname):
    """Reserve a slot for plcname on the plc box named plc_boxname."""
    matching = [box for box in self.plc_boxes if box.hostname == plc_boxname]
    for box in matching:
        box.add_dummy(plcname)
def add_dummy_qemu(self, qemu_boxname, qemuname):
    """Reserve a slot for qemuname on the qemu box named qemu_boxname."""
    matching = [box for box in self.qemu_boxes if box.hostname == qemu_boxname]
    for box in matching:
        box.add_dummy(qemuname)
777 def provision (self,plcs,options):
779 # attach each plc to a plc box and an IP address
780 plcs = [ self.provision_plc (plc,options) for plc in plcs ]
781 # attach each node/qemu to a qemu box with an IP address
782 plcs = [ self.provision_qemus (plc,options) for plc in plcs ]
783 # update the SFA spec accordingly
784 plcs = [ self.localize_sfa_rspec(plc,options) for plc in plcs ]
787 print '* Could not provision this test on current substrate','--',e,'--','exiting'
788 traceback.print_exc()
791 # it is expected that a couple of options like ips_bplc and ips_vplc
792 # are set or unset together
def check_options(x, y):
    """True when x and y are both unset/empty, or both set with equal lengths."""
    both_empty = not x and not y
    return both_empty or len(x) == len(y)
798 # find an available plc box (or make space)
799 # and a free IP address (using options if present)
800 def provision_plc (self, plc, options):
802 assert Substrate.check_options (options.ips_bplc, options.ips_vplc)
804 #### let's find an IP address for that plc
808 # we don't check anything here,
809 # it is the caller's responsability to cleanup and make sure this makes sense
810 plc_boxname = options.ips_bplc.pop()
811 vplc_hostname=options.ips_vplc.pop()
813 if self.sense(): self.list_all()
816 # try to find an available IP
817 self.vplc_pool.sense()
818 couple=self.vplc_pool.next_free()
820 (vplc_hostname,unused)=couple
821 #### we need to find one plc box that still has a slot
823 # use the box that has max free spots for load balancing
824 for pb in self.plc_boxes:
827 plc_boxname=pb.hostname
829 # if there's no available slot in the plc_boxes, or we need a free IP address
830 # make space by killing the oldest running instance
831 if not plc_boxname or not vplc_hostname:
832 # find the oldest of all our instances
833 all_plc_instances=reduce(lambda x, y: x+y,
834 [ pb.plc_instances for pb in self.plc_boxes ],
836 all_plc_instances.sort(timestamp_sort)
838 plc_instance_to_kill=all_plc_instances[0]
841 if not plc_boxname: msg += " PLC boxes are full"
842 if not vplc_hostname: msg += " vplc IP pool exhausted"
843 raise Exception,"Could not make space for a PLC instance:"+msg
844 freed_plc_boxname=plc_instance_to_kill.plc_box.hostname
845 freed_vplc_hostname=plc_instance_to_kill.vplcname()
846 message='killing oldest plc instance = %s on %s'%(plc_instance_to_kill.line(),
848 plc_instance_to_kill.kill()
849 # use this new plcbox if that was the problem
851 plc_boxname=freed_plc_boxname
852 # ditto for the IP address
853 if not vplc_hostname:
854 vplc_hostname=freed_vplc_hostname
855 # record in pool as mine
856 self.vplc_pool.set_mine(vplc_hostname)
859 self.add_dummy_plc(plc_boxname,plc['name'])
860 vplc_ip = self.vplc_pool.get_ip(vplc_hostname)
861 self.vplc_pool.add_starting(vplc_hostname)
863 #### compute a helpful vserver name
864 # remove domain in hostname
865 vplc_short = short_hostname(vplc_hostname)
866 vservername = "%s-%d-%s" % (options.buildname,plc['index'],vplc_short)
867 plc_name = "%s_%s"%(plc['name'],vplc_short)
869 utils.header( 'PROVISION plc %s in box %s at IP %s as %s'%\
870 (plc['name'],plc_boxname,vplc_hostname,vservername))
872 #### apply in the plc_spec
874 # label=options.personality.replace("linux","")
875 mapper = {'plc': [ ('*' , {'host_box':plc_boxname,
876 # 'name':'%s-'+label,
878 'vservername':vservername,
880 'PLC_DB_HOST':vplc_hostname,
881 'PLC_API_HOST':vplc_hostname,
882 'PLC_BOOT_HOST':vplc_hostname,
883 'PLC_WWW_HOST':vplc_hostname,
884 'PLC_NET_DNS1' : self.network_settings() [ 'interface_fields:dns1' ],
885 'PLC_NET_DNS2' : self.network_settings() [ 'interface_fields:dns2' ],
890 # mappers only work on a list of plcs
891 return TestMapper([plc],options).map(mapper)[0]
894 def provision_qemus (self, plc, options):
896 assert Substrate.check_options (options.ips_bnode, options.ips_vnode)
898 test_mapper = TestMapper ([plc], options)
899 nodenames = test_mapper.node_names()
901 for nodename in nodenames:
903 if options.ips_vnode:
904 # as above, it's a rerun, take it for granted
905 qemu_boxname=options.ips_bnode.pop()
906 vnode_hostname=options.ips_vnode.pop()
908 if self.sense(): self.list_all()
911 # try to find an available IP
912 self.vnode_pool.sense()
913 couple=self.vnode_pool.next_free()
915 (vnode_hostname,unused)=couple
916 # find a physical box
918 # use the box that has max free spots for load balancing
919 for qb in self.qemu_boxes:
922 qemu_boxname=qb.hostname
924 # if we miss the box or the IP, kill the oldest instance
925 if not qemu_boxname or not vnode_hostname:
926 # find the oldest of all our instances
927 all_qemu_instances=reduce(lambda x, y: x+y,
928 [ qb.qemu_instances for qb in self.qemu_boxes ],
930 all_qemu_instances.sort(timestamp_sort)
932 qemu_instance_to_kill=all_qemu_instances[0]
935 if not qemu_boxname: msg += " QEMU boxes are full"
936 if not vnode_hostname: msg += " vnode IP pool exhausted"
937 raise Exception,"Could not make space for a QEMU instance:"+msg
938 freed_qemu_boxname=qemu_instance_to_kill.qemu_box.hostname
939 freed_vnode_hostname=short_hostname(qemu_instance_to_kill.nodename)
941 message='killing oldest qemu node = %s on %s'%(qemu_instance_to_kill.line(),
943 qemu_instance_to_kill.kill()
944 # use these freed resources where needed
946 qemu_boxname=freed_qemu_boxname
947 if not vnode_hostname:
948 vnode_hostname=freed_vnode_hostname
949 self.vnode_pool.set_mine(vnode_hostname)
951 self.add_dummy_qemu (qemu_boxname,nodename)
952 mac=self.vnode_pool.retrieve_userdata(vnode_hostname)
953 ip=self.vnode_pool.get_ip (vnode_hostname)
954 self.vnode_pool.add_starting(vnode_hostname)
956 vnode_fqdn = self.fqdn(vnode_hostname)
957 nodemap={'host_box':qemu_boxname,
958 'node_fields:hostname':vnode_fqdn,
959 'interface_fields:ip':ip,
960 'interface_fields:mac':mac,
962 nodemap.update(self.network_settings())
963 maps.append ( (nodename, nodemap) )
965 utils.header("PROVISION node %s in box %s at IP %s with MAC %s"%\
966 (nodename,qemu_boxname,vnode_hostname,mac))
968 return test_mapper.map({'node':maps})[0]
970 def localize_sfa_rspec (self,plc,options):
972 plc['sfa']['SFA_REGISTRY_HOST'] = plc['PLC_DB_HOST']
973 plc['sfa']['SFA_AGGREGATE_HOST'] = plc['PLC_DB_HOST']
974 plc['sfa']['SFA_SM_HOST'] = plc['PLC_DB_HOST']
975 plc['sfa']['SFA_PLC_DB_HOST'] = plc['PLC_DB_HOST']
976 plc['sfa']['SFA_PLC_URL'] = 'https://' + plc['PLC_API_HOST'] + ':443/PLCAPI/'
977 for site in plc['sites']:
978 for node in site['nodes']:
979 plc['sfa']['sfa_slice_rspec']['part4'] = node['node_fields']['hostname']
982 #################### release:
983 def release (self,options):
984 self.vplc_pool.release_my_starting()
985 self.vnode_pool.release_my_starting()
988 #################### show results for interactive mode
989 def get_box (self,boxname):
990 for b in self.build_boxes + self.plc_boxes + self.qemu_boxes + [self.test_box] :
991 if b.shortname()==boxname:
993 print "Could not find box %s"%boxname
996 def list_boxes(self,boxes):
1001 b.sense(self.options)
1008 def reboot_boxes(self,boxes):
1012 b.reboot(self.options)
1014 ####################
1015 # can be run as a utility to manage the local infrastructure
1017 parser=OptionParser()
1018 parser.add_option ('-r',"--reboot",action='store_true',dest='reboot',default=False,
1019 help='reboot mode (use shutdown -r)')
1020 parser.add_option ('-s',"--soft",action='store_true',dest='soft',default=False,
1021 help='soft mode for reboot (vserver stop or kill qemus)')
1022 parser.add_option ('-t',"--testbox",action='store_true',dest='testbox',default=False,
1023 help='add test box')
1024 parser.add_option ('-b',"--build",action='store_true',dest='builds',default=False,
1025 help='add build boxes')
1026 parser.add_option ('-p',"--plc",action='store_true',dest='plcs',default=False,
1027 help='add plc boxes')
1028 parser.add_option ('-q',"--qemu",action='store_true',dest='qemus',default=False,
1029 help='add qemu boxes')
1030 parser.add_option ('-v',"--verbose",action='store_true',dest='verbose',default=False,
1031 help='verbose mode')
1032 parser.add_option ('-n',"--dry_run",action='store_true',dest='dry_run',default=False,
1033 help='dry run mode')
1034 (self.options,args)=parser.parse_args()
1037 if self.options.testbox: boxes += [self.test_box.hostname]
1038 if self.options.builds: boxes += [b.hostname for b in self.build_boxes]
1039 if self.options.plcs: boxes += [b.hostname for b in self.plc_boxes]
1040 if self.options.qemus: boxes += [b.hostname for b in self.qemu_boxes]
1041 boxes=list(set(boxes))
1045 boxes = [ b.hostname for b in \
1046 self.build_boxes + [ self.test_box ] + \
1047 self.plc_boxes + self.qemu_boxes ]
1049 if self.options.reboot: self.reboot_boxes (boxes)
1050 else: self.list_boxes (boxes)