does not need to trigger the initscript, nodemanager now handles that
[util-vserver-pl.git] / python / vserver.py
1 # Copyright 2005 Princeton University
2
3 #$Id: vserver.py,v 1.72 2007/08/02 16:01:59 dhozac Exp $
4
import commands
import errno
import fcntl
import os
import pwd
import re
import resource
import signal
import stat
import subprocess
import sys
import time
import traceback

import vserverimpl
import cpulimit, bwlimit

from vserverimpl import DLIMIT_INF
from vserverimpl import VC_LIM_KEEP
from vserverimpl import VLIMIT_NSOCK
from vserverimpl import VLIMIT_OPENFD
from vserverimpl import VLIMIT_ANON
from vserverimpl import VLIMIT_SHMEM
27
#
# these are the flags taken from the kernel linux/vserver/legacy.h
# (each flag is a single bit, so they can be OR-ed together)
#
FLAGS_LOCK = 1 << 0
FLAGS_SCHED = 1 << 1  # XXX - defined in util-vserver/src/chcontext.c
FLAGS_NPROC = 1 << 2
FLAGS_PRIVATE = 1 << 3
FLAGS_INIT = 1 << 4
FLAGS_HIDEINFO = 1 << 5
FLAGS_ULIMIT = 1 << 6
FLAGS_NAMESPACE = 1 << 7
39
# Map of limit name -> resource identifier accepted by vserverimpl.setrlimit.
# Starts with the vserver-specific limits; the platform ones are added below.
RLIMITS = {
    "NSOCK": VLIMIT_NSOCK,
    "OPENFD": VLIMIT_OPENFD,
    "ANON": VLIMIT_ANON,
    "SHMEM": VLIMIT_SHMEM,
}

# factor applied to a cpu share before it is written to cgroup cpu.shares
CPU_SHARE_MULT = 1024

# add in the platform supported rlimits: every resource.RLIMIT_<X> constant
# is registered under the key "<X>", unless that key is already taken
for entry in resource.__dict__:
    if not entry.startswith("RLIMIT_"):
        continue
    k = entry[len("RLIMIT_"):]
    if k in RLIMITS:
        print("WARNING: duplicate RLIMITS key %s" % k)
    else:
        RLIMITS[k] = getattr(resource, entry)
55
class NoSuchVServer(Exception):
    """Raised when a vserver directory is missing or not fully accessible."""
57
class VServerConfig:
    """File-per-option configuration store rooted at a directory.

    Every option is a file below `directory` (the option name is the path
    relative to it) whose content is the option value.  After cache_it()
    has been called the object serves reads from an in-memory snapshot and
    update()/unset() become no-ops.
    """

    # Permission bits are spelled with stat constants rather than octal
    # literals so that the source parses under both octal syntaxes.
    _UMASK = stat.S_IWGRP | stat.S_IWOTH                        # 022
    _DIR_MODE = (stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP |
                 stat.S_IROTH | stat.S_IXOTH)                   # 755

    def __init__(self, name, directory):
        """Bind to `directory`; raise NoSuchVServer unless it is a
        directory we can read, write and traverse."""
        self.name = name
        self.dir = directory
        self.cache = None
        if not (os.path.isdir(self.dir) and
                os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
            raise NoSuchVServer("%s does not exist" % self.dir)

    def get(self, option, default = None):
        """Return the value of `option` with trailing whitespace stripped.

        If the option cannot be read, `default` is returned when it is not
        None; otherwise KeyError is raised (so None cannot be used as a
        default value).
        """
        try:
            # 'is not None': an empty snapshot is still a snapshot
            if self.cache is not None:
                return self.cache[option]
            f = open(os.path.join(self.dir, option), "r")
            try:
                return f.read().rstrip()
            finally:
                f.close()
        except Exception:
            if default is not None:
                return default
            raise KeyError("Key %s is not set for %s" % (option, self.name))

    def update(self, option, value):
        """Write `value` to the file for `option`, creating parent dirs.

        A list is written one element per line.  Does nothing once the
        configuration has been cached.  I/O errors from opening or writing
        the file propagate to the caller.
        """
        if self.cache is not None:
            return

        filename = os.path.join(self.dir, option)
        old_umask = os.umask(self._UMASK)
        try:
            try:
                os.makedirs(os.path.dirname(filename), self._DIR_MODE)
            except OSError:
                # normally "already exists"; any real problem is reported
                # by the open() below
                pass
            f = open(filename, 'w')
            try:
                if isinstance(value, list):
                    f.write("%s\n" % "\n".join(value))
                else:
                    f.write("%s\n" % value)
            finally:
                f.close()
        finally:
            # always put the caller's umask back, even when the write failed
            os.umask(old_umask)

    def unset(self, option):
        """Remove the file for `option` and prune now-empty parent dirs.

        Returns True if the file was removed, False if it could not be
        removed, and None (without touching the disk) once cached.
        """
        if self.cache is not None:
            return

        filename = os.path.join(self.dir, option)
        try:
            os.unlink(filename)
        except OSError:
            return False
        try:
            os.removedirs(os.path.dirname(filename))
        except OSError:
            pass
        return True

    def cache_it(self):
        """Snapshot every readable regular file below self.dir into
        self.cache, keyed by its path relative to self.dir.

        Symbolic links are skipped.  The snapshot is only installed once
        the whole tree has been read.
        """
        prefix = os.path.join(self.dir, '')
        snapshot = {}
        for dirname, _subdirs, fnames in os.walk(self.dir):
            for fname in fnames:
                full_name = os.path.join(dirname, fname)
                if os.path.islink(full_name):
                    continue
                if (os.path.isfile(full_name) and
                        os.access(full_name, os.R_OK)):
                    f = open(full_name, "r")
                    try:
                        # strip only the leading directory prefix
                        snapshot[full_name[len(prefix):]] = f.read().rstrip()
                    finally:
                        f.close()
        self.cache = snapshot
132
133
class VServer:
    """Handle on one vserver: its on-disk configuration below
    /etc/vservers/<name>, its root directory below
    vserverimpl.VSERVER_BASEDIR, and the kernel context `ctx`."""

    # 2010 June 21 - Thierry
    # the slice initscript now gets invoked through rc - see sliver_vs.py in nodemanager
    # Each entry is a command line; its words are %-expanded with
    # 'runlevel' and 'name' by start().
    INITSCRIPTS = [('/etc/rc.d/rc', '%(runlevel)d')]

    # mode for the log files this class creates (owner read/write, i.e. 600);
    # spelled with stat constants so it parses under both octal syntaxes
    _LOG_MODE = stat.S_IRUSR | stat.S_IWUSR
    _LOG_FLAGS = os.O_WRONLY | os.O_CREAT | os.O_APPEND

    def __init__(self, name, vm_id = None, vm_running = None, logfile=None):
        """Attach to the existing vserver `name`.

        vm_id      -- context id; read from the 'context' config option
                      when None
        vm_running -- running state; queried with is_running() when None
        logfile    -- path used by log(); logging is disabled when unset

        Raises NoSuchVServer if the vserver directory is missing or not
        readable/writable/traversable.
        """
        self.name = name
        self.dir = "%s/%s" % (vserverimpl.VSERVER_BASEDIR, name)
        if not (os.path.isdir(self.dir) and
                os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
            raise NoSuchVServer("no such vserver: " + name)
        self.config = VServerConfig(name, "/etc/vservers/%s" % name)
        #self.remove_caps = ~vserverimpl.CAP_SAFE;
        if vm_id is None:
            vm_id = int(self.config.get('context'))
        self.ctx = vm_id
        if vm_running is None:
            vm_running = self.is_running()
        self.vm_running = vm_running
        self.logfile = logfile

    # inspired from nodemanager's logger
    def log_in_file (self, fd, msg):
        """Write `msg`, prefixed with a GMT timestamp and terminated by a
        newline, to the open file descriptor `fd`."""
        if not msg: msg="\n"
        if not msg.endswith('\n'): msg += '\n'
        os.write(fd, '%s: %s' % (time.asctime(time.gmtime()), msg))

    def log(self,msg):
        """Append `msg` to self.logfile; a no-op when no logfile is set.
        If the file cannot be written the message goes to stdout instead."""
        if not self.logfile:
            return
        try:
            fd = os.open(self.logfile, self._LOG_FLAGS, self._LOG_MODE)
            try:
                self.log_in_file(fd, msg)
            finally:
                os.close(fd)
        except Exception:
            print('%s: (%s failed to open) %s'%(time.asctime(time.gmtime()),self.logfile,msg))

    def set_rlimit(self, type, hard, soft, min):
        """Generic set resource limit function for vserver.

        Each of hard/soft/min different from VC_LIM_KEEP is stored in the
        configuration; if anything was stored and the vserver is running
        the limits are also applied to the context.  `type` is a key of
        RLIMITS.  Returns True if the configuration was modified.
        """
        update = False

        for suffix, value in (('hard', hard), ('soft', soft), ('min', min)):
            if value != VC_LIM_KEEP:
                self.config.update('rlimits/%s.%s' % (type.lower(), suffix), value)
                update = True

        if self.is_running() and update:
            resource_type = RLIMITS[type]
            try:
                vserverimpl.setrlimit(self.ctx, resource_type, hard, soft, min)
            except OSError:
                # sys.exc_info() instead of binding the exception keeps this
                # parseable by every Python version
                self.log("Error: setrlimit(%d, %s, %d, %d, %d): %s"
                         % (self.ctx, type.lower(), hard, soft, min,
                            sys.exc_info()[1]))

        return update

    def get_prefix_from_capabilities(self, capabilities, prefix):
        """Return the comma-separated subset of `capabilities` whose names
        start with `prefix` in all-upper or all-lower case."""
        wanted = (prefix.upper(), prefix.lower())
        return ",".join([c for c in capabilities.split(',')
                         if c.startswith(wanted)])

    def get_bcaps_from_capabilities(self, capabilities):
        """Return the 'cap_' entries of `capabilities`."""
        return self.get_prefix_from_capabilities(capabilities, "cap_")

    def get_ccaps_from_capabilities(self, capabilities):
        """Return the 'vxc_' entries of `capabilities`."""
        return self.get_prefix_from_capabilities(capabilities, "vxc_")

    def set_capabilities_config(self, capabilities):
        """Store the bcaps/ccaps found in `capabilities` in the
        configuration and apply them to the context.

        CAP_NET_RAW is always appended to the bcaps.  Returns the result
        of setbcaps if it is positive, otherwise the result of setccaps.
        """
        bcaps = self.get_bcaps_from_capabilities(capabilities)
        ccaps = self.get_ccaps_from_capabilities(capabilities)
        if len(bcaps) > 0:
            bcaps += ","
        bcaps += "CAP_NET_RAW"
        self.config.update('bcapabilities', bcaps)
        self.config.update('ccapabilities', ccaps)
        ret = vserverimpl.setbcaps(self.ctx, vserverimpl.text2bcaps(bcaps))
        if ret > 0:
            return ret
        return vserverimpl.setccaps(self.ctx, vserverimpl.text2ccaps(ccaps))

    def get_capabilities(self):
        """Return the context's current bcaps and ccaps as one
        comma-separated string."""
        bcaps = vserverimpl.bcaps2text(vserverimpl.getbcaps(self.ctx))
        ccaps = vserverimpl.ccaps2text(vserverimpl.getccaps(self.ctx))
        if bcaps and ccaps:
            ccaps = "," + ccaps
        return (bcaps + ccaps)

    def get_capabilities_config(self):
        """Return the configured bcaps and ccaps as one comma-separated
        string (empty when neither option is set)."""
        bcaps = self.config.get('bcapabilities', '')
        ccaps = self.config.get('ccapabilities', '')
        if bcaps and ccaps:
            ccaps = "," + ccaps
        return (bcaps + ccaps)

    def set_ipaddresses(self, addresses):
        """Replace the context's addresses with the comma-separated list
        `addresses`."""
        vserverimpl.netremove(self.ctx, "all")
        for a in addresses.split(","):
            vserverimpl.netadd(self.ctx, a)

    def set_ipaddresses_config(self, addresses):
        """Currently a no-op: the method returns immediately.

        The statements after the return are unreachable and kept only as a
        record of the disabled behaviour (store one address per
        interfaces/<i>/ip option, drop stale ones, then apply them).
        """
        return # acb
        i = 0
        for a in addresses.split(","):
            self.config.update("interfaces/%d/ip" % i, a)
            i += 1
        while self.config.unset("interfaces/%d/ip" % i):
            i += 1
        self.set_ipaddresses(addresses)

    def get_ipaddresses_config(self):
        """Return the configured addresses (interfaces/0/ip,
        interfaces/1/ip, ... up to the first missing or empty one) as a
        comma-separated string."""
        i = 0
        ret = []
        while True:
            r = self.config.get("interfaces/%d/ip" % i, '')
            if r == '':
                break
            ret.append(r)
            i += 1
        return ",".join(ret)

    def get_ipaddresses(self):
        """Not implemented: logs the call and returns None."""
        # No clean way to do this right now.
        self.log("Calling Vserver.get_ipaddresses for slice %s" % self.name)
        return None

    def __do_chroot(self):
        """chroot into the vserver's directory and move to its root."""
        os.chroot(self.dir)
        os.chdir("/")

    def chroot_call(self, fn, *args, **kwargs):
        """Call fn(*args, **kwargs) chrooted into the vserver, then restore
        the previous root and working directory (also when fn raises).
        Returns fn's result."""
        cwd_fd = os.open(".", os.O_RDONLY)
        try:
            # keep a handle on the real root so that we can get back to it
            root_fd = os.open("/", os.O_RDONLY)
            try:
                self.__do_chroot()
                result = fn(*args, **kwargs)
            finally:
                os.fchdir(root_fd)
                os.chroot(".")
                os.fchdir(cwd_fd)
                os.close(root_fd)
        finally:
            os.close(cwd_fd)
        return result

    def set_disklimit(self, block_limit):
        """Set the disk limit of the context; 0 removes the limit.

        Errors from the kernel interface are logged, not raised.  When the
        vserver is not running the current usage is taken from
        self.disk_blocks / self.disk_inodes.
        """
        # block_limit is in kB
        if block_limit == 0:
            try:
                vserverimpl.unsetdlimit(self.dir, self.ctx)
            except OSError:
                self.log("Unexpected error with unsetdlimit for context %d" % self.ctx)
            return

        if self.vm_running:
            block_usage = vserverimpl.DLIMIT_KEEP
            inode_usage = vserverimpl.DLIMIT_KEEP
        else:
            # init_disk_info() must have been called to get usage values
            block_usage = self.disk_blocks
            inode_usage = self.disk_inodes

        try:
            vserverimpl.setdlimit(self.dir,
                                  self.ctx,
                                  block_usage,
                                  block_limit,
                                  inode_usage,
                                  vserverimpl.DLIMIT_INF,  # inode limit
                                  2)   # %age reserved for root
        except OSError:
            self.log("Unexpected error with setdlimit for context %d" % self.ctx)

        self.config.update('dlimits/0/space_total', block_limit)

    def is_running(self):
        """Return True if '/usr/sbin/vserver <name> running' exits with 0."""
        status = subprocess.call(["/usr/sbin/vserver", self.name, "running"], shell=False)
        return not status

    def get_disklimit(self):
        """Return the block limit of the context, or -1 if none is set.

        As a side effect self.disk_blocks and self.disk_inodes are
        refreshed when a limit exists.  OSError other than ESRCH is
        re-raised.
        """
        try:
            (self.disk_blocks, block_limit, self.disk_inodes, inode_limit,
             reserved) = vserverimpl.getdlimit(self.dir, self.ctx)
        except OSError:
            if sys.exc_info()[1].errno != errno.ESRCH:
                raise
            # get here if no vserver disk limit has been set for xid
            block_limit = -1

        return block_limit

    def set_sched_config(self, cpu_min, cpu_share):
        """ Write current CPU scheduler parameters to the vserver
        configuration file. Currently, 'cpu_min' is not supported. """
        self.config.update('cgroup/cpu.shares', cpu_share * CPU_SHARE_MULT)
        if self.is_running():
            self.set_sched(cpu_min, cpu_share)

    def set_sched(self, cpu_min, cpu_share):
        """ Update kernel CPU scheduling parameters for this context.
        Currently, 'cpu_min' is not supported.

        Best effort: a failure is logged, never raised. """
        try:
            cgroup = open('/dev/cgroup/%s/cpu.shares' % self.name, 'w')
            try:
                cgroup.write('%s' % (cpu_share * CPU_SHARE_MULT))
            finally:
                cgroup.close()
        except Exception:
            self.log("Error: could not set cpu.shares for %s: %s"
                     % (self.name, sys.exc_info()[1]))

    def get_sched(self):
        """Return (-1, cpu_share) from the configuration; cpu_share is
        False when the option is missing or not a number."""
        try:
            cpu_share = int(int(self.config.get('cgroup/cpu.shares')) / CPU_SHARE_MULT)
        except Exception:
            cpu_share = False
        return (-1, cpu_share)

    def set_bwlimit(self, minrate = bwlimit.bwmin, maxrate = None,
                    exempt_min = None, exempt_max = None,
                    share = None, dev = "eth0"):
        """Turn bandwidth limiting for this context on `dev` on with the
        given parameters, or off when minrate is None."""
        if minrate is None:
            bwlimit.off(self.ctx, dev)
        else:
            bwlimit.on(self.ctx, dev, share,
                       minrate, maxrate, exempt_min, exempt_max)

    def get_bwlimit(self, dev = "eth0"):
        """Return bwlimit.get()'s result for this context without its
        leading ctx element (or the falsy result unchanged)."""
        # NOTE(review): `dev` is accepted but not forwarded to bwlimit.get();
        # confirm whether it should be.
        result = bwlimit.get(self.ctx)
        # result of bwlimit.get is (ctx, share, minrate, maxrate)
        if result:
            result = result[1:]
        return result

    def open(self, filename, mode = "r", bufsize = -1):
        """Open `filename` as seen from inside the vserver's root."""
        return self.chroot_call(open, filename, mode, bufsize)

    def enter(self):
        """Run '/usr/sbin/vserver <name> enter'."""
        subprocess.call(["/usr/sbin/vserver", self.name, "enter"])

    # detach the process that triggers the initscripts
    # after http://code.activestate.com/recipes/278731/
    def start(self, runlevel = 3):
        """Start the vserver and run INITSCRIPTS in a detached process.

        The caller returns right away with vm_running set to True; the
        actual work happens in a double-forked grandchild which never
        returns into the caller's code.
        """
        if os.fork() != 0:
            # Parent should just return.
            self.vm_running = True
            return

        # first child process: new session, then fork again
        os.setsid()
        if os.fork() != 0:
            os._exit(0)     # Exit parent (the first child) of the second child.

        # the grandson is the working one
        try:
            os.chdir('/')
            os.umask(0)

            # start the vserver
            subprocess.call(["/usr/sbin/vserver",self.name,"start"])

            # execute initscripts
            for cmd_to_expand in self.INITSCRIPTS:
                expand = { 'runlevel': runlevel,
                           'name': self.name, }
                cmd = [ x % expand for x in cmd_to_expand ]
                cmd_name = os.path.basename(cmd[0])
                cmd_file = "/vservers/" + self.name + cmd[0]
                if not os.path.isfile(cmd_file):
                    self.log("WARNING: could not find %s for %s" % (cmd_file, self.name))
                    break
                self.log("executing %r" % cmd)
                logname='/vservers/%s/var/log/%s'%(self.name,cmd_name)
                # open outside the try/finally below: there is nothing to
                # close when the open itself fails
                try:
                    log_fd = os.open(logname, self._LOG_FLAGS, self._LOG_MODE)
                except OSError:
                    self.log(traceback.format_exc())
                    continue
                try:
                    self.log_in_file(log_fd,"Running %r into %s"%(cmd,logname))
                    # enter vserver context
                    self.chroot_call(subprocess.call,cmd,
                                     stdout=log_fd,stderr=subprocess.STDOUT,
                                     close_fds=True)
                except Exception:
                    self.log(traceback.format_exc())
                finally:
                    os.close(log_fd)

        # we get here due to an exception in the grandson process
        except Exception:
            self.log(traceback.format_exc())
        finally:
            # whatever happened, the grandson must not outlive this method
            os._exit(0)

    def set_resources(self):

        """ Called when vserver context is entered for first time,
        should be overridden by subclass. """

        pass

    def init_disk_info(self):
        """Set self.disk_blocks / self.disk_inodes and return the disk
        usage as self.disk_blocks * 1024.

        The values come from the context's disk limit when one can be
        read, otherwise from running /usr/sbin/vdu over the vserver
        directory.  Raises ValueError if vdu's output is not
        '<space> <inodes>'.
        """
        try:
            dlimit = vserverimpl.getdlimit(self.dir, self.ctx)
            self.disk_blocks = dlimit[0]
            self.disk_inodes = dlimit[2]
            return self.disk_blocks * 1024
        except Exception:
            # no usable disk limit: fall back to scanning the filesystem
            pass

        cmd = ["/usr/sbin/vdu", "--script", "--space", "--inodes",
               "--blocksize", "1024", "--xid", "%d" % self.ctx, self.dir]
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             close_fds=True)
        # communicate() closes stdin, drains both pipes and waits for vdu
        (out, err) = p.communicate()
        lines = out.splitlines()
        line = ""
        if lines:
            line = lines[0]
        if not line:
            sys.stderr.write(err)

        (space, inodes) = line.split()
        self.disk_inodes = int(inodes)
        self.disk_blocks = int(space)

        return self.disk_blocks * 1024

    def stop(self, signal = signal.SIGKILL):
        """Mark the vserver as stopped and run
        '/usr/sbin/vserver <name> stop'.  `signal` is not used."""
        self.vm_running = False
        subprocess.call(["/usr/sbin/vserver", self.name, "stop"])

    def setname(self, slice_id):
        """Does nothing."""
        pass

    def getname(self):
        '''Get vcVHI_CONTEXT field in kernel'''
        return vserverimpl.getname(self.ctx)
469
470
def create(vm_name, static = False, ctor = VServer):
    """Create the vserver `vm_name` with vuseradd and return ctor(vm_name, id).

    static -- pass '--static' to vuseradd
    ctor   -- callable building the returned object from (name, id); the id
              is the uid of the passwd entry named `vm_name`

    Raises SystemError if vuseradd exits with a non-zero code or is killed
    by a signal.
    """
    options = ['vuseradd']
    if static:
        options += ['--static']
    # With P_WAIT, spawnvp returns the exit code itself, or -signal when the
    # child was killed; it is not a wait() status to be decoded with os.W*.
    ret = os.spawnvp(os.P_WAIT, 'vuseradd', options + [vm_name])
    if ret != 0:
        out = "system command ('%s') " % options
        if ret > 0:
            out += "failed, rc = %d" % ret
        else:
            out += "killed by signal %d" % -ret
        raise SystemError(out)
    vm_id = pwd.getpwnam(vm_name)[2]

    return ctor(vm_name, vm_id)
487
488
def close_nonstandard_fds():
    """Close all open file descriptors other than 0, 1, and 2."""
    # Look the limit up by name; 4 (the index previously hard-coded here)
    # is only used if the platform does not list SC_OPEN_MAX.
    open_max = os.sysconf(os.sysconf_names.get("SC_OPEN_MAX", 4))
    for fd in range(3, open_max):
        try:
            os.close(fd)
        except OSError:
            pass  # most likely an fd that isn't open
495