4fc552d179ddde7f9d2e1d00ba4a058dce292020
[util-vserver-pl.git] / python / vserver.py
1 # Copyright 2005 Princeton University
2
3 #$Id: vserver.py,v 1.72 2007/08/02 16:01:59 dhozac Exp $
4
5 import errno
6 import fcntl
7 import os
8 import re
9 import pwd
10 import signal
11 import sys
12 import time
13 import traceback
14 import subprocess
15 import resource
16
17 import vserverimpl
18 import cpulimit, bwlimit
19
20 from vserverimpl import DLIMIT_INF
21 from vserverimpl import VC_LIM_KEEP
22 from vserverimpl import VLIMIT_NSOCK
23 from vserverimpl import VLIMIT_OPENFD
24 from vserverimpl import VLIMIT_ANON
25 from vserverimpl import VLIMIT_SHMEM
26
#
# Context flags, taken from the kernel header linux/vserver/legacy.h.
# Every value is a distinct power of two.
#
FLAGS_LOCK = 1 << 0
FLAGS_SCHED = 1 << 1  # XXX - defined in util-vserver/src/chcontext.c
FLAGS_NPROC = 1 << 2
FLAGS_PRIVATE = 1 << 3
FLAGS_INIT = 1 << 4
FLAGS_HIDEINFO = 1 << 5
FLAGS_ULIMIT = 1 << 6
FLAGS_NAMESPACE = 1 << 7
38
# Resource-limit name -> numeric id.  Starts with the vserver-specific
# limits imported from vserverimpl.
RLIMITS = {
    "NSOCK": VLIMIT_NSOCK,
    "OPENFD": VLIMIT_OPENFD,
    "ANON": VLIMIT_ANON,
    "SHMEM": VLIMIT_SHMEM,
}

# Add every RLIMIT_* constant the platform's `resource` module exposes,
# keyed by its name without the "RLIMIT_" prefix.  A name that is already
# present is left untouched and reported on stdout.
for entry in vars(resource).keys():
    if not entry.startswith("RLIMIT_"):
        continue
    k = entry[len("RLIMIT_"):]
    if k in RLIMITS:
        sys.stdout.write("WARNING: duplicate RLIMITS key %s\n" % k)
    else:
        RLIMITS[k] = getattr(resource, entry)
52
class NoSuchVServer(Exception):
    """Raised when a vserver's directory is missing or not accessible."""
54
55
class VServerConfig:
    """Directory-backed key/value store holding one vserver's configuration.

    Every option is a file below `directory`: the option name is the file's
    path relative to that directory and the value is the file's contents
    with trailing whitespace stripped.  Once cache_it() has been called,
    reads are answered from an in-memory snapshot and update()/unset()
    become no-ops.
    """

    # Permission bits are parsed from octal text because the octal literal
    # syntax is not the same in Python 2 and Python 3.
    _UMASK = int("022", 8)
    _DIR_MODE = int("755", 8)

    def __init__(self, name, directory):
        """Bind to the configuration directory of the vserver called `name`.

        Raises NoSuchVServer unless `directory` is an existing directory
        that the caller may read, write and search.
        """
        self.name = name
        self.dir = directory
        # None means "not cached"; a dict (possibly empty) is the snapshot.
        self.cache = None
        if not (os.path.isdir(self.dir) and
                os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
            raise NoSuchVServer("%s does not exist" % self.dir)

    def get(self, option, default = None):
        """Return the value of `option`.

        If the option is not set (or its file cannot be read), `default` is
        returned when it is not None; otherwise KeyError is raised.
        """
        try:
            # Compare against None explicitly: an empty snapshot is still a
            # snapshot and must not fall through to the filesystem.
            if self.cache is not None:
                return self.cache[option]
            f = open(os.path.join(self.dir, option), "r")
            try:
                return f.read().rstrip()
            finally:
                f.close()
        except (KeyError, IOError, OSError):
            if default is not None:
                return default
            raise KeyError("Key %s is not set for %s" % (option, self.name))

    def update(self, option, value):
        """Write `value` to the file for `option`, creating parent directories.

        A list is written one item per line; anything else is written with
        "%s" formatting.  Does nothing once the configuration is cached.
        Errors from opening or writing the file propagate to the caller.
        """
        if self.cache is not None:
            return

        filename = os.path.join(self.dir, option)
        old_umask = os.umask(self._UMASK)
        try:
            try:
                os.makedirs(os.path.dirname(filename), self._DIR_MODE)
            except OSError:
                # Usually "already exists"; any real problem shows up when
                # the file itself is opened below.
                pass
            f = open(filename, 'w')
            try:
                if isinstance(value, list):
                    f.write("%s\n" % "\n".join(value))
                else:
                    f.write("%s\n" % value)
            finally:
                f.close()
        finally:
            # Always restore the process umask, even when the write fails.
            os.umask(old_umask)

    def unset(self, option):
        """Delete the file for `option`.

        Returns True if the file was removed and False if it could not be
        (for example because it does not exist).  Returns None without
        touching anything once the configuration is cached.
        """
        if self.cache is not None:
            return

        filename = os.path.join(self.dir, option)
        try:
            os.unlink(filename)
        except OSError:
            return False
        try:
            # Prune directories left empty by the removal.
            # NOTE(review): os.removedirs keeps walking upwards, so this
            # can also remove self.dir itself once it becomes empty --
            # confirm that is intended.
            os.removedirs(os.path.dirname(filename))
        except OSError:
            pass
        return True

    def cache_it(self):
        """Load every readable regular file under self.dir into self.cache.

        Keys are paths relative to self.dir.  Symbolic links are skipped.
        The snapshot replaces self.cache only after the whole tree has been
        read.
        """
        snapshot = {}
        prefix = os.path.join(self.dir, '')
        for dirpath, dirnames, filenames in os.walk(self.dir):
            for fname in filenames:
                full_name = os.path.join(dirpath, fname)
                if os.path.islink(full_name):
                    continue
                if not (os.path.isfile(full_name) and
                        os.access(full_name, os.R_OK)):
                    continue
                f = open(full_name, "r")
                try:
                    contents = f.read().rstrip()
                finally:
                    f.close()
                # Strip only the leading directory prefix; str.replace
                # would also eat later occurrences inside the path.
                if full_name.startswith(prefix):
                    key = full_name[len(prefix):]
                else:
                    key = full_name
                snapshot[key] = contents
        self.cache = snapshot
130
131
132 class VServer:
133
134     INITSCRIPTS = [('/etc/rc.vinit', 'start'),
135                    ('/etc/rc.d/rc', '%(runlevel)d')]
136
137     def __init__(self, name, vm_id = None, vm_running = None, logfile=None):
138
139         self.name = name
140         self.dir = "%s/%s" % (vserverimpl.VSERVER_BASEDIR, name)
141         if not (os.path.isdir(self.dir) and
142                 os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
143             raise NoSuchVServer, "no such vserver: " + name
144         self.config = VServerConfig(name, "/etc/vservers/%s" % name)
145         self.remove_caps = ~vserverimpl.CAP_SAFE;
146         if vm_id == None:
147             vm_id = int(self.config.get('context'))
148         self.ctx = vm_id
149         if vm_running == None:
150             vm_running = self.is_running()
151         self.vm_running = vm_running
152         self.logfile = logfile
153
154     # inspired from nodemanager's logger
155     def log(self,msg):
156         if self.logfile:
157             try:
158                 fd = os.open(self.logfile,os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0600)
159                 if not msg.endswith('\n'): msg += '\n'
160                 os.write(fd, '%s: %s' % (time.asctime(time.gmtime()), msg))
161                 os.close(fd)
162             except:
163                 print '%s: (%s failed to open) %s'%(time.asctime(time.gmtime()),self.logfile,msg)
164
165     def set_rlimit(self, type, hard, soft, min):
166         """Generic set resource limit function for vserver"""
167         global RLIMITS
168         update = False
169
170         if hard <> VC_LIM_KEEP:
171             self.config.update('rlimits/%s.hard' % type.lower(), hard)
172             update = True
173         if soft <> VC_LIM_KEEP:
174             self.config.update('rlimits/%s.soft' % type.lower(), soft)
175             update = True
176         if min <> VC_LIM_KEEP:
177             self.config.update('rlimits/%s.min' % type.lower(), min)
178             update = True
179
180         if self.is_running() and update:
181             resource_type = RLIMITS[type]
182             try:
183                 vserverimpl.setrlimit(self.ctx, resource_type, hard, soft, min)
184             except OSError, e:
185                 self.log("Error: setrlimit(%d, %s, %d, %d, %d): %s"
186                          % (self.ctx, type.lower(), hard, soft, min))
187
188         return update
189
190     def get_prefix_from_capabilities(self, capabilities, prefix):
191         split_caps = capabilities.split(',')
192         return ",".join(["%s" % (c) for c in split_caps if c.startswith(prefix.upper()) or c.startswith(prefix.lower())])
193
194     def get_bcaps_from_capabilities(self, capabilities):
195         return self.get_prefix_from_capabilities(capabilities, "cap_")
196
197     def get_ccaps_from_capabilities(self, capabilities):
198         return self.get_prefix_from_capabilities(capabilities, "vxc_")
199
200     def set_capabilities_config(self, capabilities):
201         bcaps = self.get_bcaps_from_capabilities(capabilities)
202         ccaps = self.get_ccaps_from_capabilities(capabilities)
203         self.config.update('bcapabilities', bcaps)
204         self.config.update('ccapabilities', ccaps)
205         ret = vserverimpl.setbcaps(self.ctx, vserverimpl.text2bcaps(bcaps))
206         if ret > 0:
207             return ret
208         return vserverimpl.setccaps(self.ctx, vserverimpl.text2ccaps(ccaps))
209
210     def get_capabilities(self):
211         bcaps = vserverimpl.bcaps2text(vserverimpl.getbcaps(self.ctx))
212         ccaps = vserverimpl.ccaps2text(vserverimpl.getccaps(self.ctx))
213         if bcaps and ccaps:
214             ccaps = "," + ccaps
215         return (bcaps + ccaps)
216  
217     def get_capabilities_config(self):
218         bcaps = self.config.get('bcapabilities', '')
219         ccaps = self.config.get('ccapabilities', '')
220         if bcaps and ccaps:
221             ccaps = "," + ccaps
222         return (bcaps + ccaps)
223
224     def set_ipaddresses(self, addresses):
225         vserverimpl.netremove(self.ctx, "all")
226         for a in addresses.split(","):
227             vserverimpl.netadd(self.ctx, a)
228
229     def set_ipaddresses_config(self, addresses):
230         i = 0
231         for a in addresses.split(","):
232             self.config.update("interfaces/%d/ip" % i, a)
233             i += 1
234         while self.config.unset("interfaces/%d/ip" % i):
235             i += 1
236         self.set_ipaddresses(addresses)
237
238     def get_ipaddresses_config(self):
239         i = 0
240         ret = []
241         while True:
242             r = self.config.get("interfaces/%d/ip" % i, '')
243             if r == '':
244                 break
245             ret += [r]
246             i += 1
247         return ",".join(ret)
248
249     def get_ipaddresses(self):
250         # No clean way to do this right now.
251         return None
252
253     def __do_chroot(self):
254         os.chroot(self.dir)
255         os.chdir("/")
256
    def chroot_call(self, fn, *args):
        """Call fn(*args) while chrooted into the vserver, then undo the chroot.

        Returns whatever `fn` returns; an exception raised by `fn`
        propagates after the original root and working directory have been
        restored.
        """

        # Keep a descriptor for the current working directory so it can be
        # restored once the call is done.
        cwd_fd = os.open(".", os.O_RDONLY)
        try:
            # Likewise keep a descriptor for the current root directory.
            root_fd = os.open("/", os.O_RDONLY)
            try:
                self.__do_chroot()
                result = fn(*args)
            finally:
                # Return to the saved root directory through its
                # descriptor, make it the root again, then go back to the
                # saved working directory.
                os.fchdir(root_fd)
                os.chroot(".")
                os.fchdir(cwd_fd)
                os.close(root_fd)
        finally:
            os.close(cwd_fd)
        return result
273
274     def set_disklimit(self, block_limit):
275         # block_limit is in kB
276         if block_limit == 0:
277             try:
278                 vserverimpl.unsetdlimit(self.dir, self.ctx)
279             except OSError, e:
280                 self.log("Unexpected error with unsetdlimit for context %d" % self.ctx)
281             return
282
283         if self.vm_running:
284             block_usage = vserverimpl.DLIMIT_KEEP
285             inode_usage = vserverimpl.DLIMIT_KEEP
286         else:
287             # init_disk_info() must have been called to get usage values
288             block_usage = self.disk_blocks
289             inode_usage = self.disk_inodes
290
291         try:
292             vserverimpl.setdlimit(self.dir,
293                                   self.ctx,
294                                   block_usage,
295                                   block_limit,
296                                   inode_usage,
297                                   vserverimpl.DLIMIT_INF,  # inode limit
298                                   2)   # %age reserved for root
299         except OSError, e:
300             self.log("Unexpected error with setdlimit for context %d" % self.ctx)
301
302
303         self.config.update('dlimits/0/space_total', block_limit)
304
305     def is_running(self):
306         return vserverimpl.isrunning(self.ctx)
307     
308     def get_disklimit(self):
309
310         try:
311             (self.disk_blocks, block_limit, self.disk_inodes, inode_limit,
312              reserved) = vserverimpl.getdlimit(self.dir, self.ctx)
313         except OSError, ex:
314             if ex.errno != errno.ESRCH:
315                 raise
316             # get here if no vserver disk limit has been set for xid
317             block_limit = -1
318
319         return block_limit
320
321     def set_sched_config(self, cpu_min, cpu_share):
322
323         """ Write current CPU scheduler parameters to the vserver
324         configuration file. This method does not modify the kernel CPU
325         scheduling parameters for this context. """
326
327         self.config.update('sched/fill-rate', cpu_min)
328         self.config.update('sched/fill-rate2', cpu_share)
329         if cpu_share == 0:
330             self.config.unset('sched/idle-time')
331         
332         if self.is_running():
333             self.set_sched(cpu_min, cpu_share)
334
335     def set_sched(self, cpu_min, cpu_share):
336         """ Update kernel CPU scheduling parameters for this context. """
337         vserverimpl.setsched(self.ctx, cpu_min, cpu_share)
338
339     def get_sched(self):
340         # have no way of querying scheduler right now on a per vserver basis
341         return (-1, False)
342
343     def set_bwlimit(self, minrate = bwlimit.bwmin, maxrate = None,
344                     exempt_min = None, exempt_max = None,
345                     share = None, dev = "eth0"):
346
347         if minrate is None:
348             bwlimit.off(self.ctx, dev)
349         else:
350             bwlimit.on(self.ctx, dev, share,
351                        minrate, maxrate, exempt_min, exempt_max)
352
353     def get_bwlimit(self, dev = "eth0"):
354
355         result = bwlimit.get(self.ctx)
356         # result of bwlimit.get is (ctx, share, minrate, maxrate)
357         if result:
358             result = result[1:]
359         return result
360
361     def open(self, filename, mode = "r", bufsize = -1):
362
363         return self.chroot_call(open, filename, mode, bufsize)
364
365     def __do_chcontext(self, state_file):
366
367         if state_file:
368             print >>state_file, "%u" % self.ctx
369             state_file.close()
370
371         if vserverimpl.chcontext(self.ctx, vserverimpl.text2bcaps(self.get_capabilities_config())):
372             self.set_resources(True)
373             vserverimpl.setup_done(self.ctx)
374
375
376     def __prep(self, runlevel):
377
378         """ Perform all the crap that the vserver script does before
379         actually executing the startup scripts. """
380
381
382         # set the initial runlevel
383         vserverimpl.setrunlevel(self.dir + "/utmp", runlevel)
384
385         # mount /proc and /dev/pts
386         self.__do_mount("none", self.dir, "/proc", "proc")
387         # XXX - magic mount options
388         self.__do_mount("none", self.dir, "/dev/pts", "devpts", 0, "gid=5,mode=0620")
389
390
391     def __cleanvar(self):
392         """
393         Clean the /var/ directory so RH startup scripts can run
394         """ 
395
396         RUNDIR = "/var/run"
397         LOCKDIR = "/var/lock/subsys"
398
399         filter = ["utmp"]
400         garbage = []
401         for topdir in [RUNDIR, LOCKDIR]:
402             #os.walk() = (dirpath, dirnames, filenames)
403             for root, dirs, files in os.walk(topdir):
404                 for file in files:
405                     if not file in filter:
406                         garbage.append(root + "/" + file)
407
408         for f in garbage: os.unlink(f)
409         return garbage
410
411
412     def __do_mount(self, *mount_args):
413         try:
414             vserverimpl.mount(*mount_args)
415         except OSError, ex:
416             if ex.errno == errno.EBUSY:
417                 # assume already mounted
418                 return
419             raise ex
420
421
422     def enter(self):
423         self.config.cache_it()
424         self.__do_chroot()
425         self.__do_chcontext(None)
426
427
    def start(self, runlevel = 3):
        """Boot the vserver in a forked child process.

        The parent marks the vserver as running and returns immediately,
        without waiting for the child.  The child detaches into its own
        session, prepares the vserver filesystem, chroots into it, enters
        the vserver context and spawns every script in INITSCRIPTS that
        exists, substituting `runlevel` into the script arguments.  The
        child always finishes with os._exit(0); a top-level failure is only
        recorded via self.log().
        """

        if (os.fork() != 0):
            # Parent should just return.
            self.vm_running = True
            return
        else:
            # child process
            try:
                # so we don't chcontext with priv'ed fds
                close_nonstandard_fds()

                # get a new session
                os.setsid()

                # open state file to record vserver info
                # (opened before the chroot below; the context id is written
                # to it later by __do_chcontext)
                state_file = open("/var/run/vservers/%s" % self.name, "w")

                # use /dev/null for stdin, /var/log/boot.log for stdout/err
                fd = os.open("/dev/null", os.O_RDONLY)
                if fd != 0:
                    os.dup2(fd, 0)
                    os.close(fd)

                # perform pre-init cleanup
                self.__prep(runlevel)

                # snapshot the configuration into memory before chrooting
                # (presumably because its files are not reachable from
                # inside the chroot -- TODO confirm)
                self.config.cache_it()
                self.__do_chroot()
                removed = self.__cleanvar()

                # opened after the chroot, so this path is resolved inside
                # the vserver; buffering is disabled (third argument 0)
                log = open("/var/log/boot.log", "a", 0)
                if log.fileno() != 1:
                    os.dup2(log.fileno(), 1)
                os.dup2(1, 2)

                print >>log, ("%s: removing %s" % 
                                (time.asctime(time.gmtime()), removed))
                print >>log, ("%s: starting the virtual server %s" %
                                (time.asctime(time.gmtime()), self.name))
                # execute each init script in turn
                # XXX - we don't support all scripts that vserver script does
                self.__do_chcontext(state_file)
                for cmd in self.INITSCRIPTS:
                    try:
                        # build this script's argv, filling in the runlevel
                        arg_subst = { 'runlevel': runlevel }
                        cmd_args = [cmd[0]] + map(lambda x: x % arg_subst,
                                                   cmd[1:])
                        if os.path.isfile(cmd[0]):                         
                            print >>log, "executing '%s'" % " ".join(cmd_args)
                            # P_NOWAIT: the script is started but not waited for
                            os.spawnvp(os.P_NOWAIT,cmd[0],cmd_args)
                    except:
                        # a failing script is logged and the next one is tried
                        print >>log, traceback.format_exc()

            # we get here due to an exception in the top-level child process
            except Exception, ex:
                self.log(traceback.format_exc())
            # the child must never return into the caller's code
            os._exit(0)
487
488     def set_resources(self,setup=False):
489
490         """ Called when vserver context is entered for first time,
491         should be overridden by subclass. """
492
493         pass
494
495     def init_disk_info(self):
496         try:
497             dlimit = vserverimpl.getdlimit(self.dir, self.ctx)
498             self.disk_blocks = dlimit[0]
499             self.disk_inodes = dlimit[2]
500             return self.disk_blocks * 1024
501         except Exception, e:
502             pass
503         cmd = "/usr/sbin/vdu --script --space --inodes --blocksize 1024 --xid %d %s" % (self.ctx, self.dir)
504         p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE,
505                              stdout=subprocess.PIPE, stderr=subprocess.PIPE,
506                              close_fds=True)
507         p.stdin.close()
508         line = p.stdout.readline()
509         if not line:
510             sys.stderr.write(p.stderr.read())
511         p.stdout.close()
512         p.stderr.close()
513         ret = p.wait()
514
515         (space, inodes) = line.split()
516         self.disk_inodes = int(inodes)
517         self.disk_blocks = int(space)
518         #(self.disk_inodes, self.disk_blocks) = vduimpl.vdu(self.dir)
519
520         return self.disk_blocks * 1024
521
522     def stop(self, signal = signal.SIGKILL):
523         vserverimpl.killall(self.ctx, signal)
524         self.vm_running = False
525
526     def setname(self, slice_id):
527         '''Set vcVHI_CONTEXT field in kernel to slice_id'''
528         vserverimpl.setname(self.ctx, slice_id)
529
530     def getname(self):
531         '''Get vcVHI_CONTEXT field in kernel'''
532         return vserverimpl.getname(self.ctx)
533
534
def create(vm_name, static = False, ctor = VServer):
    """Create a new vserver with `vuseradd` and return an object for it.

    vm_name -- name of the vserver (and of the account vuseradd creates)
    static  -- pass --static to vuseradd
    ctor    -- callable invoked as ctor(vm_name, vm_id) to build the result

    The context id handed to `ctor` is the uid of the `vm_name` account.
    Raises SystemError when vuseradd exits with a non-zero status or is
    killed by a signal.
    """
    options = ['vuseradd']
    if static:
        options += ['--static']
    ret = os.spawnvp(os.P_WAIT, 'vuseradd', options + [vm_name])

    # With P_WAIT, spawnvp returns an already decoded result: the exit code
    # if the child exited normally, or -signal if it was killed.  It is not
    # a raw wait() status, so the os.W* macros must not be applied to it.
    if ret != 0:
        out = "system command ('%s') " % options
        if ret > 0:
            out += "failed, rc = %d" % ret
        else:
            out += "killed by signal %d" % -ret
        raise SystemError(out)

    vm_id = pwd.getpwnam(vm_name)[2]
    return ctor(vm_name, vm_id)
551
552
def close_nonstandard_fds():
    """Close all open file descriptors other than 0, 1, and 2.

    The upper bound is the system's SC_OPEN_MAX value.  When that cannot be
    determined, a fixed bound of 1024 is used instead of silently closing
    nothing.
    """
    try:
        # Look the limit up by name rather than by a hard-coded index.
        max_fd = os.sysconf("SC_OPEN_MAX")
    except (ValueError, OSError):
        max_fd = -1
    if max_fd < 0:
        # sysconf reports -1 when the limit is indeterminate.
        max_fd = 1024

    fd = 3
    while fd < max_fd:
        try:
            os.close(fd)
        except OSError:
            pass  # most likely an fd that isn't open
        fd += 1
559