5c32676142ef9b02409f525229ddb6583d08240e
[util-vserver.git] / python / vserver.py
1 # Copyright 2005 Princeton University
2
3 #$Id: vserver.py,v 1.70 2007/08/01 21:48:42 dhozac Exp $
4
5 import errno
6 import fcntl
7 import os
8 import re
9 import pwd
10 import signal
11 import sys
12 import time
13 import traceback
14 import subprocess
15 import resource
16
17 import mountimpl
18 import runcmd
19 import utmp
20 import vserverimpl
21 import cpulimit, bwlimit
22
23 from vserverimpl import VS_SCHED_CPU_GUARANTEED as SCHED_CPU_GUARANTEED
24 from vserverimpl import DLIMIT_INF
25 from vserverimpl import VC_LIM_KEEP
26 from vserverimpl import VLIMIT_NSOCK
27 from vserverimpl import VLIMIT_OPENFD
28 from vserverimpl import VLIMIT_ANON
29 from vserverimpl import VLIMIT_SHMEM
30
#
# Context flags, copied from the kernel header linux/vserver/legacy.h.
# Every flag is a distinct single bit, so they can be OR-ed together.
#
FLAGS_LOCK = 1 << 0
FLAGS_SCHED = 1 << 1  # XXX - defined in util-vserver/src/chcontext.c
FLAGS_NPROC = 1 << 2
FLAGS_PRIVATE = 1 << 3
FLAGS_INIT = 1 << 4
FLAGS_HIDEINFO = 1 << 5
FLAGS_ULIMIT = 1 << 6
FLAGS_NAMESPACE = 1 << 7
42
# Maps a resource-limit name (as accepted by the VServer rlimit methods)
# to the identifier handed to vserverimpl.getrlimit()/setrlimit().
# The vserver-specific limits come first ...
RLIMITS = {
    "NSOCK": VLIMIT_NSOCK,
    "OPENFD": VLIMIT_OPENFD,
    "ANON": VLIMIT_ANON,
    "SHMEM": VLIMIT_SHMEM,
}

# ... then every RLIMIT_* constant the platform's resource module
# exposes is added under its name with the "RLIMIT_" prefix stripped.
for entry in vars(resource):
    if not entry.startswith("RLIMIT_"):
        continue
    k = entry[len("RLIMIT_"):]
    if k in RLIMITS:
        # never overwrite an existing entry; just report the clash
        sys.stdout.write("WARNING: duplicate RLIMITS key %s\n" % k)
    else:
        RLIMITS[k] = getattr(resource, entry)
56
class NoSuchVServer(Exception):
    """Raised when a vserver's root or configuration directory is not an
    accessible (read/write/search) directory."""
58
59
class VServerConfig:
    """File-per-option configuration store for one vserver.

    Each option is a file below ``directory`` (options may contain "/"
    and then live in sub-directories); the option's value is the file's
    content with trailing whitespace removed.

    After cache_it() has been called the object works purely from an
    in-memory snapshot: get() no longer touches the filesystem and
    update()/unset() become no-ops.
    """

    # Permission bits are built with int(..., 8) so that this source
    # parses regardless of which octal-literal spelling the interpreter
    # accepts.
    _UMASK = int("022", 8)
    _DIR_MODE = int("755", 8)

    def __init__(self, name, directory):
        """Bind to ``directory``.

        Raises NoSuchVServer unless ``directory`` is an existing
        directory that is readable, writable and searchable.
        """
        self.name = name
        self.dir = directory
        self.cache = None
        if not (os.path.isdir(self.dir) and
                os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
            raise NoSuchVServer("%s does not exist" % self.dir)

    def get(self, option, default = None):
        """Return the value of ``option``.

        If the option is not set (or cannot be read), ``default`` is
        returned when it is not None; otherwise KeyError is raised.
        """
        try:
            # "is not None": an empty snapshot is still a snapshot and
            # must not fall through to the filesystem.
            if self.cache is not None:
                return self.cache[option]
            f = open(os.path.join(self.dir, option), "r")
            try:
                return f.read().rstrip()
            finally:
                f.close()
        except (KeyError, IOError, OSError):
            if default is not None:
                return default
            raise KeyError("Key %s is not set for %s" % (option, self.name))

    def update(self, option, value):
        """Write ``value`` to the file for ``option``.

        A list is written one element per line, anything else via "%s";
        a trailing newline is always appended.  Missing parent
        directories are created.  Does nothing once cache_it() has been
        called.  Errors from opening or writing the file propagate.
        """
        if self.cache is not None:
            return

        filename = os.path.join(self.dir, option)
        old_umask = os.umask(self._UMASK)
        try:
            try:
                os.makedirs(os.path.dirname(filename), self._DIR_MODE)
            except OSError:
                # Best effort (typically "already exists"); a directory
                # that is really missing makes open() below fail.
                pass
            f = open(filename, 'w')
            try:
                if isinstance(value, list):
                    f.write("%s\n" % "\n".join(value))
                else:
                    f.write("%s\n" % value)
            finally:
                f.close()
        finally:
            # always give the process its original umask back
            os.umask(old_umask)

    def unset(self, option):
        """Delete the file for ``option``.

        Returns True if the file was removed and False if it could not
        be removed; returns None (without doing anything) once
        cache_it() has been called.  Parent directories left empty are
        pruned with os.removedirs().
        """
        if self.cache is not None:
            return

        filename = os.path.join(self.dir, option)
        try:
            os.unlink(filename)
        except OSError:
            return False
        try:
            os.removedirs(os.path.dirname(filename))
        except OSError:
            # parent still has entries (or cannot be removed): fine
            pass
        return True

    def cache_it(self):
        """Snapshot every readable regular file below the config
        directory into self.cache.

        Keys are paths relative to the config directory; values are the
        file contents with trailing whitespace removed, i.e. exactly
        what get() returns without a cache.  Symbolic links are skipped.
        """
        snapshot = {}
        prefix = os.path.join(self.dir, '')
        for dirname, subdirs, fnames in os.walk(self.dir):
            for fname in fnames:
                full_name = os.path.join(dirname, fname)
                if os.path.islink(full_name):
                    continue
                if not (os.path.isfile(full_name) and
                        os.access(full_name, os.R_OK)):
                    continue
                f = open(full_name, "r")
                try:
                    snapshot[full_name[len(prefix):]] = f.read().rstrip()
                finally:
                    f.close()
        # publish only a complete snapshot
        self.cache = snapshot
134
135
class VServer:
    """Handle on a single vserver.

    The vserver's root filesystem is ``VSERVER_BASEDIR/<name>`` and its
    configuration lives in ``/etc/vservers/<name>`` (see __init__).
    Methods ending in ``_config`` persist a setting through
    self.config; most of them also apply it to the kernel context
    ``self.ctx`` via vserverimpl.
    """

    # (program, argument, ...) tuples run in order by start(); arguments
    # are %-formatted with a dict that provides 'runlevel'.
    INITSCRIPTS = [('/etc/rc.vinit', 'start'),
                   ('/etc/rc.d/rc', '%(runlevel)d')]

    def __init__(self, name, vm_id = None, vm_running = None):
        """Bind to the vserver called ``name``.

        vm_id      -- context id; read from the 'context' config option
                      when None.
        vm_running -- running state; queried with is_running() when
                      None.

        Raises NoSuchVServer if the root directory (or, through
        VServerConfig, the configuration directory) is not accessible.
        """
        self.name = name
        self.rlimits_changed = False
        self.dir = "%s/%s" % (vserverimpl.VSERVER_BASEDIR, name)
        if not (os.path.isdir(self.dir) and
                os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
            raise NoSuchVServer("no such vserver: " + name)
        self.config = VServerConfig(name, "/etc/vservers/%s" % name)
        self.remove_caps = ~vserverimpl.CAP_SAFE
        if vm_id is None:
            vm_id = int(self.config.get('context'))
        self.ctx = vm_id
        if vm_running is None:
            vm_running = self.is_running()
        self.vm_running = vm_running

    def have_limits_changed(self):
        """Return True if set_rlimit_limit() has changed any limit since
        the flag was last reset (by start() or stop())."""
        return self.rlimits_changed

    def set_rlimit_limit(self,type,hard,soft,minimum):
        """Apply a resource limit to the kernel context.

        ``type`` is a key of RLIMITS.  The current limits are fetched
        first; if any current value that is not VC_LIM_KEEP differs from
        the requested one, self.rlimits_changed is set.  OSError from
        the underlying calls is swallowed, with a message printed only
        while the context is running.
        """
        try:
            old_hard, old_soft, old_minimum = self.get_rlimit_limit(type)
            for old, new in ((old_hard, hard),
                             (old_soft, soft),
                             (old_minimum, minimum)):
                if old != VC_LIM_KEEP and old != new:
                    self.rlimits_changed = True
        except OSError:
            if self.is_running():
                sys.stdout.write("Unexpected error with getrlimit for running context %d\n" % self.ctx)

        resource_type = RLIMITS[type]
        try:
            vserverimpl.setrlimit(self.ctx,resource_type,hard,soft,minimum)
        except OSError:
            if self.is_running():
                sys.stdout.write("Unexpected error with setrlimit for running context %d\n" % self.ctx)

    def set_rlimit_config(self,type,hard,soft,minimum):
        """Persist a resource limit and apply it.

        Each of hard/soft/minimum that is not VC_LIM_KEEP is written to
        'rlimits/<type lower-cased>.{hard,soft,min}'; afterwards
        set_rlimit_limit() is called with all three values.
        """
        prefix = 'rlimits/%s' % type.lower()
        if hard != VC_LIM_KEEP:
            self.config.update(prefix + '.hard', hard)
        if soft != VC_LIM_KEEP:
            self.config.update(prefix + '.soft', soft)
        if minimum != VC_LIM_KEEP:
            self.config.update(prefix + '.min', minimum)
        self.set_rlimit_limit(type,hard,soft,minimum)

    def get_rlimit_limit(self,type):
        """Return the limits for ``type`` as reported by
        vserverimpl.getrlimit(); if that raises OSError, print a message
        and fall back to the configured values (get_rlimit_config)."""
        resource_type = RLIMITS[type]
        try:
            ret = vserverimpl.getrlimit(self.ctx,resource_type)
        except OSError:
            sys.stdout.write("Unexpected error with getrlimit for context %d\n" % self.ctx)
            ret = self.get_rlimit_config(type)
        return ret

    def get_rlimit_config(self,type):
        """Return (hard, soft, minimum) from the configuration; a value
        that is not configured is reported as VC_LIM_KEEP."""
        prefix = "rlimits/%s" % type.lower()
        hard = int(self.config.get(prefix + ".hard", VC_LIM_KEEP))
        soft = int(self.config.get(prefix + ".soft", VC_LIM_KEEP))
        minimum = int(self.config.get(prefix + ".min", VC_LIM_KEEP))
        return (hard,soft,minimum)

    def set_WHITELISTED_config(self,whitelisted):
        """Store ``whitelisted`` in the 'whitelisted' config option."""
        self.config.update('whitelisted', whitelisted)

    def set_capabilities(self, capabilities):
        """Apply the textual capability list to the kernel context."""
        return vserverimpl.setbcaps(self.ctx, vserverimpl.text2bcaps(capabilities))

    def set_capabilities_config(self, capabilities):
        """Persist ``capabilities`` in 'bcapabilities' and apply them."""
        self.config.update('bcapabilities', capabilities)
        self.set_capabilities(capabilities)

    def get_capabilities(self):
        """Return the context's current capabilities as text."""
        return vserverimpl.bcaps2text(vserverimpl.getbcaps(self.ctx))

    def get_capabilities_config(self):
        """Return the configured capabilities ('' when unset)."""
        return self.config.get('bcapabilities', '')

    def set_ipaddresses(self, addresses):
        """Replace the context's addresses with the comma-separated
        list ``addresses``."""
        vserverimpl.netremove(self.ctx, "all")
        for a in addresses.split(","):
            vserverimpl.netadd(self.ctx, a)

    def set_ipaddresses_config(self, addresses):
        """Persist the comma-separated ``addresses`` as
        'interfaces/<n>/ip' (n = 0, 1, ...), drop stale higher-numbered
        entries, then apply the addresses."""
        entries = addresses.split(",")
        for i, a in enumerate(entries):
            self.config.update("interfaces/%d/ip" % i, a)
        # remove leftovers from a previously longer list; stops at the
        # first index that does not exist
        i = len(entries)
        while self.config.unset("interfaces/%d/ip" % i):
            i += 1
        self.set_ipaddresses(addresses)

    def get_ipaddresses_config(self):
        """Return the configured addresses joined by commas, reading
        'interfaces/<n>/ip' for n = 0, 1, ... until one is missing or
        empty."""
        ret = []
        i = 0
        while True:
            r = self.config.get("interfaces/%d/ip" % i, '')
            if r == '':
                break
            ret.append(r)
            i += 1
        return ",".join(ret)

    def get_ipaddresses(self):
        """Not implemented; always returns None."""
        # No clean way to do this right now.
        return None

    def __do_chroot(self):
        """chroot into the vserver's root directory.

        The configuration is snapshotted first (cache_it), so config
        lookups keep working without filesystem access afterwards.
        """
        self.config.cache_it()
        os.chroot(self.dir)
        os.chdir("/")

    def chroot_call(self, fn, *args):
        """Call ``fn(*args)`` while chroot'ed into the vserver and
        return its result.

        The previous root and working directory are restored through
        file descriptors opened beforehand, also when ``fn`` raises.
        """
        cwd_fd = os.open(".", os.O_RDONLY)
        try:
            root_fd = os.open("/", os.O_RDONLY)
            try:
                self.__do_chroot()
                result = fn(*args)
            finally:
                # step back to the old root via its fd, re-root there,
                # then return to the old working directory
                os.fchdir(root_fd)
                os.chroot(".")
                os.fchdir(cwd_fd)
                os.close(root_fd)
        finally:
            os.close(cwd_fd)
        return result

    def set_disklimit(self, block_limit):
        """Set the disk limit to ``block_limit`` kB; 0 removes it.

        When the vserver is not running, the usage figures gathered
        earlier (self.disk_blocks / self.disk_inodes) are passed along,
        so init_disk_info() must have been called before.  OSError from
        vserverimpl is reported on stdout and otherwise ignored.  A
        non-zero limit is also stored in 'dlimits/0/space_total'.
        """
        # block_limit is in kB
        if block_limit == 0:
            try:
                vserverimpl.unsetdlimit(self.dir, self.ctx)
            except OSError:
                sys.stdout.write("Unexpected error with unsetdlimit for context %d\n" % self.ctx)
            return

        if self.vm_running:
            block_usage = vserverimpl.DLIMIT_KEEP
            inode_usage = vserverimpl.DLIMIT_KEEP
        else:
            # init_disk_info() must have been called to get usage values
            block_usage = self.disk_blocks
            inode_usage = self.disk_inodes

        try:
            vserverimpl.setdlimit(self.dir,
                                  self.ctx,
                                  block_usage,
                                  block_limit,
                                  inode_usage,
                                  vserverimpl.DLIMIT_INF,  # inode limit
                                  2)   # %age reserved for root
        except OSError:
            sys.stdout.write("Unexpected error with setdlimit for context %d\n" % self.ctx)

        self.config.update('dlimits/0/space_total', block_limit)

    def is_running(self):
        """Return vserverimpl.isrunning() for this context."""
        return vserverimpl.isrunning(self.ctx)

    def get_disklimit(self):
        """Return the current block limit, or -1 if none is set.

        On success self.disk_blocks and self.disk_inodes are refreshed
        as a side effect.  OSError other than ESRCH propagates.
        """
        try:
            (self.disk_blocks, block_limit, self.disk_inodes, inode_limit,
             reserved) = vserverimpl.getdlimit(self.dir, self.ctx)
        except OSError:
            if sys.exc_info()[1].errno != errno.ESRCH:
                raise
            # get here if no vserver disk limit has been set for xid
            block_limit = -1

        return block_limit

    def set_sched_config(self, cpu_share, sched_flags):
        """Write the CPU scheduler parameters to the configuration and,
        if the vserver is running, apply them with set_sched().

        'sched/fill-rate2' receives cpu_share; 'sched/fill-rate'
        receives cpu_share when SCHED_CPU_GUARANTEED is set in
        sched_flags and 0 otherwise.
        """
        if sched_flags & SCHED_CPU_GUARANTEED:
            cpu_guaranteed = cpu_share
        else:
            cpu_guaranteed = 0
        self.config.update('sched/fill-rate2', cpu_share)
        self.config.update('sched/fill-rate', cpu_guaranteed)

        if self.vm_running:
            self.set_sched(cpu_share, sched_flags)

    def set_sched(self, cpu_share, sched_flags = 0):
        """ Update kernel CPU scheduling parameters for this context. """
        vserverimpl.setsched(self.ctx, cpu_share, sched_flags)

    def get_sched(self):
        """Placeholder; always returns (-1, False)."""
        # have no way of querying scheduler right now on a per vserver basis
        return (-1, False)

    def set_bwlimit(self, minrate = bwlimit.bwmin, maxrate = None,
                    exempt_min = None, exempt_max = None,
                    share = None, dev = "eth0"):
        """Enable bandwidth limiting on ``dev`` with the given
        parameters, or switch it off when ``minrate`` is None."""
        if minrate is None:
            bwlimit.off(self.ctx, dev)
        else:
            bwlimit.on(self.ctx, dev, share,
                       minrate, maxrate, exempt_min, exempt_max)

    def get_bwlimit(self, dev = "eth0"):
        """Return bwlimit.get() for this context without its first
        element; a false result is returned unchanged."""
        # NOTE(review): ``dev`` is accepted but never passed to
        # bwlimit.get() -- confirm whether that is intended.
        result = bwlimit.get(self.ctx)
        # result of bwlimit.get is (ctx, share, minrate, maxrate)
        if result:
            result = result[1:]
        return result

    def open(self, filename, mode = "r", bufsize = -1):
        """Open ``filename`` relative to the vserver's root (the builtin
        open() is run through chroot_call)."""
        return self.chroot_call(open, filename, mode, bufsize)

    def __do_chcontext(self, state_file):
        """Enter the vserver's context.

        If ``state_file`` is given, the context id is written to it and
        the file is closed first.  When vserverimpl.chcontext() returns
        a true value, set_resources() is run followed by
        vserverimpl.setup_done().
        """
        if state_file:
            state_file.write("%u\n" % self.ctx)
            state_file.close()

        if vserverimpl.chcontext(self.ctx, vserverimpl.text2bcaps(self.get_capabilities_config())):
            self.set_resources()
            vserverimpl.setup_done(self.ctx)

    def __prep(self, runlevel, log):
        """ Perform all the crap that the vserver script does before
        actually executing the startup scripts. """

        # remove /var/run and /var/lock/subsys files
        # but don't remove utmp from the top-level /var/run
        RUNDIR = "/var/run"
        LOCKDIR = "/var/lock/subsys"

        garbage = []
        for dirpath, subdirs, files in os.walk(RUNDIR):
            for f in files:
                if dirpath == RUNDIR and f == 'utmp':
                    continue
                garbage.append(dirpath + "/" + f)
        for entry in os.listdir(LOCKDIR):
            path = LOCKDIR + "/" + entry
            if os.path.isfile(path):
                garbage.append(path)
        # The removal itself is switched off; the list is still built,
        # so a missing LOCKDIR keeps raising OSError from os.listdir().
        if False:
            for f in garbage:
                os.unlink(f)

        # set the initial runlevel
        f = open(RUNDIR + "/utmp", "w")
        try:
            utmp.set_runlevel(f, runlevel)
        finally:
            f.close()

        # mount /proc and /dev/pts
        self.__do_mount("none", "/proc", "proc")
        # XXX - magic mount options
        self.__do_mount("none", "/dev/pts", "devpts", 0, "gid=5,mode=0620")

    def __do_mount(self, *mount_args):
        """Call mountimpl.mount(*mount_args), treating EBUSY as
        "already mounted"; any other OSError propagates."""
        try:
            mountimpl.mount(*mount_args)
        except OSError:
            if sys.exc_info()[1].errno != errno.EBUSY:
                # bare raise keeps the original traceback
                raise

    def enter(self):
        """chroot into the vserver and enter its context in the calling
        process."""
        self.__do_chroot()
        self.__do_chcontext(None)

    def start(self, wait, runlevel = 3):
        """Fork a child that boots the vserver; return the child's pid.

        The child starts a new session, records the context id in
        /var/run/vservers/<name>, chroots, redirects stdout/stderr to
        /var/log/boot.log inside the vserver, prepares /var/run, /proc
        and /dev/pts, enters the context and runs INITSCRIPTS in order.
        It exits with status 0 on success and 1 if anything raised.

        ``wait`` is accepted but not used by this method.
        """
        self.vm_running = True
        self.rlimits_changed = False

        child_pid = os.fork()
        if child_pid == 0:
            # child process
            status = 1
            try:
                try:
                    # get a new session
                    os.setsid()

                    # open state file to record vserver info
                    state_file = open("/var/run/vservers/%s" % self.name, "w")

                    # use /dev/null for stdin, /var/log/boot.log for stdout/err
                    fd = os.open("/dev/null", os.O_RDONLY)
                    if fd != 0:
                        os.dup2(fd, 0)
                        os.close(fd)
                    self.__do_chroot()
                    log = open("/var/log/boot.log", "w", 0)
                    if log.fileno() != 1:
                        os.dup2(log.fileno(), 1)
                    os.dup2(1, 2)

                    log.write("%s: starting the virtual server %s\n" %
                              (time.asctime(time.gmtime()), self.name))

                    # perform pre-init cleanup
                    self.__prep(runlevel, log)

                    # execute each init script in turn
                    # XXX - we don't support all scripts that vserver script does
                    self.__do_chcontext(state_file)
                    arg_subst = { 'runlevel': runlevel }
                    for cmd in self.INITSCRIPTS:
                        cmd_args = [cmd[0]] + [arg % arg_subst
                                               for arg in cmd[1:]]
                        log.write("executing '%s'\n" % " ".join(cmd_args))
                        os.spawnvp(os.P_WAIT, cmd[0], cmd_args)
                    status = 0
                except:
                    # Deliberately catches everything: nothing may
                    # escape from the forked child into the caller's
                    # stack frames.
                    traceback.print_exc()
            finally:
                os._exit(status)

        # parent process
        return child_pid

    def set_resources(self):

        """ Called when vserver context is entered for first time,
        should be overridden by subclass. """

        pass

    def init_disk_info(self):
        """Measure disk usage with /usr/sbin/vdu.

        Stores the results in self.disk_blocks (1024-byte blocks) and
        self.disk_inodes, and returns the space used in bytes.  Raises
        ValueError if vdu's output cannot be parsed; vdu's stderr is
        forwarded to sys.stderr when it printed nothing on stdout.
        """
        # Argument list instead of a shell string: the path is passed
        # to vdu verbatim, whatever characters it contains.
        cmd = ["/usr/sbin/vdu", "--script", "--space", "--inodes",
               "--blocksize", "1024", "--xid", "%d" % self.ctx, self.dir]
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             close_fds=True)
        # communicate() closes stdin, drains both pipes and waits
        out, err = p.communicate()
        lines = out.splitlines()
        if not lines:
            sys.stderr.write(err)
            raise ValueError("vdu produced no output for context %d" % self.ctx)

        (space, inodes) = lines[0].split()
        self.disk_inodes = int(inodes)
        self.disk_blocks = int(space)
        #(self.disk_inodes, self.disk_blocks) = vduimpl.vdu(self.dir)

        return self.disk_blocks * 1024

    def stop(self, signal = signal.SIGKILL):
        """Send ``signal`` to the context via vserverimpl.killall() and
        mark the vserver as stopped."""
        vserverimpl.killall(self.ctx, signal)
        self.vm_running = False
        self.rlimits_changed = False
509
510
511
def create(vm_name, static = False, ctor = VServer):
    """Create a vserver by running vuseradd and return a handle on it.

    vm_name -- name handed to vuseradd, then looked up in the password
               database
    static  -- when true, '--static' is passed to vuseradd
    ctor    -- callable invoked as ctor(vm_name, vm_id) to build the
               return value

    The context id given to ``ctor`` is the uid of the passwd entry
    named ``vm_name``.
    """
    vuseradd_args = [vm_name]
    if static:
        vuseradd_args.insert(0, '--static')
    runcmd.run('vuseradd', vuseradd_args)

    account = pwd.getpwnam(vm_name)
    return ctor(vm_name, account.pw_uid)