Update to code actually running on Trellis v0.1
[util-vserver-pl.git] / python / vserver.py
1 # Copyright 2005 Princeton University
2
3 #$Id: vserver.py,v 1.72 2007/08/02 16:01:59 dhozac Exp $
4
5 import errno
6 import fcntl
7 import os
8 import re
9 import pwd
10 import signal
11 import sys
12 import time
13 import traceback
14 import subprocess
15 import resource
16
17 import vserverimpl
18 import cpulimit, bwlimit
19
20 from vserverimpl import DLIMIT_INF
21 from vserverimpl import VC_LIM_KEEP
22 from vserverimpl import VLIMIT_NSOCK
23 from vserverimpl import VLIMIT_OPENFD
24 from vserverimpl import VLIMIT_ANON
25 from vserverimpl import VLIMIT_SHMEM
26
#
# These are the context flags taken from the kernel header
# linux/vserver/legacy.h.
#
FLAGS_LOCK = 1
FLAGS_SCHED = 2  # XXX - defined in util-vserver/src/chcontext.c
FLAGS_NPROC = 4
FLAGS_PRIVATE = 8
FLAGS_INIT = 16
FLAGS_HIDEINFO = 32
FLAGS_ULIMIT = 64
FLAGS_NAMESPACE = 128

# Maps a resource-limit name (no prefix, upper case) to its numeric
# identifier.  Seeded with the limits imported from vserverimpl.
RLIMITS = { "NSOCK": VLIMIT_NSOCK,
            "OPENFD": VLIMIT_OPENFD,
            "ANON": VLIMIT_ANON,
            "SHMEM": VLIMIT_SHMEM}

# add in the platform supported rlimits: every RLIMIT_* constant of the
# standard "resource" module, keyed by its name without the "RLIMIT_" prefix
for entry in list(vars(resource)):
    if entry.startswith("RLIMIT_"):
        k = entry[len("RLIMIT_"):]
        if k not in RLIMITS:
            RLIMITS[k] = getattr(resource, entry)
        else:
            # a vserverimpl limit already uses this name; keep it and warn
            sys.stdout.write("WARNING: duplicate RLIMITS key %s\n" % k)
52
class NoSuchVServer(Exception):
    """Raised when a vserver's directory is missing or not accessible."""
54
55
class VServerConfig:
    """File-backed key/value configuration of one vserver.

    Every option is stored as a file below ``directory``: the option name
    is the path relative to that directory (e.g. ``rlimits/nsock.hard``) and
    the value is the file's content with trailing whitespace stripped.

    After cache_it() has been called the object becomes a read-only
    snapshot: get() is answered from memory, update()/unset() do nothing.
    """

    def __init__(self, name, directory):
        """Bind to the configuration directory of vserver ``name``.

        Raises NoSuchVServer when ``directory`` is not a directory that this
        process can read, write and search.
        """
        self.name = name
        self.dir = directory
        # None until cache_it() is called, then a dict option -> value
        self.cache = None
        if not (os.path.isdir(self.dir) and
                os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
            raise NoSuchVServer("%s does not exist" % self.dir)

    def get(self, option, default = None):
        """Return the value of ``option`` as a string.

        When the option is not set (or its file cannot be read), return
        ``default`` if one other than None was given, else raise KeyError.
        """
        try:
            # "is not None": an empty snapshot is still a snapshot and must
            # not fall through to the filesystem
            if self.cache is not None:
                return self.cache[option]
            f = open(os.path.join(self.dir, option), "r")
            try:
                return f.read().rstrip()
            finally:
                f.close()
        except (KeyError, IOError, OSError):
            if default is not None:
                return default
            raise KeyError("Key %s is not set for %s" % (option, self.name))

    def update(self, option, value):
        """Write ``value`` (plus a newline) to the file of ``option``.

        A list value is written one element per line.  Missing parent
        directories are created.  Does nothing once cache_it() was called.
        Errors from opening or writing the file propagate to the caller.
        """
        if self.cache is not None:
            return

        filename = os.path.join(self.dir, option)
        # files/directories are created with a fixed umask of 022; restore
        # the caller's umask no matter how we leave
        old_umask = os.umask(0o022)
        try:
            try:
                os.makedirs(os.path.dirname(filename), 0o755)
            except OSError:
                # normally "already exists"; a genuine failure shows up
                # again when the file is opened below
                pass
            f = open(filename, 'w')
            try:
                if isinstance(value, list):
                    f.write("%s\n" % "\n".join(value))
                else:
                    f.write("%s\n" % value)
            finally:
                f.close()
        finally:
            os.umask(old_umask)

    def unset(self, option):
        """Delete the file of ``option``.

        Returns True if the file was removed, False if it could not be
        removed (e.g. it did not exist), and None once cache_it() was
        called.  Parent directories left empty are pruned (os.removedirs).
        """
        if self.cache is not None:
            return

        filename = os.path.join(self.dir, option)
        try:
            os.unlink(filename)
        except OSError:
            return False
        try:
            os.removedirs(os.path.dirname(filename))
        except OSError:
            # parent still has other entries: nothing to prune
            pass
        return True

    def cache_it(self):
        """Load every readable regular file below the directory into memory.

        Symbolic links are not cached.  Keys are paths relative to the
        configuration directory.
        """
        snapshot = {}
        # os.walk does not descend into symlinked directories by default
        for dirpath, dirnames, filenames in os.walk(self.dir):
            for entry in filenames:
                full_name = os.path.join(dirpath, entry)
                if os.path.islink(full_name):
                    continue
                if (os.path.isfile(full_name) and
                        os.access(full_name, os.R_OK)):
                    f = open(full_name, "r")
                    try:
                        key = os.path.relpath(full_name, self.dir)
                        snapshot[key] = f.read().rstrip()
                    finally:
                        f.close()
        self.cache = snapshot
130
131
class VServer:
    """One vserver: a context id plus its root filesystem and configuration.

    The root filesystem is ``<vserverimpl.VSERVER_BASEDIR>/<name>`` and the
    persistent configuration is kept in ``/etc/vservers/<name>`` (through
    VServerConfig).  Kernel-side operations are delegated to the
    vserverimpl, bwlimit modules.
    """

    # (script, argument template, ...) tuples run in order by start(); each
    # template is %-formatted with a dict that holds 'runlevel'
    INITSCRIPTS = [('/etc/rc.vinit', 'start'),
                   ('/etc/rc.d/rc', '%(runlevel)d')]

    def __init__(self, name, vm_id = None, vm_running = None, logfile=None):
        """Attach to the existing vserver ``name``.

        name       -- vserver name (directory name below the base dir)
        vm_id      -- context id; read from the 'context' config option
                      when None
        vm_running -- whether the context is running; queried with
                      is_running() when None
        logfile    -- path log() appends to; logging is off when unset

        Raises NoSuchVServer if the root or the config directory is not
        accessible.
        """
        self.name = name
        self.dir = "%s/%s" % (vserverimpl.VSERVER_BASEDIR, name)
        if not (os.path.isdir(self.dir) and
                os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
            raise NoSuchVServer("no such vserver: " + name)
        self.config = VServerConfig(name, "/etc/vservers/%s" % name)
        self.remove_caps = ~vserverimpl.CAP_SAFE
        if vm_id is None:
            vm_id = int(self.config.get('context'))
        self.ctx = vm_id
        if vm_running is None:
            vm_running = self.is_running()
        self.vm_running = vm_running
        self.logfile = logfile

    # inspired from nodemanager's logger
    def log(self,msg):
        """Append a UTC-timestamped ``msg`` to self.logfile.

        Does nothing when no logfile is configured.  If the file cannot be
        written the message goes to stdout instead.
        """
        if not self.logfile:
            return
        try:
            fd = os.open(self.logfile,
                         os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o600)
            try:
                if not msg.endswith('\n'): msg += '\n'
                os.write(fd, '%s: %s' % (time.asctime(time.gmtime()), msg))
            finally:
                # never leak the descriptor, even when the write fails
                os.close(fd)
        except Exception:
            sys.stdout.write('%s: (%s failed to open) %s\n'
                             % (time.asctime(time.gmtime()), self.logfile, msg))

    def set_rlimit(self, type, hard, soft, min):
        """Generic set resource limit function for vserver.

        Each of hard/soft/min that differs from VC_LIM_KEEP is written to
        the config as 'rlimits/<type lower-cased>.{hard,soft,min}'.  If
        anything was written and the context is running, the limits are
        also applied with vserverimpl.setrlimit (``type`` must then be a
        key of RLIMITS); an OSError from that call is logged, not raised.

        Returns True if at least one value was written.
        """
        update = False
        for suffix, value in (('hard', hard), ('soft', soft), ('min', min)):
            if value != VC_LIM_KEEP:
                self.config.update('rlimits/%s.%s' % (type.lower(), suffix),
                                   value)
                update = True

        if self.is_running() and update:
            resource_type = RLIMITS[type]
            try:
                vserverimpl.setrlimit(self.ctx, resource_type, hard, soft, min)
            except OSError as e:
                self.log("Error: setrlimit(%d, %s, %d, %d, %d): %s"
                         % (self.ctx, type.lower(), hard, soft, min, e))

        return update

    def get_prefix_from_capabilities(self, capabilities, prefix):
        """Return the comma-separated entries of ``capabilities`` that start
        with ``prefix`` written all upper-case or all lower-case."""
        wanted = (prefix.upper(), prefix.lower())
        return ",".join([c for c in capabilities.split(',')
                         if c.startswith(wanted)])

    def get_bcaps_from_capabilities(self, capabilities):
        """Return the 'cap_'-prefixed entries of ``capabilities``."""
        return self.get_prefix_from_capabilities(capabilities, "cap_")

    def get_ccaps_from_capabilities(self, capabilities):
        """Return the 'vxc_'-prefixed entries of ``capabilities``."""
        return self.get_prefix_from_capabilities(capabilities, "vxc_")

    def set_capabilities_config(self, capabilities):
        """Store the capabilities in the config and apply them to the context.

        ``capabilities`` is a comma-separated string; it is split into the
        'bcapabilities' (cap_*) and 'ccapabilities' (vxc_*) options.
        Returns the result of vserverimpl.setbcaps if it is > 0, otherwise
        the result of vserverimpl.setccaps.
        """
        bcaps = self.get_bcaps_from_capabilities(capabilities)
        ccaps = self.get_ccaps_from_capabilities(capabilities)
        self.config.update('bcapabilities', bcaps)
        self.config.update('ccapabilities', ccaps)
        ret = vserverimpl.setbcaps(self.ctx, vserverimpl.text2bcaps(bcaps))
        if ret > 0:
            return ret
        return vserverimpl.setccaps(self.ctx, vserverimpl.text2ccaps(ccaps))

    def get_capabilities(self):
        """Return the context's current bcaps and ccaps as one
        comma-separated string (queried through vserverimpl)."""
        bcaps = vserverimpl.bcaps2text(vserverimpl.getbcaps(self.ctx))
        ccaps = vserverimpl.ccaps2text(vserverimpl.getccaps(self.ctx))
        if bcaps and ccaps:
            ccaps = "," + ccaps
        return (bcaps + ccaps)

    def get_capabilities_config(self):
        """Return the configured bcaps and ccaps as one comma-separated
        string; options that are not set count as empty."""
        bcaps = self.config.get('bcapabilities', '')
        ccaps = self.config.get('ccapabilities', '')
        if bcaps and ccaps:
            ccaps = "," + ccaps
        return (bcaps + ccaps)

    def set_ipaddresses(self, addresses):
        """Replace the context's addresses with the comma-separated
        ``addresses`` (netremove "all", then one netadd per entry)."""
        vserverimpl.netremove(self.ctx, "all")
        for a in addresses.split(","):
            vserverimpl.netadd(self.ctx, a)

    def set_ipaddresses_config(self, addresses):
        """Store ``addresses`` as 'interfaces/<i>/ip' options, drop left-over
        higher-numbered entries, then apply them with set_ipaddresses()."""
        i = 0
        for a in addresses.split(","):
            self.config.update("interfaces/%d/ip" % i, a)
            i += 1
        # remove stale entries from a previous, longer address list
        while self.config.unset("interfaces/%d/ip" % i):
            i += 1
        self.set_ipaddresses(addresses)

    def get_ipaddresses_config(self):
        """Return the configured addresses, comma-separated, reading
        'interfaces/0/ip', 'interfaces/1/ip', ... up to the first gap."""
        i = 0
        ret = []
        while True:
            r = self.config.get("interfaces/%d/ip" % i, '')
            if r == '':
                break
            ret.append(r)
            i += 1
        return ",".join(ret)

    def get_ipaddresses(self):
        """Not implemented; always returns None."""
        # No clean way to do this right now.
        return None

    def get_unshare_netns_config(self):
        """Return the 'spaces/net' option as an int, or 0 when it is not
        set or not a number."""
        try:
            return int(self.config.get('spaces/net'))
        except (KeyError, ValueError):
            return 0

    def __do_chroot(self):
        """chroot() into the vserver's root and make "/" the cwd."""
        os.chroot(self.dir)
        os.chdir("/")

    def chroot_call(self, fn, *args):
        """Return fn(*args), called with the vserver's root as "/".

        The previous root and working directory are restored afterwards,
        also when ``fn`` raises.
        """
        cwd_fd = os.open(".", os.O_RDONLY)
        try:
            root_fd = os.open("/", os.O_RDONLY)
            try:
                self.__do_chroot()
                result = fn(*args)
            finally:
                # step back out through the saved root descriptor, then
                # return to the original working directory
                os.fchdir(root_fd)
                os.chroot(".")
                os.fchdir(cwd_fd)
                os.close(root_fd)
        finally:
            os.close(cwd_fd)
        return result

    def set_disklimit(self, block_limit):
        """Set the disk space limit; 0 removes the limit.

        block_limit is in kB.  For a context that is not running the current
        usage is taken from self.disk_blocks / self.disk_inodes, so
        init_disk_info() (or a successful get_disklimit()) must have been
        called before.  OSErrors from vserverimpl are logged, not raised.
        """
        if block_limit == 0:
            try:
                vserverimpl.unsetdlimit(self.dir, self.ctx)
            except OSError:
                self.log("Unexpected error with unsetdlimit for context %d" % self.ctx)
            return

        if self.vm_running:
            block_usage = vserverimpl.DLIMIT_KEEP
            inode_usage = vserverimpl.DLIMIT_KEEP
        else:
            # init_disk_info() must have been called to get usage values
            block_usage = self.disk_blocks
            inode_usage = self.disk_inodes

        try:
            vserverimpl.setdlimit(self.dir,
                                  self.ctx,
                                  block_usage,
                                  block_limit,
                                  inode_usage,
                                  vserverimpl.DLIMIT_INF,  # inode limit
                                  2)   # %age reserved for root
        except OSError:
            self.log("Unexpected error with setdlimit for context %d" % self.ctx)

        # NOTE(review): the config is updated even when setdlimit failed
        self.config.update('dlimits/0/space_total', block_limit)

    def is_running(self):
        """Return vserverimpl.isrunning() for this context."""
        return vserverimpl.isrunning(self.ctx)

    def get_disklimit(self):
        """Return the current block limit, or -1 if none is set.

        As a side effect refreshes self.disk_blocks and self.disk_inodes
        when a limit record exists.  OSErrors other than ESRCH propagate.
        """
        try:
            (self.disk_blocks, block_limit, self.disk_inodes, inode_limit,
             reserved) = vserverimpl.getdlimit(self.dir, self.ctx)
        except OSError as ex:
            if ex.errno != errno.ESRCH:
                raise
            # get here if no vserver disk limit has been set for xid
            block_limit = -1

        return block_limit

    def set_sched_config(self, cpu_min, cpu_share):
        """ Write the CPU scheduler parameters to the vserver configuration
        ('sched/fill-rate' and 'sched/fill-rate2'; 'sched/idle-time' is
        removed when cpu_share is 0).  If the context is running the
        parameters are also applied with set_sched(). """

        self.config.update('sched/fill-rate', cpu_min)
        self.config.update('sched/fill-rate2', cpu_share)
        if cpu_share == 0:
            self.config.unset('sched/idle-time')

        if self.is_running():
            self.set_sched(cpu_min, cpu_share)

    def set_sched(self, cpu_min, cpu_share):
        """ Update kernel CPU scheduling parameters for this context. """
        vserverimpl.setsched(self.ctx, cpu_min, cpu_share)

    def get_sched(self):
        """Not implemented; always returns (-1, False)."""
        # have no way of querying scheduler right now on a per vserver basis
        return (-1, False)

    def set_bwlimit(self, minrate = bwlimit.bwmin, maxrate = None,
                    exempt_min = None, exempt_max = None,
                    share = None, dev = "eth0"):
        """Turn bandwidth limiting on for ``dev`` with the given rates and
        share, or off when ``minrate`` is None (bwlimit.on / bwlimit.off)."""
        if minrate is None:
            bwlimit.off(self.ctx, dev)
        else:
            bwlimit.on(self.ctx, dev, share,
                       minrate, maxrate, exempt_min, exempt_max)

    def get_bwlimit(self, dev = "eth0"):
        """Return bwlimit.get() for this context without its first element
        (a falsy result is returned unchanged)."""
        # NOTE(review): ``dev`` is accepted but not passed on to bwlimit.get
        result = bwlimit.get(self.ctx)
        # result of bwlimit.get is (ctx, share, minrate, maxrate)
        if result:
            result = result[1:]
        return result

    def open(self, filename, mode = "r", bufsize = -1):
        """Open ``filename`` relative to the vserver's root (the built-in
        open() run through chroot_call) and return the file object."""
        return self.chroot_call(open, filename, mode, bufsize)

    def __do_chcontext(self, state_file):
        """Enter the vserver's context.

        If ``state_file`` is given, the context id is written to it and it
        is closed first.  When vserverimpl.chcontext returns a true value,
        set_resources() and vserverimpl.setup_done() are called.
        """
        if state_file:
            state_file.write("%u\n" % self.ctx)
            state_file.close()

        if vserverimpl.chcontext(self.ctx, vserverimpl.text2bcaps(self.get_capabilities_config()), self.get_unshare_netns_config()):
            self.set_resources()
            vserverimpl.setup_done(self.ctx)

    def __prep(self, runlevel):
        """ Perform the housekeeping that the vserver script does before
        actually executing the startup scripts: record the runlevel in
        /var/run/utmp and mount /proc and /dev/pts for the vserver. """

        RUNDIR = "/var/run"
        LOCKDIR = "/var/lock/subsys"

        # Stale files: everything below /var/run (except utmp in the
        # top-level /var/run) plus the regular files in /var/lock/subsys.
        garbage = []
        at_top = True
        for dirpath, subdirs, files in os.walk(RUNDIR):
            if at_top:
                files = [f for f in files if f != 'utmp']
                at_top = False
            garbage += [dirpath + "/" + f for f in files]
        garbage += [path for path in
                    [LOCKDIR + "/" + entry for entry in os.listdir(LOCKDIR)]
                    if os.path.isfile(path)]
        # Removal is intentionally disabled (it already was); the list is
        # still built so a missing LOCKDIR keeps raising as before.
        if False:
            for f in garbage:
                os.unlink(f)

        # set the initial runlevel
        vserverimpl.setrunlevel(RUNDIR + "/utmp", runlevel)

        # mount /proc and /dev/pts
        self.__do_mount("none", self.dir, "/proc", "proc")
        # XXX - magic mount options
        self.__do_mount("none", self.dir, "/dev/pts", "devpts", 0, "gid=5,mode=0620")

    def __do_mount(self, *mount_args):
        """Call vserverimpl.mount(*mount_args); an EBUSY error is taken to
        mean "already mounted" and ignored, other OSErrors propagate."""
        try:
            vserverimpl.mount(*mount_args)
        except OSError as ex:
            if ex.errno == errno.EBUSY:
                # assume already mounted
                return
            raise

    def enter(self):
        """Move the calling process into the vserver: snapshot the config,
        chroot into its filesystem and switch to its context."""
        self.config.cache_it()
        self.__do_chroot()
        self.__do_chcontext(None)

    def start(self, runlevel = 3):
        """Boot the vserver in a forked child process.

        The parent marks the vserver as running and returns immediately; it
        does not wait for the child.  The child enters the vserver, spawns
        each existing script of INITSCRIPTS without waiting for it and then
        exits.  Errors in the child are logged, never raised to the caller.
        """
        if (os.fork() != 0):
            # Parent should just return.
            self.vm_running = True
            return

        # child process -- must never return into the caller's code, so the
        # exit is in a "finally"
        try:
            try:
                # so we don't chcontext with priv'ed fds
                close_nonstandard_fds()

                # get a new session
                os.setsid()

                # open state file to record vserver info
                state_file = open("/var/run/vservers/%s" % self.name, "w")

                # use /dev/null for stdin, /var/log/boot.log for stdout/err
                fd = os.open("/dev/null", os.O_RDONLY)
                if fd != 0:
                    os.dup2(fd, 0)
                    os.close(fd)

                # perform pre-init cleanup
                self.__prep(runlevel)

                # the config directory is unreachable after the chroot, so
                # snapshot it first
                self.config.cache_it()
                self.__do_chroot()
                # this is the boot.log inside the vserver's filesystem
                log = open("/var/log/boot.log", "a", 0)
                if log.fileno() != 1:
                    os.dup2(log.fileno(), 1)
                os.dup2(1, 2)

                log.write("%s: starting the virtual server %s\n" %
                          (time.asctime(time.gmtime()), self.name))
                # execute each init script in turn
                # XXX - we don't support all scripts that vserver script does
                self.__do_chcontext(state_file)
                arg_subst = { 'runlevel': runlevel }
                for cmd in self.INITSCRIPTS:
                    try:
                        cmd_args = [cmd[0]] + [x % arg_subst for x in cmd[1:]]
                        if os.path.isfile(cmd[0]):
                            log.write("executing '%s'\n" % " ".join(cmd_args))
                            os.spawnvp(os.P_NOWAIT,cmd[0],cmd_args)
                    except Exception:
                        # a failing script must not stop the following ones
                        log.write(traceback.format_exc() + "\n")

            # we get here due to an exception in the top-level child process
            except Exception:
                self.log(traceback.format_exc())
        finally:
            os._exit(0)

    def set_resources(self):

        """ Called when vserver context is entered for first time,
        should be overridden by subclass. """

        pass

    def init_disk_info(self):
        """Determine the current disk usage and return disk_blocks * 1024.

        Sets self.disk_blocks and self.disk_inodes, from the existing disk
        limit record if vserverimpl.getdlimit succeeds, otherwise by running
        /usr/sbin/vdu (block size 1024) over the vserver's directory.
        Raises ValueError if vdu's first output line is not two numbers.
        """
        try:
            dlimit = vserverimpl.getdlimit(self.dir, self.ctx)
            self.disk_blocks = dlimit[0]
            self.disk_inodes = dlimit[2]
            return self.disk_blocks * 1024
        except Exception:
            # no usable limit record: fall back to scanning with vdu
            pass
        # argument list instead of a shell string: no quoting problems with
        # unusual directory names
        cmd = ["/usr/sbin/vdu", "--script", "--space", "--inodes",
               "--blocksize", "1024", "--xid", "%d" % self.ctx, self.dir]
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             close_fds=True)
        # communicate() closes stdin and drains both pipes, so a chatty vdu
        # cannot block on a full pipe
        (out, err) = p.communicate()
        lines = out.splitlines()
        if lines:
            line = lines[0]
        else:
            line = ""
        if not line:
            sys.stderr.write(err)

        (space, inodes) = line.split()
        self.disk_inodes = int(inodes)
        self.disk_blocks = int(space)

        return self.disk_blocks * 1024

    def stop(self, signal = signal.SIGKILL):
        """Send ``signal`` to the context with vserverimpl.killall and mark
        the vserver as not running."""
        vserverimpl.killall(self.ctx, signal)
        self.vm_running = False

    def setname(self, slice_id):
        '''Set vcVHI_CONTEXT field in kernel to slice_id'''
        vserverimpl.setname(self.ctx, slice_id)

    def getname(self):
        '''Get vcVHI_CONTEXT field in kernel'''
        return vserverimpl.getname(self.ctx)
527
528
def create(vm_name, static = False, ctor = VServer):
    """Create a new vserver with the external 'vuseradd' command.

    vm_name -- name of the vserver (also the account name looked up below)
    static  -- pass '--static' to vuseradd
    ctor    -- callable invoked as ctor(vm_name, vm_id) to build the result

    The context id handed to ``ctor`` is the uid of the password database
    entry named ``vm_name``.  Raises SystemError if vuseradd exits with a
    non-zero status or is killed by a signal.
    """
    options = ['vuseradd']
    if static:
        options += ['--static']
    # With P_WAIT, spawnvp returns the exit code (>= 0) or the negated
    # signal number (< 0) -- not a wait() status, so the os.W* macros must
    # not be applied to it.
    ret = os.spawnvp(os.P_WAIT, 'vuseradd', options + [vm_name])
    if ret != 0:
        out = "system command ('%s') " % options
        if ret > 0:
            out += "failed, rc = %d" % ret
        else:
            out += "killed by signal %d" % -ret
        raise SystemError(out)
    vm_id = pwd.getpwnam(vm_name)[2]

    return ctor(vm_name, vm_id)
545
546
def close_nonstandard_fds():
    """Close all open file descriptors other than 0, 1, and 2."""
    # Look the limit up by name: the numeric sysconf index differs between
    # platforms.
    try:
        max_fd = os.sysconf("SC_OPEN_MAX")
    except (ValueError, OSError):
        max_fd = -1
    if max_fd <= 0:
        # limit is indeterminate; fall back to a conventional default
        max_fd = 1024
    # closerange silently skips descriptors that are not open
    os.closerange(3, max_fd)
553