mount /proc before chroot'ing into filesystem.
[util-vserver-pl.git] / python / vserver.py
1 # Copyright 2005 Princeton University
2
3 #$Id: vserver.py,v 1.72 2007/08/02 16:01:59 dhozac Exp $
4
5 import errno
6 import fcntl
7 import os
8 import re
9 import pwd
10 import signal
11 import sys
12 import time
13 import traceback
14 import subprocess
15 import resource
16
17 import vserverimpl
18 import cpulimit, bwlimit
19
20 from vserverimpl import VS_SCHED_CPU_GUARANTEED as SCHED_CPU_GUARANTEED
21 from vserverimpl import DLIMIT_INF
22 from vserverimpl import VC_LIM_KEEP
23 from vserverimpl import VLIMIT_NSOCK
24 from vserverimpl import VLIMIT_OPENFD
25 from vserverimpl import VLIMIT_ANON
26 from vserverimpl import VLIMIT_SHMEM
27
#
# Context flags, taken from the kernel header linux/vserver/legacy.h.
# Every flag occupies its own bit, so the values are written as shifts.
#
FLAGS_LOCK = 1 << 0
FLAGS_SCHED = 1 << 1  # XXX - defined in util-vserver/src/chcontext.c
FLAGS_NPROC = 1 << 2
FLAGS_PRIVATE = 1 << 3
FLAGS_INIT = 1 << 4
FLAGS_HIDEINFO = 1 << 5
FLAGS_ULIMIT = 1 << 6
FLAGS_NAMESPACE = 1 << 7
39
# Maps a resource-limit name (without any prefix) to the numeric id that is
# handed to vserverimpl.getrlimit()/setrlimit().  It starts with the limits
# exported by vserverimpl ...
RLIMITS = {
    "NSOCK": VLIMIT_NSOCK,
    "OPENFD": VLIMIT_OPENFD,
    "ANON": VLIMIT_ANON,
    "SHMEM": VLIMIT_SHMEM,
}

# ... and is extended with every RLIMIT_* constant of the platform's
# `resource` module.  An entry that is already present is kept and the clash
# is reported on stdout.
for entry in dir(resource):
    if not entry.startswith("RLIMIT_"):
        continue
    k = entry[len("RLIMIT_"):]
    if k in RLIMITS:
        sys.stdout.write("WARNING: duplicate RLIMITS key %s\n" % k)
    else:
        RLIMITS[k] = getattr(resource, entry)
53
class NoSuchVServer(Exception):
    """Signals that a vserver directory is missing or not accessible."""
    pass
55
56
class VServerConfig:
    """File-backed key/value configuration of one vserver.

    Every option is a file below ``directory``: the option name is the path
    relative to that directory (for example ``rlimits/nsock.hard``) and the
    value is the file content with trailing whitespace stripped.

    After cache_it() has been called the object is a read-only snapshot:
    get() is answered from memory, update() and unset() do nothing.
    """

    # Octal permission bits, spelled so that Python 2 and Python 3 both
    # accept them.
    _UMASK = int("022", 8)
    _DIR_MODE = int("755", 8)

    def __init__(self, name, directory):
        """Bind to ``directory``, the configuration directory of ``name``.

        Raises NoSuchVServer unless ``directory`` is an existing directory
        that is readable, writable and searchable.
        """
        self.name = name
        self.dir = directory
        self.cache = None
        if not (os.path.isdir(self.dir) and
                os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
            raise NoSuchVServer("%s does not exist" % self.dir)

    def get(self, option, default = None):
        """Return the value of ``option``.

        The value comes from the snapshot when cache_it() has been called,
        otherwise from the option's file.  When the option cannot be read,
        ``default`` is returned if it is not None; otherwise KeyError is
        raised.
        """
        try:
            if self.cache is not None:
                return self.cache[option]
            f = open(os.path.join(self.dir, option), "r")
            try:
                return f.read().rstrip()
            finally:
                f.close()
        except (KeyError, IOError, OSError):
            if default is not None:
                return default
            raise KeyError("Key %s is not set for %s" % (option, self.name))

    def update(self, option, value):
        """Write ``value`` to the file of ``option``, creating parent directories.

        A list is written one element per line; anything else is written
        via ``%s``.  Does nothing once the configuration has been cached.
        Errors from opening or writing the file propagate to the caller.
        """
        if self.cache is not None:
            return

        filename = os.path.join(self.dir, option)
        old_umask = os.umask(self._UMASK)
        try:
            try:
                os.makedirs(os.path.dirname(filename), self._DIR_MODE)
            except OSError:
                # Usually "already exists"; any real problem is reported by
                # the open() below.
                pass
            f = open(filename, 'w')
            try:
                if isinstance(value, list):
                    f.write("%s\n" % "\n".join(value))
                else:
                    f.write("%s\n" % value)
            finally:
                f.close()
        finally:
            # The umask is process-wide state: restore it even on failure.
            os.umask(old_umask)

    def unset(self, option):
        """Delete the file of ``option`` and prune directories left empty.

        Returns True when the file was removed and False when it could not
        be removed.  Does nothing and returns None once the configuration
        has been cached.
        """
        if self.cache is not None:
            return

        filename = os.path.join(self.dir, option)
        try:
            os.unlink(filename)
        except OSError:
            return False
        try:
            # removedirs() stops at the first non-empty directory.
            os.removedirs(os.path.dirname(filename))
        except OSError:
            pass
        return True

    def cache_it(self):
        """Load every readable regular file below the directory into memory.

        Symbolic links are skipped.  Keys are paths relative to the
        configuration directory, values are the stripped file contents.
        The snapshot replaces any previous one only after it has been read
        completely.
        """
        prefix = os.path.join(self.dir, '')
        snapshot = {}
        for dirname, subdirs, fnames in os.walk(self.dir):
            for fname in fnames:
                full_name = os.path.join(dirname, fname)
                if os.path.islink(full_name):
                    continue
                if not (os.path.isfile(full_name) and
                        os.access(full_name, os.R_OK)):
                    continue
                if full_name.startswith(prefix):
                    key = full_name[len(prefix):]
                else:
                    key = full_name
                f = open(full_name, "r")
                try:
                    snapshot[key] = f.read().rstrip()
                finally:
                    f.close()
        self.cache = snapshot
131
132
class VServer:
    """Handle on one vserver: its root directory, configuration and context.

    The root file system is ``<vserverimpl.VSERVER_BASEDIR>/<name>``, the
    configuration lives in ``/etc/vservers/<name>`` (see VServerConfig) and
    the context id is either passed in or read from the ``context`` option.
    """

    # (program, argument template, ...) tuples spawned in this order by
    # start(); the templates are %-formatted with {'runlevel': <int>}.
    INITSCRIPTS = [('/etc/rc.vinit', 'start'),
                   ('/etc/rc.d/rc', '%(runlevel)d')]

    # Creation mode of the log file, spelled portably for Python 2 and 3.
    _LOG_MODE = int("600", 8)

    def __init__(self, name, vm_id = None, vm_running = None, logfile=None):
        """Attach to the existing vserver ``name``.

        vm_id      -- context id; read from the ``context`` option when None
        vm_running -- running state; queried with is_running() when None
        logfile    -- file that log() appends to; logging is off when unset

        Raises NoSuchVServer when the root or configuration directory is
        missing or not accessible.
        """
        self.name = name
        self.logfile = logfile
        self.rlimits_changed = False
        self.dir = "%s/%s" % (vserverimpl.VSERVER_BASEDIR, name)
        if not (os.path.isdir(self.dir) and
                os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
            raise NoSuchVServer("no such vserver: " + name)
        self.config = VServerConfig(name, "/etc/vservers/%s" % name)
        self.remove_caps = ~vserverimpl.CAP_SAFE
        if vm_id is None:
            vm_id = int(self.config.get('context'))
        self.ctx = vm_id
        if vm_running is None:
            vm_running = self.is_running()
        self.vm_running = vm_running

    # inspired from nodemanager's logger
    def log(self,msg):
        """Append a UTC-timestamped ``msg`` to the log file, if one is set.

        When the log file cannot be written the message goes to stdout
        instead; this method does not raise for I/O problems.
        """
        if not self.logfile:
            return
        try:
            fd = os.open(self.logfile,
                         os.O_WRONLY | os.O_CREAT | os.O_APPEND,
                         self._LOG_MODE)
            try:
                logf = os.fdopen(fd, "a")
            except (IOError, OSError):
                os.close(fd)
                raise
            try:
                if not msg.endswith('\n'):
                    msg += '\n'
                logf.write('%s: %s' % (time.asctime(time.gmtime()), msg))
            finally:
                # Closing the file object also closes the descriptor.
                logf.close()
        except (IOError, OSError):
            sys.stdout.write('%s: (%s failed to open) %s\n'
                             % (time.asctime(time.gmtime()), self.logfile, msg))

    def have_limits_changed(self):
        """Return True if set_rlimit_limit() changed a limit since the last start()/stop()."""
        return self.rlimits_changed

    def set_rlimit_limit(self,type,hard,soft,minimum):
        """Apply one resource limit to the context.

        ``type`` is a key of RLIMITS.  Before the new values are applied the
        current ones are fetched; if any of them is set (not VC_LIM_KEEP)
        and differs from the new value, the "limits changed" flag is raised.
        OSError from the kernel interface is logged (only while the context
        is running) and otherwise ignored.
        """
        try:
            old_hard, old_soft, old_minimum = self.get_rlimit_limit(type)
        except OSError:
            if self.is_running():
                self.log("Unexpected error with getrlimit for running context %d" % self.ctx)
        else:
            for old, new in ((old_hard, hard),
                             (old_soft, soft),
                             (old_minimum, minimum)):
                if old != VC_LIM_KEEP and old != new:
                    self.rlimits_changed = True

        resource_type = RLIMITS[type]
        try:
            vserverimpl.setrlimit(self.ctx, resource_type, hard, soft, minimum)
        except OSError:
            if self.is_running():
                self.log("Unexpected error with setrlimit for running context %d" % self.ctx)

    def set_rlimit_config(self,type,hard,soft,minimum):
        """Store one resource limit in the configuration, then apply it.

        A value equal to VC_LIM_KEEP is not written to the configuration.
        """
        prefix = 'rlimits/%s' % type.lower()
        if hard != VC_LIM_KEEP:
            self.config.update(prefix + '.hard', hard)
        if soft != VC_LIM_KEEP:
            self.config.update(prefix + '.soft', soft)
        if minimum != VC_LIM_KEEP:
            self.config.update(prefix + '.min', minimum)
        self.set_rlimit_limit(type,hard,soft,minimum)

    def get_rlimit_limit(self,type):
        """Return the context's current limit for ``type``.

        Falls back to the configured (hard, soft, minimum) values when
        vserverimpl.getrlimit() fails with OSError.
        """
        resource_type = RLIMITS[type]
        try:
            ret = vserverimpl.getrlimit(self.ctx,resource_type)
        except OSError:
            self.log("Unexpected error with getrlimit for context %d" % self.ctx)
            ret = self.get_rlimit_config(type)
        return ret

    def get_rlimit_config(self,type):
        """Return the configured (hard, soft, minimum) limit for ``type``.

        A value that is not configured is reported as VC_LIM_KEEP.
        """
        prefix = "rlimits/%s" % type.lower()
        hard = int(self.config.get(prefix + ".hard", VC_LIM_KEEP))
        soft = int(self.config.get(prefix + ".soft", VC_LIM_KEEP))
        minimum = int(self.config.get(prefix + ".min", VC_LIM_KEEP))
        return (hard,soft,minimum)

    def set_capabilities(self, capabilities):
        """Apply the textual capability list to the context."""
        return vserverimpl.setbcaps(self.ctx, vserverimpl.text2bcaps(capabilities))

    def set_capabilities_config(self, capabilities):
        """Store the capability list in the configuration, then apply it."""
        self.config.update('bcapabilities', capabilities)
        self.set_capabilities(capabilities)

    def get_capabilities(self):
        """Return the context's current capabilities in textual form."""
        return vserverimpl.bcaps2text(vserverimpl.getbcaps(self.ctx))

    def get_capabilities_config(self):
        """Return the configured capability list ('' when none is stored)."""
        return self.config.get('bcapabilities', '')

    def set_ipaddresses(self, addresses):
        """Replace the context's addresses by the comma-separated ``addresses``."""
        vserverimpl.netremove(self.ctx, "all")
        for a in addresses.split(","):
            vserverimpl.netadd(self.ctx, a)

    def set_ipaddresses_config(self, addresses):
        """Store the comma-separated ``addresses`` in the configuration and apply them.

        Address number i is written to ``interfaces/<i>/ip``; entries with
        higher numbers left over from an earlier, longer list are removed.
        """
        i = 0
        for a in addresses.split(","):
            self.config.update("interfaces/%d/ip" % i, a)
            i += 1
        while self.config.unset("interfaces/%d/ip" % i):
            i += 1
        self.set_ipaddresses(addresses)

    def get_ipaddresses_config(self):
        """Return the configured addresses as one comma-separated string.

        Reads ``interfaces/<i>/ip`` for i = 0, 1, ... and stops at the first
        entry that is missing or empty.
        """
        ret = []
        while True:
            r = self.config.get("interfaces/%d/ip" % len(ret), '')
            if r == '':
                break
            ret.append(r)
        return ",".join(ret)

    def get_ipaddresses(self):
        """Not implemented; always returns None."""
        # No clean way to do this right now.
        return None

    def __do_chroot(self):
        """Make the vserver's directory the root and the working directory."""
        os.chroot(self.dir)
        os.chdir("/")

    def chroot_call(self, fn, *args):
        """Return ``fn(*args)``, evaluated while chroot'ed into the vserver.

        The original root and working directory are kept as open
        descriptors and restored afterwards, also when ``fn`` raises.
        """
        cwd_fd = os.open(".", os.O_RDONLY)
        try:
            root_fd = os.open("/", os.O_RDONLY)
            try:
                self.__do_chroot()
                result = fn(*args)
            finally:
                # Step back out: old root first, then the old cwd.
                os.fchdir(root_fd)
                os.chroot(".")
                os.fchdir(cwd_fd)
                os.close(root_fd)
        finally:
            os.close(cwd_fd)
        return result

    def set_disklimit(self, block_limit):
        """Set the disk limit to ``block_limit`` kB; 0 removes the limit.

        For a stopped vserver the current usage has to be supplied as well;
        it is taken from init_disk_info(), which is run here if no usage
        values are available yet.  OSError from the kernel interface is
        logged and otherwise ignored.  A non-zero limit is also written to
        the ``dlimits/0/space_total`` option.
        """
        # block_limit is in kB
        if block_limit == 0:
            try:
                vserverimpl.unsetdlimit(self.dir, self.ctx)
            except OSError:
                self.log("Unexpected error with unsetdlimit for context %d" % self.ctx)
            return

        if self.vm_running:
            block_usage = vserverimpl.DLIMIT_KEEP
            inode_usage = vserverimpl.DLIMIT_KEEP
        else:
            if not (hasattr(self, 'disk_blocks') and
                    hasattr(self, 'disk_inodes')):
                # Usage was never measured: do it now instead of failing
                # with AttributeError.
                self.init_disk_info()
            block_usage = self.disk_blocks
            inode_usage = self.disk_inodes

        try:
            vserverimpl.setdlimit(self.dir,
                                  self.ctx,
                                  block_usage,
                                  block_limit,
                                  inode_usage,
                                  vserverimpl.DLIMIT_INF,  # inode limit
                                  2)   # %age reserved for root
        except OSError:
            self.log("Unexpected error with setdlimit for context %d" % self.ctx)

        self.config.update('dlimits/0/space_total', block_limit)

    def is_running(self):
        """Return vserverimpl.isrunning() for this context."""
        return vserverimpl.isrunning(self.ctx)

    def get_disklimit(self):
        """Return the block limit of this context, or -1 when none is set.

        On success the current block and inode usage are remembered in
        ``self.disk_blocks`` and ``self.disk_inodes``.  Any OSError other
        than ESRCH is re-raised.
        """
        try:
            (self.disk_blocks, block_limit, self.disk_inodes, inode_limit,
             reserved) = vserverimpl.getdlimit(self.dir, self.ctx)
        except OSError:
            # sys.exc_info() instead of "except ... as": works on every
            # Python 2 and Python 3 release.
            if sys.exc_info()[1].errno != errno.ESRCH:
                raise
            # get here if no vserver disk limit has been set for xid
            block_limit = -1

        return block_limit

    def set_sched_config(self, cpu_share, sched_flags):
        """Write the CPU scheduler parameters to the configuration.

        ``sched/fill-rate2`` receives ``cpu_share``; ``sched/fill-rate``
        receives ``cpu_share`` when SCHED_CPU_GUARANTEED is set in
        ``sched_flags`` and 0 otherwise.  The kernel parameters are only
        touched (through set_sched()) when the vserver is marked running.
        """
        if sched_flags & SCHED_CPU_GUARANTEED:
            cpu_guaranteed = cpu_share
        else:
            cpu_guaranteed = 0
        self.config.update('sched/fill-rate2', cpu_share)
        self.config.update('sched/fill-rate', cpu_guaranteed)

        if self.vm_running:
            self.set_sched(cpu_share, sched_flags)

    def set_sched(self, cpu_share, sched_flags = 0):
        """ Update kernel CPU scheduling parameters for this context. """
        vserverimpl.setsched(self.ctx, cpu_share, sched_flags)

    def get_sched(self):
        """Not implemented; always returns (-1, False)."""
        # have no way of querying scheduler right now on a per vserver basis
        return (-1, False)

    def set_bwlimit(self, minrate = bwlimit.bwmin, maxrate = None,
                    exempt_min = None, exempt_max = None,
                    share = None, dev = "eth0"):
        """Switch bandwidth limiting for this context on ``dev`` on or off.

        ``minrate`` None turns limiting off; otherwise all parameters are
        handed to bwlimit.on().
        """
        if minrate is None:
            bwlimit.off(self.ctx, dev)
        else:
            bwlimit.on(self.ctx, dev, share,
                       minrate, maxrate, exempt_min, exempt_max)

    def get_bwlimit(self, dev = "eth0"):
        """Return what bwlimit.get() reports for this context, minus the leading id.

        A false result of bwlimit.get() is returned unchanged.
        """
        # NOTE(review): ``dev`` is accepted but not forwarded to
        # bwlimit.get() -- confirm against bwlimit's signature whether it
        # should be.
        result = bwlimit.get(self.ctx)
        # result of bwlimit.get is (ctx, share, minrate, maxrate)
        if result:
            result = result[1:]
        return result

    def open(self, filename, mode = "r", bufsize = -1):
        """Open ``filename`` as seen from inside the vserver's root directory."""
        return self.chroot_call(open, filename, mode, bufsize)

    def __do_chcontext(self, state_file):
        """Move the calling process into the vserver's context.

        When ``state_file`` is given, the context id is written to it and
        the file is closed first.  If vserverimpl.chcontext() returns a true
        value, set_resources() is invoked and the setup is marked done.
        """
        if state_file:
            state_file.write("%u\n" % self.ctx)
            state_file.close()

        if vserverimpl.chcontext(self.ctx, vserverimpl.text2bcaps(self.get_capabilities_config())):
            self.set_resources()
            vserverimpl.setup_done(self.ctx)

    def __prep(self, runlevel):
        """Prepare the guest file system before its init scripts run.

        Records the initial runlevel in the guest's utmp file and mounts
        /proc and /dev/pts inside the guest.  It is called before the
        process chroots, so every path is given relative to ``self.dir``.
        """
        RUNDIR = "/var/run"

        # The list of stale /var/run and /var/lock/subsys files that used to
        # be computed here was never acted upon (its removal loop was
        # disabled), so it is no longer built.

        # set the initial runlevel -- in the guest's utmp, not the host's
        vserverimpl.setrunlevel(self.dir + RUNDIR + "/utmp", runlevel)

        # mount /proc and /dev/pts
        self.__do_mount("none", self.dir, "/proc", "proc")
        # XXX - magic mount options
        self.__do_mount("none", self.dir, "/dev/pts", "devpts", 0, "gid=5,mode=0620")

    def __do_mount(self, *mount_args):
        """Call vserverimpl.mount(*mount_args); EBUSY is treated as success."""
        try:
            vserverimpl.mount(*mount_args)
        except OSError:
            if sys.exc_info()[1].errno == errno.EBUSY:
                # assume already mounted
                return
            # bare raise keeps the original traceback
            raise

    def enter(self):
        """Move the calling process into the vserver (chroot plus context).

        The configuration is cached first because its files are no longer
        reachable under the same paths after the chroot.
        """
        self.config.cache_it()
        self.__do_chroot()
        self.__do_chcontext(None)

    def start(self, wait, runlevel = 3):
        """Boot the vserver in a forked child and return the child's pid.

        The child becomes a session leader, prepares the guest file system,
        chroots into it, redirects stdout/stderr to the guest's
        /var/log/boot.log, enters the context and spawns every entry of
        INITSCRIPTS without waiting for it.  The child never returns to the
        caller: it exits with status 0 on success and 1 on failure.

        ``wait`` is accepted for interface compatibility and is not used.
        """
        self.vm_running = True
        self.rlimits_changed = False

        child_pid = os.fork()
        if child_pid != 0:
            # parent process
            return child_pid

        # child process
        status = 1
        try:
            try:
                # get a new session
                os.setsid()

                # open state file to record vserver info
                state_file = open("/var/run/vservers/%s" % self.name, "w")

                # use /dev/null for stdin, /var/log/boot.log for stdout/err
                fd = os.open("/dev/null", os.O_RDONLY)
                if fd != 0:
                    os.dup2(fd, 0)
                    os.close(fd)

                # perform pre-init setup (still outside the chroot)
                self.__prep(runlevel)

                self.config.cache_it()
                self.__do_chroot()
                # Line buffered: every message below ends in a newline, so
                # it reaches the file at once (os._exit() does not flush).
                log = open("/var/log/boot.log", "a", 1)
                if log.fileno() != 1:
                    os.dup2(log.fileno(), 1)
                os.dup2(1, 2)

                log.write("%s: starting the virtual server %s\n" %
                          (time.asctime(time.gmtime()), self.name))

                # enter vserver context
                self.__do_chcontext(state_file)

                # execute each init script in turn
                # XXX - we don't support all scripts that vserver script does
                arg_subst = { 'runlevel': runlevel }
                for cmd in self.INITSCRIPTS:
                    try:
                        cmd_args = [cmd[0]] + [arg % arg_subst
                                               for arg in cmd[1:]]
                        log.write("executing '%s'\n" % " ".join(cmd_args))
                        os.spawnvp(os.P_NOWAIT, cmd[0], cmd_args)
                    except Exception:
                        log.write(traceback.format_exc() + "\n")
                        os._exit(1)
                status = 0
            # we get here due to an exception in the top-level child process
            except Exception:
                self.log(traceback.format_exc())
        finally:
            # Whatever happened, the child must not fall back into the
            # caller's code.
            os._exit(status)

    def set_resources(self):

        """ Called when vserver context is entered for first time,
        should be overridden by subclass. """

        pass

    def init_disk_info(self):
        """Measure the vserver's disk usage with /usr/sbin/vdu.

        Stores the number of 1024-byte blocks in ``self.disk_blocks`` and
        the number of inodes in ``self.disk_inodes`` and returns the used
        space in bytes.  Raises ValueError when vdu does not print exactly
        two fields on its first output line; vdu's stderr is then copied to
        sys.stderr.
        """
        # Argument list instead of a shell string: the directory name is
        # passed through verbatim, whatever characters it contains.
        cmd = ["/usr/sbin/vdu", "--script", "--space", "--inodes",
               "--blocksize", "1024", "--xid", "%d" % self.ctx, self.dir]
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             close_fds=True, universal_newlines=True)
        # communicate() drains both pipes together, so neither can fill up
        # and block the child.
        out, err = p.communicate()

        line = out.split("\n", 1)[0]
        fields = line.split()
        if len(fields) != 2:
            sys.stderr.write(err)
            raise ValueError("unexpected vdu output for %s: %r"
                             % (self.dir, line))

        (space, inodes) = fields
        self.disk_inodes = int(inodes)
        self.disk_blocks = int(space)
        #(self.disk_inodes, self.disk_blocks) = vduimpl.vdu(self.dir)

        return self.disk_blocks * 1024

    def stop(self, signal = signal.SIGKILL):
        """Send ``signal`` to every process of the context and mark it stopped."""
        vserverimpl.killall(self.ctx, signal)
        self.vm_running = False
        self.rlimits_changed = False
514
515
516
def create(vm_name, static = False, ctor = VServer):
    """Create the vserver ``vm_name`` with ``vuseradd`` and return a handle on it.

    static -- pass ``--static`` to vuseradd
    ctor   -- callable invoked as ``ctor(vm_name, vm_id)`` to build the
              returned object; ``vm_id`` is the uid of the account
              ``vm_name`` as reported by the password database

    Raises SystemError when vuseradd exits with a non-zero code or is
    killed by a signal.
    """
    argv = ['vuseradd']
    if static:
        argv.append('--static')
    argv.append(vm_name)

    # With P_WAIT, spawnvp() returns the exit code itself, or the negated
    # signal number if the child was killed -- not a wait() status, so the
    # os.W* macros must not be applied to it.
    ret = os.spawnvp(os.P_WAIT, argv[0], argv)
    if ret != 0:
        out = "system command ('%s') " % " ".join(argv)
        if ret > 0:
            out += "failed, rc = %d" % ret
        else:
            out += "killed by signal %d" % -ret
        raise SystemError(out)
    vm_id = pwd.getpwnam(vm_name)[2]

    return ctor(vm_name, vm_id)