If it's busy, assume it's already mounted.
[util-vserver-pl.git] / python / vserver.py
1 # Copyright 2005 Princeton University
2
3 #$Id: vserver.py,v 1.72 2007/08/02 16:01:59 dhozac Exp $
4
5 import errno
6 import fcntl
7 import os
8 import re
9 import pwd
10 import signal
11 import sys
12 import time
13 import traceback
14 import subprocess
15 import resource
16
17 import vserverimpl
18 import cpulimit, bwlimit
19
20 from vserverimpl import DLIMIT_INF
21 from vserverimpl import VC_LIM_KEEP
22 from vserverimpl import VLIMIT_NSOCK
23 from vserverimpl import VLIMIT_OPENFD
24 from vserverimpl import VLIMIT_ANON
25 from vserverimpl import VLIMIT_SHMEM
26
#
# Legacy context flags, mirroring the kernel's linux/vserver/legacy.h.
# Each flag occupies one bit, so they are written as shifts.
#
FLAGS_LOCK = 1 << 0
FLAGS_SCHED = 1 << 1  # XXX - defined in util-vserver/src/chcontext.c
FLAGS_NPROC = 1 << 2
FLAGS_PRIVATE = 1 << 3
FLAGS_INIT = 1 << 4
FLAGS_HIDEINFO = 1 << 5
FLAGS_ULIMIT = 1 << 6
FLAGS_NAMESPACE = 1 << 7
38
# Resource-limit name -> numeric resource id.  Seeded with the limits
# imported from vserverimpl; the loop below adds the platform's own.
RLIMITS = {
    "NSOCK": VLIMIT_NSOCK,
    "OPENFD": VLIMIT_OPENFD,
    "ANON": VLIMIT_ANON,
    "SHMEM": VLIMIT_SHMEM,
}

# Add every RLIMIT_* constant exposed by the resource module, keyed by
# the name with the "RLIMIT_" prefix dropped.  A name that is already in
# the table is left untouched and reported.
for entry in list(vars(resource)):
    if not entry.startswith("RLIMIT_"):
        continue
    k = entry[len("RLIMIT_"):]
    if k in RLIMITS:
        print("WARNING: duplicate RLIMITS key %s" % k)
    else:
        RLIMITS[k] = getattr(resource, entry)
52
class NoSuchVServer(Exception):
    """Raised when a vserver's directory is missing or not accessible.

    Both VServerConfig.__init__ and VServer.__init__ raise it when their
    directory is not an existing directory with read/write/execute access.
    """
54
55
class VServerConfig:
    """File-per-option configuration store rooted at one directory.

    An option name is a path relative to the directory (it may contain
    "/"); its value is the content of that file with trailing whitespace
    stripped.  Once cache_it() has been called, get() is served from the
    in-memory snapshot only and update()/unset() do nothing.
    """

    # Octal modes are spelled through int(..., 8) so the source parses the
    # same way regardless of interpreter version.
    _UMASK = int("022", 8)
    _DIR_MODE = int("755", 8)

    def __init__(self, name, directory):
        """Bind to `directory`.

        Raises NoSuchVServer unless `directory` is an existing directory
        that is readable, writable and searchable by this process.
        """
        self.name = name
        self.dir = directory
        self.cache = None
        if not (os.path.isdir(self.dir) and
                os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
            raise NoSuchVServer("%s does not exist" % self.dir)

    def get(self, option, default = None):
        """Return the value of `option`.

        If the option cannot be read, return `default` when it is not
        None; otherwise raise KeyError.
        """
        try:
            # "is not None": an empty snapshot is still a snapshot and
            # must not fall through to the filesystem.
            if self.cache is not None:
                return self.cache[option]
            f = open(os.path.join(self.dir, option), "r")
            try:
                return f.read().rstrip()
            finally:
                f.close()
        except (KeyError, IOError, OSError):
            if default is not None:
                return default
            raise KeyError("Key %s is not set for %s" % (option, self.name))

    def update(self, option, value):
        """Write `value` to the file for `option`, creating parent dirs.

        A list is written one element per line; anything else is written
        with "%s".  Does nothing once the configuration has been cached.
        """
        if self.cache is not None:
            return

        filename = os.path.join(self.dir, option)
        old_umask = os.umask(self._UMASK)
        try:
            try:
                os.makedirs(os.path.dirname(filename), self._DIR_MODE)
            except OSError:
                # normally just "already exists"; a real problem will
                # surface from open() below
                pass
            f = open(filename, 'w')
            try:
                if isinstance(value, list):
                    f.write("%s\n" % "\n".join(value))
                else:
                    f.write("%s\n" % value)
            finally:
                f.close()
        finally:
            # restore the caller's umask even when the write fails
            os.umask(old_umask)

    def unset(self, option):
        """Delete the file for `option`.

        Returns True if the file was removed and False if it could not
        be; returns None (nothing is done) once the configuration has
        been cached.  Directories left empty are pruned, but never the
        configuration directory itself.
        """
        if self.cache is not None:
            return

        filename = os.path.join(self.dir, option)
        try:
            os.unlink(filename)
        except OSError:
            return False

        # Prune upwards; rmdir fails on the first non-empty directory.
        root = os.path.normpath(self.dir)
        parent = os.path.dirname(filename)
        while os.path.normpath(parent) != root:
            try:
                os.rmdir(parent)
            except OSError:
                break
            parent = os.path.dirname(parent)
        return True

    def cache_it(self):
        """Snapshot every readable regular file under the directory.

        Keys are paths relative to the directory.  Symbolic links are
        skipped.  The snapshot replaces self.cache only after the whole
        tree has been read.
        """
        snapshot = {}
        prefix = os.path.join(self.dir, '')
        for dirpath, subdirs, files in os.walk(self.dir):
            for fname in files:
                full_name = os.path.join(dirpath, fname)
                if os.path.islink(full_name):
                    continue
                if not (os.path.isfile(full_name) and
                        os.access(full_name, os.R_OK)):
                    continue
                f = open(full_name, "r")
                try:
                    # strip only the leading directory prefix
                    snapshot[full_name[len(prefix):]] = f.read().rstrip()
                finally:
                    f.close()
        self.cache = snapshot
130
131
class VServer:
    """One vserver: its root directory, its on-disk config and its context.

    self.dir is the vserver's root under vserverimpl.VSERVER_BASEDIR,
    self.config is the VServerConfig for /etc/vservers/<name>, and
    self.ctx is the numeric context id handed to every vserverimpl call.
    """

    # (program, argument-template...) tuples launched in order by start();
    # the templates are %-formatted with {'runlevel': runlevel}.
    INITSCRIPTS = [('/etc/rc.vinit', 'start'),
                   ('/etc/rc.d/rc', '%(runlevel)d')]

    def __init__(self, name, vm_id = None, vm_running = None, logfile=None):
        """Bind to the existing vserver `name`.

        vm_id      -- context id; read from the 'context' config option
                      when None.
        vm_running -- running state; queried via is_running() when None.
        logfile    -- path that log() appends to; None disables logging.

        Raises NoSuchVServer if the vserver's root directory (or, via
        VServerConfig, its config directory) is missing or inaccessible.
        """
        self.name = name
        self.dir = "%s/%s" % (vserverimpl.VSERVER_BASEDIR, name)
        if not (os.path.isdir(self.dir) and
                os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
            raise NoSuchVServer("no such vserver: " + name)
        self.config = VServerConfig(name, "/etc/vservers/%s" % name)
        self.remove_caps = ~vserverimpl.CAP_SAFE
        if vm_id is None:
            vm_id = int(self.config.get('context'))
        self.ctx = vm_id
        if vm_running is None:
            vm_running = self.is_running()
        self.vm_running = vm_running
        self.logfile = logfile

    # inspired from nodemanager's logger
    def log(self,msg):
        """Append a timestamped `msg` to self.logfile; never raises.

        Does nothing when no logfile is configured.  If the file cannot
        be written the message is printed to stdout instead.
        """
        if not self.logfile:
            return
        try:
            fd = os.open(self.logfile,
                         os.O_WRONLY | os.O_CREAT | os.O_APPEND,
                         int("0600", 8))
            try:
                if not msg.endswith('\n'):
                    msg += '\n'
                os.write(fd, '%s: %s' % (time.asctime(time.gmtime()), msg))
            finally:
                # do not leak the descriptor when the write fails
                os.close(fd)
        except Exception:
            print('%s: (%s failed to open) %s'
                  % (time.asctime(time.gmtime()), self.logfile, msg))

    def set_rlimit(self, type, hard, soft, min):
        """Generic set resource limit function for vserver.

        Each of hard/soft/min that is not VC_LIM_KEEP is written to the
        config as rlimits/<type>.<hard|soft|min>.  If anything was written
        and the vserver is running, the limits are also pushed to the
        kernel; an OSError from that call is logged, not raised.

        Returns True if at least one value was written.  Raises KeyError
        for a `type` that is not in RLIMITS (only reached when running).
        """
        update = False
        for suffix, value in (('hard', hard), ('soft', soft), ('min', min)):
            if value != VC_LIM_KEEP:
                self.config.update('rlimits/%s.%s' % (type.lower(), suffix),
                                   value)
                update = True

        if self.is_running() and update:
            resource_type = RLIMITS[type]
            try:
                vserverimpl.setrlimit(self.ctx, resource_type, hard, soft, min)
            except OSError:
                err = sys.exc_info()[1]
                # the format has six fields, so the error itself must be
                # passed as the sixth argument
                self.log("Error: setrlimit(%d, %s, %d, %d, %d): %s"
                         % (self.ctx, type.lower(), hard, soft, min, err))

        return update

    def set_capabilities(self, capabilities):
        """Apply the textual capability list to the context."""
        return vserverimpl.setbcaps(self.ctx,
                                    vserverimpl.text2bcaps(capabilities))

    def set_capabilities_config(self, capabilities):
        """Store `capabilities` as 'bcapabilities', then apply them."""
        self.config.update('bcapabilities', capabilities)
        self.set_capabilities(capabilities)

    def get_capabilities(self):
        """Return the context's current capabilities as text."""
        return vserverimpl.bcaps2text(vserverimpl.getbcaps(self.ctx))

    def get_capabilities_config(self):
        """Return the stored 'bcapabilities' option ('' when unset)."""
        return self.config.get('bcapabilities', '')

    def set_ipaddresses(self, addresses):
        """Replace the context's addresses with the comma-separated list."""
        vserverimpl.netremove(self.ctx, "all")
        for a in addresses.split(","):
            vserverimpl.netadd(self.ctx, a)

    def set_ipaddresses_config(self, addresses):
        """Store the comma-separated addresses, then apply them.

        Address i is written to interfaces/<i>/ip; stale higher-numbered
        entries are removed until the first one that does not exist.
        """
        addrs = addresses.split(",")
        for i, a in enumerate(addrs):
            self.config.update("interfaces/%d/ip" % i, a)
        i = len(addrs)
        while self.config.unset("interfaces/%d/ip" % i):
            i += 1
        self.set_ipaddresses(addresses)

    def get_ipaddresses_config(self):
        """Return the stored addresses as a comma-separated string.

        Reads interfaces/0/ip, interfaces/1/ip, ... and stops at the first
        entry that is missing or empty.
        """
        ret = []
        while True:
            r = self.config.get("interfaces/%d/ip" % len(ret), '')
            if r == '':
                break
            ret.append(r)
        return ",".join(ret)

    def get_ipaddresses(self):
        """Not implemented; always returns None."""
        # No clean way to do this right now.
        return None

    def __do_chroot(self):
        """chroot into the vserver's root and chdir to its "/"."""
        os.chroot(self.dir)
        os.chdir("/")

    def chroot_call(self, fn, *args):
        """Return fn(*args) evaluated while chrooted into the vserver.

        The original root and working directory are kept open as file
        descriptors and restored afterwards, whether or not fn raises.
        """
        cwd_fd = os.open(".", os.O_RDONLY)
        try:
            root_fd = os.open("/", os.O_RDONLY)
            try:
                self.__do_chroot()
                result = fn(*args)
            finally:
                # step back out: old root first, then the old cwd
                os.fchdir(root_fd)
                os.chroot(".")
                os.fchdir(cwd_fd)
                os.close(root_fd)
        finally:
            os.close(cwd_fd)
        return result

    def set_disklimit(self, block_limit):
        """Set the disk space limit for this context.

        block_limit is in kB; 0 removes the limit (the stored config is
        left as it is in that case).  Otherwise the limit is applied with
        an unlimited inode count and 2% reserved for root, and then stored
        as 'dlimits/0/space_total'.  OSError from vserverimpl is logged,
        not raised.

        When self.vm_running is false the usage figures come from
        self.disk_blocks / self.disk_inodes, so init_disk_info() must have
        been called first (AttributeError otherwise).
        """
        if block_limit == 0:
            try:
                vserverimpl.unsetdlimit(self.dir, self.ctx)
            except OSError:
                self.log("Unexpected error with unsetdlimit for context %d"
                         % self.ctx)
            return

        if self.vm_running:
            block_usage = vserverimpl.DLIMIT_KEEP
            inode_usage = vserverimpl.DLIMIT_KEEP
        else:
            # init_disk_info() must have been called to get usage values
            block_usage = self.disk_blocks
            inode_usage = self.disk_inodes

        try:
            vserverimpl.setdlimit(self.dir,
                                  self.ctx,
                                  block_usage,
                                  block_limit,
                                  inode_usage,
                                  vserverimpl.DLIMIT_INF,  # inode limit
                                  2)   # %age reserved for root
        except OSError:
            self.log("Unexpected error with setdlimit for context %d"
                     % self.ctx)

        self.config.update('dlimits/0/space_total', block_limit)

    def is_running(self):
        """Return vserverimpl.isrunning() for this context."""
        return vserverimpl.isrunning(self.ctx)

    def get_disklimit(self):
        """Return the context's block limit, or -1 if none is set.

        On success self.disk_blocks and self.disk_inodes are refreshed
        from the same call.  An OSError other than ESRCH is re-raised.
        """
        try:
            (self.disk_blocks, block_limit, self.disk_inodes, inode_limit,
             reserved) = vserverimpl.getdlimit(self.dir, self.ctx)
        except OSError:
            if sys.exc_info()[1].errno != errno.ESRCH:
                raise
            # get here if no vserver disk limit has been set for xid
            block_limit = -1

        return block_limit

    def set_sched_config(self, cpu_min, cpu_share):
        """ Write the CPU scheduler parameters to the vserver
        configuration (sched/fill-rate and sched/fill-rate2; a cpu_share
        of 0 also removes sched/idle-time).  If the vserver is running,
        the same parameters are then applied through set_sched(). """

        self.config.update('sched/fill-rate', cpu_min)
        self.config.update('sched/fill-rate2', cpu_share)
        if cpu_share == 0:
            self.config.unset('sched/idle-time')

        if self.is_running():
            self.set_sched(cpu_min, cpu_share)

    def set_sched(self, cpu_min, cpu_share):
        """ Update kernel CPU scheduling parameters for this context. """
        vserverimpl.setsched(self.ctx, cpu_min, cpu_share)

    def get_sched(self):
        """Placeholder; always returns (-1, False)."""
        # have no way of querying scheduler right now on a per vserver basis
        return (-1, False)

    def set_bwlimit(self, minrate = bwlimit.bwmin, maxrate = None,
                    exempt_min = None, exempt_max = None,
                    share = None, dev = "eth0"):
        """Configure bandwidth limiting for this context on `dev`.

        A minrate of None turns limiting off; any other value turns it on
        with the given rates and share.
        """
        if minrate is None:
            bwlimit.off(self.ctx, dev)
        else:
            bwlimit.on(self.ctx, dev, share,
                       minrate, maxrate, exempt_min, exempt_max)

    def get_bwlimit(self, dev = "eth0"):
        """Return bwlimit.get() for this context with its first field
        dropped, or the falsy result unchanged."""
        # NOTE(review): `dev` is accepted but not passed on to
        # bwlimit.get() -- confirm whether that is intended.
        result = bwlimit.get(self.ctx)
        # result of bwlimit.get is (ctx, share, minrate, maxrate)
        if result:
            result = result[1:]
        return result

    def open(self, filename, mode = "r", bufsize = -1):
        """Open `filename` as seen from inside the vserver's root."""
        return self.chroot_call(open, filename, mode, bufsize)

    def __do_chcontext(self, state_file):
        """Record the context id in `state_file` (if given) and enter it.

        The state file is closed after writing.  When vserverimpl.chcontext
        returns a true value, set_resources() and setup_done() are run.
        """
        if state_file:
            state_file.write("%u\n" % self.ctx)
            state_file.close()

        bcaps = vserverimpl.text2bcaps(self.get_capabilities_config())
        if vserverimpl.chcontext(self.ctx, bcaps):
            self.set_resources()
            vserverimpl.setup_done(self.ctx)

    def __prep(self, runlevel):
        """ Do the preparation that the vserver script performs before
        the startup scripts are executed: set the initial runlevel and
        mount /proc and /dev/pts below the vserver's root. """

        # Collect /var/run and /var/lock/subsys files, sparing utmp in the
        # top-level /var/run only.
        RUNDIR = "/var/run"
        LOCKDIR = "/var/lock/subsys"
        garbage = []
        top_level = True
        for dirpath, subdirs, files in os.walk(RUNDIR):
            if top_level:
                files = [f for f in files if f != 'utmp']
                top_level = False
            garbage.extend([dirpath + "/" + f for f in files])
        garbage.extend([p for p in [LOCKDIR + "/" + f
                                    for f in os.listdir(LOCKDIR)]
                        if os.path.isfile(p)])
        # The removal itself is deliberately switched off.
        if False:
            for f in garbage:
                os.unlink(f)

        # set the initial runlevel
        vserverimpl.setrunlevel(RUNDIR + "/utmp", runlevel)

        # mount /proc and /dev/pts
        self.__do_mount("none", self.dir, "/proc", "proc")
        # XXX - magic mount options
        self.__do_mount("none", self.dir, "/dev/pts", "devpts", 0,
                        "gid=5,mode=0620")

    def __do_mount(self, *mount_args):
        """Call vserverimpl.mount(); EBUSY is taken to mean the target is
        already mounted and is ignored.  Other errors propagate."""
        try:
            vserverimpl.mount(*mount_args)
        except OSError:
            if sys.exc_info()[1].errno != errno.EBUSY:
                # bare raise keeps the original traceback
                raise

    def enter(self):
        """Move the calling process into the vserver: cache the config,
        chroot into the vserver's root, then change context."""
        self.config.cache_it()
        self.__do_chroot()
        self.__do_chcontext(None)

    def start(self, wait, runlevel = 3):
        """Fork a child that boots the vserver; return the child's pid.

        The child records the context id in /var/run/vservers/<name>,
        points stdin at /dev/null and stdout/stderr at the vserver's
        /var/log/boot.log, enters the vserver and launches each of
        INITSCRIPTS without waiting for it.  The child always ends in
        os._exit() and never returns to the caller.  `wait` is accepted
        but not used.
        """
        self.vm_running = True

        child_pid = os.fork()
        if child_pid != 0:
            # parent process
            return child_pid

        # child process
        try:
            try:
                # get a new session
                os.setsid()

                # open state file to record vserver info
                state_file = open("/var/run/vservers/%s" % self.name, "w")

                # use /dev/null for stdin, /var/log/boot.log for stdout/err
                fd = os.open("/dev/null", os.O_RDONLY)
                if fd != 0:
                    os.dup2(fd, 0)
                    os.close(fd)

                # perform pre-init cleanup
                self.__prep(runlevel)

                self.config.cache_it()
                self.__do_chroot()
                boot_log = open("/var/log/boot.log", "a", 0)
                if boot_log.fileno() != 1:
                    os.dup2(boot_log.fileno(), 1)
                os.dup2(1, 2)

                boot_log.write("%s: starting the virtual server %s\n"
                               % (time.asctime(time.gmtime()), self.name))
                # execute each init script in turn
                # XXX - we don't support all scripts that vserver script does
                self.__do_chcontext(state_file)
                arg_subst = { 'runlevel': runlevel }
                for cmd in self.INITSCRIPTS:
                    try:
                        cmd_args = [cmd[0]] + [x % arg_subst
                                               for x in cmd[1:]]
                        boot_log.write("executing '%s'\n"
                                       % " ".join(cmd_args))
                        os.spawnvp(os.P_NOWAIT, cmd[0], cmd_args)
                    except Exception:
                        boot_log.write(traceback.format_exc() + "\n")
                        os._exit(1)

            # we get here due to an exception in the top-level child process
            except Exception:
                self.log(traceback.format_exc())
        finally:
            # whatever happened, the child must not fall back into the
            # code of whoever called start()
            os._exit(0)

    def set_resources(self):

        """ Called when vserver context is entered for first time,
        should be overridden by subclass. """

        pass

    def init_disk_info(self):
        """Measure disk usage with /usr/sbin/vdu and remember it.

        Sets self.disk_blocks and self.disk_inodes from the first line of
        vdu's output and returns self.disk_blocks * 1024.  Raises
        ValueError (after copying vdu's stderr to ours) when that line
        does not hold exactly two fields.
        """
        # An argument list with no shell: self.dir is passed through
        # verbatim, whatever characters it contains.
        argv = ["/usr/sbin/vdu", "--script", "--space", "--inodes",
                "--blocksize", "1024", "--xid", "%d" % self.ctx, self.dir]
        p = subprocess.Popen(argv, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             close_fds=True)
        # communicate() drains both pipes and reaps the child
        (out, err) = p.communicate()

        fields = []
        lines = out.splitlines()
        if lines:
            fields = lines[0].split()
        if len(fields) != 2:
            sys.stderr.write(err)
            raise ValueError("unexpected vdu output for context %d: %r"
                             % (self.ctx, out))

        (space, inodes) = fields
        self.disk_inodes = int(inodes)
        self.disk_blocks = int(space)
        #(self.disk_inodes, self.disk_blocks) = vduimpl.vdu(self.dir)

        return self.disk_blocks * 1024

    def stop(self, signal = signal.SIGKILL):
        """Send `signal` to the context via vserverimpl.killall() and mark
        the vserver as not running."""
        vserverimpl.killall(self.ctx, signal)
        self.vm_running = False
483
484
485
def create(vm_name, static = False, ctor = VServer):
    """Create a vserver with the external `vuseradd` command.

    vm_name -- name of the vserver / account to create.
    static  -- pass --static to vuseradd.
    ctor    -- callable invoked as ctor(vm_name, vm_id) to build the
               return value; vm_id is the uid of the account `vm_name`.

    Raises SystemError if vuseradd exits non-zero or is killed by a
    signal, and KeyError (from pwd.getpwnam) if the account is missing.
    """
    options = ['vuseradd']
    if static:
        options += ['--static']

    # With P_WAIT, spawnvp returns the exit code, or -signal if the
    # command was killed.  It is not a wait() status, so the os.W*
    # decoders must not be applied to it.
    ret = os.spawnvp(os.P_WAIT, 'vuseradd', options + [vm_name])
    if ret != 0:
        out = "system command ('%s') " % options
        if ret > 0:
            out += "failed, rc = %d" % ret
        else:
            out += "killed by signal %d" % -ret
        raise SystemError(out)
    vm_id = pwd.getpwnam(vm_name)[2]

    return ctor(vm_name, vm_id)