Optimization in case the context is already running.
[util-vserver-pl.git] / python / vserver.py
1 # Copyright 2005 Princeton University
2
3 #$Id: vserver.py,v 1.72 2007/08/02 16:01:59 dhozac Exp $
4
5 import errno
6 import fcntl
7 import os
8 import re
9 import pwd
10 import signal
11 import sys
12 import time
13 import traceback
14 import subprocess
15 import resource
16
17 import vserverimpl
18 import cpulimit, bwlimit
19
20 from vserverimpl import DLIMIT_INF
21 from vserverimpl import VC_LIM_KEEP
22 from vserverimpl import VLIMIT_NSOCK
23 from vserverimpl import VLIMIT_OPENFD
24 from vserverimpl import VLIMIT_ANON
25 from vserverimpl import VLIMIT_SHMEM
26
#
# Context flag bits, as defined in the kernel's linux/vserver/legacy.h
#
FLAGS_LOCK = 1 << 0
FLAGS_SCHED = 1 << 1  # XXX - defined in util-vserver/src/chcontext.c
FLAGS_NPROC = 1 << 2
FLAGS_PRIVATE = 1 << 3
FLAGS_INIT = 1 << 4
FLAGS_HIDEINFO = 1 << 5
FLAGS_ULIMIT = 1 << 6
FLAGS_NAMESPACE = 1 << 7

# Resource name (upper case, no prefix) -> resource identifier.  Seeded with
# the vserver-specific limits exported by vserverimpl.
RLIMITS = dict(NSOCK=VLIMIT_NSOCK,
               OPENFD=VLIMIT_OPENFD,
               ANON=VLIMIT_ANON,
               SHMEM=VLIMIT_SHMEM)

# Merge in every RLIMIT_* constant the platform's resource module offers,
# keyed by the name with the "RLIMIT_" prefix stripped.  A vserver-specific
# entry is never overwritten; the clash is only reported on stdout.
for entry in list(vars(resource)):
    if not entry.startswith("RLIMIT_"):
        continue
    k = entry[len("RLIMIT_"):]
    if k in RLIMITS:
        sys.stdout.write("WARNING: duplicate RLIMITS key %s\n" % k)
    else:
        RLIMITS[k] = getattr(resource, entry)
52
class NoSuchVServer(Exception):
    """Raised when a vserver's directory is missing or not accessible."""
54
55
class VServerConfig:
    """File-per-option configuration store for one vserver.

    Every option is a file below ``self.dir`` whose (right-stripped) content
    is the option's value.  Option names may contain slashes, in which case
    the option lives in a subdirectory (e.g. ``rlimits/nsock.hard``).

    After cache_it() has been called the object is frozen: get() answers
    only from the in-memory snapshot, and update()/unset() do nothing.
    VServer relies on this because it calls cache_it() right before
    chroot()ing, after which ``self.dir`` would resolve inside the guest's
    filesystem.
    """

    def __init__(self, name, directory):
        """Bind to the configuration directory `directory` of vserver `name`.

        Raises NoSuchVServer if `directory` is not a directory that the
        caller can read, write and search.
        """
        self.name = name
        self.dir = directory
        self.cache = None   # None: read/write files; dict: frozen snapshot
        if not (os.path.isdir(self.dir) and
                os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
            raise NoSuchVServer("%s does not exist" % self.dir)

    def get(self, option, default = None):
        """Return the value of `option` with trailing whitespace stripped.

        If the option is not set (or its file cannot be read), return
        `default` when it is not None; otherwise raise KeyError.
        """
        try:
            # "is not None" rather than truthiness: an empty snapshot must
            # still prevent any file access once the config is frozen.
            if self.cache is not None:
                return self.cache[option]
            f = open(os.path.join(self.dir, option), "r")
            try:
                return f.read().rstrip()
            finally:
                f.close()
        except (KeyError, EnvironmentError):
            if default is not None:
                return default
            raise KeyError("Key %s is not set for %s" % (option, self.name))

    def update(self, option, value):
        """Write `value` to the file of `option`, creating parent directories.

        A list is written one element per line; anything else is written
        via "%s".  A trailing newline is always appended.  Does nothing once
        the configuration has been frozen by cache_it().
        """
        if self.cache is not None:
            return

        import stat

        filename = os.path.join(self.dir, option)
        dir_mode = (stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP |
                    stat.S_IROTH | stat.S_IXOTH)               # 0755
        old_umask = os.umask(stat.S_IWGRP | stat.S_IWOTH)      # 0022
        try:
            try:
                os.makedirs(os.path.dirname(filename), dir_mode)
            except OSError:
                # Usually "already exists"; a real problem resurfaces
                # when the file is opened below.
                pass
            f = open(filename, 'w')
            try:
                if isinstance(value, list):
                    f.write("%s\n" % "\n".join(value))
                else:
                    f.write("%s\n" % value)
            finally:
                f.close()
        finally:
            # Always restore the process-wide umask, even on failure.
            os.umask(old_umask)

    def unset(self, option):
        """Delete the file of `option` and prune parent directories left empty.

        Returns True if the file was removed, False if it could not be
        removed (typically because it does not exist) or if the
        configuration has been frozen by cache_it().
        """
        if self.cache is not None:
            return False

        filename = os.path.join(self.dir, option)
        try:
            os.unlink(filename)
        except OSError:
            return False
        try:
            os.removedirs(os.path.dirname(filename))
        except OSError:
            # Parent directory still has other entries; leave it alone.
            pass
        return True

    def cache_it(self):
        """Snapshot every readable regular file below self.dir and freeze.

        Keys of the snapshot are paths relative to self.dir; values are the
        right-stripped file contents.  Symbolic links are skipped, and
        directories reached through symbolic links are not descended into.
        self.cache is only replaced once the whole tree has been read.
        """
        prefix = os.path.join(self.dir, '')   # self.dir with a trailing sep
        snapshot = {}
        for dirname, subdirs, fnames in os.walk(self.dir):
            for fname in fnames:
                full_name = os.path.join(dirname, fname)
                if os.path.islink(full_name):
                    continue
                if not (os.path.isfile(full_name) and
                        os.access(full_name, os.R_OK)):
                    continue
                f = open(full_name, "r")
                try:
                    snapshot[full_name[len(prefix):]] = f.read().rstrip()
                finally:
                    f.close()
        self.cache = snapshot
130
131
class VServer:
    """Control object for one vserver.

    Ties together the vserver's root directory (``self.dir``), its
    configuration directory (``self.config``, a VServerConfig on
    /etc/vservers/<name>) and its context id (``self.ctx``).  Kernel-side
    operations are delegated to the ``vserverimpl`` module.
    """

    # (script, argument template, ...) tuples run in order by start().
    # Templates are %-formatted with {'runlevel': <runlevel>}; a script that
    # does not exist inside the vserver is skipped.
    INITSCRIPTS = [('/etc/rc.vinit', 'start'),
                   ('/etc/rc.d/rc', '%(runlevel)d')]

    def __init__(self, name, vm_id = None, vm_running = None, logfile=None):
        """Attach to the existing vserver `name`.

        vm_id      -- context id; read from the 'context' config option
                      when None.
        vm_running -- cached running state; queried via is_running() when
                      None.
        logfile    -- path used by log(); logging is disabled when falsy.

        Raises NoSuchVServer if the root or configuration directory is
        missing or not fully accessible.
        """
        self.name = name
        self.dir = "%s/%s" % (vserverimpl.VSERVER_BASEDIR, name)
        if not (os.path.isdir(self.dir) and
                os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
            raise NoSuchVServer("no such vserver: " + name)
        self.config = VServerConfig(name, "/etc/vservers/%s" % name)
        self.remove_caps = ~vserverimpl.CAP_SAFE
        if vm_id is None:
            vm_id = int(self.config.get('context'))
        self.ctx = vm_id
        if vm_running is None:
            vm_running = self.is_running()
        self.vm_running = vm_running
        self.logfile = logfile

    # inspired from nodemanager's logger
    def log(self,msg):
        """Append a UTC-timestamped `msg` to self.logfile, if one is set.

        Never raises: if the log file cannot be opened or written, the
        message is printed on stdout instead.
        """
        if not self.logfile:
            return
        import stat
        try:
            fd = os.open(self.logfile,
                         os.O_WRONLY | os.O_CREAT | os.O_APPEND,
                         stat.S_IRUSR | stat.S_IWUSR)   # 0600
            try:
                if not msg.endswith('\n'):
                    msg += '\n'
                os.write(fd, '%s: %s' % (time.asctime(time.gmtime()), msg))
            finally:
                # Close even when the write fails, so the fd is not leaked.
                os.close(fd)
        except Exception:
            sys.stdout.write('%s: (%s failed to open) %s\n'
                             % (time.asctime(time.gmtime()),
                                self.logfile, msg))

    def set_rlimit(self, type, hard, soft, min):
        """Generic set resource limit function for vserver.

        `type` is a key of RLIMITS.  Each of hard/soft/min that differs from
        VC_LIM_KEEP is written to the 'rlimits/<type>.<kind>' config option.
        If anything was written and the vserver is running, the limits are
        also pushed to the kernel; an OSError from that call is logged, not
        raised.

        Returns True if at least one value was written to the config.
        """
        update = False
        for kind, value in (('hard', hard), ('soft', soft), ('min', min)):
            if value != VC_LIM_KEEP:
                self.config.update('rlimits/%s.%s' % (type.lower(), kind),
                                   value)
                update = True

        if self.is_running() and update:
            resource_type = RLIMITS[type]
            try:
                vserverimpl.setrlimit(self.ctx, resource_type, hard, soft, min)
            except OSError:
                # sys.exc_info() keeps this valid on old and new Pythons.
                self.log("Error: setrlimit(%d, %s, %d, %d, %d): %s"
                         % (self.ctx, type.lower(), hard, soft, min,
                            sys.exc_info()[1]))

        return update

    def set_capabilities(self, capabilities):
        """Apply the textual bcapabilities list to the context."""
        return vserverimpl.setbcaps(self.ctx, vserverimpl.text2bcaps(capabilities))

    def set_capabilities_config(self, capabilities):
        """Store `capabilities` in the 'bcapabilities' option and apply it."""
        self.config.update('bcapabilities', capabilities)
        self.set_capabilities(capabilities)

    def get_capabilities(self):
        """Return the context's current bcapabilities as text."""
        return vserverimpl.bcaps2text(vserverimpl.getbcaps(self.ctx))

    def get_capabilities_config(self):
        """Return the configured 'bcapabilities' text ('' when unset)."""
        return self.config.get('bcapabilities', '')

    def set_ipaddresses(self, addresses):
        """Replace the context's addresses with the comma-separated list."""
        vserverimpl.netremove(self.ctx, "all")
        for a in addresses.split(","):
            vserverimpl.netadd(self.ctx, a)

    def set_ipaddresses_config(self, addresses):
        """Store the comma-separated `addresses` and apply them.

        Address number i goes to the 'interfaces/<i>/ip' option; options
        with higher consecutive indices left over from a previously longer
        list are removed.
        """
        addrs = addresses.split(",")
        for i, a in enumerate(addrs):
            self.config.update("interfaces/%d/ip" % i, a)
        i = len(addrs)
        while self.config.unset("interfaces/%d/ip" % i):
            i += 1
        self.set_ipaddresses(addresses)

    def get_ipaddresses_config(self):
        """Return the configured addresses as a comma-separated string.

        Reads 'interfaces/<i>/ip' for i = 0, 1, ... and stops at the first
        index that is unset or empty.
        """
        ret = []
        while True:
            r = self.config.get("interfaces/%d/ip" % len(ret), '')
            if r == '':
                break
            ret.append(r)
        return ",".join(ret)

    def get_ipaddresses(self):
        """Not implemented; always returns None."""
        # No clean way to do this right now.
        return None

    def __do_chroot(self):
        """chroot into the vserver's root and make "/" the working dir."""
        os.chroot(self.dir)
        os.chdir("/")

    def chroot_call(self, fn, *args):
        """Return fn(*args), evaluated while chroot()ed into the vserver.

        The original root and working directory are restored afterwards,
        also when `fn` raises, by way of descriptors opened beforehand.
        """
        cwd_fd = os.open(".", os.O_RDONLY)
        try:
            root_fd = os.open("/", os.O_RDONLY)
            try:
                self.__do_chroot()
                result = fn(*args)
            finally:
                # fchdir to the saved root, then chroot(".") to leave the jail
                os.fchdir(root_fd)
                os.chroot(".")
                os.fchdir(cwd_fd)
                os.close(root_fd)
        finally:
            os.close(cwd_fd)
        return result

    def set_disklimit(self, block_limit):
        """Set the disk space limit to `block_limit` kB; 0 removes the limit.

        An OSError from the underlying call is logged, not raised.  For a
        non-zero limit the value is also stored in the
        'dlimits/0/space_total' config option.  When the vserver is not
        running, init_disk_info() (or get_disklimit()) must have been
        called first so that current usage is known.
        """
        # block_limit is in kB
        if block_limit == 0:
            try:
                vserverimpl.unsetdlimit(self.dir, self.ctx)
            except OSError:
                self.log("Unexpected error with unsetdlimit for context %d" % self.ctx)
            return

        if self.vm_running:
            block_usage = vserverimpl.DLIMIT_KEEP
            inode_usage = vserverimpl.DLIMIT_KEEP
        else:
            # init_disk_info() must have been called to get usage values
            block_usage = self.disk_blocks
            inode_usage = self.disk_inodes

        try:
            vserverimpl.setdlimit(self.dir,
                                  self.ctx,
                                  block_usage,
                                  block_limit,
                                  inode_usage,
                                  vserverimpl.DLIMIT_INF,  # inode limit
                                  2)   # %age reserved for root
        except OSError:
            self.log("Unexpected error with setdlimit for context %d" % self.ctx)

        self.config.update('dlimits/0/space_total', block_limit)

    def is_running(self):
        """Return vserverimpl's notion of whether the context is running."""
        return vserverimpl.isrunning(self.ctx)

    def get_disklimit(self):
        """Return the current block limit, or -1 if none has been set.

        As a side effect, self.disk_blocks and self.disk_inodes are
        refreshed when a limit exists.  OSErrors other than ESRCH propagate.
        """
        try:
            (self.disk_blocks, block_limit, self.disk_inodes, inode_limit,
             reserved) = vserverimpl.getdlimit(self.dir, self.ctx)
        except OSError:
            if sys.exc_info()[1].errno != errno.ESRCH:
                raise
            # get here if no vserver disk limit has been set for xid
            block_limit = -1

        return block_limit

    def set_sched_config(self, cpu_min, cpu_share):
        """ Write CPU scheduler parameters to the vserver configuration
        ('sched/fill-rate' and 'sched/fill-rate2'; 'sched/idle-time' is
        removed when cpu_share is 0).  If the vserver is running, the
        kernel parameters are updated as well via set_sched(). """

        self.config.update('sched/fill-rate', cpu_min)
        self.config.update('sched/fill-rate2', cpu_share)
        if cpu_share == 0:
            self.config.unset('sched/idle-time')

        if self.is_running():
            self.set_sched(cpu_min, cpu_share)

    def set_sched(self, cpu_min, cpu_share):
        """ Update kernel CPU scheduling parameters for this context. """
        vserverimpl.setsched(self.ctx, cpu_min, cpu_share)

    def get_sched(self):
        """Placeholder; always returns (-1, False)."""
        # have no way of querying scheduler right now on a per vserver basis
        return (-1, False)

    def set_bwlimit(self, minrate = bwlimit.bwmin, maxrate = None,
                    exempt_min = None, exempt_max = None,
                    share = None, dev = "eth0"):
        """Turn bandwidth limiting for this context on `dev` on or off.

        Passing minrate=None turns limiting off; otherwise all parameters
        are handed to bwlimit.on().
        """
        if minrate is None:
            bwlimit.off(self.ctx, dev)
        else:
            bwlimit.on(self.ctx, dev, share,
                       minrate, maxrate, exempt_min, exempt_max)

    def get_bwlimit(self, dev = "eth0"):
        """Return bwlimit.get()'s result for this context without its
        leading ctx element, or the falsy result unchanged."""
        # NOTE(review): `dev` is accepted but never passed to bwlimit.get();
        # confirm whether bwlimit.get() should receive it.
        result = bwlimit.get(self.ctx)
        # result of bwlimit.get is (ctx, share, minrate, maxrate)
        if result:
            result = result[1:]
        return result

    def open(self, filename, mode = "r", bufsize = -1):
        """Open `filename` as seen from inside the vserver's root."""
        return self.chroot_call(open, filename, mode, bufsize)

    def __do_chcontext(self, state_file):
        """Record the context id in `state_file` (if given) and enter it.

        `state_file` is closed after writing.  When vserverimpl.chcontext()
        returns a true value, set_resources() is invoked followed by
        vserverimpl.setup_done().
        """
        if state_file:
            state_file.write("%u\n" % self.ctx)
            state_file.close()

        if vserverimpl.chcontext(self.ctx, vserverimpl.text2bcaps(self.get_capabilities_config())):
            self.set_resources()
            vserverimpl.setup_done(self.ctx)

    def __prep(self, runlevel):
        """ Perform the preparation that the vserver script does before
        actually executing the startup scripts: set the initial runlevel
        and mount /proc and /dev/pts below the vserver's root.

        start() calls this before chroot()ing, so RUNDIR and LOCKDIR are
        resolved against the current root rather than self.dir. """

        # collect /var/run and /var/lock/subsys files
        # but leave utmp from the top-level /var/run alone
        RUNDIR = "/var/run"
        LOCKDIR = "/var/lock/subsys"
        garbage = []
        for depth, (dirpath, subdirs, files) in enumerate(os.walk(RUNDIR)):
            if depth == 0:
                # os.walk yields RUNDIR itself first; only there is utmp kept
                files = [f for f in files if f != 'utmp']
            garbage += [dirpath + "/" + f for f in files]
        garbage += [path
                    for path in [LOCKDIR + "/" + name
                                 for name in os.listdir(LOCKDIR)]
                    if os.path.isfile(path)]
        # Removal is deliberately disabled; the list is only computed.
        if False:
            for f in garbage:
                os.unlink(f)

        # set the initial runlevel
        vserverimpl.setrunlevel(RUNDIR + "/utmp", runlevel)

        # mount /proc and /dev/pts
        self.__do_mount("none", self.dir, "/proc", "proc")
        # XXX - magic mount options
        self.__do_mount("none", self.dir, "/dev/pts", "devpts", 0, "gid=5,mode=0620")

    def __do_mount(self, *mount_args):
        """Call vserverimpl.mount(*mount_args), treating EBUSY as success."""
        try:
            vserverimpl.mount(*mount_args)
        except OSError:
            if sys.exc_info()[1].errno == errno.EBUSY:
                # assume already mounted
                return
            raise   # bare raise keeps the original traceback

    def enter(self):
        """Move the calling process into the vserver (chroot + context).

        The configuration is cached first because it is no longer reachable
        under the same path after the chroot.
        """
        self.config.cache_it()
        self.__do_chroot()
        self.__do_chcontext(None)

    def start(self, runlevel = 3):
        """Boot the vserver in a forked child; the caller returns at once.

        The parent only sets self.vm_running = True.  The child performs
        the boot sequence, logs any failure via self.log(), and always
        terminates with os._exit(0), so it can never fall back into the
        caller's code, whatever is raised.
        """
        if os.fork() != 0:
            # Parent should just return.
            self.vm_running = True
            return

        # child process
        try:
            try:
                self.__start_child(runlevel)
            except Exception:
                # an exception in the top-level child process
                self.log(traceback.format_exc())
        finally:
            os._exit(0)

    def __start_child(self, runlevel):
        """Child-side half of start(): enter the vserver and run INITSCRIPTS."""
        # so we don't chcontext with priv'ed fds
        close_nonstandard_fds()

        # get a new session
        os.setsid()

        # open state file to record vserver info
        state_file = open("/var/run/vservers/%s" % self.name, "w")

        # use /dev/null for stdin, /var/log/boot.log for stdout/err
        fd = os.open("/dev/null", os.O_RDONLY)
        if fd != 0:
            os.dup2(fd, 0)
            os.close(fd)

        # perform pre-init cleanup
        self.__prep(runlevel)

        self.config.cache_it()
        self.__do_chroot()
        # opened after the chroot, i.e. this is the vserver's own boot.log
        boot_log = open("/var/log/boot.log", "a", 0)
        if boot_log.fileno() != 1:
            os.dup2(boot_log.fileno(), 1)
        os.dup2(1, 2)

        boot_log.write("%s: starting the virtual server %s\n" %
                       (time.asctime(time.gmtime()), self.name))
        # enter vserver context, then execute each init script in turn
        # XXX - we don't support all scripts that vserver script does
        self.__do_chcontext(state_file)
        for cmd in self.INITSCRIPTS:
            try:
                arg_subst = { 'runlevel': runlevel }
                cmd_args = [cmd[0]] + [arg % arg_subst for arg in cmd[1:]]
                if os.path.isfile(cmd[0]):
                    boot_log.write("executing '%s'\n" % " ".join(cmd_args))
                    os.spawnvp(os.P_NOWAIT, cmd[0], cmd_args)
            except Exception:
                # one failing script must not prevent the next from running
                boot_log.write(traceback.format_exc() + "\n")

    def set_resources(self):

        """ Called when vserver context is entered for first time,
        should be overridden by subclass. """

        pass

    def init_disk_info(self):
        """Determine current disk usage and return it in bytes.

        Sets self.disk_blocks and self.disk_inodes.  The values come from
        the existing disk limit record when there is one; otherwise
        /usr/sbin/vdu is run over the vserver's root.  Raises ValueError if
        vdu's output cannot be parsed.
        """
        try:
            # Cheap path: getdlimit() reports usage in elements 0 (blocks)
            # and 2 (inodes), the same layout get_disklimit() unpacks.
            dlimit = vserverimpl.getdlimit(self.dir, self.ctx)
            self.disk_blocks = dlimit[0]
            self.disk_inodes = dlimit[2]
            return self.disk_blocks * 1024
        except Exception:
            # Best effort only (e.g. no limit set yet): fall back to vdu.
            pass
        # argv list instead of a shell string: self.dir is never re-parsed
        cmd = ["/usr/sbin/vdu", "--script", "--space", "--inodes",
               "--blocksize", "1024", "--xid", "%d" % self.ctx, self.dir]
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             close_fds=True)
        p.stdin.close()
        line = p.stdout.readline()
        if not line:
            sys.stderr.write(p.stderr.read())
        p.stdout.close()
        p.stderr.close()
        p.wait()

        (space, inodes) = line.split()
        self.disk_inodes = int(inodes)
        self.disk_blocks = int(space)
        #(self.disk_inodes, self.disk_blocks) = vduimpl.vdu(self.dir)

        return self.disk_blocks * 1024

    def stop(self, signal = signal.SIGKILL):
        """Send `signal` to the context via vserverimpl.killall() and mark
        the vserver as not running."""
        vserverimpl.killall(self.ctx, signal)
        self.vm_running = False

    def setname(self, slice_id):
        '''Set vcVHI_CONTEXT field in kernel to slice_id'''
        vserverimpl.setname(self.ctx, slice_id)

    def getname(self):
        '''Get vcVHI_CONTEXT field in kernel'''
        return vserverimpl.getname(self.ctx)
499
500
def create(vm_name, static = False, ctor = VServer):
    """Create the vserver `vm_name` with vuseradd and return a handle on it.

    static -- pass --static to vuseradd.
    ctor   -- callable invoked as ctor(vm_name, vm_id) to build the result;
              vm_id is the uid of the passwd entry named `vm_name`.

    Raises SystemError if vuseradd exits with a non-zero status or is
    killed by a signal.
    """
    options = ['vuseradd']
    if static:
        options += ['--static']
    # With P_WAIT, spawnvp() returns the exit code itself, or -signal if the
    # process was killed.  It is not a wait() status, so the os.W* macros
    # must not be applied to it.
    ret = os.spawnvp(os.P_WAIT, 'vuseradd', options + [vm_name])
    if ret != 0:
        out = "system command ('%s') " % options
        if ret > 0:
            out += "failed, rc = %d" % ret
        else:
            out += "killed by signal %d" % -ret
        raise SystemError(out)
    vm_id = pwd.getpwnam(vm_name)[2]

    return ctor(vm_name, vm_id)
517
518
def close_nonstandard_fds():
    """Close all open file descriptors other than 0, 1, and 2."""
    # Look the limit up by name; the numeric sysconf index is platform
    # specific and must not be hard-coded.
    max_fd = os.sysconf("SC_OPEN_MAX")
    for fd in range(3, max_fd):
        try:
            os.close(fd)
        except OSError:
            pass  # most likely an fd that isn't open
525