Don't nuke /var/run.
[util-vserver.git] / python / vserver.py
1 # Copyright 2005 Princeton University
2
3 import errno
4 import fcntl
5 import os
6 import re
7 import pwd
8 import signal
9 import sys
10 import time
11 import traceback
12
13 import mountimpl
14 import runcmd
15 import utmp
16 import vserverimpl, vduimpl
17 import cpulimit, bwlimit
18
19 from vserverimpl import VS_SCHED_CPU_GUARANTEED as SCHED_CPU_GUARANTEED
20 from vserverimpl import DLIMIT_INF
21 from vserverimpl import VC_LIM_KEEP
22
23 from vserverimpl import RLIMIT_CPU
24 from vserverimpl import RLIMIT_RSS
25 from vserverimpl import RLIMIT_NPROC
26 from vserverimpl import RLIMIT_NOFILE
27 from vserverimpl import RLIMIT_MEMLOCK
28 from vserverimpl import RLIMIT_AS
29 from vserverimpl import RLIMIT_LOCKS
30 from vserverimpl import RLIMIT_SIGPENDING
31 from vserverimpl import RLIMIT_MSGQUEUE
32 from vserverimpl import VLIMIT_NSOCK
33 from vserverimpl import VLIMIT_OPENFD
34 from vserverimpl import VLIMIT_ANON
35 from vserverimpl import VLIMIT_SHMEM
36
#
# Legacy context flags, taken from the kernel header
# linux/vserver/legacy.h.  Every flag occupies exactly one bit.
#
FLAGS_LOCK = 1 << 0
FLAGS_SCHED = 1 << 1  # XXX - defined in util-vserver/src/chcontext.c
FLAGS_NPROC = 1 << 2
FLAGS_PRIVATE = 1 << 3
FLAGS_INIT = 1 << 4
FLAGS_HIDEINFO = 1 << 5
FLAGS_ULIMIT = 1 << 6
FLAGS_NAMESPACE = 1 << 7
48
# Maps the short resource names accepted by the rlimit methods of
# VServer to the resource identifiers imported from vserverimpl.
RLIMITS = dict(CPU=RLIMIT_CPU,
               RSS=RLIMIT_RSS,
               NPROC=RLIMIT_NPROC,
               NOFILE=RLIMIT_NOFILE,
               MEMLOCK=RLIMIT_MEMLOCK,
               AS=RLIMIT_AS,
               LOCKS=RLIMIT_LOCKS,
               SIGPENDING=RLIMIT_SIGPENDING,
               MSGQUEUE=RLIMIT_MSGQUEUE,
               NSOCK=VLIMIT_NSOCK,
               OPENFD=VLIMIT_OPENFD,
               ANON=VLIMIT_ANON,
               SHMEM=VLIMIT_SHMEM)
62
class NoSuchVServer(Exception):
    """Raised by VServer() when the vserver directory is missing or
    cannot be read, written and searched."""
64
65
66 class VServer:
67
    # Init commands that start() runs in order.  Each tuple is the
    # program followed by its arguments; the arguments are %-formatted
    # with a dict that provides 'runlevel'.
    INITSCRIPTS = [('/etc/rc.vinit', 'start'),
                   ('/etc/rc.d/rc', '%(runlevel)d')]
70
71     def __init__(self, name, vm_id = None, vm_running = False):
72
73         self.name = name
74         self.rlimits_changed = False
75         self.config_file = "/etc/vservers/%s.conf" % name
76         self.dir = "%s/%s" % (vserverimpl.VSERVER_BASEDIR, name)
77         if not (os.path.isdir(self.dir) and
78                 os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
79             raise NoSuchVServer, "no such vserver: " + name
80         self.config = {}
81         for config_file in ["/etc/vservers.conf", self.config_file]:
82             try:
83                 self.config.update(self.__read_config_file(config_file))
84             except IOError, ex:
85                 if ex.errno != errno.ENOENT:
86                     raise
87         self.remove_caps = ~vserverimpl.CAP_SAFE;
88         if vm_id == None:
89             vm_id = int(self.config['S_CONTEXT'])
90         self.ctx = vm_id
91         self.vm_running = vm_running
92
93     def have_limits_changed(self):
94         return self.rlimits_changed
95
96     def set_rlimit_limit(self,type,hard,soft,minimum):
97         """Generic set resource limit function for vserver"""
98         global RLIMITS
99         changed = False
100         try:
101             old_hard, old_soft, old_minimum = self.get_rlimit_limit(type)
102             if old_hard != VC_LIM_KEEP and old_hard <> hard: changed = True
103             if old_soft != VC_LIM_KEEP and old_soft <> soft: changed = True
104             if old_minimum != VC_LIM_KEEP and old_minimum <> minimum: changed = True
105             self.rlimits_changed = self.rlimits_changed or changed 
106         except OSError, e:
107             if self.is_running(): print "Unexpected error with getrlimit for running context %d" % self.ctx
108
109         resource_type = RLIMITS[type]
110         try:
111             ret = vserverimpl.setrlimit(self.ctx,resource_type,hard,soft,minimum)
112         except OSError, e:
113             if self.is_running(): print "Unexpected error with setrlimit for running context %d" % self.ctx
114
115     def set_rlimit_config(self,type,hard,soft,minimum):
116         """Generic set resource limit function for vserver"""
117         resources = {}
118         if hard <> VC_LIM_KEEP:
119             resources["VS_%s_HARD"%type] = hard
120         if soft <> VC_LIM_KEEP:
121             resources["VS_%s_SOFT"%type] = soft
122         if minimum <> VC_LIM_KEEP:
123             resources["VS_%s_MINIMUM"%type] = minimum
124         if len(resources)>0:
125             self.update_resources(resources)
126         self.set_rlimit_limit(type,hard,soft,minimum)
127
128     def get_rlimit_limit(self,type):
129         """Generic get resource configuration function for vserver"""
130         global RLIMITS
131         resource_type = RLIMITS[type]
132         try:
133             ret = vserverimpl.getrlimit(self.ctx,resource_type)
134         except OSError, e:
135             print "Unexpected error with getrlimit for context %d" % self.ctx
136             ret = self.get_rlimit_config(type)
137         return ret
138
139     def get_rlimit_config(self,type):
140         """Generic get resource configuration function for vserver"""
141         hard = int(self.config.get("VS_%s_HARD"%type,VC_LIM_KEEP))
142         soft = int(self.config.get("VS_%s_SOFT"%type,VC_LIM_KEEP))
143         minimum = int(self.config.get("VS_%s_MINIMUM"%type,VC_LIM_KEEP))
144         return (hard,soft,minimum)
145
146     def set_WHITELISTED_config(self,whitelisted):
147         resources = {'VS_WHITELISTED': whitelisted}
148         self.update_resources(resources)
149
    # Matches one KEY=value line of a configuration file.  Group 1 is
    # the variable name (upper-case letters and underscores, optionally
    # preceded by spaces); group 2 is the rest of the line.
    config_var_re = re.compile(r"^ *([A-Z_]+)=(.*)\n?$", re.MULTILINE)
151
152     def __read_config_file(self, filename):
153
154         f = open(filename, "r")
155         data = f.read()
156         f.close()
157         config = {}
158         for m in self.config_var_re.finditer(data):
159             (key, val) = m.groups()
160             config[key] = val.strip('"')
161         return config
162
163     def __update_config_file(self, filename, newvars):
164
165         # read old file, apply changes
166         f = open(filename, "r")
167         data = f.read()
168         f.close()
169         todo = newvars.copy()
170         changed = False
171         offset = 0
172         for m in self.config_var_re.finditer(data):
173             (key, val) = m.groups()
174             newval = todo.pop(key, None)
175             if newval != None:
176                 data = data[:offset+m.start(2)] + str(newval) + data[offset+m.end(2):]
177                 offset += len(str(newval)) - (m.end(2)-m.start(2))
178                 changed = True
179         for (newkey, newval) in todo.items():
180             data += "%s=%s\n" % (newkey, newval)
181             changed = True
182
183         if not changed:
184             return
185
186         # write new file
187         newfile = filename + ".new"
188         f = open(newfile, "w")
189         f.write(data)
190         f.close()
191
192         # replace old file with new
193         os.rename(newfile, filename)
194
195     def __do_chroot(self):
196
197         os.chroot(self.dir)
198         os.chdir("/")
199
200     def chroot_call(self, fn, *args):
201
202         cwd_fd = os.open(".", os.O_RDONLY)
203         try:
204             root_fd = os.open("/", os.O_RDONLY)
205             try:
206                 self.__do_chroot()
207                 result = fn(*args)
208             finally:
209                 os.fchdir(root_fd)
210                 os.chroot(".")
211                 os.fchdir(cwd_fd)
212                 os.close(root_fd)
213         finally:
214             os.close(cwd_fd)
215         return result
216
217     def set_disklimit(self, block_limit):
218         # block_limit is in kB
219         if block_limit == 0:
220             try:
221                 vserverimpl.unsetdlimit(self.dir, self.ctx)
222             except OSError, e:
223                 print "Unexpected error with unsetdlimit for context %d" % self.ctx
224             return
225
226         if self.vm_running:
227             block_usage = vserverimpl.DLIMIT_KEEP
228             inode_usage = vserverimpl.DLIMIT_KEEP
229         else:
230             # init_disk_info() must have been called to get usage values
231             block_usage = self.disk_blocks
232             inode_usage = self.disk_inodes
233
234
235         try:
236             vserverimpl.setdlimit(self.dir,
237                                   self.ctx,
238                                   block_usage,
239                                   block_limit,
240                                   inode_usage,
241                                   vserverimpl.DLIMIT_INF,  # inode limit
242                                   2)   # %age reserved for root
243         except OSError, e:
244             print "Unexpected error with setdlimit for context %d" % self.ctx
245
246
247         resources = {'VS_DISK_MAX': block_limit}
248         self.update_resources(resources)
249
250     def is_running(self):
251         return vserverimpl.isrunning(self.ctx)
252     
253     def get_disklimit(self):
254
255         try:
256             (self.disk_blocks, block_limit, self.disk_inodes, inode_limit,
257              reserved) = vserverimpl.getdlimit(self.dir, self.ctx)
258         except OSError, ex:
259             if ex.errno != errno.ESRCH:
260                 raise
261             # get here if no vserver disk limit has been set for xid
262             block_limit = -1
263
264         return block_limit
265
266     def set_sched_config(self, cpu_share, sched_flags):
267
268         """ Write current CPU scheduler parameters to the vserver
269         configuration file. This method does not modify the kernel CPU
270         scheduling parameters for this context. """
271
272         if cpu_share == int(self.config.get("CPULIMIT", -1)):
273             return
274         cpu_guaranteed = sched_flags & SCHED_CPU_GUARANTEED
275         cpu_config = { "CPULIMIT": cpu_share, "CPUGUARANTEED": cpu_guaranteed }
276         self.update_resources(cpu_config)
277         if self.vm_running:
278             self.set_sched(cpu_share, sched_flags)
279
280     def set_sched(self, cpu_share, sched_flags = 0):
281         """ Update kernel CPU scheduling parameters for this context. """
282         vserverimpl.setsched(self.ctx, cpu_share, sched_flags)
283
284     def get_sched(self):
285         # have no way of querying scheduler right now on a per vserver basis
286         return (-1, False)
287
288     def set_bwlimit(self, minrate = bwlimit.bwmin, maxrate = None,
289                     exempt_min = None, exempt_max = None,
290                     share = None, dev = "eth0"):
291
292         if minrate is None:
293             bwlimit.off(self.ctx, dev)
294         else:
295             bwlimit.on(self.ctx, dev, share,
296                        minrate, maxrate, exempt_min, exempt_max)
297
298     def get_bwlimit(self, dev = "eth0"):
299
300         result = bwlimit.get(self.ctx)
301         # result of bwlimit.get is (ctx, share, minrate, maxrate)
302         if result:
303             result = result[1:]
304         return result
305
306     def open(self, filename, mode = "r", bufsize = -1):
307
308         return self.chroot_call(open, filename, mode, bufsize)
309
310     def __do_chcontext(self, state_file):
311
312         if state_file:
313             print >>state_file, "S_CONTEXT=%u" % self.ctx
314             print >>state_file, "S_PROFILE="
315             state_file.close()
316
317         if vserverimpl.chcontext(self.ctx):
318             self.set_resources()
319             vserverimpl.setup_done(self.ctx)
320
321     def __prep(self, runlevel, log):
322
323         """ Perform all the crap that the vserver script does before
324         actually executing the startup scripts. """
325
326         # remove /var/run and /var/lock/subsys files
327         # but don't remove utmp from the top-level /var/run
328         RUNDIR = "/var/run"
329         LOCKDIR = "/var/lock/subsys"
330         filter_fn = lambda fs: filter(lambda f: f != 'utmp', fs)
331         garbage = reduce((lambda (out, ff), (dir, subdirs, files):
332                           (out + map((dir + "/").__add__, ff(files)),
333                            lambda fs: fs)),
334                          list(os.walk(RUNDIR)),
335                          ([], filter_fn))[0]
336         garbage += filter(os.path.isfile, map((LOCKDIR + "/").__add__,
337                                               os.listdir(LOCKDIR)))
338         if False:
339             for f in garbage:
340                 os.unlink(f)
341
342         # set the initial runlevel
343         f = open(RUNDIR + "/utmp", "w")
344         utmp.set_runlevel(f, runlevel)
345         f.close()
346
347         # mount /proc and /dev/pts
348         self.__do_mount("none", "/proc", "proc")
349         # XXX - magic mount options
350         self.__do_mount("none", "/dev/pts", "devpts", 0, "gid=5,mode=0620")
351
352     def __do_mount(self, *mount_args):
353
354         try:
355             mountimpl.mount(*mount_args)
356         except OSError, ex:
357             if ex.errno == errno.EBUSY:
358                 # assume already mounted
359                 return
360             raise ex
361
362     def enter(self):
363
364         state_file = open("/var/run/vservers/%s.ctx" % self.name, "w")
365         self.__do_chroot()
366         self.__do_chcontext(state_file)
367
368     def start(self, wait, runlevel = 3):
369         self.vm_running = True
370         self.rlimits_changed = False
371
372         child_pid = os.fork()
373         if child_pid == 0:
374             # child process
375             try:
376                 # get a new session
377                 os.setsid()
378
379                 # open state file to record vserver info
380                 state_file = open("/var/run/vservers/%s.ctx" % self.name, "w")
381
382                 # use /dev/null for stdin, /var/log/boot.log for stdout/err
383                 os.close(0)
384                 os.close(1)
385                 os.open("/dev/null", os.O_RDONLY)
386                 self.__do_chroot()
387                 log = open("/var/log/boot.log", "w", 0)
388                 os.dup2(1, 2)
389
390                 print >>log, ("%s: starting the virtual server %s" %
391                               (time.asctime(time.gmtime()), self.name))
392
393                 # perform pre-init cleanup
394                 self.__prep(runlevel, log)
395
396                 # execute each init script in turn
397                 # XXX - we don't support all scripts that vserver script does
398                 self.__do_chcontext(state_file)
399                 for cmd in self.INITSCRIPTS + [None]:
400                         try:
401                             # enter vserver context
402                             arg_subst = { 'runlevel': runlevel }
403                             cmd_args = [cmd[0]] + map(lambda x: x % arg_subst,
404                                             cmd[1:])
405                             print >>log, "executing '%s'" % " ".join(cmd_args)
406                             os.spawnvp(os.P_WAIT,cmd[0],*cmd_args)
407                         except:
408                                 traceback.print_exc()
409                                 os._exit(1)
410
411             # we get here due to an exception in the top-level child process
412             except Exception, ex:
413                 traceback.print_exc()
414             os._exit(0)
415
416         # parent process
417         return child_pid
418
419     def set_resources(self):
420
421         """ Called when vserver context is entered for first time,
422         should be overridden by subclass. """
423
424         pass
425
426     def update_resources(self, resources):
427
428         self.config.update(resources)
429
430         # write new values to configuration file
431         self.__update_config_file(self.config_file, resources)
432
433     def init_disk_info(self):
434
435         (self.disk_inodes, self.disk_blocks, size) = vduimpl.vdu(self.dir)
436
437         return size
438
439     def stop(self, signal = signal.SIGKILL):
440         vserverimpl.killall(self.ctx, signal)
441         self.vm_running = False
442         self.rlimits_changed = False
443
444
445
def create(vm_name, static = False, ctor = VServer):
    """Create a new vserver and return an object for it.

    Runs 'vuseradd [--static] vm_name' through runcmd.run(), looks
    vm_name up in the password database and uses the uid found there
    as the context id passed to ctor(vm_name, vm_id).
    """
    arguments = [vm_name]
    if static:
        arguments.insert(0, '--static')
    runcmd.run('vuseradd', arguments)

    # field 2 of a passwd entry is the numeric uid
    passwd_entry = pwd.getpwnam(vm_name)
    return ctor(vm_name, passwd_entry[2])