re-add Marc's patch that got lost in the merge
[util-vserver.git] / python / vserver.py
1 # Copyright 2005 Princeton University
2
3 #$Id: vserver.py,v 1.61 2007/07/18 14:50:49 dhozac Exp $
4
5 import errno
6 import fcntl
7 import os
8 import re
9 import pwd
10 import signal
11 import sys
12 import time
13 import traceback
14 import subprocess
15 import resource
16
17 import mountimpl
18 import runcmd
19 import utmp
20 import vserverimpl
21 import cpulimit, bwlimit
22
23 from vserverimpl import VS_SCHED_CPU_GUARANTEED as SCHED_CPU_GUARANTEED
24 from vserverimpl import DLIMIT_INF
25 from vserverimpl import VC_LIM_KEEP
26 from vserverimpl import VLIMIT_NSOCK
27 from vserverimpl import VLIMIT_OPENFD
28 from vserverimpl import VLIMIT_ANON
29 from vserverimpl import VLIMIT_SHMEM
30
#
# Legacy per-context flag bits, taken from the kernel header
# linux/vserver/legacy.h.  Every value is a distinct power of two, so
# several flags can be combined into one integer with bitwise OR.
#
FLAGS_LOCK = 1
FLAGS_SCHED = 2  # XXX - defined in util-vserver/src/chcontext.c
FLAGS_NPROC = 4
FLAGS_PRIVATE = 8
FLAGS_INIT = 16
FLAGS_HIDEINFO = 32
FLAGS_ULIMIT = 64
FLAGS_NAMESPACE = 128
42
# Maps a resource-limit name (without any "RLIMIT_"/"VLIMIT_" prefix) to
# the numeric resource id handed to vserverimpl.getrlimit()/setrlimit().
# It starts out with the vserver-specific limits ...
RLIMITS = {
    "NSOCK": VLIMIT_NSOCK,
    "OPENFD": VLIMIT_OPENFD,
    "ANON": VLIMIT_ANON,
    "SHMEM": VLIMIT_SHMEM,
}

# ... and is then extended with every RLIMIT_* constant that the
# platform's `resource` module exposes.  A platform name that collides
# with one of the vserver-specific entries is reported and skipped.
for entry in vars(resource):
    if not entry.startswith("RLIMIT_"):
        continue
    k = entry[len("RLIMIT_"):]
    if k in RLIMITS:
        print("WARNING: duplicate RLIMITS key %s" % k)
    else:
        RLIMITS[k] = getattr(resource, entry)
56
class NoSuchVServer(Exception):
    """Raised when the directory of a named vserver is missing or is not
    readable, writable and searchable by the calling process."""
    pass
58
59
class VServerConfig:
    """File-backed option store for one vserver.

    Every option lives in its own file below `directory`; the option
    name is the path of that file relative to `directory` and may
    contain sub-directories (e.g. 'rlimits/nsock.hard').
    """

    def __init__(self, name, directory):
        """Remember the vserver `name` (only used in error messages) and
        the `directory` that holds its option files."""
        self.name = name
        self.dir = directory

    def get(self, option, default = None):
        """Return the first line of the option file, with trailing
        whitespace removed.

        If the file cannot be opened or read, `default` is returned when
        it is not None; otherwise KeyError is raised.
        """
        try:
            f = open(os.path.join(self.dir, option), "r")
            try:
                return f.readline().rstrip()
            finally:
                # close the file even when readline() fails
                f.close()
        except IOError:
            if default is not None:
                return default
            raise KeyError("Key %s is not set for %s" % (option, self.name))

    def update(self, option, value):
        """Write `value` to the option file, creating missing parent
        directories on the way.

        A list is written one element per line; any other value is
        written as a single line.  The process umask is forced to 022
        while the file is created and is always restored afterwards,
        even when writing fails.  Errors from open()/write() propagate
        to the caller.
        """
        filename = os.path.join(self.dir, option)
        # Octal modes are spelled through int(..., 8) so that the same
        # source parses on both Python 2 and Python 3.
        old_umask = os.umask(int("0022", 8))
        try:
            try:
                os.makedirs(os.path.dirname(filename), int("0755", 8))
            except OSError:
                # Usually the directory already exists.  Any real
                # problem (permissions, ...) resurfaces at open() below.
                pass
            f = open(filename, 'w')
            try:
                if isinstance(value, list):
                    # "%s" formatting lets non-string elements (e.g.
                    # ints) be written as well
                    f.write("%s\n" % "\n".join(["%s" % v for v in value]))
                else:
                    f.write("%s\n" % value)
            finally:
                f.close()
        finally:
            os.umask(old_umask)
94
95
class VServer:
    """Handle on one vserver: its root directory, its configuration
    directory under /etc/vservers and its kernel context id.

    Kernel-level operations are delegated to the `vserverimpl`,
    `mountimpl`, `utmp` and `bwlimit` modules.
    """

    # (program, argument, ...) tuples run in order by start(); every
    # argument is %-formatted with a mapping that provides 'runlevel'.
    INITSCRIPTS = [('/etc/rc.vinit', 'start'),
                   ('/etc/rc.d/rc', '%(runlevel)d')]

    def __init__(self, name, vm_id = None, vm_running = None):
        """Bind to the existing vserver `name`.

        `vm_id` is the context id; when None it is read from the
        'context' configuration option (KeyError if that is not set).
        `vm_running` caches whether the context is running; when None
        the kernel is asked through is_running().

        Raises NoSuchVServer when the vserver directory is missing or is
        not readable, writable and searchable.
        """
        self.name = name
        self.rlimits_changed = False
        self.dir = "%s/%s" % (vserverimpl.VSERVER_BASEDIR, name)
        if not (os.path.isdir(self.dir) and
                os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
            raise NoSuchVServer("no such vserver: " + name)
        self.config = VServerConfig(name, "/etc/vservers/%s" % name)
        self.remove_caps = ~vserverimpl.CAP_SAFE
        if vm_id is None:
            vm_id = int(self.config.get('context'))
        self.ctx = vm_id
        if vm_running is None:
            vm_running = self.is_running()
        self.vm_running = vm_running

    def have_limits_changed(self):
        """Return True if set_rlimit_limit() replaced a previously set
        limit with a different value since the last start()/stop()."""
        return self.rlimits_changed

    def set_rlimit_limit(self,type,hard,soft,minimum):
        """Generic set resource limit function for vserver.

        `type` is a key of RLIMITS (KeyError otherwise).  The current
        limits are compared with the new ones first so that
        have_limits_changed() can report a real change; an old value of
        VC_LIM_KEEP never counts as a change.  OSError from the kernel
        calls is only reported (on stdout), and only while the context
        is running.
        """
        try:
            old_limits = self.get_rlimit_limit(type)
            for old, new in zip(old_limits, (hard, soft, minimum)):
                if old != VC_LIM_KEEP and old != new:
                    self.rlimits_changed = True
        except OSError:
            if self.is_running():
                print("Unexpected error with getrlimit for running context %d" % self.ctx)

        resource_type = RLIMITS[type]
        try:
            vserverimpl.setrlimit(self.ctx,resource_type,hard,soft,minimum)
        except OSError:
            if self.is_running():
                print("Unexpected error with setrlimit for running context %d" % self.ctx)

    def set_rlimit_config(self,type,hard,soft,minimum):
        """Store the limits in the configuration ('rlimits/<type>.hard',
        '.soft' and '.min'; values equal to VC_LIM_KEEP are not written)
        and then apply them through set_rlimit_limit()."""
        if hard != VC_LIM_KEEP:
            self.config.update('rlimits/%s.hard' % type.lower(), hard)
        if soft != VC_LIM_KEEP:
            self.config.update('rlimits/%s.soft' % type.lower(), soft)
        if minimum != VC_LIM_KEEP:
            self.config.update('rlimits/%s.min' % type.lower(), minimum)
        self.set_rlimit_limit(type,hard,soft,minimum)

    def get_rlimit_limit(self,type):
        """Return the limits for `type` as reported by
        vserverimpl.getrlimit(); if that raises OSError, report it and
        fall back to the configured values from get_rlimit_config()."""
        resource_type = RLIMITS[type]
        try:
            ret = vserverimpl.getrlimit(self.ctx,resource_type)
        except OSError:
            print("Unexpected error with getrlimit for context %d" % self.ctx)
            ret = self.get_rlimit_config(type)
        return ret

    def get_rlimit_config(self,type):
        """Return (hard, soft, minimum) as integers read from the
        configuration; an option that is not set yields VC_LIM_KEEP."""
        hard = int(self.config.get("rlimits/%s.hard"%type.lower(),VC_LIM_KEEP))
        soft = int(self.config.get("rlimits/%s.soft"%type.lower(),VC_LIM_KEEP))
        minimum = int(self.config.get("rlimits/%s.min"%type.lower(),VC_LIM_KEEP))
        return (hard,soft,minimum)

    def set_WHITELISTED_config(self,whitelisted):
        """Store `whitelisted` in the 'whitelisted' configuration option."""
        self.config.update('whitelisted', whitelisted)

    def set_capabilities(self, capabilities):
        """Apply the textual capability list `capabilities` to the
        context and return the result of vserverimpl.setbcaps()."""
        return vserverimpl.setbcaps(self.ctx, vserverimpl.text2bcaps(capabilities))

    def set_capabilities_config(self, capabilities):
        """Store `capabilities` in the 'bcapabilities' option, then
        apply them to the context."""
        self.config.update('bcapabilities', capabilities)
        self.set_capabilities(capabilities)

    def get_capabilities(self):
        """Return the context's current capabilities in text form."""
        return vserverimpl.bcaps2text(vserverimpl.getbcaps(self.ctx))

    def get_capabilities_config(self):
        """Return the configured capability text ('' when not set)."""
        return self.config.get('bcapabilities', '')

    def __do_chroot(self):
        """chroot() the calling process into the vserver's root
        directory and make that the working directory."""
        os.chroot(self.dir)
        os.chdir("/")

    def chroot_call(self, fn, *args):
        """Call fn(*args) while chroot()ed into the vserver and return
        its result.

        Descriptors for the original root and working directory are kept
        open so that both can be restored afterwards, also when `fn`
        raises.
        """
        cwd_fd = os.open(".", os.O_RDONLY)
        try:
            root_fd = os.open("/", os.O_RDONLY)
            try:
                self.__do_chroot()
                result = fn(*args)
            finally:
                # leave the chroot through the saved root descriptor,
                # then go back to the original working directory
                os.fchdir(root_fd)
                os.chroot(".")
                os.fchdir(cwd_fd)
                os.close(root_fd)
        finally:
            os.close(cwd_fd)
        return result

    def set_disklimit(self, block_limit):
        """Set the disk space limit (in kB) for this vserver; a limit of
        0 removes the disk limit instead.

        When the vserver is not running, the current usage figures are
        taken from self.disk_blocks / self.disk_inodes, so
        init_disk_info() (or a successful get_disklimit()) must have
        been called before.  OSError from the kernel calls is only
        reported on stdout.  The new limit is also stored in the
        'dlimits/0/space_total' configuration option.
        """
        # block_limit is in kB
        if block_limit == 0:
            try:
                vserverimpl.unsetdlimit(self.dir, self.ctx)
            except OSError:
                print("Unexpected error with unsetdlimit for context %d" % self.ctx)
            return

        if self.vm_running:
            block_usage = vserverimpl.DLIMIT_KEEP
            inode_usage = vserverimpl.DLIMIT_KEEP
        else:
            # init_disk_info() must have been called to get usage values
            block_usage = self.disk_blocks
            inode_usage = self.disk_inodes

        try:
            vserverimpl.setdlimit(self.dir,
                                  self.ctx,
                                  block_usage,
                                  block_limit,
                                  inode_usage,
                                  vserverimpl.DLIMIT_INF,  # inode limit
                                  2)   # %age reserved for root
        except OSError:
            print("Unexpected error with setdlimit for context %d" % self.ctx)

        self.config.update('dlimits/0/space_total', block_limit)

    def is_running(self):
        """Return vserverimpl.isrunning() for this context."""
        return vserverimpl.isrunning(self.ctx)

    def get_disklimit(self):
        """Return the block limit reported by vserverimpl.getdlimit(),
        or -1 when no disk limit is set for the context (ESRCH).

        On success self.disk_blocks and self.disk_inodes are refreshed
        as a side effect.  Any other OSError is re-raised.
        """
        try:
            (self.disk_blocks, block_limit, self.disk_inodes, inode_limit,
             reserved) = vserverimpl.getdlimit(self.dir, self.ctx)
        except OSError:
            # sys.exc_info() instead of "except OSError, ex" keeps this
            # parseable on both Python 2 and Python 3
            if sys.exc_info()[1].errno != errno.ESRCH:
                raise
            # get here if no vserver disk limit has been set for xid
            block_limit = -1

        return block_limit

    def set_sched_config(self, cpu_share, sched_flags):
        """Write the CPU scheduler parameters to the vserver
        configuration ('sched/fill-rate2' gets `cpu_share`;
        'sched/fill-rate' gets `cpu_share` when SCHED_CPU_GUARANTEED is
        set in `sched_flags`, else 0).

        When the vserver is marked as running the kernel parameters are
        updated as well, through set_sched().
        """
        if sched_flags & SCHED_CPU_GUARANTEED:
            cpu_guaranteed = cpu_share
        else:
            cpu_guaranteed = 0
        self.config.update('sched/fill-rate2', cpu_share)
        self.config.update('sched/fill-rate', cpu_guaranteed)

        if self.vm_running:
            self.set_sched(cpu_share, sched_flags)

    def set_sched(self, cpu_share, sched_flags = 0):
        """ Update kernel CPU scheduling parameters for this context. """
        vserverimpl.setsched(self.ctx, cpu_share, sched_flags)

    def get_sched(self):
        """Placeholder: always returns (-1, False)."""
        # have no way of querying scheduler right now on a per vserver basis
        return (-1, False)

    def set_bwlimit(self, minrate = bwlimit.bwmin, maxrate = None,
                    exempt_min = None, exempt_max = None,
                    share = None, dev = "eth0"):
        """Enable bandwidth limiting for this context on `dev` with the
        given parameters, or switch it off when `minrate` is None."""
        if minrate is None:
            bwlimit.off(self.ctx, dev)
        else:
            bwlimit.on(self.ctx, dev, share,
                       minrate, maxrate, exempt_min, exempt_max)

    def get_bwlimit(self, dev = "eth0"):
        """Return the result of bwlimit.get() for this context without
        its leading element (the context id); a false result is
        returned unchanged."""
        # NOTE(review): `dev` is accepted but not forwarded to
        # bwlimit.get() -- confirm whether that is intended.
        result = bwlimit.get(self.ctx)
        # result of bwlimit.get is (ctx, share, minrate, maxrate)
        if result:
            result = result[1:]
        return result

    def open(self, filename, mode = "r", bufsize = -1):
        """Open `filename` as seen from inside the vserver's root
        directory and return the file object."""
        return self.chroot_call(open, filename, mode, bufsize)

    def __do_chcontext(self, state_file):
        """Move the calling process into the vserver context.

        If `state_file` is given, the context id is written to it (one
        line) and the file is closed first.  When vserverimpl.chcontext()
        returns a true value, set_resources() is called followed by
        vserverimpl.setup_done() -- presumably this marks a newly
        created context; confirm against vserverimpl.
        """
        if state_file:
            state_file.write("%u\n" % self.ctx)
            state_file.close()

        if vserverimpl.chcontext(self.ctx, vserverimpl.text2bcaps(self.get_capabilities_config())):
            self.set_resources()
            vserverimpl.setup_done(self.ctx)

    def __prep(self, runlevel, log):
        """ Perform all the crap that the vserver script does before
        actually executing the startup scripts.

        Must run after chroot()ing into the vserver: all paths are
        relative to the current root.  `log` is currently unused.
        """
        # remove /var/run and /var/lock/subsys files
        # but don't remove utmp from the top-level /var/run
        RUNDIR = "/var/run"
        LOCKDIR = "/var/lock/subsys"

        # collect everything first, delete afterwards
        garbage = []
        top_level = True
        for dirpath, subdirs, files in os.walk(RUNDIR):
            for f in files:
                if top_level and f == 'utmp':
                    continue
                garbage.append(dirpath + "/" + f)
            top_level = False
        for f in os.listdir(LOCKDIR):
            path = LOCKDIR + "/" + f
            if os.path.isfile(path):
                garbage.append(path)
        for f in garbage:
            os.unlink(f)

        # set the initial runlevel
        f = open(RUNDIR + "/utmp", "w")
        try:
            utmp.set_runlevel(f, runlevel)
        finally:
            f.close()

        # mount /proc and /dev/pts
        self.__do_mount("none", "/proc", "proc")
        # XXX - magic mount options
        self.__do_mount("none", "/dev/pts", "devpts", 0, "gid=5,mode=0620")

    def __do_mount(self, *mount_args):
        """Call mountimpl.mount(*mount_args), ignoring EBUSY (taken to
        mean "already mounted"); every other OSError is re-raised."""
        try:
            mountimpl.mount(*mount_args)
        except OSError:
            if sys.exc_info()[1].errno != errno.EBUSY:
                # bare raise keeps the original traceback
                raise
            # EBUSY: assume already mounted

    def enter(self):
        """chroot() the calling process into the vserver and move it
        into the vserver's context."""
        self.__do_chroot()
        self.__do_chcontext(None)

    def start(self, wait, runlevel = 3):
        """Boot the vserver in a forked child and return the child's pid.

        The child starts a new session, chroot()s into the vserver,
        redirects stdin to /dev/null and stdout/stderr to the vserver's
        /var/log/boot.log, performs the pre-init cleanup, enters the
        context and runs each INITSCRIPTS entry in turn, waiting for it
        to finish.  It exits with status 0 when all of that succeeded
        and with status 1 (after logging a traceback) otherwise.

        `wait` is accepted for interface compatibility; it is not used.
        """
        self.vm_running = True
        self.rlimits_changed = False

        child_pid = os.fork()
        if child_pid == 0:
            # child process: must never return into the caller's code,
            # hence the unconditional os._exit() in the finally clause
            status = 1
            try:
                try:
                    # get a new session
                    os.setsid()

                    # open state file to record vserver info
                    state_file = open("/var/run/vservers/%s" % self.name, "w")

                    # use /dev/null for stdin, /var/log/boot.log for
                    # stdout/err.  This relies on open() handing out the
                    # lowest free descriptor: /dev/null becomes fd 0 and
                    # the log file fd 1.
                    os.close(0)
                    os.close(1)
                    os.open("/dev/null", os.O_RDONLY)
                    self.__do_chroot()
                    log = open("/var/log/boot.log", "w", 0)
                    os.dup2(1, 2)

                    log.write("%s: starting the virtual server %s\n" %
                              (time.asctime(time.gmtime()), self.name))

                    # perform pre-init cleanup
                    self.__prep(runlevel, log)

                    # execute each init script in turn
                    # XXX - we don't support all scripts that vserver script does
                    self.__do_chcontext(state_file)
                    arg_subst = { 'runlevel': runlevel }
                    for cmd in self.INITSCRIPTS:
                        cmd_args = [cmd[0]] + [x % arg_subst for x in cmd[1:]]
                        log.write("executing '%s'\n" % " ".join(cmd_args))
                        # spawnvp() takes the argument vector as ONE
                        # sequence (unlike spawnlp())
                        os.spawnvp(os.P_WAIT, cmd[0], cmd_args)
                    status = 0
                except:
                    # catch-all is deliberate at the fork boundary; the
                    # traceback ends up in boot.log once stderr has
                    # been redirected
                    traceback.print_exc()
            finally:
                os._exit(status)

        # parent process
        return child_pid

    def set_resources(self):
        """ Called when vserver context is entered for first time,
        should be overridden by subclass. """
        pass

    def init_disk_info(self):
        """Run /usr/sbin/vdu on the vserver directory, store the block
        and inode usage in self.disk_blocks / self.disk_inodes and
        return the space used in bytes (blocks * 1024).

        Raises ValueError when vdu's first output line cannot be parsed;
        if vdu printed nothing at all its stderr is forwarded to
        sys.stderr first.
        """
        # argument list + no shell: the directory name is never
        # interpreted by a shell
        cmd = ["/usr/sbin/vdu", "--script", "--space", "--inodes",
               "--blocksize", "1024", "--xid", "%d" % self.ctx, self.dir]
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             close_fds=True)
        # communicate() closes stdin, drains both pipes without the risk
        # of blocking on a full stderr pipe, and waits for the child
        out, err = p.communicate()
        lines = out.splitlines()
        if not lines:
            sys.stderr.write(err)
            raise ValueError("vdu produced no output for context %d" % self.ctx)

        (space, inodes) = lines[0].split()
        self.disk_inodes = int(inodes)
        self.disk_blocks = int(space)
        #(self.disk_inodes, self.disk_blocks) = vduimpl.vdu(self.dir)

        return self.disk_blocks * 1024

    def stop(self, signal = signal.SIGKILL):
        """Send `signal` (SIGKILL by default) to every process of the
        context and mark the vserver as stopped."""
        vserverimpl.killall(self.ctx, signal)
        self.vm_running = False
        self.rlimits_changed = False
436
437
438
def create(vm_name, static = False, ctor = VServer):
    """Create a new vserver called `vm_name` and return a handle on it.

    The account is created by running `vuseradd` (with '--static' in
    front of the name when `static` is true); the numeric uid of the
    resulting passwd entry is used as the context id.  `ctor` is called
    as ctor(vm_name, uid) to build the returned object.
    """
    vuseradd_args = [vm_name]
    if static:
        vuseradd_args.insert(0, '--static')
    runcmd.run('vuseradd', vuseradd_args)

    uid = pwd.getpwnam(vm_name).pw_uid
    return ctor(vm_name, uid)