Update CPU token buckets of running vserver
[util-vserver-pl.git] / python / vserver.py
1 # Copyright 2005 Princeton University
2
3 #$Id: vserver.py,v 1.72 2007/08/02 16:01:59 dhozac Exp $
4
5 import errno
6 import fcntl
7 import os
8 import re
9 import pwd
10 import signal
11 import sys
12 import time
13 import traceback
14 import subprocess
15 import resource
16
17 import vserverimpl
18 import cpulimit, bwlimit
19
20 from vserverimpl import DLIMIT_INF
21 from vserverimpl import VC_LIM_KEEP
22 from vserverimpl import VLIMIT_NSOCK
23 from vserverimpl import VLIMIT_OPENFD
24 from vserverimpl import VLIMIT_ANON
25 from vserverimpl import VLIMIT_SHMEM
26
#
# Context flags, taken from the kernel header linux/vserver/legacy.h.
# Each flag is a distinct single bit.
#
FLAGS_LOCK = 1 << 0
FLAGS_SCHED = 1 << 1  # XXX - defined in util-vserver/src/chcontext.c
FLAGS_NPROC = 1 << 2
FLAGS_PRIVATE = 1 << 3
FLAGS_INIT = 1 << 4
FLAGS_HIDEINFO = 1 << 5
FLAGS_ULIMIT = 1 << 6
FLAGS_NAMESPACE = 1 << 7

# Resource-limit name -> numeric resource id.  Seeded with the limits that
# come from vserverimpl; the platform's own limits are merged in below.
RLIMITS = {
    "NSOCK": VLIMIT_NSOCK,
    "OPENFD": VLIMIT_OPENFD,
    "ANON": VLIMIT_ANON,
    "SHMEM": VLIMIT_SHMEM,
}

# add in the platform supported rlimits: every RLIMIT_<NAME> attribute of
# the resource module becomes RLIMITS["<NAME>"], unless that name is taken
for entry in vars(resource).keys():
    if not entry.startswith("RLIMIT_"):
        continue
    k = entry[len("RLIMIT_"):]
    if k in RLIMITS:
        sys.stdout.write("WARNING: duplicate RLIMITS key %s\n" % k)
    else:
        RLIMITS[k] = getattr(resource, entry)
52
class NoSuchVServer(Exception):
    """Raised when a vserver's root or configuration directory is unusable."""
54
55
class VServerConfig:
    """Key/value configuration stored as one file per option.

    An option is a path relative to the configuration directory (for
    example ``rlimits/nsock.hard``); its value is the file's contents with
    trailing whitespace stripped.  Once cache_it() has been called the
    object is a read-only in-memory snapshot: get() is answered from the
    snapshot and update()/unset() do nothing.
    """

    # Permission bits are built with int(..., 8) so that the module parses
    # whichever octal-literal syntax the running interpreter expects.
    _UMASK = int("022", 8)
    _DIR_MODE = int("755", 8)

    def __init__(self, name, directory):
        """Bind to *directory*, the configuration directory of vserver *name*.

        Raises NoSuchVServer unless *directory* is an existing directory
        that the caller may read, write and search.
        """
        self.name = name
        self.dir = directory
        # None: not cached.  A dict (even an empty one): cached snapshot.
        self.cache = None
        if not (os.path.isdir(self.dir) and
                os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
            raise NoSuchVServer("%s does not exist" % self.dir)

    def get(self, option, default = None):
        """Return the value stored for *option*.

        The value comes from the snapshot when one exists, otherwise from
        the option's file.  If the option cannot be found or read,
        *default* is returned when it is not None; otherwise KeyError is
        raised.
        """
        try:
            # "is not None" rather than truthiness: an empty snapshot is
            # still a snapshot and must not fall through to the disk.
            if self.cache is not None:
                return self.cache[option]
            f = open(os.path.join(self.dir, option), "r")
            try:
                return f.read().rstrip()
            finally:
                f.close()
        except (KeyError, EnvironmentError):
            if default is not None:
                return default
            raise KeyError("Key %s is not set for %s" % (option, self.name))

    def update(self, option, value):
        """Write *value* to the file for *option*, creating parent directories.

        A list is written one element per line; anything else is written
        with ``%s`` formatting.  A trailing newline is always appended.
        Does nothing once the configuration has been cached.  Errors from
        opening or writing the file propagate to the caller.
        """
        if self.cache is not None:
            return

        filename = os.path.join(self.dir, option)
        if isinstance(value, list):
            text = "\n".join(value)
        else:
            text = value

        old_umask = os.umask(self._UMASK)
        try:
            try:
                os.makedirs(os.path.dirname(filename), self._DIR_MODE)
            except OSError:
                # Typically "already exists"; a genuine problem shows up
                # again when the file is opened just below.
                pass
            f = open(filename, 'w')
            try:
                f.write("%s\n" % text)
            finally:
                f.close()
        finally:
            # restore the caller's umask even when the write failed
            os.umask(old_umask)

    def unset(self, option):
        """Delete the file for *option*.

        Returns True if the file was removed and False if it could not be
        (for instance because it does not exist).  Returns None without
        touching anything once the configuration has been cached.
        Directories left empty by the removal are pruned, but never the
        configuration directory itself or anything above it.
        """
        if self.cache is not None:
            return

        filename = os.path.join(self.dir, option)
        try:
            os.unlink(filename)
        except OSError:
            return False
        self._prune_empty_dirs(os.path.dirname(filename))
        return True

    def _prune_empty_dirs(self, leaf):
        """Remove *leaf* and its now-empty ancestors strictly below self.dir."""
        root = os.path.normpath(self.dir)
        current = os.path.normpath(leaf)
        while current.startswith(root + os.sep):
            try:
                os.rmdir(current)
            except OSError:
                # not empty (or not removable): stop climbing
                break
            current = os.path.dirname(current)

    def cache_it(self):
        """Snapshot every readable regular file under the directory.

        Keys are paths relative to the configuration directory, i.e. the
        same strings that get() accepts as *option*.  Symbolic links are
        skipped.  After this call the object no longer reads from or
        writes to the filesystem.
        """
        snapshot = {}
        prefix = os.path.join(self.dir, '')
        for dirpath, dirnames, filenames in os.walk(self.dir):
            for entry in filenames:
                full_name = os.path.join(dirpath, entry)
                if os.path.islink(full_name):
                    continue
                if not (os.path.isfile(full_name) and
                        os.access(full_name, os.R_OK)):
                    continue
                f = open(full_name, "r")
                try:
                    content = f.read().rstrip()
                finally:
                    f.close()
                # strip only the leading directory prefix
                key = full_name
                if key.startswith(prefix):
                    key = key[len(prefix):]
                snapshot[key] = content
        self.cache = snapshot
130
131
class VServer:
    """Handle on one vserver: a security context id plus its chroot tree.

    The root filesystem lives under vserverimpl.VSERVER_BASEDIR/<name> and
    the persistent settings under /etc/vservers/<name> (see VServerConfig).
    Methods ending in ``_config`` persist a setting and then apply it;
    their unsuffixed counterparts only talk to the kernel via vserverimpl.
    """

    # (program, argument template, ...) tuples launched in order by start();
    # every template is %-formatted with {'runlevel': <runlevel>}.
    INITSCRIPTS = [('/etc/rc.vinit', 'start'),
                   ('/etc/rc.d/rc', '%(runlevel)d')]

    # Mode used when log() has to create the log file (owner read/write).
    # Built with int(..., 8) to stay independent of octal-literal syntax.
    _LOG_MODE = int("600", 8)

    def __init__(self, name, vm_id = None, vm_running = None, logfile=None):
        """Attach to the existing vserver *name*.

        vm_id      -- context id; read from the 'context' config option
                      when None.
        vm_running -- running state; queried with is_running() when None.
        logfile    -- path that log() appends to; None disables logging.

        Raises NoSuchVServer if the vserver's root directory (or, via
        VServerConfig, its configuration directory) is missing or not
        readable, writable and searchable.
        """
        self.name = name
        self.rlimits_changed = False
        self.dir = "%s/%s" % (vserverimpl.VSERVER_BASEDIR, name)
        if not (os.path.isdir(self.dir) and
                os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
            raise NoSuchVServer("no such vserver: " + name)
        self.config = VServerConfig(name, "/etc/vservers/%s" % name)
        self.remove_caps = ~vserverimpl.CAP_SAFE
        if vm_id is None:
            vm_id = int(self.config.get('context'))
        self.ctx = vm_id
        if vm_running is None:
            vm_running = self.is_running()
        self.vm_running = vm_running
        self.logfile = logfile

    # inspired from nodemanager's logger
    def log(self,msg):
        """Append a UTC-timestamped *msg* to self.logfile, if one is set.

        Never raises for ordinary failures: if the file cannot be opened
        or written, the message is printed on standard output instead.
        """
        if not self.logfile:
            return
        try:
            fd = os.open(self.logfile,
                         os.O_WRONLY | os.O_CREAT | os.O_APPEND,
                         self._LOG_MODE)
            try:
                if not msg.endswith('\n'):
                    msg += '\n'
                os.write(fd, '%s: %s' % (time.asctime(time.gmtime()), msg))
            finally:
                # close the descriptor even when the write fails
                os.close(fd)
        except Exception:
            sys.stdout.write('%s: (%s failed to open) %s\n'
                             % (time.asctime(time.gmtime()), self.logfile, msg))

    def have_limits_changed(self):
        """Return True if a set_rlimit_limit() call altered a limit since
        the flag was last reset (start() and stop() reset it)."""
        return self.rlimits_changed

    def set_rlimit_limit(self,type,hard,soft,minimum):
        """Apply a resource limit to the context in the kernel.

        *type* is a key of the module-level RLIMITS table.  Before the new
        values are set, the current ones are fetched; if any current value
        is not VC_LIM_KEEP and differs from the requested one, the
        rlimits_changed flag is raised.  OSError from either step is
        logged (only when the context is running) and otherwise ignored.
        """
        changed = False
        try:
            old_hard, old_soft, old_minimum = self.get_rlimit_limit(type)
            for old, new in ((old_hard, hard),
                             (old_soft, soft),
                             (old_minimum, minimum)):
                if old != VC_LIM_KEEP and old != new:
                    changed = True
            self.rlimits_changed = self.rlimits_changed or changed
        except OSError:
            if self.is_running():
                self.log("Unexpected error with getrlimit for running context %d" % self.ctx)

        resource_type = RLIMITS[type]
        try:
            vserverimpl.setrlimit(self.ctx,resource_type,hard,soft,minimum)
        except OSError:
            if self.is_running():
                self.log("Unexpected error with setrlimit for running context %d" % self.ctx)

    def set_rlimit_config(self,type,hard,soft,minimum):
        """Persist a resource limit to the configuration, then apply it.

        Each of *hard*, *soft* and *minimum* is written to
        ``rlimits/<type>.{hard,soft,min}`` unless it equals VC_LIM_KEEP;
        set_rlimit_limit() is then called with all three values.
        """
        prefix = 'rlimits/%s' % type.lower()
        for suffix, value in (('hard', hard), ('soft', soft), ('min', minimum)):
            if value != VC_LIM_KEEP:
                self.config.update('%s.%s' % (prefix, suffix), value)
        self.set_rlimit_limit(type,hard,soft,minimum)

    def get_rlimit_limit(self,type):
        """Return the context's current limits for *type* from the kernel.

        The value is whatever vserverimpl.getrlimit returns (callers
        unpack it as hard, soft, minimum).  If the kernel query raises
        OSError, the error is logged and the configured values from
        get_rlimit_config() are returned instead.
        """
        resource_type = RLIMITS[type]
        try:
            ret = vserverimpl.getrlimit(self.ctx,resource_type)
        except OSError:
            self.log("Unexpected error with getrlimit for context %d" % self.ctx)
            ret = self.get_rlimit_config(type)
        return ret

    def get_rlimit_config(self,type):
        """Return (hard, soft, minimum) for *type* from the configuration.

        Values that are not configured come back as VC_LIM_KEEP.
        """
        prefix = "rlimits/%s" % type.lower()
        hard = int(self.config.get(prefix + ".hard", VC_LIM_KEEP))
        soft = int(self.config.get(prefix + ".soft", VC_LIM_KEEP))
        minimum = int(self.config.get(prefix + ".min", VC_LIM_KEEP))
        return (hard,soft,minimum)

    def set_capabilities(self, capabilities):
        """Set the context's bcaps from their textual form *capabilities*."""
        return vserverimpl.setbcaps(self.ctx, vserverimpl.text2bcaps(capabilities))

    def set_capabilities_config(self, capabilities):
        """Store *capabilities* under 'bcapabilities', then apply them."""
        self.config.update('bcapabilities', capabilities)
        self.set_capabilities(capabilities)

    def get_capabilities(self):
        """Return the context's current bcaps in textual form."""
        return vserverimpl.bcaps2text(vserverimpl.getbcaps(self.ctx))

    def get_capabilities_config(self):
        """Return the configured 'bcapabilities' text ('' when unset)."""
        return self.config.get('bcapabilities', '')

    def set_ipaddresses(self, addresses):
        """Replace the context's addresses with the comma-separated
        list *addresses* (all existing ones are removed first)."""
        vserverimpl.netremove(self.ctx, "all")
        for a in addresses.split(","):
            vserverimpl.netadd(self.ctx, a)

    def set_ipaddresses_config(self, addresses):
        """Persist the comma-separated *addresses*, then apply them.

        Address number i is stored as ``interfaces/<i>/ip``; entries left
        over from a previously longer list are removed.
        """
        wanted = addresses.split(",")
        for i, a in enumerate(wanted):
            self.config.update("interfaces/%d/ip" % i, a)
        # drop stale entries beyond the new list, stopping at the first gap
        i = len(wanted)
        while self.config.unset("interfaces/%d/ip" % i):
            i += 1
        self.set_ipaddresses(addresses)

    def get_ipaddresses_config(self):
        """Return the configured addresses as a comma-separated string.

        Reads ``interfaces/0/ip``, ``interfaces/1/ip``, ... and stops at
        the first index that is unset or empty.
        """
        found = []
        while True:
            r = self.config.get("interfaces/%d/ip" % len(found), '')
            if r == '':
                break
            found.append(r)
        return ",".join(found)

    def get_ipaddresses(self):
        """Always returns None."""
        # No clean way to do this right now.
        return None

    def __do_chroot(self):
        """chroot into the vserver's root directory and chdir to its '/'."""
        os.chroot(self.dir)
        os.chdir("/")

    def chroot_call(self, fn, *args):
        """Return fn(*args) evaluated while chroot-ed into the vserver.

        Descriptors for the current directory and the real '/' are opened
        beforehand so that the original root and working directory can be
        restored afterwards, whether or not *fn* raises.
        """
        cwd_fd = os.open(".", os.O_RDONLY)
        try:
            root_fd = os.open("/", os.O_RDONLY)
            try:
                self.__do_chroot()
                result = fn(*args)
            finally:
                # step back out: real root first, then the old cwd
                os.fchdir(root_fd)
                os.chroot(".")
                os.fchdir(cwd_fd)
                os.close(root_fd)
        finally:
            os.close(cwd_fd)
        return result

    def set_disklimit(self, block_limit):
        """Set the context's disk block limit and record it in the config.

        block_limit is in kB.  A value of 0 removes the disk limit and
        returns without touching the configuration.  For a vserver that is
        not running, the current usage is taken from self.disk_blocks and
        self.disk_inodes, so init_disk_info() must have been called first.
        OSError from the kernel calls is logged, not raised.
        """
        # block_limit is in kB
        if block_limit == 0:
            try:
                vserverimpl.unsetdlimit(self.dir, self.ctx)
            except OSError:
                self.log("Unexpected error with unsetdlimit for context %d" % self.ctx)
            return

        if self.vm_running:
            block_usage = vserverimpl.DLIMIT_KEEP
            inode_usage = vserverimpl.DLIMIT_KEEP
        else:
            # init_disk_info() must have been called to get usage values
            block_usage = self.disk_blocks
            inode_usage = self.disk_inodes

        try:
            vserverimpl.setdlimit(self.dir,
                                  self.ctx,
                                  block_usage,
                                  block_limit,
                                  inode_usage,
                                  vserverimpl.DLIMIT_INF,  # inode limit
                                  2)   # %age reserved for root
        except OSError:
            self.log("Unexpected error with setdlimit for context %d" % self.ctx)

        self.config.update('dlimits/0/space_total', block_limit)

    def is_running(self):
        """Return vserverimpl.isrunning() for this context."""
        return vserverimpl.isrunning(self.ctx)

    def get_disklimit(self):
        """Return the context's block limit, or -1 if none has been set.

        On success self.disk_blocks and self.disk_inodes are refreshed as
        a side effect.  OSError other than ESRCH is re-raised.
        """
        try:
            (self.disk_blocks, block_limit, self.disk_inodes, inode_limit,
             reserved) = vserverimpl.getdlimit(self.dir, self.ctx)
        except OSError:
            if sys.exc_info()[1].errno != errno.ESRCH:
                raise
            # get here if no vserver disk limit has been set for xid
            block_limit = -1

        return block_limit

    def set_sched_config(self, cpu_min, cpu_share):
        """Write the CPU scheduler parameters to the configuration and,
        if the context is running, also apply them to the kernel.

        cpu_min is stored as 'sched/fill-rate' and cpu_share as
        'sched/fill-rate2'; a cpu_share of 0 additionally removes
        'sched/idle-time'.
        """
        self.config.update('sched/fill-rate', cpu_min)
        self.config.update('sched/fill-rate2', cpu_share)
        if cpu_share == 0:
            self.config.unset('sched/idle-time')

        if self.is_running():
            self.set_sched(cpu_min, cpu_share)

    def set_sched(self, cpu_min, cpu_share):
        """ Update kernel CPU scheduling parameters for this context. """
        vserverimpl.setsched(self.ctx, cpu_min, cpu_share)

    def get_sched(self):
        """Always returns the placeholder (-1, False)."""
        # have no way of querying scheduler right now on a per vserver basis
        return (-1, False)

    def set_bwlimit(self, minrate = bwlimit.bwmin, maxrate = None,
                    exempt_min = None, exempt_max = None,
                    share = None, dev = "eth0"):
        """Turn bandwidth limiting for this context on *dev* on or off.

        Passing minrate=None switches limiting off; any other value
        switches it on with the given rates and share.
        """
        if minrate is None:
            bwlimit.off(self.ctx, dev)
        else:
            bwlimit.on(self.ctx, dev, share,
                       minrate, maxrate, exempt_min, exempt_max)

    def get_bwlimit(self, dev = "eth0"):
        """Return bwlimit.get() for this context without its leading ctx
        field, or the falsy result unchanged when there is none."""
        # NOTE(review): dev is accepted but not forwarded to bwlimit.get();
        # confirm whether that is intended.
        result = bwlimit.get(self.ctx)
        # result of bwlimit.get is (ctx, share, minrate, maxrate)
        if result:
            result = result[1:]
        return result

    def open(self, filename, mode = "r", bufsize = -1):
        """Open *filename*, resolved inside the vserver's root, and return
        the file object (see chroot_call)."""
        return self.chroot_call(open, filename, mode, bufsize)

    def __do_chcontext(self, state_file):
        """Move the calling process into this context.

        If *state_file* is given, the context id is written to it and it
        is closed first.  When vserverimpl.chcontext() returns a true
        value, set_resources() is run and setup is marked as done.
        """
        if state_file:
            state_file.write("%u\n" % self.ctx)
            state_file.close()

        if vserverimpl.chcontext(self.ctx, vserverimpl.text2bcaps(self.get_capabilities_config())):
            self.set_resources()
            vserverimpl.setup_done(self.ctx)

    def __stale_runtime_files(self, rundir, lockdir):
        """List files under *rundir* (except its top-level 'utmp') plus the
        regular files directly inside *lockdir*."""
        stale = []
        top_level = True
        for dirpath, subdirs, files in os.walk(rundir):
            for name in files:
                # don't remove utmp from the top-level run directory
                if top_level and name == 'utmp':
                    continue
                stale.append(dirpath + "/" + name)
            top_level = False
        for name in os.listdir(lockdir):
            candidate = lockdir + "/" + name
            if os.path.isfile(candidate):
                stale.append(candidate)
        return stale

    def __prep(self, runlevel):
        """Do what the vserver script does before the startup scripts run:
        collect stale run/lock files, set the initial runlevel and mount
        /proc and /dev/pts below the vserver's root."""
        RUNDIR = "/var/run"
        LOCKDIR = "/var/lock/subsys"

        garbage = self.__stale_runtime_files(RUNDIR, LOCKDIR)
        # Removal is switched off by the constant guard below.
        # NOTE(review): start() calls __prep() before chroot-ing, so these
        # paths resolve outside the vserver's root -- presumably the reason
        # deletion is disabled; confirm before re-enabling.
        if False:
            for f in garbage:
                os.unlink(f)

        # set the initial runlevel
        vserverimpl.setrunlevel(RUNDIR + "/utmp", runlevel)

        # mount /proc and /dev/pts
        self.__do_mount("none", self.dir, "/proc", "proc")
        # XXX - magic mount options
        self.__do_mount("none", self.dir, "/dev/pts", "devpts", 0, "gid=5,mode=0620")

    def __do_mount(self, *mount_args):
        """Call vserverimpl.mount(*mount_args), ignoring EBUSY."""
        try:
            vserverimpl.mount(*mount_args)
        except OSError:
            if sys.exc_info()[1].errno != errno.EBUSY:
                # bare raise keeps the original traceback
                raise
            # EBUSY: assume already mounted

    def enter(self):
        """Move the calling process into the vserver: snapshot the
        configuration, chroot, then switch context."""
        self.config.cache_it()
        self.__do_chroot()
        self.__do_chcontext(None)

    def start(self, wait, runlevel = 3):
        """Fork a child that boots the vserver; return the child's pid.

        The child starts a new session, enters the vserver (chroot and
        context) and spawns each INITSCRIPTS entry without waiting for
        it.  Its output goes to /var/log/boot.log inside the vserver.
        The child exits with status 1 if launching a script fails and
        with status 0 in every other case, including other failures,
        which are only reported through log().

        *wait* is accepted but not used.
        """
        self.vm_running = True
        self.rlimits_changed = False

        child_pid = os.fork()
        if child_pid != 0:
            # parent process
            return child_pid

        # child process: whatever happens, it must terminate here and
        # never return into the caller's code.
        try:
            try:
                # get a new session
                os.setsid()

                # open state file to record vserver info
                state_file = open("/var/run/vservers/%s" % self.name, "w")

                # use /dev/null for stdin, /var/log/boot.log for stdout/err
                fd = os.open("/dev/null", os.O_RDONLY)
                if fd != 0:
                    os.dup2(fd, 0)
                    os.close(fd)

                # perform pre-init cleanup
                self.__prep(runlevel)

                self.config.cache_it()
                self.__do_chroot()
                log = open("/var/log/boot.log", "a", 0)
                if log.fileno() != 1:
                    os.dup2(log.fileno(), 1)
                os.dup2(1, 2)

                log.write("%s: starting the virtual server %s\n" %
                          (time.asctime(time.gmtime()), self.name))
                # execute each init script in turn
                # XXX - we don't support all scripts that vserver script does
                self.__do_chcontext(state_file)
                arg_subst = { 'runlevel': runlevel }
                for cmd in self.INITSCRIPTS:
                    try:
                        cmd_args = [cmd[0]] + [x % arg_subst for x in cmd[1:]]
                        log.write("executing '%s'\n" % " ".join(cmd_args))
                        os.spawnvp(os.P_NOWAIT,cmd[0],cmd_args)
                    except:
                        # terminal handler: report and leave immediately
                        log.write(traceback.format_exc() + "\n")
                        os._exit(1)

            # we get here due to an exception in the top-level child process
            except Exception:
                self.log(traceback.format_exc())
        finally:
            os._exit(0)

    def set_resources(self):
        """ Called when vserver context is entered for first time,
        should be overridden by subclass. """
        pass

    def init_disk_info(self):
        """Measure disk usage with /usr/sbin/vdu and cache it.

        Sets self.disk_blocks and self.disk_inodes from the first line of
        vdu's output and returns self.disk_blocks * 1024 (vdu is run with
        --blocksize 1024).  Raises ValueError if vdu cannot be run or its
        output cannot be parsed; vdu's stderr is forwarded to sys.stderr
        when it printed nothing on stdout.
        """
        # Argument list, no shell: the directory name is never re-parsed.
        cmd = ["/usr/sbin/vdu", "--script", "--space", "--inodes",
               "--blocksize", "1024", "--xid", "%d" % self.ctx, self.dir]
        try:
            p = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 close_fds=True)
        except OSError:
            # Through the shell, a missing vdu surfaced as ValueError from
            # parsing empty output; keep that exception type for callers.
            raise ValueError("cannot run %s: %s" % (cmd[0], sys.exc_info()[1]))
        # communicate() closes stdin, drains both pipes and waits
        (out, err) = p.communicate()

        lines = out.splitlines()
        if not lines:
            sys.stderr.write(err)
            raise ValueError("no output from %s for context %d"
                             % (cmd[0], self.ctx))

        (space, inodes) = lines[0].split()
        self.disk_inodes = int(inodes)
        self.disk_blocks = int(space)

        return self.disk_blocks * 1024

    def stop(self, signal = signal.SIGKILL):
        """Send *signal* to every process in the context and mark the
        vserver as stopped."""
        vserverimpl.killall(self.ctx, signal)
        self.vm_running = False
        self.rlimits_changed = False
511
512
513
def create(vm_name, static = False, ctor = VServer):
    """Create a new vserver with the external 'vuseradd' command.

    vm_name -- name of the vserver (and of the account vuseradd creates).
    static  -- when true, '--static' is passed to vuseradd.
    ctor    -- callable invoked as ctor(vm_name, vm_id) to build the
               return value.

    vm_id is the numeric uid of the password-database entry for vm_name.
    Raises SystemError if vuseradd exits with a non-zero status or is
    killed by a signal.
    """
    options = ['vuseradd']
    if static:
        options += ['--static']

    # With os.P_WAIT, os.spawnvp() returns the child's exit code, or
    # -signal if a signal killed it.  It is not a wait() status, so the
    # os.WIFEXITED()/os.WEXITSTATUS() helpers must not be applied to it.
    ret = os.spawnvp(os.P_WAIT, 'vuseradd', options + [vm_name])
    if ret != 0:
        out = "system command ('%s') " % options
        if ret > 0:
            out += "failed, rc = %d" % ret
        else:
            out += "killed by signal %d" % -ret
        raise SystemError(out)

    # field 2 of a passwd entry is the numeric uid
    vm_id = pwd.getpwnam(vm_name)[2]

    return ctor(vm_name, vm_id)