5278806d853cb8e08aeeaaac381672cd43250c41
[util-vserver-pl.git] / python / vserver.py
1 # Copyright 2005 Princeton University
2
3 #$Id: vserver.py,v 1.72 2007/08/02 16:01:59 dhozac Exp $
4
5 import errno
6 import fcntl
7 import os
8 import re
9 import pwd
10 import signal
11 import sys
12 import time
13 import traceback
14 import subprocess
15 import resource
16
17 import vserverimpl
18 import cpulimit, bwlimit
19
20 from vserverimpl import DLIMIT_INF
21 from vserverimpl import VC_LIM_KEEP
22 from vserverimpl import VLIMIT_NSOCK
23 from vserverimpl import VLIMIT_OPENFD
24 from vserverimpl import VLIMIT_ANON
25 from vserverimpl import VLIMIT_SHMEM
26
#
# Legacy context flags, taken from the kernel header
# linux/vserver/legacy.h.
#
FLAGS_LOCK = 1
FLAGS_SCHED = 2  # XXX - defined in util-vserver/src/chcontext.c
FLAGS_NPROC = 4
FLAGS_PRIVATE = 8
FLAGS_INIT = 16
FLAGS_HIDEINFO = 32
FLAGS_ULIMIT = 64
FLAGS_NAMESPACE = 128

# Maps a limit name (without any prefix) to the numeric resource id.
# It starts with the vserver-specific limits exported by vserverimpl ...
RLIMITS = { "NSOCK": VLIMIT_NSOCK,
            "OPENFD": VLIMIT_OPENFD,
            "ANON": VLIMIT_ANON,
            "SHMEM": VLIMIT_SHMEM}

# ... and is completed with every RLIMIT_* constant that the platform's
# `resource` module provides, keyed by the name with "RLIMIT_" stripped.
# A vserver-specific entry is never overwritten; a clash is only reported.
for entry, value in resource.__dict__.items():
    if entry.startswith("RLIMIT_"):
        k = entry[len("RLIMIT_"):]
        if k in RLIMITS:
            print("WARNING: duplicate RLIMITS key %s" % k)
        else:
            RLIMITS[k] = value
52
class NoSuchVServer(Exception):
    """Raised when a vserver's directory is missing or not accessible."""
54
55
class VServerConfig:
    """File-per-option configuration store of one vserver.

    Every option is a file below `directory`; the option name is the path
    relative to that directory (e.g. "rlimits/nproc.hard") and the value is
    the file content with trailing whitespace stripped.

    After cache_it() has been called, all reads are served from an in-memory
    snapshot and update()/unset() become no-ops.
    """

    # Permission bits are spelled with int(..., 8) instead of octal
    # literals so that this class parses on both Python 2 and Python 3.
    CONFIG_UMASK = int("022", 8)      # files end up rw-r--r--
    CONFIG_DIR_MODE = int("755", 8)   # directories are rwxr-xr-x

    def __init__(self, name, directory):
        """Bind to `directory`.

        Raises NoSuchVServer if `directory` is not a directory or is not
        readable, writable and searchable by the current process.
        """
        self.name = name
        self.dir = directory
        # None until cache_it() is called, then a dict {option: value}.
        self.cache = None
        if not (os.path.isdir(self.dir) and
                os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
            raise NoSuchVServer("%s does not exist" % self.dir)

    def get(self, option, default = None):
        """Return the value of `option`.

        If the option is not set (or cannot be read), `default` is returned
        when it is not None; otherwise KeyError is raised.
        """
        try:
            # `is not None`: an empty snapshot is still a snapshot and must
            # not fall through to the filesystem.
            if self.cache is not None:
                return self.cache[option]
            f = open(os.path.join(self.dir, option), "r")
            try:
                return f.read().rstrip()
            finally:
                f.close()
        except (KeyError, EnvironmentError):
            if default is not None:
                return default
            raise KeyError("Key %s is not set for %s" % (option, self.name))

    def update(self, option, value):
        """Write `value` to the file of `option`, creating parent dirs.

        A list is written one element per line; anything else is written
        with "%s" formatting.  Does nothing once the config is cached.
        Errors from opening or writing the file propagate to the caller.
        """
        if self.cache is not None:
            return

        filename = os.path.join(self.dir, option)
        old_umask = os.umask(self.CONFIG_UMASK)
        try:
            try:
                os.makedirs(os.path.dirname(filename), self.CONFIG_DIR_MODE)
            except OSError:
                # Usually "already exists"; any real problem is reported
                # by the open() below.
                pass
            f = open(filename, 'w')
            try:
                if isinstance(value, list):
                    f.write("%s\n" % "\n".join(value))
                else:
                    f.write("%s\n" % value)
            finally:
                f.close()
        finally:
            # Always restore the caller's umask, even when writing failed.
            os.umask(old_umask)

    def unset(self, option):
        """Remove the file of `option` and any parent dirs left empty.

        Returns True if the file was removed and False otherwise (including
        when the config is cached, in which case nothing is touched).
        """
        if self.cache is not None:
            return False

        filename = os.path.join(self.dir, option)
        try:
            os.unlink(filename)
        except OSError:
            return False
        try:
            os.removedirs(os.path.dirname(filename))
        except OSError:
            pass
        return True

    def cache_it(self):
        """Snapshot every readable regular file below the config directory.

        Symbolic links are skipped.  The snapshot replaces any earlier one
        and switches the object to cached mode.
        """
        prefix = os.path.join(self.dir, '')
        cache = {}
        # os.walk does not descend into symlinked directories by default.
        for dirname, _subdirs, fnames in os.walk(self.dir):
            for fname in fnames:
                full_name = os.path.join(dirname, fname)
                if os.path.islink(full_name):
                    continue
                if (os.path.isfile(full_name) and
                        os.access(full_name, os.R_OK)):
                    f = open(full_name, "r")
                    try:
                        # Key is the path relative to the config directory.
                        cache[full_name[len(prefix):]] = f.read().rstrip()
                    finally:
                        f.close()
        self.cache = cache
130
131
def adjust_lim(goal, curr):
    """Raise a (soft, hard) rlimit pair so that it satisfies `goal`.

    goal -- (hard, soft, min) requested values; an element equal to
            VC_LIM_KEEP means "no request" and is ignored.
    curr -- (soft, hard) pair as returned by resource.getrlimit().

    The minimum is applied to both limits, then the soft goal to the soft
    limit and the hard goal to the hard limit.  Limits are only ever
    raised, never lowered.  Returns the new (soft, hard) tuple.
    """
    def raised(limit, wanted):
        # Nothing requested, or already unlimited: keep the limit.  The
        # explicit infinity test is needed because RLIM_INFINITY may be
        # represented as -1, which compares *below* every finite value.
        if wanted == VC_LIM_KEEP or limit == resource.RLIM_INFINITY:
            return limit
        if wanted == resource.RLIM_INFINITY or wanted > limit:
            return wanted
        return limit

    gh = goal[0]
    gs = goal[1]
    gm = goal[2]
    soft = raised(raised(curr[0], gm), gs)
    hard = raised(raised(curr[1], gm), gh)
    return (soft, hard)
150
151
152 class VServer:
153
154     INITSCRIPTS = [('/etc/rc.vinit', 'start'),
155                    ('/etc/rc.d/rc', '%(runlevel)d')]
156
157     def __init__(self, name, vm_id = None, vm_running = None, logfile=None):
158
159         self.name = name
160         self.dir = "%s/%s" % (vserverimpl.VSERVER_BASEDIR, name)
161         if not (os.path.isdir(self.dir) and
162                 os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
163             raise NoSuchVServer, "no such vserver: " + name
164         self.config = VServerConfig(name, "/etc/vservers/%s" % name)
165         self.remove_caps = ~vserverimpl.CAP_SAFE;
166         if vm_id == None:
167             vm_id = int(self.config.get('context'))
168         self.ctx = vm_id
169         if vm_running == None:
170             vm_running = self.is_running()
171         self.vm_running = vm_running
172         self.logfile = logfile
173
174     # inspired from nodemanager's logger
175     def log(self,msg):
176         if self.logfile:
177             try:
178                 fd = os.open(self.logfile,os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0600)
179                 if not msg.endswith('\n'): msg += '\n'
180                 os.write(fd, '%s: %s' % (time.asctime(time.gmtime()), msg))
181                 os.close(fd)
182             except:
183                 print '%s: (%s failed to open) %s'%(time.asctime(time.gmtime()),self.logfile,msg)
184
185     def set_rlimit(self, type, hard, soft, min):
186         """Generic set resource limit function for vserver"""
187         global RLIMITS
188         update = False
189
190         if hard <> VC_LIM_KEEP:
191             self.config.update('rlimits/%s.hard' % type.lower(), hard)
192             update = True
193         if soft <> VC_LIM_KEEP:
194             self.config.update('rlimits/%s.soft' % type.lower(), soft)
195             update = True
196         if min <> VC_LIM_KEEP:
197             self.config.update('rlimits/%s.min' % type.lower(), min)
198             update = True
199
200         if self.is_running() and update:
201             resource_type = RLIMITS[type]
202             try:
203                 vserverimpl.setrlimit(self.ctx, resource_type, hard, soft, min)
204                 if hasattr(resource, 'RLIMIT_' + type):
205                     lim = resource.getrlimit(resource_type)
206                     lim = adjust_lim((hard, soft, min), lim)
207                     resource.setrlimit(resource_type, lim)
208             except OSError, e:
209                 self.log("Error: setrlimit(%d, %s, %d, %d, %d): %s"
210                          % (self.ctx, type.lower(), hard, soft, min))
211
212         return update
213
214     def get_prefix_from_capabilities(self, capabilities, prefix):
215         split_caps = capabilities.split(',')
216         return ",".join(["%s" % (c) for c in split_caps if c.startswith(prefix.upper()) or c.startswith(prefix.lower())])
217
218     def get_bcaps_from_capabilities(self, capabilities):
219         return self.get_prefix_from_capabilities(capabilities, "cap_")
220
221     def get_ccaps_from_capabilities(self, capabilities):
222         return self.get_prefix_from_capabilities(capabilities, "vxc_")
223
224     def set_capabilities_config(self, capabilities):
225         bcaps = self.get_bcaps_from_capabilities(capabilities)
226         ccaps = self.get_ccaps_from_capabilities(capabilities)
227         self.config.update('bcapabilities', bcaps)
228         self.config.update('ccapabilities', ccaps)
229         ret = vserverimpl.setbcaps(self.ctx, vserverimpl.text2bcaps(bcaps))
230         if ret > 0:
231             return ret
232         return vserverimpl.setccaps(self.ctx, vserverimpl.text2ccaps(ccaps))
233
234     def get_capabilities(self):
235         bcaps = vserverimpl.bcaps2text(vserverimpl.getbcaps(self.ctx))
236         ccaps = vserverimpl.ccaps2text(vserverimpl.getccaps(self.ctx))
237         if bcaps and ccaps:
238             ccaps = "," + ccaps
239         return (bcaps + ccaps)
240  
241     def get_capabilities_config(self):
242         bcaps = self.config.get('bcapabilities', '')
243         ccaps = self.config.get('ccapabilities', '')
244         if bcaps and ccaps:
245             ccaps = "," + ccaps
246         return (bcaps + ccaps)
247
248     def set_ipaddresses(self, addresses):
249         vserverimpl.netremove(self.ctx, "all")
250         for a in addresses.split(","):
251             vserverimpl.netadd(self.ctx, a)
252
253     def set_ipaddresses_config(self, addresses):
254         i = 0
255         for a in addresses.split(","):
256             self.config.update("interfaces/%d/ip" % i, a)
257             i += 1
258         while self.config.unset("interfaces/%d/ip" % i):
259             i += 1
260         self.set_ipaddresses(addresses)
261
262     def get_ipaddresses_config(self):
263         i = 0
264         ret = []
265         while True:
266             r = self.config.get("interfaces/%d/ip" % i, '')
267             if r == '':
268                 break
269             ret += [r]
270             i += 1
271         return ",".join(ret)
272
273     def get_ipaddresses(self):
274         # No clean way to do this right now.
275         return None
276
277     def __do_chroot(self):
278         os.chroot(self.dir)
279         os.chdir("/")
280
281     def chroot_call(self, fn, *args):
282
283         cwd_fd = os.open(".", os.O_RDONLY)
284         try:
285             root_fd = os.open("/", os.O_RDONLY)
286             try:
287                 self.__do_chroot()
288                 result = fn(*args)
289             finally:
290                 os.fchdir(root_fd)
291                 os.chroot(".")
292                 os.fchdir(cwd_fd)
293                 os.close(root_fd)
294         finally:
295             os.close(cwd_fd)
296         return result
297
298     def set_disklimit(self, block_limit):
299         # block_limit is in kB
300         if block_limit == 0:
301             try:
302                 vserverimpl.unsetdlimit(self.dir, self.ctx)
303             except OSError, e:
304                 self.log("Unexpected error with unsetdlimit for context %d: %r" % (self.ctx,e))
305             return
306
307         if self.vm_running:
308             block_usage = vserverimpl.DLIMIT_KEEP
309             inode_usage = vserverimpl.DLIMIT_KEEP
310         else:
311             # init_disk_info() must have been called to get usage values
312             block_usage = self.disk_blocks
313             inode_usage = self.disk_inodes
314
315         try:
316             vserverimpl.setdlimit(self.dir,
317                                   self.ctx,
318                                   block_usage,
319                                   block_limit,
320                                   inode_usage,
321                                   vserverimpl.DLIMIT_INF,  # inode limit
322                                   2)   # %age reserved for root
323         except OSError, e:
324             self.log("Unexpected error with setdlimit for context %d: %r" % (self.ctx, e))
325
326
327         self.config.update('dlimits/0/space_total', block_limit)
328
329     def is_running(self):
330         return vserverimpl.isrunning(self.ctx)
331     
332     def get_disklimit(self):
333
334         try:
335             (self.disk_blocks, block_limit, self.disk_inodes, inode_limit,
336              reserved) = vserverimpl.getdlimit(self.dir, self.ctx)
337         except OSError, ex:
338             if ex.errno != errno.ESRCH:
339                 raise
340             # get here if no vserver disk limit has been set for xid
341             block_limit = -1
342
343         return block_limit
344
345     def set_sched_config(self, cpu_min, cpu_share):
346
347         """ Write current CPU scheduler parameters to the vserver
348         configuration file. This method does not modify the kernel CPU
349         scheduling parameters for this context. """
350
351         self.config.update('sched/fill-rate', cpu_min)
352         self.config.update('sched/fill-rate2', cpu_share)
353         if cpu_share == 0:
354             self.config.unset('sched/idle-time')
355         
356         if self.is_running():
357             self.set_sched(cpu_min, cpu_share)
358
359     def set_sched(self, cpu_min, cpu_share):
360         """ Update kernel CPU scheduling parameters for this context. """
361         vserverimpl.setsched(self.ctx, cpu_min, cpu_share)
362
363     def get_sched(self):
364         # have no way of querying scheduler right now on a per vserver basis
365         return (-1, False)
366
367     def set_bwlimit(self, minrate = bwlimit.bwmin, maxrate = None,
368                     exempt_min = None, exempt_max = None,
369                     share = None, dev = "eth0"):
370
371         if minrate is None:
372             bwlimit.off(self.ctx, dev)
373         else:
374             bwlimit.on(self.ctx, dev, share,
375                        minrate, maxrate, exempt_min, exempt_max)
376
377     def get_bwlimit(self, dev = "eth0"):
378
379         result = bwlimit.get(self.ctx)
380         # result of bwlimit.get is (ctx, share, minrate, maxrate)
381         if result:
382             result = result[1:]
383         return result
384
385     def open(self, filename, mode = "r", bufsize = -1):
386
387         return self.chroot_call(open, filename, mode, bufsize)
388
389     def __do_chcontext(self, state_file):
390
391         if state_file:
392             print >>state_file, "%u" % self.ctx
393             state_file.close()
394
395         if vserverimpl.chcontext(self.ctx, vserverimpl.text2bcaps(self.get_capabilities_config())):
396             self.set_resources(True)
397             vserverimpl.setup_done(self.ctx)
398
399
400     def __prep(self, runlevel):
401
402         """ Perform all the crap that the vserver script does before
403         actually executing the startup scripts. """
404
405
406         # set the initial runlevel
407         vserverimpl.setrunlevel(self.dir + "/var/run/utmp", runlevel)
408
409         # mount /proc and /dev/pts
410         self.__do_mount("none", self.dir, "/proc", "proc")
411         # XXX - magic mount options
412         self.__do_mount("none", self.dir, "/dev/pts", "devpts", 0, "gid=5,mode=0620")
413
414
415     def __cleanvar(self):
416         """
417         Clean the /var/ directory so RH startup scripts can run
418         """ 
419
420         RUNDIR = "/var/run"
421         LOCKDIR = "/var/lock/subsys"
422
423         filter = ["utmp"]
424         garbage = []
425         for topdir in [RUNDIR, LOCKDIR]:
426             #os.walk() = (dirpath, dirnames, filenames)
427             for root, dirs, files in os.walk(topdir):
428                 for file in files:
429                     if not file in filter:
430                         garbage.append(root + "/" + file)
431
432         for f in garbage: os.unlink(f)
433         return garbage
434
435
436     def __do_mount(self, *mount_args):
437         try:
438             vserverimpl.mount(*mount_args)
439         except OSError, ex:
440             if ex.errno == errno.EBUSY:
441                 # assume already mounted
442                 return
443             raise ex
444
445
446     def enter(self):
447         self.config.cache_it()
448         self.__do_chroot()
449         self.__do_chcontext(None)
450
451
452     def start(self, runlevel = 3):
453
454         if (os.fork() != 0):
455             # Parent should just return.
456             self.vm_running = True
457             return
458         else:
459             # child process
460             try:
461                 # so we don't chcontext with priv'ed fds
462                 close_nonstandard_fds()
463
464                 # get a new session
465                 os.setsid()
466
467                 # open state file to record vserver info
468                 state_file = open("/var/run/vservers/%s" % self.name, "w")
469
470                 # use /dev/null for stdin, /var/log/nm for stdout/err
471                 fd = os.open("/dev/null", os.O_RDONLY)
472                 if fd != 0:
473                     os.dup2(fd, 0)
474                     os.close(fd)
475  
476                 # perform pre-init cleanup
477                 self.__prep(runlevel)
478
479                 self.config.cache_it()
480                 self.__do_chroot()
481                 removed = self.__cleanvar()
482
483                 log = open("/var/log/nm", "a", 0)
484                 if log.fileno() != 1:
485                     os.dup2(log.fileno(), 1)
486                 os.dup2(1, 2)
487
488                 print >>log, ("%s: removing %s" % 
489                                 (time.asctime(time.gmtime()), removed))
490                 print >>log, ("%s: starting the virtual server %s" %
491                                 (time.asctime(time.gmtime()), self.name))
492                 # execute each init script in turn
493                 # XXX - we don't support all scripts that vserver script does
494                 self.__do_chcontext(state_file)
495                 for cmd in self.INITSCRIPTS:
496                     try:
497                         # enter vserver context
498                         arg_subst = { 'runlevel': runlevel }
499                         cmd_args = [cmd[0]] + map(lambda x: x % arg_subst,
500                                                    cmd[1:])
501                         if os.path.isfile(cmd[0]):                         
502                             print >>log, "executing '%s'" % " ".join(cmd_args)
503                             os.spawnvp(os.P_NOWAIT,cmd[0],cmd_args)
504                         else:
505                             print >>log, "WARNING: could not run %s"%cmd[0]
506                     except:
507                         print >>log, traceback.format_exc()
508
509             # we get here due to an exception in the top-level child process
510             except Exception, ex:
511                 self.log(traceback.format_exc())
512             os._exit(0)
513
514     def set_resources(self,setup=False):
515
516         """ Called when vserver context is entered for first time,
517         should be overridden by subclass. """
518
519         pass
520
521     def init_disk_info(self):
522         try:
523             dlimit = vserverimpl.getdlimit(self.dir, self.ctx)
524             self.disk_blocks = dlimit[0]
525             self.disk_inodes = dlimit[2]
526             return self.disk_blocks * 1024
527         except Exception, e:
528             pass
529         cmd = "/usr/sbin/vdu --script --space --inodes --blocksize 1024 --xid %d %s" % (self.ctx, self.dir)
530         p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE,
531                              stdout=subprocess.PIPE, stderr=subprocess.PIPE,
532                              close_fds=True)
533         p.stdin.close()
534         line = p.stdout.readline()
535         if not line:
536             sys.stderr.write(p.stderr.read())
537         p.stdout.close()
538         p.stderr.close()
539         ret = p.wait()
540
541         (space, inodes) = line.split()
542         self.disk_inodes = int(inodes)
543         self.disk_blocks = int(space)
544         #(self.disk_inodes, self.disk_blocks) = vduimpl.vdu(self.dir)
545
546         return self.disk_blocks * 1024
547
548     def stop(self, signal = signal.SIGKILL):
549         vserverimpl.killall(self.ctx, signal)
550         self.vm_running = False
551
552     def setname(self, slice_id):
553         '''Set vcVHI_CONTEXT field in kernel to slice_id'''
554         vserverimpl.setname(self.ctx, slice_id)
555
556     def getname(self):
557         '''Get vcVHI_CONTEXT field in kernel'''
558         return vserverimpl.getname(self.ctx)
559
560
def create(vm_name, static = False, ctor = VServer):
    """Create a new vserver with `vuseradd` and return an object for it.

    vm_name -- name of the vserver (and of the account vuseradd creates).
    static  -- pass --static to vuseradd.
    ctor    -- callable invoked as ctor(vm_name, vm_id) to build the result;
               vm_id is the uid of the `vm_name` passwd entry.

    Raises SystemError if vuseradd exits with a non-zero status or is
    killed by a signal.
    """
    options = ['vuseradd']
    if static:
        options += ['--static']
    # With P_WAIT, spawnvp() returns the exit code of the child, or
    # -signal if it was killed -- not a wait() status word, so the
    # os.WIFEXITED()/os.WEXITSTATUS() macros must not be applied to it.
    ret = os.spawnvp(os.P_WAIT, 'vuseradd', options + [vm_name])
    if ret != 0:
        out = "system command ('%s') " % options
        if ret > 0:
            out += "failed, rc = %d" % ret
        else:
            out += "killed by signal %d" % -ret
        raise SystemError(out)
    vm_id = pwd.getpwnam(vm_name)[2]

    return ctor(vm_name, vm_id)
577
578
def close_nonstandard_fds():
    """Close all open file descriptors other than 0, 1, and 2."""
    # Ask for the limit by name: the numeric index of SC_OPEN_MAX is
    # platform specific.
    try:
        max_fd = os.sysconf("SC_OPEN_MAX")
    except (ValueError, OSError):
        max_fd = -1
    if max_fd < 0:
        # limit unknown or indeterminate: fall back to a conventional bound
        max_fd = 256
    for fd in range(3, max_fd):
        try:
            os.close(fd)
        except OSError:
            pass  # most likely an fd that isn't open
585