Rewriting util-vserver-pl to wrap util-vserver, as proof-of-concept
[util-vserver-pl.git] / python / vserver.py
1 # Copyright 2005 Princeton University
2
3 #$Id: vserver.py,v 1.72 2007/08/02 16:01:59 dhozac Exp $
4
5 import errno
6 import fcntl
7 import os
8 import re
9 import pwd
10 import signal
11 import sys
12 import time
13 import traceback
14 import subprocess
15 import resource
16
17 import vserverimpl
18 import cpulimit, bwlimit
19
20 from vserverimpl import DLIMIT_INF
21 from vserverimpl import VC_LIM_KEEP
22 from vserverimpl import VLIMIT_NSOCK
23 from vserverimpl import VLIMIT_OPENFD
24 from vserverimpl import VLIMIT_ANON
25 from vserverimpl import VLIMIT_SHMEM
26
#
# Context flag bits, as taken from the kernel header linux/vserver/legacy.h.
# Each flag occupies its own bit so that several can be OR-ed together.
#
FLAGS_LOCK = 1 << 0
FLAGS_SCHED = 1 << 1  # XXX - defined in util-vserver/src/chcontext.c
FLAGS_NPROC = 1 << 2
FLAGS_PRIVATE = 1 << 3
FLAGS_INIT = 1 << 4
FLAGS_HIDEINFO = 1 << 5
FLAGS_ULIMIT = 1 << 6
FLAGS_NAMESPACE = 1 << 7
38
# Resource-limit name -> numeric id.  Starts with the vserver-specific
# limits exported by vserverimpl; the platform's own RLIMIT_* ids are merged
# in below under their name without the "RLIMIT_" prefix.
RLIMITS = {"NSOCK": VLIMIT_NSOCK,
           "OPENFD": VLIMIT_OPENFD,
           "ANON": VLIMIT_ANON,
           "SHMEM": VLIMIT_SHMEM}

# add in the platform supported rlimits
for entry in resource.__dict__.keys():
    if not entry.startswith("RLIMIT_"):
        continue
    k = entry[len("RLIMIT_"):]
    if k in RLIMITS:
        # never overwrite an existing entry; just report the clash
        print("WARNING: duplicate RLIMITS key %s" % k)
    else:
        RLIMITS[k] = resource.__dict__[entry]
52
class NoSuchVServer(Exception):
    """Raised when a vserver directory is missing or not accessible."""
    pass
54
55
class VServerConfig:
    """File-per-option configuration store for one vserver.

    Every option is a file below ``directory``; the option name is the
    path of that file relative to ``directory`` (it may contain slashes)
    and the value is the file's content with trailing whitespace removed.

    After cache_it() has been called the object serves reads from an
    in-memory snapshot and silently ignores update() and unset().
    """

    def __init__(self, name, directory):
        """Bind to ``directory``.

        Raises NoSuchVServer when ``directory`` is not a directory that
        the process can read, write and search.
        """
        self.name = name
        self.dir = directory
        # None means "not cached"; cache_it() replaces it with a dict.
        self.cache = None
        if not (os.path.isdir(self.dir) and
                os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
            raise NoSuchVServer("%s does not exist" % self.dir)

    def get(self, option, default=None):
        """Return the value of ``option``.

        When the option cannot be read, ``default`` is returned if it is
        not None; otherwise KeyError is raised.
        """
        try:
            # Compare against None: an empty snapshot is still a snapshot.
            if self.cache is not None:
                return self.cache[option]
            f = open(os.path.join(self.dir, option), "r")
            try:
                return f.read().rstrip()
            finally:
                f.close()
        except (KeyError, EnvironmentError):
            if default is not None:
                return default
            raise KeyError("Key %s is not set for %s" % (option, self.name))

    def update(self, option, value):
        """Write ``value`` to the file backing ``option``.

        A list is written one element per line; any other value is
        written with "%s" formatting followed by a newline.  Missing
        parent directories are created.  Does nothing once the
        configuration has been cached.  I/O errors from opening or
        writing the file propagate to the caller.
        """
        if self.cache is not None:
            return

        # int(..., 8) spells the octal modes 022 / 0755 in a way that every
        # Python version parses.
        old_umask = os.umask(int("022", 8))
        try:
            filename = os.path.join(self.dir, option)
            try:
                os.makedirs(os.path.dirname(filename), int("0755", 8))
            except OSError:
                # best effort: usually the directory already exists; a real
                # problem resurfaces when the file is opened below
                pass
            f = open(filename, 'w')
            try:
                if isinstance(value, list):
                    f.write("%s\n" % "\n".join(value))
                else:
                    # wrap in a 1-tuple so a tuple value is formatted, not
                    # mistaken for the argument list of the % operator
                    f.write("%s\n" % (value,))
            finally:
                f.close()
        finally:
            # restore the caller's umask even when the write failed
            os.umask(old_umask)

    def unset(self, option):
        """Delete the file backing ``option``.

        Returns True when the file was removed and False when it could
        not be removed.  Returns None without touching anything once the
        configuration has been cached.
        """
        if self.cache is not None:
            return

        filename = os.path.join(self.dir, option)
        try:
            os.unlink(filename)
        except OSError:
            return False
        try:
            # prune parent directories that became empty
            # NOTE(review): os.removedirs keeps climbing, so it can remove
            # self.dir itself once the last option is gone - confirm that
            # this is intended.
            os.removedirs(os.path.dirname(filename))
        except OSError:
            pass
        return True

    def cache_it(self):
        """Snapshot every readable regular file below the directory.

        Symbolic links are skipped.  Keys of self.cache are paths relative
        to the configuration directory; values are the file contents with
        trailing whitespace removed.
        """
        self.cache = {}
        prefix = os.path.join(self.dir, '')
        # os.walk does not descend into symlinked directories by default.
        for dirname, subdirs, fnames in os.walk(self.dir):
            for fname in fnames:
                full_name = os.path.join(dirname, fname)
                if os.path.islink(full_name):
                    continue
                if not (os.path.isfile(full_name) and
                        os.access(full_name, os.R_OK)):
                    continue
                if full_name.startswith(prefix):
                    key = full_name[len(prefix):]
                else:
                    key = full_name
                f = open(full_name, "r")
                try:
                    self.cache[key] = f.read().rstrip()
                finally:
                    f.close()
130
131
class VServer:
    """Handle on a single vserver guest.

    Persistent settings are stored through a VServerConfig rooted at
    /etc/vservers/<name>, kernel state is read and changed through the
    vserverimpl extension module, and the guest is entered, started and
    stopped by running /usr/sbin/vserver.
    """

    INITSCRIPTS = [('/etc/rc.vinit', 'start'),
                   ('/etc/rc.d/rc', '%(runlevel)d')]

    def __init__(self, name, vm_id=None, vm_running=None, logfile=None):
        """Bind to the existing vserver ``name``.

        name       -- vserver name; its root is VSERVER_BASEDIR/<name>
        vm_id      -- context id; read from the 'context' option if None
        vm_running -- cached running state; queried if None
        logfile    -- path log() appends to; None disables log()

        Raises NoSuchVServer when the guest's root directory is missing
        or not readable, writable and searchable.
        """
        self.name = name
        self.dir = "%s/%s" % (vserverimpl.VSERVER_BASEDIR, name)
        if not (os.path.isdir(self.dir) and
                os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
            raise NoSuchVServer("no such vserver: " + name)
        self.config = VServerConfig(name, "/etc/vservers/%s" % name)
        self.remove_caps = ~vserverimpl.CAP_SAFE
        if vm_id is None:
            vm_id = int(self.config.get('context'))
        self.ctx = vm_id
        if vm_running is None:
            vm_running = self.is_running()
        self.vm_running = vm_running
        self.logfile = logfile

    # inspired from nodemanager's logger
    def log(self, msg):
        """Append a UTC-timestamped ``msg`` to self.logfile.

        Does nothing when no logfile was configured.  If the file cannot
        be opened or written the message is printed instead.
        """
        if not self.logfile:
            return
        try:
            # int("0600", 8) spells octal 0600 for every Python version
            fd = os.open(self.logfile,
                         os.O_WRONLY | os.O_CREAT | os.O_APPEND,
                         int("0600", 8))
            try:
                if not msg.endswith('\n'):
                    msg += '\n'
                os.write(fd, '%s: %s' % (time.asctime(time.gmtime()), msg))
            finally:
                # release the descriptor even when the write fails
                os.close(fd)
        except Exception:
            print('%s: (%s failed to open) %s'
                  % (time.asctime(time.gmtime()), self.logfile, msg))

    def _run_vserver(self, action):
        """Run ``/usr/sbin/vserver <name> <action>``; return its exit status.

        The command is executed from an argument list (no shell), so the
        vserver name is never interpreted as shell syntax.  A command that
        cannot be executed is logged and reported as status 127 instead
        of raising.
        """
        try:
            return subprocess.call(["/usr/sbin/vserver", self.name, action])
        except OSError:
            self.log("Error: cannot run /usr/sbin/vserver %s %s: %s"
                     % (self.name, action, sys.exc_info()[1]))
            return 127

    def set_rlimit(self, type, hard, soft, min):
        """Generic set resource limit function for vserver.

        Every value different from VC_LIM_KEEP is stored under
        rlimits/<type>.{hard,soft,min}.  If anything was stored and the
        vserver is running, the limits are also applied to the context;
        an OSError from that call is logged, not raised.  ``type`` must
        be a key of RLIMITS.  Returns True when at least one value was
        stored.
        """
        update = False
        for suffix, value in (('hard', hard), ('soft', soft), ('min', min)):
            if value != VC_LIM_KEEP:
                self.config.update('rlimits/%s.%s' % (type.lower(), suffix),
                                   value)
                update = True

        if self.is_running() and update:
            resource_type = RLIMITS[type]
            try:
                vserverimpl.setrlimit(self.ctx, resource_type,
                                      hard, soft, min)
            except OSError:
                self.log("Error: setrlimit(%d, %s, %d, %d, %d): %s"
                         % (self.ctx, type.lower(), hard, soft, min,
                            sys.exc_info()[1]))

        return update

    def get_prefix_from_capabilities(self, capabilities, prefix):
        """Return the comma-separated entries of ``capabilities`` that
        start with ``prefix`` written all upper-case or all lower-case."""
        upper = prefix.upper()
        lower = prefix.lower()
        return ",".join([c for c in capabilities.split(',')
                         if c.startswith(upper) or c.startswith(lower)])

    def get_bcaps_from_capabilities(self, capabilities):
        """Return only the "cap_" entries of ``capabilities``."""
        return self.get_prefix_from_capabilities(capabilities, "cap_")

    def get_ccaps_from_capabilities(self, capabilities):
        """Return only the "vxc_" entries of ``capabilities``."""
        return self.get_prefix_from_capabilities(capabilities, "vxc_")

    def set_capabilities_config(self, capabilities):
        """Store and apply the capabilities named in ``capabilities``.

        CAP_NET_RAW is always appended to the "cap_" list.  Returns the
        result of setbcaps when it is greater than zero, otherwise the
        result of setccaps.
        """
        bcaps = self.get_bcaps_from_capabilities(capabilities)
        ccaps = self.get_ccaps_from_capabilities(capabilities)
        if len(bcaps) > 0:
            bcaps += ","
        bcaps += "CAP_NET_RAW"
        self.config.update('bcapabilities', bcaps)
        self.config.update('ccapabilities', ccaps)
        ret = vserverimpl.setbcaps(self.ctx, vserverimpl.text2bcaps(bcaps))
        if ret > 0:
            return ret
        return vserverimpl.setccaps(self.ctx, vserverimpl.text2ccaps(ccaps))

    def get_capabilities(self):
        """Return the context's current capabilities as one comma-separated
        string ("cap_" entries first, then "vxc_" entries)."""
        bcaps = vserverimpl.bcaps2text(vserverimpl.getbcaps(self.ctx))
        ccaps = vserverimpl.ccaps2text(vserverimpl.getccaps(self.ctx))
        if bcaps and ccaps:
            ccaps = "," + ccaps
        return (bcaps + ccaps)

    def get_capabilities_config(self):
        """Return the stored capabilities as one comma-separated string;
        unset options count as empty."""
        bcaps = self.config.get('bcapabilities', '')
        ccaps = self.config.get('ccapabilities', '')
        if bcaps and ccaps:
            ccaps = "," + ccaps
        return (bcaps + ccaps)

    def set_ipaddresses(self, addresses):
        """Replace the context's addresses with the comma-separated list
        ``addresses``."""
        vserverimpl.netremove(self.ctx, "all")
        for a in addresses.split(","):
            vserverimpl.netadd(self.ctx, a)

    def set_ipaddresses_config(self, addresses):
        """Currently a no-op.

        NOTE(review): the unconditional return below disables this method,
        leaving the statements after it unreachable.  They are kept
        untouched - confirm whether the method should be re-enabled.
        """
        return
        i = 0
        for a in addresses.split(","):
            self.config.update("interfaces/%d/ip" % i, a)
            i += 1
        while self.config.unset("interfaces/%d/ip" % i):
            i += 1
        self.set_ipaddresses(addresses)

    def get_ipaddresses_config(self):
        """Return the stored addresses interfaces/0/ip, interfaces/1/ip, ...
        joined by commas, stopping at the first index that is unset."""
        i = 0
        ret = []
        while True:
            r = self.config.get("interfaces/%d/ip" % i, '')
            if r == '':
                break
            ret.append(r)
            i += 1
        return ",".join(ret)

    def get_ipaddresses(self):
        # No clean way to do this right now.
        return None

    def get_unshare_netns_config(self):
        """Return the 'spaces/net' option as an integer, or 0 when it is
        unset or not a number."""
        try:
            return int(self.config.get('spaces/net'))
        except (KeyError, ValueError):
            return 0

    def __do_chroot(self):
        """Make the guest's root directory the process root and cwd."""
        os.chroot(self.dir)
        os.chdir("/")

    def chroot_call(self, fn, *args):
        """Return ``fn(*args)`` evaluated while chroot-ed into the guest.

        The previous root and working directory are restored afterwards,
        also when ``fn`` raises.
        """
        cwd_fd = os.open(".", os.O_RDONLY)
        try:
            root_fd = os.open("/", os.O_RDONLY)
            try:
                self.__do_chroot()
                result = fn(*args)
            finally:
                # go back through the saved descriptors: first the old
                # root, then the old working directory
                os.fchdir(root_fd)
                os.chroot(".")
                os.fchdir(cwd_fd)
                os.close(root_fd)
        finally:
            os.close(cwd_fd)
        return result

    def set_disklimit(self, block_limit):
        """Set the disk space limit; ``block_limit`` is in kB.

        A limit of 0 removes the disk limit and leaves the configuration
        untouched.  Otherwise the limit is applied and stored as
        dlimits/0/space_total.  OSError from the kernel calls is logged,
        not raised.  When the vserver is not running, init_disk_info()
        (or get_disklimit()) must have been called first so that the
        usage values are known.
        """
        if block_limit == 0:
            try:
                vserverimpl.unsetdlimit(self.dir, self.ctx)
            except OSError:
                self.log("Unexpected error with unsetdlimit for context %d"
                         % self.ctx)
            return

        if self.vm_running:
            block_usage = vserverimpl.DLIMIT_KEEP
            inode_usage = vserverimpl.DLIMIT_KEEP
        else:
            # init_disk_info() must have been called to get usage values
            block_usage = self.disk_blocks
            inode_usage = self.disk_inodes

        try:
            vserverimpl.setdlimit(self.dir,
                                  self.ctx,
                                  block_usage,
                                  block_limit,
                                  inode_usage,
                                  vserverimpl.DLIMIT_INF,  # inode limit
                                  2)   # %age reserved for root
        except OSError:
            self.log("Unexpected error with setdlimit for context %d"
                     % self.ctx)

        self.config.update('dlimits/0/space_total', block_limit)

    def is_running(self):
        """Return vserverimpl's answer on whether the context is running."""
        return vserverimpl.isrunning(self.ctx)

    def get_disklimit(self):
        """Return the current block limit, or -1 when none is set.

        Also refreshes self.disk_blocks and self.disk_inodes when a limit
        exists.  OSError other than ESRCH is propagated.
        """
        try:
            (self.disk_blocks, block_limit, self.disk_inodes, inode_limit,
             reserved) = vserverimpl.getdlimit(self.dir, self.ctx)
        except OSError:
            if sys.exc_info()[1].errno != errno.ESRCH:
                raise
            # get here if no vserver disk limit has been set for xid
            block_limit = -1

        return block_limit

    def set_sched_config(self, cpu_min, cpu_share):
        """Write the CPU scheduler parameters to the vserver configuration.

        ``cpu_min`` is stored as sched/fill-rate and ``cpu_share`` as
        sched/fill-rate2; a share of 0 also removes sched/idle-time.  If
        the vserver is running the parameters are applied to the kernel
        as well, through set_sched().
        """
        self.config.update('sched/fill-rate', cpu_min)
        self.config.update('sched/fill-rate2', cpu_share)
        if cpu_share == 0:
            self.config.unset('sched/idle-time')

        if self.is_running():
            self.set_sched(cpu_min, cpu_share)

    def set_sched(self, cpu_min, cpu_share):
        """ Update kernel CPU scheduling parameters for this context. """
        vserverimpl.setsched(self.ctx, cpu_min, cpu_share)

    def get_sched(self):
        # have no way of querying scheduler right now on a per vserver basis
        return (-1, False)

    def set_bwlimit(self, minrate = bwlimit.bwmin, maxrate = None,
                    exempt_min = None, exempt_max = None,
                    share = None, dev = "eth0"):
        """Turn bandwidth limiting on ``dev`` on for this context, or off
        when ``minrate`` is None."""
        if minrate is None:
            bwlimit.off(self.ctx, dev)
        else:
            bwlimit.on(self.ctx, dev, share,
                       minrate, maxrate, exempt_min, exempt_max)

    def get_bwlimit(self, dev = "eth0"):
        """Return bwlimit's record for this context without its leading
        context id; a false result is returned unchanged.

        NOTE(review): ``dev`` is accepted but not passed to bwlimit.get -
        confirm whether it should be.
        """
        result = bwlimit.get(self.ctx)
        # result of bwlimit.get is (ctx, share, minrate, maxrate)
        if result:
            result = result[1:]
        return result

    def open(self, filename, mode = "r", bufsize = -1):
        """Open ``filename`` relative to the guest's root and return the
        file object."""
        return self.chroot_call(open, filename, mode, bufsize)

    def __do_chcontext(self, state_file):
        """Enter the guest's context.

        When ``state_file`` is given the context id is written to it and
        the file is closed first.  If chcontext returns a true value,
        set_resources() and vserverimpl.setup_done() are called.
        """
        if state_file:
            state_file.write("%u\n" % self.ctx)
            state_file.close()

        if vserverimpl.chcontext(
                self.ctx,
                vserverimpl.text2bcaps(self.get_capabilities_config()),
                self.get_unshare_netns_config()):
            self.set_resources()
            vserverimpl.setup_done(self.ctx)

    def __prep(self, runlevel):
        """ Perform all the crap that the vserver script does before
        actually executing the startup scripts. """

        # remove /var/run and /var/lock/subsys files
        # but don't remove utmp from the top-level /var/run
        RUNDIR = "/var/run"
        LOCKDIR = "/var/lock/subsys"
        garbage = []
        top_level = True
        for dirpath, subdirs, files in os.walk(RUNDIR):
            for f in files:
                # utmp is only protected in the first directory visited,
                # i.e. RUNDIR itself
                if top_level and f == 'utmp':
                    continue
                garbage.append(dirpath + "/" + f)
            top_level = False
        garbage += [LOCKDIR + "/" + f for f in os.listdir(LOCKDIR)
                    if os.path.isfile(LOCKDIR + "/" + f)]
        # NOTE(review): the removal itself is switched off; the list is
        # still built exactly as before.
        if False:
            for f in garbage:
                os.unlink(f)

        # set the initial runlevel
        vserverimpl.setrunlevel(RUNDIR + "/utmp", runlevel)

        # mount /proc and /dev/pts
        self.__do_mount("none", self.dir, "/proc", "proc")
        # XXX - magic mount options
        self.__do_mount("none", self.dir, "/dev/pts", "devpts", 0,
                        "gid=5,mode=0620")

    def __do_mount(self, *mount_args):
        """Call vserverimpl.mount(*mount_args), treating EBUSY as
        "already mounted"; any other OSError is re-raised."""
        try:
            vserverimpl.mount(*mount_args)
        except OSError:
            if sys.exc_info()[1].errno == errno.EBUSY:
                # assume already mounted
                return
            # bare raise keeps the original traceback
            raise

    def enter(self):
        """Run ``vserver <name> enter`` and wait for it to finish."""
        self._run_vserver("enter")

    def start(self, runlevel = 3):
        """Start the vserver from a forked child process.

        The parent marks the vserver as running and returns at once; the
        child runs ``vserver <name> start`` and then exits.  ``runlevel``
        is accepted but not used here.
        """
        if os.fork() != 0:
            # Parent should just return.
            self.vm_running = True
            return

        # child process
        try:
            try:
                self._run_vserver("start")
            # we get here due to an exception in the top-level child process
            except Exception:
                self.log(traceback.format_exc())
        finally:
            # the child must never return into the caller's code
            os._exit(0)

    def set_resources(self):
        """ Called when vserver context is entered for first time,
        should be overridden by subclass. """
        pass

    def init_disk_info(self):
        """Record the guest's disk usage and return it in bytes.

        Sets self.disk_blocks and self.disk_inodes, taken from the
        kernel's disk limit record when it can be read and from
        /usr/sbin/vdu otherwise.  Raises ValueError when vdu does not
        produce the expected "<space> <inodes>" line.
        """
        try:
            dlimit = vserverimpl.getdlimit(self.dir, self.ctx)
            self.disk_blocks = dlimit[0]
            self.disk_inodes = dlimit[2]
            return self.disk_blocks * 1024
        except Exception:
            # deliberate best effort: fall back to measuring with vdu
            pass

        cmd = ["/usr/sbin/vdu", "--script", "--space", "--inodes",
               "--blocksize", "1024", "--xid", "%d" % self.ctx, self.dir]
        try:
            p = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 close_fds=True)
            # communicate() drains both pipes, so a chatty stderr cannot
            # block the child
            out, err = p.communicate()
        except OSError:
            out, err = "", "%s\n" % (sys.exc_info()[1],)

        fields = []
        lines = out.splitlines()
        if lines:
            fields = lines[0].split()
        if len(fields) != 2:
            sys.stderr.write(err)
            raise ValueError("unexpected output from %s: %r" % (cmd[0], out))

        (space, inodes) = fields
        self.disk_inodes = int(inodes)
        self.disk_blocks = int(space)
        #(self.disk_inodes, self.disk_blocks) = vduimpl.vdu(self.dir)

        return self.disk_blocks * 1024

    def stop(self, signal = signal.SIGKILL):
        """Mark the vserver as stopped and run ``vserver <name> stop``.

        ``signal`` is accepted but not used here.
        """
        self.vm_running = False
        self._run_vserver("stop")

    def setname(self, slice_id):
        '''Set vcVHI_CONTEXT field in kernel to slice_id'''
        vserverimpl.setname(self.ctx, slice_id)

    def getname(self):
        '''Get vcVHI_CONTEXT field in kernel'''
        return vserverimpl.getname(self.ctx)
488
489
def create(vm_name, static = False, ctor = VServer):
    """Create the vserver ``vm_name`` with vuseradd and return a handle.

    vm_name -- name of the vserver (and of the account vuseradd creates)
    static  -- pass --static to vuseradd
    ctor    -- callable invoked as ctor(vm_name, vm_id) to build the result

    The context id handed to ``ctor`` is the uid of the ``vm_name``
    account.  Raises SystemError when vuseradd exits with a non-zero
    status or is killed by a signal.
    """
    options = ['vuseradd']
    if static:
        options += ['--static']
    # With P_WAIT, spawnvp returns the exit code itself, or -signal when the
    # child was killed; it is not a wait() status, so the os.W* helpers
    # must not be applied to it.
    ret = os.spawnvp(os.P_WAIT, 'vuseradd', options + [vm_name])
    if ret != 0:
        out = "system command ('%s') " % options
        if ret > 0:
            out += "failed, rc = %d" % ret
        else:
            out += "killed by signal %d" % -ret
        raise SystemError(out)
    vm_id = pwd.getpwnam(vm_name)[2]

    return ctor(vm_name, vm_id)
506
507
def close_nonstandard_fds():
    """Close all open file descriptors other than 0, 1, and 2."""
    # Ask for the limit by name: the numeric id behind SC_OPEN_MAX is
    # platform specific.
    max_fd = os.sysconf("SC_OPEN_MAX")
    for fd in range(3, max_fd):
        try:
            os.close(fd)
        except OSError:
            pass  # most likely an fd that isn't open
514