--- /dev/null
+## $Id$
+
+# Copyright (C) 2003,2004,2005,2006 Enrico Scholz <enrico.scholz@informatik.tu-chemnitz.de>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+
+# Python extension module: linked as a libtool module (-module) without a
+# version suffix (-avoid-version).
+python_vserverimpl_la_SOURCES = \
+	python/vserverimpl.c \
+	src/planetlab.c
+python_vserverimpl_la_CFLAGS = \
+	-Wno-redundant-decls \
+	-I$(top_srcdir)/src \
+	$(PYTHON_INCLUDES)
+python_vserverimpl_la_LDFLAGS = -module -avoid-version
+python_vserverimpl_la_LIBADD = -lvserver
+
+# vip6-autod links against both libvserver and libnl.
+src_vip6_autod_SOURCES = src/vip6-autod.c
+src_vip6_autod_LDADD = -lvserver -lnl
+
+# vsh shares src/planetlab.c with the Python module above.
+src_vsh_SOURCES = \
+	src/vsh.c \
+	src/planetlab.c
+src_vsh_LDADD = -lvserver
+
+# Header shared by vsh and the Python module; not installed.
+noinst_HEADERS = src/planetlab.h
+
+man_MANS = man/vsh.8
+
+pyexec_LTLIBRARIES = python/vserverimpl.la
+
+pyexec_DATA = python/vserver.py \
+	python/bwlimit.py \
+	python/cpulimit.py
+
+sbin_SCRIPTS = python/bwlimit \
+	python/disklimit \
+	scripts/vcached \
+	scripts/vuseradd \
+	scripts/vuserdel
+
+sbin_PROGRAMS = src/vsh
+sysv_SCRIPTS =
+
+# vip6-autod links against -lnl. configure only warns when libnl is
+# missing and sets ENSC_HAVE_LIBNL accordingly, so the daemon and its
+# init script must be left out of the build in that case.
+if ENSC_HAVE_LIBNL
+sbin_PROGRAMS += src/vip6-autod
+sysv_SCRIPTS += sysv/vip6-autod
+endif
+
+crondir = $(sysconfdir)/cron.d
+cron_DATA = scripts/vcached.cron
+
+logrotatedir = $(sysconfdir)/logrotate.d
+logrotate_DATA = scripts/vcached.logrotate
+
+# After installation, replace every occurrence of /usr/lib/util-vserver
+# in the installed sbin and SysV scripts with the directory detected by
+# configure (UV_PKGLIBDIR). Nothing is done when both are identical.
+# NOTE(review): sbin_SCRIPTS are installed by install-exec while this hook
+# is attached to install-data; a plain "make install" runs install-exec
+# first, but confirm that "make install-data" on its own is not expected
+# to work.
+install-data-hook: install-fix-script-paths
+
+fix_SCRPTS = $(addprefix $(sbindir)/, $(notdir $(sbin_SCRIPTS))) \
+	$(addprefix $(sysvdir)/, $(notdir $(sysv_SCRIPTS)))
+
+.PHONY: install-fix-script-paths
+
+# The result is written back with "cat >" instead of "mv" so that the
+# installed file keeps its mode and ownership. Any failing step removes
+# the temporary file and aborts the installation instead of leaving a
+# truncated script behind.
+install-fix-script-paths:
+	test "$(UV_PKGLIBDIR)" = "/usr/lib/util-vserver" || \
+	for i in $(fix_SCRPTS); do \
+		f="$(DESTDIR)$$i"; \
+		$(SED) -e 's!/usr/lib/util-vserver!$(UV_PKGLIBDIR)!g' "$$f" > "$$f.tmp" || \
+			{ rm -f "$$f.tmp"; exit 1; }; \
+		cmp -s "$$f.tmp" "$$f" || cat "$$f.tmp" > "$$f" || \
+			{ rm -f "$$f.tmp"; exit 1; }; \
+		rm -f "$$f.tmp"; \
+	done
--- /dev/null
+dnl $Id: configure.ac 2604 2007-09-02 20:03:17Z dhozac $
+
+dnl Copyright (C) 2002,2003,2004 Enrico Scholz <enrico.scholz@informatik.tu-chemnitz.de>
+dnl
+dnl This program is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU General Public License as published by
+dnl the Free Software Foundation; either version 2, or (at your option)
+dnl any later version.
+dnl
+dnl This program is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+dnl GNU General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU General Public License
+dnl along with this program; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+dnl
+dnl
+dnl As a special exception to the GNU General Public License, if you
+dnl distribute this file as part of a program that contains a configuration
+dnl script generated by Autoconf, you may include it under the same
+dnl distribution terms that you use for the rest of that program.
+dnl
+
+dnl Package identification; python/vserverimpl.c is the file configure
+dnl looks for to verify that it runs inside the right source tree.
+AC_PREREQ(2.57)
+AC_INIT(util-vserver-pl, 0.1, support@planet-lab.org)
+AC_CONFIG_SRCDIR([python/vserverimpl.c])
+AC_CONFIG_HEADER([config.h])
+
+dnl subdir-objects: the Makefile.am refers to sources in python/ and src/
+dnl from the top-level directory.
+AM_INIT_AUTOMAKE([1.8.3 gnu dist-bzip2 subdir-objects])
+AM_MAINTAINER_MODE
+
+AC_CANONICAL_BUILD
+AC_CANONICAL_HOST
+
+# Checks for programs.
+AC_PROG_CC
+AC_PROG_INSTALL
+AC_PROG_LN_S
+AM_PROG_CC_C_O
+dnl Only shared libraries are built (the Python module is a libtool module).
+AC_DISABLE_STATIC
+AC_PROG_LIBTOOL
+dnl Requires a Python interpreter of version 2.3 or newer; sets $PYTHON.
+AM_PATH_PYTHON(2.3)
+
+AC_ARG_VAR(CC, [The C compiler])
+
+
+dnl Headers that may provide the integer types used below.
+dnl NOTE(review): ensc_have_vserver is set here when a header is missing
+dnl but is not tested anywhere else in this file -- confirm it is needed.
+AC_CHECK_HEADERS([asm/types.h stdint.h], [ : ],
+ [ ensc_have_vserver=no ])
+dnl xid_t, nid_t and tag_t: when the system headers do not declare the
+dnl type, define it to uint32_t in config.h.
+AC_CHECK_TYPES([xid_t], [ : ], [AC_DEFINE_UNQUOTED([xid_t], [uint32_t],
+ [Define this to an unsigned integer type])],
+ [AC_INCLUDES_DEFAULT()
+#ifdef HAVE_ASM_TYPES_H
+# include <asm/types.h>
+#endif])
+AC_CHECK_TYPES([nid_t], [ : ], [AC_DEFINE_UNQUOTED([nid_t], [uint32_t],
+ [Define this to an unsigned integer type])],
+ [AC_INCLUDES_DEFAULT()
+#ifdef HAVE_ASM_TYPES_H
+# include <asm/types.h>
+#endif])
+AC_CHECK_TYPES([tag_t], [ : ], [AC_DEFINE_UNQUOTED([tag_t], [uint32_t],
+ [Define this to an unsigned integer type])],
+ [AC_INCLUDES_DEFAULT()
+#ifdef HAVE_ASM_TYPES_H
+# include <asm/types.h>
+#endif])
+
+dnl libvserver is mandatory: abort when its header or library is missing.
+dnl The "found" actions are no-ops, so LIBS is left alone; the Makefile.am
+dnl adds -lvserver per target.
+AC_CHECK_HEADER([vserver.h], [ : ],
+ [ AC_MSG_ERROR([No vserver.h found!]) ])
+
+AC_CHECK_LIB(vserver, vc_ctx_create, [ : ],
+ [ AC_MSG_ERROR([No libvserver found!]) ])
+
+dnl {check for libnl
+dnl
+
+dnl ensc_have_libnl starts out as "yes" and is cleared by the first check
+dnl below that fails; later checks are skipped once it is "no".
+ensc_have_libnl=yes
+dnl Use a libnl source tree unpacked next to this package, if there is one.
+dnl NOTE(review): if ../libnl* matches more than one directory, readlink
+dnl prints several paths and the test -d below fails -- confirm that only
+dnl a single sibling tree is expected.
+LIBNL_ROOT=`readlink -f ../libnl*/`
+if test -d "$LIBNL_ROOT"; then
+ CFLAGS="$CFLAGS -I ${LIBNL_ROOT}/include"
+ LDFLAGS="$LDFLAGS -L${LIBNL_ROOT}/lib"
+fi
+
+dnl 64-bit kernel types: fall back to the stdint types when undeclared.
+if test x"$ensc_have_libnl" = xyes; then
+ AC_CHECK_HEADERS([asm/types.h stdint.h], [ : ],
+ [ ensc_have_libnl=no ])
+ AC_CHECK_TYPES([__s64], [ : ], [AC_DEFINE_UNQUOTED([__s64], [int64_t],
+ [Define this to a signed 64-bit integer type])],
+ [AC_INCLUDES_DEFAULT()
+#ifdef HAVE_ASM_TYPES_H
+# include <asm/types.h>
+#endif])
+ AC_CHECK_TYPES([__u64], [ : ], [AC_DEFINE_UNQUOTED([__u64], [uint64_t],
+ [Define this to an unsigned 64-bit integer type])],
+ [AC_INCLUDES_DEFAULT()
+#ifdef HAVE_ASM_TYPES_H
+# include <asm/types.h>
+#endif])
+fi
+
+dnl libnl headers; asm/types.h is included first when it is available.
+if test x"$ensc_have_libnl" = xyes; then
+ AC_CHECK_HEADERS([netlink/netlink.h netlink/route/addr.h], [ : ],
+ [ ensc_have_libnl=no ], [AC_INCLUDES_DEFAULT()
+#ifdef HAVE_ASM_TYPES_H
+# include <asm/types.h>
+#endif]
+ )
+fi
+
+dnl The library itself; the "found" action is a no-op, so LIBS is not
+dnl modified here.
+if test x"$ensc_have_libnl" = xyes; then
+ AC_CHECK_LIB(nl, nlmsg_get_src, [ : ],
+ [ ensc_have_libnl=no ])
+fi
+
+if test x"$ensc_have_libnl" != xyes; then
+ AC_MSG_WARN([
+****
+**** 'libnl' could not be found;
+**** this will disable the build of 'vip6-autod'
+****])
+fi
+
+dnl Exported to the Makefile.am as an Automake conditional.
+AM_CONDITIONAL(ENSC_HAVE_LIBNL, test x"$ensc_have_libnl" = xyes)
+
+dnl
+dnl libnl stuff ends here}
+dnl
+dnl #######################
+
+dnl
+dnl Get python includes
+dnl
+
+AC_MSG_CHECKING([for python includes])
+dnl Prefer the -config helper that belongs to the interpreter selected by
+dnl AM_PATH_PYTHON; fall back to whichever python-config is found in PATH.
+PYTHON_INCLUDES=`"${PYTHON}-config" --includes 2>/dev/null`
+if test x"$PYTHON_INCLUDES" = x; then
+	PYTHON_INCLUDES=`python-config --includes 2>/dev/null`
+fi
+dnl Last resort: ask distutils. A print call with one parenthesized
+dnl argument is accepted by both Python 2 and Python 3 interpreters.
+if test x"$PYTHON_INCLUDES" = x; then
+	python_inc_plat=`$PYTHON -c "from distutils.sysconfig import get_python_inc; print(get_python_inc(1))"`
+	python_inc=`$PYTHON -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())"`
+	if test x"$python_inc_plat" != x; then
+		PYTHON_INCLUDES="$PYTHON_INCLUDES -I$python_inc_plat"
+	fi
+	if test x"$python_inc" != x; then
+		PYTHON_INCLUDES="$PYTHON_INCLUDES -I$python_inc"
+	fi
+fi
+AC_MSG_RESULT([$PYTHON_INCLUDES])
+AC_SUBST(PYTHON_INCLUDES)
+
+
+dnl
+dnl Figure out util-vserver directories
+dnl
+
+dnl vserver-info usually lives in an sbin directory, which may be missing
+dnl from the PATH of the user running configure.
+old_PATH="$PATH"
+PATH="$PATH:/sbin:/usr/sbin:/usr/local/sbin"
+
+AC_MSG_CHECKING([for vserver-Rootdir])
+DEFAULT_VSERVERDIR=`vserver-info 2>/dev/null | awk '$1 == "vserver-Rootdir:" { print $2 }'`
+if test x"$DEFAULT_VSERVERDIR" = x; then
+	AC_MSG_ERROR([No vserver-Rootdir could be found!])
+fi
+AC_MSG_RESULT([$DEFAULT_VSERVERDIR])
+AC_DEFINE_UNQUOTED([DEFAULT_VSERVERDIR], ["$DEFAULT_VSERVERDIR"],
+	[Define this to the path where your guests live])
+
+AC_MSG_CHECKING([for util-vserver-vars])
+UV_PREFIX=`vserver-info 2>/dev/null | awk '$1 == "prefix:" { print $2 }'`
+dnl The glob may match several directories (e.g. lib and lib64); take the
+dnl first one that really contains a readable util-vserver-vars.
+UV_PKGLIBDIR=
+if test x"$UV_PREFIX" != x; then
+	for ensc_uv_dir in "$UV_PREFIX"/lib*/util-vserver; do
+		if test -r "$ensc_uv_dir/util-vserver-vars"; then
+			UV_PKGLIBDIR="$ensc_uv_dir"
+			break
+		fi
+	done
+fi
+if test x"$UV_PKGLIBDIR" = x; then
+	AC_MSG_ERROR([No util-vserver-vars could be found below $UV_PREFIX/lib*/util-vserver])
+fi
+AC_MSG_RESULT([$UV_PKGLIBDIR])
+AC_SUBST(UV_PKGLIBDIR)
+
+
+PATH="$old_PATH"
+
+
+dnl NOTE(review): autoconf normally keeps $localstatedir as an unexpanded
+dnl reference to ${prefix} at this point, which would then end up
+dnl literally in config.h -- verify the generated LOCALSTATEDIR value.
+AC_DEFINE_UNQUOTED([LOCALSTATEDIR], ["$localstatedir"],
+ [Define this to the local state directory])
+dnl --with-initrddir=DIR selects where the SysV init scripts are installed
+dnl (sysvdir); a bare --with-initrddir or --without-initrddir is rejected.
+AC_ARG_WITH([initrddir], [AC_HELP_STRING([--with-initrddir <DIR>],
+ [use <DIR> as directory for SysV init-files (default: $sysconfdir/init.d)])],
+ [case "$withval" in
+ yes|no) AC_MSG_ERROR(['$withval' is not a valid value for '--with-initrddir']);;
+ *) sysvdir="$withval";;
+ esac],
+ [sysvdir="${sysconfdir}/init.d"])
+AC_SUBST(sysvdir)
+
+
+AC_DEFINE(_FILE_OFFSET_BITS, [64], [Use 64bit interface for filesystem operations])
+
+dnl Files generated from their .in templates.
+AC_CONFIG_FILES([util-vserver-pl.spec Makefile])
+AC_OUTPUT
--- /dev/null
+.\" Helper macros. Sh prints its first argument in bold as a subsection
+.\" heading.
+.de Sh \" Subsection
+.br
+.if t .Sp
+.ne 5
+.PP
+\fB\\$1\fR
+.PP
+..
+.\" Sp: half a line of space on typesetters (troff), a full line on
+.\" terminals (nroff).
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.\" Ip: indented paragraph with tag $1 and indent $2; an optional third
+.\" argument gives the amount of space requested with .ne (default 3).
+.de Ip \" List item
+.br
+.ie \\n(.$>=3 .ne \\$3
+.el .ne 3
+.IP "\\$1" \\$2
+..
+.TH "VSH" 8 "2004-07-29" "PlanetLab specific Vserver shell" "vsh"
+
+.SH NAME
+vsh \- safely trampolines a slice user from the global vserver context to
+the correspondingly named local vserver context\&.
+
+.SH "SYNOPSIS"
+
+.PP
+\fBvsh\fR [COMMAND] \fR
+
+.TP
+[COMMAND]
+when specified, the command to run, otherwise vsh will just run the
+user's shell as a login shell
+
+.SH "SUMMARY"
+
+.PP
+vsh is used as the login shell for slice users in the global vserver's
+/etc/passwd\&. It is invoked either by sshd when a slice user logs on
+to a PlanetLab node or as root using the su command\&. When this
+occurs, vsh switches vserver context and sets the uid/gid of the slice
+user, as specified in the slice's vserver /etc/passwd\&.
+
+.PP
+vsh assumes that the same account (by name) used to ssh/su into the
+vserver also exists in the vserver specific /etc/passwd file\&. This
+assumption is the only reason why vsh is, for now, considered to be
+PlanetLab specific\&.
--- /dev/null
+#!/usr/bin/python
+
+# Command-line wrapper: all of the work is done by main() of the bwlimit
+# module.
+import bwlimit
+
+if __name__ == '__main__':
+ bwlimit.main()
--- /dev/null
+#!/usr/bin/python
+#
+# Bandwidth limit module for PlanetLab nodes. The intent is to use the
+# Hierarchical Token Bucket (HTB) queueing discipline (qdisc) to allow
+# slices to fairly share access to available node bandwidth. We
+# currently define three classes of "available node bandwidth":
+#
+# 1. Available hardware bandwidth (bwmax): The maximum rate of the
+# hardware.
+#
+# 2. Available capped bandwidth (bwcap): The maximum rate allowed to
+# non-exempt destinations. By default, equal to bwmax, but may be
+# lowered by PIs.
+#
+# 3. Available uncapped ("exempt") bandwidth: The difference between
+# bwmax and what is currently being used of bwcap, or the maximum rate
+# allowed to destinations exempt from caps (e.g., Internet2).
+#
+# All three classes of bandwidth are fairly shared according to the
+# notion of "shares". For instance, if the node is capped at 5 Mbps,
+# there are N slices, and each slice has 1 share, then each slice
+# should get at least 5/N Mbps of bandwidth. How HTB is implemented
+# makes this statement a little too simplistic. What it really means
+# is that during any single time period, only a certain number of
+# bytes can be sent onto the wire. Each slice is guaranteed that at
+# least some small number of its bytes will be sent. Whatever is left
+# over from the budget, is split in proportion to the number of shares
+# each slice has.
+#
+# Even if the node is not capped at a particular limit (bwcap ==
+# bwmax), this module enforces fair share access to bwmax. Also, if
+# the node is capped at a particular limit, rules may optionally be
+# defined that classify certain packets into the "exempt" class. This
+# class receives whatever bandwidth is leftover between bwcap and
+# bwmax; slices fairly share this bandwidth as well.
+#
+# The root context is exempt from sharing and can send as much as it
+# needs to.
+#
+# Some relevant URLs:
+#
+# 1. http://lartc.org/howto for how to use tc
+# 2. http://luxik.cdi.cz/~devik/qos/htb/ for info on HTB
+#
+# Andy Bavier <acb@cs.princeton.edu>
+# Mark Huang <mlhuang@cs.princeton.edu>
+# Copyright (C) 2006 The Trustees of Princeton University
+#
+# $Id: bwlimit.py,v 1.15 2007/02/07 04:21:11 mlhuang Exp $
+#
+
+import sys, os, re, getopt
+from sets import Set
+import pwd
+
+
+# Where the tc binary lives
+TC = "/sbin/tc"
+
+# Default interface. The functions below use "dev = dev" as a parameter
+# default, which captures the value this name has when the function is
+# defined; rebinding the global later (as main() does for -d) does not
+# change those defaults.
+dev = "eth0"
+
+# Verbosity level; when non-zero, run() echoes every command to stderr
+verbose = 0
+
+# bwmin should be small enough that it can be considered negligibly
+# slow compared to the hardware. 8 bits/second appears to be the
+# smallest value supported by tc.
+bwmin = 8
+
+# bwmax should be large enough that it can be considered at least as
+# fast as the hardware. The value is in bits/second (1 gbit).
+bwmax = 1000*1000*1000
+
+# quantum is the maximum number of bytes that can be borrowed by a
+# share (or slice, if each slice gets 1 share) in one time period
+# (with HZ=1000, 1 ms). If multiple slices are competing for bandwidth
+# above their guarantees, and each is attempting to borrow up to the
+# node bandwidth cap, quantums control how the excess bandwidth is
+# distributed. Slices with 2 shares will borrow twice the amount in
+# one time period as slices with 1 share, so averaged over time, they
+# will get twice as much of the excess bandwidth. The value should be
+# as small as possible and at least 1 MTU. By default, it would be
+# calculated as bwmin/10, but since we use such a small value for
+# bwmin, it's better to just set it to a value safely above 1 Ethernet
+# MTU.
+quantum = 1600
+
+# cburst is the maximum number of bytes that can be burst onto the
+# wire in one time period (with HZ=1000, 1 ms). If multiple slices
+# have data queued for transmission, cbursts control how long each
+# slice can have the wire for. If not specified, it is set to the
+# smallest possible value that would enable the slice's "ceil" rate
+# (usually the node bandwidth cap), to be reached if a slice was able
+# to borrow enough bandwidth to do so. For now, it's unclear how or if
+# to relate this to the notion of shares, so just let tc set the
+# default.
+cburst = None
+
+# There is another parameter that controls how bandwidth is allocated
+# between slices on nodes that is outside the scope of HTB. We enforce
+# a 16 GByte/day total limit on each slice, which works out to about
+# 1.5mbit. If a slice exceeds this byte limit before the day finishes,
+# it is capped at (i.e., its "ceil" rate is set to) the smaller of the
+# node bandwidth cap or 1.5mbit. pl_mom is in charge of enforcing this
+# rule and executes this script to override "ceil".
+
+# We support multiple bandwidth limits, by reserving the top nibble of
+# the minor classid to be the "subclassid". Theoretically, we could
+# support up to 15 subclasses, but for now, we only define two: the
+# "default" subclass 1:10 that is capped at the node bandwidth cap (in
+# this example, 5mbit) and the "exempt" subclass 1:20 that is capped
+# at bwmax (i.e., not capped). The 1:1 parent class exists only to
+# make the borrowing model work. All bandwidth above minimum
+# guarantees is fairly shared (in this example, slice 2 is guaranteed
+# at least 1mbit in addition to fair access to the rest), subject to
+# the restrictions of the class hierarchy: namely, that the total
+# bandwidth to non-exempt destinations should not exceed the node
+# bandwidth cap.
+#
+# 1:
+# |
+# 1:1 (1gbit)
+# ______________|_____________
+# | |
+# 1:10 (8bit, 5mbit) 1:20 (8bit, 1gbit)
+# | |
+# 1:1000 (8bit, 5mbit), 1:2000 (8bit, 1gbit),
+# 1:1001 (8bit, 5mbit), 1:2001 (8bit, 1gbit),
+# 1:1002 (1mbit, 5mbit), 1:2002 (1mbit, 1gbit),
+# ... ...
+# 1:1FFF (8bit, 5mbit) 1:2FFF (8bit, 1gbit)
+#
+default_minor = 0x1000
+exempt_minor = 0x2000
+
+# root_xid is for the root context. The root context is exempt from
+# fair sharing in both the default and exempt subclasses. The root
+# context gets 5 shares by default.
+root_xid = 0x0000
+root_share = 5
+
+# default_xid is for unclassifiable packets. Packets should not be
+# classified here very often. They can be if a slice's HTB classes are
+# deleted before its processes are. Each slice gets 1 share by
+# default.
+default_xid = 0x0FFF
+default_share = 1
+
+# See tc_util.c and http://physics.nist.gov/cuu/Units/binary.html. Be
+# warned that older versions of tc interpret "kbps", "mbps", "mbit",
+# and "kbit" to mean (in this system) "kibps", "mibps", "mibit", and
+# "kibit" and that if an older version is installed, all rates will
+# be off by a small fraction.
+#
+# Maps a unit suffix to its multiplier in bits/second. The keys are
+# lowercase because get_tc_rate() lowercases the suffix before looking
+# it up. The "*bps" entries carry a factor of 8 relative to the "*bit"
+# entries.
+suffixes = {
+ "": 1,
+ "bit": 1,
+ "kibit": 1024,
+ "kbit": 1000,
+ "mibit": 1024*1024,
+ "mbit": 1000000,
+ "gibit": 1024*1024*1024,
+ "gbit": 1000000000,
+ "tibit": 1024*1024*1024*1024,
+ "tbit": 1000000000000,
+ "bps": 8,
+ "kibps": 8*1024,
+ "kbps": 8000,
+ "mibps": 8*1024*1024,
+ "mbps": 8000000,
+ "gibps": 8*1024*1024*1024,
+ "gbps": 8000000000,
+ "tibps": 8*1024*1024*1024*1024,
+ "tbps": 8000000000000
+}
+
+
+def get_tc_rate(s):
+ """
+ Parses an integer or a tc rate string (e.g., 1.5mbit) into bits/second
+ """
+
+ if type(s) == int:
+ return s
+ m = re.match(r"([0-9.]+)(\D*)", s)
+ if m is None:
+ return -1
+ suffix = m.group(2).lower()
+ if suffixes.has_key(suffix):
+ return int(float(m.group(1)) * suffixes[suffix])
+ else:
+ return -1
+
+
+def format_tc_rate(rate):
+ """
+ Formats a bits/second rate into a tc rate string
+ """
+
+ if rate >= 1000000000 and (rate % 1000000000) == 0:
+ return "%.0fgbit" % (rate / 1000000000.)
+ elif rate >= 1000000 and (rate % 1000000) == 0:
+ return "%.0fmbit" % (rate / 1000000.)
+ elif rate >= 1000:
+ return "%.0fkbit" % (rate / 1000.)
+ else:
+ return "%.0fbit" % rate
+
+
+# Parse /etc/planetlab/bwcap (or equivalent)
+def read_bwcap(bwcap_file):
+ bwcap = bwmax
+ try:
+ fp = open(bwcap_file, "r")
+ line = fp.readline().strip()
+ if line:
+ bwcap = get_tc_rate(line)
+ except:
+ pass
+ if bwcap == -1:
+ bwcap = bwmax
+ return bwcap
+
+
+def get_bwcap(dev = dev):
+ """
+ Get the current (live) value of the node bandwidth cap
+ """
+
+ state = tc("-d class show dev %s" % dev)
+ base_re = re.compile(r"class htb 1:10 parent 1:1 .*ceil ([^ ]+) .*")
+ base_classes = filter(None, map(base_re.match, state))
+ if not base_classes:
+ return -1
+ if len(base_classes) > 1:
+ raise Exception, "unable to get current bwcap"
+ return get_tc_rate(base_classes[0].group(1))
+
+
+def get_slice(xid):
+ """
+ Get slice name ("princeton_mlh") from slice xid (500)
+ """
+
+ if xid == root_xid:
+ return "root"
+ if xid == default_xid:
+ return "default"
+ try:
+ return pwd.getpwuid(xid).pw_name
+ except KeyError:
+ pass
+
+ return None
+
+def get_xid(slice):
+ """
+ Get slice xid ("princeton_mlh") from slice name ("500" or "princeton_mlh")
+ """
+
+ if slice == "root":
+ return root_xid
+ if slice == "default":
+ return default_xid
+ try:
+ try:
+ return int(slice)
+ except ValueError:
+ pass
+ return pwd.getpwnam(slice).pw_uid
+ except KeyError:
+ pass
+
+ return None
+
+def run(cmd, input = None):
+ """
+ Shortcut for running a shell command
+ """
+
+ try:
+ if verbose:
+ sys.stderr.write("Executing: " + cmd + "\n")
+ if input is None:
+ fileobj = os.popen(cmd, "r")
+ output = fileobj.readlines()
+ else:
+ fileobj = os.popen(cmd, "w")
+ fileobj.write(input)
+ output = None
+ if fileobj.close() is None:
+ return output
+ except Exception, e:
+ pass
+ return None
+
+
+def tc(cmd):
+ """
+ Shortcut for running a tc command
+ """
+
+ return run(TC + " " + cmd)
+
+
+def init(dev = dev, bwcap = bwmax):
+ """
+ (Re)initialize the bandwidth limits on this node
+ """
+
+ # Load the module used to manage exempt classes
+ run("/sbin/modprobe ip_set_iphash")
+
+ # Save current settings
+ paramslist = get(None, dev)
+
+ # Delete root qdisc 1: if it exists. This will also automatically
+ # delete any child classes.
+ for line in tc("qdisc show dev %s" % dev):
+ # Search for the root qdisc 1:
+ m = re.match(r"qdisc htb 1:", line)
+ if m is not None:
+ tc("qdisc del dev %s root handle 1:" % dev)
+ break
+
+ # Initialize HTB. The "default" clause specifies that if a packet
+ # fails classification, it should go into the class with handle
+ # 1FFF.
+ tc("qdisc add dev %s root handle 1: htb default %x" % \
+ (dev, default_minor | default_xid))
+
+ # Set up a parent class from which all subclasses borrow.
+ tc("class add dev %s parent 1: classid 1:1 htb rate %dbit" % \
+ (dev, bwmax))
+
+ # Set up a subclass that represents the node bandwidth cap. We
+ # allow each slice to borrow up to this rate, so it is also
+ # usually the "ceil" rate for each slice.
+ tc("class add dev %s parent 1:1 classid 1:10 htb rate %dbit ceil %dbit" % \
+ (dev, bwmin, bwcap))
+
+ # Set up a subclass that represents "exemption" from the node
+ # bandwidth cap. Once the node bandwidth cap is reached, bandwidth
+ # to exempt destinations can still be fairly shared up to bwmax.
+ tc("class add dev %s parent 1:1 classid 1:20 htb rate %dbit ceil %dbit" % \
+ (dev, bwmin, bwmax))
+
+ # Set up the root class (and tell VNET what it is). Packets sent
+ # by root end up here and are capped at the node bandwidth
+ # cap.
+ #on(root_xid, dev, share = root_share)
+ #try:
+ # file("/proc/sys/vnet/root_class", "w").write("%d" % ((1 << 16) | default_minor | root_xid))
+ #except:
+ # pass
+
+ # Set up the default class. Packets that fail classification end
+ # up here.
+ on(default_xid, dev, share = default_share)
+
+ # Restore old settings
+ for (xid, share,
+ minrate, maxrate,
+ minexemptrate, maxexemptrate,
+ bytes, exemptbytes) in paramslist:
+ if xid not in (root_xid, default_xid):
+ on(xid, dev, share, minrate, maxrate, minexemptrate, maxexemptrate)
+
+
+def get(xid = None, dev = dev):
+ """
+ Get the bandwidth limits and current byte totals for a
+ particular slice xid as a tuple (xid, share, minrate, maxrate,
+ minexemptrate, maxexemptrate, bytes, exemptbytes), or all classes
+ as a list of such tuples.
+ """
+
+ if xid is None:
+ ret = []
+ else:
+ ret = None
+
+ rates = {}
+ rate = None
+
+ # ...
+ # class htb 1:1000 parent 1:10 leaf 1000: prio 0 quantum 8000 rate 8bit ceil 10000Kbit ...
+ # Sent 6851486 bytes 49244 pkt (dropped 0, overlimits 0 requeues 0)
+ # ...
+ # class htb 1:2000 parent 1:20 leaf 2000: prio 0 quantum 8000 rate 8bit ceil 1000Mbit ...
+ # Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
+ # ...
+ for line in tc("-s -d class show dev %s" % dev):
+ # Rate parameter line
+ params = re.match(r"class htb 1:([0-9a-f]+) parent 1:(10|20)", line)
+ # Statistics line
+ stats = re.match(r".* Sent ([0-9]+) bytes", line)
+ # Another class
+ ignore = re.match(r"class htb", line)
+
+ if params is not None:
+ # Which class
+ if params.group(2) == "10":
+ min = 'min'
+ max = 'max'
+ bytes = 'bytes'
+ else:
+ min = 'minexempt'
+ max = 'maxexempt'
+ bytes = 'exemptbytes'
+
+ # Slice ID
+ id = int(params.group(1), 16) & 0x0FFF;
+
+ if rates.has_key(id):
+ rate = rates[id]
+ else:
+ rate = {'id': id}
+
+ # Parse share
+ rate['share'] = 1
+ m = re.search(r"quantum (\d+)", line)
+ if m is not None:
+ rate['share'] = int(m.group(1)) / quantum
+
+ # Parse minrate
+ rate[min] = bwmin
+ m = re.search(r"rate (\w+)", line)
+ if m is not None:
+ rate[min] = get_tc_rate(m.group(1))
+
+ # Parse maxrate
+ rate[max] = bwmax
+ m = re.search(r"ceil (\w+)", line)
+ if m is not None:
+ rate[max] = get_tc_rate(m.group(1))
+
+ # Which statistics to parse
+ rate['stats'] = bytes
+
+ rates[id] = rate
+
+ elif stats is not None:
+ if rate is not None:
+ rate[rate['stats']] = int(stats.group(1))
+
+ elif ignore is not None:
+ rate = None
+
+ # Keep parsing until we get everything
+ if rate is not None and \
+ rate.has_key('min') and rate.has_key('minexempt') and \
+ rate.has_key('max') and rate.has_key('maxexempt') and \
+ rate.has_key('bytes') and rate.has_key('exemptbytes'):
+ params = (rate['id'], rate['share'],
+ rate['min'], rate['max'],
+ rate['minexempt'], rate['maxexempt'],
+ rate['bytes'], rate['exemptbytes'])
+ if xid is None:
+ # Return a list of parameters
+ ret.append(params)
+ rate = None
+ elif xid == rate['id']:
+ # Return the parameters for this class
+ ret = params
+ break
+
+ return ret
+
+
+def on(xid, dev = dev, share = None, minrate = None, maxrate = None, minexemptrate = None, maxexemptrate = None):
+ """
+ Apply specified bandwidth limit to the specified slice xid
+ """
+
+ # Get defaults from current state if available
+ cap = get(xid, dev)
+ if cap is not None:
+ if share is None:
+ share = cap[1]
+ if minrate is None:
+ minrate = cap[2]
+ if maxrate is None:
+ maxrate = cap[3]
+ if minexemptrate is None:
+ minexemptrate = cap[4]
+ if maxexemptrate is None:
+ maxexemptrate = cap[5]
+
+ # Figure out what the current node bandwidth cap is
+ bwcap = get_bwcap()
+
+ # Set defaults
+ if share is None:
+ share = default_share
+ if minrate is None:
+ minrate = bwmin
+ else:
+ minrate = get_tc_rate(minrate)
+ if maxrate is None:
+ maxrate = bwcap
+ else:
+ maxrate = get_tc_rate(maxrate)
+ if minexemptrate is None:
+ minexemptrate = minrate
+ else:
+ minexemptrate = get_tc_rate(minexemptrate)
+ if maxexemptrate is None:
+ maxexemptrate = bwmax
+ else:
+ maxexemptrate = get_tc_rate(maxexemptrate)
+
+ # Sanity checks
+ if maxrate < bwmin:
+ maxrate = bwmin
+ if maxrate > bwcap:
+ maxrate = bwcap
+ if minrate < bwmin:
+ minrate = bwmin
+ if minrate > maxrate:
+ minrate = maxrate
+ if maxexemptrate < bwmin:
+ maxexemptrate = bwmin
+ if maxexemptrate > bwmax:
+ maxexemptrate = bwmax
+ if minexemptrate < bwmin:
+ minexemptrate = bwmin
+ if minexemptrate > maxexemptrate:
+ minexemptrate = maxexemptrate
+
+ # Set up subclasses for the slice
+ tc("class replace dev %s parent 1:10 classid 1:%x htb rate %dbit ceil %dbit quantum %d" % \
+ (dev, default_minor | xid, minrate, maxrate, share * quantum))
+
+ tc("class replace dev %s parent 1:20 classid 1:%x htb rate %dbit ceil %dbit quantum %d" % \
+ (dev, exempt_minor | xid, minexemptrate, maxexemptrate, share * quantum))
+
+ # Attach a FIFO to each subclass, which helps to throttle back
+ # processes that are sending faster than the token buckets can
+ # support.
+ tc("qdisc replace dev %s parent 1:%x handle %x pfifo" % \
+ (dev, default_minor | xid, default_minor | xid))
+
+ tc("qdisc replace dev %s parent 1:%x handle %x pfifo" % \
+ (dev, exempt_minor | xid, exempt_minor | xid))
+
+
+def set(xid, share = None, minrate = None, maxrate = None, minexemptrate = None, maxexemptrate = None):
+ on(xid = xid, share = share,
+ minrate = minrate, maxrate = maxrate,
+ minexemptrate = minexemptrate, maxexemptrate = maxexemptrate)
+
+
+# Remove class associated with specified slice xid. If further packets
+# are seen from this slice, they will be classified into the default
+# class 1:1FFF.
+def off(xid, dev = dev):
+ """
+ Remove class associated with specified slice xid. If further
+ packets are seen from this slice, they will be classified into the
+ default class 1:1FFF.
+ """
+
+ cap = get(xid, dev)
+ if cap is not None:
+ tc("class del dev %s classid 1:%x" % (dev, default_minor | xid))
+ tc("class del dev %s classid 1:%x" % (dev, exempt_minor | xid))
+
+
+def exempt_init(group_name, node_ips):
+ """
+ Initialize the list of destinations exempt from the node bandwidth
+ (burst) cap.
+ """
+
+ # Clean up
+ iptables = "/sbin/iptables -t MANGLE %s POSTROUTING"
+ run(iptables % "-F")
+ run("/sbin/ipset -X " + group_name)
+
+ # Create a hashed IP set of all of these destinations
+ lines = ["-N %s iphash" % group_name]
+ add_cmd = "-A %s " % group_name
+ lines += [(add_cmd + ip) for ip in node_ips]
+ lines += ["COMMIT"]
+ restore = "\n".join(lines) + "\n"
+ run("/sbin/ipset -R", restore)
+
+ # Add rule to match on destination IP set
+ run((iptables + " -m set --set %s dst -j CLASSIFY --set-class 1:%x") %
+ ("-A", group_name, exempt_minor))
+
+
+# Print the command-line help, filled in with the program name, the
+# default device, the live node bandwidth cap as reported by get_bwcap()
+# for its default device, and the quantum; then exit with status 1.
+def usage():
+ bwcap_description = format_tc_rate(get_bwcap())
+
+ print """
+Usage:
+
+%s [OPTION]... [COMMAND] [ARGUMENT]...
+
+Options:
+ -d device Network interface (default: %s)
+ -r rate Node bandwidth cap (default: %s)
+ -q quantum Share multiplier (default: %d bytes)
+ -n Print rates in numeric bits per second
+ -v Enable verbose debug messages
+ -h This message
+
+Commands:
+ init
+ (Re)initialize all bandwidth parameters
+ on slice [share|-] [minrate|-] [maxrate|-] [minexemptrate|-] [maxexemptrate|-]
+ Set bandwidth parameter(s) for the specified slice
+ off slice
+ Remove all bandwidth parameters for the specified slice
+ get
+ Get all bandwidth parameters for all slices
+ get slice
+ Get bandwidth parameters for the specified slice
+""" % (sys.argv[0], dev, bwcap_description, quantum)
+ sys.exit(1)
+
+
+# Command-line entry point. Parses the options (-d device, -n numeric
+# output, -r node bandwidth cap, -q quantum, -v verbose, -h help) and
+# dispatches on the first remaining argument:
+#   init, or "on" without a slice     -> init()
+#   get | show [slice]                -> print the parameters from get()
+#   on | add | replace | set slice .. -> on()
+#   off | del slice                   -> off()
+# An argument of "-" for on/add/replace/set is passed on as None, i.e.
+# "keep the current value". An invalid slice name prints an error and
+# the usage message.
+def main():
+ global dev, quantum, verbose
+
+ # Defaults
+ numeric = False
+ # NOTE(review): this reads the cap of the default device, because it
+ # runs before -d has been parsed; get_bwcap() returns -1 when HTB is
+ # not set up yet, and that value is later handed to init() unchanged
+ # unless -r is given -- confirm this is intended.
+ bwcap = get_bwcap()
+
+ # NOTE(review): errors raised by getopt for unknown options are not
+ # caught here.
+ (opts, argv) = getopt.getopt(sys.argv[1:], "d:nr:q:vh")
+ for (opt, optval) in opts:
+ if opt == '-d':
+ dev = optval
+ elif opt == '-n':
+ numeric = True
+ elif opt == '-r':
+ bwcap = get_tc_rate(optval)
+ elif opt == '-q':
+ quantum = int(optval)
+ elif opt == '-v':
+ verbose += 1
+ elif opt == '-h':
+ usage()
+
+ if len(argv):
+ if argv[0] == "init" or (argv[0] == "on" and len(argv) == 1):
+ # (Re)initialize
+ init(dev, get_tc_rate(bwcap))
+
+ elif argv[0] == "get" or argv[0] == "show":
+ # Show
+ if len(argv) >= 2:
+ # Show a particular slice
+ xid = get_xid(argv[1])
+ if xid is None:
+ sys.stderr.write("Error: Invalid slice name or context '%s'\n" % argv[1])
+ usage()
+ params = get(xid, dev)
+ if params is None:
+ paramslist = []
+ else:
+ paramslist = [params]
+ else:
+ # Show all slices
+ paramslist = get(None, dev)
+
+ for (xid, share,
+ minrate, maxrate,
+ minexemptrate, maxexemptrate,
+ bytes, exemptbytes) in paramslist:
+ slice = get_slice(xid)
+ if slice is None:
+ # Orphaned (not associated with a slice) class
+ slice = "%d?" % xid
+ if numeric:
+ print "%s %d %d %d %d %d %d %d" % \
+ (slice, share,
+ minrate, maxrate,
+ minexemptrate, maxexemptrate,
+ bytes, exemptbytes)
+ else:
+ print "%s %d %s %s %s %s %d %d" % \
+ (slice, share,
+ format_tc_rate(minrate), format_tc_rate(maxrate),
+ format_tc_rate(minexemptrate), format_tc_rate(maxexemptrate),
+ bytes, exemptbytes)
+
+ elif len(argv) >= 2:
+ # slice, ...
+ xid = get_xid(argv[1])
+ if xid is None:
+ sys.stderr.write("Error: Invalid slice name or context '%s'\n" % argv[1])
+ usage()
+
+ if argv[0] == "on" or argv[0] == "add" or argv[0] == "replace" or argv[0] == "set":
+ # Enable cap
+ args = []
+ if len(argv) >= 3:
+ # ... share, minrate, maxrate, minexemptrate, maxexemptrate
+ casts = [int, get_tc_rate, get_tc_rate, get_tc_rate, get_tc_rate]
+ for i, arg in enumerate(argv[2:]):
+ if i >= len(casts):
+ break
+ if arg == "-":
+ args.append(None)
+ else:
+ args.append(casts[i](arg))
+ on(xid, dev, *args)
+
+ elif argv[0] == "off" or argv[0] == "del":
+ # Disable cap
+ off(xid, dev)
+
+ else:
+ usage()
+
+ else:
+ usage()
+
+
+if __name__ == '__main__':
+ main()
--- /dev/null
+#!/bin/env python
+
+import vserver, sys, os, re, getopt
+
+# Print the command-line help with the program name filled in and exit
+# with status 1.
+def usage():
+ print """
+Usage:
+
+%s [OPTION]... [COMMAND] [ARGUMENT]...
+
+Options:
+
+
+Commands:
+ set slice space
+ Set max disk limit for a slice
+
+ get slice
+ Get current disk limit for slice
+""" % (sys.argv[0])
+
+ sys.exit(1)
+
+def get(argv):
+ slicename = argv[0]
+ vs = vserver.VServer(slicename)
+ limit = vs.get_disklimit()
+ return "%s %d limit" % (slicename,limit)
+
+def set(argv):
+ slicename = argv[0]
+ vs = vserver.VServer(slicename)
+ oldlimit = vs.get_disklimit()
+ newlimit = int(argv[1])
+ if newlimit >= oldlimit:
+ vs.set_disklimit(newlimit)
+ return None
+
+def main():
+ functions = {"get":get, "set":set}
+ argv = sys.argv[1:]
+ if len(argv):
+ func = functions.get(argv[0],usage)
+ result = func(argv[1:])
+ if result <> None:
+ print result
+ sys.exit(0)
+
+ # no command given
+ usage()
+
+
+if __name__ == '__main__':
+ main()
--- /dev/null
+# Copyright 2005 Princeton University
+
+#$Id: vserver.py,v 1.72 2007/08/02 16:01:59 dhozac Exp $
+
+import errno
+import fcntl
+import os
+import re
+import pwd
+import signal
+import sys
+import time
+import traceback
+import subprocess
+import resource
+
+import vserverimpl
+import cpulimit, bwlimit
+
+from vserverimpl import VS_SCHED_CPU_GUARANTEED as SCHED_CPU_GUARANTEED
+from vserverimpl import DLIMIT_INF
+from vserverimpl import VC_LIM_KEEP
+from vserverimpl import VLIMIT_NSOCK
+from vserverimpl import VLIMIT_OPENFD
+from vserverimpl import VLIMIT_ANON
+from vserverimpl import VLIMIT_SHMEM
+
+#
+# these are the flags taken from the kernel linux/vserver/legacy.h
+#
+FLAGS_LOCK = 1
+FLAGS_SCHED = 2 # XXX - defined in util-vserver/src/chcontext.c
+FLAGS_NPROC = 4
+FLAGS_PRIVATE = 8
+FLAGS_INIT = 16
+FLAGS_HIDEINFO = 32
+FLAGS_ULIMIT = 64
+FLAGS_NAMESPACE = 128
+
+RLIMITS = { "NSOCK": VLIMIT_NSOCK,
+ "OPENFD": VLIMIT_OPENFD,
+ "ANON": VLIMIT_ANON,
+ "SHMEM": VLIMIT_SHMEM}
+
+# add in the platform supported rlimits
+for entry in resource.__dict__.keys():
+ if entry.find("RLIMIT_")==0:
+ k = entry[len("RLIMIT_"):]
+ if not RLIMITS.has_key(k):
+ RLIMITS[k]=resource.__dict__[entry]
+ else:
+ print "WARNING: duplicate RLIMITS key %s" % k
+
+# Raised by VServerConfig and VServer when the directory they need is not
+# an existing directory with read, write and search permission.
+class NoSuchVServer(Exception): pass
+
+
+class VServerConfig:
+ def __init__(self, name, directory):
+ self.name = name
+ self.dir = directory
+ self.cache = None
+ if not (os.path.isdir(self.dir) and
+ os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
+ raise NoSuchVServer, "%s does not exist" % self.dir
+
+ def get(self, option, default = None):
+ try:
+ if self.cache:
+ return self.cache[option]
+ else:
+ f = open(os.path.join(self.dir, option), "r")
+ buf = f.read().rstrip()
+ f.close()
+ return buf
+ except:
+ if default is not None:
+ return default
+ else:
+ raise KeyError, "Key %s is not set for %s" % (option, self.name)
+
+ def update(self, option, value):
+ if self.cache:
+ return
+
+ try:
+ old_umask = os.umask(0022)
+ filename = os.path.join(self.dir, option)
+ try:
+ os.makedirs(os.path.dirname(filename), 0755)
+ except:
+ pass
+ f = open(filename, 'w')
+ if isinstance(value, list):
+ f.write("%s\n" % "\n".join(value))
+ else:
+ f.write("%s\n" % value)
+ f.close()
+ os.umask(old_umask)
+ except:
+ raise
+
+ def unset(self, option):
+ if self.cache:
+ return
+
+ try:
+ filename = os.path.join(self.dir, option)
+ os.unlink(filename)
+ try:
+ os.removedirs(os.path.dirname(filename))
+ except:
+ pass
+ return True
+ except:
+ return False
+
+ def cache_it(self):
+ self.cache = {}
+ def add_to_cache(cache, dirname, fnames):
+ for file in fnames:
+ full_name = os.path.join(dirname, file)
+ if os.path.islink(full_name):
+ fnames.remove(file)
+ elif (os.path.isfile(full_name) and
+ os.access(full_name, os.R_OK)):
+ f = open(full_name, "r")
+ cache[full_name.replace(os.path.join(self.dir, ''),
+ '')] = f.read().rstrip()
+ f.close()
+ os.path.walk(self.dir, add_to_cache, self.cache)
+
+
+class VServer:
+
+ INITSCRIPTS = [('/etc/rc.vinit', 'start'),
+ ('/etc/rc.d/rc', '%(runlevel)d')]
+
+ def __init__(self, name, vm_id = None, vm_running = None):
+
+ self.name = name
+ self.rlimits_changed = False
+ self.dir = "%s/%s" % (vserverimpl.VSERVER_BASEDIR, name)
+ if not (os.path.isdir(self.dir) and
+ os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
+ raise NoSuchVServer, "no such vserver: " + name
+ self.config = VServerConfig(name, "/etc/vservers/%s" % name)
+ self.remove_caps = ~vserverimpl.CAP_SAFE;
+ if vm_id == None:
+ vm_id = int(self.config.get('context'))
+ self.ctx = vm_id
+ if vm_running == None:
+ vm_running = self.is_running()
+ self.vm_running = vm_running
+
+ def have_limits_changed(self):
+ """Return the rlimits_changed flag (set by set_rlimit_limit(), reset by start() and stop())."""
+ return self.rlimits_changed
+
+ def set_rlimit_limit(self,type,hard,soft,minimum):
+ """Generic set resource limit function for vserver"""
+ global RLIMITS
+ changed = False
+ try:
+ old_hard, old_soft, old_minimum = self.get_rlimit_limit(type)
+ if old_hard != VC_LIM_KEEP and old_hard <> hard: changed = True
+ if old_soft != VC_LIM_KEEP and old_soft <> soft: changed = True
+ if old_minimum != VC_LIM_KEEP and old_minimum <> minimum: changed = True
+ self.rlimits_changed = self.rlimits_changed or changed
+ except OSError, e:
+ if self.is_running(): print "Unexpected error with getrlimit for running context %d" % self.ctx
+
+ resource_type = RLIMITS[type]
+ try:
+ ret = vserverimpl.setrlimit(self.ctx,resource_type,hard,soft,minimum)
+ except OSError, e:
+ if self.is_running(): print "Unexpected error with setrlimit for running context %d" % self.ctx
+
+ def set_rlimit_config(self,type,hard,soft,minimum):
+ """Generic set resource limit function for vserver"""
+ if hard <> VC_LIM_KEEP:
+ self.config.update('rlimits/%s.hard' % type.lower(), hard)
+ if soft <> VC_LIM_KEEP:
+ self.config.update('rlimits/%s.soft' % type.lower(), soft)
+ if minimum <> VC_LIM_KEEP:
+ self.config.update('rlimits/%s.min' % type.lower(), minimum)
+ self.set_rlimit_limit(type,hard,soft,minimum)
+
+ def get_rlimit_limit(self,type):
+ """Generic get resource configuration function for vserver"""
+ global RLIMITS
+ resource_type = RLIMITS[type]
+ try:
+ ret = vserverimpl.getrlimit(self.ctx,resource_type)
+ except OSError, e:
+ print "Unexpected error with getrlimit for context %d" % self.ctx
+ ret = self.get_rlimit_config(type)
+ return ret
+
+ def get_rlimit_config(self,type):
+ """Generic get resource configuration function for vserver"""
+ hard = int(self.config.get("rlimits/%s.hard"%type.lower(),VC_LIM_KEEP))
+ soft = int(self.config.get("rlimits/%s.soft"%type.lower(),VC_LIM_KEEP))
+ minimum = int(self.config.get("rlimits/%s.min"%type.lower(),VC_LIM_KEEP))
+ return (hard,soft,minimum)
+
+ def set_capabilities(self, capabilities):
+ """Convert the capability text with text2bcaps() and apply it to the context via setbcaps()."""
+ return vserverimpl.setbcaps(self.ctx, vserverimpl.text2bcaps(capabilities))
+
+ def set_capabilities_config(self, capabilities):
+ """Store the capability text in the 'bcapabilities' option, then apply it."""
+ self.config.update('bcapabilities', capabilities)
+ self.set_capabilities(capabilities)
+
+ def get_capabilities(self):
+ """Return the context's capabilities from getbcaps(), rendered as text by bcaps2text()."""
+ return vserverimpl.bcaps2text(vserverimpl.getbcaps(self.ctx))
+
+ def get_capabilities_config(self):
+ """Return the 'bcapabilities' option, or '' when it is not set."""
+ return self.config.get('bcapabilities', '')
+
+ def set_ipaddresses(self, addresses):
+ """Replace the context's addresses with the comma-separated list `addresses`."""
+ # Drop everything currently assigned, then add the new addresses one by one.
+ vserverimpl.netremove(self.ctx, "all")
+ for a in addresses.split(","):
+ vserverimpl.netadd(self.ctx, a)
+
+ def set_ipaddresses_config(self, addresses):
+ i = 0
+ for a in addresses.split(","):
+ self.config.update("interfaces/%d/ip" % i, a)
+ i += 1
+ while self.config.unset("interfaces/%d/ip" % i):
+ i += 1
+ self.set_ipaddresses(addresses)
+
+ def get_ipaddresses_config(self):
+ i = 0
+ ret = []
+ while True:
+ r = self.config.get("interfaces/%d/ip" % i, '')
+ if r == '':
+ break
+ ret += [r]
+ i += 1
+ return ",".join(ret)
+
+ def get_ipaddresses(self):
+ """Placeholder: always returns None; live addresses are not queried."""
+ # No clean way to do this right now.
+ return None
+
+ def __do_chroot(self):
+ """chroot into the vserver directory and change to its root."""
+ # Snapshot the configuration first; cache_it() reads files under the
+ # config directory and keeps them in memory for later get() calls.
+ # NOTE(review): once cached, config.update()/unset() return without
+ # writing, and nothing here resets the cache afterwards.
+ self.config.cache_it()
+ os.chroot(self.dir)
+ os.chdir("/")
+
+ def chroot_call(self, fn, *args):
+ """Call fn(*args) while chrooted into the vserver, then chroot back.
+
+ Returns whatever fn returns; an exception raised by fn propagates
+ after the original root and working directory have been restored.
+ """
+
+ # Keep descriptors for the current directory and the current root so
+ # that both can be restored after the chroot.
+ cwd_fd = os.open(".", os.O_RDONLY)
+ try:
+ root_fd = os.open("/", os.O_RDONLY)
+ try:
+ self.__do_chroot()
+ result = fn(*args)
+ finally:
+ # Return to the saved root, make it the root again, then go back
+ # to the saved working directory.
+ os.fchdir(root_fd)
+ os.chroot(".")
+ os.fchdir(cwd_fd)
+ os.close(root_fd)
+ finally:
+ os.close(cwd_fd)
+ return result
+
+ def set_disklimit(self, block_limit):
+ """Set the disk space limit of the context and record it in the config.
+
+ A block_limit of 0 removes the limit instead (the config is not
+ touched in that case).  OSError from vserverimpl is reported with a
+ printed message and otherwise ignored.
+ """
+ # block_limit is in kB
+ if block_limit == 0:
+ try:
+ vserverimpl.unsetdlimit(self.dir, self.ctx)
+ except OSError, e:
+ print "Unexpected error with unsetdlimit for context %d" % self.ctx
+ return
+
+ if self.vm_running:
+ block_usage = vserverimpl.DLIMIT_KEEP
+ inode_usage = vserverimpl.DLIMIT_KEEP
+ else:
+ # init_disk_info() must have been called to get usage values
+ # NOTE(review): disk_blocks/disk_inodes are only assigned by
+ # init_disk_info() and get_disklimit(); if neither ran, this raises
+ # AttributeError.
+ block_usage = self.disk_blocks
+ inode_usage = self.disk_inodes
+
+
+ try:
+ vserverimpl.setdlimit(self.dir,
+ self.ctx,
+ block_usage,
+ block_limit,
+ inode_usage,
+ vserverimpl.DLIMIT_INF, # inode limit
+ 2) # %age reserved for root
+ except OSError, e:
+ print "Unexpected error with setdlimit for context %d" % self.ctx
+
+
+ self.config.update('dlimits/0/space_total', block_limit)
+
+ def is_running(self):
+ """Return vserverimpl.isrunning() for this context id."""
+ return vserverimpl.isrunning(self.ctx)
+
+ def get_disklimit(self):
+ """Return the block limit of the context, or -1 when none is set.
+
+ As a side effect, a successful query stores the current block and
+ inode usage in self.disk_blocks and self.disk_inodes.  OSError with
+ an errno other than ESRCH is re-raised.
+ """
+
+ try:
+ (self.disk_blocks, block_limit, self.disk_inodes, inode_limit,
+ reserved) = vserverimpl.getdlimit(self.dir, self.ctx)
+ except OSError, ex:
+ if ex.errno != errno.ESRCH:
+ raise
+ # get here if no vserver disk limit has been set for xid
+ block_limit = -1
+
+ return block_limit
+
+ def set_sched_config(self, cpu_share, sched_flags):
+
+ """ Write current CPU scheduler parameters to the vserver
+ configuration file. This method does not modify the kernel CPU
+ scheduling parameters for this context. """
+
+ if sched_flags & SCHED_CPU_GUARANTEED:
+ cpu_guaranteed = cpu_share
+ else:
+ cpu_guaranteed = 0
+ self.config.update('sched/fill-rate2', cpu_share)
+ self.config.update('sched/fill-rate', cpu_guaranteed)
+
+ if self.vm_running:
+ self.set_sched(cpu_share, sched_flags)
+
+ def set_sched(self, cpu_share, sched_flags = 0):
+ """ Update kernel CPU scheduling parameters for this context. """
+ # Thin wrapper: forwards the context id and both values to vserverimpl.
+ vserverimpl.setsched(self.ctx, cpu_share, sched_flags)
+
+ def get_sched(self):
+ """Placeholder: always returns the fixed tuple (-1, False)."""
+ # have no way of querying scheduler right now on a per vserver basis
+ return (-1, False)
+
+ def set_bwlimit(self, minrate = bwlimit.bwmin, maxrate = None,
+ exempt_min = None, exempt_max = None,
+ share = None, dev = "eth0"):
+ """Enable or disable bandwidth limiting for this context on `dev`.
+
+ Passing minrate=None calls bwlimit.off(); otherwise bwlimit.on() is
+ called with share, minrate, maxrate, exempt_min and exempt_max in
+ that order.
+ """
+
+ if minrate is None:
+ bwlimit.off(self.ctx, dev)
+ else:
+ bwlimit.on(self.ctx, dev, share,
+ minrate, maxrate, exempt_min, exempt_max)
+
+ def get_bwlimit(self, dev = "eth0"):
+ """Return bwlimit.get() for this context without its first element.
+
+ A false result from bwlimit.get() is returned unchanged.
+ """
+ # NOTE(review): the `dev` argument is accepted but never passed to
+ # bwlimit.get(); confirm whether bwlimit.get() should receive it.
+
+ result = bwlimit.get(self.ctx)
+ # result of bwlimit.get is (ctx, share, minrate, maxrate)
+ if result:
+ result = result[1:]
+ return result
+
+ def open(self, filename, mode = "r", bufsize = -1):
+ """Open `filename` with the builtin open() while chrooted into the vserver (see chroot_call)."""
+
+ return self.chroot_call(open, filename, mode, bufsize)
+
+ def __do_chcontext(self, state_file):
+ """Enter the vserver context, optionally recording its id first.
+
+ state_file -- open file object or None; when given, the context id
+ is written to it and the file is closed.
+ """
+
+ if state_file:
+ print >>state_file, "%u" % self.ctx
+ state_file.close()
+
+ # chcontext() is called with the capabilities from the configuration.
+ # When it returns a true value (the C side reports whether the context
+ # is new) the set_resources() hook runs.
+ # NOTE(review): indentation was lost in this copy, so it is unclear
+ # whether setup_done() is inside the `if`; confirm against the original.
+ if vserverimpl.chcontext(self.ctx, vserverimpl.text2bcaps(self.get_capabilities_config())):
+ self.set_resources()
+ vserverimpl.setup_done(self.ctx)
+
+ def __prep(self, runlevel, log):
+
+ """ Perform all the crap that the vserver script does before
+ actually executing the startup scripts. """
+
+ # remove /var/run and /var/lock/subsys files
+ # but don't remove utmp from the top-level /var/run
+ RUNDIR = "/var/run"
+ LOCKDIR = "/var/lock/subsys"
+ filter_fn = lambda fs: filter(lambda f: f != 'utmp', fs)
+ garbage = reduce((lambda (out, ff), (dir, subdirs, files):
+ (out + map((dir + "/").__add__, ff(files)),
+ lambda fs: fs)),
+ list(os.walk(RUNDIR)),
+ ([], filter_fn))[0]
+ garbage += filter(os.path.isfile, map((LOCKDIR + "/").__add__,
+ os.listdir(LOCKDIR)))
+ if False:
+ for f in garbage:
+ os.unlink(f)
+
+ # set the initial runlevel
+ f = open(RUNDIR + "/utmp", "w")
+ vserverimpl.setrunlevel(f, runlevel)
+ f.close()
+
+ # mount /proc and /dev/pts
+ self.__do_mount("none", self.dir, "/proc", "proc")
+ # XXX - magic mount options
+ self.__do_mount("none", self.dir, "/dev/pts", "devpts", 0, "gid=5,mode=0620")
+
+ def __do_mount(self, *mount_args):
+ """Call vserverimpl.mount(*mount_args), treating EBUSY as success.
+
+ Any other OSError is re-raised.
+ """
+
+ try:
+ vserverimpl.mount(*mount_args)
+ except OSError, ex:
+ if ex.errno == errno.EBUSY:
+ # assume already mounted
+ return
+ raise ex
+
+ def enter(self):
+ """chroot into the vserver and enter its context, without writing a state file."""
+ self.__do_chroot()
+ self.__do_chcontext(None)
+
+ def start(self, wait, runlevel = 3):
+ """Fork a child that boots the vserver; return the child's pid.
+
+ wait     -- not referenced anywhere in this method
+ runlevel -- substituted into the INITSCRIPTS arguments and recorded
+             via __prep()
+
+ The parent marks the vserver as running, clears rlimits_changed and
+ returns immediately after the fork.
+ """
+ self.vm_running = True
+ self.rlimits_changed = False
+
+ child_pid = os.fork()
+ if child_pid == 0:
+ # child process
+ try:
+ # get a new session
+ os.setsid()
+
+ # open state file to record vserver info
+ # (opened before the chroot, so this is a host path)
+ state_file = open("/var/run/vservers/%s" % self.name, "w")
+
+ # use /dev/null for stdin, /var/log/boot.log for stdout/err
+ fd = os.open("/dev/null", os.O_RDONLY)
+ if fd != 0:
+ os.dup2(fd, 0)
+ os.close(fd)
+ self.__do_chroot()
+ # opened after the chroot, so this is the guest's boot.log; unbuffered
+ log = open("/var/log/boot.log", "w", 0)
+ if log.fileno() != 1:
+ os.dup2(log.fileno(), 1)
+ os.dup2(1, 2)
+
+ print >>log, ("%s: starting the virtual server %s" %
+ (time.asctime(time.gmtime()), self.name))
+
+ # perform pre-init cleanup
+ self.__prep(runlevel, log)
+
+ # execute each init script in turn
+ # XXX - we don't support all scripts that vserver script does
+ self.__do_chcontext(state_file)
+ for cmd in self.INITSCRIPTS:
+ try:
+ # enter vserver context
+ arg_subst = { 'runlevel': runlevel }
+ cmd_args = [cmd[0]] + map(lambda x: x % arg_subst,
+ cmd[1:])
+ print >>log, "executing '%s'" % " ".join(cmd_args)
+ # P_NOWAIT: the script is started but not waited for
+ os.spawnvp(os.P_NOWAIT,cmd[0],cmd_args)
+ except:
+ traceback.print_exc()
+ os._exit(1)
+
+ # we get here due to an exception in the top-level child process
+ except Exception, ex:
+ traceback.print_exc()
+ os._exit(0)
+
+ # parent process
+ return child_pid
+
+ def set_resources(self):
+
+ """ Called when vserver context is entered for first time,
+ should be overridden by subclass. """
+
+ # Hook invoked from __do_chcontext(); the base implementation does nothing.
+ pass
+
+ def init_disk_info(self):
+ cmd = "/usr/sbin/vdu --script --space --inodes --blocksize 1024 --xid %d %s" % (self.ctx, self.dir)
+ p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ close_fds=True)
+ p.stdin.close()
+ line = p.stdout.readline()
+ if not line:
+ sys.stderr.write(p.stderr.read())
+ p.stdout.close()
+ p.stderr.close()
+ ret = p.wait()
+
+ (space, inodes) = line.split()
+ self.disk_inodes = int(inodes)
+ self.disk_blocks = int(space)
+ #(self.disk_inodes, self.disk_blocks) = vduimpl.vdu(self.dir)
+
+ return self.disk_blocks * 1024
+
+ def stop(self, signal = signal.SIGKILL):
+ """Send `signal` (SIGKILL by default) to the context via vserverimpl.killall() and mark the vserver as stopped."""
+ # Inside this method the parameter shadows the `signal` module.
+ vserverimpl.killall(self.ctx, signal)
+ self.vm_running = False
+ self.rlimits_changed = False
+
+
+
+def create(vm_name, static = False, ctor = VServer):
+
+ options = ['vuseradd']
+ if static:
+ options += ['--static']
+ ret = os.spawnvp(os.P_WAIT, 'vuseradd', options + [vm_name])
+ if not os.WIFEXITED(ret) or os.WEXITSTATUS(ret) != 0:
+ out = "system command ('%s') " % options
+ if os.WIFEXITED(ret):
+ out += "failed, rc = %d" % os.WEXITSTATUS(ret)
+ else:
+ out += "killed by signal %d" % os.WTERMSIG(ret)
+ raise SystemError, out
+ vm_id = pwd.getpwnam(vm_name)[2]
+
+ return ctor(vm_name, vm_id)
--- /dev/null
+/* Copyright 2005 Princeton University
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following
+disclaimer in the documentation and/or other materials provided
+with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived
+from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PRINCETON
+UNIVERSITY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#include <Python.h>
+
+#include <errno.h>
+#include <stdint.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <ifaddrs.h>
+#include <stddef.h>
+#include <fcntl.h>
+#include <sys/mount.h>
+#include <utmp.h>
+
+#include "config.h"
+#include "vserver.h"
+#include "planetlab.h"
+
+/* Return Py_None after taking a new reference to it (used via the NONE macro). */
+static inline PyObject *inc_and_ret_none(void)
+{
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+#define NONE inc_and_ret_none()
+
+/*
+ * context create
+ *
+ * Python arguments: ctx (unsigned int) and optional bcaps (64-bit
+ * bitmap, default 0).  Every bit that is neither reported by
+ * vc_get_insecurebcaps() nor the (1 << VC_CAP_NET_BIND_SERVICE) bit is
+ * OR-ed into bcaps before pl_chcontext() is called.
+ * Returns a bool built from pl_chcontext()'s non-negative result
+ * (stored in ctx_is_new); raises OSError from errno when it is negative.
+ */
+static PyObject *
+vserver_chcontext(PyObject *self, PyObject *args)
+{
+ int ctx_is_new;
+ xid_t ctx;
+ uint_least64_t bcaps = 0;
+
+ if (!PyArg_ParseTuple(args, "I|K", &ctx, &bcaps))
+ return NULL;
+ bcaps |= ~(vc_get_insecurebcaps() | (1 << VC_CAP_NET_BIND_SERVICE));
+
+ if ((ctx_is_new = pl_chcontext(ctx, bcaps, 0)) < 0)
+ return PyErr_SetFromErrno(PyExc_OSError);
+
+ return PyBool_FromLong(ctx_is_new);
+}
+
+/*
+ * setup_done(ctx): call pl_setup_done() for the context.
+ * Returns None; raises OSError from errno when pl_setup_done() reports
+ * a negative result.
+ */
+static PyObject *
+vserver_setup_done(PyObject *self, PyObject *args)
+{
+  xid_t ctx;
+  int rc;
+
+  if (!PyArg_ParseTuple(args, "I", &ctx))
+    return NULL;
+
+  rc = pl_setup_done(ctx);
+  if (rc >= 0)
+    return NONE;
+
+  return PyErr_SetFromErrno(PyExc_OSError);
+}
+
+/*
+ * isrunning(ctx): return True when /proc/virtual/<ctx> can be stat()ed,
+ * False otherwise.
+ */
+static PyObject *
+vserver_isrunning(PyObject *self, PyObject *args)
+{
+  xid_t ctx;
+  struct stat statbuf;
+  char fname[64];
+
+  if (!PyArg_ParseTuple(args, "I", &ctx))
+    return NULL;
+
+  /* ctx is parsed as an unsigned int: format it as one ("%d" rendered
+   * large ids as negative numbers) and bound the write to the buffer. */
+  snprintf(fname, sizeof(fname), "/proc/virtual/%u", (unsigned int) ctx);
+
+  return PyBool_FromLong(stat(fname, &statbuf) == 0);
+}
+
+/*
+ * Query one resource limit of context xid with vc_get_rlimit().
+ * Returns a Python tuple (hard, soft, min) built with the "LLL" format,
+ * or NULL with OSError set from errno when the query fails.
+ */
+static PyObject *
+__vserver_get_rlimit(xid_t xid, int resource) {
+ struct vc_rlimit limits;
+ PyObject *ret;
+
+ errno = 0;
+ if (vc_get_rlimit(xid, resource, &limits)==-1)
+ ret = PyErr_SetFromErrno(PyExc_OSError);
+ else
+ ret = Py_BuildValue("LLL",limits.hard, limits.soft, limits.min);
+
+ return ret;
+}
+
+/*
+ * getrlimit(xid, resource): Python entry point that parses its two
+ * arguments and delegates to __vserver_get_rlimit().
+ */
+static PyObject *
+vserver_get_rlimit(PyObject *self, PyObject *args) {
+  xid_t xid;
+  int resource;
+
+  if (!PyArg_ParseTuple(args, "Ii", &xid, &resource))
+    return NULL;
+
+  return __vserver_get_rlimit(xid, resource);
+}
+
+/*
+ * setrlimit(xid, resource, hard, soft, min): set a resource limit on a
+ * context and return the resulting limits as read back by
+ * __vserver_get_rlimit(); raises OSError when vc_set_rlimit() fails.
+ *
+ * For every resource except VC_VLIMIT_NSOCK, VC_VLIMIT_ANON and
+ * VC_VLIMIT_SHMEM, the calling process's own rlimit is read first and,
+ * when adjust_lim() returns non-zero, written back with setrlimit().
+ * VC_VLIMIT_OPENFD is mapped to RLIMIT_NOFILE for that step.
+ */
+static PyObject *
+vserver_set_rlimit(PyObject *self, PyObject *args) {
+ struct vc_rlimit limits;
+ struct rlimit lim;
+ xid_t xid;
+ int resource, lresource;
+ PyObject *ret;
+
+ limits.min = VC_LIM_KEEP;
+ limits.soft = VC_LIM_KEEP;
+ limits.hard = VC_LIM_KEEP;
+
+ if (!PyArg_ParseTuple(args, "IiLLL", &xid, &resource, &limits.hard, &limits.soft, &limits.min))
+ return NULL;
+
+ lresource = resource;
+ switch (resource) {
+ case VC_VLIMIT_NSOCK:
+ case VC_VLIMIT_ANON:
+ case VC_VLIMIT_SHMEM:
+ /* these skip the process-level getrlimit/setrlimit step below */
+ goto do_vc_set_rlimit;
+ case VC_VLIMIT_OPENFD:
+ lresource = RLIMIT_NOFILE;
+ break;
+ default:
+ break;
+ }
+
+ /* NOTE(review): the return values of getrlimit() and setrlimit() are not checked. */
+ getrlimit(lresource,&lim);
+ if (adjust_lim(&limits,&lim)) {
+ setrlimit(lresource, &lim);
+ }
+
+ do_vc_set_rlimit:
+ errno = 0;
+ if (vc_set_rlimit(xid, resource, &limits)==-1)
+ ret = PyErr_SetFromErrno(PyExc_OSError);
+ else
+ ret = __vserver_get_rlimit(xid, resource);
+
+ return ret;
+}
+
+/*
+ * setsched
+ *
+ * Python arguments: ctx, cpu_share and optional cpu_sched_flags
+ * (default VC_VXF_SCHED_FLAGS), all unsigned ints.  Forwards them to
+ * pl_setsched().  Returns None; raises OSError unless the failure is
+ * ESRCH.
+ */
+static PyObject *
+vserver_setsched(PyObject *self, PyObject *args)
+{
+ xid_t ctx;
+ uint32_t cpu_share;
+ uint32_t cpu_sched_flags = VC_VXF_SCHED_FLAGS;
+
+ if (!PyArg_ParseTuple(args, "II|I", &ctx, &cpu_share, &cpu_sched_flags))
+ return NULL;
+
+ /* ESRCH indicates that there are no processes in the context */
+ if (pl_setsched(ctx, cpu_share, cpu_sched_flags) &&
+ errno != ESRCH)
+ return PyErr_SetFromErrno(PyExc_OSError);
+
+ return NONE;
+}
+
+/*
+ * getdlimit(path, xid): query the disk limits with vc_get_dlimit().
+ * Returns the tuple (space_used, space_total, inodes_used,
+ * inodes_total, reserved); raises OSError from errno on failure.
+ */
+static PyObject *
+vserver_get_dlimit(PyObject *self, PyObject *args)
+{
+ PyObject *res;
+ char* path;
+ unsigned xid;
+ struct vc_ctx_dlimit data;
+ int r;
+
+ if (!PyArg_ParseTuple(args, "si", &path,&xid))
+ return NULL;
+
+ memset(&data, 0, sizeof(data));
+ r = vc_get_dlimit(path, xid, 0, &data);
+ if (r>=0) {
+ /* every field is converted with the signed "i" format */
+ res = Py_BuildValue("(i,i,i,i,i)",
+ data.space_used,
+ data.space_total,
+ data.inodes_used,
+ data.inodes_total,
+ data.reserved);
+ } else {
+ res = PyErr_SetFromErrno(PyExc_OSError);
+ }
+
+ return res;
+}
+
+
+/*
+ * setdlimit(path, xid, space_used, space_total, inodes_used,
+ *           inodes_total, reserved): install disk limits.
+ * vc_add_dlimit() is called first (EEXIST is tolerated), then
+ * vc_set_dlimit() stores the values.  Returns None; raises OSError
+ * from errno on failure.
+ */
+static PyObject *
+vserver_set_dlimit(PyObject *self, PyObject *args)
+{
+ char* path;
+ unsigned xid;
+ struct vc_ctx_dlimit data;
+
+ memset(&data,0,sizeof(data));
+ if (!PyArg_ParseTuple(args, "siiiiii", &path,
+ &xid,
+ &data.space_used,
+ &data.space_total,
+ &data.inodes_used,
+ &data.inodes_total,
+ &data.reserved))
+ return NULL;
+
+ if ((vc_add_dlimit(path, xid, 0) && errno != EEXIST) ||
+ vc_set_dlimit(path, xid, 0, &data))
+ return PyErr_SetFromErrno(PyExc_OSError);
+
+ return NONE;
+}
+
+/*
+ * unsetdlimit(path, xid): remove the disk limit with vc_rem_dlimit().
+ * Returns None; a failure with errno ESRCH is ignored, any other
+ * failure raises OSError.
+ */
+static PyObject *
+vserver_unset_dlimit(PyObject *self, PyObject *args)
+{
+  char *path;
+  unsigned xid;
+
+  if (!PyArg_ParseTuple(args, "si", &path, &xid))
+    return NULL;
+
+  if (vc_rem_dlimit(path, xid, 0)) {
+    if (errno != ESRCH)
+      return PyErr_SetFromErrno(PyExc_OSError);
+  }
+
+  return NONE;
+}
+
+/*
+ * killall(ctx, sig): send sig to the context with vc_ctx_kill(ctx, 0, sig),
+ * then call vc_set_cflags() and vc_set_nflags() with flagword 0 under
+ * the VC_VXF_PERSISTENT / VC_NXF_PERSISTENT masks.
+ * Returns None; ESRCH from any step is ignored, other failures raise
+ * OSError.
+ */
+static PyObject *
+vserver_killall(PyObject *self, PyObject *args)
+{
+ xid_t ctx;
+ int sig;
+ struct vc_ctx_flags cflags = {
+ .flagword = 0,
+ .mask = VC_VXF_PERSISTENT
+ };
+ struct vc_net_flags nflags = {
+ .flagword = 0,
+ .mask = VC_NXF_PERSISTENT
+ };
+
+ if (!PyArg_ParseTuple(args, "Ii", &ctx, &sig))
+ return NULL;
+
+ if (vc_ctx_kill(ctx, 0, sig) && errno != ESRCH)
+ return PyErr_SetFromErrno(PyExc_OSError);
+
+ if (vc_set_cflags(ctx, &cflags) && errno != ESRCH)
+ return PyErr_SetFromErrno(PyExc_OSError);
+
+ if (vc_set_nflags(ctx, &nflags) && errno != ESRCH)
+ return PyErr_SetFromErrno(PyExc_OSError);
+
+ return NONE;
+}
+
+/*
+ * setbcaps(ctx, bcaps): pass the 64-bit bcaps bitmap to vc_set_ccaps()
+ * with bmask = vc_get_insecurebcaps() and ccaps/cmask zeroed.
+ * Returns None; ESRCH is ignored, other failures raise OSError.
+ */
+static PyObject *
+vserver_set_bcaps(PyObject *self, PyObject *args)
+{
+ xid_t ctx;
+ struct vc_ctx_caps caps;
+
+ if (!PyArg_ParseTuple(args, "IK", &ctx, &caps.bcaps))
+ return NULL;
+
+ caps.bmask = vc_get_insecurebcaps();
+ caps.cmask = caps.ccaps = 0;
+ if (vc_set_ccaps(ctx, &caps) == -1 && errno != ESRCH)
+ return PyErr_SetFromErrno(PyExc_OSError);
+
+ return NONE;
+}
+
+/*
+ * text2bcaps(list): feed the capability string to vc_list2bcap() and
+ * return the resulting bcaps bitmap as a Python integer.
+ * NOTE(review): vc_list2bcap()'s return value and `err` are not
+ * inspected, so malformed input is not reported to the caller.
+ */
+static PyObject *
+vserver_text2bcaps(PyObject *self, PyObject *args)
+{
+ struct vc_ctx_caps caps = { .bcaps = 0 };
+ const char *list;
+ int len;
+ struct vc_err_listparser err;
+
+ if (!PyArg_ParseTuple(args, "s#", &list, &len))
+ return NULL;
+
+ vc_list2bcap(list, len, &err, &caps);
+
+ return Py_BuildValue("K", caps.bcaps);
+}
+
+/*
+ * getbcaps(ctx): return the context's bcaps, restricted to the bits
+ * reported by vc_get_insecurebcaps(), as a Python integer.
+ * When vc_get_ccaps() fails with ESRCH the result is 0; any other
+ * failure raises OSError.
+ */
+static PyObject *
+vserver_get_bcaps(PyObject *self, PyObject *args)
+{
+  xid_t ctx;
+  struct vc_ctx_caps caps;
+
+  if (!PyArg_ParseTuple(args, "I", &ctx))
+    return NULL;
+
+  if (vc_get_ccaps(ctx, &caps) == -1) {
+    /* errno holds positive codes; the former comparison against
+     * -ESRCH was always true, so ESRCH raised OSError instead of
+     * falling back to an empty set like the other wrappers do. */
+    if (errno != ESRCH)
+      return PyErr_SetFromErrno(PyExc_OSError);
+    caps.bcaps = 0;
+  }
+
+  return Py_BuildValue("K", caps.bcaps & vc_get_insecurebcaps());
+}
+
+/*
+ * bcaps2text(bcaps): render a capability bitmap as a comma-separated
+ * string of "CAP_<name>" entries, using vc_lobcap2text() to obtain one
+ * name per iteration until it returns NULL.
+ */
+static PyObject *
+vserver_bcaps2text(PyObject *self, PyObject *args)
+{
+ struct vc_ctx_caps caps = { .bcaps = 0 };
+ PyObject *list;
+ const char *cap;
+
+ if (!PyArg_ParseTuple(args, "K", &caps.bcaps))
+ return NULL;
+
+ list = PyString_FromString("");
+
+ while ((cap = vc_lobcap2text(&caps.bcaps)) != NULL) {
+ /* list is NULL after an allocation or concatenation failure */
+ if (list == NULL)
+ break;
+ /* prefix every entry except the first with a comma */
+ PyString_ConcatAndDel(&list, PyString_FromFormat(
+ (PyString_Size(list) > 0 ? ",CAP_%s" : "CAP_%s" ),
+ cap));
+ }
+
+ return list;
+}
+
+/*
+ * Parse str as an IPv6 address first, then as an IPv4 address, storing
+ * the result and the matching VC_NXA_TYPE_* in *addr.
+ * Returns 0 on success and -1 when str is neither.
+ */
+static inline int
+convert_address(const char *str, struct vc_net_addr *addr)
+{
+  if (inet_pton(AF_INET6, str, addr->vna_v6_ip.s6_addr) > 0)
+    addr->vna_type = VC_NXA_TYPE_IPV6;
+  else if (inet_pton(AF_INET, str, &addr->vna_v4_ip.s_addr) > 0)
+    addr->vna_type = VC_NXA_TYPE_IPV4;
+  else
+    return -1;
+
+  return 0;
+}
+
+/*
+ * Count the leading one-bits of a netmask stored in network byte order.
+ *
+ * data  -- pointer to the mask bytes
+ * limit -- number of bits in the mask (32 for IPv4, 128 for IPv6)
+ *
+ * Returns the prefix length, between 0 and limit.
+ */
+static int
+mask_to_prefix(void *data, int limit)
+{
+  const uint8_t *mask = data;
+  int prefix = 0;
+
+  /* Prefix bits start at the most significant bit of each byte.  The
+   * former LSB-first test only worked for byte-aligned masks (a /25
+   * mask ending in 0x80 was reported as /24). */
+  while (prefix < limit && (mask[prefix >> 3] & (0x80 >> (prefix & 0x07))))
+    prefix++;
+
+  return prefix;
+}
+
+/*
+ * Fill in the netmask and prefix length of *addr.
+ *
+ * A default is installed first (/24 for IPv4, /64 for IPv6).  If a
+ * local interface carries exactly this address, its netmask replaces
+ * the default.
+ *
+ * Returns 1 when a matching interface was found, 0 when the default
+ * was kept, and -1 with errno set on error (EINVAL when vna_type is
+ * neither VC_NXA_TYPE_IPV4 nor VC_NXA_TYPE_IPV6).
+ */
+static int
+get_mask(struct vc_net_addr *addr)
+{
+  struct ifaddrs *head, *ifa;
+  int ret = 0;
+  int family, offset, len;
+  void *ip;
+
+  switch (addr->vna_type) {
+  case VC_NXA_TYPE_IPV4:
+    family = AF_INET;
+    offset = offsetof(struct sockaddr_in, sin_addr.s_addr);
+    ip = &addr->vna_v4_ip.s_addr;
+    len = 4;
+    addr->vna_v4_mask.s_addr = htonl(0xffffff00);
+    addr->vna_prefix = 24;
+    break;
+  case VC_NXA_TYPE_IPV6:
+    family = AF_INET6;
+    offset = offsetof(struct sockaddr_in6, sin6_addr.s6_addr);
+    ip = addr->vna_v6_ip.s6_addr;
+    len = 16;
+    /* s6_addr32 has four words; index 9 wrote outside the address */
+    addr->vna_v6_mask.s6_addr32[0] = addr->vna_v6_mask.s6_addr32[1] = 0xffffffff;
+    addr->vna_v6_mask.s6_addr32[2] = addr->vna_v6_mask.s6_addr32[3] = 0x00000000;
+    addr->vna_prefix = 64;
+    break;
+  default:
+    /* errno values are positive */
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (getifaddrs(&head) == -1)
+    return -1;
+  for (ifa = head; ifa; ifa = ifa->ifa_next) {
+    const char *netmask;
+
+    /* getifaddrs() may return entries without an address */
+    if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != family)
+      continue;
+    if (memcmp((char *) ifa->ifa_addr + offset, ip, len) != 0)
+      continue;
+
+    if (ifa->ifa_netmask != NULL) {
+      /* The offset is in bytes: cast before adding, otherwise the
+       * pointer advances in units of struct sockaddr. */
+      netmask = (const char *) ifa->ifa_netmask + offset;
+      switch (addr->vna_type) {
+      case VC_NXA_TYPE_IPV4:
+        memcpy(&addr->vna_v4_mask.s_addr, netmask, len);
+        addr->vna_prefix = mask_to_prefix(&addr->vna_v4_mask.s_addr, 32);
+        break;
+      case VC_NXA_TYPE_IPV6:
+        memcpy(addr->vna_v6_mask.s6_addr, netmask, len);
+        addr->vna_prefix = mask_to_prefix(addr->vna_v6_mask.s6_addr, 128);
+        break;
+      }
+    }
+    ret = 1;
+    break;
+  }
+  freeifaddrs(head);
+  return ret;
+}
+
+/* XXX These two functions are really similar */
+/*
+ * netadd(nid, ip): assign an IPv4 or IPv6 address to a network context.
+ * The netmask is looked up with get_mask(); when no interface matches
+ * (result 0) the default mask chosen by get_mask() is used.
+ * Raises ValueError for an unparsable address and OSError for lookup or
+ * vc_net_add() failures other than ESRCH.  Returns None.
+ */
+static PyObject *
+vserver_net_add(PyObject *self, PyObject *args)
+{
+ struct vc_net_addr addr;
+ nid_t nid;
+ const char *ip;
+
+ if (!PyArg_ParseTuple(args, "Is", &nid, &ip))
+ return NULL;
+
+ if (convert_address(ip, &addr) == -1)
+ return PyErr_Format(PyExc_ValueError, "%s is not a valid IP address", ip);
+
+ switch (get_mask(&addr)) {
+ case -1:
+ return PyErr_SetFromErrno(PyExc_OSError);
+ case 0:
+ /* XXX error here? */
+ break;
+ }
+ /* the ADDR bit is set only after get_mask(), which switches on the bare family type */
+ addr.vna_type |= VC_NXA_TYPE_ADDR;
+
+ if (vc_net_add(nid, &addr) == -1 && errno != ESRCH)
+ return PyErr_SetFromErrno(PyExc_OSError);
+
+ return NONE;
+}
+
+/*
+ * netremove(nid, ip): remove addresses from a network context.
+ *
+ * ip is "all", "all4", "all6" or a literal IPv4/IPv6 address.
+ * Raises ValueError for an unparsable address and OSError for lookup or
+ * vc_net_remove() failures other than ESRCH.  Returns None.
+ */
+static PyObject *
+vserver_net_remove(PyObject *self, PyObject *args)
+{
+  struct vc_net_addr addr;
+  nid_t nid;
+  const char *ip;
+
+  if (!PyArg_ParseTuple(args, "Is", &nid, &ip))
+    return NULL;
+
+  /* the wildcard forms set nothing but vna_type; do not hand
+   * uninitialised address and mask bytes to the kernel */
+  memset(&addr, 0, sizeof(addr));
+
+  if (strcmp(ip, "all") == 0)
+    addr.vna_type = VC_NXA_TYPE_ANY;
+  else if (strcmp(ip, "all4") == 0)
+    /* was VC_NXA_TYPE_IPV6, a copy of the "all6" branch */
+    addr.vna_type = VC_NXA_TYPE_IPV4 | VC_NXA_TYPE_ANY;
+  else if (strcmp(ip, "all6") == 0)
+    addr.vna_type = VC_NXA_TYPE_IPV6 | VC_NXA_TYPE_ANY;
+  else {
+    if (convert_address(ip, &addr) == -1)
+      return PyErr_Format(PyExc_ValueError, "%s is not a valid IP address", ip);
+    /* Same order as vserver_net_add(): get_mask() switches on the bare
+     * family type, so it runs before the ADDR bit is set, and only for
+     * concrete addresses (a wildcard has no address to look up). */
+    if (get_mask(&addr) == -1)
+      return PyErr_SetFromErrno(PyExc_OSError);
+    addr.vna_type |= VC_NXA_TYPE_ADDR;
+  }
+
+  if (vc_net_remove(nid, &addr) == -1 && errno != ESRCH)
+    return PyErr_SetFromErrno(PyExc_OSError);
+
+  return NONE;
+}
+
+/*
+ * Directory descriptors used by secure_chdir() and restore_dirs().
+ * A value of -1 means "not open".
+ */
+struct secure_dirs {
+ int host_fd; /* "/" as seen before entering the guest */
+ int cwd_fd; /* working directory at the time of the call */
+ int guest_fd; /* the guest's root directory */
+ int target_fd; /* target directory, opened relative to the guest root */
+};
+
+/*
+ * Make the directory referred to by fd the root directory: fchdir()
+ * into it, then chroot(".").  Returns 0 on success, -1 on failure.
+ */
+static inline int
+fchroot(int fd)
+{
+  if (fchdir(fd) == -1)
+    return -1;
+  if (chroot(".") == -1)
+    return -1;
+  return 0;
+}
+
+/*
+ * Undo secure_chdir(): return to the host root and the saved working
+ * directory and close every descriptor that is still open.
+ *
+ * All steps are attempted even when an earlier one fails, so a failing
+ * close() can no longer leave the process in the wrong directory or
+ * leak the remaining descriptors.  Closed descriptors are reset to -1.
+ *
+ * Returns 0 on success; on failure returns -1 with errno set to the
+ * first error encountered.
+ */
+static inline int
+restore_dirs(struct secure_dirs *dirs)
+{
+  int first_errno = 0;
+
+  if (dirs->host_fd != -1) {
+    if (fchroot(dirs->host_fd) == -1 && first_errno == 0)
+      first_errno = errno;
+    if (close(dirs->host_fd) == -1 && first_errno == 0)
+      first_errno = errno;
+    dirs->host_fd = -1;
+  }
+  if (dirs->guest_fd != -1) {
+    if (close(dirs->guest_fd) == -1 && first_errno == 0)
+      first_errno = errno;
+    dirs->guest_fd = -1;
+  }
+  if (dirs->target_fd != -1) {
+    if (close(dirs->target_fd) == -1 && first_errno == 0)
+      first_errno = errno;
+    dirs->target_fd = -1;
+  }
+  if (dirs->cwd_fd != -1) {
+    if (fchdir(dirs->cwd_fd) == -1 && first_errno == 0)
+      first_errno = errno;
+    if (close(dirs->cwd_fd) == -1 && first_errno == 0)
+      first_errno = errno;
+    dirs->cwd_fd = -1;
+  }
+
+  if (first_errno != 0) {
+    errno = first_errno;
+    return -1;
+  }
+  return 0;
+}
+
+/*
+ * Change the working directory to `target`, resolved inside the guest
+ * root `guest`, so that a path within the guest cannot lead outside it.
+ *
+ * The guest root is entered with chroot just long enough to open the
+ * target; the host root is then restored and the process fchdir()s to
+ * the opened target.
+ *
+ * On success returns 0 with only dirs->cwd_fd left open, for
+ * restore_dirs() to return to.  On failure returns -1 with errno set
+ * and *dirs describing what is still open; the caller must call
+ * restore_dirs().
+ */
+static inline int
+secure_chdir(struct secure_dirs *dirs, const char *guest, const char *target)
+{
+  int rc;
+
+  dirs->host_fd = dirs->cwd_fd = dirs->guest_fd = dirs->target_fd = -1;
+
+  dirs->host_fd = open("/", O_RDONLY|O_DIRECTORY);
+  if (dirs->host_fd == -1)
+    return -1;
+
+  dirs->cwd_fd = open(".", O_RDONLY|O_DIRECTORY);
+  if (dirs->cwd_fd == -1)
+    return -1;
+
+  dirs->guest_fd = open(guest, O_RDONLY|O_DIRECTORY);
+  if (dirs->guest_fd == -1)
+    return -1;
+  if (fchroot(dirs->guest_fd) == -1)
+    return -1;
+
+  /* resolved with the guest as root */
+  dirs->target_fd = open(target, O_RDONLY|O_DIRECTORY);
+  if (dirs->target_fd == -1)
+    return -1;
+
+  if (fchroot(dirs->host_fd) == -1)
+    return -1;
+  rc = close(dirs->host_fd);
+  dirs->host_fd = -1;
+  if (rc == -1)
+    return -1;
+
+  rc = close(dirs->guest_fd);
+  dirs->guest_fd = -1;
+  if (rc == -1)
+    return -1;
+
+  if (fchdir(dirs->target_fd) == -1)
+    return -1;
+  rc = close(dirs->target_fd);
+  /* Mark the descriptor as closed.  It used to keep its old value, so
+   * restore_dirs() closed it a second time, failed with EBADF and
+   * returned before restoring the working directory. */
+  dirs->target_fd = -1;
+  if (rc == -1)
+    return -1;
+
+  return 0;
+}
+
+/*
+ * mount(source, guest, target, type[, flags[, data]]): mount a
+ * filesystem on `target`, resolved inside the guest root `guest`.
+ *
+ * The process changes into the target with secure_chdir() and mounts
+ * on "."; the previous directories are restored afterwards.
+ * Returns None; raises OSError carrying the errno of the step that
+ * failed.
+ */
+static PyObject *
+vserver_mount(PyObject *self, PyObject *args)
+{
+  const char *guest, *target, *source, *type, *data = NULL;
+  unsigned long flags = 0;
+  struct secure_dirs dirs;
+  int saved_errno;
+
+  if (!PyArg_ParseTuple(args, "ssss|ks", &source, &guest, &target, &type,
+                        &flags, &data))
+    return NULL;
+
+  if (secure_chdir(&dirs, guest, target) == -1)
+    goto out;
+  if (mount(source, ".", type, flags, data) == -1)
+    goto out;
+  restore_dirs(&dirs);
+
+  return NONE;
+
+out:
+  /* The cleanup makes system calls of its own; keep the errno of the
+   * failure that is being reported (callers test for EBUSY). */
+  saved_errno = errno;
+  restore_dirs(&dirs);
+  errno = saved_errno;
+  return PyErr_SetFromErrno(PyExc_OSError);
+}
+
+/*
+ * umount(guest, target[, flags]): call umount2() on "<guest>/<target>".
+ * Returns None; raises OSError when umount2() fails and MemoryError
+ * when the path buffer cannot be allocated.
+ */
+static PyObject *
+vserver_umount(PyObject *self, PyObject *args)
+{
+  const char *guest, *target;
+  int flags = 0;
+  char *path;
+  PyObject *ret;
+
+  if (!PyArg_ParseTuple(args, "ss|i", &guest, &target, &flags))
+    return NULL;
+
+  /* room for both parts, the '/' separator and the terminating NUL */
+  path = calloc(strlen(guest) + strlen(target) + 2, sizeof(char));
+  if (path == NULL)
+    return PyErr_NoMemory();
+  sprintf(path, "%s/%s", guest, target);
+  if (umount2(path, flags) == -1)
+    ret = PyErr_SetFromErrno(PyExc_OSError);
+  else
+    ret = NONE;
+  free(path);
+
+  return ret;
+}
+
+/*
+ * setrunlevel(file, runlevel): write a RUN_LVL record to the utmp file
+ * named by the string `file`.  The record's ut_pid encodes '#' in the
+ * high byte and the runlevel as an ASCII digit in the low byte.
+ * Always returns None.
+ * NOTE(review): the results of utmpname() and pututline() are not
+ * checked, so a failed write goes unreported.
+ */
+static PyObject *
+vserver_set_runlevel(PyObject *self, PyObject *args)
+{
+ const char *file;
+ int runlevel;
+ struct utmp ut;
+
+ if (!PyArg_ParseTuple(args, "si", &file, &runlevel))
+ return NULL;
+
+ utmpname(file);
+ setutent();
+ memset(&ut, 0, sizeof(ut));
+ ut.ut_type = RUN_LVL;
+ ut.ut_pid = ('#' << 8) + runlevel + '0';
+ pututline(&ut);
+ endutent();
+
+ return NONE;
+}
+
+/*
+ * Method table of the vserverimpl module: Python-visible name, C
+ * implementation, calling convention and docstring.  Terminated by the
+ * all-NULL sentinel entry.
+ */
+static PyMethodDef methods[] = {
+ { "chcontext", vserver_chcontext, METH_VARARGS,
+ "chcontext to vserver with provided flags" },
+ { "setup_done", vserver_setup_done, METH_VARARGS,
+ "Release vserver setup lock" },
+ { "setsched", vserver_setsched, METH_VARARGS,
+ "Change vserver scheduling attributes for given vserver context" },
+ { "setdlimit", vserver_set_dlimit, METH_VARARGS,
+ "Set disk limits for given vserver context" },
+ { "unsetdlimit", vserver_unset_dlimit, METH_VARARGS,
+ "Remove disk limits for given vserver context" },
+ { "getdlimit", vserver_get_dlimit, METH_VARARGS,
+ "Get disk limits for given vserver context" },
+ { "setrlimit", vserver_set_rlimit, METH_VARARGS,
+ "Set resource limits for given resource of a vserver context" },
+ { "getrlimit", vserver_get_rlimit, METH_VARARGS,
+ "Get resource limits for given resource of a vserver context" },
+ { "killall", vserver_killall, METH_VARARGS,
+ "Send signal to all processes in vserver context" },
+ { "isrunning", vserver_isrunning, METH_VARARGS,
+ "Check if vserver is running"},
+ { "setbcaps", vserver_set_bcaps, METH_VARARGS,
+ "Set POSIX capabilities of a vserver context" },
+ { "getbcaps", vserver_get_bcaps, METH_VARARGS,
+ "Get POSIX capabilities of a vserver context" },
+ { "text2bcaps", vserver_text2bcaps, METH_VARARGS,
+ "Translate a string of capabilities to a bitmap" },
+ { "bcaps2text", vserver_bcaps2text, METH_VARARGS,
+ "Translate a capability-bitmap into a string" },
+ { "netadd", vserver_net_add, METH_VARARGS,
+ "Assign an IP address to a context" },
+ { "netremove", vserver_net_remove, METH_VARARGS,
+ "Remove IP address(es) from a context" },
+ { "mount", vserver_mount, METH_VARARGS,
+ "Perform the mount() system call" },
+ { "umount", vserver_umount, METH_VARARGS,
+ "Perform the umount2() system call" },
+ { "setrunlevel", vserver_set_runlevel, METH_VARARGS,
+ "Set the runlevel in utmp" },
+ { NULL, NULL, 0, NULL }
+};
+
+/*
+ * Module initialisation: register the method table and export the
+ * constants used by the Python layer.
+ */
+PyMODINIT_FUNC
+initvserverimpl(void)
+{
+  PyObject *mod;
+
+  mod = Py_InitModule("vserverimpl", methods);
+  /* Py_InitModule() returns NULL with an exception set on failure;
+   * adding constants to a NULL module would dereference it. */
+  if (mod == NULL)
+    return;
+
+  /* export the set of 'safe' capabilities */
+  PyModule_AddIntConstant(mod, "CAP_SAFE", ~vc_get_insecurebcaps());
+
+  /* export the default vserver directory */
+  PyModule_AddStringConstant(mod, "VSERVER_BASEDIR", DEFAULT_VSERVERDIR);
+
+  /* export limit-related constants */
+  PyModule_AddIntConstant(mod, "DLIMIT_KEEP", (int)VC_CDLIM_KEEP);
+  PyModule_AddIntConstant(mod, "DLIMIT_INF", (int)VC_CDLIM_INFINITY);
+  PyModule_AddIntConstant(mod, "VC_LIM_KEEP", (int)VC_LIM_KEEP);
+
+  PyModule_AddIntConstant(mod, "RLIMIT_CPU", (int)RLIMIT_CPU);
+  PyModule_AddIntConstant(mod, "RLIMIT_RSS", (int)RLIMIT_RSS);
+  PyModule_AddIntConstant(mod, "RLIMIT_NPROC", (int)RLIMIT_NPROC);
+  PyModule_AddIntConstant(mod, "RLIMIT_NOFILE", (int)RLIMIT_NOFILE);
+  PyModule_AddIntConstant(mod, "RLIMIT_MEMLOCK", (int)RLIMIT_MEMLOCK);
+  PyModule_AddIntConstant(mod, "RLIMIT_AS", (int)RLIMIT_AS);
+  PyModule_AddIntConstant(mod, "RLIMIT_LOCKS", (int)RLIMIT_LOCKS);
+
+  PyModule_AddIntConstant(mod, "RLIMIT_SIGPENDING", (int)RLIMIT_SIGPENDING);
+  PyModule_AddIntConstant(mod, "RLIMIT_MSGQUEUE", (int)RLIMIT_MSGQUEUE);
+
+  PyModule_AddIntConstant(mod, "VLIMIT_NSOCK", (int)VC_VLIMIT_NSOCK);
+  PyModule_AddIntConstant(mod, "VLIMIT_OPENFD", (int)VC_VLIMIT_OPENFD);
+  PyModule_AddIntConstant(mod, "VLIMIT_ANON", (int)VC_VLIMIT_ANON);
+  PyModule_AddIntConstant(mod, "VLIMIT_SHMEM", (int)VC_VLIMIT_SHMEM);
+
+  /* scheduler flags */
+  PyModule_AddIntConstant(mod,
+                          "VS_SCHED_CPU_GUARANTEED",
+                          VS_SCHED_CPU_GUARANTEED);
+}
--- /dev/null
+#!/bin/bash
+#
+# vcached: VServer cache allocator
+#
+# Description: A script that preallocates vservers and stores them in
+# a cache. Preallocated vservers from the cache may be then used to
+# instantiate real vservers. Requires that /var/run/vcached.pid does
+# not exist on startup. Should run periodically as a cron job.
+#
+# Based on work by:
+#
+# Brent Chun - bnc@intel-research.net
+# Tristan Koo - tristan.koo@intel-research.net
+# William Wung - wungism@uclink.berkeley.edu
+#
+# Mark Huang <mlhuang@cs.princeton.edu>
+# Copyright (c) 2004-2005 The Trustees of Princeton University
+#
+# $Id: vcached,v 1.14 2007/07/05 19:05:14 dhozac Exp $
+#
+
+PATH=/sbin:/usr/sbin:$PATH
+
+# number of images to keep cached
+slots=32
+
+# PID file
+pidfile=/var/run/vcached.pid
+
+# log file
+logfile=/var/log/vcached.log
+
+# debug
+debug=0
+
+# Print the command-line synopsis on stdout and terminate with status 1.
+usage()
+{
+    printf '%s\n' \
+        "usage: vcached [OPTION...]" \
+        " -s [slots] number of images to keep cached" \
+        " -p [pidfile] PID file" \
+        " -l [logfile] log file" \
+        " -d debug"
+    exit 1
+}
+
+# parse options
+while getopts 's:p:l:dh' OPT ; do
+    case "$OPT" in
+        s) slots=$OPTARG ;;
+        p) pidfile=$OPTARG ;;
+        l) logfile=$OPTARG ;;
+        d) debug=1 ;;
+        h|*) usage ;;
+    esac
+done
+
+# the slot count feeds shell arithmetic below; reject anything non-numeric
+case "$slots" in
+    ''|*[!0-9]*) usage ;;
+esac
+
+# append output to log file (quoted so paths containing spaces work)
+exec 1>>"$logfile"
+exec 2>>"$logfile"
+
+# check if we are already running: the PID file exists and names a live process
+if [ -f "$pidfile" ] && kill -0 "$(cat "$pidfile")" >/dev/null 2>&1 ; then
+    echo "vcached($(cat "$pidfile")) already running"
+    exit 1
+fi
+echo $$ > "$pidfile"
+
+# clean up lock file before exiting; single quotes defer expansion so the
+# path stays quoted when the trap fires
+trap 'rm -f "$pidfile"' EXIT
+
+# locate and load the util-vserver variable definitions
+: ${UTIL_VSERVER_VARS:=/usr/lib/util-vserver/util-vserver-vars}
+test -e "$UTIL_VSERVER_VARS" || {
+    echo "Can not find util-vserver installation; aborting..."
+    exit 1
+}
+. "$UTIL_VSERVER_VARS"
+
+# make sure barrier bit is set on /vservers to prevent chroot() escapes
+setattr --barrier "$__DEFAULT_VSERVERDIR"
+
+# take out the trash
+#rm -rf "$__DEFAULT_VSERVERDIR/.vtmp"
+
+mkdir -p "$__DEFAULT_VSERVERDIR/.vcache"
+mkdir -p "$__DEFAULT_VSERVERDIR/.vtmp"
+
+[ $debug -ne 0 ] && echo "$(date) Checking the cache"
+for i in $(seq 0 $(($slots - 1))) ; do
+    if [ ! -d "$__DEFAULT_VSERVERDIR/.vcache/v$i" ] ; then
+        echo "$(date) Caching v$i"
+        # build image in .vtmp; never continue with an empty TMP, otherwise
+        # "$TMP"/ would expand to "/" as the clone destination
+        TMP=$(mktemp -d "$__DEFAULT_VSERVERDIR/.vtmp/v$i.XXXXXX")
+        if [ $? -ne 0 ] || [ -z "$TMP" ] ; then
+            echo "$(date) Error creating temporary directory for v$i"
+            continue
+        fi
+        "$_VCLONE" "$__DEFAULT_VSERVERDIR/.vref/default/" "$TMP"/
+        RETVAL=$?
+        # move it to .vcache when complete
+        if [ $RETVAL -eq 0 ] ; then
+            mv "$TMP" "$__DEFAULT_VSERVERDIR/.vcache/v$i"
+            echo "$(date) v$i ready"
+        else
+            echo "$(date) Error $RETVAL building v$i"
+            rm -rf "$TMP"
+        fi
+    fi
+done
+
+exit 0
--- /dev/null
+#
+# vcached: VServer cache allocator
+#
+# Mark Huang <mlhuang@cs.princeton.edu>
+# Copyright (c) 2004-2005 The Trustees of Princeton University
+#
+# $Id: vcached.cron,v 1.2 2005/09/01 18:52:53 mlhuang Exp $
+#
+
+*/15 * * * * root /usr/sbin/vcached
--- /dev/null
+# Rotate the vcached log daily, keeping five compressed generations;
+# empty logs are left alone and a missing log is not an error.
+# NOTE(review): the vcached script in this patch only installs an EXIT
+# trap, so the HUP sent by postrotate would presumably terminate a running
+# instance rather than make it reopen its log -- confirm this is intended.
+/var/log/vcached.log {
+ compress
+ daily
+ notifempty
+ rotate 5
+ missingok
+ postrotate
+ kill -HUP `cat /var/run/vcached.pid 2>/dev/null` 2>/dev/null || true
+ endscript
+}
--- /dev/null
+#!/bin/bash
+#
+# useradd(8) wrapper for vservers
+#
+# Mark Huang <mlhuang@cs.princeton.edu>
+# Copyright (C) 2004-2006 The Trustees of Princeton University
+#
+# $Id: vuseradd,v 1.28 2007/07/05 19:05:14 dhozac Exp $
+#
+
+: ${UTIL_VSERVER_VARS:=/usr/lib/util-vserver/util-vserver-vars}
+test -e "$UTIL_VSERVER_VARS" || {
+ echo "Can not find util-vserver installation; aborting..."
+ exit 1
+}
+. "$UTIL_VSERVER_VARS"
+
+shopt -s nullglob
+
+# Defaults
+TYPE="default"
+
+# Print the synopsis, including the reference image types found under
+# $__DEFAULT_VSERVERDIR/.vref, then exit with status 1.
+usage()
+{
+    TYPES=
+    # Glob the directory directly rather than pushd-ing into it: if .vref
+    # is missing a failed pushd would list the current directory instead.
+    # With nullglob (set by this script) a missing directory yields nothing.
+    for ref in "$__DEFAULT_VSERVERDIR/.vref"/* ; do
+        ref=${ref##*/}
+        if [ -z "$TYPES" ] ; then
+            TYPES=$ref
+        else
+            TYPES="$TYPES, $ref"
+        fi
+    done
+
+    echo "Usage: vuseradd [OPTION]... [NAME]"
+    echo " -t Reference image type ($TYPES)"
+    exit 1
+}
+
+# Parse command-line options: -t selects the reference image type,
+# anything else prints the usage text
+while getopts "t:" opt ; do
+    if [ "$opt" = "t" ] ; then
+        TYPE="$OPTARG"
+    else
+        usage
+    fi
+done
+shift $((OPTIND - 1))
+
+# The first remaining argument is the slice name; it is mandatory
+if [ -z "$1" ] ; then
+    usage
+fi
+NAME=$1
+
+# Add slices group to /etc/group if not already present
+groupadd slices 2>/dev/null || :
+
+# Add slice name to /etc/passwd
+useradd -g slices -s /bin/vsh $NAME -p '*'
+
+USERID=`id -u $NAME`
+GROUPID=`id -g $NAME`
+GROUPNAME=`id -gn $NAME`
+
+# Create /etc/vservers configuration files
+if [ ! -d "$__CONFDIR/$NAME" ] ; then
+    # Move away the guest contents for now
+    if [ -d "$__DEFAULT_VSERVERDIR/$NAME" ] ; then
+        mkdir -p "$__DEFAULT_VSERVERDIR/.vtmp"
+        TMP=$(mktemp -d "$__DEFAULT_VSERVERDIR/.vtmp/$NAME.XXXXXX")
+        # Without a holding directory the guest would stay in place and be
+        # deleted by the "remove the empty guest directory" step below
+        if [ $? -ne 0 ] || [ -z "$TMP" ] ; then
+            echo "Error creating temporary directory for $NAME"
+            exit 1
+        fi
+        mv "$__DEFAULT_VSERVERDIR/$NAME" "$TMP"
+        HAS_VSERVERDIR=1
+    else
+        HAS_VSERVERDIR=0
+    fi
+
+    $_VSERVER "$NAME" build -m skeleton --context "$USERID" \
+        --interface nodev:0.0.0.0/0 \
+        --flags persistent,~info_init,sched_hard
+    RETVAL=$?
+    DIR=$__CONFDIR/$NAME
+    if [ $RETVAL -ne 0 ] ; then
+        echo "Error $RETVAL building $DIR"
+        rm -rf "$DIR" "$__DEFAULT_VSERVERDIR/$NAME"
+        # Put the original guest back and stop: populating a configuration
+        # that has just been removed would leave a half-built vserver that
+        # later runs mistake for a complete one
+        if [ "$HAS_VSERVERDIR" = 1 ] ; then
+            mv "$TMP/$NAME" "$__DEFAULT_VSERVERDIR/$NAME"
+            rm -rf "$TMP"
+        fi
+        exit $RETVAL
+    fi
+    mkdir -p "$DIR/apps/init" "$DIR/rlimits" "$DIR/sched" "$DIR/dlimits/0"
+    echo default > "$DIR/apps/init/mark"
+    echo 1000 > "$DIR/rlimits/nproc"
+
+    # Set persistent for the network context
+    echo persistent > "$DIR/nflags"
+
+    # Set up the scheduler
+    echo 1000 > "$DIR/sched/interval"
+    echo 1000 > "$DIR/sched/interval2"
+    echo 0 > "$DIR/sched/fill-rate"
+    echo 32 > "$DIR/sched/fill-rate2"
+    touch "$DIR/sched/idle-time"
+    echo 100 > "$DIR/sched/tokens"
+    echo 50 > "$DIR/sched/tokens-min"
+    echo 100 > "$DIR/sched/tokens-max"
+
+    # Set up disk limits (unlimited)
+    echo "$($_READLINK "$DIR/vdir")" > "$DIR/dlimits/0/directory"
+    echo 2 > "$DIR/dlimits/0/reserved"
+    echo -1 > "$DIR/dlimits/0/inodes_total"
+    echo -1 > "$DIR/dlimits/0/space_total"
+
+    # Remove the basically empty guest directory
+    rm -rf "$__DEFAULT_VSERVERDIR/$NAME"
+    # Move the guest back
+    if [ "$HAS_VSERVERDIR" = 1 ] ; then
+        mv "$TMP/$NAME" "$__DEFAULT_VSERVERDIR/$NAME"
+        rm -rf "$TMP"
+    fi
+fi
+
+if [ ! -d "$__DEFAULT_VSERVERDIR/$NAME" ] ; then
+    # Check the cache: take the first pre-built image, if any
+    if [ "$TYPE" = "default" ] ; then
+        for i in "$__DEFAULT_VSERVERDIR/.vcache/"* ; do
+            [ -d "$i" ] && mv "$i" "$__DEFAULT_VSERVERDIR/$NAME" && break
+        done
+    fi
+
+    # Build slice from reference image
+    if [ ! -d "$__DEFAULT_VSERVERDIR/$NAME" ] ; then
+        REF="$__DEFAULT_VSERVERDIR/.vref/$TYPE"
+
+        # Build in temporary directory.  Only clone when mktemp succeeded:
+        # with an empty TMP the destination "$TMP"/ would expand to "/".
+        mkdir -p "$__DEFAULT_VSERVERDIR/.vtmp"
+        TMP=$(mktemp -d "$__DEFAULT_VSERVERDIR/.vtmp/$NAME.XXXXXX")
+        RETVAL=$?
+        if [ $RETVAL -eq 0 ] && [ -z "$TMP" ] ; then
+            RETVAL=1
+        fi
+        if [ $RETVAL -eq 0 ] ; then
+            "$_VCLONE" "$REF"/ "$TMP"/
+            RETVAL=$?
+        fi
+
+        # Move it to its permanent location when complete
+        if [ $RETVAL -eq 0 ] ; then
+            mv "$TMP" "$__DEFAULT_VSERVERDIR/$NAME"
+        else
+            echo "Error $RETVAL building $__DEFAULT_VSERVERDIR/$NAME"
+            # Undo everything created for this slice so far
+            rm -rf ${TMP:+"$TMP"} "$__CONFDIR/$NAME" "$__PKGSTATEDIR/$NAME.ctx"
+            userdel -r "$NAME"
+            exit $RETVAL
+        fi
+    fi
+fi
+
+if [ -d "$__DEFAULT_VSERVERDIR/$NAME" ] ; then
+    # Fix permissions
+    chmod 755 "$__DEFAULT_VSERVERDIR/$NAME"
+
+    # Add user in vserver, mirroring the host uid/gid
+    $_VSERVER ----insecure "$NAME" suexec root sh -c \
+        "groupadd -g $GROUPID $GROUPNAME ; useradd -u $USERID -g $GROUPID -p '' $NAME"
+
+    # Add an unrestricted entry to /etc/sudoers file.  The name must be
+    # followed by whitespace so that a slice called "foo" is not treated
+    # as present merely because "foobar" already has an entry.
+    if [ -f "$__DEFAULT_VSERVERDIR/$NAME/etc/sudoers" ] && \
+       ! grep -q "^$NAME[[:space:]]" "$__DEFAULT_VSERVERDIR/$NAME/etc/sudoers" ; then
+        echo "$NAME ALL=(ALL) ALL" >> "$__DEFAULT_VSERVERDIR/$NAME/etc/sudoers"
+    fi
+fi
+
+exit 0
--- /dev/null
+#!/bin/bash
+#
+# userdel(8) wrapper for vservers
+#
+# Copyright (c) 2004 The Trustees of Princeton University (Trustees).
+#
+# $Id: vuserdel,v 1.11 2007/06/29 14:13:01 dhozac Exp $
+#
+
+: ${UTIL_VSERVER_VARS:=/usr/lib/util-vserver/util-vserver-vars}
+test -e "$UTIL_VSERVER_VARS" || {
+ echo "Can not find util-vserver installation; aborting..."
+ exit 1
+}
+. "$UTIL_VSERVER_VARS"
+
+# Print the synopsis and exit with status 1.  The script accepts an
+# optional leading --static flag, so it is shown here as well.
+usage()
+{
+    echo "usage: $0 [--static] name"
+    exit 1
+}
+
+# --static keeps the host account; never inherit STATIC from the environment
+STATIC=
+[ "$1" == "--static" ] && { STATIC=yes; shift; }
+
+# The slice name is mandatory.  It must be checked AFTER --static has been
+# consumed: with an empty NAME the rm -rf calls below would hit the whole
+# configuration directory and the whole vserver root.
+[ -z "$1" ] && usage
+NAME=$1
+
+# read config file to get context ID
+CTX=
+[ -f "$__CONFDIR/$NAME/context" ] && CTX=$(cat "$__CONFDIR/$NAME/context")
+
+# don't bother stopping gracefully, just kill all the processes
+# (skipped when no context id is known -- there is nothing to address)
+if [ -n "$CTX" ] ; then
+    chcontext --silent --secure --ctx "$CTX" "$__LEGACYDIR/vserverkillall"
+fi
+
+# unmount any directories in vserver that are mount points
+for d in `sed -ne "s%^[^ ]* \($__DEFAULT_VSERVERDIR/$NAME/[^ ]*\) .*%\1%p" /proc/mounts`
+do
+    # use echo -e to turn escaped whitespace back into regular chars
+    # be careful about embedded backquotes here (i think we're safe)
+    dir=`echo -e "$d"`
+    echo "unmounting $dir"
+    umount -l "$dir"
+done
+
+# delete user
+[ -z "$STATIC" ] && userdel -r "$NAME"
+
+# remove vserver configuration directory
+rm -rf "${__CONFDIR:-/etc/vservers}/$NAME"
+
+# remove vserver profile
+rm -f "${__PKGSTATEDIR:-/var/run/vservers}/$NAME.ctx"
+
+# destroy vserver: rename it out of the way first, then delete it
+if [ -d "$__DEFAULT_VSERVERDIR/$NAME" ] ; then
+    mkdir -p "$__DEFAULT_VSERVERDIR/.vtmp"
+    TMP=$(mktemp -d "$__DEFAULT_VSERVERDIR/.vtmp/$NAME.XXXXXX")
+    if [ $? -eq 0 ] && [ -n "$TMP" ] ; then
+        mv "$__DEFAULT_VSERVERDIR/$NAME" "$TMP"
+        rm -rf "$TMP"
+    else
+        # no holding directory available: delete in place
+        rm -rf "$__DEFAULT_VSERVERDIR/$NAME"
+    fi
+fi
--- /dev/null
+/* Copyright 2005 Princeton University
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ * Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PRINCETON
+UNIVERSITY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+
+#include "vserver.h"
+#include "planetlab.h"
+
+/*
+ * Create the network context and the process context identified by ctx,
+ * install the given bcaps and apply the default scheduler settings.
+ *
+ * Returns 0 on success and -1 (errno set by the failing call) on error.
+ * A pl_setsched() failure is reported through PERROR and ends the process.
+ */
+static int
+create_context(xid_t ctx, uint64_t bcaps)
+{
+    struct vc_ctx_caps caps;
+    struct vc_net_flags nflags;
+
+    if (vc_net_create(ctx) != VC_NOCTX) {
+        /* Freshly created network context: make it persistent */
+        nflags.mask = nflags.flagword = VC_NXF_PERSISTENT;
+        if (vc_set_nflags(ctx, &nflags))
+            return -1;
+    }
+    else if (errno != EEXIST) {
+        /* An already existing network context is fine, anything else is not */
+        return -1;
+    }
+
+    /*
+     * Create context info - this sets the STATE_SETUP and STATE_INIT flags.
+     */
+    if (vc_ctx_create(ctx, 0) == VC_NOCTX)
+        return -1;
+
+    /* Set capabilities - these don't take effect until SETUP flag is unset */
+    caps.ccaps = 0;       /* don't want any of these */
+    caps.cmask = ~0ULL;
+    caps.bcaps = bcaps;
+    caps.bmask = ~0ULL;   /* currently unused */
+    if (vc_set_ccaps(ctx, &caps))
+        return -1;
+
+    /* Default scheduling: share 1, no scheduler flags */
+    if (pl_setsched(ctx, 1, 0) < 0) {
+        PERROR("pl_setsched(%u)", ctx);
+        exit(1);
+    }
+
+    return 0;
+}
+
+/*
+ * Mark context ctx as fully set up: clear STATE_SETUP (so other processes
+ * may migrate into it) and set PERSISTENT (so the context does not vanish).
+ * STATE_INIT is deliberately left alone, as clearing it would make the
+ * caller the init task.
+ *
+ * Returns 0 on success, -1 if vc_set_cflags() fails.
+ */
+int
+pl_setup_done(xid_t ctx)
+{
+    struct vc_ctx_flags flags;
+
+    flags.flagword = VC_VXF_PERSISTENT;
+    flags.mask = VC_VXF_STATE_SETUP | VC_VXF_PERSISTENT;
+
+    return vc_set_cflags(ctx, &flags) ? -1 : 0;
+}
+
+/* Maximum number of one-second waits for another process to finish setup */
+#define RETRY_LIMIT 10
+
+/*
+ * Enter context ctx, creating it first if it does not exist yet.
+ *
+ * The ulimits described by slr are applied to the calling process before
+ * anything else (pl_set_ulimits() ignores a NULL slr).
+ *
+ * Returns 1 if this call created the context, 0 if it migrated into an
+ * existing one, and -1 on error (errno is EBUSY when the context stayed in
+ * its SETUP state for more than RETRY_LIMIT polls).
+ */
+int
+pl_chcontext(xid_t ctx, uint64_t bcaps, const struct sliver_resources *slr)
+{
+ int retry_count = 0;
+ int net_migrated = 0;
+
+ pl_set_ulimits(slr);
+
+ for (;;)
+ {
+ struct vc_ctx_flags vc_flags;
+
+ if (vc_get_cflags(ctx, &vc_flags))
+ {
+ if (errno != ESRCH)
+ return -1;
+
+ /* context doesn't exist - create it */
+ if (create_context(ctx, bcaps))
+ {
+ if (errno == EEXIST)
+ /* another process beat us in a race */
+ goto migrate;
+ if (errno == EBUSY)
+ /* another process is creating - poll the SETUP flag */
+ /* NOTE(review): this path loops without sleeping or counting
+ * against RETRY_LIMIT -- confirm it cannot spin. */
+ continue;
+ return -1;
+ }
+
+ /* created context and migrated to it i.e., we're done */
+ return 1;
+ }
+
+ /* check the SETUP flag */
+ if (vc_flags.flagword & VC_VXF_STATE_SETUP)
+ {
+ /* context is still being setup - wait a while then retry */
+ if (retry_count++ >= RETRY_LIMIT)
+ {
+ errno = EBUSY;
+ return -1;
+ }
+ sleep(1);
+ continue;
+ }
+
+ /* context has been setup */
+ migrate:
+ /* the network migration is attempted once; after it succeeded only the
+ * process-context migration is retried on later iterations */
+ if (net_migrated || !vc_net_migrate(ctx))
+ {
+ if (!vc_ctx_migrate(ctx, 0))
+ break; /* done */
+ net_migrated = 1;
+ }
+
+ /* context disappeared - retry */
+ }
+
+ return 0;
+}
+
+/*
+ * Evaluate a vserver call inside a function returning int: on failure the
+ * enclosing function returns 0 when the context does not exist (ESRCH,
+ * which is acceptable) and -1 for every other error.
+ */
+#define VC_SYSCALL(x) \
+do \
+{ \
+    if (x) \
+        return errno == ESRCH ? 0 : -1; \
+} \
+while (0)
+
+/*
+ * Configure the token-bucket scheduler of context ctx.
+ *
+ * cpu_share       - tokens accumulated per interval (fill_rate2); the
+ *                   value VC_LIM_KEEP leaves both fill rates untouched
+ * cpu_sched_flags - VS_SCHED_CPU_GUARANTEED clears the SCHED_SHARE flag
+ *                   and also uses the share as the primary fill rate
+ *
+ * Returns 0 on success or if the context does not exist, -1 otherwise.
+ */
+int
+pl_setsched(xid_t ctx, uint32_t cpu_share, uint32_t cpu_sched_flags)
+{
+    struct vc_set_sched sched;
+    struct vc_ctx_flags cflags;
+    uint32_t share_flag = VC_VXF_SCHED_SHARE;
+
+    sched.set_mask = (VC_VXSM_FILL_RATE | VC_VXSM_INTERVAL | VC_VXSM_TOKENS |
+                      VC_VXSM_TOKENS_MIN | VC_VXSM_TOKENS_MAX | VC_VXSM_MSEC |
+                      VC_VXSM_FILL_RATE2 | VC_VXSM_INTERVAL2 | VC_VXSM_FORCE |
+                      VC_VXSM_IDLE_TIME);
+    sched.tokens = 100;         /* initial allocation of tokens */
+    sched.tokens_min = 50;      /* need this many tokens to run */
+    sched.tokens_max = 100;     /* max accumulated number of tokens */
+    sched.interval = sched.interval2 = 1000;    /* milliseconds */
+    sched.fill_rate = 0;
+    sched.fill_rate2 = cpu_share;   /* tokens accumulated per interval */
+
+    if (cpu_share == (uint32_t)VC_LIM_KEEP)
+        sched.set_mask &= ~(VC_VXSM_FILL_RATE|VC_VXSM_FILL_RATE2);
+
+    /* guaranteed CPU corresponds to SCHED_SHARE flag being cleared */
+    if (cpu_sched_flags & VS_SCHED_CPU_GUARANTEED) {
+        share_flag = 0;
+        sched.fill_rate = sched.fill_rate2;
+    }
+
+    VC_SYSCALL(vc_set_sched(ctx, &sched));
+
+    cflags.mask = VC_VXF_SCHED_FLAGS;
+    cflags.flagword = share_flag | VC_VXF_SCHED_HARD;
+    VC_SYSCALL(vc_set_cflags(ctx, &cflags));
+
+    return 0;
+}
+
+/*
+ * Maps a configuration file name (relative to the vserver's config
+ * directory) to the limit field that receives the value read from it.
+ */
+struct pl_resources {
+ char *name;
+ unsigned long long *limit;
+};
+
+/*
+ * Advance index past whitespace in buffer, jumping to a label named "out"
+ * once index reaches len.
+ * NOTE(review): buffer[index] is read before index is compared with len,
+ * and the macro is not used anywhere in the visible part of this file.
+ */
+#define WHITESPACE(buffer,index,len) \
+ while(isspace((int)buffer[index])) \
+ if (index < len) index++; else goto out;
+
+#define VSERVERCONF "/etc/vservers/"
+/*
+ * Load the resource limits of vserver `context` into *slr.
+ *
+ * All rlimit fields are first set to VC_LIM_KEEP.  Each file listed in
+ * sliver_list is then read relative to /etc/vservers/<context>; when its
+ * first line starts with a digit, the decimal number overrides the field.
+ * Missing files are skipped.  slr->vs_cpu is only written when
+ * sched/fill-rate2 provides a value.
+ *
+ * The working directory is changed temporarily and restored afterwards.
+ * Errors are reported on stderr; the function has no return value.
+ */
+void
+pl_get_limits(const char *context, struct sliver_resources *slr)
+{
+    FILE *fb;
+    int cwd;
+    size_t len = strlen(VSERVERCONF) + strlen(context) + NULLBYTE_SIZE;
+    char *conf;
+    struct pl_resources *r;
+    struct pl_resources sliver_list[] = {
+        {"sched/fill-rate2", &slr->vs_cpu},
+
+        {"rlimits/nproc.hard", &slr->vs_nproc.hard},
+        {"rlimits/nproc.soft", &slr->vs_nproc.soft},
+        {"rlimits/nproc.min", &slr->vs_nproc.min},
+
+        {"rlimits/rss.hard", &slr->vs_rss.hard},
+        {"rlimits/rss.soft", &slr->vs_rss.soft},
+        {"rlimits/rss.min", &slr->vs_rss.min},
+
+        {"rlimits/as.hard", &slr->vs_as.hard},
+        {"rlimits/as.soft", &slr->vs_as.soft},
+        {"rlimits/as.min", &slr->vs_as.min},
+
+        {"rlimits/openfd.hard", &slr->vs_openfd.hard},
+        {"rlimits/openfd.soft", &slr->vs_openfd.soft},
+        {"rlimits/openfd.min", &slr->vs_openfd.min},
+
+        {0,0}
+    };
+
+    /* Defaults come first so they are in place on every error path */
+    slr->vs_rss.hard = VC_LIM_KEEP;
+    slr->vs_rss.soft = VC_LIM_KEEP;
+    slr->vs_rss.min = VC_LIM_KEEP;
+
+    slr->vs_as.hard = VC_LIM_KEEP;
+    slr->vs_as.soft = VC_LIM_KEEP;
+    slr->vs_as.min = VC_LIM_KEEP;
+
+    slr->vs_nproc.hard = VC_LIM_KEEP;
+    slr->vs_nproc.soft = VC_LIM_KEEP;
+    slr->vs_nproc.min = VC_LIM_KEEP;
+
+    slr->vs_openfd.hard = VC_LIM_KEEP;
+    slr->vs_openfd.soft = VC_LIM_KEEP;
+    slr->vs_openfd.min = VC_LIM_KEEP;
+
+    conf = malloc(len);
+    if (conf == NULL) {
+        perror("cannot allocate configuration path");
+        return;
+    }
+    snprintf(conf, len, "%s%s", VSERVERCONF, context);
+
+    /* Remember where we are so the relative opens below can be undone */
+    cwd = open(".", O_RDONLY);
+    if (cwd == -1) {
+        perror("cannot get a handle on .");
+        goto out;
+    }
+    if (chdir(conf) == -1) {
+        fprintf(stderr, "cannot chdir to ");
+        perror(conf);
+        goto out_fd;
+    }
+
+    for (r = &sliver_list[0]; r->name; r++) {
+        char buf[1000];
+        fb = fopen(r->name, "r");
+        if (fb == NULL)
+            continue;
+        /*
+         * The limits are 64-bit wide, so parse with strtoull() rather than
+         * atoi(); isdigit() needs an unsigned char value.
+         */
+        if (fgets(buf, sizeof(buf), fb) != NULL &&
+            isdigit((unsigned char)*buf))
+            *r->limit = strtoull(buf, NULL, 10);
+        fclose(fb);
+    }
+
+    if (fchdir(cwd) == -1)
+        perror("cannot return to the previous working directory");
+out_fd:
+    close(cwd);
+out:
+    free(conf);
+}
+
+/*
+ * Merge the configured vserver limit *vcr into the process limit *lim.
+ *
+ *  - min  (if set) raises rlim_cur and rlim_max to at least that value;
+ *  - soft (if set, and not below min) becomes rlim_cur;
+ *  - hard (if set, and not below min) becomes rlim_max.
+ *
+ * A field equal to VC_LIM_KEEP is treated as "not configured".  Finally
+ * rlim_cur is clamped to rlim_max, since setrlimit() rejects a soft limit
+ * above the hard limit.
+ *
+ * Returns 1 if *lim was modified, 0 otherwise.
+ */
+int
+adjust_lim(const struct vc_rlimit *vcr, struct rlimit *lim)
+{
+    int adjusted = 0;
+    const int have_min = (vcr->min != VC_LIM_KEEP);
+
+    if (have_min) {
+        if (vcr->min > lim->rlim_cur) {
+            lim->rlim_cur = vcr->min;
+            adjusted = 1;
+        }
+        if (vcr->min > lim->rlim_max) {
+            lim->rlim_max = vcr->min;
+            adjusted = 1;
+        }
+    }
+
+    if (vcr->soft != VC_LIM_KEEP && (!have_min || vcr->soft >= vcr->min)) {
+        lim->rlim_cur = vcr->soft;
+        adjusted = 1;
+    }
+
+    /* the hard limit belongs in rlim_max, not rlim_cur */
+    if (vcr->hard != VC_LIM_KEEP && (!have_min || vcr->hard >= vcr->min)) {
+        lim->rlim_max = vcr->hard;
+        adjusted = 1;
+    }
+
+    /* keep the pair acceptable to setrlimit() */
+    if (lim->rlim_cur > lim->rlim_max) {
+        lim->rlim_cur = lim->rlim_max;
+        adjusted = 1;
+    }
+
+    return adjusted;
+}
+
+/*
+ * Apply one configured limit to the calling process: fetch the current
+ * rlimit for `resource`, merge `limit` into it with adjust_lim() and write
+ * it back only if something changed.  Failures are ignored (best effort).
+ */
+static inline void
+set_one_ulimit(int resource, const struct vc_rlimit *limit)
+{
+    struct rlimit lim;
+
+    /* without a valid starting point there is nothing sane to write back */
+    if (getrlimit(resource, &lim) == -1)
+        return;
+
+    if (adjust_lim(limit, &lim))
+        setrlimit(resource, &lim);
+}
+
+/*
+ * Apply the RSS, address-space, process-count and open-file limits from
+ * *slr to the calling process.  A NULL slr is a no-op.
+ */
+void
+pl_set_ulimits(const struct sliver_resources *slr)
+{
+    if (slr != NULL) {
+        const struct {
+            int resource;
+            const struct vc_rlimit *limit;
+        } table[] = {
+            { RLIMIT_RSS,    &slr->vs_rss },
+            { RLIMIT_AS,     &slr->vs_as },
+            { RLIMIT_NPROC,  &slr->vs_nproc },
+            { RLIMIT_NOFILE, &slr->vs_openfd },
+        };
+        unsigned k;
+
+        for (k = 0; k < sizeof(table) / sizeof(table[0]); k++)
+            set_one_ulimit(table[k].resource, table[k].limit);
+    }
+}
--- /dev/null
+/* Copyright 2005 Princeton University
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ * Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PRINCETON
+UNIVERSITY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef _LIB_PLANETLAB_H_
+#define _LIB_PLANETLAB_H_
+
+/* Fallback definition for headers that do not provide the flag */
+#ifndef VC_VXF_SCHED_SHARE
+# define VC_VXF_SCHED_SHARE 0x00000800ull
+#endif
+
+/* All scheduler-related context flags handled by pl_setsched() */
+#define VC_VXF_SCHED_FLAGS (VC_VXF_SCHED_HARD | VC_VXF_SCHED_SHARE)
+
+/* Per-sliver resource settings as loaded by pl_get_limits() */
+struct sliver_resources {
+ /* value read from sched/fill-rate2 */
+ unsigned long long vs_cpu;
+ /* min/soft/hard triples read from rlimits/<name>.{min,soft,hard} */
+ struct vc_rlimit vs_rss;
+ struct vc_rlimit vs_as;
+ struct vc_rlimit vs_nproc;
+ struct vc_rlimit vs_openfd;
+};
+
+/* Merge *vcr into *lim; returns 1 if *lim was modified, 0 otherwise */
+int adjust_lim(const struct vc_rlimit *vcr, struct rlimit *lim);
+
+/* Enter context ctx, creating it if needed: 1 = created, 0 = migrated, -1 = error */
+int
+pl_chcontext(xid_t ctx, uint64_t bcaps, const struct sliver_resources *slr);
+
+/* Clear the SETUP flag and set PERSISTENT on ctx: 0 on success, -1 on error */
+int
+pl_setup_done(xid_t ctx);
+
+/* Configure the scheduler of ctx: 0 on success or missing context, -1 on error */
+int
+pl_setsched(xid_t ctx, uint32_t cpu_share, uint32_t cpu_sched_flags);
+
+/* scheduler flags */
+#define VS_SCHED_CPU_GUARANTEED 1
+
+/* Null byte made explicit */
+#define NULLBYTE_SIZE 1
+
+/* Load limits for a named context / apply them to the calling process */
+void pl_get_limits(const char *, struct sliver_resources *);
+void pl_set_ulimits(const struct sliver_resources *);
+
+/*
+ * Print "<file>:<line>: <formatted message>[: <strerror> (<errno>)]" to
+ * stderr, followed by a newline, and flush the stream.
+ *
+ * format  - printf-style format for the variadic arguments
+ * file    - source file name (normally __FILE__)
+ * line    - source line number (normally __LINE__)
+ * _errno  - error number to describe; 0 suppresses the strerror() suffix
+ *
+ * Returns _errno unchanged.
+ */
+static inline int
+_PERROR(const char *format, const char *file, int line, int _errno, ...)
+{
+    va_list ap;
+
+    va_start(ap, _errno);
+    fprintf(stderr, "%s:%d: ", file, line);
+    vfprintf(stderr, format, ap);
+    va_end(ap);     /* every va_start() must be paired with va_end() */
+    if (_errno)
+        fprintf(stderr, ": %s (%d)", strerror(_errno), _errno);
+    fputs("\n", stderr);
+    fflush(stderr);
+
+    return _errno;
+}
+
+/* Report an error at the current source location using the current errno */
+#define PERROR(format, args...) _PERROR(format, __FILE__, __LINE__, errno, ## args)
+#endif
--- /dev/null
+/*
+ * $Id$
+ * Copyright (c) 2007 The Trustees of Princeton University
+ * Author: Daniel Hokka Zakrisson <daniel@hozac.com>
+ *
+ * Licensed under the terms of the GNU General Public License
+ * version 2 or later.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <string.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <arpa/inet.h>
+#include <dirent.h>
+#include <ctype.h>
+#include <errno.h>
+#include <signal.h>
+#include <syslog.h>
+
+#include <asm/types.h>
+#include <netlink/netlink.h>
+#include <netlink/route/addr.h>
+
+#include <vserver.h>
+
+/* Bits of struct prefix.mask: which halves of the record have been seen */
+#define HAS_ADDRESS 0x01
+#define HAS_PREFIX 0x02
+
+/* Singly linked list of network context ids */
+struct nid_list {
+ nid_t nid;
+ struct nid_list *next;
+};
+/*
+ * One autoconfigured IPv6 prefix together with the matching interface
+ * address; mask records which of the two parts is filled in.
+ */
+struct prefix {
+ uint32_t mask;
+ int ifindex;
+ struct {
+ struct in6_addr addr;
+ int prefix_len;
+ time_t valid_until;
+ } prefix;
+ struct {
+ struct in6_addr addr;
+ int prefix_len;
+ time_t valid_until;
+ } address;
+};
+/*
+ * Entry tying a prefix to a network context.  Each entry sits on two
+ * doubly linked chains: "n" is keyed by nid and "p" by the prefix pointer
+ * (see add_to_map()).  Entries with nid 0 stand for the prefix itself
+ * (see add_prefix_to_map()).
+ */
+struct nid_prefix_map {
+ struct {
+ struct nid_prefix_map *prev;
+ struct nid_prefix_map *next;
+ } n;
+ struct {
+ struct nid_prefix_map *prev;
+ struct nid_prefix_map *next;
+ } p;
+ struct prefix *prefix;
+ nid_t nid;
+};
+
+/* netlink handle shared by the address add/delete helpers */
+struct nl_handle *handle;
+
+/* from linux/include/net/ipv6.h */
+/*
+ * Compare the first prefixlen bits of two IPv6 addresses.
+ * Returns 1 when they are identical, 0 otherwise.
+ */
+static inline int ipv6_prefix_equal(struct in6_addr *prefix,
+                                    struct in6_addr *addr, int prefixlen)
+{
+    uint32_t *lhs = prefix->s6_addr32, *rhs = addr->s6_addr32;
+    unsigned whole_words = prefixlen >> 5;   /* complete 32-bit words */
+    unsigned tail_bits = prefixlen & 0x1f;   /* bits in the partial word */
+
+    /* check complete u32 in prefix */
+    if (whole_words && memcmp(lhs, rhs, whole_words << 2))
+        return 0;
+
+    /* check incomplete u32 in prefix */
+    if (tail_bits) {
+        uint32_t tail_mask = htonl((0xffffffff) << (32 - tail_bits));
+
+        if ((lhs[whole_words] ^ rhs[whole_words]) & tail_mask)
+            return 0;
+    }
+
+    return 1;
+}
+
+/*
+ * Add (or replace) the IPv6 address `address`/`prefix` on the interface
+ * with index ifindex through the global netlink handle.
+ *
+ * Returns 0 on success or if the address already exists (EEXIST), -1 on
+ * failure, including failure to allocate the libnl objects.
+ */
+static int add_address_to_interface(int ifindex, struct in6_addr *address,
+                                    int prefix)
+{
+    int err = -1;
+    struct rtnl_addr *rta;
+    struct nl_addr *nl;
+
+    /* both allocations can fail; mirror remove_address_from_interface() */
+    nl = nl_addr_build(AF_INET6, address, sizeof(struct in6_addr));
+    if (!nl)
+        return -1;
+    rta = rtnl_addr_alloc();
+    if (!rta) {
+        nl_addr_destroy(nl);
+        return -1;
+    }
+
+    rtnl_addr_set_family(rta, AF_INET6);
+    rtnl_addr_set_ifindex(rta, ifindex);
+    rtnl_addr_set_local(rta, nl);
+    rtnl_addr_set_prefixlen(rta, prefix);
+
+    if (rtnl_addr_add(handle, rta, NLM_F_REPLACE) != -1 || errno == EEXIST)
+        err = 0;
+
+    rtnl_addr_free(rta);
+    nl_addr_destroy(nl);
+    return err;
+}
+
+/*
+ * Delete from its interface the address that belongs to a map entry.
+ *
+ * For entries with a non-zero nid the address is derived from the prefix's
+ * own address by writing the nid into bytes 11 and 12, matching the way
+ * do_slices_autoconf() builds per-context addresses.
+ *
+ * Returns the result of rtnl_addr_delete(), or -1 if a libnl object could
+ * not be allocated.
+ */
+static inline int remove_address_from_interface(struct nid_prefix_map *entry)
+{
+    struct rtnl_addr *rta;
+    struct nl_addr *nl;
+    struct in6_addr a;
+    int ret;
+
+    memcpy(&a, &entry->prefix->address.addr, sizeof(a));
+    if (entry->nid != 0) {
+        a.s6_addr[11] = (entry->nid & 0x7f80) >> 7;
+        a.s6_addr[12] = (entry->nid & 0x7f) << 1;
+    }
+
+    nl = nl_addr_build(AF_INET6, &a, sizeof(a));
+    if (!nl)
+        return -1;
+    rta = rtnl_addr_alloc();
+    if (!rta) {
+        /* do not leak the address object built above */
+        nl_addr_destroy(nl);
+        return -1;
+    }
+
+    rtnl_addr_set_family(rta, AF_INET6);
+    rtnl_addr_set_ifindex(rta, entry->prefix->ifindex);
+    rtnl_addr_set_local(rta, nl);
+    rtnl_addr_set_prefixlen(rta, entry->prefix->address.prefix_len);
+
+    ret = rtnl_addr_delete(handle, rta, 0);
+
+    rtnl_addr_free(rta);
+    nl_addr_destroy(nl);
+
+    return ret;
+}
+
+/*
+ * Link `new` into both chains of the map: first the "p" chain (keyed by
+ * the prefix pointer), then the "n" chain (keyed by nid).
+ *
+ * Returns 1 after linking into both chains, or 0 as soon as either pass
+ * finds an entry with the same prefix and nid at the position it stopped.
+ *
+ * NOTE(review): the scan starts at map->node.next and dereferences it
+ * without a NULL check, so an empty chain looks unsafe unless the caller
+ * guarantees at least one entry -- confirm.
+ * NOTE(review): a 0 return from the second pass happens after the first
+ * pass has already linked `new` into the "p" chain -- confirm callers
+ * that free `new` on 0 cannot hit that case.
+ */
+static int add_to_map(struct nid_prefix_map *map, struct nid_prefix_map *new)
+{
+ struct nid_prefix_map *i;
+#define PUT_IT_IN_PLACE(node, member, om) \
+ /* find the correct location in the list */ \
+ for (i = map->node.next; i->node.next && i->member < \
+ new->member; i = i->node.next) \
+ ; \
+ if (i && i->member == new->member && i->om == new->om) \
+ return 0; \
+ /* first in the list */ \
+ if (!i || !i->node.prev) { \
+ new->node.prev = NULL; \
+ new->node.next = i; \
+ map->node.next = new; \
+ if (i) \
+ i->node.prev = new; \
+ } \
+ /* last in the list */ \
+ else if (i->node.next == NULL) { \
+ new->node.prev = i; \
+ new->node.next = NULL; \
+ i->node.next = new; \
+ } \
+ /* somewhere in the middle */ \
+ else { \
+ new->node.prev = i->node.prev; \
+ new->node.next = i; \
+ i->node.prev->node.next = new; \
+ i->node.prev = new; \
+ }
+ PUT_IT_IN_PLACE(p, prefix, nid)
+ PUT_IT_IN_PLACE(n, nid, prefix)
+ return 1;
+}
+
+/*
+ * Unlink `entry` from both chains of the map.
+ *
+ * Besides fixing up the head pointers stored in `map`, the neighbouring
+ * entries are re-linked around `entry`; otherwise they would keep pointing
+ * at it after it has been freed.  The links stored inside `entry` itself
+ * are left untouched so a caller may still read them.
+ */
+static inline void remove_from_map(struct nid_prefix_map *map,
+                                   struct nid_prefix_map *entry)
+{
+    /* "n" chain */
+    if (entry->n.prev)
+        entry->n.prev->n.next = entry->n.next;
+    if (entry->n.next)
+        entry->n.next->n.prev = entry->n.prev;
+    if (map->n.next == entry)
+        map->n.next = entry->n.next;
+    if (map->n.prev == entry)
+        map->n.prev = entry->n.prev;
+
+    /* "p" chain */
+    if (entry->p.prev)
+        entry->p.prev->p.next = entry->p.next;
+    if (entry->p.next)
+        entry->p.next->p.prev = entry->p.prev;
+    if (map->p.next == entry)
+        map->p.next = entry->p.next;
+    if (map->p.prev == entry)
+        map->p.prev = entry->p.prev;
+}
+
+/* Unlink `entry` from the map and release its memory */
+static inline void remove_from_map_and_free(struct nid_prefix_map *map,
+                                            struct nid_prefix_map *entry)
+{
+    remove_from_map(map, entry);
+    free(entry);
+}
+
+/*
+ * Allocate a map entry for (prefix, nid) and hand it to add_to_map().
+ *
+ * Returns the add_to_map() result (the entry is freed again when that is
+ * 0), or -1 if the entry could not be allocated.
+ */
+static int add_nid_to_map(struct nid_prefix_map *map, struct prefix *prefix,
+                          nid_t nid)
+{
+    struct nid_prefix_map *entry;
+    int added;
+
+    entry = calloc(1, sizeof(*entry));
+    if (entry == NULL)
+        return -1;
+
+    entry->nid = nid;
+    entry->prefix = prefix;
+
+    added = add_to_map(map, entry);
+    if (!added)
+        free(entry);
+
+    return added;
+}
+
+/* Register the prefix itself in the map, using nid 0 */
+static int add_prefix_to_map(struct nid_prefix_map *map, struct prefix *prefix)
+{
+    const nid_t prefix_nid = 0;
+
+    return add_nid_to_map(map, prefix, prefix_nid);
+}
+
+/*
+ * Starting at `first`, walk the "p" chain and, for every consecutive
+ * entry that refers to the same prefix as `first`, remove its address
+ * from the interface (errors ignored) and drop the entry from the map.
+ *
+ * NOTE(review): the struct prefix itself is not released here -- confirm
+ * whether it is freed elsewhere.
+ */
+static void cleanup_prefix(struct nid_prefix_map *map,
+                           struct nid_prefix_map *first)
+{
+    struct nid_prefix_map *i, *next;
+    struct prefix *target;
+
+    if (!first)
+        return;
+
+    /*
+     * Remember the prefix up front: `first` is freed during the walk, so
+     * it must not be dereferenced in the loop condition.
+     */
+    target = first->prefix;
+
+    for (i = first; i && i->prefix == target; i = next) {
+        /* fetch the successor before the entry goes away */
+        next = i->p.next;
+
+        /* ignore errors */
+        remove_address_from_interface(i);
+
+        remove_from_map_and_free(map, i);
+    }
+}
+
+/*
+ * Insert nid into the list at *head, keeping the list sorted in ascending
+ * order and free of duplicates.
+ *
+ * Returns 1 if the nid was added, 0 if it was already present and -1 if
+ * memory could not be allocated.
+ */
+static inline int add_nid_to_list(struct nid_list **head, nid_t nid)
+{
+    struct nid_list **link = head, *new;
+
+    /*
+     * Advance to the link that should point at the new node.  Walking the
+     * links instead of the nodes handles empty list, new head, middle and
+     * tail uniformly, and never places a larger nid before a smaller one.
+     */
+    while (*link && (*link)->nid < nid)
+        link = &(*link)->next;
+
+    /* check if it's already in the list */
+    if (*link && (*link)->nid == nid)
+        return 0;
+
+    /* add it */
+    new = calloc(1, sizeof(struct nid_list));
+    if (!new)
+        return -1;
+    new->nid = nid;
+    new->next = *link;
+    *link = new;
+
+    return 1;
+}
+
+/* Release every node of the list starting at head (NULL is accepted) */
+static inline void free_nid_list(struct nid_list *head)
+{
+    while (head) {
+        struct nid_list *next = head->next;
+
+        free(head);
+        head = next;
+    }
+}
+
+/*
+ * Remove every map entry belonging to network context nid: its address
+ * is deleted from the interface and the entry is dropped from the map.
+ * The "n" chain is scanned in order and the scan stops at the first
+ * entry whose nid is not smaller than the requested one.
+ */
+static inline void cleanup_nid(struct nid_prefix_map *map,
+                               nid_t nid)
+{
+    struct nid_prefix_map *i, *next;
+
+    /* the chain may be empty or end before nid is reached */
+    for (i = map->n.next; i && i->nid < nid; i = i->n.next)
+        ;
+    /* this nid doesn't have any entries in the map */
+    if (!i || i->nid != nid)
+        return;
+
+    while (i && i->nid == nid) {
+        /* fetch the successor before the entry goes away */
+        next = i->n.next;
+        remove_address_from_interface(i);
+        remove_from_map_and_free(map, i);
+        i = next;
+    }
+}
+
+/*
+ * Compare the nid list of the previous run with the current one and clean
+ * up the map entries of every context that has disappeared.  Both lists
+ * are walked in ascending nid order.  All nodes of `previous` are freed;
+ * `current` is left untouched.
+ */
+static inline void cleanup_nids(struct nid_prefix_map *map,
+                                struct nid_list *previous,
+                                struct nid_list *current)
+{
+    struct nid_list *p, *next, *c = current;
+
+    for (p = previous; p; p = next) {
+        next = p->next;
+
+        /* `current` may be empty or shorter than `previous` */
+        while (c && c->nid < p->nid)
+            c = c->next;
+
+        /* this context has disappeared */
+        if (!c || c->nid != p->nid)
+            cleanup_nid(map, p->nid);
+
+        free(p);
+    }
+}
+
+/*
+ * Re-synchronise the IPv6 addresses of all network contexts.
+ *
+ *  1. Enumerate the numeric entries of /proc/virtnet, remove all IPv6
+ *     addresses from each context and collect the nids in a list.
+ *  2. For every context and every prefix entry (nid 0) at the head of the
+ *     "p" chain: drop expired prefixes; for complete ones (address and
+ *     prefix known) derive a per-context address by writing the nid into
+ *     bytes 11/12, assign it to the context, add it to the interface and
+ *     record it in the map.
+ *  3. Clean up contexts seen in the previous run but not in this one; the
+ *     current list is kept in a static variable for the next call.
+ *
+ * NOTE(review): after cleanup_prefix() the loop continues with a pointer
+ * saved beforehand; if that entry shared the expired prefix it has been
+ * freed as well -- confirm.
+ * NOTE(review): vna_v6_mask is only filled in for /64 prefixes -- confirm
+ * other lengths cannot occur.
+ */
+static void do_slices_autoconf(struct nid_prefix_map *map)
+{
+    DIR *dp;
+    struct dirent *de;
+    struct vc_net_addr addr;
+    struct nid_prefix_map *i;
+    struct nid_list *current = NULL, *n;
+    static struct nid_list *previous = NULL;
+
+    if ((dp = opendir("/proc/virtnet")) == NULL)
+        return;
+    while ((de = readdir(dp)) != NULL) {
+        nid_t nid;
+
+        if (!isdigit(de->d_name[0]))
+            continue;
+
+        nid = strtoul(de->d_name, NULL, 10);
+        addr.vna_type = VC_NXA_TYPE_IPV6 | VC_NXA_TYPE_ANY;
+        if (vc_net_remove(nid, &addr) == -1) {
+            syslog(LOG_ERR, "vc_net_remove(%u): %s", nid, strerror(errno));
+            continue;
+        }
+
+        /* the list head is passed by address (was mis-encoded in the patch) */
+        add_nid_to_list(&current, nid);
+    }
+    closedir(dp);
+
+    for (n = current; n; n = n->next) {
+        for (i = map->p.next; i && i->nid == 0;) {
+            /* expired */
+            if (i->prefix->mask & HAS_PREFIX && i->prefix->prefix.valid_until < time(NULL)) {
+                struct nid_prefix_map *tmp;
+                char buf[64];
+
+                inet_ntop(AF_INET6, &i->prefix->address.addr, buf, sizeof(buf));
+                syslog(LOG_NOTICE, "Address %s timed out", buf);
+
+                tmp = i->p.next;
+
+                cleanup_prefix(map, i);
+
+                i = tmp;
+                continue;
+            }
+            /* both the prefix and the interface address must be known */
+            if (i->prefix->mask != (HAS_ADDRESS|HAS_PREFIX))
+                goto next;
+
+            addr.vna_type = VC_NXA_TYPE_IPV6 | VC_NXA_TYPE_ADDR;
+            memcpy(&addr.vna_v6_ip, &i->prefix->address.addr, sizeof(struct in6_addr));
+            addr.vna_prefix = i->prefix->prefix.prefix_len;
+            if (addr.vna_prefix == 64) {
+                addr.vna_v6_mask.s6_addr32[0] = addr.vna_v6_mask.s6_addr32[1] = 0xffffffff;
+                addr.vna_v6_mask.s6_addr32[2] = addr.vna_v6_mask.s6_addr32[3] = 0;
+            }
+            /* encode the nid into the interface identifier */
+            addr.vna_v6_ip.s6_addr[11] = (n->nid & 0x7f80) >> 7;
+            addr.vna_v6_ip.s6_addr[12] = (n->nid & 0x007f) << 1;
+            if (vc_net_add(n->nid, &addr) == -1) {
+                syslog(LOG_ERR, "vc_net_add(%u): %s", n->nid, strerror(errno));
+                goto next;
+            }
+            if (add_address_to_interface(i->prefix->ifindex, &addr.vna_v6_ip, addr.vna_prefix) == -1) {
+                syslog(LOG_ERR, "add_address_to_interface: %s", strerror(errno));
+                goto next;
+            }
+            if (add_nid_to_map(map, i->prefix, n->nid) == -1) {
+                syslog(LOG_ERR, "add_nid_to_map: %s", strerror(errno));
+                goto next;
+            }
+next:
+            i = i->p.next;
+        }
+    }
+
+    cleanup_nids(map, previous, current);
+    previous = current;
+}
+
+/* XXX These two functions are very similar */
+/*
+ * Record an autoconfiguration prefix announced through netlink.
+ *
+ * If a known entry matches the prefix (against either its stored prefix
+ * or its stored address) the record is refreshed: HAS_PREFIX is set and
+ * ifindex, prefix, length and expiry are updated.  Otherwise a new record
+ * is allocated and registered in the map.
+ *
+ * Returns 0 when an existing record was refreshed, 1 when a new record
+ * was added, and -1 on invalid arguments, a prefix without the autoconf
+ * flag, or allocation/insertion failure.
+ */
+static int add_prefix(struct nid_prefix_map *map, struct prefixmsg *msg,
+                      struct in6_addr *prefix, struct prefix_cacheinfo *cache)
+{
+    struct nid_prefix_map *i;
+    struct prefix *new;
+
+    if (!msg || !prefix || !cache)
+        return -1;
+    /* XXX IF_PREFIX_AUTOCONF == 0x02 */
+    if (!(msg->prefix_flags & 0x02))
+        return -1;
+
+    /*
+     * Scan the "p" chain.  As before, the scan ends after the first entry
+     * with a non-zero nid has been examined; unlike before, an empty chain
+     * is simply skipped instead of dereferencing the head's prefix.
+     */
+    for (i = map->p.next; i; i = i->p.next) {
+        if (i->prefix &&
+            (ipv6_prefix_equal(prefix, &i->prefix->prefix.addr, msg->prefix_len) ||
+             ipv6_prefix_equal(prefix, &i->prefix->address.addr, msg->prefix_len))) {
+            i->prefix->mask |= HAS_PREFIX;
+            i->prefix->ifindex = msg->prefix_ifindex;
+            memcpy(&i->prefix->prefix.addr, prefix, sizeof(*prefix));
+            i->prefix->prefix.prefix_len = msg->prefix_len;
+            i->prefix->prefix.valid_until = time(NULL) + cache->preferred_time;
+            return 0;
+        }
+        if (i->nid != 0)
+            break;
+    }
+
+    /* not yet in the map */
+    new = calloc(1, sizeof(*new));
+    if (!new)
+        return -1;
+    new->mask = HAS_PREFIX;
+    /* record the interface here too; add_address() never fills it in */
+    new->ifindex = msg->prefix_ifindex;
+    memcpy(&new->prefix.addr, prefix, sizeof(*prefix));
+    new->prefix.prefix_len = msg->prefix_len;
+    new->prefix.valid_until = time(NULL) + cache->preferred_time;
+    if (add_prefix_to_map(map, new) == -1) {
+        /* nothing references the record, so release it */
+        free(new);
+        return -1;
+    }
+
+    return 1;
+}
+
+/*
+ * Record an interface address announced through netlink.  Only addresses
+ * whose bytes 11 and 12 are 0xFF 0xFE are accepted.
+ *
+ * If a known entry matches (the address falls into its stored prefix, or
+ * equals its stored address) the record is refreshed: HAS_ADDRESS is set
+ * and address, length and expiry are updated.  Otherwise a new record is
+ * allocated and registered in the map.
+ *
+ * Returns 0 when an existing record was refreshed, 1 when a new record
+ * was added, and -1 on invalid arguments, a rejected address, or
+ * allocation/insertion failure.
+ */
+static inline int add_address(struct nid_prefix_map *map, struct ifaddrmsg *msg,
+                              struct in6_addr *address, struct ifa_cacheinfo *cache)
+{
+    struct nid_prefix_map *i;
+    struct prefix *new;
+
+    if (!msg || !address || !cache)
+        return -1;
+
+    if (address->s6_addr[11] != 0xFF || address->s6_addr[12] != 0xFE)
+        return -1;
+
+    /*
+     * Scan the "p" chain.  As before, the scan ends after the first entry
+     * with a non-zero nid has been examined; unlike before, an empty chain
+     * is simply skipped instead of dereferencing the head's prefix.
+     */
+    for (i = map->p.next; i; i = i->p.next) {
+        if (i->prefix &&
+            (ipv6_prefix_equal(address, &i->prefix->prefix.addr, msg->ifa_prefixlen) ||
+             ipv6_prefix_equal(address, &i->prefix->address.addr, 128))) {
+            i->prefix->mask |= HAS_ADDRESS;
+            memcpy(&i->prefix->address.addr, address, sizeof(*address));
+            i->prefix->address.prefix_len = msg->ifa_prefixlen;
+            i->prefix->address.valid_until = time(NULL) + cache->ifa_prefered;
+            return 0;
+        }
+        if (i->nid != 0)
+            break;
+    }
+
+    /* not yet in the map */
+    new = calloc(1, sizeof(*new));
+    if (!new)
+        return -1;
+    new->mask = HAS_ADDRESS;
+    memcpy(&new->address.addr, address, sizeof(*address));
+    new->address.prefix_len = msg->ifa_prefixlen;
+    new->address.valid_until = time(NULL) + cache->ifa_prefered;
+    if (add_prefix_to_map(map, new) == -1) {
+        /* nothing references the record, so release it */
+        free(new);
+        return -1;
+    }
+
+    return 1;
+}
+
+/*
+ * Attribute validation policy passed to nlmsg_parse() for ifaddrmsg
+ * messages: the address and cache-info attributes must be at least as large
+ * as the structures read from them, and the label is a string of at most
+ * IFNAMSIZ bytes.
+ */
+static struct nla_policy addr_policy[IFA_MAX+1] = {
+ [IFA_ADDRESS] = { .minlen = sizeof(struct in6_addr) },
+ [IFA_LABEL] = { .type = NLA_STRING,
+ .maxlen = IFNAMSIZ },
+ [IFA_CACHEINFO] = { .minlen = sizeof(struct ifa_cacheinfo) },
+};
+/*
+ * Attribute validation policy passed to nlmsg_parse() for prefixmsg
+ * messages: both attributes must be at least as large as the structures
+ * read from them.
+ */
+static struct nla_policy prefix_policy[PREFIX_MAX+1] = {
+ [PREFIX_ADDRESS] = { .minlen = sizeof(struct in6_addr) },
+ [PREFIX_CACHEINFO] = { .minlen = sizeof(struct prefix_cacheinfo) },
+};
+/*
+ * Callback for valid netlink messages.
+ *
+ * Messages whose source group is RTMGRP_IPV6_PREFIX are fed to add_prefix(),
+ * those from RTMGRP_IPV6_IFADDR to add_address(); `arg` is passed through as
+ * the map.  When the map was updated or extended (result >= 0),
+ * do_slices_autoconf() is run on it.
+ *
+ * Returns -1 if the message attributes cannot be parsed, 0 otherwise.
+ */
+int handle_valid_msg(struct nl_msg *msg, void *arg)
+{
+    struct nlmsghdr *hdr = nlmsg_hdr(msg);
+    struct sockaddr_nl *src = nlmsg_get_src(msg);
+    char *data = nlmsg_data(hdr);
+    int rc = -1;
+
+    if (src->nl_groups == RTMGRP_IPV6_PREFIX) {
+        struct nlattr *attrs[PREFIX_MAX+1];
+        struct in6_addr *prefix = NULL;
+        struct prefix_cacheinfo *cacheinfo = NULL;
+
+        if (nlmsg_parse(hdr, sizeof(struct prefixmsg), attrs, PREFIX_MAX, prefix_policy) < 0) {
+            syslog(LOG_ERR, "Failed to parse prefixmsg");
+            return -1;
+        }
+
+        /* absent attributes stay NULL and are rejected by add_prefix() */
+        if (attrs[PREFIX_ADDRESS])
+            prefix = nl_data_get(nla_get_data(attrs[PREFIX_ADDRESS]));
+        if (attrs[PREFIX_CACHEINFO])
+            cacheinfo = nl_data_get(nla_get_data(attrs[PREFIX_CACHEINFO]));
+
+        rc = add_prefix(arg, (struct prefixmsg *) data, prefix, cacheinfo);
+    } else if (src->nl_groups == RTMGRP_IPV6_IFADDR) {
+        struct nlattr *attrs[IFA_MAX+1];
+        struct in6_addr *address = NULL;
+        struct ifa_cacheinfo *cacheinfo = NULL;
+
+        if (nlmsg_parse(hdr, sizeof(struct ifaddrmsg), attrs, IFA_MAX, addr_policy) < 0) {
+            syslog(LOG_ERR, "Failed to parse ifaddrmsg");
+            return -1;
+        }
+
+        /* absent attributes stay NULL and are rejected by add_address() */
+        if (attrs[IFA_ADDRESS])
+            address = nl_data_get(nla_get_data(attrs[IFA_ADDRESS]));
+        if (attrs[IFA_CACHEINFO])
+            cacheinfo = nl_data_get(nla_get_data(attrs[IFA_CACHEINFO]));
+
+        rc = add_address(arg, (struct ifaddrmsg *) data, address, cacheinfo);
+    }
+
+    if (rc >= 0)
+        do_slices_autoconf(arg);
+
+    return 0;
+}
+
+/*
+ * Callback for netlink error messages: log the error text to syslog.
+ *
+ * The kernel reports the error as a negated errno value, so the sign is
+ * normalised before calling strerror(); passing the raw negative value
+ * would only yield "Unknown error -N".
+ *
+ * Always returns 0.
+ */
+int handle_error_msg(struct sockaddr_nl *source, struct nlmsgerr *err,
+        void *arg)
+{
+    int code = err->error < 0 ? -err->error : err->error;
+
+    syslog(LOG_ERR, "%s", strerror(code));
+    return 0;
+}
+
+/* Netlink callback that ignores its message and always returns 0. */
+int handle_no_op(struct nl_msg *msg, void *arg)
+{
+    (void) msg;
+    (void) arg;
+
+    return 0;
+}
+
+/*
+ * The prefix map, with both link pairs (n and p) initialised to NULL.
+ * It lives at file scope only for access in the signal handler; main()
+ * passes its address to the netlink callbacks.
+ */
+struct nid_prefix_map map = {
+ .n = {
+ .next = NULL,
+ .prev = NULL,
+ },
+ .p = {
+ .next = NULL,
+ .prev = NULL,
+ },
+};
+/*
+ * Signal handler: SIGUSR1 re-runs do_slices_autoconf() on the global map;
+ * every other signal is ignored.
+ *
+ * NOTE(review): do_slices_autoconf() runs in signal context here; confirm
+ * that it only uses async-signal-safe operations.
+ */
+void signal_handler(int signal)
+{
+    if (signal == SIGUSR1)
+        do_slices_autoconf(&map);
+}
+
+/*
+ * Write the PID of the calling process, followed by a newline, to
+ * `filename` (truncating any previous content).
+ *
+ * Returns 0 on success and -1 if the file cannot be opened, written or
+ * flushed to completion.
+ */
+static int write_pidfile(const char *filename)
+{
+    FILE *fp;
+    int failed;
+
+    fp = fopen(filename, "w");
+    if (!fp)
+        return -1;
+
+    failed = fprintf(fp, "%ld\n", (long) getpid()) < 0;
+    /* fclose() flushes the buffer, so a short write surfaces here */
+    if (fclose(fp) != 0)
+        failed = 1;
+
+    return failed ? -1 : 0;
+}
+
+/*
+ * vip6-autod entry point.
+ *
+ * Allocates the netlink handle, installs the callbacks, joins the IPv6
+ * prefix and address groups, connects to NETLINK_ROUTE, daemonises, writes
+ * the pidfile, installs the SIGUSR1 handler and then processes netlink
+ * messages until nl_recvmsgs() stops returning a positive value.
+ *
+ * Exits with status 1 if the handle cannot be allocated or connected and
+ * returns -1 if daemon() fails; otherwise returns 0 when the loop ends.
+ */
+int main(int argc, char *argv[])
+{
+    struct nl_cb *cbs;
+
+    openlog("vip6-autod", LOG_PERROR, LOG_DAEMON);
+
+    handle = nl_handle_alloc_nondefault(NL_CB_VERBOSE);
+    if (!handle) {
+        /* the calls below would dereference a NULL handle */
+        syslog(LOG_CRIT, "failed to allocate netlink handle");
+        exit(1);
+    }
+    cbs = nl_handle_get_cb(handle);
+    nl_cb_set(cbs, NL_CB_VALID, NL_CB_CUSTOM, handle_valid_msg, &map);
+    nl_cb_set(cbs, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, handle_no_op, NULL);
+    nl_cb_err(cbs, NL_CB_CUSTOM, handle_error_msg, &map);
+    nl_disable_sequence_check(handle);
+
+    nl_join_groups(handle, RTMGRP_IPV6_PREFIX|RTMGRP_IPV6_IFADDR);
+    if (nl_connect(handle, NETLINK_ROUTE) == -1) {
+        syslog(LOG_CRIT, "nl_connect: %s", strerror(errno));
+        exit(1);
+    }
+
+    if (daemon(0, 0) == -1) {
+        syslog(LOG_CRIT, "daemon: %s", strerror(errno));
+        return -1;
+    }
+
+    /* a missing pidfile only affects the init script, so keep running */
+    if (write_pidfile(LOCALSTATEDIR "/run/vip6-autod.pid") == -1)
+        syslog(LOG_WARNING, "could not write pidfile: %s", strerror(errno));
+
+    signal(SIGUSR1, signal_handler);
+
+    /* all of the work happens in the callbacks */
+    while (nl_recvmsgs(handle, cbs) > 0)
+        ;
+
+    nl_close(handle);
+    closelog();
+    return 0;
+}
--- /dev/null
+/*
+ * Marc E. Fiuczynski <mef@cs.princeton.edu>
+ *
+ * Copyright (c) 2004 The Trustees of Princeton University (Trustees).
+ *
+ * vsh is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * vsh is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with vsh; see the file COPYING. If not, write to the Free
+ * Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <pwd.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <sys/syscall.h>
+#include <asm/unistd.h>
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <stdarg.h>
+
+//--------------------------------------------------------------------
+#include <vserver.h>
+#include "planetlab.h"
+
+/*
+ * Switch to gid 0 and then uid 0 (done before entering the new context).
+ * Returns 0 on success; on failure reports the failing call through PERROR
+ * and returns -1.
+ */
+static int setuidgid_root()
+{
+    int rc;
+
+    rc = setgid(0);
+    if (rc < 0) {
+        PERROR("setgid(0)");
+        return -1;
+    }
+
+    rc = setuid(0);
+    if (rc < 0) {
+        PERROR("setuid(0)");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Build the path "<base>/<pw_name>" in newly malloc'd storage and store the
+ * pointer in *root.  Exits the process with status 1 if the allocation
+ * fails.
+ */
+static void compute_new_root(char *base, char **root, const struct passwd *pwd)
+{
+    const char *name = pwd->pw_name;
+    int root_len = strlen(base) + strlen("/") + strlen(name) + NULLBYTE_SIZE;
+
+    *root = (char *)malloc(root_len);
+    if (*root == NULL) {
+        PERROR("malloc(%d)", root_len);
+        exit(1);
+    }
+
+    sprintf(*root, "%s/%s", base, name);
+    (*root)[root_len - 1] = '\0';
+}
+
+/*
+ * chroot() into DEFAULT_VSERVERDIR/<pw_name> and change directory to the
+ * new root.  Exits the process with status 1 if either step fails;
+ * otherwise returns 0.
+ */
+static int sandbox_chroot(const struct passwd *pwd)
+{
+    char *sandbox_root = NULL;
+
+    compute_new_root(DEFAULT_VSERVERDIR,&sandbox_root, pwd);
+    if (chroot(sandbox_root) < 0) {
+        PERROR("chroot(%s)", sandbox_root);
+        exit(1);
+    }
+    /* the path is not needed past this point; do not leak it */
+    free(sandbox_root);
+
+    if (chdir("/") < 0) {
+        PERROR("chdir(/)");
+        exit(1);
+    }
+    return 0;
+}
+
+/*
+ * Move the calling process into vserver context `ctx`.
+ *
+ * With CONFIG_VSERVER_LEGACY the context is entered through
+ * vc_new_s_context(); `context` and `pwd` are unused in that build.
+ * Otherwise the limits for `context` are fetched with pl_get_limits(), the
+ * process chroots into the slice and pl_chcontext() performs the switch.
+ *
+ * Returns 0 on success.  Every failure path terminates the process, so no
+ * other value is returned.
+ */
+static int sandbox_processes(xid_t ctx, const char *context, const struct passwd *pwd)
+{
+#ifdef CONFIG_VSERVER_LEGACY
+ int flags;
+
+ flags = 0;
+ flags |= 1; /* VX_INFO_LOCK -- cannot request a new vx_id */
+ /* flags |= 4; VX_INFO_NPROC -- limit number of procs in a context */
+
+ /* result deliberately ignored; the second call below is the one checked */
+ (void) vc_new_s_context(ctx, 0, flags);
+
+ /* use legacy dirty hack for capremove */
+ if (vc_new_s_context(VC_SAMECTX, vc_get_insecurebcaps(), flags) == VC_NOCTX) {
+ PERROR("vc_new_s_context(%u, 0x%16llx, 0x%08x)",
+ VC_SAMECTX, vc_get_insecurebcaps(), flags);
+ exit(1);
+ }
+#else
+ int ctx_is_new;
+ struct sliver_resources slr;
+ char hostname[HOST_NAME_MAX+1];
+ /* presumably fills slr with the limits configured for this slice */
+ pl_get_limits(context,&slr);
+
+ /* the host name is only used in the messages printed below */
+ if (gethostname(hostname, sizeof hostname) == -1)
+ {
+ PERROR("gethostname(...)");
+ exit(1);
+ }
+
+ /* check whether the slice has been suspended */
+ if (slr.vs_cpu==0)
+ {
+ fprintf(stderr, "*** %s: %s has zero cpu resources and presumably it has been disabled/suspended ***\n", hostname, context);
+ /* note: a suspended slice exits with status 0 */
+ exit(0);
+ }
+
+ /* chroot first, then switch context */
+ (void) (sandbox_chroot(pwd));
+
+ /* negative result: failure; positive result: handled below as "context is new" */
+ if ((ctx_is_new = pl_chcontext(ctx, ~vc_get_insecurebcaps(),&slr)) < 0)
+ {
+ PERROR("pl_chcontext(%u)", ctx);
+ exit(1);
+ }
+ if (ctx_is_new)
+ {
+ fprintf(stderr, " *** %s: %s has not been started yet, please check back later ***\n", hostname, context);
+ exit(1);
+ }
+#endif
+ return 0;
+}
+
+
+/*
+ * Drop to the identity described by `pwd` and set up its login environment.
+ *
+ * Switches gid, then uid, then changes to the home directory, and finally
+ * exports HOME, LOGNAME, MAIL, SHELL and USER.  Any failure is reported
+ * through PERROR and terminates the process with status 1.
+ *
+ * NOTE(review): supplementary groups are not reset here (no
+ * setgroups()/initgroups() call); confirm whether that is intended.
+ */
+void runas_slice_user(struct passwd *pwd)
+{
+    static const char mail_dir[] = "/var/spool/mail/";
+    const char *username = pwd->pw_name;
+    size_t mail_len;
+    char *mail;
+
+    if (setgid(pwd->pw_gid) < 0) {
+        PERROR("setgid(%d)", pwd->pw_gid);
+        exit(1);
+    }
+
+    if (setuid(pwd->pw_uid) < 0) {
+        PERROR("setuid(%d)", pwd->pw_uid);
+        exit(1);
+    }
+
+    if (chdir(pwd->pw_dir) < 0) {
+        PERROR("chdir(%s)", pwd->pw_dir);
+        exit(1);
+    }
+
+    /* sizeof(mail_dir) already accounts for the terminating NUL */
+    mail_len = sizeof(mail_dir) + strlen(username);
+    mail = (char *)malloc(mail_len);
+    if (mail == NULL) {
+        PERROR("malloc");
+        exit(1);
+    }
+    snprintf(mail, mail_len, "%s%s", mail_dir, username);
+
+    /* setenv() copies its arguments, so no per-variable buffers are needed */
+    if ((setenv("HOME", pwd->pw_dir, 1) != 0) ||
+        (setenv("LOGNAME", username, 1) != 0) ||
+        (setenv("MAIL", mail, 1) != 0) ||
+        (setenv("SHELL", pwd->pw_shell, 1) != 0) ||
+        (setenv("USER", username, 1) != 0)) {
+        PERROR("vserver: putenv error ");
+        exit(1);
+    }
+
+    free(mail);
+}
+
+/*
+ * Enter the slice described by `pwd`: become root, chroot first when built
+ * with CONFIG_VSERVER_LEGACY, then move into the context whose id is
+ * pwd->pw_uid.  Exits with status 2 if root cannot be obtained or the
+ * context change reports failure.
+ */
+void slice_enter(struct passwd *pwd)
+{
+    /* root is needed for chroot and new_s_context */
+    if (setuidgid_root() < 0) {
+        fprintf(stderr, "vsh: Could not become root, check that SUID flag is set on binary\n");
+        exit(2);
+    }
+
+#ifdef CONFIG_VSERVER_LEGACY
+    (void) (sandbox_chroot(pwd));
+#endif
+
+    if (sandbox_processes((xid_t) pwd->pw_uid, pwd->pw_name, pwd) >= 0)
+        return;
+
+    fprintf(stderr, "vsh: Could not change context to %d\n", pwd->pw_uid);
+    exit(2);
+}
+
+//--------------------------------------------------------------------
+
+/* Shell used when the passwd entry does not name one. */
+#define DEFAULT_SHELL "/bin/sh"
+
+/* Exit statuses used by main() when the shell cannot be executed:
+ EXIT_ENOENT when execvp() fails with ENOENT, EXIT_CANNOT_INVOKE for any
+ other execvp() failure. */
+enum
+{
+ EXIT_CANNOT_INVOKE = 126,
+ EXIT_ENOENT = 127
+};
+
+/*
+ * vsh entry point.
+ *
+ * Looks up the invoking user, enters the vserver context named after that
+ * user, looks the user up again inside the chroot, drops privileges to that
+ * identity and finally exec()s the user's shell with the original
+ * arguments.  When invoked as a login shell (argv[0] starts with '-') the
+ * shell is started with an additional "-l" argument.
+ *
+ * Exit status: 1 or 2 on setup failures, EXIT_ENOENT if the shell does not
+ * exist and EXIT_CANNOT_INVOKE if it cannot be executed for another reason.
+ */
+int main(int argc, char **argv)
+{
+    struct passwd pwdd, *result, *prechroot, *postchroot = &pwdd;
+    char *context, *username, *shell, *pwdBuffer;
+    long pwdBuffer_len;
+    uid_t uid;
+    int login_shell, i;
+
+    /* argv is later dereferenced and rewritten; reject an empty vector */
+    if (argc < 1 || argv[0] == NULL) {
+        fprintf(stderr, "vsh: missing argv[0]\n");
+        exit(1);
+    }
+    login_shell = (argv[0][0] == '-');
+
+    uid = getuid();
+    if ((prechroot = getpwuid(uid)) == NULL) {
+        PERROR("getpwuid(%d)", uid);
+        exit(1);
+    }
+
+    context = (char*)strdup(prechroot->pw_name);
+    if (!context) {
+        PERROR("strdup");
+        exit(2);
+    }
+
+    /* enter vserver "context" */
+    slice_enter(prechroot);
+
+    /* Get the /etc/passwd entry for this user, this time inside
+     * the chroot.
+     */
+    username = context;
+
+    /* sysconf() may report "no fixed limit" as -1; use a generous default */
+    pwdBuffer_len = sysconf(_SC_GETPW_R_SIZE_MAX);
+    if (pwdBuffer_len <= 0)
+        pwdBuffer_len = 16384;
+    pwdBuffer = (char*)malloc(pwdBuffer_len);
+    if (pwdBuffer == NULL) {
+        PERROR("malloc(%ld)", pwdBuffer_len);
+        exit(1);
+    }
+
+    errno = 0;
+    if ((getpwnam_r(username,postchroot,pwdBuffer,pwdBuffer_len, &result) != 0) ||
+        (errno != 0) || result != postchroot) {
+        PERROR("getpwnam_r(%s)", username);
+        exit(1);
+    }
+
+    /* Make sure pw->pw_shell is usable before anything consumes it:
+       runas_slice_user() exports it as SHELL. */
+    if (postchroot->pw_shell == NULL || postchroot->pw_shell[0] == '\0') {
+        postchroot->pw_shell = (char *) DEFAULT_SHELL;
+    }
+
+    /* Now run as username in this context.  Note that for PlanetLab's
+       vserver configuration the context name also happens to be the
+       "default" username within the vserver context.
+    */
+    runas_slice_user(postchroot);
+
+    shell = (char *)strdup(postchroot->pw_shell);
+    if (!shell) {
+        PERROR("strdup");
+        exit(2);
+    }
+
+    /* Check whether 'su' or 'sshd' invoked us as a login shell or
+       not; did this above when testing argv[0]=='-'.
+    */
+    argv[0] = shell;
+    if (login_shell) {
+        char **args;
+        args = (char**)malloc(sizeof(char*)*(argc+2));
+        if (!args) {
+            PERROR("malloc(%lu)", (unsigned long)(sizeof(char*)*(argc+2)));
+            exit(1);
+        }
+        args[0] = argv[0];
+        args[1] = "-l";
+        /* copies argv[1..argc], i.e. including the terminating NULL */
+        for(i=1;i<argc+1;i++) {
+            args[i+1] = argv[i];
+        }
+        argv = args;
+    }
+    (void) execvp(shell,argv);
+    {
+        /* only reached when execvp() failed */
+        int exit_status = (errno == ENOENT ? EXIT_ENOENT : EXIT_CANNOT_INVOKE);
+        exit (exit_status);
+    }
+
+    return 0; /* shutup compiler */
+}
--- /dev/null
+#!/bin/bash
+#
+# vip6-autod assigns auto-discovered IPv6 addresses to guests
+#
+# chkconfig: 2345 99 01
+# description: starts vip6-autod
+
+# Locate the util-vserver variable definitions; the default path can be
+# overridden through the environment.
+: ${UTIL_VSERVER_VARS:=/usr/lib/util-vserver/util-vserver-vars}
+test -e "$UTIL_VSERVER_VARS" || {
+ echo $"Can not find util-vserver installation (the file '$UTIL_VSERVER_VARS' would be expected); aborting..." >&2
+ exit 1
+}
+. "$UTIL_VSERVER_VARS"
+
+# LOCKFILE is set before sourcing the init helper functions; presumably they
+# derive $lockfile (used below) from it -- verify against that file.
+LOCKFILE=vip6-autod
+. "$_LIB_VSERVER_INIT_FUNCTIONS"
+
+# Name of the daemon binary, also used in the status messages.
+prog="vip6-autod"
+
+# Start the daemon from $__SBINDIR and create the lock file on success.
+# The function returns the exit status of _endResult, which presumably
+# passes through the status it was given -- verify against the sourced
+# init functions.
+function start()
+{
+ _beginResult $"Starting $prog"
+ $__SBINDIR/$prog
+ _endResult $?
+ # $? here is the status of _endResult, not of the daemon itself
+ local retval=$?
+ test "$retval" -ne 0 || touch "$lockfile"
+ return $retval
+}
+
+# Stop the daemon by signalling the PID recorded in its pidfile, then remove
+# the lock file.  The pidfile is removed as well once the signal has been
+# delivered, so that a later stop cannot signal an unrelated process that
+# reused the PID.  Returns the exit status of _endResult.
+#
+# NOTE(review): LOCALSTATEDIR is not set in this script; confirm that the
+# sourced util-vserver files define it.
+function stop()
+{
+    local pidfile="$LOCALSTATEDIR/run/vip6-autod.pid"
+    local pid
+
+    _beginResult $"Stopping $prog"
+    # a missing or empty pidfile counts as a failed stop, without any noise
+    pid=$(cat "$pidfile" 2>/dev/null)
+    test -n "$pid" && kill $pid &>/dev/null
+    _endResult $?
+    local retval=$?
+    test "$retval" -ne 0 || $_RM -f "$pidfile"
+    $_RM -f "$lockfile"
+    return $retval
+}
+
+# Restart the daemon: stop it, then start it again whatever the outcome of
+# the stop.  The return status is that of start.
+function restart()
+{
+    stop
+    start
+}
+
+# Dispatch on the requested action.
+case "$1" in
+ # these actions map directly onto the functions of the same name
+ start|stop|restart) $1;;
+ # reload is accepted but does nothing
+ reload) ;;
+ condrestart)
+ # restart only when the lock file exists; never fail otherwise
+ test -f $lockfile && restart || :
+ ;;
+ status)
+ # NOTE(review): status is not defined in this script; presumably it
+ # comes from the sourced init functions -- confirm.
+ status $prog
+ ;;
+ *)
+ echo "Usage: $0 {start|stop|reload|restart|condrestart|status}"
+ exit 2
+ ;;
+esac
--- /dev/null
+# Package identity; the release tag optionally carries the pldistro and date
+# macros when they are defined.
+%define name util-vserver-pl
+%define version 0.1
+%define release 1%{?pldistro:.%{pldistro}}%{?date:.%{date}}
+
+# Architecture-specific site-packages directory, queried from the build
+# host's python (the one-liner uses Python 2 print syntax).
+%define python_sitearch %( python -c "from distutils.sysconfig import get_python_lib; print get_python_lib(1)" )
+
+Summary: PlanetLab extensions to util-vserver
+Name: %{name}
+Version: %{version}
+Release: %{release}
+License: GPL
+Group: System Environment/Base
+Source0: %{name}-%{version}.tar.bz2
+BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
+Requires: util-vserver util-vserver-core util-vserver-build util-vserver-sysv
+Obsoletes: util-vserver-py32 resman util-vserver-python
+BuildRequires: util-vserver-core util-vserver-devel
+BuildRequires: autoconf automake libtool
+
+%description
+This package contains all PlanetLab extensions to util-vserver.
+
+%prep
+%setup -q
+# regenerate the autotools files from scratch in the unpacked tree
+autoreconf -fi
+
+
+%build
+%configure
+make
+
+
+%install
+rm -fr %{buildroot}
+make DESTDIR=%{buildroot} install
+
+# /bin/vsh is a relative symlink to the real binary in sbindir; the install
+# step above does not create /bin inside the build root, so do it here.
+mkdir -p %{buildroot}/bin
+ln -s ..%{_sbindir}/vsh %{buildroot}/bin/vsh
+
+# the libtool archives of the python module are not packaged
+rm -f %{buildroot}%{python_sitearch}/vserverimpl.a
+rm -f %{buildroot}%{python_sitearch}/vserverimpl.la
+
+# Generate file list for python package
+find "%{buildroot}" -name '*.py' | { while read -r FILE; do
+    f="${FILE#%{buildroot}}"
+    echo "${f}"
+    # need to touch these files, as they are not produced on FC4 or below
+    touch "${FILE}c"
+    touch "${FILE}o"
+    echo %%ghost "${f}c"
+    echo %%ghost "${f}o"
+done } > %name-python.list
+
+
+%post
+# add /bin/vsh to list of secure shells
+# (appended only when /etc/shells is missing or has no exact /bin/vsh line)
+if [ ! -f /etc/shells ] || ! grep -q '^/bin/vsh$' /etc/shells ; then
+ echo /bin/vsh >> /etc/shells
+fi
+
+
+%postun
+# 0 = erase, 1 = upgrade
+# on erase, drop the exact /bin/vsh line from /etc/shells again
+if [ "$1" = 0 ] ; then
+ perl -i -n -e 'next if /^\/bin\/vsh$/; print' /etc/shells
+fi
+
+
+%clean
+rm -fr %{buildroot}
+
+
+# The python modules and their ghosted byte-code files come from the list
+# generated during the install step.
+%files -f %name-python.list
+%defattr(-,root,root,-)
+%{_sbindir}/bwlimit
+%{_sbindir}/disklimit
+%{_sbindir}/vuseradd
+%{_sbindir}/vuserdel
+
+%{_sbindir}/vsh
+/bin/vsh
+%{_mandir}/man8/vsh.8*
+
+%{_sbindir}/vip6-autod
+%{_sysconfdir}/init.d/vip6-autod
+
+%{_sbindir}/vcached
+%{_sysconfdir}/cron.d/vcached.cron
+# must match the name installed by the makefile (scripts/vcached.logrotate)
+%{_sysconfdir}/logrotate.d/vcached.logrotate
+
+
+%changelog
+* Fri Nov 30 2007 Daniel Hokka Zakrisson <daniel@hozac.com> - 0.1-1
+- Initial release