--- /dev/null
+#ifndef _NX_INLINE_H
+#define _NX_INLINE_H
+
+
+// #define NX_DEBUG
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+#include "vserver/network.h"
+
+#if defined(NX_DEBUG)
+#define nxdprintk(x...) printk("nxd: " x)
+#else
+#define nxdprintk(x...)
+#endif
+
+
+void free_nx_info(struct nx_info *);
+
+extern int proc_pid_nx_info(struct task_struct *, char *);
+
+
+#define get_nx_info(i) __get_nx_info(i,__FILE__,__LINE__)
+
+static __inline__ struct nx_info *__get_nx_info(struct nx_info *nxi, const char *_file, int _line)
+{
+ if (!nxi)
+ return NULL;
+ nxdprintk("get_nx_info(%p[%d.%d])\t%s:%d\n",
+ nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_refcount):0,
+ _file, _line);
+ atomic_inc(&nxi->nx_refcount);
+ return nxi;
+}
+
+#define put_nx_info(i) __put_nx_info(i,__FILE__,__LINE__)
+
+static __inline__ void __put_nx_info(struct nx_info *nxi, const char *_file, int _line)
+{
+ if (!nxi)
+ return;
+ nxdprintk("put_nx_info(%p[%d.%d])\t%s:%d\n",
+ nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_refcount):0,
+ _file, _line);
+ if (atomic_dec_and_lock(&nxi->nx_refcount, &nxlist_lock)) {
+ list_del(&nxi->nx_list);
+ spin_unlock(&nxlist_lock);
+ free_nx_info(nxi);
+ }
+}
+
+
+#define set_nx_info(p,i) __set_nx_info(p,i,__FILE__,__LINE__)
+
+static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi,
+ const char *_file, int _line)
+{
+ BUG_ON(*nxp);
+ if (!nxi)
+ return;
+ nxdprintk("set_nx_info(%p[#%d.%d])\t%s:%d\n",
+ nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_refcount):0,
+ _file, _line);
+ *nxp = __get_nx_info(nxi, _file, _line);
+}
+
+#define clr_nx_info(p) __clr_nx_info(p,__FILE__,__LINE__)
+
+static inline void __clr_nx_info(struct nx_info **nxp,
+ const char *_file, int _line)
+{
+ struct nx_info *nxo = *nxp;
+
+ if (!nxo)
+ return;
+ nxdprintk("clr_nx_info(%p[#%d.%d])\t%s:%d\n",
+ nxo, nxo?nxo->nx_id:0, nxo?atomic_read(&nxo->nx_refcount):0,
+ _file, _line);
+ *nxp = NULL;
+ wmb();
+ __put_nx_info(nxo, _file, _line);
+}
+
+
+#define task_get_nx_info(i) __task_get_nx_info(i,__FILE__,__LINE__)
+
+static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p,
+ const char *_file, int _line)
+{
+ struct nx_info *nxi;
+
+ task_lock(p);
+ nxi = __get_nx_info(p->nx_info, _file, _line);
+ task_unlock(p);
+ return nxi;
+}
+
+#define nx_verify_info(p,i) \
+ __nx_verify_info((p)->nx_info,i,__FILE__,__LINE__)
+
+static __inline__ void __nx_verify_info(
+ struct nx_info *ipa, struct nx_info *ipb,
+ const char *_file, int _line)
+{
+ if (ipa == ipb)
+ return;
+ printk(KERN_ERR "ip bad assumption (%p==%p) at %s:%d\n",
+ ipa, ipb, _file, _line);
+}
+
+
+#define nx_task_nid(t) ((t)->nid)
+
+#define nx_current_nid() nx_task_nid(current)
+
+#define nx_check(c,m) __nx_check(nx_current_nid(),c,m)
+
+#define nx_weak_check(c,m) ((m) ? nx_check(c,m) : 1)
+
+#undef nxdprintk
+#define nxdprintk(x...)
+
+
+#define __nx_flags(v,m,f) (((v) & (m)) ^ (f))
+
+#define __nx_task_flags(t,m,f) \
+ (((t) && ((t)->nx_info)) ? \
+ __nx_flags((t)->nx_info->nx_flags,(m),(f)) : 0)
+
+#define nx_current_flags() \
+ ((current->nx_info) ? current->nx_info->nx_flags : 0)
+
+#define nx_flags(m,f) __nx_flags(nx_current_flags(),(m),(f))
+
+
+#define nx_current_ncaps() \
+ ((current->nx_info) ? current->nx_info->nx_ncaps : 0)
+
+#define nx_ncaps(c) (nx_current_ncaps() & (c))
+
+
+
+#define sock_nx_init(s) do { \
+ (s)->sk_nid = 0; \
+ (s)->sk_nx_info = NULL; \
+ } while (0)
+
+
+
+#endif
--- /dev/null
+#ifndef _VX_INLINE_H
+#define _VX_INLINE_H
+
+
+// #define VX_DEBUG
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+#include "vserver/context.h"
+#include "vserver/limit.h"
+#include "vserver/cvirt.h"
+
+#if defined(VX_DEBUG)
+#define vxdprintk(x...) printk("vxd: " x)
+#else
+#define vxdprintk(x...)
+#endif
+
+
+
+void free_vx_info(struct vx_info *);
+
+extern int proc_pid_vx_info(struct task_struct *, char *);
+
+
+#define get_vx_info(i) __get_vx_info(i,__FILE__,__LINE__)
+
+static __inline__ struct vx_info *__get_vx_info(struct vx_info *vxi,
+ const char *_file, int _line)
+{
+ if (!vxi)
+ return NULL;
+ vxdprintk("get_vx_info(%p[#%d.%d])\t%s:%d\n",
+ vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_refcount):0,
+ _file, _line);
+ atomic_inc(&vxi->vx_refcount);
+ return vxi;
+}
+
+#define put_vx_info(i) __put_vx_info(i,__FILE__,__LINE__)
+
+static __inline__ void __put_vx_info(struct vx_info *vxi, const char *_file, int _line)
+{
+ if (!vxi)
+ return;
+ vxdprintk("put_vx_info(%p[#%d.%d])\t%s:%d\n",
+ vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_refcount):0,
+ _file, _line);
+ if (atomic_dec_and_lock(&vxi->vx_refcount, &vxlist_lock)) {
+ list_del(&vxi->vx_list);
+ spin_unlock(&vxlist_lock);
+ free_vx_info(vxi);
+ }
+}
+
+#define set_vx_info(p,i) __set_vx_info(p,i,__FILE__,__LINE__)
+
+static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi,
+ const char *_file, int _line)
+{
+ BUG_ON(*vxp);
+ if (!vxi)
+ return;
+ vxdprintk("set_vx_info(%p[#%d.%d])\t%s:%d\n",
+ vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_refcount):0,
+ _file, _line);
+ *vxp = __get_vx_info(vxi, _file, _line);
+}
+
+#define clr_vx_info(p) __clr_vx_info(p,__FILE__,__LINE__)
+
+static inline void __clr_vx_info(struct vx_info **vxp,
+ const char *_file, int _line)
+{
+ struct vx_info *vxo = *vxp;
+
+ vxdprintk("clr_vx_info(%p[#%d.%d])\t%s:%d\n",
+ vxo, vxo?vxo->vx_id:0, vxo?atomic_read(&vxo->vx_refcount):0,
+ _file, _line);
+ *vxp = NULL;
+ wmb();
+ __put_vx_info(vxo, _file, _line);
+}
+
+
+#define task_get_vx_info(i) __task_get_vx_info(i,__FILE__,__LINE__)
+
+static __inline__ struct vx_info *__task_get_vx_info(struct task_struct *p,
+ const char *_file, int _line)
+{
+ struct vx_info *vxi;
+
+ task_lock(p);
+ vxi = __get_vx_info(p->vx_info, _file, _line);
+ task_unlock(p);
+ return vxi;
+}
+
+
+#define vx_verify_info(p,i) \
+ __vx_verify_info((p)->vx_info,i,__FILE__,__LINE__)
+
+static __inline__ void __vx_verify_info(
+ struct vx_info *vxa, struct vx_info *vxb,
+ const char *_file, int _line)
+{
+ if (vxa == vxb)
+ return;
+ printk(KERN_ERR "vx bad assumption (%p==%p) at %s:%d\n",
+ vxa, vxb, _file, _line);
+}
+
+
+#define vx_task_xid(t) ((t)->xid)
+
+#define vx_current_xid() vx_task_xid(current)
+
+#define vx_check(c,m) __vx_check(vx_current_xid(),c,m)
+
+#define vx_weak_check(c,m) ((m) ? vx_check(c,m) : 1)
+
+
+/*
+ * check current context for ADMIN/WATCH and
+ * optionally against the supplied argument
+ */
+static __inline__ int __vx_check(xid_t cid, xid_t id, unsigned int mode)
+{
+ if (mode & VX_ARG_MASK) {
+ if ((mode & VX_IDENT) &&
+ (id == cid))
+ return 1;
+ }
+ if (mode & VX_ATR_MASK) {
+ if ((mode & VX_DYNAMIC) &&
+ (id >= MIN_D_CONTEXT) &&
+ (id <= MAX_S_CONTEXT))
+ return 1;
+ if ((mode & VX_STATIC) &&
+ (id > 1) && (id < MIN_D_CONTEXT))
+ return 1;
+ }
+ return (((mode & VX_ADMIN) && (cid == 0)) ||
+ ((mode & VX_WATCH) && (cid == 1)));
+}
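+
+/*
+ * illustrative examples (not part of the interface, just how the
+ * checks above combine):
+ *	__vx_check(0, id, VX_ADMIN)		-> 1 (host context, cid 0)
+ *	__vx_check(cid, cid, VX_IDENT)		-> 1 (ids are identical)
+ *	__vx_check(cid, 49152, VX_DYNAMIC)	-> 1 (49152 == MIN_D_CONTEXT)
+ * vx_check(c,m) applies the same rules to the current xid.
+ */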
+
+
+#define __vx_flags(v,m,f) (((v) & (m)) ^ (f))
+
+#define __vx_task_flags(t,m,f) \
+ (((t) && ((t)->vx_info)) ? \
+ __vx_flags((t)->vx_info->vx_flags,(m),(f)) : 0)
+
+#define vx_current_flags() \
+ ((current->vx_info) ? current->vx_info->vx_flags : 0)
+
+#define vx_flags(m,f) __vx_flags(vx_current_flags(),(m),(f))
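+
+/*
+ * sketch of the flag test semantics: __vx_flags(v,m,f) masks v with m
+ * and xors the result with f, so for example
+ *	__vx_flags(flags, VXF_STATE_SETUP, 0)			!= 0 iff SETUP is set
+ *	__vx_flags(flags, VXF_STATE_SETUP, VXF_STATE_SETUP)	!= 0 iff SETUP is clear
+ */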
+
+
+#define vx_current_ccaps() \
+ ((current->vx_info) ? current->vx_info->vx_ccaps : 0)
+
+#define vx_ccaps(c) (vx_current_ccaps() & (c))
+
+#define vx_current_bcaps() \
+ (((current->vx_info) && !vx_flags(VXF_STATE_SETUP, 0)) ? \
+ current->vx_info->vx_bcaps : cap_bset)
+
+
+#define VX_DEBUG_ACC_RSS 0
+#define VX_DEBUG_ACC_VM 0
+#define VX_DEBUG_ACC_VML 0
+
+#undef vxdprintk
+#if (VX_DEBUG_ACC_RSS) || (VX_DEBUG_ACC_VM) || (VX_DEBUG_ACC_VML)
+#define vxdprintk(x...) printk("vxd: " x)
+#else
+#define vxdprintk(x...)
+#endif
+
+#define vx_acc_page(m, d, v, r) \
+ __vx_acc_page(&(m->v), m->mm_vx_info, r, d, __FILE__, __LINE__)
+
+static inline void __vx_acc_page(unsigned long *v, struct vx_info *vxi,
+ int res, int dir, char *file, int line)
+{
+ if (v) {
+ if (dir > 0)
+ ++(*v);
+ else
+ --(*v);
+ }
+ if (vxi) {
+ if (dir > 0)
+ atomic_inc(&vxi->limit.res[res]);
+ else
+ atomic_dec(&vxi->limit.res[res]);
+ }
+}
+
+
+#define vx_acc_pages(m, p, v, r) \
+ __vx_acc_pages(&(m->v), m->mm_vx_info, r, p, __FILE__, __LINE__)
+
+static inline void __vx_acc_pages(unsigned long *v, struct vx_info *vxi,
+ int res, int pages, char *file, int line)
+{
+ if ((res == RLIMIT_RSS && VX_DEBUG_ACC_RSS) ||
+ (res == RLIMIT_AS && VX_DEBUG_ACC_VM) ||
+ (res == RLIMIT_MEMLOCK && VX_DEBUG_ACC_VML))
+ vxdprintk("vx_acc_pages [%5d,%2d]: %5d += %5d in %s:%d\n",
+ (vxi?vxi->vx_id:-1), res,
+ (vxi?atomic_read(&vxi->limit.res[res]):0),
+ pages, file, line);
+ if (pages == 0)
+ return;
+ if (v)
+ *v += pages;
+ if (vxi)
+ atomic_add(pages, &vxi->limit.res[res]);
+}
+
+
+
+#define vx_acc_vmpage(m,d) vx_acc_page(m, d, total_vm, RLIMIT_AS)
+#define vx_acc_vmlpage(m,d) vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK)
+#define vx_acc_rsspage(m,d) vx_acc_page(m, d, rss, RLIMIT_RSS)
+
+#define vx_acc_vmpages(m,p) vx_acc_pages(m, p, total_vm, RLIMIT_AS)
+#define vx_acc_vmlpages(m,p) vx_acc_pages(m, p, locked_vm, RLIMIT_MEMLOCK)
+#define vx_acc_rsspages(m,p) vx_acc_pages(m, p, rss, RLIMIT_RSS)
+
+#define vx_pages_add(s,r,p) __vx_acc_pages(0, s, r, p, __FILE__, __LINE__)
+#define vx_pages_sub(s,r,p) vx_pages_add(s, r, -(p))
+
+#define vx_vmpages_inc(m) vx_acc_vmpage(m, 1)
+#define vx_vmpages_dec(m) vx_acc_vmpage(m,-1)
+#define vx_vmpages_add(m,p) vx_acc_vmpages(m, p)
+#define vx_vmpages_sub(m,p) vx_acc_vmpages(m,-(p))
+
+#define vx_vmlocked_inc(m) vx_acc_vmlpage(m, 1)
+#define vx_vmlocked_dec(m) vx_acc_vmlpage(m,-1)
+#define vx_vmlocked_add(m,p) vx_acc_vmlpages(m, p)
+#define vx_vmlocked_sub(m,p) vx_acc_vmlpages(m,-(p))
+
+#define vx_rsspages_inc(m) vx_acc_rsspage(m, 1)
+#define vx_rsspages_dec(m) vx_acc_rsspage(m,-1)
+#define vx_rsspages_add(m,p) vx_acc_rsspages(m, p)
+#define vx_rsspages_sub(m,p) vx_acc_rsspages(m,-(p))
+
+
+
+#define vx_pages_avail(m, p, r) \
+ __vx_pages_avail((m)->mm_vx_info, (r), (p), __FILE__, __LINE__)
+
+static inline int __vx_pages_avail(struct vx_info *vxi,
+ int res, int pages, char *file, int line)
+{
+ if ((res == RLIMIT_RSS && VX_DEBUG_ACC_RSS) ||
+ (res == RLIMIT_AS && VX_DEBUG_ACC_VM) ||
+ (res == RLIMIT_MEMLOCK && VX_DEBUG_ACC_VML))
+ printk("vx_pages_avail[%5d,%2d]: %5ld > %5d + %5d in %s:%d\n",
+ (vxi?vxi->vx_id:-1), res,
+ (vxi?vxi->limit.rlim[res]:1),
+ (vxi?atomic_read(&vxi->limit.res[res]):0),
+ pages, file, line);
+ if (!vxi)
+ return 1;
+ if (vxi->limit.rlim[res] == RLIM_INFINITY)
+ return 1;
+ if (atomic_read(&vxi->limit.res[res]) + pages < vxi->limit.rlim[res])
+ return 1;
+ return 0;
+}
+
+#define vx_vmpages_avail(m,p) vx_pages_avail(m, p, RLIMIT_AS)
+#define vx_vmlocked_avail(m,p) vx_pages_avail(m, p, RLIMIT_MEMLOCK)
+#define vx_rsspages_avail(m,p) vx_pages_avail(m, p, RLIMIT_RSS)
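+
+/* typical (illustrative) usage pattern; 'grow' is a hypothetical page count:
+ *
+ *	if (!vx_vmpages_avail(mm, grow))
+ *		return -ENOMEM;
+ *	vx_vmpages_add(mm, grow);
+ */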
+
+/* file limits */
+
+#define VX_DEBUG_ACC_FILE 0
+#define VX_DEBUG_ACC_OPENFD 0
+
+#undef vxdprintk
+#if (VX_DEBUG_ACC_FILE) || (VX_DEBUG_ACC_OPENFD)
+#define vxdprintk(x...) printk("vxd: " x)
+#else
+#define vxdprintk(x...)
+#endif
+
+
+#define vx_acc_cres(v,d,r) \
+ __vx_acc_cres((v), (r), (d), __FILE__, __LINE__)
+
+static inline void __vx_acc_cres(struct vx_info *vxi,
+ int res, int dir, char *file, int line)
+{
+ if (vxi) {
+ if ((res == RLIMIT_NOFILE && VX_DEBUG_ACC_FILE) ||
+ (res == RLIMIT_OPENFD && VX_DEBUG_ACC_OPENFD))
+ printk("vx_acc_cres[%5d,%2d]: %5d%s in %s:%d\n",
+ (vxi?vxi->vx_id:-1), res,
+ (vxi?atomic_read(&vxi->limit.res[res]):0),
+ (dir>0)?"++":"--", file, line);
+ if (dir > 0)
+ atomic_inc(&vxi->limit.res[res]);
+ else
+ atomic_dec(&vxi->limit.res[res]);
+ }
+}
+
+#define vx_files_inc(f) vx_acc_cres(current->vx_info, 1, RLIMIT_NOFILE)
+#define vx_files_dec(f) vx_acc_cres(current->vx_info,-1, RLIMIT_NOFILE)
+
+#define vx_openfd_inc(f) vx_acc_cres(current->vx_info, 1, RLIMIT_OPENFD)
+#define vx_openfd_dec(f) vx_acc_cres(current->vx_info,-1, RLIMIT_OPENFD)
+
+#define vx_cres_avail(v,n,r) \
+ __vx_cres_avail((v), (r), (n), __FILE__, __LINE__)
+
+static inline int __vx_cres_avail(struct vx_info *vxi,
+ int res, int num, char *file, int line)
+{
+ if ((res == RLIMIT_NOFILE && VX_DEBUG_ACC_FILE) ||
+ (res == RLIMIT_OPENFD && VX_DEBUG_ACC_OPENFD))
+ printk("vx_cres_avail[%5d,%2d]: %5ld > %5d + %5d in %s:%d\n",
+ (vxi?vxi->vx_id:-1), res,
+ (vxi?vxi->limit.rlim[res]:1),
+ (vxi?atomic_read(&vxi->limit.res[res]):0),
+ num, file, line);
+ if (!vxi)
+ return 1;
+ if (vxi->limit.rlim[res] == RLIM_INFINITY)
+ return 1;
+ if (vxi->limit.rlim[res] < atomic_read(&vxi->limit.res[res]) + num)
+ return 0;
+ return 1;
+}
+
+#define vx_files_avail(n) \
+ vx_cres_avail(current->vx_info, (n), RLIMIT_NOFILE)
+
+#define vx_openfd_avail(n) \
+ vx_cres_avail(current->vx_info, (n), RLIMIT_OPENFD)
+
+/* socket limits */
+
+#define vx_sock_inc(f) vx_acc_cres(current->vx_info, 1, VLIMIT_SOCK)
+#define vx_sock_dec(f) vx_acc_cres(current->vx_info,-1, VLIMIT_SOCK)
+
+#define vx_sock_avail(n) \
+ vx_cres_avail(current->vx_info, (n), VLIMIT_SOCK)
+
+/* procfs ioctls */
+
+#define FIOC_GETXFLG _IOR('x', 5, long)
+#define FIOC_SETXFLG _IOW('x', 6, long)
+
+/* utsname virtualization */
+
+static inline struct new_utsname *vx_new_utsname(void)
+{
+ if (current->vx_info)
+ return &current->vx_info->cvirt.utsname;
+ return &system_utsname;
+}
+
+#define vx_new_uts(x) ((vx_new_utsname())->x)
+
+/* generic flag merging */
+
+#define vx_mask_flags(v,f,m) (((v) & ~(m)) | ((f) & (m)))
+
+#define vx_mask_mask(v,f,m) (((v) & ~(m)) | ((v) & (f) & (m)))
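+
+/* illustrative: vx_mask_flags(v,f,m) keeps the bits of v outside the mask
+ * and takes the bits of f inside it, e.g. vx_mask_flags(0xF0, 0x0F, 0x03)
+ * yields 0xF3; vx_mask_mask(v,f,m) can only clear bits inside the mask,
+ * never set new ones. */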
+
+
+/* socket accounting */
+
+#include <linux/socket.h>
+
+static inline int vx_sock_type(int family)
+{
+ int type = 4;
+
+ if (family > 0 && family < 3)
+ type = family;
+ else if (family == PF_INET6)
+ type = 3;
+ return type;
+}
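+
+/* resulting mapping (sketch, PF_* values as of 2.6): PF_UNIX (1) -> 1,
+ * PF_INET (2) -> 2, PF_INET6 (10) -> 3, anything else -> 4 ("OTHER");
+ * slot 0 ("UNSPEC") is never produced by this function. */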
+
+#define vx_acc_sock(v,f,p,s) \
+ __vx_acc_sock((v), (f), (p), (s), __FILE__, __LINE__)
+
+static inline void __vx_acc_sock(struct vx_info *vxi,
+ int family, int pos, int size, char *file, int line)
+{
+ if (vxi) {
+ int type = vx_sock_type(family);
+
+ atomic_inc(&vxi->cacct.sock[type][pos].count);
+ atomic_add(size, &vxi->cacct.sock[type][pos].total);
+ }
+}
+
+#define vx_sock_recv(sk,s) \
+ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, (s))
+#define vx_sock_send(sk,s) \
+ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, (s))
+#define vx_sock_fail(sk,s) \
+ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, (s))
+
+
+#define sock_vx_init(s) do { \
+ (s)->sk_xid = 0; \
+ (s)->sk_vx_info = NULL; \
+ } while (0)
+
+
+/* pid faking stuff */
+
+
+#define vx_map_tgid(v,p) \
+ __vx_map_tgid((v), (p), __FILE__, __LINE__)
+
+static inline int __vx_map_tgid(struct vx_info *vxi, int pid,
+ char *file, int line)
+{
+ if (vxi && __vx_flags(vxi->vx_flags, VXF_INFO_INIT, 0)) {
+ vxdprintk("vx_map_tgid: %p/%llx: %d -> %d in %s:%d\n",
+ vxi, vxi->vx_flags, pid,
+ (pid == vxi->vx_initpid)?1:pid,
+ file, line);
+ if (pid == vxi->vx_initpid)
+ return 1;
+ }
+ return pid;
+}
+
+#define vx_rmap_tgid(v,p) \
+ __vx_rmap_tgid((v), (p), __FILE__, __LINE__)
+
+static inline int __vx_rmap_tgid(struct vx_info *vxi, int pid,
+ char *file, int line)
+{
+ if (vxi && __vx_flags(vxi->vx_flags, VXF_INFO_INIT, 0)) {
+ vxdprintk("vx_rmap_tgid: %p/%llx: %d -> %d in %s:%d\n",
+ vxi, vxi->vx_flags, pid,
+ (pid == 1)?vxi->vx_initpid:pid,
+ file, line);
+ if ((pid == 1) && vxi->vx_initpid)
+ return vxi->vx_initpid;
+ }
+ return pid;
+}
+
+#undef vxdprintk
+#define vxdprintk(x...)
+
+#endif
--- /dev/null
+#ifndef _LINUX_VSERVER_H
+#define _LINUX_VSERVER_H
+
+#include <linux/vserver/context.h>
+#include <linux/vserver/network.h>
+#include <linux/vinline.h>
+#include <linux/ninline.h>
+
+#endif
--- /dev/null
+#ifndef _VX_CONTEXT_H
+#define _VX_CONTEXT_H
+
+#include <linux/types.h>
+
+#define MAX_S_CONTEXT 65535 /* Arbitrary limit */
+#define MIN_D_CONTEXT 49152 /* dynamic contexts start here */
+
+#define VX_DYNAMIC_ID ((uint32_t)-1) /* id for dynamic context */
+
+#ifdef __KERNEL__
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+#define _VX_INFO_DEF_
+#include "cvirt.h"
+#include "limit.h"
+#include "sched.h"
+#undef _VX_INFO_DEF_
+
+struct vx_info {
+ struct list_head vx_list; /* linked list of contexts */
+ xid_t vx_id; /* context id */
+ atomic_t vx_refcount; /* refcount */
+ struct vx_info *vx_parent; /* parent context */
+
+ struct namespace *vx_namespace; /* private namespace */
+ struct fs_struct *vx_fs; /* private namespace fs */
+ uint64_t vx_flags; /* VX_INFO_xxx */
+ uint64_t vx_bcaps; /* bounding caps (system) */
+ uint64_t vx_ccaps; /* context caps (vserver) */
+
+ pid_t vx_initpid; /* PID of fake init process */
+
+ struct _vx_limit limit; /* vserver limits */
+ struct _vx_sched sched; /* vserver scheduler */
+ struct _vx_cvirt cvirt; /* virtual/bias stuff */
+ struct _vx_cacct cacct; /* context accounting */
+
+ char vx_name[65]; /* vserver name */
+};
+
+
+extern spinlock_t vxlist_lock;
+extern struct list_head vx_infos;
+
+
+#define VX_ADMIN 0x0001
+#define VX_WATCH 0x0002
+#define VX_DUMMY 0x0008
+
+#define VX_IDENT 0x0010
+#define VX_EQUIV 0x0020
+#define VX_PARENT 0x0040
+#define VX_CHILD 0x0080
+
+#define VX_ARG_MASK 0x00F0
+
+#define VX_DYNAMIC 0x0100
+#define VX_STATIC 0x0200
+
+#define VX_ATR_MASK 0x0F00
+
+
+void free_vx_info(struct vx_info *);
+
+extern struct vx_info *find_vx_info(int);
+extern struct vx_info *find_or_create_vx_info(int);
+extern int vx_info_id_valid(int);
+
+extern int vx_migrate_task(struct task_struct *, struct vx_info *);
+
+#endif /* __KERNEL__ */
+
+#include "switch.h"
+
+/* vinfo commands */
+
+#define VCMD_task_xid VC_CMD(VINFO, 1, 0)
+#define VCMD_task_nid VC_CMD(VINFO, 2, 0)
+
+#ifdef __KERNEL__
+extern int vc_task_xid(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_vx_info VC_CMD(VINFO, 5, 0)
+#define VCMD_nx_info VC_CMD(VINFO, 6, 0)
+
+struct vcmd_vx_info_v0 {
+ uint32_t xid;
+ uint32_t initpid;
+ /* more to come */
+};
+
+#ifdef __KERNEL__
+extern int vc_vx_info(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_ctx_create VC_CMD(VPROC, 1, 0)
+#define VCMD_ctx_migrate VC_CMD(PROCMIG, 1, 0)
+
+#ifdef __KERNEL__
+extern int vc_ctx_create(uint32_t, void __user *);
+extern int vc_ctx_migrate(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_get_cflags VC_CMD(FLAGS, 1, 0)
+#define VCMD_set_cflags VC_CMD(FLAGS, 2, 0)
+
+struct vcmd_ctx_flags_v0 {
+ uint64_t flagword;
+ uint64_t mask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_cflags(uint32_t, void __user *);
+extern int vc_set_cflags(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VXF_INFO_LOCK 0x00000001
+#define VXF_INFO_SCHED 0x00000002
+#define VXF_INFO_NPROC 0x00000004
+#define VXF_INFO_PRIVATE 0x00000008
+
+#define VXF_INFO_INIT 0x00000010
+#define VXF_INFO_HIDE 0x00000020
+#define VXF_INFO_ULIMIT 0x00000040
+#define VXF_INFO_NSPACE 0x00000080
+
+#define VXF_SCHED_HARD 0x00000100
+#define VXF_SCHED_PRIO 0x00000200
+#define VXF_SCHED_PAUSE 0x00000400
+
+#define VXF_VIRT_MEM 0x00010000
+#define VXF_VIRT_UPTIME 0x00020000
+#define VXF_VIRT_CPU 0x00040000
+
+#define VXF_HIDE_MOUNT 0x01000000
+#define VXF_HIDE_NETIF 0x02000000
+
+#define VXF_STATE_SETUP (1ULL<<32)
+#define VXF_STATE_INIT (1ULL<<33)
+
+#define VXF_FORK_RSS (1ULL<<48)
+
+#define VXF_ONE_TIME (0x0003ULL<<32)
+
+#define VCMD_get_ccaps VC_CMD(FLAGS, 3, 0)
+#define VCMD_set_ccaps VC_CMD(FLAGS, 4, 0)
+
+struct vcmd_ctx_caps_v0 {
+ uint64_t bcaps;
+ uint64_t ccaps;
+ uint64_t cmask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_ccaps(uint32_t, void __user *);
+extern int vc_set_ccaps(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VXC_SET_UTSNAME 0x00000001
+#define VXC_SET_RLIMIT 0x00000002
+
+#define VXC_ICMP_PING 0x00000100
+
+#define VXC_SECURE_MOUNT 0x00010000
+
+
+#endif /* _VX_CONTEXT_H */
--- /dev/null
+#if defined(__KERNEL__) && defined(_VX_INFO_DEF_)
+
+#include <linux/utsname.h>
+#include <linux/rwsem.h>
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <asm/atomic.h>
+
+/* context sub struct */
+
+struct _vx_cvirt {
+ int max_threads;
+
+ unsigned int bias_cswtch;
+ struct timespec bias_idle;
+ struct timespec bias_tp;
+ uint64_t bias_jiffies;
+
+ struct new_utsname utsname;
+};
+
+struct sock_acc {
+ atomic_t count;
+ atomic_t total;
+};
+
+struct _vx_cacct {
+ atomic_t nr_threads;
+ int nr_running;
+
+ unsigned long total_forks;
+
+ struct sock_acc sock[5][3];
+};
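+
+/* note (inferred from the accounting helpers in vinline.h): the first sock[]
+ * index is the family class from vx_sock_type(), the second is the event,
+ * 0 = recv, 1 = send, 2 = fail. */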
+
+
+static inline long vx_sock_count(struct _vx_cacct *cacct, int type, int pos)
+{
+ return atomic_read(&cacct->sock[type][pos].count);
+}
+
+
+static inline long vx_sock_total(struct _vx_cacct *cacct, int type, int pos)
+{
+ return atomic_read(&cacct->sock[type][pos].total);
+}
+
+
+extern uint64_t vx_idle_jiffies(void);
+
+static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt)
+{
+ uint64_t idle_jiffies = vx_idle_jiffies();
+
+ // new->virt.bias_cswtch = kstat.context_swtch;
+ cvirt->bias_jiffies = get_jiffies_64();
+
+ jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle);
+ do_posix_clock_monotonic_gettime(&cvirt->bias_tp);
+
+ down_read(&uts_sem);
+ cvirt->utsname = system_utsname;
+ up_read(&uts_sem);
+}
+
+static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt)
+{
+ return;
+}
+
+static inline void vx_info_init_cacct(struct _vx_cacct *cacct)
+{
+ int i,j;
+
+ atomic_set(&cacct->nr_threads, 1);
+ for (i=0; i<5; i++) {
+ for (j=0; j<3; j++) {
+ atomic_set(&cacct->sock[i][j].count, 0);
+ atomic_set(&cacct->sock[i][j].total, 0);
+ }
+ }
+}
+
+static inline void vx_info_exit_cacct(struct _vx_cacct *cacct)
+{
+ return;
+}
+
+static inline int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer)
+{
+ int length = 0;
+ return length;
+}
+
+static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer)
+{
+ int i,j, length = 0;
+ static char *type[] = { "UNSPEC", "UNIX", "INET", "INET6", "OTHER" };
+
+ for (i=0; i<5; i++) {
+ length += sprintf(buffer + length,
+ "%s:", type[i]);
+ for (j=0; j<3; j++) {
+ length += sprintf(buffer + length,
+ "\t%12lu/%-12lu"
+ ,vx_sock_count(cacct, i, j)
+ ,vx_sock_total(cacct, i, j)
+ );
+ }
+ buffer[length++] = '\n';
+ }
+ return length;
+}
+
+#else /* _VX_INFO_DEF_ */
+#ifndef _VX_CVIRT_H
+#define _VX_CVIRT_H
+
+#include "switch.h"
+
+/* cvirt vserver commands */
+
+
+#ifdef __KERNEL__
+
+struct timespec;
+
+void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle);
+
+#endif /* __KERNEL__ */
+
+#endif /* _VX_CVIRT_H */
+#endif
--- /dev/null
+#ifndef _VX_INODE_H
+#define _VX_INODE_H
+
+#include "switch.h"
+
+/* inode vserver commands */
+
+#define VCMD_get_iattr_v0 VC_CMD(INODE, 1, 0)
+#define VCMD_set_iattr_v0 VC_CMD(INODE, 2, 0)
+
+#define VCMD_get_iattr VC_CMD(INODE, 1, 1)
+#define VCMD_set_iattr VC_CMD(INODE, 2, 1)
+
+struct vcmd_ctx_iattr_v0 {
+ /* device handle in id */
+ uint64_t ino;
+ uint32_t xid;
+ uint32_t flags;
+ uint32_t mask;
+};
+
+struct vcmd_ctx_iattr_v1 {
+ const char __user *name;
+ uint32_t xid;
+ uint32_t flags;
+ uint32_t mask;
+};
+
+
+#define IATTR_XID 0x01000000
+
+#define IATTR_ADMIN 0x00000001
+#define IATTR_WATCH 0x00000002
+#define IATTR_HIDE 0x00000004
+#define IATTR_FLAGS 0x00000007
+
+#define IATTR_BARRIER 0x00010000
+#define IATTR_IUNLINK 0x00020000
+#define IATTR_IMMUTABLE 0x00040000
+
+
+#ifdef CONFIG_PROC_SECURE
+#define IATTR_PROC_DEFAULT ( IATTR_ADMIN | IATTR_HIDE )
+#define IATTR_PROC_SYMLINK ( IATTR_ADMIN )
+#else
+#define IATTR_PROC_DEFAULT ( IATTR_ADMIN )
+#define IATTR_PROC_SYMLINK ( IATTR_ADMIN )
+#endif
+
+#ifdef __KERNEL__
+
+#define vx_hide_check(c,m) (((m) & IATTR_HIDE) ? vx_check(c,m) : 1)
+
+extern int vc_get_iattr_v0(uint32_t, void __user *);
+extern int vc_set_iattr_v0(uint32_t, void __user *);
+
+extern int vc_get_iattr(uint32_t, void __user *);
+extern int vc_set_iattr(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+/* inode ioctls */
+
+#define FIOC_GETXFLG _IOR('x', 5, long)
+#define FIOC_SETXFLG _IOW('x', 6, long)
+
+#endif /* _VX_INODE_H */
--- /dev/null
+#ifndef _VX_LEGACY_H
+#define _VX_LEGACY_H
+
+#include "switch.h"
+#include "network.h"
+
+/* compatibility vserver commands */
+
+#define VCMD_new_s_context VC_CMD(COMPAT, 1, 1)
+#define VCMD_set_ipv4root VC_CMD(COMPAT, 2, 3)
+
+#define VCMD_create_context VC_CMD(VSETUP, 1, 0)
+
+/* compatibility vserver arguments */
+
+struct vcmd_new_s_context_v1 {
+ uint32_t remove_cap;
+ uint32_t flags;
+};
+
+struct vcmd_set_ipv4root_v3 {
+ /* number of pairs in id */
+ uint32_t broadcast;
+ struct {
+ uint32_t ip;
+ uint32_t mask;
+ } nx_mask_pair[NB_IPV4ROOT];
+};
+
+
+#define VX_INFO_LOCK 1 /* Can't request a new vx_id */
+#define VX_INFO_NPROC 4 /* Limit number of processes in a context */
+#define VX_INFO_PRIVATE 8 /* No one can join this security context */
+#define VX_INFO_INIT 16 /* This process wants to become the */
+ /* logical process 1 of the security */
+ /* context */
+#define VX_INFO_HIDEINFO 32 /* Hide some information in /proc */
+#define VX_INFO_ULIMIT 64 /* Use ulimit of the current process */
+ /* to become the global limits */
+ /* of the context */
+#define VX_INFO_NAMESPACE 128 /* save private namespace */
+
+
+#define NB_S_CONTEXT 16
+
+#define NB_IPV4ROOT 16
+
+
+#ifdef __KERNEL__
+extern int vc_new_s_context(uint32_t, void __user *);
+extern int vc_set_ipv4root(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_LEGACY_H */
--- /dev/null
+#if defined(__KERNEL__) && defined(_VX_INFO_DEF_)
+
+#include <asm/atomic.h>
+#include <asm/resource.h>
+
+/* context sub struct */
+
+#define RLIMIT_OPENFD 12
+
+#define NUM_RLIMITS 16
+
+#define VLIMIT_SOCK 16
+
+
+struct _vx_limit {
+ atomic_t ticks;
+
+ unsigned long rlim[NUM_RLIMITS]; /* Per context limit */
+ atomic_t res[NUM_RLIMITS]; /* Current value */
+};
+
+static inline void vx_info_init_limit(struct _vx_limit *limit)
+{
+ int lim;
+
+ for (lim=0; lim<NUM_RLIMITS; lim++) {
+ limit->rlim[lim] = RLIM_INFINITY;
+ atomic_set(&limit->res[lim], 0);
+ }
+}
+
+extern unsigned int vx_debug_limit;
+
+static inline void vx_info_exit_limit(struct _vx_limit *limit)
+{
+ int lim, value;
+
+ for (lim=0; lim<NUM_RLIMITS; lim++) {
+ value = atomic_read(&limit->res[lim]);
+ if (value && vx_debug_limit)
+ printk("!!! limit: %p[%d] = %d on exit.\n",
+ limit, lim, value);
+ }
+}
+
+
+static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer)
+{
+ return sprintf(buffer,
+ "PROC:\t%8d/%ld\n"
+ "VM:\t%8d/%ld\n"
+ "VML:\t%8d/%ld\n"
+ "RSS:\t%8d/%ld\n"
+ "FILES:\t%8d/%ld\n"
+ "OFD:\t%8d/%ld\n"
+ ,atomic_read(&limit->res[RLIMIT_NPROC])
+ ,limit->rlim[RLIMIT_NPROC]
+ ,atomic_read(&limit->res[RLIMIT_AS])
+ ,limit->rlim[RLIMIT_AS]
+ ,atomic_read(&limit->res[RLIMIT_MEMLOCK])
+ ,limit->rlim[RLIMIT_MEMLOCK]
+ ,atomic_read(&limit->res[RLIMIT_RSS])
+ ,limit->rlim[RLIMIT_RSS]
+ ,atomic_read(&limit->res[RLIMIT_NOFILE])
+ ,limit->rlim[RLIMIT_NOFILE]
+ ,atomic_read(&limit->res[RLIMIT_OPENFD])
+ ,limit->rlim[RLIMIT_OPENFD]
+ );
+}
+
+#else /* _VX_INFO_DEF_ */
+#ifndef _VX_LIMIT_H
+#define _VX_LIMIT_H
+
+#include "switch.h"
+
+/* rlimit vserver commands */
+
+#define VCMD_get_rlimit VC_CMD(RLIMIT, 1, 0)
+#define VCMD_set_rlimit VC_CMD(RLIMIT, 2, 0)
+#define VCMD_get_rlimit_mask VC_CMD(RLIMIT, 3, 0)
+
+struct vcmd_ctx_rlimit_v0 {
+ uint32_t id;
+ uint64_t minimum;
+ uint64_t softlimit;
+ uint64_t maximum;
+};
+
+struct vcmd_ctx_rlimit_mask_v0 {
+ uint32_t minimum;
+ uint32_t softlimit;
+ uint32_t maximum;
+};
+
+#define CRLIM_UNSET (0ULL)
+#define CRLIM_INFINITY (~0ULL)
+#define CRLIM_KEEP (~1ULL)
+
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+
+extern int vc_get_rlimit(uint32_t, void __user *);
+extern int vc_set_rlimit(uint32_t, void __user *);
+extern int vc_get_rlimit_mask(uint32_t, void __user *);
+
+struct sysinfo;
+
+void vx_vsi_meminfo(struct sysinfo *);
+void vx_vsi_swapinfo(struct sysinfo *);
+
+
+#endif /* __KERNEL__ */
+
+#endif /* _VX_LIMIT_H */
+#endif
--- /dev/null
+#ifndef _VX_NAMESPACE_H
+#define _VX_NAMESPACE_H
+
+#include <linux/types.h>
+
+
+/* virtual host info names */
+
+#define VCMD_vx_set_vhi_name VC_CMD(VHOST, 1, 0)
+#define VCMD_vx_get_vhi_name VC_CMD(VHOST, 2, 0)
+
+struct vcmd_vx_vhi_name_v0 {
+ uint32_t field;
+ char name[65];
+};
+
+
+enum vx_vhi_name_field {
+ VHIN_CONTEXT=0,
+ VHIN_SYSNAME,
+ VHIN_NODENAME,
+ VHIN_RELEASE,
+ VHIN_VERSION,
+ VHIN_MACHINE,
+ VHIN_DOMAINNAME,
+};
+
+
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+
+extern int vc_set_vhi_name(uint32_t, void __user *);
+extern int vc_get_vhi_name(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_enter_namespace VC_CMD(PROCALT, 1, 0)
+#define VCMD_cleanup_namespace VC_CMD(PROCALT, 2, 0)
+#define VCMD_set_namespace VC_CMD(PROCALT, 3, 0)
+
+#ifdef __KERNEL__
+
+struct vx_info;
+struct namespace;
+struct fs_struct;
+
+extern int vx_set_namespace(struct vx_info *, struct namespace *, struct fs_struct *);
+
+extern int vc_enter_namespace(uint32_t, void __user *);
+extern int vc_cleanup_namespace(uint32_t, void __user *);
+extern int vc_set_namespace(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_NAMESPACE_H */
--- /dev/null
+#ifndef _VX_NETWORK_H
+#define _VX_NETWORK_H
+
+#define MAX_N_CONTEXT 65535 /* Arbitrary limit */
+
+#define IP_DYNAMIC_ID ((uint32_t)-1) /* id for dynamic context */
+
+#define NB_IPV4ROOT 16
+
+#ifdef __KERNEL__
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/utsname.h>
+#include <asm/resource.h>
+#include <asm/atomic.h>
+
+
+struct nx_info {
+ struct list_head nx_list; /* linked list of nxinfos */
+ nid_t nx_id; /* vnet id */
+ atomic_t nx_refcount;
+
+ uint64_t nx_flags; /* network flag word */
+ uint64_t nx_ncaps; /* network capabilities */
+
+ int nbipv4;
+ __u32 ipv4[NB_IPV4ROOT]; /* Process can only bind to these IPs */
+ /* The first one is used to connect */
+ /* and for bind any service */
+ /* The other must be used explicity */
+ __u32 mask[NB_IPV4ROOT]; /* Netmask for each ipv4 */
+ /* Used to select the proper source */
+ /* address for sockets */
+ __u32 v4_bcast; /* Broadcast address to receive UDP */
+
+ char nx_name[65]; /* network context name */
+};
+
+
+extern spinlock_t nxlist_lock;
+extern struct list_head nx_infos;
+
+
+void free_nx_info(struct nx_info *);
+struct nx_info *create_nx_info(void);
+
+extern struct nx_info *find_nx_info(int);
+extern int nx_info_id_valid(int);
+
+struct in_ifaddr;
+struct net_device;
+
+int ifa_in_nx_info(struct in_ifaddr *, struct nx_info *);
+int dev_in_nx_info(struct net_device *, struct nx_info *);
+
+
+#endif /* __KERNEL__ */
+
+#include "switch.h"
+
+/* vinfo commands */
+
+#define VCMD_task_nid VC_CMD(VINFO, 2, 0)
+
+#ifdef __KERNEL__
+extern int vc_task_nid(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_nx_info VC_CMD(VINFO, 6, 0)
+
+struct vcmd_nx_info_v0 {
+ uint32_t nid;
+ /* more to come */
+};
+
+#ifdef __KERNEL__
+extern int vc_nx_info(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_net_create VC_CMD(VNET, 1, 0)
+#define VCMD_net_migrate VC_CMD(NETMIG, 1, 0)
+
+#define VCMD_net_add VC_CMD(NETALT, 1, 0)
+#define VCMD_net_remove VC_CMD(NETALT, 2, 0)
+
+struct vcmd_net_nx_v0 {
+ uint16_t type;
+ uint16_t count;
+ uint32_t ip[4];
+ uint32_t mask[4];
+ /* more to come */
+};
+
+// IPN_TYPE_IPV4
+
+
+#ifdef __KERNEL__
+extern int vc_net_create(uint32_t, void __user *);
+extern int vc_net_migrate(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_get_nflags VC_CMD(FLAGS, 5, 0)
+#define VCMD_set_nflags VC_CMD(FLAGS, 6, 0)
+
+struct vcmd_net_flags_v0 {
+ uint64_t flagword;
+ uint64_t mask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_nflags(uint32_t, void __user *);
+extern int vc_set_nflags(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define IPF_STATE_SETUP (1ULL<<32)
+
+
+#define IPF_ONE_TIME (0x0001ULL<<32)
+
+#define VCMD_get_ncaps VC_CMD(FLAGS, 7, 0)
+#define VCMD_set_ncaps VC_CMD(FLAGS, 8, 0)
+
+struct vcmd_net_caps_v0 {
+ uint64_t ncaps;
+ uint64_t cmask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_ncaps(uint32_t, void __user *);
+extern int vc_set_ncaps(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define IPC_WOSSNAME 0x00000001
+
+
+#endif /* _VX_NETWORK_H */
--- /dev/null
+#if defined(__KERNEL__) && defined(_VX_INFO_DEF_)
+
+#include <linux/spinlock.h>
+#include <linux/jiffies.h>
+#include <asm/atomic.h>
+#include <asm/param.h>
+#include <asm/cpumask.h>
+
+/* context sub struct */
+
+struct _vx_sched {
+ spinlock_t tokens_lock; /* lock for this structure */
+
+ int fill_rate; /* Fill rate: add X tokens... */
+ int interval; /* Divisor: per Y jiffies */
+ atomic_t tokens; /* number of CPU tokens in this context */
+ int tokens_min; /* Limit: minimum for unhold */
+ int tokens_max; /* Limit: no more than N tokens */
+ uint32_t jiffies; /* add an integral multiple of Y to this */
+
+ uint64_t ticks; /* token tick events */
+ cpumask_t cpus_allowed; /* cpu mask for context */
+};
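+
+/* rough sketch of the intended token bucket (only relevant with
+ * VXF_SCHED_HARD): every 'interval' jiffies 'fill_rate' tokens are added,
+ * a running task consumes one token per tick, so fill_rate/interval
+ * approximates the context's CPU share (the defaults below, 1/4, would
+ * allow roughly 25%). */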
+
+static inline void vx_info_init_sched(struct _vx_sched *sched)
+{
+ /* scheduling; hard code starting values as constants */
+ sched->fill_rate = 1;
+ sched->interval = 4;
+ sched->tokens_min = HZ >> 4;
+ sched->tokens_max = HZ >> 1;
+ sched->jiffies = jiffies;
+ sched->tokens_lock = SPIN_LOCK_UNLOCKED;
+
+ atomic_set(&sched->tokens, HZ >> 2);
+ sched->cpus_allowed = CPU_MASK_ALL;
+}
+
+static inline void vx_info_exit_sched(struct _vx_sched *sched)
+{
+ return;
+}
+
+static inline int vx_info_proc_sched(struct _vx_sched *sched, char *buffer)
+{
+ return sprintf(buffer,
+ "Ticks:\t%16lld\n"
+ "Token:\t\t%8d\n"
+ "FillRate:\t%8d\n"
+ "Interval:\t%8d\n"
+ "TokensMin:\t%8d\n"
+ "TokensMax:\t%8d\n"
+ ,sched->ticks
+ ,atomic_read(&sched->tokens)
+ ,sched->fill_rate
+ ,sched->interval
+ ,sched->tokens_min
+ ,sched->tokens_max
+ );
+}
+
+
+#else /* _VX_INFO_DEF_ */
+#ifndef _VX_SCHED_H
+#define _VX_SCHED_H
+
+#include "switch.h"
+
+/* sched vserver commands */
+
+#define VCMD_set_sched VC_CMD(SCHED, 1, 2)
+
+struct vcmd_set_sched_v2 {
+ int32_t fill_rate;
+ int32_t interval;
+ int32_t tokens;
+ int32_t tokens_min;
+ int32_t tokens_max;
+ uint64_t cpu_mask;
+};
+
+#define SCHED_KEEP (-2)
+
+#ifdef __KERNEL__
+
+extern int vc_set_sched_v1(uint32_t, void __user *);
+extern int vc_set_sched(uint32_t, void __user *);
+
+
+#define VAVAVOOM_RATIO 50
+
+#include "context.h"
+
+
+/* scheduling stuff */
+
+int effective_vavavoom(struct task_struct *, int);
+
+int vx_tokens_recalc(struct vx_info *);
+
+/* new stuff ;) */
+
+static inline int vx_tokens_avail(struct vx_info *vxi)
+{
+ return atomic_read(&vxi->sched.tokens);
+}
+
+static inline void vx_consume_token(struct vx_info *vxi)
+{
+ atomic_dec(&vxi->sched.tokens);
+}
+
+static inline int vx_need_resched(struct task_struct *p)
+{
+#ifdef CONFIG_VSERVER_HARDCPU
+ struct vx_info *vxi = p->vx_info;
+
+ if (vxi) {
+ int tokens;
+
+ p->time_slice--;
+ if (atomic_read(&vxi->vx_refcount) < 1)
+ printk("need_resched: p=%p, s=%ld, ref=%d, id=%d/%d\n",
+ p, p->state, atomic_read(&vxi->vx_refcount),
+ vxi->vx_id, p->xid);
+ if ((tokens = vx_tokens_avail(vxi)) > 0)
+ vx_consume_token(vxi);
+ return ((p->time_slice == 0) || (tokens < 1));
+ }
+#endif
+ p->time_slice--;
+ return (p->time_slice == 0);
+}
+
+
+#endif /* __KERNEL__ */
+
+#endif /* _VX_SCHED_H */
+#endif
--- /dev/null
+#ifndef _VX_SIGNAL_H
+#define _VX_SIGNAL_H
+
+#include "switch.h"
+
+/* context signalling */
+
+#define VCMD_ctx_kill VC_CMD(PROCTRL, 1, 0)
+
+struct vcmd_ctx_kill_v0 {
+ int32_t pid;
+ int32_t sig;
+};
+
+#ifdef __KERNEL__
+extern int vc_ctx_kill(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_SIGNAL_H */
--- /dev/null
+#ifndef _VX_SWITCH_H
+#define _VX_SWITCH_H
+
+#include <linux/types.h>
+
+#define VC_CATEGORY(c) (((c) >> 24) & 0x3F)
+#define VC_COMMAND(c) (((c) >> 16) & 0xFF)
+#define VC_VERSION(c) ((c) & 0xFFF)
+
+#define VC_CMD(c,i,v) ((((VC_CAT_ ## c) & 0x3F) << 24) \
+ | (((i) & 0xFF) << 16) | ((v) & 0xFFF))
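+
+/* illustrative decomposition (not part of the API): with VC_CAT_VPROC == 9,
+ *	VCMD_ctx_create == VC_CMD(VPROC, 1, 0) == 0x09010000
+ * and VC_CATEGORY/VC_COMMAND/VC_VERSION recover 9, 1 and 0 again. */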
+
+/*
+
+ Syscall Matrix V2.6
+
+ |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL|
+ |STATS |DESTROY|ALTER |CHANGE |LIMIT |TEST | | | |
+ |INFO |SETUP | |MOVE | | | | | |
+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
+ SYSTEM |VERSION|VSETUP |VHOST | | | | |DEVICES| |
+ HOST | 00| 01| 02| 03| 04| 05| | 06| 07|
+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
+ CPU | |VPROC |PROCALT|PROCMIG|PROCTRL| | |SCHED. | |
+ PROCESS| 08| 09| 10| 11| 12| 13| | 14| 15|
+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
+ MEMORY | | | | | | | |SWAP | |
+ | 16| 17| 18| 19| 20| 21| | 22| 23|
+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
+ NETWORK| |VNET |NETALT |NETMIG |NETCTL | | |SERIAL | |
+ | 24| 25| 26| 27| 28| 29| | 30| 31|
+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
+ DISK | | | | | | | |INODE | |
+ VFS | 32| 33| 34| 35| 36| 37| | 38| 39|
+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
+ OTHER | | | | | | | |VINFO | |
+ | 40| 41| 42| 43| 44| 45| | 46| 47|
+ =======+=======+=======+=======+=======+=======+=======+ +=======+=======+
+ SPECIAL| | | | |FLAGS | | | | |
+ | 48| 49| 50| 51| 52| 53| | 54| 55|
+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
+ SPECIAL| | | | |RLIMIT |SYSCALL| | |COMPAT |
+ | 56| 57| 58| 59| 60|TEST 61| | 62| 63|
+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
+
+*/
+
+#define VC_CAT_VERSION 0
+
+#define VC_CAT_VSETUP 1
+#define VC_CAT_VHOST 2
+
+#define VC_CAT_VPROC 9
+#define VC_CAT_PROCALT 10
+#define VC_CAT_PROCMIG 11
+#define VC_CAT_PROCTRL 12
+
+#define VC_CAT_SCHED 14
+
+#define VC_CAT_VNET 25
+#define VC_CAT_NETALT 26
+#define VC_CAT_NETMIG 27
+#define VC_CAT_NETCTRL 28
+
+#define VC_CAT_INODE 38
+
+#define VC_CAT_VINFO 46
+
+#define VC_CAT_FLAGS 52
+#define VC_CAT_RLIMIT 60
+
+#define VC_CAT_SYSTEST 61
+#define VC_CAT_COMPAT 63
+
+/* interface version */
+
+#define VCI_VERSION 0x00010016
+
+
+/* query version */
+
+#define VCMD_get_version VC_CMD(VERSION, 0, 0)
+
+
+#ifdef __KERNEL__
+
+#include <linux/errno.h>
+
+#define ENOTSUP -EOPNOTSUPP
+
+#else /* __KERNEL__ */
+#define __user
+#endif /* __KERNEL__ */
+
+#endif /* _VX_SWITCH_H */
--- /dev/null
+#ifndef _LINUX_XID_H_
+#define _LINUX_XID_H_
+
+#ifdef CONFIG_INOXID_NONE
+
+#define MAX_UID 0xFFFFFFFF
+#define MAX_GID 0xFFFFFFFF
+
+#define INOXID_XID(uid, gid, xid) (0)
+
+#define XIDINO_UID(uid, xid) (uid)
+#define XIDINO_GID(gid, xid) (gid)
+
+#endif
+
+
+#ifdef CONFIG_INOXID_GID16
+
+#define MAX_UID 0xFFFFFFFF
+#define MAX_GID 0x0000FFFF
+
+#define INOXID_XID(uid, gid, xid) (((gid) >> 16) & 0xFFFF)
+
+#define XIDINO_UID(uid, xid) (uid)
+#define XIDINO_GID(gid, xid) (((gid) & 0xFFFF) | ((xid) << 16))
+
+
+#endif
+
+
+#ifdef CONFIG_INOXID_GID24
+
+#define MAX_UID 0x00FFFFFF
+#define MAX_GID 0x00FFFFFF
+
+#define INOXID_XID(uid, gid, xid) ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF))
+
+#define XIDINO_UID(uid, xid) (((uid) & 0xFFFFFF) | (((xid) & 0xFF00) << 16))
+#define XIDINO_GID(gid, xid) (((gid) & 0xFFFFFF) | (((xid) & 0x00FF) << 24))
+
+#endif
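+
+/* illustrative bit layout for CONFIG_INOXID_GID24: the high byte of the xid
+ * is stored in uid bits 24-31, the low byte in gid bits 24-31, so
+ *	INOXID_XID(XIDINO_UID(uid, xid), XIDINO_GID(gid, xid), 0) == xid
+ * for any xid <= 0xFFFF. */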
+
+
+#ifdef CONFIG_INOXID_GID32
+
+#define MAX_UID 0xFFFFFFFF
+#define MAX_GID 0xFFFFFFFF
+
+#define INOXID_XID(uid, gid, xid) (xid)
+
+#define XIDINO_UID(uid, xid) (uid)
+#define XIDINO_GID(gid, xid) (gid)
+
+#endif
+
+
+#ifdef CONFIG_INOXID_RUNTIME
+
+#define MAX_UID 0xFFFFFFFF
+#define MAX_GID 0xFFFFFFFF
+
+#define INOXID_XID(uid, gid, xid) (0)
+
+#define XIDINO_UID(uid, xid) (uid)
+#define XIDINO_GID(gid, xid) (gid)
+
+#endif
+
+
+#define INOXID_UID(uid, gid) ((uid) & MAX_UID)
+#define INOXID_GID(uid, gid) ((gid) & MAX_GID)
+
+static inline uid_t vx_map_uid(uid_t uid)
+{
+ if ((uid > MAX_UID) && (uid != -1))
+ uid = -2;
+ return (uid & MAX_UID);
+}
+
+static inline gid_t vx_map_gid(gid_t gid)
+{
+ if ((gid > MAX_GID) && (gid != -1))
+ gid = -2;
+ return (gid & MAX_GID);
+}
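+
+/* illustrative, with CONFIG_INOXID_GID24 (MAX_UID 0x00FFFFFF):
+ *	vx_map_uid(0x01000000) == 0x00FFFFFE	(folded to (uid_t)-2, then masked)
+ *	vx_map_uid(5)          == 5		(in range, unchanged)
+ */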
+
+
+#ifdef CONFIG_VSERVER_LEGACY
+#define FIOC_GETXID _IOR('x', 1, long)
+#define FIOC_SETXID _IOW('x', 2, long)
+#define FIOC_SETXIDJ _IOW('x', 3, long)
+#endif
+
+#endif /* _LINUX_XID_H_ */
--- /dev/null
+#
+# Linux VServer configuration
+#
+
+menu "Linux VServer"
+
+config VSERVER_LEGACY
+ bool "Enable Legacy Kernel API"
+ default y
+ help
+ This enables the legacy API used in vs1.xx, which allows
+ older tools to be used (for migration purposes).
+
+config PROC_SECURE
+ bool "Enable Proc Security"
+ depends on PROC_FS
+ default y
+ help
+ Hide proc entries by default for xid>1
+
+config VSERVER_HARDCPU
+ bool "Enable Hard CPU Limits"
+ depends on EXPERIMENTAL
+ default n
+ help
+ Activate the Hard CPU Limits
+
+choice
+ prompt "Persistent Inode Context Tagging"
+ default INOXID_GID24
+ help
+ This adds persistent context information to filesystems
+ mounted with the tagxid option. Tagging is a requirement
+ for per context disk limits and per context quota.
+
+
+config INOXID_NONE
+ bool "Disabled"
+ help
+ no context information is stored for inodes
+
+config INOXID_GID16
+ bool "UID32/GID16"
+ help
+ reduces GID to 16 bit, but leaves UID at 32 bit.
+
+config INOXID_GID24
+ bool "UID24/GID24"
+ help
+ uses the upper 8 bits of both UID and GID for XID tagging,
+ which leaves 24 bits each for UID and GID; this should be
+ more than sufficient for normal use.
+
+config INOXID_GID32
+ bool "UID32/GID32"
+ help
+ this uses otherwise reserved inode fields in the on-disk
+ representation, which limits its use to a few filesystems
+ (currently ext2 and ext3)
+
+config INOXID_RUNTIME
+ bool "Runtime"
+ depends on EXPERIMENTAL
+ help
+ inodes are tagged when first accessed; this doesn't
+ require any persistent information, but might give
+ funny results for mixed access.
+
+endchoice
+
+endmenu
+
--- /dev/null
+#
+# Makefile for the Linux vserver routines.
+#
+
+
+obj-y += vserver.o
+
+vserver-y := switch.o context.o namespace.o sched.o network.o inode.o \
+ limit.o cvirt.o signal.o proc.o sysctl.o init.o
+
+vserver-$(CONFIG_VSERVER_LEGACY) += legacy.o
+
--- /dev/null
+/*
+ * linux/kernel/vserver/context.c
+ *
+ * Virtual Server: Context Support
+ *
+ * Copyright (C) 2003-2004 Herbert Pötzl
+ *
+ * V0.01 context helper
+ * V0.02 vx_ctx_kill syscall command
+ * V0.03 replaced context_info calls
+ * V0.04 redesign of struct (de)alloc
+ * V0.05 rlimit basic implementation
+ * V0.06 task_xid and info commands
+ * V0.07 context flags and caps
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/vserver/context.h>
+#include <linux/vserver/legacy.h>
+#include <linux/vinline.h>
+#include <linux/kernel_stat.h>
+#include <linux/namespace.h>
+
+#include <asm/errno.h>
+
+
+/* system functions */
+
+
+LIST_HEAD(vx_infos);
+
+spinlock_t vxlist_lock
+ __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+
+
+/*
+ * struct vx_info allocation and deallocation
+ */
+
+static struct vx_info *alloc_vx_info(int id)
+{
+ struct vx_info *new = NULL;
+
+ vxdprintk("alloc_vx_info(%d)\n", id);
+ /* would this benefit from a slab cache? */
+ new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ memset (new, 0, sizeof(struct vx_info));
+ new->vx_id = id;
+ INIT_LIST_HEAD(&new->vx_list);
+ /* rest of init goes here */
+
+ vx_info_init_limit(&new->limit);
+ vx_info_init_sched(&new->sched);
+ vx_info_init_cvirt(&new->cvirt);
+ vx_info_init_cacct(&new->cacct);
+
+ new->vx_flags = VXF_STATE_SETUP|VXF_STATE_INIT;
+ new->vx_bcaps = CAP_INIT_EFF_SET;
+ new->vx_ccaps = 0;
+
+ vxdprintk("alloc_vx_info(%d) = %p\n", id, new);
+ return new;
+}
+
+void free_vx_info(struct vx_info *vxi)
+{
+ vxdprintk("free_vx_info(%p)\n", vxi);
+ if (vxi->vx_namespace)
+ put_namespace(vxi->vx_namespace);
+ if (vxi->vx_fs)
+ put_fs_struct(vxi->vx_fs);
+
+ vx_info_exit_limit(&vxi->limit);
+ vx_info_exit_sched(&vxi->sched);
+ vx_info_exit_cvirt(&vxi->cvirt);
+ vx_info_exit_cacct(&vxi->cacct);
+
+ BUG_ON(atomic_read(&vxi->vx_refcount));
+ vxi->vx_id = -1;
+
+ kfree(vxi);
+}
+
+
+/*
+ * struct vx_info search by id
+ * assumes vxlist_lock is held
+ */
+
+static __inline__ struct vx_info *__find_vx_info(int id)
+{
+ struct vx_info *vxi;
+
+ list_for_each_entry(vxi, &vx_infos, vx_list)
+ if (vxi->vx_id == id)
+ return vxi;
+ return NULL;
+}
+
+
+/*
+ * struct vx_info ref stuff
+ */
+
+struct vx_info *find_vx_info(int id)
+{
+ struct vx_info *vxi;
+
+ if (id < 0) {
+ vxi = current->vx_info;
+ get_vx_info(vxi);
+ } else {
+ spin_lock(&vxlist_lock);
+ if ((vxi = __find_vx_info(id)))
+ get_vx_info(vxi);
+ spin_unlock(&vxlist_lock);
+ }
+ return vxi;
+}
+
+/*
+ * verify that id is a valid xid
+ */
+
+int vx_info_id_valid(int id)
+{
+ int valid;
+
+ spin_lock(&vxlist_lock);
+ valid = (__find_vx_info(id) != NULL);
+ spin_unlock(&vxlist_lock);
+ return valid;
+}
+
+
+/*
+ * dynamic context id ...
+ */
+
+static __inline__ xid_t __vx_dynamic_id(void)
+{
+ static xid_t seq = MAX_S_CONTEXT;
+ xid_t barrier = seq;
+
+ do {
+ if (++seq > MAX_S_CONTEXT)
+ seq = MIN_D_CONTEXT;
+ if (!__find_vx_info(seq))
+ return seq;
+ } while (barrier != seq);
+ return 0;
+}
+
+static struct vx_info * __foc_vx_info(int id, int *err)
+{
+ struct vx_info *new, *vxi = NULL;
+
+ vxdprintk("foc_vx_info(%d)\n", id);
+ if (!(new = alloc_vx_info(id))) {
+ *err = -ENOMEM;
+ return NULL;
+ }
+
+ /* dirty hack until Spectator becomes a cap */
+ if (id == 0 || id == 1) {
+ free_vx_info(new);
+ *err = -EBUSY;
+ return NULL;
+ }
+
+ spin_lock(&vxlist_lock);
+
+ /* dynamic context requested */
+ if (id == VX_DYNAMIC_ID) {
+ id = __vx_dynamic_id();
+ if (!id) {
+ printk(KERN_ERR "no dynamic context available.\n");
+ goto out_unlock;
+ }
+ new->vx_id = id;
+ }
+ /* existing context requested */
+ else if ((vxi = __find_vx_info(id))) {
+ /* context in setup is not available */
+ if (vxi->vx_flags & VXF_STATE_SETUP) {
+ vxdprintk("foc_vx_info(%d) = %p (not available)\n", id, vxi);
+ vxi = NULL;
+ *err = -EBUSY;
+ } else {
+ vxdprintk("foc_vx_info(%d) = %p (found)\n", id, vxi);
+ get_vx_info(vxi);
+ *err = 0;
+ }
+ goto out_unlock;
+ }
+
+ /* new context requested */
+ vxdprintk("foc_vx_info(%d) = %p (new)\n", id, new);
+ atomic_set(&new->vx_refcount, 1);
+ list_add(&new->vx_list, &vx_infos);
+ vxi = new, new = NULL;
+ *err = 1;
+
+out_unlock:
+ spin_unlock(&vxlist_lock);
+ if (new)
+ free_vx_info(new);
+ return vxi;
+}
+
+
+struct vx_info *find_or_create_vx_info(int id)
+{
+ int err;
+
+ return __foc_vx_info(id, &err);
+}
+
+
+int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
+{
+ struct user_struct *new_user, *old_user;
+
+ if (!p || !vxi)
+ BUG();
+ new_user = alloc_uid(vxi->vx_id, p->uid);
+ if (!new_user)
+ return -ENOMEM;
+
+ old_user = p->user;
+ if (new_user != old_user) {
+ atomic_inc(&new_user->processes);
+ atomic_dec(&old_user->processes);
+ p->user = new_user;
+ }
+ free_uid(old_user);
+ return 0;
+}
+
+void vx_mask_bcaps(struct task_struct *p)
+{
+ struct vx_info *vxi = p->vx_info;
+
+ p->cap_effective &= vxi->vx_bcaps;
+ p->cap_inheritable &= vxi->vx_bcaps;
+ p->cap_permitted &= vxi->vx_bcaps;
+}
+
+
+#include <linux/file.h>
+
+static inline int vx_nofiles_task(struct task_struct *tsk)
+{
+ struct files_struct *files = tsk->files;
+ const unsigned long *obptr, *cbptr;
+ int count, total;
+
+ spin_lock(&files->file_lock);
+ obptr = files->open_fds->fds_bits;
+ cbptr = files->close_on_exec->fds_bits;
+ count = files->max_fds / (sizeof(unsigned long) * 8);
+ for (total = 0; count > 0; count--) {
+ if (*obptr)
+ total += hweight_long(*obptr);
+ obptr++;
+ /* if (*cbptr)
+ total += hweight_long(*cbptr);
+ cbptr++; */
+ }
+ spin_unlock(&files->file_lock);
+ return total;
+}
+
+static inline int vx_openfd_task(struct task_struct *tsk)
+{
+ struct files_struct *files = tsk->files;
+ const unsigned long *bptr;
+ int count, total;
+
+ spin_lock(&files->file_lock);
+ bptr = files->open_fds->fds_bits;
+ count = files->max_fds / (sizeof(unsigned long) * 8);
+ for (total = 0; count > 0; count--) {
+ if (*bptr)
+ total += hweight_long(*bptr);
+ bptr++;
+ }
+ spin_unlock(&files->file_lock);
+ return total;
+}
+
+/*
+ * migrate task to new context
+ * gets vxi, puts old_vxi on change
+ */
+
+int vx_migrate_task(struct task_struct *p, struct vx_info *vxi)
+{
+ struct vx_info *old_vxi = task_get_vx_info(p);
+ int ret = 0;
+
+ if (!p || !vxi)
+ BUG();
+
+ vxdprintk("vx_migrate_task(%p,%p[#%d.%d)\n", p, vxi,
+ vxi->vx_id, atomic_read(&vxi->vx_refcount));
+ if (old_vxi == vxi)
+ goto out;
+
+ if (!(ret = vx_migrate_user(p, vxi))) {
+ task_lock(p);
+ if (old_vxi) {
+ atomic_dec(&old_vxi->cacct.nr_threads);
+ atomic_dec(&old_vxi->limit.res[RLIMIT_NPROC]);
+ }
+ atomic_inc(&vxi->cacct.nr_threads);
+ atomic_inc(&vxi->limit.res[RLIMIT_NPROC]);
+ atomic_add(vx_nofiles_task(p), &vxi->limit.res[RLIMIT_NOFILE]);
+ atomic_add(vx_openfd_task(p), &vxi->limit.res[RLIMIT_OPENFD]);
+ set_vx_info(&p->vx_info, vxi);
+ p->xid = vxi->vx_id;
+ vx_mask_bcaps(p);
+ task_unlock(p);
+
+ put_vx_info(old_vxi);
+ }
+out:
+ put_vx_info(old_vxi);
+ return ret;
+}
+
+int vx_set_init(struct vx_info *vxi, struct task_struct *p)
+{
+ if (!vxi)
+ return -EINVAL;
+ if (vxi->vx_initpid)
+ return -EPERM;
+
+ vxi->vx_initpid = p->tgid;
+ return 0;
+}
+
+
+/* vserver syscall commands below here */
+
+/* task xid and vx_info functions */
+
+#include <asm/uaccess.h>
+
+
+int vc_task_xid(uint32_t id, void __user *data)
+{
+ xid_t xid;
+
+ if (id) {
+ struct task_struct *tsk;
+
+ if (!vx_check(0, VX_ADMIN|VX_WATCH))
+ return -EPERM;
+
+ read_lock(&tasklist_lock);
+ tsk = find_task_by_pid(id);
+ xid = (tsk) ? tsk->xid : -ESRCH;
+ read_unlock(&tasklist_lock);
+ }
+ else
+ xid = current->xid;
+ return xid;
+}
+
+
+int vc_vx_info(uint32_t id, void __user *data)
+{
+ struct vx_info *vxi;
+ struct vcmd_vx_info_v0 vc_data;
+
+ if (!vx_check(0, VX_ADMIN))
+ return -ENOSYS;
+ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
+ return -EPERM;
+
+ vxi = find_vx_info(id);
+ if (!vxi)
+ return -ESRCH;
+
+ vc_data.xid = vxi->vx_id;
+ vc_data.initpid = vxi->vx_initpid;
+ put_vx_info(vxi);
+
+ if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+ return -EFAULT;
+ return 0;
+}
+
+
+/* context functions */
+
+int vc_ctx_create(uint32_t xid, void __user *data)
+{
+ // int ret = -ENOMEM;
+ struct vx_info *new_vxi;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if ((xid >= MIN_D_CONTEXT) && (xid != VX_DYNAMIC_ID))
+ return -EINVAL;
+
+ if (xid < 1)
+ return -EINVAL;
+
+ new_vxi = __foc_vx_info(xid, &ret);
+ if (!new_vxi)
+ return ret;
+ if (!(new_vxi->vx_flags & VXF_STATE_SETUP)) {
+ ret = -EEXIST;
+ goto out_put;
+ }
+
+ ret = new_vxi->vx_id;
+ vx_migrate_task(current, new_vxi);
+out_put:
+ put_vx_info(new_vxi);
+ return ret;
+}
+
+
+int vc_ctx_migrate(uint32_t id, void __user *data)
+{
+ struct vx_info *vxi;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ /* dirty hack until Spectator becomes a cap */
+ if (id == 1) {
+ current->xid = 1;
+ return 0;
+ }
+
+ vxi = find_vx_info(id);
+ if (!vxi)
+ return -ESRCH;
+ vx_migrate_task(current, vxi);
+ put_vx_info(vxi);
+ return 0;
+}
+
+
+int vc_get_cflags(uint32_t id, void __user *data)
+{
+ struct vx_info *vxi;
+ struct vcmd_ctx_flags_v0 vc_data;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ vxi = find_vx_info(id);
+ if (!vxi)
+ return -ESRCH;
+
+ vc_data.flagword = vxi->vx_flags;
+
+ // vc_data.mask = ~0UL;
+ /* special STATE flag handling */
+ vc_data.mask = vx_mask_flags(~0UL, vxi->vx_flags, VXF_ONE_TIME);
+
+ put_vx_info(vxi);
+
+ if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+ return -EFAULT;
+ return 0;
+}
+
+int vc_set_cflags(uint32_t id, void __user *data)
+{
+ struct vx_info *vxi;
+ struct vcmd_ctx_flags_v0 vc_data;
+ uint64_t mask, trigger;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+ return -EFAULT;
+
+ vxi = find_vx_info(id);
+ if (!vxi)
+ return -ESRCH;
+
+ /* special STATE flag handling */
+ mask = vx_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
+ trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword);
+
+ if (trigger & VXF_STATE_SETUP)
+ vx_mask_bcaps(current);
+ if (trigger & VXF_STATE_INIT)
+ if (vxi == current->vx_info)
+ vx_set_init(vxi, current);
+
+ vxi->vx_flags = vx_mask_flags(vxi->vx_flags,
+ vc_data.flagword, mask);
+ put_vx_info(vxi);
+ return 0;
+}
+
+int vc_get_ccaps(uint32_t id, void __user *data)
+{
+ struct vx_info *vxi;
+ struct vcmd_ctx_caps_v0 vc_data;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ vxi = find_vx_info(id);
+ if (!vxi)
+ return -ESRCH;
+
+ vc_data.bcaps = vxi->vx_bcaps;
+ vc_data.ccaps = vxi->vx_ccaps;
+ vc_data.cmask = ~0UL;
+ put_vx_info(vxi);
+
+ if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+ return -EFAULT;
+ return 0;
+}
+
+int vc_set_ccaps(uint32_t id, void __user *data)
+{
+ struct vx_info *vxi;
+ struct vcmd_ctx_caps_v0 vc_data;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+ return -EFAULT;
+
+ vxi = find_vx_info(id);
+ if (!vxi)
+ return -ESRCH;
+
+ vxi->vx_bcaps &= vc_data.bcaps;
+ vxi->vx_ccaps = vx_mask_flags(vxi->vx_ccaps,
+ vc_data.ccaps, vc_data.cmask);
+ put_vx_info(vxi);
+ return 0;
+}
+
+#include <linux/module.h>
+
+EXPORT_SYMBOL_GPL(free_vx_info);
+EXPORT_SYMBOL_GPL(vxlist_lock);
+
--- /dev/null
+/*
+ * linux/kernel/vserver/cvirt.c
+ *
+ * Virtual Server: Context Virtualization
+ *
+ * Copyright (C) 2004 Herbert Pötzl
+ *
+ * V0.01 broken out from limit.c
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/vserver/cvirt.h>
+#include <linux/vserver/context.h>
+#include <linux/vserver/switch.h>
+#include <linux/vinline.h>
+
+#include <asm/errno.h>
+#include <asm/uaccess.h>
+
+
+void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle)
+{
+ struct vx_info *vxi = current->vx_info;
+
+ set_normalized_timespec(uptime,
+ uptime->tv_sec - vxi->cvirt.bias_tp.tv_sec,
+ uptime->tv_nsec - vxi->cvirt.bias_tp.tv_nsec);
+ if (!idle)
+ return;
+ set_normalized_timespec(idle,
+ idle->tv_sec - vxi->cvirt.bias_idle.tv_sec,
+ idle->tv_nsec - vxi->cvirt.bias_idle.tv_nsec);
+ return;
+}
+
+uint64_t vx_idle_jiffies(void)
+{
+ return init_task.utime + init_task.stime;
+}
+
--- /dev/null
+/*
+ * linux/kernel/init.c
+ *
+ * Virtual Server Init
+ *
+ * Copyright (C) 2004 Herbert Pötzl
+ *
+ * V0.01 basic structure
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/vserver.h>
+// #include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+int vserver_register_sysctl(void);
+void vserver_unregister_sysctl(void);
+
+
+static int __init init_vserver(void)
+{
+ int ret = 0;
+
+ vserver_register_sysctl();
+ return ret;
+}
+
+
+static void __exit exit_vserver(void)
+{
+
+ vserver_unregister_sysctl();
+ return;
+}
+
+
+module_init(init_vserver);
+module_exit(exit_vserver);
+
--- /dev/null
+/*
+ * linux/kernel/vserver/inode.c
+ *
+ * Virtual Server: File System Support
+ *
+ * Copyright (C) 2004 Herbert Pötzl
+ *
+ * V0.01 separated from vcontext V0.05
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/vinline.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/namei.h>
+#include <linux/vserver/inode.h>
+
+#include <asm/errno.h>
+#include <asm/uaccess.h>
+
+
+static int __vc_get_iattr(struct inode *in, uint32_t *xid, uint32_t *flags, uint32_t *mask)
+{
+ if (!in || !in->i_sb)
+ return -ESRCH;
+
+ *flags = IATTR_XID
+ | (IS_BARRIER(in) ? IATTR_BARRIER : 0)
+ | (IS_IUNLINK(in) ? IATTR_IUNLINK : 0)
+ | (IS_IMMUTABLE(in) ? IATTR_IMMUTABLE : 0);
+ *mask = IATTR_IUNLINK | IATTR_IMMUTABLE;
+
+ if (S_ISDIR(in->i_mode))
+ *mask |= IATTR_BARRIER;
+
+ if (in->i_sb->s_flags & MS_TAGXID) {
+ *xid = in->i_xid;
+ *mask |= IATTR_XID;
+ }
+
+ if (in->i_sb->s_magic == PROC_SUPER_MAGIC) {
+ struct proc_dir_entry *entry = PROC_I(in)->pde;
+
+ // check for specific inodes ?
+ if (entry)
+ *mask |= IATTR_FLAGS;
+ if (entry)
+ *flags |= (entry->vx_flags & IATTR_FLAGS);
+ else
+ *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS);
+ }
+ return 0;
+}
+
+int vc_get_iattr(uint32_t id, void __user *data)
+{
+ struct nameidata nd;
+ struct vcmd_ctx_iattr_v1 vc_data;
+ int ret;
+
+ if (!vx_check(0, VX_ADMIN))
+ return -ENOSYS;
+ if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+ return -EFAULT;
+
+ ret = user_path_walk_link(vc_data.name, &nd);
+ if (!ret) {
+ ret = __vc_get_iattr(nd.dentry->d_inode,
+ &vc_data.xid, &vc_data.flags, &vc_data.mask);
+ path_release(&nd);
+ }
+
+ if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+ ret = -EFAULT;
+ return ret;
+}
+
+static int __vc_set_iattr(struct dentry *de, uint32_t *xid, uint32_t *flags, uint32_t *mask)
+{
+ struct inode *in = de->d_inode;
+ int error = 0, is_proc = 0;
+
+ if (!in || !in->i_sb)
+ return -ESRCH;
+
+ is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC);
+ if ((*mask & IATTR_FLAGS) && !is_proc)
+ return -EINVAL;
+ if ((*mask & IATTR_XID) && !(in->i_sb->s_flags & MS_TAGXID))
+ return -EINVAL;
+
+ down(&in->i_sem);
+ if (*mask & IATTR_XID)
+ in->i_xid = *xid;
+
+ if (*mask & IATTR_FLAGS) {
+ struct proc_dir_entry *entry = PROC_I(in)->pde;
+ unsigned int iflags = PROC_I(in)->vx_flags;
+
+ iflags = (iflags & ~(*mask & IATTR_FLAGS))
+ | (*flags & IATTR_FLAGS);
+ PROC_I(in)->vx_flags = iflags;
+ if (entry)
+ entry->vx_flags = iflags;
+ }
+
+ if (*mask & (IATTR_BARRIER | IATTR_IUNLINK | IATTR_IMMUTABLE)) {
+ struct iattr attr;
+
+ attr.ia_valid = ATTR_ATTR_FLAG;
+ attr.ia_attr_flags =
+ (IS_IMMUTABLE(in) ? ATTR_FLAG_IMMUTABLE : 0) |
+ (IS_IUNLINK(in) ? ATTR_FLAG_IUNLINK : 0) |
+ (IS_BARRIER(in) ? ATTR_FLAG_BARRIER : 0);
+
+ if (*mask & IATTR_IMMUTABLE) {
+ if (*flags & IATTR_IMMUTABLE)
+ attr.ia_attr_flags |= ATTR_FLAG_IMMUTABLE;
+ else
+ attr.ia_attr_flags &= ~ATTR_FLAG_IMMUTABLE;
+ }
+ if (*mask & IATTR_IUNLINK) {
+ if (*flags & IATTR_IUNLINK)
+ attr.ia_attr_flags |= ATTR_FLAG_IUNLINK;
+ else
+ attr.ia_attr_flags &= ~ATTR_FLAG_IUNLINK;
+ }
+ if (S_ISDIR(in->i_mode) && (*mask & IATTR_BARRIER)) {
+ if (*flags & IATTR_BARRIER)
+ attr.ia_attr_flags |= ATTR_FLAG_BARRIER;
+ else
+ attr.ia_attr_flags &= ~ATTR_FLAG_BARRIER;
+ }
+ if (in->i_op && in->i_op->setattr)
+ error = in->i_op->setattr(de, &attr);
+ else {
+ error = inode_change_ok(in, &attr);
+ if (!error)
+ error = inode_setattr(in, &attr);
+ }
+ }
+
+ mark_inode_dirty(in);
+ up(&in->i_sem);
+	return error;
+}
+
+int vc_set_iattr(uint32_t id, void __user *data)
+{
+ struct nameidata nd;
+ struct vcmd_ctx_iattr_v1 vc_data;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_LINUX_IMMUTABLE))
+ return -EPERM;
+ if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+ return -EFAULT;
+
+ ret = user_path_walk_link(vc_data.name, &nd);
+ if (!ret) {
+ ret = __vc_set_iattr(nd.dentry,
+ &vc_data.xid, &vc_data.flags, &vc_data.mask);
+ path_release(&nd);
+ }
+
+ if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+ ret = -EFAULT;
+ return ret;
+}
+
+
+#ifdef CONFIG_VSERVER_LEGACY
+#include <linux/proc_fs.h>
+
+#define PROC_DYNAMIC_FIRST 0xF0000000UL
+
+int vx_proc_ioctl(struct inode * inode, struct file * filp,
+ unsigned int cmd, unsigned long arg)
+{
+ struct proc_dir_entry *entry;
+ int error = 0;
+ int flags;
+
+ if (inode->i_ino < PROC_DYNAMIC_FIRST)
+ return -ENOTTY;
+
+ entry = PROC_I(inode)->pde;
+
+ switch(cmd) {
+ case FIOC_GETXFLG: {
+ /* fixme: if stealth, return -ENOTTY */
+ error = -EPERM;
+ flags = entry->vx_flags;
+ if (capable(CAP_CONTEXT))
+ error = put_user(flags, (int *) arg);
+ break;
+ }
+ case FIOC_SETXFLG: {
+ /* fixme: if stealth, return -ENOTTY */
+ error = -EPERM;
+ if (!capable(CAP_CONTEXT))
+ break;
+ error = -EROFS;
+ if (IS_RDONLY(inode))
+ break;
+ error = -EFAULT;
+ if (get_user(flags, (int *) arg))
+ break;
+ error = 0;
+ entry->vx_flags = flags;
+ break;
+ }
+ default:
+ return -ENOTTY;
+ }
+ return error;
+}
+#endif
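+
+/*
+ * Hypothetical userspace usage of the legacy ioctls above (a sketch
+ * for illustration only; fd refers to an open, dynamically allocated
+ * /proc entry):
+ *
+ *	int flags;
+ *
+ *	if (ioctl(fd, FIOC_GETXFLG, &flags) == 0) {
+ *		(adjust the IATTR_FLAGS bits as desired)
+ *		ioctl(fd, FIOC_SETXFLG, &flags);
+ *	}
+ *
+ * FIOC_SETXFLG requires CAP_CONTEXT and a writable filesystem.
+ */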
+
--- /dev/null
+/*
+ * linux/kernel/vserver/legacy.c
+ *
+ * Virtual Server: Legacy Functions
+ *
+ * Copyright (C) 2001-2003 Jacques Gelinas
+ * Copyright (C) 2003-2004 Herbert Pötzl
+ *
+ * V0.01 broken out from vcontext.c V0.05
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/vserver/legacy.h>
+#include <linux/vserver/context.h>
+#include <linux/vserver/namespace.h>
+#include <linux/vserver.h>
+#include <linux/sched.h>
+#include <linux/namespace.h>
+
+#include <asm/errno.h>
+#include <asm/uaccess.h>
+
+
+
+static int vx_set_initpid(struct vx_info *vxi, int pid)
+{
+ if (vxi->vx_initpid)
+ return -EPERM;
+
+ vxi->vx_initpid = pid;
+ return 0;
+}
+
+int vc_new_s_context(uint32_t ctx, void __user *data)
+{
+ int ret = -ENOMEM;
+ struct vcmd_new_s_context_v1 vc_data;
+ struct vx_info *new_vxi;
+
+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
+ return -EFAULT;
+
+ /* legacy hack, will be removed soon */
+ if (ctx == -2) {
+ /* assign flags and initpid */
+ if (!current->vx_info)
+ return -EINVAL;
+ ret = 0;
+ if (vc_data.flags & VX_INFO_INIT)
+ ret = vx_set_initpid(current->vx_info, current->tgid);
+ if (ret == 0) {
+ /* We keep the same vx_id, but lower the capabilities */
+ current->vx_info->vx_bcaps &= (~vc_data.remove_cap);
+ // current->cap_bset &= (~vc_data.remove_cap);
+ ret = vx_current_xid();
+ current->vx_info->vx_flags |= vc_data.flags;
+ }
+ return ret;
+ }
+
+ if (!vx_check(0, VX_ADMIN) ||
+ !capable(CAP_SYS_ADMIN) || vx_flags(VX_INFO_LOCK, 0))
+ return -EPERM;
+
+ /* ugly hack for Spectator */
+ if (ctx == 1) {
+ current->xid = 1;
+ return 0;
+ }
+
+ if (((ctx > MAX_S_CONTEXT) && (ctx != VX_DYNAMIC_ID)) ||
+ (ctx == 0))
+ return -EINVAL;
+
+ if ((ctx == VX_DYNAMIC_ID) || (ctx < MIN_D_CONTEXT))
+ new_vxi = find_or_create_vx_info(ctx);
+ else
+ new_vxi = find_vx_info(ctx);
+
+ if (!new_vxi)
+ return -EINVAL;
+ new_vxi->vx_flags &= ~(VXF_STATE_SETUP|VXF_STATE_INIT);
+
+ ret = vx_migrate_task(current, new_vxi);
+ if (ret == 0) {
+ current->vx_info->vx_bcaps &= (~vc_data.remove_cap);
+ // current->cap_bset &= (~vc_data.remove_cap);
+ new_vxi->vx_flags |= vc_data.flags;
+ if (vc_data.flags & VX_INFO_INIT)
+ vx_set_initpid(new_vxi, current->tgid);
+ if (vc_data.flags & VX_INFO_NAMESPACE)
+ vx_set_namespace(new_vxi,
+ current->namespace, current->fs);
+ if (vc_data.flags & VX_INFO_NPROC)
+ new_vxi->limit.rlim[RLIMIT_NPROC] =
+ current->rlim[RLIMIT_NPROC].rlim_max;
+ ret = new_vxi->vx_id;
+ }
+ put_vx_info(new_vxi);
+ return ret;
+}
+
+
+
+/* set ipv4 root (syscall) */
+
+int vc_set_ipv4root(uint32_t nbip, void __user *data)
+{
+ int i, err = -EPERM;
+ struct vcmd_set_ipv4root_v3 vc_data;
+ struct nx_info *new_nxi, *nxi = current->nx_info;
+
+	if (nbip > NB_IPV4ROOT)
+ return -EINVAL;
+ if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+ return -EFAULT;
+
+ if (!nxi || nxi->ipv4[0] == 0 || capable(CAP_NET_ADMIN))
+ // We are allowed to change everything
+ err = 0;
+ else if (nxi) {
+ int found = 0;
+
+ // We are allowed to select a subset of the currently
+ // installed IP numbers. No new one allowed
+ // We can't change the broadcast address though
+ for (i=0; i<nbip; i++) {
+ int j;
+ __u32 nxip = vc_data.nx_mask_pair[i].ip;
+ for (j=0; j<nxi->nbipv4; j++) {
+ if (nxip == nxi->ipv4[j]) {
+ found++;
+ break;
+ }
+ }
+ }
+ if ((found == nbip) &&
+ (vc_data.broadcast == nxi->v4_bcast))
+ err = 0;
+ }
+ if (err)
+ return err;
+
+ new_nxi = create_nx_info();
+ if (!new_nxi)
+ return -EINVAL;
+
+ new_nxi->nbipv4 = nbip;
+ for (i=0; i<nbip; i++) {
+ new_nxi->ipv4[i] = vc_data.nx_mask_pair[i].ip;
+ new_nxi->mask[i] = vc_data.nx_mask_pair[i].mask;
+ }
+ new_nxi->v4_bcast = vc_data.broadcast;
+ current->nx_info = new_nxi;
+ current->nid = new_nxi->nx_id;
+ put_nx_info(nxi);
+ return 0;
+}
+
+
--- /dev/null
+/*
+ * linux/kernel/vserver/limit.c
+ *
+ * Virtual Server: Context Limits
+ *
+ * Copyright (C) 2004 Herbert Pötzl
+ *
+ * V0.01 broken out from vcontext V0.05
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/vserver/limit.h>
+#include <linux/vserver/context.h>
+#include <linux/vserver/switch.h>
+#include <linux/vinline.h>
+
+#include <asm/errno.h>
+#include <asm/uaccess.h>
+
+
+static int is_valid_rlimit(int id)
+{
+ int valid = 0;
+
+ switch (id) {
+ case RLIMIT_NPROC:
+ case RLIMIT_AS:
+ case RLIMIT_RSS:
+ case RLIMIT_MEMLOCK:
+ case RLIMIT_NOFILE:
+ valid = 1;
+ break;
+ }
+ return valid;
+}
+
+static inline uint64_t vc_get_rlim(struct vx_info *vxi, int id)
+{
+ unsigned long limit;
+
+ limit = vxi->limit.rlim[id];
+ if (limit == RLIM_INFINITY)
+ return CRLIM_INFINITY;
+ return limit;
+}
+
+int vc_get_rlimit(uint32_t id, void __user *data)
+{
+ struct vx_info *vxi;
+ struct vcmd_ctx_rlimit_v0 vc_data;
+
+ if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+ return -EFAULT;
+ if (!is_valid_rlimit(vc_data.id))
+ return -ENOTSUPP;
+
+ vxi = find_vx_info(id);
+ if (!vxi)
+ return -ESRCH;
+
+ vc_data.maximum = vc_get_rlim(vxi, vc_data.id);
+ vc_data.minimum = CRLIM_UNSET;
+ vc_data.softlimit = CRLIM_UNSET;
+ put_vx_info(vxi);
+
+ if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+ return -EFAULT;
+ return 0;
+}
+
+int vc_set_rlimit(uint32_t id, void __user *data)
+{
+ struct vx_info *vxi;
+ struct vcmd_ctx_rlimit_v0 vc_data;
+
+ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
+ return -EPERM;
+ if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+ return -EFAULT;
+ if (!is_valid_rlimit(vc_data.id))
+ return -ENOTSUPP;
+
+ vxi = find_vx_info(id);
+ if (!vxi)
+ return -ESRCH;
+
+ if (vc_data.maximum != CRLIM_KEEP)
+ vxi->limit.rlim[vc_data.id] = vc_data.maximum;
+	printk(KERN_DEBUG "setting [%d] = %d\n", vc_data.id, (int)vc_data.maximum);
+ put_vx_info(vxi);
+
+ return 0;
+}
+
+int vc_get_rlimit_mask(uint32_t id, void __user *data)
+{
+ static struct vcmd_ctx_rlimit_mask_v0 mask = {
+		/* minimum */
+		0,
+		/* softlimit */
+		0,
+		/* maximum */
+ (1 << RLIMIT_NPROC) |
+ (1 << RLIMIT_NOFILE) |
+ (1 << RLIMIT_MEMLOCK) |
+ (1 << RLIMIT_AS) |
+ (1 << RLIMIT_RSS)
+ };
+
+ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
+ return -EPERM;
+ if (copy_to_user(data, &mask, sizeof(mask)))
+ return -EFAULT;
+ return 0;
+}
+
+
+void vx_vsi_meminfo(struct sysinfo *val)
+{
+ struct vx_info *vxi = current->vx_info;
+ unsigned long v;
+
+ v = vxi->limit.rlim[RLIMIT_RSS];
+ if (v != RLIM_INFINITY)
+ val->totalram = min(val->totalram, v);
+ v = atomic_read(&vxi->limit.res[RLIMIT_RSS]);
+ val->freeram = (v < val->totalram) ? val->totalram - v : 0;
+ val->bufferram = 0;
+ val->totalhigh = 0;
+ val->freehigh = 0;
+ return;
+}
+
+void vx_vsi_swapinfo(struct sysinfo *val)
+{
+ struct vx_info *vxi = current->vx_info;
+ unsigned long w,v;
+
+ v = vxi->limit.rlim[RLIMIT_RSS];
+ w = vxi->limit.rlim[RLIMIT_AS];
+ if (w != RLIM_INFINITY)
+ val->totalswap = min(val->totalswap, w -
+ ((v != RLIM_INFINITY) ? v : 0));
+ w = atomic_read(&vxi->limit.res[RLIMIT_AS]);
+ val->freeswap = (w < val->totalswap) ? val->totalswap - w : 0;
+ return;
+}
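+
+/*
+ * Example (assumed limit values, for illustration): with
+ * limit.rlim[RLIMIT_RSS] = 1000 and limit.rlim[RLIMIT_AS] = 1500, the
+ * context sees totalram capped at 1000 and totalswap capped at
+ * 1500 - 1000 = 500, while freeram/freeswap are derived from the
+ * current usage counters in limit.res[].
+ */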
+
--- /dev/null
+/*
+ * linux/kernel/vserver/namespace.c
+ *
+ * Virtual Server: Context Namespace Support
+ *
+ * Copyright (C) 2003-2004 Herbert Pötzl
+ *
+ * V0.01 broken out from context.c 0.07
+ * V0.02 added task locking for namespace
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/utsname.h>
+#include <linux/vserver/namespace.h>
+#include <linux/vinline.h>
+#include <linux/namespace.h>
+#include <linux/dcache.h>
+
+#include <asm/errno.h>
+#include <asm/uaccess.h>
+
+
+/* virtual host info names */
+
+static char * vx_vhi_name(struct vx_info *vxi, int id)
+{
+ switch (id) {
+ case VHIN_CONTEXT:
+ return vxi->vx_name;
+ case VHIN_SYSNAME:
+ return vxi->cvirt.utsname.sysname;
+ case VHIN_NODENAME:
+ return vxi->cvirt.utsname.nodename;
+ case VHIN_RELEASE:
+ return vxi->cvirt.utsname.release;
+ case VHIN_VERSION:
+ return vxi->cvirt.utsname.version;
+ case VHIN_MACHINE:
+ return vxi->cvirt.utsname.machine;
+ case VHIN_DOMAINNAME:
+ return vxi->cvirt.utsname.domainname;
+ default:
+ return NULL;
+ }
+ return NULL;
+}
+
+int vc_set_vhi_name(uint32_t id, void __user *data)
+{
+ struct vx_info *vxi;
+ struct vcmd_vx_vhi_name_v0 vc_data;
+ char *name;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+ return -EFAULT;
+
+ vxi = find_vx_info(id);
+ if (!vxi)
+ return -ESRCH;
+
+ name = vx_vhi_name(vxi, vc_data.field);
+ if (name)
+ memcpy(name, vc_data.name, 65);
+ put_vx_info(vxi);
+ return (name ? 0 : -EFAULT);
+}
+
+int vc_get_vhi_name(uint32_t id, void __user *data)
+{
+ struct vx_info *vxi;
+ struct vcmd_vx_vhi_name_v0 vc_data;
+ char *name;
+
+ if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+ return -EFAULT;
+
+ vxi = find_vx_info(id);
+ if (!vxi)
+ return -ESRCH;
+
+ name = vx_vhi_name(vxi, vc_data.field);
+ if (!name)
+ goto out_put;
+
+	memcpy(vc_data.name, name, 65);
+	if (copy_to_user (data, &vc_data, sizeof(vc_data))) {
+		put_vx_info(vxi);
+		return -EFAULT;
+	}
+out_put:
+ put_vx_info(vxi);
+ return (name ? 0 : -EFAULT);
+}
+
+/* namespace functions */
+
+#include <linux/namespace.h>
+
+int vx_set_namespace(struct vx_info *vxi, struct namespace *ns, struct fs_struct *fs)
+{
+ struct fs_struct *fs_copy;
+
+ if (vxi->vx_namespace)
+ return -EPERM;
+ if (!ns || !fs)
+ return -EINVAL;
+
+ fs_copy = copy_fs_struct(fs);
+ if (!fs_copy)
+ return -ENOMEM;
+
+ get_namespace(ns);
+ vxi->vx_namespace = ns;
+ vxi->vx_fs = fs_copy;
+ return 0;
+}
+
+int vc_enter_namespace(uint32_t id, void *data)
+{
+ struct vx_info *vxi;
+ struct fs_struct *old_fs, *fs;
+ struct namespace *old_ns;
+ int ret = 0;
+
+ if (!vx_check(0, VX_ADMIN))
+ return -ENOSYS;
+
+ vxi = find_vx_info(id);
+ if (!vxi)
+ return -ESRCH;
+
+ ret = -EINVAL;
+ if (!vxi->vx_namespace)
+ goto out_put;
+
+ ret = -ENOMEM;
+ fs = copy_fs_struct(vxi->vx_fs);
+ if (!fs)
+ goto out_put;
+
+ ret = 0;
+ task_lock(current);
+ old_ns = current->namespace;
+ old_fs = current->fs;
+ get_namespace(vxi->vx_namespace);
+ current->namespace = vxi->vx_namespace;
+ current->fs = fs;
+ task_unlock(current);
+
+ put_namespace(old_ns);
+ put_fs_struct(old_fs);
+out_put:
+ put_vx_info(vxi);
+ return ret;
+}
+
+int vc_cleanup_namespace(uint32_t id, void *data)
+{
+	down_write(&current->namespace->sem);
+ // spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
+ umount_unused(current->namespace->root, current->fs);
+ spin_unlock(&vfsmount_lock);
+ // spin_unlock(&dcache_lock);
+	up_write(&current->namespace->sem);
+ return 0;
+}
+
+int vc_set_namespace(uint32_t id, void __user *data)
+{
+ struct fs_struct *fs;
+ struct namespace *ns;
+ struct vx_info *vxi;
+ int ret;
+
+ if (vx_check(0, VX_ADMIN|VX_WATCH))
+ return -ENOSYS;
+
+ task_lock(current);
+ vxi = get_vx_info(current->vx_info);
+ fs = current->fs;
+ atomic_inc(&fs->count);
+ ns = current->namespace;
+ get_namespace(current->namespace);
+ task_unlock(current);
+
+ ret = vx_set_namespace(vxi, ns, fs);
+
+ put_namespace(ns);
+ put_fs_struct(fs);
+ put_vx_info(vxi);
+ return ret;
+}
+
--- /dev/null
+/*
+ * linux/kernel/vserver/network.c
+ *
+ * Virtual Server: Network Support
+ *
+ * Copyright (C) 2003-2004 Herbert Pötzl
+ *
+ * V0.01 broken out from vcontext V0.05
+ * V0.02 cleaned up implementation
+ * V0.03 added equiv nx commands
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/vserver/network.h>
+#include <linux/ninline.h>
+
+#include <asm/errno.h>
+
+
+LIST_HEAD(nx_infos);
+
+spinlock_t nxlist_lock
+ __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+
+
+/*
+ * struct nx_info allocation and deallocation
+ */
+
+static struct nx_info *alloc_nx_info(void)
+{
+ struct nx_info *new = NULL;
+
+ nxdprintk("alloc_nx_info()\n");
+ /* would this benefit from a slab cache? */
+ new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
+ if (!new)
+		return NULL;
+
+ memset (new, 0, sizeof(struct nx_info));
+ /* rest of init goes here */
+
+ nxdprintk("alloc_nx_info() = %p\n", new);
+ return new;
+}
+
+void free_nx_info(struct nx_info *nxi)
+{
+ nxdprintk("free_nx_info(%p)\n", nxi);
+ kfree(nxi);
+}
+
+struct nx_info *create_nx_info(void)
+{
+ struct nx_info *new;
+ static int gnid = 1;
+
+ nxdprintk("create_nx_info()\n");
+ if (!(new = alloc_nx_info()))
+		return NULL;
+
+ spin_lock(&nxlist_lock);
+
+ /* new ip info */
+ atomic_set(&new->nx_refcount, 1);
+ new->nx_id = gnid++;
+ list_add(&new->nx_list, &nx_infos);
+
+ spin_unlock(&nxlist_lock);
+ return new;
+}
+
+
+/*
+ * struct nx_info search by id
+ * assumes nxlist_lock is held
+ */
+
+static __inline__ struct nx_info *__find_nx_info(int id)
+{
+ struct nx_info *nxi;
+
+ list_for_each_entry(nxi, &nx_infos, nx_list)
+ if (nxi->nx_id == id)
+ return nxi;
+	return NULL;
+}
+
+
+/*
+ * struct nx_info ref stuff
+ */
+
+struct nx_info *find_nx_info(int id)
+{
+ struct nx_info *nxi;
+
+ if (id < 0) {
+ nxi = current->nx_info;
+ get_nx_info(nxi);
+ } else {
+ spin_lock(&nxlist_lock);
+ if ((nxi = __find_nx_info(id)))
+ get_nx_info(nxi);
+ spin_unlock(&nxlist_lock);
+ }
+ return nxi;
+}
+
+/*
+ * verify that id is a valid nid
+ */
+
+int nx_info_id_valid(int id)
+{
+ int valid;
+
+ spin_lock(&nxlist_lock);
+ valid = (__find_nx_info(id) != NULL);
+ spin_unlock(&nxlist_lock);
+ return valid;
+}
+
+
+/*
+ * dynamic context id ...
+ */
+
+static __inline__ nid_t __nx_dynamic_id(void)
+{
+ static nid_t seq = MAX_N_CONTEXT;
+ nid_t barrier = seq;
+
+ do {
+ if (++seq > MAX_N_CONTEXT)
+ seq = MIN_D_CONTEXT;
+ if (!__find_nx_info(seq))
+ return seq;
+ } while (barrier != seq);
+ return 0;
+}
+
+static struct nx_info * __foc_nx_info(int id, int *err)
+{
+ struct nx_info *new, *nxi = NULL;
+
+ nxdprintk("foc_nx_info(%d)\n", id);
+ // if (!(new = alloc_nx_info(id))) {
+ if (!(new = alloc_nx_info())) {
+ *err = -ENOMEM;
+ return NULL;
+ }
+
+ spin_lock(&nxlist_lock);
+
+ /* dynamic context requested */
+ if (id == IP_DYNAMIC_ID) {
+ id = __nx_dynamic_id();
+		if (!id) {
+			printk(KERN_ERR "no dynamic context available.\n");
+			*err = -EAGAIN;
+			goto out_unlock;
+		}
+ new->nx_id = id;
+ }
+ /* existing context requested */
+ else if ((nxi = __find_nx_info(id))) {
+ /* context in setup is not available */
+ if (nxi->nx_flags & VXF_STATE_SETUP) {
+ nxdprintk("foc_nx_info(%d) = %p (not available)\n", id, nxi);
+ nxi = NULL;
+ *err = -EBUSY;
+ } else {
+ nxdprintk("foc_nx_info(%d) = %p (found)\n", id, nxi);
+ get_nx_info(nxi);
+ *err = 0;
+ }
+ goto out_unlock;
+ }
+
+ /* new context requested */
+ nxdprintk("foc_nx_info(%d) = %p (new)\n", id, new);
+ atomic_set(&new->nx_refcount, 1);
+ list_add(&new->nx_list, &nx_infos);
+ nxi = new, new = NULL;
+ *err = 1;
+
+out_unlock:
+ spin_unlock(&nxlist_lock);
+ if (new)
+ free_nx_info(new);
+ return nxi;
+}
+
+
+struct nx_info *find_or_create_nx_info(int id)
+{
+ int err;
+
+ return __foc_nx_info(id, &err);
+}
+
+/*
+ * migrate task to new network
+ */
+
+int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
+{
+	struct nx_info *old_nxi;
+	int ret = 0;
+
+	if (!p || !nxi)
+		BUG();
+
+	old_nxi = task_get_nx_info(p);
+	nxdprintk("nx_migrate_task(%p,%p[#%d.%d])\n", p, nxi,
+		nxi->nx_id, atomic_read(&nxi->nx_refcount));
+	if (old_nxi == nxi)
+		goto out;
+
+	task_lock(p);
+	/* drop the task's reference on its previous nx_info (if any)
+	   before installing the new one */
+	clr_nx_info(&p->nx_info);
+	set_nx_info(&p->nx_info, nxi);
+	p->nid = nxi->nx_id;
+	task_unlock(p);
+
+out:
+	/* balance the reference taken by task_get_nx_info() above */
+	put_nx_info(old_nxi);
+	return ret;
+}
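+
+/*
+ * Typical caller sequence (as in vc_net_create() below, shown here
+ * only as an illustration):
+ *
+ *	nxi = find_or_create_nx_info(nid);
+ *	if (nxi) {
+ *		nx_migrate_task(current, nxi);
+ *		put_nx_info(nxi);
+ *	}
+ */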
+
+
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+
+static inline int __addr_in_nx_info(u32 addr, struct nx_info *nxi)
+{
+ int i, nbip;
+
+ nbip = nxi->nbipv4;
+ for (i=0; i<nbip; i++)
+ if (nxi->ipv4[i] == addr)
+ return 1;
+ return 0;
+}
+
+int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
+{
+ if (!nxi)
+ return 1;
+
+ return __addr_in_nx_info(ifa->ifa_address, nxi);
+}
+
+int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
+{
+ struct in_device *in_dev = __in_dev_get(dev);
+ struct in_ifaddr **ifap = NULL;
+ struct in_ifaddr *ifa = NULL;
+
+ if (!nxi)
+ return 1;
+ if (!in_dev)
+ return 0;
+
+ for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
+ ifap = &ifa->ifa_next) {
+ if (__addr_in_nx_info(ifa->ifa_address, nxi))
+ return 1;
+ }
+ return 0;
+}
+
+
+
+
+/* vserver syscall commands below here */
+
+/* task nid and nx_info functions */
+
+#include <asm/uaccess.h>
+
+
+int vc_task_nid(uint32_t id, void __user *data)
+{
+ nid_t nid;
+
+ if (id) {
+ struct task_struct *tsk;
+
+ if (!vx_check(0, VX_ADMIN|VX_WATCH))
+ return -EPERM;
+
+ read_lock(&tasklist_lock);
+ tsk = find_task_by_pid(id);
+ nid = (tsk) ? tsk->nid : -ESRCH;
+ read_unlock(&tasklist_lock);
+ }
+ else
+ nid = current->nid;
+ return nid;
+}
+
+
+int vc_nx_info(uint32_t id, void __user *data)
+{
+ struct nx_info *nxi;
+ struct vcmd_nx_info_v0 vc_data;
+
+ if (!vx_check(0, VX_ADMIN))
+ return -ENOSYS;
+ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
+ return -EPERM;
+
+ nxi = find_nx_info(id);
+ if (!nxi)
+ return -ESRCH;
+
+ vc_data.nid = nxi->nx_id;
+ put_nx_info(nxi);
+
+ if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+ return -EFAULT;
+ return 0;
+}
+
+
+/* network functions */
+
+int vc_net_create(uint32_t nid, void __user *data)
+{
+ // int ret = -ENOMEM;
+ struct nx_info *new_nxi;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if ((nid >= MIN_D_CONTEXT) && (nid != VX_DYNAMIC_ID))
+ return -EINVAL;
+
+ if (nid < 1)
+ return -EINVAL;
+
+ new_nxi = __foc_nx_info(nid, &ret);
+ if (!new_nxi)
+ return ret;
+ if (!(new_nxi->nx_flags & VXF_STATE_SETUP)) {
+ ret = -EEXIST;
+ goto out_put;
+ }
+
+ ret = new_nxi->nx_id;
+ nx_migrate_task(current, new_nxi);
+out_put:
+ put_nx_info(new_nxi);
+ return ret;
+}
+
+
+int vc_net_migrate(uint32_t id, void __user *data)
+{
+ struct nx_info *nxi;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ nxi = find_nx_info(id);
+ if (!nxi)
+ return -ESRCH;
+ nx_migrate_task(current, nxi);
+ put_nx_info(nxi);
+ return 0;
+}
+
+int vc_net_add(uint32_t id, void __user *data)
+{
+ struct nx_info *nxi;
+ struct vcmd_net_nx_v0 vc_data;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+ return -EFAULT;
+
+ nxi = find_nx_info(id);
+ if (!nxi)
+ return -ESRCH;
+
+ // add ip to net context here
+ put_nx_info(nxi);
+ return 0;
+}
+
+int vc_net_remove(uint32_t id, void __user *data)
+{
+ struct nx_info *nxi;
+ struct vcmd_net_nx_v0 vc_data;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+ return -EFAULT;
+
+ nxi = find_nx_info(id);
+ if (!nxi)
+ return -ESRCH;
+
+ // rem ip from net context here
+ put_nx_info(nxi);
+ return 0;
+}
+
+
+
+int vc_get_nflags(uint32_t id, void __user *data)
+{
+ struct nx_info *nxi;
+ struct vcmd_net_flags_v0 vc_data;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ nxi = find_nx_info(id);
+ if (!nxi)
+ return -ESRCH;
+
+ vc_data.flagword = nxi->nx_flags;
+
+ // vc_data.mask = ~0UL;
+ /* special STATE flag handling */
+ vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, IPF_ONE_TIME);
+
+ put_nx_info(nxi);
+
+ if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+ return -EFAULT;
+ return 0;
+}
+
+int vc_set_nflags(uint32_t id, void __user *data)
+{
+ struct nx_info *nxi;
+ struct vcmd_net_flags_v0 vc_data;
+ uint64_t mask, trigger;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+ return -EFAULT;
+
+ nxi = find_nx_info(id);
+ if (!nxi)
+ return -ESRCH;
+
+ /* special STATE flag handling */
+ mask = vx_mask_mask(vc_data.mask, nxi->nx_flags, IPF_ONE_TIME);
+ trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);
+ // if (trigger & IPF_STATE_SETUP)
+
+ nxi->nx_flags = vx_mask_flags(nxi->nx_flags,
+ vc_data.flagword, mask);
+ put_nx_info(nxi);
+ return 0;
+}
+
+int vc_get_ncaps(uint32_t id, void __user *data)
+{
+ struct nx_info *nxi;
+ struct vcmd_net_caps_v0 vc_data;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ nxi = find_nx_info(id);
+ if (!nxi)
+ return -ESRCH;
+
+ vc_data.ncaps = nxi->nx_ncaps;
+ vc_data.cmask = ~0UL;
+ put_nx_info(nxi);
+
+ if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+ return -EFAULT;
+ return 0;
+}
+
+int vc_set_ncaps(uint32_t id, void __user *data)
+{
+ struct nx_info *nxi;
+ struct vcmd_net_caps_v0 vc_data;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+ return -EFAULT;
+
+ nxi = find_nx_info(id);
+ if (!nxi)
+ return -ESRCH;
+
+ nxi->nx_ncaps = vx_mask_flags(nxi->nx_ncaps,
+ vc_data.ncaps, vc_data.cmask);
+ put_nx_info(nxi);
+ return 0;
+}
+
+
+#include <linux/module.h>
+
+EXPORT_SYMBOL_GPL(free_nx_info);
+EXPORT_SYMBOL_GPL(nxlist_lock);
+
--- /dev/null
+/*
+ * linux/kernel/vserver/proc.c
+ *
+ * Virtual Context Support
+ *
+ * Copyright (C) 2003-2004 Herbert Pötzl
+ *
+ * V0.01 basic structure
+ * V0.02 adaptation vs1.3.0
+ * V0.03 proc permissions
+ * V0.04 locking/generic
+ * V0.05 next generation procfs
+ * V0.06 inode validation
+ * V0.07 generic rewrite vid
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+#include <linux/vserver.h>
+
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+
+
+static struct proc_dir_entry *proc_virtual;
+
+static struct proc_dir_entry *proc_vnet;
+
+
+enum vid_directory_inos {
+ PROC_XID_INO = 32,
+ PROC_XID_INFO,
+ PROC_XID_STATUS,
+ PROC_XID_LIMIT,
+ PROC_XID_SCHED,
+ PROC_XID_CVIRT,
+ PROC_XID_CACCT,
+
+ PROC_NID_INO = 64,
+ PROC_NID_INFO,
+ PROC_NID_STATUS,
+};
+
+#define PROC_VID_MASK 0x60
+
+
+/* first the actual feeds */
+
+
+static int proc_virtual_info(int vid, char *buffer)
+{
+ return sprintf(buffer,
+ "VCIVersion:\t%04x:%04x\n"
+ "VCISyscall:\t%d\n"
+ ,VCI_VERSION >> 16
+ ,VCI_VERSION & 0xFFFF
+ ,__NR_vserver
+ );
+}
+
+
+int proc_xid_info (int vid, char *buffer)
+{
+ struct vx_info *vxi;
+ int length;
+
+ vxi = find_vx_info(vid);
+ if (!vxi)
+ return 0;
+ length = sprintf(buffer,
+ "ID:\t%d\n"
+ "Info:\t%p\n"
+ "Init:\t%d\n"
+ ,vxi->vx_id
+ ,vxi
+ ,vxi->vx_initpid
+ );
+ put_vx_info(vxi);
+ return length;
+}
+
+int proc_xid_status (int vid, char *buffer)
+{
+ struct vx_info *vxi;
+ int length;
+
+ vxi = find_vx_info(vid);
+ if (!vxi)
+ return 0;
+ length = sprintf(buffer,
+ "RefC:\t%d\n"
+ "Flags:\t%016llx\n"
+ "BCaps:\t%016llx\n"
+ "CCaps:\t%016llx\n"
+ "Ticks:\t%d\n"
+ ,atomic_read(&vxi->vx_refcount)
+ ,vxi->vx_flags
+ ,vxi->vx_bcaps
+ ,vxi->vx_ccaps
+ ,atomic_read(&vxi->limit.ticks)
+ );
+ put_vx_info(vxi);
+ return length;
+}
+
+int proc_xid_limit (int vid, char *buffer)
+{
+ struct vx_info *vxi;
+ int length;
+
+ vxi = find_vx_info(vid);
+ if (!vxi)
+ return 0;
+ length = vx_info_proc_limit(&vxi->limit, buffer);
+ put_vx_info(vxi);
+ return length;
+}
+
+int proc_xid_sched (int vid, char *buffer)
+{
+ struct vx_info *vxi;
+ int length;
+
+ vxi = find_vx_info(vid);
+ if (!vxi)
+ return 0;
+ length = vx_info_proc_sched(&vxi->sched, buffer);
+ put_vx_info(vxi);
+ return length;
+}
+
+int proc_xid_cvirt (int vid, char *buffer)
+{
+ struct vx_info *vxi;
+ int length;
+
+ vxi = find_vx_info(vid);
+ if (!vxi)
+ return 0;
+ length = vx_info_proc_cvirt(&vxi->cvirt, buffer);
+ put_vx_info(vxi);
+ return length;
+}
+
+int proc_xid_cacct (int vid, char *buffer)
+{
+ struct vx_info *vxi;
+ int length;
+
+ vxi = find_vx_info(vid);
+ if (!vxi)
+ return 0;
+ length = vx_info_proc_cacct(&vxi->cacct, buffer);
+ put_vx_info(vxi);
+ return length;
+}
+
+
+static int proc_vnet_info(int vid, char *buffer)
+{
+ return sprintf(buffer,
+ "VCIVersion:\t%04x:%04x\n"
+ "VCISyscall:\t%d\n"
+ ,VCI_VERSION >> 16
+ ,VCI_VERSION & 0xFFFF
+ ,__NR_vserver
+ );
+}
+
+#define atoquad(a) \
+ (((a)>>0) & 0xff), (((a)>>8) & 0xff), \
+ (((a)>>16) & 0xff), (((a)>>24) & 0xff)
+
+int proc_nid_info (int vid, char *buffer)
+{
+ struct nx_info *nxi;
+ int length, i;
+
+ nxi = find_nx_info(vid);
+ if (!nxi)
+ return 0;
+ length = sprintf(buffer,
+ "ID:\t%d\n"
+ "Info:\t%p\n"
+ ,nxi->nx_id
+ ,nxi
+ );
+ for (i=0; i<nxi->nbipv4; i++) {
+ length += sprintf(buffer + length,
+ "%d:\t%d.%d.%d.%d/%d.%d.%d.%d\n", i,
+ atoquad(nxi->ipv4[i]),
+ atoquad(nxi->mask[i]));
+ }
+ put_nx_info(nxi);
+ return length;
+}
+
+int proc_nid_status (int vid, char *buffer)
+{
+ struct nx_info *nxi;
+ int length;
+
+ nxi = find_nx_info(vid);
+ if (!nxi)
+ return 0;
+ length = sprintf(buffer,
+ "RefC:\t%d\n"
+ ,atomic_read(&nxi->nx_refcount)
+ );
+ put_nx_info(nxi);
+ return length;
+}
+
+/* here the inode helpers */
+
+
+
+#define fake_ino(id,ino) (((id)<<16)|(ino))
+
+#define inode_vid(i) ((i)->i_ino >> 16)
+#define inode_type(i) ((i)->i_ino & 0xFFFF)
+
+#define MAX_MULBY10 ((~0U-9)/10)
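+
+/*
+ * Example (for illustration): the "limit" entry of context 42 gets the
+ * fake inode number fake_ino(42, PROC_XID_LIMIT) = (42 << 16) | 35 =
+ * 0x2a0023; inode_vid() recovers 42, inode_type() recovers 35, and
+ * (35 & PROC_VID_MASK) == PROC_XID_INO selects the xid handlers.
+ */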
+
+
+static struct inode *proc_vid_make_inode(struct super_block * sb,
+ int vid, int ino)
+{
+ struct inode *inode = new_inode(sb);
+
+ if (!inode)
+ goto out;
+
+ inode->i_mtime = inode->i_atime =
+ inode->i_ctime = CURRENT_TIME;
+ inode->i_ino = fake_ino(vid, ino);
+
+ inode->i_uid = 0;
+ inode->i_gid = 0;
+ // inode->i_xid = xid;
+out:
+ return inode;
+}
+
+static int proc_vid_revalidate(struct dentry * dentry, struct nameidata *nd)
+{
+ struct inode * inode = dentry->d_inode;
+ int vid, valid=0;
+
+ vid = inode_vid(inode);
+ switch (inode_type(inode) & PROC_VID_MASK) {
+ case PROC_XID_INO:
+ valid = vx_info_id_valid(vid);
+ break;
+ case PROC_NID_INO:
+ valid = nx_info_id_valid(vid);
+ break;
+ }
+ if (valid)
+ return 1;
+ d_drop(dentry);
+ return 0;
+}
+
+/*
+static int proc_vid_delete_dentry(struct dentry * dentry)
+{
+ return 1;
+}
+*/
+
+
+#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
+
+static ssize_t proc_vid_info_read(struct file * file, char * buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode * inode = file->f_dentry->d_inode;
+ unsigned long page;
+ ssize_t length;
+ ssize_t end;
+ int vid;
+
+ if (count > PROC_BLOCK_SIZE)
+ count = PROC_BLOCK_SIZE;
+ if (!(page = __get_free_page(GFP_KERNEL)))
+ return -ENOMEM;
+
+ vid = inode_vid(inode);
+ length = PROC_I(inode)->op.proc_vid_read(vid, (char*)page);
+
+ if (length < 0) {
+ free_page(page);
+ return length;
+ }
+ /* Static 4kB (or whatever) block capacity */
+ if (*ppos >= length) {
+ free_page(page);
+ return 0;
+ }
+ if (count + *ppos > length)
+ count = length - *ppos;
+ end = count + *ppos;
+	if (copy_to_user(buf, (char *) page + *ppos, count)) {
+		free_page(page);
+		return -EFAULT;
+	}
+ *ppos = end;
+ free_page(page);
+ return count;
+}
+
+
+
+
+
+/* here comes the lower level (vid) */
+
+static struct file_operations proc_vid_info_file_operations = {
+ read: proc_vid_info_read,
+};
+
+static struct dentry_operations proc_vid_dentry_operations = {
+ d_revalidate: proc_vid_revalidate,
+// d_delete: proc_vid_delete_dentry,
+};
+
+
+struct vid_entry {
+ int type;
+ int len;
+ char *name;
+ mode_t mode;
+};
+
+#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)}
+
+static struct vid_entry vx_base_stuff[] = {
+ E(PROC_XID_INFO, "info", S_IFREG|S_IRUGO),
+ E(PROC_XID_STATUS, "status", S_IFREG|S_IRUGO),
+ E(PROC_XID_LIMIT, "limit", S_IFREG|S_IRUGO),
+ E(PROC_XID_SCHED, "sched", S_IFREG|S_IRUGO),
+ E(PROC_XID_CVIRT, "cvirt", S_IFREG|S_IRUGO),
+ E(PROC_XID_CACCT, "cacct", S_IFREG|S_IRUGO),
+ {0,0,NULL,0}
+};
+
+static struct vid_entry vn_base_stuff[] = {
+ E(PROC_NID_INFO, "info", S_IFREG|S_IRUGO),
+ E(PROC_NID_STATUS, "status", S_IFREG|S_IRUGO),
+ {0,0,NULL,0}
+};
+
+
+
+static struct dentry *proc_vid_lookup(struct inode *dir,
+ struct dentry *dentry, struct nameidata *nd)
+{
+ struct inode *inode;
+ struct vid_entry *p;
+ int error;
+
+ error = -ENOENT;
+ inode = NULL;
+
+ switch (inode_type(dir)) {
+ case PROC_XID_INO:
+ p = vx_base_stuff;
+ break;
+ case PROC_NID_INO:
+ p = vn_base_stuff;
+ break;
+ default:
+ goto out;
+ }
+
+ for (; p->name; p++) {
+ if (p->len != dentry->d_name.len)
+ continue;
+ if (!memcmp(dentry->d_name.name, p->name, p->len))
+ break;
+ }
+ if (!p->name)
+ goto out;
+
+ error = -EINVAL;
+ inode = proc_vid_make_inode(dir->i_sb, inode_vid(dir), p->type);
+ if (!inode)
+ goto out;
+
+ switch(p->type) {
+ case PROC_XID_INFO:
+ PROC_I(inode)->op.proc_vid_read = proc_xid_info;
+ break;
+ case PROC_XID_STATUS:
+ PROC_I(inode)->op.proc_vid_read = proc_xid_status;
+ break;
+ case PROC_XID_LIMIT:
+ PROC_I(inode)->op.proc_vid_read = proc_xid_limit;
+ break;
+ case PROC_XID_SCHED:
+ PROC_I(inode)->op.proc_vid_read = proc_xid_sched;
+ break;
+ case PROC_XID_CVIRT:
+ PROC_I(inode)->op.proc_vid_read = proc_xid_cvirt;
+ break;
+ case PROC_XID_CACCT:
+ PROC_I(inode)->op.proc_vid_read = proc_xid_cacct;
+ break;
+
+ case PROC_NID_INFO:
+ PROC_I(inode)->op.proc_vid_read = proc_nid_info;
+ break;
+ case PROC_NID_STATUS:
+ PROC_I(inode)->op.proc_vid_read = proc_nid_status;
+ break;
+
+ default:
+		printk("procfs: impossible type (%d)\n", p->type);
+ iput(inode);
+ return ERR_PTR(-EINVAL);
+ }
+ inode->i_mode = p->mode;
+// inode->i_op = &proc_vid_info_inode_operations;
+ inode->i_fop = &proc_vid_info_file_operations;
+ inode->i_nlink = 1;
+ inode->i_flags|=S_IMMUTABLE;
+
+ dentry->d_op = &proc_vid_dentry_operations;
+ d_add(dentry, inode);
+ error = 0;
+out:
+ return ERR_PTR(error);
+}
+
+
+static int proc_vid_readdir(struct file * filp,
+ void * dirent, filldir_t filldir)
+{
+ int i, size;
+ struct inode *inode = filp->f_dentry->d_inode;
+ struct vid_entry *p;
+
+ i = filp->f_pos;
+ switch (i) {
+ case 0:
+ if (filldir(dirent, ".", 1, i,
+ inode->i_ino, DT_DIR) < 0)
+ return 0;
+ i++;
+ filp->f_pos++;
+ /* fall through */
+ case 1:
+ if (filldir(dirent, "..", 2, i,
+ PROC_ROOT_INO, DT_DIR) < 0)
+ return 0;
+ i++;
+ filp->f_pos++;
+ /* fall through */
+ default:
+ i -= 2;
+ switch (inode_type(inode)) {
+ case PROC_XID_INO:
+ size = sizeof(vx_base_stuff);
+ p = vx_base_stuff + i;
+ break;
+ case PROC_NID_INO:
+ size = sizeof(vn_base_stuff);
+ p = vn_base_stuff + i;
+ break;
+ default:
+ return 1;
+ }
+ if (i >= size/sizeof(struct vid_entry))
+ return 1;
+ while (p->name) {
+ if (filldir(dirent, p->name, p->len,
+ filp->f_pos, fake_ino(inode_vid(inode),
+ p->type), p->mode >> 12) < 0)
+ return 0;
+ filp->f_pos++;
+ p++;
+ }
+ }
+ return 1;
+}
+
+
+
+
+/* now the upper level (virtual) */
+
+static struct file_operations proc_vid_file_operations = {
+ read: generic_read_dir,
+ readdir: proc_vid_readdir,
+};
+
+static struct inode_operations proc_vid_inode_operations = {
+ lookup: proc_vid_lookup,
+};
+
+
+
+static __inline__ int atovid(const char *str, int len)
+{
+ int vid, c;
+
+ vid = 0;
+ while (len-- > 0) {
+ c = *str - '0';
+ str++;
+		if (c < 0 || c > 9)
+ return -1;
+ if (vid >= MAX_MULBY10)
+ return -1;
+ vid *= 10;
+ vid += c;
+ if (!vid)
+ return -1;
+ }
+ return vid;
+}
+
+
+struct dentry *proc_virtual_lookup(struct inode *dir,
+ struct dentry * dentry, struct nameidata *nd)
+{
+ int xid, len, ret;
+ struct vx_info *vxi;
+ const char *name;
+ struct inode *inode;
+
+ name = dentry->d_name.name;
+ len = dentry->d_name.len;
+ ret = -ENOMEM;
+
+ if (len == 7 && !memcmp(name, "current", 7)) {
+ inode = new_inode(dir->i_sb);
+ if (!inode)
+ goto out;
+ inode->i_mtime = inode->i_atime =
+ inode->i_ctime = CURRENT_TIME;
+ inode->i_ino = fake_ino(1, PROC_XID_INO);
+ inode->i_mode = S_IFLNK|S_IRWXUGO;
+ inode->i_uid = inode->i_gid = 0;
+ inode->i_size = 64;
+// inode->i_op = &proc_current_inode_operations;
+ d_add(dentry, inode);
+ return NULL;
+ }
+ if (len == 4 && !memcmp(name, "info", 4)) {
+ inode = proc_vid_make_inode(dir->i_sb, 0, PROC_XID_INFO);
+ if (!inode)
+ goto out;
+ inode->i_fop = &proc_vid_info_file_operations;
+ PROC_I(inode)->op.proc_vid_read = proc_virtual_info;
+ inode->i_mode = S_IFREG|S_IRUGO;
+// inode->i_size = 64;
+// inode->i_op = &proc_current_inode_operations;
+ d_add(dentry, inode);
+ return NULL;
+ }
+
+ ret = -ENOENT;
+ xid = atovid(name, len);
+ if (xid < 0)
+ goto out;
+ vxi = find_vx_info(xid);
+ if (!vxi)
+ goto out;
+
+ inode = NULL;
+ if (vx_check(xid, VX_ADMIN|VX_WATCH|VX_IDENT))
+ inode = proc_vid_make_inode(dir->i_sb,
+ vxi->vx_id, PROC_XID_INO);
+ if (!inode)
+ goto out_release;
+
+ inode->i_mode = S_IFDIR|S_IRUGO;
+ inode->i_op = &proc_vid_inode_operations;
+ inode->i_fop = &proc_vid_file_operations;
+ inode->i_nlink = 2;
+ inode->i_flags|=S_IMMUTABLE;
+
+ dentry->d_op = &proc_vid_dentry_operations;
+ d_add(dentry, inode);
+ ret = 0;
+
+out_release:
+ put_vx_info(vxi);
+out:
+ return ERR_PTR(ret);
+}
+
+
+struct dentry *proc_vnet_lookup(struct inode *dir,
+ struct dentry * dentry, struct nameidata *nd)
+{
+ int nid, len, ret;
+ struct nx_info *nxi;
+ const char *name;
+ struct inode *inode;
+
+ name = dentry->d_name.name;
+ len = dentry->d_name.len;
+ ret = -ENOMEM;
+ if (len == 7 && !memcmp(name, "current", 7)) {
+ inode = new_inode(dir->i_sb);
+ if (!inode)
+ goto out;
+ inode->i_mtime = inode->i_atime =
+ inode->i_ctime = CURRENT_TIME;
+ inode->i_ino = fake_ino(1, PROC_NID_INO);
+ inode->i_mode = S_IFLNK|S_IRWXUGO;
+ inode->i_uid = inode->i_gid = 0;
+ inode->i_size = 64;
+// inode->i_op = &proc_current_inode_operations;
+ d_add(dentry, inode);
+ return NULL;
+ }
+ if (len == 4 && !memcmp(name, "info", 4)) {
+ inode = proc_vid_make_inode(dir->i_sb, 0, PROC_NID_INFO);
+ if (!inode)
+ goto out;
+ inode->i_fop = &proc_vid_info_file_operations;
+ PROC_I(inode)->op.proc_vid_read = proc_vnet_info;
+ inode->i_mode = S_IFREG|S_IRUGO;
+// inode->i_size = 64;
+// inode->i_op = &proc_current_inode_operations;
+ d_add(dentry, inode);
+ return NULL;
+ }
+
+ ret = -ENOENT;
+ nid = atovid(name, len);
+ if (nid < 0)
+ goto out;
+ nxi = find_nx_info(nid);
+ if (!nxi)
+ goto out;
+
+ inode = NULL;
+ if (1)
+ inode = proc_vid_make_inode(dir->i_sb,
+ nxi->nx_id, PROC_NID_INO);
+ if (!inode)
+ goto out_release;
+
+ inode->i_mode = S_IFDIR|S_IRUGO;
+ inode->i_op = &proc_vid_inode_operations;
+ inode->i_fop = &proc_vid_file_operations;
+ inode->i_nlink = 2;
+ inode->i_flags|=S_IMMUTABLE;
+
+ dentry->d_op = &proc_vid_dentry_operations;
+ d_add(dentry, inode);
+ ret = 0;
+
+out_release:
+ put_nx_info(nxi);
+out:
+ return ERR_PTR(ret);
+}
+
+
+
+
+#define PROC_NUMBUF 10
+#define PROC_MAXVIDS 32
+
+
+static int get_xid_list(int index, unsigned int *xids)
+{
+ struct vx_info *p;
+ int nr_xids = 0;
+
+ index--;
+ spin_lock(&vxlist_lock);
+ list_for_each_entry(p, &vx_infos, vx_list) {
+ int xid = p->vx_id;
+
+ if (--index >= 0)
+ continue;
+ xids[nr_xids] = xid;
+ if (++nr_xids >= PROC_MAXVIDS)
+ break;
+ }
+ spin_unlock(&vxlist_lock);
+ return nr_xids;
+}
+
+int proc_virtual_readdir(struct file * filp,
+ void * dirent, filldir_t filldir)
+{
+ unsigned int xid_array[PROC_MAXVIDS];
+ char buf[PROC_NUMBUF];
+ unsigned int nr = filp->f_pos-3;
+ unsigned int nr_xids, i;
+ ino_t ino;
+
+ switch ((long)filp->f_pos) {
+ case 0:
+ ino = fake_ino(0, PROC_XID_INO);
+ if (filldir(dirent, ".", 1,
+ filp->f_pos, ino, DT_DIR) < 0)
+ return 0;
+ filp->f_pos++;
+ /* fall through */
+ case 1:
+ ino = filp->f_dentry->d_parent->d_inode->i_ino;
+ if (filldir(dirent, "..", 2,
+ filp->f_pos, ino, DT_DIR) < 0)
+ return 0;
+ filp->f_pos++;
+ /* fall through */
+ case 2:
+ ino = fake_ino(0, PROC_XID_INFO);
+ if (filldir(dirent, "info", 4,
+ filp->f_pos, ino, DT_LNK) < 0)
+ return 0;
+ filp->f_pos++;
+ /* fall through */
+ case 3:
+ if (current->xid > 1) {
+ ino = fake_ino(1, PROC_XID_INO);
+ if (filldir(dirent, "current", 7,
+ filp->f_pos, ino, DT_LNK) < 0)
+ return 0;
+ }
+ filp->f_pos++;
+ }
+
+ nr_xids = get_xid_list(nr, xid_array);
+
+ for (i = 0; i < nr_xids; i++) {
+ int xid = xid_array[i];
+ ino_t ino = fake_ino(xid, PROC_XID_INO);
+ unsigned long j = PROC_NUMBUF;
+
+ do buf[--j] = '0' + (xid % 10); while (xid/=10);
+
+ if (filldir(dirent, buf+j, PROC_NUMBUF-j,
+ filp->f_pos, ino, DT_DIR) < 0)
+ break;
+ filp->f_pos++;
+ }
+ return 0;
+}
+
+
+static struct file_operations proc_virtual_dir_operations = {
+ read: generic_read_dir,
+ readdir: proc_virtual_readdir,
+};
+
+static struct inode_operations proc_virtual_dir_inode_operations = {
+ lookup: proc_virtual_lookup,
+};
+
+
+
+static int get_nid_list(int index, unsigned int *nids)
+{
+ struct nx_info *p;
+ int nr_nids = 0;
+
+ index--;
+ spin_lock(&nxlist_lock);
+ list_for_each_entry(p, &nx_infos, nx_list) {
+ int nid = p->nx_id;
+
+ if (--index >= 0)
+ continue;
+ nids[nr_nids] = nid;
+ if (++nr_nids >= PROC_MAXVIDS)
+ break;
+ }
+ spin_unlock(&nxlist_lock);
+ return nr_nids;
+}
+
+int proc_vnet_readdir(struct file * filp,
+ void * dirent, filldir_t filldir)
+{
+ unsigned int nid_array[PROC_MAXVIDS];
+ char buf[PROC_NUMBUF];
+ unsigned int nr = filp->f_pos-3;
+ unsigned int nr_nids, i;
+ ino_t ino;
+
+ switch ((long)filp->f_pos) {
+ case 0:
+ ino = fake_ino(0, PROC_NID_INO);
+ if (filldir(dirent, ".", 1,
+ filp->f_pos, ino, DT_DIR) < 0)
+ return 0;
+ filp->f_pos++;
+ /* fall through */
+ case 1:
+ ino = filp->f_dentry->d_parent->d_inode->i_ino;
+ if (filldir(dirent, "..", 2,
+ filp->f_pos, ino, DT_DIR) < 0)
+ return 0;
+ filp->f_pos++;
+ /* fall through */
+ case 2:
+ ino = fake_ino(0, PROC_NID_INFO);
+ if (filldir(dirent, "info", 4,
+ filp->f_pos, ino, DT_LNK) < 0)
+ return 0;
+ filp->f_pos++;
+ /* fall through */
+ case 3:
+ if (current->xid > 1) {
+ ino = fake_ino(1, PROC_NID_INO);
+ if (filldir(dirent, "current", 7,
+ filp->f_pos, ino, DT_LNK) < 0)
+ return 0;
+ }
+ filp->f_pos++;
+ }
+
+ nr_nids = get_nid_list(nr, nid_array);
+
+ for (i = 0; i < nr_nids; i++) {
+ int nid = nid_array[i];
+ ino_t ino = fake_ino(nid, PROC_NID_INO);
+ unsigned long j = PROC_NUMBUF;
+
+ do buf[--j] = '0' + (nid % 10); while (nid/=10);
+
+ if (filldir(dirent, buf+j, PROC_NUMBUF-j,
+ filp->f_pos, ino, DT_DIR) < 0)
+ break;
+ filp->f_pos++;
+ }
+ return 0;
+}
+
+
+static struct file_operations proc_vnet_dir_operations = {
+ read: generic_read_dir,
+ readdir: proc_vnet_readdir,
+};
+
+static struct inode_operations proc_vnet_dir_inode_operations = {
+ lookup: proc_vnet_lookup,
+};
+
+
+
+void proc_vx_init(void)
+{
+ struct proc_dir_entry *ent;
+
+ ent = proc_mkdir("virtual", 0);
+ if (ent) {
+ ent->proc_fops = &proc_virtual_dir_operations;
+ ent->proc_iops = &proc_virtual_dir_inode_operations;
+ }
+ proc_virtual = ent;
+
+ ent = proc_mkdir("vnet", 0);
+ if (ent) {
+ ent->proc_fops = &proc_vnet_dir_operations;
+ ent->proc_iops = &proc_vnet_dir_inode_operations;
+ }
+ proc_vnet = ent;
+}
+
+
+
+
+/* per pid info */
+
+
+char *task_vx_info(struct task_struct *p, char *buffer)
+{
+ return buffer + sprintf(buffer,
+ "XID:\t%d\n"
+ ,p->xid);
+}
+
+int proc_pid_vx_info(struct task_struct *p, char *buffer)
+{
+ char * orig = buffer;
+
+ buffer = task_vx_info(p, buffer);
+ return buffer - orig;
+}
+
+char *task_nx_info(struct task_struct *p, char *buffer)
+{
+ return buffer + sprintf(buffer,
+ "NID:\t%d\n"
+ ,p->nid);
+}
+
+int proc_pid_nx_info(struct task_struct *p, char *buffer)
+{
+ char * orig = buffer;
+
+ buffer = task_nx_info(p, buffer);
+ return buffer - orig;
+}
+
--- /dev/null
+/*
+ * linux/kernel/vserver/sched.c
+ *
+ * Virtual Server: Scheduler Support
+ *
+ * Copyright (C) 2004 Herbert Pötzl
+ *
+ * V0.01 adapted Sam Vilain's version to 2.6.3
+ * V0.02 removed legacy interface
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/vinline.h>
+#include <linux/vserver/context.h>
+#include <linux/vserver/sched.h>
+
+#include <asm/errno.h>
+#include <asm/uaccess.h>
+
+
+/*
+ * recalculate the context's scheduling tokens
+ *
+ * ret > 0 : number of tokens available
+ * ret = 0 : context is paused
+ * ret < 0 : number of jiffies until new tokens arrive
+ *
+ */
+int vx_tokens_recalc(struct vx_info *vxi)
+{
+ long delta, tokens = 0;
+
+ if (__vx_flags(vxi->vx_flags, VXF_SCHED_PAUSE, 0))
+ /* we are paused */
+ return 0;
+
+ delta = jiffies - vxi->sched.jiffies;
+
+ if (delta >= vxi->sched.interval) {
+ /* lockdown scheduler info */
+ spin_lock(&vxi->sched.tokens_lock);
+
+ /* calc integral token part */
+ delta = jiffies - vxi->sched.jiffies;
+ tokens = delta / vxi->sched.interval;
+ delta = tokens * vxi->sched.interval;
+ tokens *= vxi->sched.fill_rate;
+
+ atomic_add(tokens, &vxi->sched.tokens);
+ vxi->sched.jiffies += delta;
+ tokens = atomic_read(&vxi->sched.tokens);
+
+ if (tokens > vxi->sched.tokens_max) {
+ tokens = vxi->sched.tokens_max;
+ atomic_set(&vxi->sched.tokens, tokens);
+ }
+ spin_unlock(&vxi->sched.tokens_lock);
+ } else {
+ /* no new tokens */
+ if ((tokens = vx_tokens_avail(vxi)) < vxi->sched.tokens_min) {
+			/* return the (negative) number of jiffies until enough tokens are available */
+ if (vxi->sched.tokens_min == 0)
+ return delta - vxi->sched.interval;
+ return delta - vxi->sched.interval *
+ vxi->sched.tokens_min / vxi->sched.fill_rate;
+ }
+ }
+ /* we have some tokens left */
+ return tokens;
+}
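+
+/*
+ * Worked example (assumed parameters, for illustration only): with
+ * fill_rate = 2, interval = 10 and 35 jiffies elapsed since
+ * sched.jiffies, three full intervals have passed, so 3 * 2 = 6 tokens
+ * are added, sched.jiffies advances by 30 and the remaining 5 jiffies
+ * carry over to the next recalculation.
+ */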
+
+/*
+ * effective_prio - return the priority that is based on the static
+ * priority but is modified by bonuses/penalties.
+ *
+ * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
+ * into a -4 ... 0 ... +4 bonus/penalty range.
+ *
+ * Additionally, we scale another amount based on the number of
+ * CPU tokens currently held by the context, if the process is
+ * part of a context (and the appropriate SCHED flag is set).
+ * This ranges from -5 ... 0 ... +15, quadratically.
+ *
+ * So, the total bonus is -9 .. 0 .. +19
+ * We use ~50% of the full 0...39 priority range so that:
+ *
+ * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
+ * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
+ * unless that context is far exceeding its CPU allocation.
+ *
+ * Both properties are important to certain workloads.
+ */
+int effective_vavavoom(task_t *p, int max_prio)
+{
+ struct vx_info *vxi = p->vx_info;
+ int vavavoom, max;
+
+ /* lots of tokens = lots of vavavoom
+ * no tokens = no vavavoom */
+ if ((vavavoom = atomic_read(&vxi->sched.tokens)) >= 0) {
+ max = vxi->sched.tokens_max;
+ vavavoom = max - vavavoom;
+ max = max * max;
+ vavavoom = max_prio * VAVAVOOM_RATIO / 100
+ * (vavavoom*vavavoom - (max >> 2)) / max;
+ /* alternative, geometric mapping
+ vavavoom = -( MAX_USER_PRIO*VAVAVOOM_RATIO/100 * vavavoom
+ / vxi->sched.tokens_max -
+ MAX_USER_PRIO*VAVAVOOM_RATIO/100/2); */
+ } else
+ vavavoom = 0;
+ /* vavavoom = ( MAX_USER_PRIO*VAVAVOOM_RATIO/100*tokens_left(p) -
+ MAX_USER_PRIO*VAVAVOOM_RATIO/100/2); */
+
+ return vavavoom;
+}
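+
+/*
+ * Worked example (assumed values, for illustration): with
+ * max_prio * VAVAVOOM_RATIO / 100 = 20 and tokens_max = 100, a full
+ * token bucket yields 20 * (0 - 2500) / 10000 = -5 and an empty one
+ * yields 20 * (10000 - 2500) / 10000 = +15, matching the
+ * -5 ... 0 ... +15 range described above.
+ */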
+
+
+int vc_set_sched(uint32_t xid, void __user *data)
+{
+ struct vcmd_set_sched_v2 vc_data;
+ struct vx_info *vxi;
+
+ if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+ return -EFAULT;
+
+ vxi = find_vx_info(xid);
+ if (!vxi)
+ return -EINVAL;
+
+ spin_lock(&vxi->sched.tokens_lock);
+
+ if (vc_data.interval != SCHED_KEEP)
+ vxi->sched.interval = vc_data.interval;
+ if (vc_data.fill_rate != SCHED_KEEP)
+ vxi->sched.fill_rate = vc_data.fill_rate;
+ if (vc_data.tokens_min != SCHED_KEEP)
+ vxi->sched.tokens_min = vc_data.tokens_min;
+ if (vc_data.tokens_max != SCHED_KEEP)
+ vxi->sched.tokens_max = vc_data.tokens_max;
+ if (vc_data.tokens != SCHED_KEEP)
+ atomic_set(&vxi->sched.tokens, vc_data.tokens);
+
+ /* Sanity check the resultant values */
+ if (vxi->sched.fill_rate <= 0)
+ vxi->sched.fill_rate = 1;
+ if (vxi->sched.interval <= 0)
+ vxi->sched.interval = HZ;
+ if (vxi->sched.tokens_max == 0)
+ vxi->sched.tokens_max = 1;
+ if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
+ atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
+ if (vxi->sched.tokens_min > vxi->sched.tokens_max)
+ vxi->sched.tokens_min = vxi->sched.tokens_max;
+
+ spin_unlock(&vxi->sched.tokens_lock);
+ put_vx_info(vxi);
+ return 0;
+}
+
--- /dev/null
+/*
+ * linux/kernel/vserver/signal.c
+ *
+ * Virtual Server: Signal Support
+ *
+ * Copyright (C) 2003-2004 Herbert Pötzl
+ *
+ * V0.01 broken out from vcontext V0.05
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+
+#include <asm/errno.h>
+#include <asm/uaccess.h>
+
+#include <linux/vinline.h>
+#include <linux/vserver/signal.h>
+
+
+int vc_ctx_kill(uint32_t id, void __user *data)
+{
+ int retval, count=0;
+ struct vcmd_ctx_kill_v0 vc_data;
+ struct siginfo info;
+ struct task_struct *p;
+ struct vx_info *vxi;
+
+ if (!vx_check(0, VX_ADMIN))
+ return -ENOSYS;
+ if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+ return -EFAULT;
+
+ info.si_signo = vc_data.sig;
+ info.si_errno = 0;
+ info.si_code = SI_USER;
+ info.si_pid = current->pid;
+ info.si_uid = current->uid;
+
+ vxi = find_vx_info(id);
+ if (!vxi)
+ return -ESRCH;
+
+ retval = -ESRCH;
+ read_lock(&tasklist_lock);
+ switch (vc_data.pid) {
+ case -1:
+ case 0:
+ for_each_process(p) {
+ int err = 0;
+
+ if (vx_task_xid(p) != id || p->pid <= 1 ||
+ (vc_data.pid && vxi->vx_initpid == p->pid) ||
+ !thread_group_leader(p))
+ continue;
+
+ err = send_sig_info(vc_data.sig, &info, p);
+ ++count;
+ if (err != -EPERM)
+ retval = err;
+ }
+ break;
+
+ default:
+ p = find_task_by_pid(vc_data.pid);
+ if (p) {
+ if (!thread_group_leader(p)) {
+ struct task_struct *tg;
+
+ tg = find_task_by_pid(p->tgid);
+ if (tg)
+ p = tg;
+ }
+ if ((id == -1) || (vx_task_xid(p) == id))
+ retval = send_sig_info(vc_data.sig, &info, p);
+ }
+ break;
+ }
+ read_unlock(&tasklist_lock);
+ put_vx_info(vxi);
+ return retval;
+}
+
+
--- /dev/null
+/*
+ * linux/kernel/vserver/switch.c
+ *
+ * Virtual Server: Syscall Switch
+ *
+ * Copyright (C) 2003-2004 Herbert Pötzl
+ *
+ * V0.01 syscall switch
+ * V0.02 added signal to context
+ * V0.03 added rlimit functions
+ * V0.04 added iattr, task/xid functions
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/errno.h>
+
+#include <linux/vserver/switch.h>
+#include <linux/vinline.h>
+
+
+static inline int
+vc_get_version(uint32_t id)
+{
+ return VCI_VERSION;
+}
+
+
+#include <linux/vserver/legacy.h>
+#include <linux/vserver/context.h>
+#include <linux/vserver/network.h>
+#include <linux/vserver/namespace.h>
+#include <linux/vserver/sched.h>
+#include <linux/vserver/limit.h>
+#include <linux/vserver/inode.h>
+#include <linux/vserver/signal.h>
+
+
+extern unsigned int vx_debug_switch;
+
+
+asmlinkage long
+sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
+{
+
+ if (vx_debug_switch)
+ printk( "vc: VCMD_%02d_%d[%d], %d\n",
+ VC_CATEGORY(cmd), VC_COMMAND(cmd),
+ VC_VERSION(cmd), id);
+
+ switch (cmd) {
+ case VCMD_get_version:
+ return vc_get_version(id);
+
+#ifdef CONFIG_VSERVER_LEGACY
+ case VCMD_new_s_context:
+ return vc_new_s_context(id, data);
+ case VCMD_set_ipv4root:
+ return vc_set_ipv4root(id, data);
+#endif
+
+ case VCMD_task_xid:
+ return vc_task_xid(id, data);
+ case VCMD_vx_info:
+ return vc_vx_info(id, data);
+
+ case VCMD_task_nid:
+ return vc_task_nid(id, data);
+ case VCMD_nx_info:
+ return vc_nx_info(id, data);
+
+ case VCMD_set_namespace:
+ return vc_set_namespace(id, data);
+ case VCMD_cleanup_namespace:
+ return vc_cleanup_namespace(id, data);
+ }
+
+ /* those are allowed while in setup too */
+ if (!vx_check(0, VX_ADMIN|VX_WATCH) &&
+ !vx_flags(VXF_STATE_SETUP,0))
+ return -EPERM;
+
+#ifdef CONFIG_VSERVER_LEGACY
+ switch (cmd) {
+ case VCMD_set_cflags:
+ case VCMD_set_ccaps:
+ if (vx_check(0, VX_WATCH))
+ return 0;
+ }
+#endif
+
+ switch (cmd) {
+ case VCMD_get_rlimit:
+ return vc_get_rlimit(id, data);
+ case VCMD_set_rlimit:
+ return vc_set_rlimit(id, data);
+ case VCMD_get_rlimit_mask:
+ return vc_get_rlimit_mask(id, data);
+
+ case VCMD_vx_get_vhi_name:
+ return vc_get_vhi_name(id, data);
+ case VCMD_vx_set_vhi_name:
+ return vc_set_vhi_name(id, data);
+
+ case VCMD_set_cflags:
+ return vc_set_cflags(id, data);
+ case VCMD_get_cflags:
+ return vc_get_cflags(id, data);
+
+ case VCMD_set_ccaps:
+ return vc_set_ccaps(id, data);
+ case VCMD_get_ccaps:
+ return vc_get_ccaps(id, data);
+
+ case VCMD_set_nflags:
+ return vc_set_nflags(id, data);
+ case VCMD_get_nflags:
+ return vc_get_nflags(id, data);
+
+ case VCMD_set_ncaps:
+ return vc_set_ncaps(id, data);
+ case VCMD_get_ncaps:
+ return vc_get_ncaps(id, data);
+
+ case VCMD_set_sched:
+ return vc_set_sched(id, data);
+ }
+
+ /* below here only with VX_ADMIN */
+ if (!vx_check(0, VX_ADMIN|VX_WATCH))
+ return -EPERM;
+
+ switch (cmd) {
+ case VCMD_ctx_kill:
+ return vc_ctx_kill(id, data);
+
+#ifdef CONFIG_VSERVER_LEGACY
+ case VCMD_create_context:
+ return vc_ctx_create(id, data);
+#endif
+
+ case VCMD_get_iattr:
+ return vc_get_iattr(id, data);
+ case VCMD_set_iattr:
+ return vc_set_iattr(id, data);
+
+ case VCMD_enter_namespace:
+ return vc_enter_namespace(id, data);
+
+ case VCMD_ctx_create:
+#ifdef CONFIG_VSERVER_LEGACY
+ if (id == 1) {
+ current->xid = 1;
+ return 1;
+ }
+#endif
+ return vc_ctx_create(id, data);
+ case VCMD_ctx_migrate:
+ return vc_ctx_migrate(id, data);
+
+ case VCMD_net_create:
+ return vc_net_create(id, data);
+ case VCMD_net_migrate:
+ return vc_net_migrate(id, data);
+
+ }
+ return -ENOSYS;
+}
+
--- /dev/null
+/*
+ * linux/kernel/sysctl.c
+ *
+ * Virtual Context Support
+ *
+ * Copyright (C) 2004 Herbert Pötzl
+ *
+ * V0.01 basic structure
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/vserver.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/sysctl.h>
+#include <linux/fs.h>
+
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+
+
+#define CTL_VSERVER 4242 /* unused? */
+
+enum {
+ CTL_DEBUG_SWITCH = 1,
+ CTL_DEBUG_LIMIT,
+};
+
+
+unsigned int vx_debug_switch = 0;
+unsigned int vx_debug_limit = 0;
+
+
+static struct ctl_table_header *vserver_table_header;
+static ctl_table vserver_table[];
+
+
+void vserver_register_sysctl(void)
+{
+ if (!vserver_table_header) {
+ vserver_table_header = register_sysctl_table(vserver_table, 1);
+#ifdef CONFIG_PROC_FS
+// if (vserver_table[0].de)
+// vserver_table[0].de->owner = THIS_MODULE;
+#endif
+ }
+
+}
+
+void vserver_unregister_sysctl(void)
+{
+ if (vserver_table_header) {
+ unregister_sysctl_table(vserver_table_header);
+ vserver_table_header = NULL;
+ }
+}
+
+
+static int proc_dodebug(ctl_table *table, int write,
+ struct file *file, void *buffer, size_t *lenp)
+{
+ char tmpbuf[20], *p, c;
+ unsigned int value;
+ size_t left, len;
+
+ if ((file->f_pos && !write) || !*lenp) {
+ *lenp = 0;
+ return 0;
+ }
+
+ left = *lenp;
+
+ if (write) {
+ if (!access_ok(VERIFY_READ, buffer, left))
+ return -EFAULT;
+ p = (char *) buffer;
+ while (left && __get_user(c, p) >= 0 && isspace(c))
+ left--, p++;
+ if (!left)
+ goto done;
+
+ if (left > sizeof(tmpbuf) - 1)
+ return -EINVAL;
+ if (copy_from_user(tmpbuf, p, left))
+ return -EFAULT;
+ tmpbuf[left] = '\0';
+
+ for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--)
+ value = 10 * value + (*p - '0');
+ if (*p && !isspace(*p))
+ return -EINVAL;
+ while (left && isspace(*p))
+ left--, p++;
+ *(unsigned int *) table->data = value;
+ } else {
+ if (!access_ok(VERIFY_WRITE, buffer, left))
+ return -EFAULT;
+ len = sprintf(tmpbuf, "%d", *(unsigned int *) table->data);
+ if (len > left)
+ len = left;
+ if (__copy_to_user(buffer, tmpbuf, len))
+ return -EFAULT;
+ if ((left -= len) > 0) {
+ if (put_user('\n', (char *)buffer + len))
+ return -EFAULT;
+ left--;
+ }
+ }
+
+done:
+ *lenp -= left;
+ file->f_pos += *lenp;
+ return 0;
+}
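+
+/*
+ * The handler above backs /proc/sys/vserver/debug_switch and
+ * /proc/sys/vserver/debug_limit (see the tables below), so the debug
+ * knobs can be toggled from userspace, e.g.:
+ *
+ *	echo 1 > /proc/sys/vserver/debug_switch
+ *	cat /proc/sys/vserver/debug_limit
+ */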
+
+
+
+static ctl_table debug_table[] = {
+ {
+ .ctl_name = CTL_DEBUG_SWITCH,
+ .procname = "debug_switch",
+ .data = &vx_debug_switch,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dodebug
+ },
+ {
+ .ctl_name = CTL_DEBUG_LIMIT,
+ .procname = "debug_limit",
+ .data = &vx_debug_limit,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dodebug
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table vserver_table[] = {
+ {
+ .ctl_name = CTL_VSERVER,
+ .procname = "vserver",
+ .mode = 0555,
+ .child = debug_table
+ },
+ { .ctl_name = 0 }
+};
+