Initial revision
authorMarc Fiuczynski <mef@cs.princeton.edu>
Tue, 13 Jul 2004 17:57:17 +0000 (17:57 +0000)
committerMarc Fiuczynski <mef@cs.princeton.edu>
Tue, 13 Jul 2004 17:57:17 +0000 (17:57 +0000)
include/linux/vs_base.h [new file with mode: 0644]
include/linux/vs_context.h [new file with mode: 0644]
include/linux/vs_cvirt.h [new file with mode: 0644]
include/linux/vs_dlimit.h [new file with mode: 0644]
include/linux/vs_limit.h [new file with mode: 0644]
include/linux/vs_memory.h [new file with mode: 0644]
include/linux/vs_network.h [new file with mode: 0644]
include/linux/vs_socket.h [new file with mode: 0644]
include/linux/vserver/dlimit.h [new file with mode: 0644]
kernel/vserver/dlimit.c [new file with mode: 0644]
kernel/vserver/helper.c [new file with mode: 0644]

diff --git a/include/linux/vs_base.h b/include/linux/vs_base.h
new file mode 100644 (file)
index 0000000..4f04513
--- /dev/null
@@ -0,0 +1,78 @@
+#ifndef _VX_VS_BASE_H
+#define _VX_VS_BASE_H
+
+#include "vserver/context.h"
+
+// #define VX_DEBUG
+
+
+#if defined(VX_DEBUG)
+#define vxdprintk(x...) printk("vxd: " x)
+#else
+#define vxdprintk(x...)
+#endif
+
+
+#define vx_task_xid(t) ((t)->xid)
+
+#define vx_current_xid() vx_task_xid(current)
+
+#define vx_check(c,m)  __vx_check(vx_current_xid(),c,m)
+
+#define vx_weak_check(c,m)     ((m) ? vx_check(c,m) : 1)
+
+
+/*
+ * check current context for ADMIN/WATCH and
+ * optionally against supplied argument
+ */
+static __inline__ int __vx_check(xid_t cid, xid_t id, unsigned int mode)
+{
+       if (mode & VX_ARG_MASK) {
+               if ((mode & VX_IDENT) &&
+                       (id == cid))
+                       return 1;
+       }
+       if (mode & VX_ATR_MASK) {
+               if ((mode & VX_DYNAMIC) &&
+                       (id >= MIN_D_CONTEXT) &&
+                       (id <= MAX_S_CONTEXT))
+                       return 1;
+               if ((mode & VX_STATIC) &&
+                       (id > 1) && (id < MIN_D_CONTEXT))
+                       return 1;
+       }
+       return (((mode & VX_ADMIN) && (cid == 0)) ||
+               ((mode & VX_WATCH) && (cid == 1)));
+}
+
+
+#define __vx_flags(v,m,f)      (((v) & (m)) ^ (f))
+
+#define        __vx_task_flags(t,m,f) \
+       (((t) && ((t)->vx_info)) ? \
+               __vx_flags((t)->vx_info->vx_flags,(m),(f)) : 0)
+
+#define vx_current_flags() \
+       ((current->vx_info) ? current->vx_info->vx_flags : 0)
+
+#define vx_flags(m,f)  __vx_flags(vx_current_flags(),(m),(f))
+
+
+#define vx_current_ccaps() \
+       ((current->vx_info) ? current->vx_info->vx_ccaps : 0)
+
+#define vx_ccaps(c)    (vx_current_ccaps() & (c))
+
+#define vx_current_bcaps() \
+       (((current->vx_info) && !vx_flags(VXF_STATE_SETUP, 0)) ? \
+       current->vx_info->vx_bcaps : cap_bset)
+
+
+/* generic flag merging */
+
+#define        vx_mask_flags(v,f,m)    (((v) & ~(m)) | ((f) & (m)))
+
+#define        vx_mask_mask(v,f,m)     (((v) & ~(m)) | ((v) & (f) & (m)))
+
+#endif
diff --git a/include/linux/vs_context.h b/include/linux/vs_context.h
new file mode 100644 (file)
index 0000000..727a16c
--- /dev/null
@@ -0,0 +1,128 @@
+#ifndef _VX_VS_CONTEXT_H
+#define _VX_VS_CONTEXT_H
+
+
+// #define VX_DEBUG
+
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+#include "vserver/context.h"
+
+#undef vxdprintk
+#if defined(VX_DEBUG)
+#define vxdprintk(x...) printk("vxd: " x)
+#else
+#define vxdprintk(x...)
+#endif
+
+
+
+extern int proc_pid_vx_info(struct task_struct *, char *);
+
+
+#define get_vx_info(i) __get_vx_info(i,__FILE__,__LINE__)
+/* take one vx_usecnt on vxi and return it; a NULL vxi is returned as NULL */
+static inline struct vx_info *__get_vx_info(struct vx_info *vxi,
+       const char *_file, int _line)
+{
+       if (!vxi)
+               return NULL;
+       vxdprintk("get_vx_info(%p[#%d.%d])\t%s:%d\n",
+               vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0,
+               _file, _line);
+       atomic_inc(&vxi->vx_usecnt);
+       return vxi;
+}
+
+/* pass (i) to call_rcu() with rcu_free_vx_info; an expression, caller adds ';' */
+#define        free_vx_info(i) \
+       call_rcu(&(i)->vx_rcu, rcu_free_vx_info, (i))
+
+#define put_vx_info(i) __put_vx_info(i,__FILE__,__LINE__)
+/* drop one vx_usecnt; when it reaches zero vxi goes to free_vx_info() */
+static inline void __put_vx_info(struct vx_info *vxi, const char *_file, int _line)
+{
+       if (!vxi)
+               return;
+       vxdprintk("put_vx_info(%p[#%d.%d])\t%s:%d\n",
+               vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0,
+               _file, _line);
+       if (atomic_dec_and_test(&vxi->vx_usecnt))
+               free_vx_info(vxi);
+}
+
+#define set_vx_info(p,i) __set_vx_info(p,i,__FILE__,__LINE__)
+/* store vxi in the slot *vxp, taking one vx_refcnt and one vx_usecnt on it */
+static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi,
+       const char *_file, int _line)
+{
+       BUG_ON(*vxp);   /* the slot has to be empty on entry */
+       if (!vxi)
+               return;
+       vxdprintk("set_vx_info(%p[#%d.%d.%d])\t%s:%d\n",
+               vxi, vxi?vxi->vx_id:0,
+               vxi?atomic_read(&vxi->vx_usecnt):0,
+               vxi?atomic_read(&vxi->vx_refcnt):0,
+               _file, _line);
+       atomic_inc(&vxi->vx_refcnt);
+       *vxp = __get_vx_info(vxi, _file, _line);
+}
+
+#define        clr_vx_info(p)  __clr_vx_info(p,__FILE__,__LINE__)
+/* empty the slot *vxp, then drop one vx_refcnt and one vx_usecnt on the old value */
+static inline void __clr_vx_info(struct vx_info **vxp,
+       const char *_file, int _line)
+{
+       struct vx_info *vxo = *vxp;
+
+       if (!vxo)
+               return;
+       vxdprintk("clr_vx_info(%p[#%d.%d.%d])\t%s:%d\n",
+               vxo, vxo?vxo->vx_id:0,
+               vxo?atomic_read(&vxo->vx_usecnt):0,
+               vxo?atomic_read(&vxo->vx_refcnt):0,
+               _file, _line);
+       *vxp = NULL;
+       wmb();  /* NOTE(review): presumably publishes the cleared slot before the counts drop - confirm */
+       if (vxo && atomic_dec_and_test(&vxo->vx_refcnt))
+               unhash_vx_info(vxo);    /* vx_refcnt reached zero */
+       __put_vx_info(vxo, _file, _line);
+}
+
+
+#define task_get_vx_info(i)    __task_get_vx_info(i,__FILE__,__LINE__)
+
+static __inline__ struct vx_info *__task_get_vx_info(struct task_struct *p,
+       const char *_file, int _line)
+{
+       struct vx_info *vxi;
+       
+       task_lock(p);
+       vxi = __get_vx_info(p->vx_info, _file, _line);
+       task_unlock(p);
+       return vxi;
+}
+
+
+#define vx_verify_info(p,i)    \
+       __vx_verify_info((p)->vx_info,i,__FILE__,__LINE__)
+
+static __inline__ void __vx_verify_info(
+       struct vx_info *vxa, struct vx_info *vxb,
+       const char *_file, int _line)
+{
+       if (vxa == vxb)
+               return;
+       printk(KERN_ERR "vx bad assumption (%p==%p) at %s:%d\n",
+               vxa, vxb, _file, _line);
+}
+
+
+#undef vxdprintk
+#define vxdprintk(x...)
+
+#else
+#warning duplicate inclusion
+#endif
diff --git a/include/linux/vs_cvirt.h b/include/linux/vs_cvirt.h
new file mode 100644 (file)
index 0000000..65f4303
--- /dev/null
@@ -0,0 +1,71 @@
+#ifndef _VX_VS_CVIRT_H
+#define _VX_VS_CVIRT_H
+
+
+// #define VX_DEBUG
+
+#include "vserver/cvirt.h"
+#include "vs_base.h"
+
+#if defined(VX_DEBUG)
+#define vxdprintk(x...) printk("vxd: " x)
+#else
+#define vxdprintk(x...)
+#endif
+
+
+/* utsname virtualization */
+
+static inline struct new_utsname *vx_new_utsname(void)
+{
+       if (current->vx_info)
+               return &current->vx_info->cvirt.utsname;
+       return &system_utsname;
+}
+
+#define vx_new_uts(x)          ((vx_new_utsname())->x)
+
+
+/* pid faking stuff */
+
+
+#define vx_map_tgid(v,p) \
+       __vx_map_tgid((v), (p), __FILE__, __LINE__)
+
+static inline int __vx_map_tgid(struct vx_info *vxi, int pid,
+       char *file, int line)
+{
+       if (vxi && __vx_flags(vxi->vx_flags, VXF_INFO_INIT, 0)) {
+               vxdprintk("vx_map_tgid: %p/%llx: %d -> %d in %s:%d\n",
+                       vxi, vxi->vx_flags, pid,
+                       (pid == vxi->vx_initpid)?1:pid,
+                       file, line);
+               if (pid == vxi->vx_initpid)
+                       return 1;
+       }
+       return pid;
+}
+
+#define vx_rmap_tgid(v,p) \
+       __vx_rmap_tgid((v), (p), __FILE__, __LINE__)
+
+static inline int __vx_rmap_tgid(struct vx_info *vxi, int pid,
+       char *file, int line)
+{
+       if (vxi && __vx_flags(vxi->vx_flags, VXF_INFO_INIT, 0)) {
+               vxdprintk("vx_rmap_tgid: %p/%llx: %d -> %d in %s:%d\n",
+                       vxi, vxi->vx_flags, pid,
+                       (pid == 1)?vxi->vx_initpid:pid,
+                       file, line);
+               if ((pid == 1) && vxi->vx_initpid)
+                       return vxi->vx_initpid;
+       }
+       return pid;
+}
+
+#undef vxdprintk
+#define vxdprintk(x...)
+
+#else
+#warning duplicate inclusion
+#endif
diff --git a/include/linux/vs_dlimit.h b/include/linux/vs_dlimit.h
new file mode 100644 (file)
index 0000000..d80c563
--- /dev/null
@@ -0,0 +1,169 @@
+#ifndef _VX_VS_DLIMIT_H
+#define _VX_VS_DLIMIT_H
+
+
+// #define VX_DEBUG
+
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+#include "vserver/context.h"
+#include "vserver/dlimit.h"
+
+#if defined(VX_DEBUG)
+#define vxdprintk(x...) printk("vxd: " x)
+#else
+#define vxdprintk(x...)
+#endif
+
+
+#define get_dl_info(i) __get_dl_info(i,__FILE__,__LINE__)
+
+static inline struct dl_info *__get_dl_info(struct dl_info *dli,
+       const char *_file, int _line)
+{
+       if (!dli)
+               return NULL;
+       vxdprintk("get_dl_info(%p[#%d.%d])\t%s:%d\n",
+               dli, dli?dli->dl_xid:0, dli?atomic_read(&dli->dl_usecnt):0,
+               _file, _line);
+       atomic_inc(&dli->dl_usecnt);
+       return dli;
+}
+
+/* pass (i) to call_rcu() with rcu_free_dl_info; an expression, caller adds ';' */
+#define        free_dl_info(i) \
+       call_rcu(&(i)->dl_rcu, rcu_free_dl_info, (i))
+
+#define put_dl_info(i) __put_dl_info(i,__FILE__,__LINE__)
+
+static inline void __put_dl_info(struct dl_info *dli, const char *_file, int _line)
+{
+       if (!dli)
+               return;
+       vxdprintk("put_dl_info(%p[#%d.%d])\t%s:%d\n",
+               dli, dli?dli->dl_xid:0, dli?atomic_read(&dli->dl_usecnt):0,
+               _file, _line);
+       if (atomic_dec_and_test(&dli->dl_usecnt))
+               free_dl_info(dli);
+}
+
+
+extern int vx_debug_dlimit;
+
+#define        __dlimit_char(d)        ((d)?'*':' ')
+/* charge nr bytes to xid's limit on sb; returns 1 (nothing charged) if over */
+static inline int __dl_alloc_space(struct super_block *sb,
+       xid_t xid, dlsize_t nr, const char *file, int line)
+{
+       struct dl_info *dli = NULL;
+       int ret = 0;
+
+       if (nr == 0)
+               goto out;
+       dli = locate_dl_info(sb, xid);
+       if (!dli)
+               goto out;
+       spin_lock(&dli->dl_lock);
+       ret = (dli->dl_space_used + nr > dli->dl_space_total);
+       if (!ret)
+               dli->dl_space_used += nr;
+       spin_unlock(&dli->dl_lock);
+       put_dl_info(dli);
+out:
+       if (vx_debug_dlimit)    /* dlsize_t is uint64_t: cast to match %llu */
+               printk("ALLOC (%p,#%d)%c %llu bytes (%d)@ %s:%d\n",
+                       sb, xid, __dlimit_char(dli),
+                       (unsigned long long)nr, ret, file, line);
+       return ret;
+}
+/* give nr bytes back to xid's limit on sb; usage never drops below zero */
+static inline void __dl_free_space(struct super_block *sb,
+       xid_t xid, dlsize_t nr, const char *file, int line)
+{
+       struct dl_info *dli = NULL;
+
+       if (nr == 0)
+               goto out;
+       dli = locate_dl_info(sb, xid);
+       if (!dli)
+               goto out;
+       spin_lock(&dli->dl_lock);       /* unsigned counter: clamp, never wrap */
+       dli->dl_space_used -=
+               (nr < dli->dl_space_used) ? nr : dli->dl_space_used;
+       spin_unlock(&dli->dl_lock);
+       put_dl_info(dli);
+out:
+       if (vx_debug_dlimit)
+               printk("FREE  (%p,#%d)%c %llu bytes @ %s:%d\n", sb, xid,
+                       __dlimit_char(dli), (unsigned long long)nr, file, line);
+}
+/* charge one inode to xid's limit on sb; returns 1 if the limit is reached, else 0 */
+static inline int __dl_alloc_inode(struct super_block *sb,
+       xid_t xid, const char *file, int line)
+{
+       struct dl_info *dli;
+       int ret = 0;
+
+       dli = locate_dl_info(sb, xid);
+       if (!dli)
+               goto out;
+
+       spin_lock(&dli->dl_lock);
+       ret = (dli->dl_inodes_used >= dli->dl_inodes_total);
+       if (!ret)
+               dli->dl_inodes_used++;
+       spin_unlock(&dli->dl_lock);
+       put_dl_info(dli);
+out:   /* from here on dli is only tested for NULL, never dereferenced */
+       if (vx_debug_dlimit)
+               printk("ALLOC (%p,#%d)%c inode (%d)@ %s:%d\n",
+                       sb, xid, __dlimit_char(dli), ret, file, line);
+       return ret;
+}
+/* release one inode from xid's limit on sb; the count is clamped at zero */
+static inline void __dl_free_inode(struct super_block *sb,
+       xid_t xid, const char *file, int line)
+{
+       struct dl_info *dli;
+
+       dli = locate_dl_info(sb, xid);
+       if (!dli)
+               goto out;
+       spin_lock(&dli->dl_lock);
+       if (dli->dl_inodes_used)        /* unsigned: do not wrap below zero */
+               dli->dl_inodes_used--;
+       spin_unlock(&dli->dl_lock);
+       put_dl_info(dli);
+out:
+       if (vx_debug_dlimit)
+               printk("FREE  (%p,#%d)%c inode @ %s:%d\n",
+                       sb, xid, __dlimit_char(dli), file, line);
+}
+
+
+
+#define DLIMIT_ALLOC_BLOCK(sb, xid, nr) \
+       __dl_alloc_space(sb, xid, \
+               ((dlsize_t)(nr)) << (sb)->s_blocksize_bits, \
+               __FILE__, __LINE__ )
+
+#define DLIMIT_FREE_BLOCK(sb, xid, nr) \
+       __dl_free_space(sb, xid, \
+               ((dlsize_t)(nr)) << (sb)->s_blocksize_bits, \
+               __FILE__, __LINE__ )
+
+#define DLIMIT_ALLOC_INODE(sb, xid) \
+       __dl_alloc_inode(sb, xid, __FILE__, __LINE__ )
+
+#define DLIMIT_FREE_INODE(sb, xid) \
+       __dl_free_inode(sb, xid, __FILE__, __LINE__ )
+
+
+#define        DLIMIT_ADJUST_BLOCK(sb, xid, fb, rb)
+
+
+#else
+#warning duplicate inclusion
+#endif
diff --git a/include/linux/vs_limit.h b/include/linux/vs_limit.h
new file mode 100644 (file)
index 0000000..82e8de4
--- /dev/null
@@ -0,0 +1,119 @@
+#ifndef _VX_VS_LIMIT_H
+#define _VX_VS_LIMIT_H
+
+
+// #define VX_DEBUG
+
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+#include "vserver/context.h"
+#include "vserver/limit.h"
+
+
+/* file limits */
+
+#define VX_DEBUG_ACC_FILE      0
+#define VX_DEBUG_ACC_OPENFD    0
+
+#if    (VX_DEBUG_ACC_FILE) || (VX_DEBUG_ACC_OPENFD)
+#define vxdprintk(x...) printk("vxd: " x)
+#else
+#define vxdprintk(x...)
+#endif
+
+
+#define vx_acc_cres(v,d,r) \
+       __vx_acc_cres((v), (r), (d), __FILE__, __LINE__)
+
+static inline void __vx_acc_cres(struct vx_info *vxi,
+       int res, int dir, char *file, int line)
+{
+        if (vxi) {
+       if ((res == RLIMIT_NOFILE && VX_DEBUG_ACC_FILE) ||
+                       (res == RLIMIT_OPENFD && VX_DEBUG_ACC_OPENFD))
+       printk("vx_acc_cres[%5d,%2d]: %5d%s in %s:%d\n",
+                        (vxi?vxi->vx_id:-1), res,
+                        (vxi?atomic_read(&vxi->limit.rcur[res]):0),
+                       (dir>0)?"++":"--", file, line);
+                if (dir > 0)
+                        atomic_inc(&vxi->limit.rcur[res]);
+                else
+                        atomic_dec(&vxi->limit.rcur[res]);
+        }
+}
+
+#define vx_nproc_inc(p)        vx_acc_cres(current->vx_info, 1, RLIMIT_NPROC)
+#define vx_nproc_dec(p)        vx_acc_cres(current->vx_info,-1, RLIMIT_NPROC)
+
+#define vx_files_inc(f)        vx_acc_cres(current->vx_info, 1, RLIMIT_NOFILE)
+#define vx_files_dec(f)        vx_acc_cres(current->vx_info,-1, RLIMIT_NOFILE)
+
+#define vx_openfd_inc(f) vx_acc_cres(current->vx_info, 1, RLIMIT_OPENFD)
+#define vx_openfd_dec(f) vx_acc_cres(current->vx_info,-1, RLIMIT_OPENFD)
+
+/*
+#define vx_openfd_inc(f) do {                                  \
+       vx_acc_cres(current->vx_info, 1, RLIMIT_OPENFD);        \
+       printk("vx_openfd_inc: %d[#%d] in %s:%d\n",             \
+               f, current->xid, __FILE__, __LINE__);           \
+       } while (0)
+
+#define vx_openfd_dec(f) do {                                  \
+       vx_acc_cres(current->vx_info,-1, RLIMIT_OPENFD);        \
+       printk("vx_openfd_dec: %d[#%d] in %s:%d\n",             \
+               f, current->xid, __FILE__, __LINE__);           \
+       } while (0)
+*/
+
+#define vx_cres_avail(v,n,r) \
+        __vx_cres_avail((v), (r), (n), __FILE__, __LINE__)
+
+static inline int __vx_cres_avail(struct vx_info *vxi,
+                int res, int num, char *file, int line)
+{
+       unsigned long value;
+
+       if ((res == RLIMIT_NOFILE && VX_DEBUG_ACC_FILE) ||
+               (res == RLIMIT_OPENFD && VX_DEBUG_ACC_OPENFD))
+                printk("vx_cres_avail[%5d,%2d]: %5ld > %5d + %5d in %s:%d\n",
+                        (vxi?vxi->vx_id:-1), res,
+                       (vxi?vxi->limit.rlim[res]:1),
+                        (vxi?atomic_read(&vxi->limit.rcur[res]):0),
+                       num, file, line);
+        if (!vxi)
+                return 1;
+       value = atomic_read(&vxi->limit.rcur[res]);     
+       if (value > vxi->limit.rmax[res])
+               vxi->limit.rmax[res] = value;
+        if (vxi->limit.rlim[res] == RLIM_INFINITY)
+                return 1;
+        if (value + num <= vxi->limit.rlim[res])
+                return 1;
+       atomic_inc(&vxi->limit.lhit[res]);
+        return 0;
+}
+
+#define vx_nproc_avail(n) \
+       vx_cres_avail(current->vx_info, (n), RLIMIT_NPROC)
+
+#define vx_files_avail(n) \
+       vx_cres_avail(current->vx_info, (n), RLIMIT_NOFILE)
+
+#define vx_openfd_avail(n) \
+       vx_cres_avail(current->vx_info, (n), RLIMIT_OPENFD)
+
+
+/* socket limits */
+
+#define vx_sock_inc(f) vx_acc_cres(current->vx_info, 1, VLIMIT_SOCK)
+#define vx_sock_dec(f) vx_acc_cres(current->vx_info,-1, VLIMIT_SOCK)
+
+#define vx_sock_avail(n) \
+       vx_cres_avail(current->vx_info, (n), VLIMIT_SOCK)
+
+
+#else
+#warning duplicate inclusion
+#endif
diff --git a/include/linux/vs_memory.h b/include/linux/vs_memory.h
new file mode 100644 (file)
index 0000000..2fe9c08
--- /dev/null
@@ -0,0 +1,132 @@
+#ifndef _VX_VS_MEMORY_H
+#define _VX_VS_MEMORY_H
+
+
+// #define VX_DEBUG
+
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+#include "vserver/context.h"
+#include "vserver/limit.h"
+
+
+#define VX_DEBUG_ACC_RSS   0
+#define VX_DEBUG_ACC_VM    0
+#define VX_DEBUG_ACC_VML   0
+
+#if    (VX_DEBUG_ACC_RSS) || (VX_DEBUG_ACC_VM) || (VX_DEBUG_ACC_VML)
+#define vxdprintk(x...) printk("vxd: " x)
+#else
+#define vxdprintk(x...)
+#endif
+/* one-page step (d > 0: up, else down) on field v of *m and on resource r */
+#define vx_acc_page(m, d, v, r) \
+       __vx_acc_page(&((m)->v), (m)->mm_vx_info, r, d, __FILE__, __LINE__)
+
+static inline void __vx_acc_page(unsigned long *v, struct vx_info *vxi,
+                int res, int dir, char *file, int line)
+{
+        if (v) {
+                if (dir > 0)
+                        ++(*v);
+                else
+                        --(*v);
+        }
+        if (vxi) {
+                if (dir > 0)
+                        atomic_inc(&vxi->limit.rcur[res]);
+                else
+                        atomic_dec(&vxi->limit.rcur[res]);
+        }
+}
+
+/* add p pages to counter field v of *m and to resource r of m->mm_vx_info */
+#define vx_acc_pages(m, p, v, r) \
+       __vx_acc_pages(&((m)->v), (m)->mm_vx_info, r, p, __FILE__, __LINE__)
+/* v and vxi may each be NULL; the debug trace shows rcur[res] before the add */
+static inline void __vx_acc_pages(unsigned long *v, struct vx_info *vxi,
+               int res, int pages, char *file, int line)
+{
+       if ((res == RLIMIT_RSS && VX_DEBUG_ACC_RSS) ||
+               (res == RLIMIT_AS && VX_DEBUG_ACC_VM) ||
+               (res == RLIMIT_MEMLOCK && VX_DEBUG_ACC_VML))
+               vxdprintk("vx_acc_pages  [%5d,%2d]: %5d += %5d in %s:%d\n",
+                       (vxi?vxi->vx_id:-1), res,
+                       (vxi?atomic_read(&vxi->limit.rcur[res]):0),
+                       pages, file, line);
+       if (pages == 0)
+               return;
+       if (v)
+               *v += pages;
+       if (vxi)
+               atomic_add(pages, &vxi->limit.rcur[res]);
+}
+
+
+
+#define vx_acc_vmpage(m,d)     vx_acc_page(m, d, total_vm,  RLIMIT_AS)
+#define vx_acc_vmlpage(m,d)    vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK)
+#define vx_acc_rsspage(m,d)    vx_acc_page(m, d, rss,      RLIMIT_RSS)
+
+#define vx_acc_vmpages(m,p)    vx_acc_pages(m, p, total_vm,  RLIMIT_AS)
+#define vx_acc_vmlpages(m,p)   vx_acc_pages(m, p, locked_vm, RLIMIT_MEMLOCK)
+#define vx_acc_rsspages(m,p)   vx_acc_pages(m, p, rss,       RLIMIT_RSS)
+/* context-only accounting: the 0 counter pointer skips the *v update */
+#define vx_pages_add(s,r,p)    __vx_acc_pages(0, s, r, p, __FILE__, __LINE__)
+#define vx_pages_sub(s,r,p)    vx_pages_add(s, r, -(p))
+
+#define vx_vmpages_inc(m)      vx_acc_vmpage(m, 1)
+#define vx_vmpages_dec(m)      vx_acc_vmpage(m,-1)
+#define vx_vmpages_add(m,p)    vx_acc_vmpages(m, p)
+#define vx_vmpages_sub(m,p)    vx_acc_vmpages(m,-(p))
+
+#define vx_vmlocked_inc(m)     vx_acc_vmlpage(m, 1)
+#define vx_vmlocked_dec(m)     vx_acc_vmlpage(m,-1)
+#define vx_vmlocked_add(m,p)   vx_acc_vmlpages(m, p)
+#define vx_vmlocked_sub(m,p)   vx_acc_vmlpages(m,-(p))
+
+#define vx_rsspages_inc(m)     vx_acc_rsspage(m, 1)
+#define vx_rsspages_dec(m)     vx_acc_rsspage(m,-1)
+#define vx_rsspages_add(m,p)   vx_acc_rsspages(m, p)
+#define vx_rsspages_sub(m,p)   vx_acc_rsspages(m,-(p))
+
+
+
+#define vx_pages_avail(m, p, r) \
+        __vx_pages_avail((m)->mm_vx_info, (r), (p), __FILE__, __LINE__)
+
+static inline int __vx_pages_avail(struct vx_info *vxi,
+                int res, int pages, char *file, int line)
+{
+       unsigned long value;
+
+        if ((res == RLIMIT_RSS && VX_DEBUG_ACC_RSS) ||
+                (res == RLIMIT_AS && VX_DEBUG_ACC_VM) ||
+                (res == RLIMIT_MEMLOCK && VX_DEBUG_ACC_VML))
+                printk("vx_pages_avail[%5d,%2d]: %5ld > %5d + %5d in %s:%d\n",
+                        (vxi?vxi->vx_id:-1), res,
+                       (vxi?vxi->limit.rlim[res]:1),
+                        (vxi?atomic_read(&vxi->limit.rcur[res]):0),
+                       pages, file, line);
+        if (!vxi)
+                return 1;
+       value = atomic_read(&vxi->limit.rcur[res]);     
+       if (value > vxi->limit.rmax[res])
+               vxi->limit.rmax[res] = value;
+        if (vxi->limit.rlim[res] == RLIM_INFINITY)
+                return 1;
+        if (value + pages <= vxi->limit.rlim[res])
+                return 1;
+       atomic_inc(&vxi->limit.lhit[res]);
+        return 0;
+}
+
+#define vx_vmpages_avail(m,p)  vx_pages_avail(m, p, RLIMIT_AS)
+#define vx_vmlocked_avail(m,p) vx_pages_avail(m, p, RLIMIT_MEMLOCK)
+#define vx_rsspages_avail(m,p) vx_pages_avail(m, p, RLIMIT_RSS)
+
+#else
+#warning duplicate inclusion
+#endif
diff --git a/include/linux/vs_network.h b/include/linux/vs_network.h
new file mode 100644 (file)
index 0000000..0a3349c
--- /dev/null
@@ -0,0 +1,154 @@
+#ifndef _NX_VS_NETWORK_H
+#define _NX_VS_NETWORK_H
+
+
+// #define NX_DEBUG
+
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+#include "vserver/network.h"
+
+#if defined(NX_DEBUG)
+#define nxdprintk(x...) printk("nxd: " x)
+#else
+#define nxdprintk(x...)
+#endif
+
+
+extern int proc_pid_nx_info(struct task_struct *, char *);
+
+
+#define get_nx_info(i) __get_nx_info(i,__FILE__,__LINE__)
+
+static inline struct nx_info *__get_nx_info(struct nx_info *nxi,
+       const char *_file, int _line)
+{
+       if (!nxi)
+               return NULL;
+       nxdprintk("get_nx_info(%p[#%d.%d])\t%s:%d\n",
+               nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0,
+               _file, _line);
+       atomic_inc(&nxi->nx_usecnt);
+       return nxi;
+}
+
+/* pass (nxi) to call_rcu() with rcu_free_nx_info; an expression, caller adds ';' */
+#define        free_nx_info(nxi)       \
+       call_rcu(&(nxi)->nx_rcu, rcu_free_nx_info, (nxi))
+
+#define put_nx_info(i) __put_nx_info(i,__FILE__,__LINE__)
+
+static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line)
+{
+       if (!nxi)
+               return;
+       nxdprintk("put_nx_info(%p[#%d.%d])\t%s:%d\n",
+               nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0,
+               _file, _line);
+       if (atomic_dec_and_test(&nxi->nx_usecnt))
+               free_nx_info(nxi);
+}
+
+
+#define set_nx_info(p,i) __set_nx_info(p,i,__FILE__,__LINE__)
+
+static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi,
+       const char *_file, int _line)
+{
+       BUG_ON(*nxp);
+       if (!nxi)
+               return;
+       nxdprintk("set_nx_info(%p[#%d.%d.%d])\t%s:%d\n",
+               nxi, nxi?nxi->nx_id:0,
+               nxi?atomic_read(&nxi->nx_usecnt):0,
+               nxi?atomic_read(&nxi->nx_refcnt):0,
+               _file, _line);
+       atomic_inc(&nxi->nx_refcnt);
+       *nxp = __get_nx_info(nxi, _file, _line);
+}
+
+#define        clr_nx_info(p)  __clr_nx_info(p,__FILE__,__LINE__)
+
+static inline void __clr_nx_info(struct nx_info **nxp,
+       const char *_file, int _line)
+{
+       struct nx_info *nxo = *nxp;
+
+       if (!nxo)
+               return;
+       nxdprintk("clr_nx_info(%p[#%d.%d.%d])\t%s:%d\n",
+               nxo, nxo?nxo->nx_id:0,
+               nxo?atomic_read(&nxo->nx_usecnt):0,
+               nxo?atomic_read(&nxo->nx_refcnt):0,
+               _file, _line);
+       *nxp = NULL;
+       wmb();
+       if (nxo && atomic_dec_and_test(&nxo->nx_refcnt))
+               unhash_nx_info(nxo);
+       __put_nx_info(nxo, _file, _line);
+}
+
+
+#define task_get_nx_info(i)    __task_get_nx_info(i,__FILE__,__LINE__)
+
+static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p,
+       const char *_file, int _line)
+{
+       struct nx_info *nxi;
+       
+       task_lock(p);
+       nxi = __get_nx_info(p->nx_info, _file, _line);
+       task_unlock(p);
+       return nxi;
+}
+
+#define nx_verify_info(p,i)    \
+       __nx_verify_info((p)->nx_info,i,__FILE__,__LINE__)
+
+static __inline__ void __nx_verify_info(
+       struct nx_info *ipa, struct nx_info *ipb,
+       const char *_file, int _line)
+{
+       if (ipa == ipb)
+               return;
+       printk(KERN_ERR "ip bad assumption (%p==%p) at %s:%d\n",
+               ipa, ipb, _file, _line);
+}
+
+
+#define nx_task_nid(t) ((t)->nid)
+
+#define nx_current_nid() nx_task_nid(current)
+
+#define nx_check(c,m)  __nx_check(nx_current_nid(),c,m)
+
+#define nx_weak_check(c,m)     ((m) ? nx_check(c,m) : 1)
+
+#undef nxdprintk
+#define nxdprintk(x...)
+
+
+#define __nx_flags(v,m,f)      (((v) & (m)) ^ (f))
+
+#define        __nx_task_flags(t,m,f) \
+       (((t) && ((t)->nx_info)) ? \
+               __nx_flags((t)->nx_info->nx_flags,(m),(f)) : 0)
+
+#define nx_current_flags() \
+       ((current->nx_info) ? current->nx_info->nx_flags : 0)
+
+#define nx_flags(m,f)  __nx_flags(nx_current_flags(),(m),(f))
+
+
+#define nx_current_ncaps() \
+       ((current->nx_info) ? current->nx_info->nx_ncaps : 0)
+
+#define nx_ncaps(c)    (nx_current_ncaps() & (c))
+
+
+
+#else
+#warning duplicate inclusion
+#endif
diff --git a/include/linux/vs_socket.h b/include/linux/vs_socket.h
new file mode 100644 (file)
index 0000000..4992458
--- /dev/null
@@ -0,0 +1,65 @@
+#ifndef _VX_VS_SOCKET_H
+#define _VX_VS_SOCKET_H
+/* own guard name: _VX_VS_LIMIT_H is already the guard of vs_limit.h */
+
+// #define VX_DEBUG
+
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+#include "vserver/context.h"
+#include "vserver/network.h"
+
+
+/* socket accounting */
+
+#include <linux/socket.h>
+
+/* Pick the accounting slot for a socket family: 1, 2, 3 (PF_INET6) or 4. */
+static inline int vx_sock_type(int family)
+{
+       if (family == 1 || family == 2)
+               return family;
+       if (family == PF_INET6)
+               return 3;
+       /* every other family shares the catch-all slot */
+       return 4;
+}
+
+#define vx_acc_sock(v,f,p,s) \
+       __vx_acc_sock((v), (f), (p), (s), __FILE__, __LINE__)
+
+static inline void __vx_acc_sock(struct vx_info *vxi,
+       int family, int pos, int size, char *file, int line)
+{
+        if (vxi) {
+               int type = vx_sock_type(family);
+
+               atomic_inc(&vxi->cacct.sock[type][pos].count);
+               atomic_add(size, &vxi->cacct.sock[type][pos].total);
+        }
+}
+
+#define vx_sock_recv(sk,s) \
+       vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, (s))
+#define vx_sock_send(sk,s) \
+       vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, (s))
+#define vx_sock_fail(sk,s) \
+       vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, (s))
+
+
+#define        sock_vx_init(s)  do {           \
+       (s)->sk_xid = 0;                \
+       (s)->sk_vx_info = NULL;         \
+       } while (0)
+
+#define        sock_nx_init(s)  do {           \
+       (s)->sk_nid = 0;                \
+       (s)->sk_nx_info = NULL;         \
+       } while (0)
+
+
+#else
+#warning duplicate inclusion
+#endif
diff --git a/include/linux/vserver/dlimit.h b/include/linux/vserver/dlimit.h
new file mode 100644 (file)
index 0000000..74872ed
--- /dev/null
@@ -0,0 +1,83 @@
+#ifndef _VX_DLIMIT_H
+#define _VX_DLIMIT_H
+
+#include "switch.h"
+#include <linux/spinlock.h>
+
+/*  disk limit (dlimit) vserver commands */
+
+#define VCMD_add_dlimit                VC_CMD(DLIMIT, 1, 0)
+#define VCMD_rem_dlimit                VC_CMD(DLIMIT, 2, 0)
+
+#define VCMD_set_dlimit                VC_CMD(DLIMIT, 5, 0)
+#define VCMD_get_dlimit                VC_CMD(DLIMIT, 6, 0)
+
+
+struct  vcmd_ctx_dlimit_base_v0 {
+       const char __user *name;
+       uint32_t flags;
+};
+
+struct  vcmd_ctx_dlimit_v0 {
+       const char __user *name;
+       uint32_t space_used;                    /* used space in kbytes */
+       uint32_t space_total;                   /* maximum space in kbytes */
+       uint32_t inodes_used;                   /* used inodes */
+       uint32_t inodes_total;                  /* maximum inodes */
+       uint32_t reserved;                      /* reserved for root in % */
+       uint32_t flags;
+};
+
+#define CDLIM_UNSET             (0ULL)
+#define CDLIM_INFINITY          (~0ULL)
+#define CDLIM_KEEP              (~1ULL)
+
+
+#ifdef __KERNEL__
+
+struct super_block;
+
+struct dl_info {
+       struct hlist_node dl_hlist;             /* linked list of contexts */
+       struct rcu_head dl_rcu;                 /* the rcu head */
+       xid_t dl_xid;                           /* context id */
+       atomic_t dl_usecnt;                     /* usage count */
+       atomic_t dl_refcnt;                     /* reference count */
+
+       struct super_block *dl_sb;              /* associated superblock */
+
+//     struct rw_semaphore dl_sem;             /* protect the values */
+       spinlock_t dl_lock;                     /* protect the values */
+
+       uint64_t dl_space_used;                 /* used space in bytes */
+       uint64_t dl_space_total;                /* maximum space in bytes */
+       uint32_t dl_inodes_used;                /* used inodes */
+       uint32_t dl_inodes_total;               /* maximum inodes */
+
+       unsigned int dl_nrlmult;                /* non root limit mult */
+};
+
+extern void rcu_free_dl_info(void *);
+extern void unhash_dl_info(struct dl_info *);
+
+extern struct dl_info *locate_dl_info(struct super_block *, xid_t);
+
+
+struct kstatfs;
+
+extern void vx_vsi_statfs(struct super_block *, struct kstatfs *);
+
+
+extern int vc_add_dlimit(uint32_t, void __user *);
+extern int vc_rem_dlimit(uint32_t, void __user *);
+
+extern int vc_set_dlimit(uint32_t, void __user *);
+extern int vc_get_dlimit(uint32_t, void __user *);
+
+
+typedef        uint64_t dlsize_t;
+
+
+#endif /* __KERNEL__ */
+
+#endif /* _VX_DLIMIT_H */
diff --git a/kernel/vserver/dlimit.c b/kernel/vserver/dlimit.c
new file mode 100644 (file)
index 0000000..eb9282f
--- /dev/null
@@ -0,0 +1,439 @@
+/*
+ *  linux/kernel/vserver/dlimit.c
+ *
+ *  Virtual Server: Context Disk Limits
+ *
+ *  Copyright (C) 2004  Herbert Pötzl
+ *
+ *  V0.01  initial version
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/namespace.h>
+#include <linux/namei.h>
+#include <linux/statfs.h>
+#include <linux/vserver/switch.h>
+#include <linux/vs_base.h>
+#include <linux/vs_context.h>
+#include <linux/vs_dlimit.h>
+
+#include <asm/errno.h>
+#include <asm/uaccess.h>
+
+/*     __alloc_dl_info()
+
+       * hands back a zeroed dl_info bound to (sb, xid),
+         or NULL when the allocation fails
+       * the entry is not hashed, so it stays invisible        */
+
+static struct dl_info *__alloc_dl_info(struct super_block *sb, xid_t xid)
+{
+       struct dl_info *dli;
+
+       vxdprintk("alloc_dl_info(%p,%d)\n", sb, xid);
+
+       /* a dedicated slab cache might pay off here */
+       dli = kmalloc(sizeof(*dli), GFP_KERNEL);
+       if (dli == NULL)
+               return NULL;
+
+       memset(dli, 0, sizeof(*dli));
+
+       /* identity of the entry */
+       dli->dl_sb = sb;
+       dli->dl_xid = xid;
+
+       /* nobody uses or references the entry yet */
+       atomic_set(&dli->dl_usecnt, 0);
+       atomic_set(&dli->dl_refcnt, 0);
+
+       spin_lock_init(&dli->dl_lock);
+       INIT_HLIST_NODE(&dli->dl_hlist);
+       INIT_RCU_HEAD(&dli->dl_rcu);
+
+       /* further initialisation would go here */
+
+       vxdprintk("alloc_dl_info(%p,%d) = %p\n", sb, xid, dli);
+       return dli;
+}
+
+/*     __dealloc_dl_info()
+
+       * poisons the list link, wipes xid/sb and frees the
+         memory
+       * BUGs if the usage or reference count is non-zero      */
+
+static void __dealloc_dl_info(struct dl_info *dli)
+{
+       vxdprintk("dealloc_dl_info(%p)\n", dli);
+
+       /* invalidate link and identity before the memory goes away */
+       dli->dl_sb = NULL;
+       dli->dl_xid = -1;
+       dli->dl_hlist.next = LIST_POISON1;
+
+       /* an entry still in use or referenced must never get here */
+       BUG_ON(atomic_read(&dli->dl_usecnt) != 0);
+       BUG_ON(atomic_read(&dli->dl_refcnt) != 0);
+
+       kfree(dli);
+}
+
+
+/*     hash table for dl_info hash
+
+       * buckets are selected with __hashval(sb, xid)
+       * DL_HASH_SIZE is the number of buckets                 */
+
+#define        DL_HASH_SIZE    13
+
+struct hlist_head dl_info_hash[DL_HASH_SIZE];
+
+/* held around insertion into and removal from dl_info_hash */
+static spinlock_t dl_info_hash_lock = SPIN_LOCK_UNLOCKED;
+
+
+/*     __hashval()
+
+       * maps a (superblock, xid) pair to a bucket index in
+         the range 0 .. DL_HASH_SIZE-1
+       * the pointer is converted via unsigned long, which has
+         pointer width on Linux; a cast to unsigned int would
+         truncate it on 64-bit and trigger a size warning      */
+
+static inline unsigned int __hashval(struct super_block *sb, xid_t xid)
+{
+       return ((xid ^ (unsigned long)sb) % DL_HASH_SIZE);
+}
+
+
+
+/*     __hash_dl_info()
+
+       * publishes the dli in the global hash table; the table
+         takes its own reference via get_dl_info()
+       * caller must hold the hash_lock                        */
+
+static inline void __hash_dl_info(struct dl_info *dli)
+{
+       unsigned int bucket;
+
+       vxdprintk("__hash_dl_info: %p[#%d]\n", dli, dli->dl_xid);
+
+       /* reference owned by the hash table */
+       get_dl_info(dli);
+
+       bucket = __hashval(dli->dl_sb, dli->dl_xid);
+       hlist_add_head_rcu(&dli->dl_hlist, &dl_info_hash[bucket]);
+}
+
+/*     __unhash_dl_info()
+
+       * takes the dli out of the global hash table and drops
+         one reference with put_dl_info()
+       * caller must hold the hash_lock                        */
+
+static inline void __unhash_dl_info(struct dl_info *dli)
+{
+       vxdprintk("__unhash_dl_info: %p[#%d]\n", dli, dli->dl_xid);
+
+       /* unlink first, the put below may be the last one */
+       hlist_del_rcu(&(dli->dl_hlist));
+       put_dl_info(dli);
+}
+
+
+/*     hlist_for_each_rcu()
+
+       * walks an hlist through the raw ->next pointers
+       * the condition is "pos != NULL"; the ({ ...; 1; })
+         part only prefetches the following node
+       * after pos is advanced, smp_read_barrier_depends() is
+         issued; ({ ...; 0; }) is the discarded tail of the
+         comma expression                                      */
+
+#define hlist_for_each_rcu(pos, head) \
+       for (pos = (head)->first; pos && ({ prefetch(pos->next); 1;}); \
+               pos = pos->next, ({ smp_read_barrier_depends(); 0;}))
+
+
+/*     __lookup_dl_info()
+
+       * returns the hashed entry matching (sb, xid) or NULL
+       * caller must be inside rcu_read_lock()
+       * dl_refcnt is left alone, the caller has to take a
+         reference if the entry is kept                        */
+
+static inline struct dl_info *__lookup_dl_info(struct super_block *sb, xid_t xid)
+{
+       struct hlist_node *node;
+       struct hlist_head *bucket;
+
+       bucket = &dl_info_hash[__hashval(sb, xid)];
+
+       hlist_for_each_rcu(node, bucket) {
+               struct dl_info *entry;
+
+               entry = hlist_entry(node, struct dl_info, dl_hlist);
+               if (entry->dl_xid != xid || entry->dl_sb != sb)
+                       continue;
+               return entry;
+       }
+       return NULL;
+}
+
+
+/*     locate_dl_info()
+
+       * looks up the entry for (sb, xid) inside an RCU read
+         section and passes it through get_dl_info()
+       * a failed lookup hands NULL to get_dl_info(), which is
+         presumably NULL-safe (defined elsewhere - confirm)    */
+
+struct dl_info *locate_dl_info(struct super_block *sb, xid_t xid)
+{
+       struct dl_info *found;
+
+       rcu_read_lock();
+       found = __lookup_dl_info(sb, xid);
+       found = get_dl_info(found);
+       rcu_read_unlock();
+
+       return found;
+}
+
+/*     rcu_free_dl_info()
+
+       * obj is the struct dl_info to dispose of
+       * BUGs on a NULL obj or on negative counters
+       * frees the entry only if its usage count is zero,
+         otherwise just logs a message                         */
+
+void rcu_free_dl_info(void *obj)
+{
+       struct dl_info *dli = obj;
+       int uses, refs;
+
+       BUG_ON(dli == NULL);
+
+       uses = atomic_read(&dli->dl_usecnt);
+       BUG_ON(uses < 0);
+
+       refs = atomic_read(&dli->dl_refcnt);
+       BUG_ON(refs < 0);
+
+       if (uses != 0) {
+               printk("!!! rcu didn't free\n");
+               return;
+       }
+       __dealloc_dl_info(dli);
+}
+
+
+
+
+/*     vc_add_dlimit()
+
+       * create a disk limit entry for context id on the
+         filesystem holding vc_data.name and hash it
+       * data points to a struct vcmd_ctx_dlimit_base_v0 in
+         user space
+       * returns 0, -ENOSYS (vx_check failed), -EFAULT,
+         -EINVAL (no inode/superblock), -ENOMEM, -EEXIST
+         (entry already hashed) or the path lookup error       */
+
+int vc_add_dlimit(uint32_t id, void __user *data)
+{
+       struct nameidata nd;
+       struct vcmd_ctx_dlimit_base_v0 vc_data;
+       int ret;
+
+       if (!vx_check(0, VX_ADMIN))
+               return -ENOSYS;
+       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+
+       ret = user_path_walk_link(vc_data.name, &nd);
+       if (!ret) {
+               struct super_block *sb;
+               struct dl_info *dli;
+
+               ret = -EINVAL;
+               if (!nd.dentry->d_inode)
+                       goto out_release;
+               if (!(sb = nd.dentry->d_inode->i_sb))
+                       goto out_release;
+
+               /* allocate outside the spinlock (GFP_KERNEL), and do
+                  not hash a failed allocation */
+               ret = -ENOMEM;
+               dli = __alloc_dl_info(sb, id);
+               if (!dli)
+                       goto out_release;
+
+               spin_lock(&dl_info_hash_lock);
+
+               ret = -EEXIST;
+               if (__lookup_dl_info(sb, id))
+                       goto out_unlock;
+               __hash_dl_info(dli);
+               /* now owned by the hash table, must not be freed below */
+               dli = NULL;
+               ret = 0;
+
+       out_unlock:
+               spin_unlock(&dl_info_hash_lock);
+               /* still set only if the entry was never hashed */
+               if (dli)
+                       __dealloc_dl_info(dli);
+       out_release:
+               path_release(&nd);
+       }
+       return ret;
+}
+
+
+/*     vc_rem_dlimit()
+
+       * unhash the disk limit entry of context id on the
+         filesystem holding vc_data.name
+       * data points to a struct vcmd_ctx_dlimit_base_v0 in
+         user space
+       * returns 0, -ENOSYS (vx_check failed), -EFAULT,
+         -EINVAL (no inode/superblock), -ESRCH (no entry)
+         or the path lookup error                              */
+
+int vc_rem_dlimit(uint32_t id, void __user *data)
+{
+       struct vcmd_ctx_dlimit_base_v0 vc_data;
+       struct super_block *sb;
+       struct dl_info *dli;
+       struct nameidata nd;
+       int ret;
+
+       if (!vx_check(0, VX_ADMIN))
+               return -ENOSYS;
+       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+
+       ret = user_path_walk_link(vc_data.name, &nd);
+       if (ret)
+               return ret;
+
+       /* from here on the path has to be released again */
+       ret = -EINVAL;
+       if (!nd.dentry->d_inode)
+               goto out_release;
+       sb = nd.dentry->d_inode->i_sb;
+       if (!sb)
+               goto out_release;
+
+       spin_lock(&dl_info_hash_lock);
+       dli = __lookup_dl_info(sb, id);
+       if (dli) {
+               __unhash_dl_info(dli);
+               ret = 0;
+       } else {
+               ret = -ESRCH;
+       }
+       spin_unlock(&dl_info_hash_lock);
+
+out_release:
+       path_release(&nd);
+       return ret;
+}
+
+
+/*     vc_set_dlimit()
+
+       * update the disk limit values of context id on the
+         filesystem holding vc_data.name
+       * data points to a struct vcmd_ctx_dlimit_v0 in user
+         space; space values are shifted left by 10 before
+         they are stored in the byte counters
+       * a field equal to (uint32_t)CDLIM_KEEP leaves the
+         stored value untouched and is exempt from the sanity
+         checks, otherwise KEEP could never be passed for
+         'reserved' or for a 'used' value
+       * space_total == (uint32_t)CDLIM_INFINITY is stored as
+         the 64-bit CDLIM_INFINITY
+       * returns 0, -ENOSYS (vx_check failed), -EFAULT,
+         -EINVAL (bad values, no inode/superblock), -ESRCH
+         (no entry) or the path lookup error                   */
+
+int vc_set_dlimit(uint32_t id, void __user *data)
+{
+       struct nameidata nd;
+       struct vcmd_ctx_dlimit_v0 vc_data;
+       int ret;
+
+       if (!vx_check(0, VX_ADMIN))
+               return -ENOSYS;
+       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+
+       ret = user_path_walk_link(vc_data.name, &nd);
+       if (!ret) {
+               struct super_block *sb;
+               struct dl_info *dli;
+
+               ret = -EINVAL;
+               if (!nd.dentry->d_inode)
+                       goto out_release;
+               if (!(sb = nd.dentry->d_inode->i_sb))
+                       goto out_release;
+               /* only values that will actually be stored are checked */
+               if ((vc_data.reserved != (uint32_t)CDLIM_KEEP &&
+                       vc_data.reserved > 100) ||
+                       (vc_data.inodes_used != (uint32_t)CDLIM_KEEP &&
+                       vc_data.inodes_used > vc_data.inodes_total) ||
+                       (vc_data.space_used != (uint32_t)CDLIM_KEEP &&
+                       vc_data.space_used > vc_data.space_total))
+                       goto out_release;
+
+               ret = -ESRCH;
+               dli = locate_dl_info(sb, id);
+               if (!dli)
+                       goto out_release;
+
+               spin_lock(&dli->dl_lock);
+
+               if (vc_data.inodes_used != (uint32_t)CDLIM_KEEP)
+                       dli->dl_inodes_used = vc_data.inodes_used;
+               if (vc_data.inodes_total != (uint32_t)CDLIM_KEEP)
+                       dli->dl_inodes_total = vc_data.inodes_total;
+               if (vc_data.space_used != (uint32_t)CDLIM_KEEP) {
+                       /* assign first so the shift is done in 64 bit */
+                       dli->dl_space_used = vc_data.space_used;
+                       dli->dl_space_used <<= 10;
+               }
+               if (vc_data.space_total == (uint32_t)CDLIM_INFINITY)
+                       dli->dl_space_total = (uint64_t)CDLIM_INFINITY;
+               else if (vc_data.space_total != (uint32_t)CDLIM_KEEP) {
+                       dli->dl_space_total = vc_data.space_total;
+                       dli->dl_space_total <<= 10;
+               }
+               /* non-root share of the total, scaled by 1024 */
+               if (vc_data.reserved != (uint32_t)CDLIM_KEEP)
+                       dli->dl_nrlmult = (1 << 10) * (100 - vc_data.reserved) / 100;
+
+               spin_unlock(&dli->dl_lock);
+
+               put_dl_info(dli);
+               ret = 0;
+
+       out_release:
+               path_release(&nd);
+       }
+       return ret;
+}
+
+/*     vc_get_dlimit()
+
+       * report the disk limit values of context id on the
+         filesystem holding vc_data.name
+       * data points to a struct vcmd_ctx_dlimit_v0 in user
+         space; only 'name' is used as input, the value fields
+         are overwritten, so their incoming contents are not
+         validated
+       * space values are reported as the stored byte counts
+         shifted right by 10; an unlimited total is reported
+         as (uint32_t)CDLIM_INFINITY
+       * returns 0, -ENOSYS (vx_check failed), -EFAULT,
+         -EINVAL (no inode/superblock), -ESRCH (no entry)
+         or the path lookup error                              */
+
+int vc_get_dlimit(uint32_t id, void __user *data)
+{
+       struct nameidata nd;
+       struct vcmd_ctx_dlimit_v0 vc_data;
+       int ret;
+
+       if (!vx_check(0, VX_ADMIN))
+               return -ENOSYS;
+       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+
+       ret = user_path_walk_link(vc_data.name, &nd);
+       if (!ret) {
+               struct super_block *sb;
+               struct dl_info *dli;
+
+               ret = -EINVAL;
+               if (!nd.dentry->d_inode)
+                       goto out_release;
+               if (!(sb = nd.dentry->d_inode->i_sb))
+                       goto out_release;
+
+               ret = -ESRCH;
+               dli = locate_dl_info(sb, id);
+               if (!dli)
+                       goto out_release;
+
+               /* take a consistent snapshot of the values */
+               spin_lock(&dli->dl_lock);
+               vc_data.inodes_used = dli->dl_inodes_used;
+               vc_data.inodes_total = dli->dl_inodes_total;
+               vc_data.space_used = dli->dl_space_used >> 10;
+               if (dli->dl_space_total == (uint64_t)CDLIM_INFINITY)
+                       vc_data.space_total = (uint32_t)CDLIM_INFINITY;
+               else
+                       vc_data.space_total = dli->dl_space_total >> 10;
+
+               /* convert the 1/1024 multiplier back to a rounded percentage */
+               vc_data.reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10);
+               spin_unlock(&dli->dl_lock);
+
+               put_dl_info(dli);
+               ret = -EFAULT;
+               if (copy_to_user(data, &vc_data, sizeof(vc_data)))
+                       goto out_release;
+
+               ret = 0;
+       out_release:
+               path_release(&nd);
+       }
+       return ret;
+}
+
+
+/*     vx_vsi_statfs()
+
+       * reduce the values in buf to the disk limit of the
+         current context on sb; without a matching dl_info
+         buf is left untouched
+       * values are only ever lowered, never raised
+       * a total of CDLIM_INFINITY disables the inode resp.
+         the block part
+       * free inodes are clamped to 0 when more inodes are
+         used than allowed, like the free blocks, instead of
+         wrapping around in the unsigned subtraction           */
+
+void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf)
+{
+       struct dl_info *dli;
+       __u64 blimit, bfree, bavail;
+       __u32 ifree;
+
+       dli = locate_dl_info(sb, current->xid);
+       if (!dli)
+               return;
+
+       spin_lock(&dli->dl_lock);
+       if (dli->dl_inodes_total == (uint32_t)CDLIM_INFINITY)
+               goto no_ilim;
+
+       /* reduce max inodes available to limit */
+       if (buf->f_files > dli->dl_inodes_total)
+               buf->f_files = dli->dl_inodes_total;
+
+       if (dli->dl_inodes_total < dli->dl_inodes_used)
+               ifree = 0;
+       else
+               ifree = dli->dl_inodes_total - dli->dl_inodes_used;
+       /* reduce free inodes to min */
+       if (ifree < buf->f_ffree)
+               buf->f_ffree = ifree;
+
+no_ilim:
+       if (dli->dl_space_total == (uint64_t)CDLIM_INFINITY)
+               goto no_blim;
+
+       /* byte counts are converted to filesystem blocks */
+       blimit = dli->dl_space_total >> sb->s_blocksize_bits;
+
+       if (dli->dl_space_total < dli->dl_space_used)
+               bfree = 0;
+       else
+               bfree = (dli->dl_space_total - dli->dl_space_used)
+                       >> sb->s_blocksize_bits;
+
+       /* bytes usable by non-root: total scaled by dl_nrlmult/1024 */
+       bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult);
+       if (bavail < dli->dl_space_used)
+               bavail = 0;
+       else
+               bavail = (bavail - dli->dl_space_used)
+                       >> sb->s_blocksize_bits;
+
+       /* reduce max space available to limit */
+       if (buf->f_blocks > blimit)
+               buf->f_blocks = blimit;
+
+       /* reduce free space to min */
+       if (bfree < buf->f_bfree)
+               buf->f_bfree = bfree;
+
+       /* reduce avail space to min */
+       if (bavail < buf->f_bavail)
+               buf->f_bavail = bavail;
+
+no_blim:
+       spin_unlock(&dli->dl_lock);
+       put_dl_info(dli);
+}
+
diff --git a/kernel/vserver/helper.c b/kernel/vserver/helper.c
new file mode 100644 (file)
index 0000000..880b843
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+ *  linux/kernel/vserver/helper.c
+ *
+ *  Virtual Context Support
+ *
+ *  Copyright (C) 2004  Herbert Pötzl
+ *
+ *  V0.01  basic helper
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/reboot.h>
+#include <linux/kmod.h>
+#include <linux/vserver.h>
+#include <linux/vs_base.h>
+#include <linux/vs_context.h>
+
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+
+
+/* path of the user space helper, used as argv[0] by vs_reboot() */
+char vshelper_path[255] = "/sbin/vshelper";
+
+
+/*
+ *      vs_reboot() - hand a reboot request over to the vshelper
+ *
+ *      vshelper path is set via /proc/sys
+ *      invoked by vserver sys_reboot(), with
+ *      the following arguments
+ *
+ *      argv [0] = vshelper_path;
+ *      argv [1] = action: "restart", "halt", "poweroff", ...
+ *      argv [2] = context identifier
+ *      argv [3] = additional argument (restart2)
+ *
+ *      envp [*] = type-specific parameters
+ *                 (VS_UID, VS_PID, VS_CMD plus HOME, TERM, PATH)
+ *
+ *      cmd is one of the LINUX_REBOOT_CMD_* values; any value
+ *      without a case of its own is reported as "restart2"
+ *      without argv [3]. arg is the user space string of
+ *      LINUX_REBOOT_CMD_RESTART2 and unused otherwise.
+ *
+ *      returns 0 on success, -EFAULT if the restart2 string
+ *      cannot be copied, -EPERM if the helper call failed
+ */
+
+long vs_reboot(unsigned int cmd, void * arg)
+{
+       char id_buf[8], cmd_buf[32];
+       char uid_buf[32], pid_buf[32];
+       char buffer[256];
+
+       char *argv[] = {vshelper_path, NULL, id_buf, NULL, 0};
+       char *envp[] = {"HOME=/", "TERM=linux",
+                       "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+                       uid_buf, pid_buf, cmd_buf, 0};
+
+       snprintf(id_buf, sizeof(id_buf)-1, "%d", vx_current_xid());
+
+       snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd);
+       snprintf(uid_buf, sizeof(uid_buf)-1, "VS_UID=%d", current->uid);
+       snprintf(pid_buf, sizeof(pid_buf)-1, "VS_PID=%d", current->pid);
+
+       switch (cmd) {
+       case LINUX_REBOOT_CMD_RESTART:
+               argv[1] = "restart";
+               break;
+
+       case LINUX_REBOOT_CMD_HALT:
+               argv[1] = "halt";
+               break;
+
+       case LINUX_REBOOT_CMD_POWER_OFF:
+               argv[1] = "poweroff";
+               break;
+
+       case LINUX_REBOOT_CMD_SW_SUSPEND:
+               argv[1] = "swsusp";
+               break;
+
+       case LINUX_REBOOT_CMD_RESTART2:
+               if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0)
+                       return -EFAULT;
+               /* strncpy_from_user() does not terminate the string
+                  when the source fills the whole count */
+               buffer[sizeof(buffer) - 1] = 0;
+               argv[3] = buffer;
+               /* fall through */
+       default:
+               argv[1] = "restart2";
+               break;
+       }
+
+       /* maybe we should wait ? */
+       if (call_usermodehelper(*argv, argv, envp, 0)) {
+               printk( KERN_WARNING
+                       "vs_reboot(): failed to exec (%s %s %s %s)\n",
+                       vshelper_path, argv[1], argv[2], argv[3]);
+               return -EPERM;
+       }
+       return 0;
+}
+