upgrade to vserver 1.9.3.17
authorMarc Fiuczynski <mef@cs.princeton.edu>
Fri, 11 Feb 2005 05:34:41 +0000 (05:34 +0000)
committerMarc Fiuczynski <mef@cs.princeton.edu>
Fri, 11 Feb 2005 05:34:41 +0000 (05:34 +0000)
135 files changed:
arch/i386/Kconfig
arch/i386/kernel/traps.c
arch/ia64/kernel/perfmon.c
arch/ia64/mm/fault.c
arch/mips/kernel/syscall.c
arch/ppc64/mm/hugetlbpage.c
arch/sh64/mm/hugetlbpage.c
arch/um/kernel/process_kern.c
fs/attr.c
fs/devpts/inode.c
fs/exec.c
fs/ext2/balloc.c
fs/ext2/ialloc.c
fs/ext2/inode.c
fs/ext2/ioctl.c
fs/ext2/namei.c
fs/ext3/balloc.c
fs/ext3/ialloc.c
fs/ext3/inode.c
fs/ext3/ioctl.c
fs/ext3/namei.c
fs/file_table.c
fs/inode.c
fs/ioctl.c
fs/jfs/jfs_imap.c
fs/namei.c
fs/namespace.c
fs/nfs/dir.c
fs/nfs/inode.c
fs/open.c
fs/proc/array.c
fs/proc/base.c
fs/proc/generic.c
fs/proc/proc_misc.c
fs/reiserfs/namei.c
fs/super.c
fs/sysfs/mount.c
fs/xfs/linux-2.6/xfs_ioctl.c
fs/xfs/xfs_dinode.h
fs/xfs/xfs_fs.h
include/asm-i386/param.h
include/asm-parisc/unistd.h
include/asm-sparc64/tlb.h
include/linux/devpts_fs.h
include/linux/ext2_fs.h
include/linux/ext3_fs.h
include/linux/fs.h
include/linux/mount.h
include/linux/reiserfs_fs_sb.h
include/linux/sched.h
include/linux/sysctl.h
include/linux/sysfs.h
include/linux/vs_base.h
include/linux/vs_context.h
include/linux/vs_cvirt.h
include/linux/vs_dlimit.h
include/linux/vs_limit.h
include/linux/vs_memory.h
include/linux/vs_network.h
include/linux/vs_sched.h [new file with mode: 0644]
include/linux/vs_socket.h
include/linux/vserver.h [deleted file]
include/linux/vserver/context.h
include/linux/vserver/context_cmd.h [new file with mode: 0644]
include/linux/vserver/cvirt.h
include/linux/vserver/cvirt_cmd.h [new file with mode: 0644]
include/linux/vserver/cvirt_def.h [new file with mode: 0644]
include/linux/vserver/debug.h
include/linux/vserver/debug_cmd.h [new file with mode: 0644]
include/linux/vserver/dlimit.h
include/linux/vserver/inode.h
include/linux/vserver/legacy.h
include/linux/vserver/limit.h
include/linux/vserver/limit_cmd.h [new file with mode: 0644]
include/linux/vserver/limit_def.h [new file with mode: 0644]
include/linux/vserver/namespace.h
include/linux/vserver/network.h
include/linux/vserver/network_cmd.h [new file with mode: 0644]
include/linux/vserver/sched.h
include/linux/vserver/sched_cmd.h [new file with mode: 0644]
include/linux/vserver/sched_def.h [new file with mode: 0644]
include/linux/vserver/switch.h
include/linux/vserver/xid.h
include/net/route.h
ipc/msg.c
ipc/sem.c
ipc/shm.c
kernel/Makefile
kernel/capability.c
kernel/exit.c
kernel/fork.c
kernel/posix-timers.c
kernel/printk.c
kernel/sched.c
kernel/signal.c
kernel/sys.c
kernel/vserver/Kconfig
kernel/vserver/Makefile
kernel/vserver/context.c
kernel/vserver/cvirt.c
kernel/vserver/cvirt_init.h [new file with mode: 0644]
kernel/vserver/cvirt_proc.h [new file with mode: 0644]
kernel/vserver/dlimit.c
kernel/vserver/helper.c
kernel/vserver/init.c
kernel/vserver/inode.c
kernel/vserver/legacy.c
kernel/vserver/limit.c
kernel/vserver/limit_init.h [new file with mode: 0644]
kernel/vserver/limit_proc.h [new file with mode: 0644]
kernel/vserver/namespace.c
kernel/vserver/network.c
kernel/vserver/proc.c
kernel/vserver/sched.c
kernel/vserver/sched_init.h [new file with mode: 0644]
kernel/vserver/sched_proc.h [new file with mode: 0644]
kernel/vserver/signal.c
kernel/vserver/switch.c
kernel/vserver/sysctl.c
mm/fremap.c
mm/memory.c
mm/mlock.c
mm/mmap.c
mm/mremap.c
mm/nommu.c
mm/oom_kill.c
mm/page_alloc.c
mm/swapfile.c
mm/vmscan.c
net/core/dev.c
net/core/rtnetlink.c
net/socket.c
net/sunrpc/auth.c
net/sunrpc/auth_unix.c
security/security.c

index 6880e96..1e4f78c 100644 (file)
@@ -330,14 +330,6 @@ config MVIAC3_2
 
 endchoice
 
-config X86_HZ
-       int "Clock Tick Rate"
-       default 1000 if !(M386 || M486 || M586 || M586TSC || M586MMX)   
-       default 100 if (M386 || M486 || M586 || M586TSC || M586MMX)     
-       help
-         Select the kernel clock tick rate in interrupts per second.
-         Slower processors should choose 100; everything else 1000.
-
 config X86_GENERIC
        bool "Generic x86 support"
        help
@@ -561,6 +553,14 @@ config X86_IO_APIC
        depends on !SMP && X86_UP_IOAPIC
        default y
 
+config KERNEL_HZ
+       int "Timer Frequency (100-20000)"
+       range 100 20000
+       default "1000"
+       help
+         This allows you to specify the frequency at which the
+         kernel timer interrupt will occur.
+
 config X86_TSC
        bool
        depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2) && !X86_NUMAQ
index 2c4351d..adeaef6 100644 (file)
@@ -53,6 +53,7 @@
 
 #include <linux/irq.h>
 #include <linux/module.h>
+#include <linux/vserver/debug.h>
 
 #include "mach_traps.h"
 
@@ -306,6 +307,7 @@ void die(const char * str, struct pt_regs * regs, long err)
        };
        static int die_counter;
 
+       vxh_throw_oops();
        if (die.lock_owner != smp_processor_id()) {
                console_verbose();
                spin_lock_irq(&die.lock);
@@ -341,6 +343,7 @@ void die(const char * str, struct pt_regs * regs, long err)
        bust_spinlocks(0);
        die.lock_owner = -1;
        spin_unlock_irq(&die.lock);
+       vxh_dump_history();
        if (in_interrupt())
                panic("Fatal exception in interrupt");
 
index e3e7077..82d0682 100644 (file)
@@ -41,6 +41,8 @@
 #include <linux/vs_memory.h>
 #include <linux/vs_cvirt.h>
 #include <linux/bitops.h>
+#include <linux/vs_memory.h>
+#include <linux/vs_cvirt.h>
 
 #include <asm/errno.h>
 #include <asm/intrinsics.h>
index 25da1d4..8dce894 100644 (file)
@@ -44,10 +44,9 @@ expand_backing_store (struct vm_area_struct *vma, unsigned long address)
        vma->vm_end += PAGE_SIZE;
        // vma->vm_mm->total_vm += grow;
        vx_vmpages_add(vma->vm_mm, grow);
-       if (vma->vm_flags & VM_LOCKED) {
+       if (vma->vm_flags & VM_LOCKED)
                // vma->vm_mm->locked_vm += grow;
                vx_vmlocked_add(vma->vm_mm, grow);
-       }
        __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
        return 0;
 }
index 84e2ee6..5d8ec65 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/shm.h>
 #include <linux/vs_cvirt.h>
 #include <linux/compiler.h>
+#include <linux/vs_cvirt.h>
 
 #include <asm/branch.h>
 #include <asm/cachectl.h>
index 0c96be5..e9ecc72 100644 (file)
@@ -154,7 +154,7 @@ static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
        pte_t entry;
 
        // mm->rss += (HPAGE_SIZE / PAGE_SIZE);
-       vx_rsspages_sub(mm, HPAGE_SIZE / PAGE_SIZE);
+       vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE);
        if (write_access) {
                entry =
                    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
@@ -422,7 +422,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma,
 
                put_page(page);
        }
-       mm->rss -= (end - start) >> PAGE_SHIFT;
+       // mm->rss -= (end - start) >> PAGE_SHIFT;
+       vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT);
        flush_tlb_pending();
 }
 
index 50b2573..edbbc43 100644 (file)
@@ -62,8 +62,8 @@ static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
        unsigned long i;
        pte_t entry;
 
-       mm->rss += (HPAGE_SIZE / PAGE_SIZE);
-
+       // mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+       vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE);
        if (write_access)
                entry = pte_mkwrite(pte_mkdirty(mk_pte(page,
                                                       vma->vm_page_prot)));
@@ -115,7 +115,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                        pte_val(entry) += PAGE_SIZE;
                        dst_pte++;
                }
-               dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+               // dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+               vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE);
                addr += HPAGE_SIZE;
        }
        return 0;
@@ -206,7 +207,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma,
                        pte++;
                }
        }
-       mm->rss -= (end - start) >> PAGE_SHIFT;
+       // mm->rss -= (end - start) >> PAGE_SHIFT;
+       vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT);
        flush_tlb_range(vma, start, end);
 }
 
index b701cb2..bae4b73 100644 (file)
@@ -22,6 +22,8 @@
 #include "linux/vs_cvirt.h"
 #include "linux/proc_fs.h"
 #include "linux/ptrace.h"
+#include "linux/vs_cvirt.h"
+
 #include "asm/unistd.h"
 #include "asm/mman.h"
 #include "asm/segment.h"
index 5f78d75..fed1192 100644 (file)
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -14,9 +14,9 @@
 #include <linux/fcntl.h>
 #include <linux/quotaops.h>
 #include <linux/security.h>
-#include <linux/vs_base.h>
 #include <linux/proc_fs.h>
 #include <linux/devpts_fs.h>
+#include <linux/vserver/debug.h>
 
 /* Taken over from the old code... */
 
@@ -64,22 +64,19 @@ int inode_change_ok(struct inode *inode, struct iattr *attr)
                goto fine;
 
        if (IS_BARRIER(inode)) {
-               printk(KERN_WARNING
-                       "VSW: xid=%d messing with the barrier.\n",
+               vxwprintk(1, "xid=%d messing with the barrier.",
                        vx_current_xid());
                goto error;
        }
        switch (inode->i_sb->s_magic) {
                case PROC_SUPER_MAGIC:
-                       printk(KERN_WARNING
-                               "VSW: xid=%d messing with the procfs.\n",
+                       vxwprintk(1, "xid=%d messing with the procfs.",
                                vx_current_xid());
                        goto error;
                case DEVPTS_SUPER_MAGIC:
                        if (vx_check(inode->i_xid, VX_IDENT))
                                goto fine;
-                       printk(KERN_WARNING
-                               "VSW: xid=%d messing with the devpts.\n",
+                       vxwprintk(1, "xid=%d messing with the devpts.",
                                vx_current_xid());
                        goto error;
        }
index 6fb3d1f..004d7ac 100644 (file)
@@ -32,6 +32,25 @@ static struct xattr_handler *devpts_xattr_handlers[] = {
        NULL
 };
 
+static int devpts_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+       int ret = -EACCES;
+
+       if (vx_check(inode->i_xid, VX_IDENT))
+               ret = generic_permission(inode, mask, NULL);
+       return ret;
+}
+
+struct inode_operations devpts_file_inode_operations = {
+#ifdef CONFIG_DEVPTS_FS_XATTR
+       .setxattr       = generic_setxattr,
+       .getxattr       = generic_getxattr,
+       .listxattr      = generic_listxattr,
+       .removexattr    = generic_removexattr,
+#endif
+       .permission     = devpts_permission,
+};
+
 static struct vfsmount *devpts_mnt;
 static struct dentry *devpts_root;
 
@@ -208,26 +227,6 @@ static struct dentry *get_node(int num)
        return lookup_one_len(s, root, sprintf(s, "%d", num));
 }
 
-#ifdef CONFIG_DEVPTS_FS_XATTR
-static int devpts_permission(struct inode *inode, int mask, struct nameidata *nd)
-{
-       int ret = -EACCES;
-
-       if (vx_check(inode->i_xid, VX_IDENT))
-               ret = generic_permission(inode, mask, NULL);
-       return ret;
-}
-#endif
-
-struct inode_operations devpts_file_inode_operations = {
-#ifdef CONFIG_DEVPTS_FS_XATTR
-       .setxattr       = generic_setxattr,
-       .getxattr       = generic_getxattr,
-       .listxattr      = generic_listxattr,
-       .removexattr    = generic_removexattr,
-       .permission     = devpts_permission,
-#endif
-};
 
 int devpts_pty_new(struct tty_struct *tty)
 {
index b9888ba..b8b650a 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -48,8 +48,8 @@
 #include <linux/syscalls.h>
 #include <linux/rmap.h>
 #include <linux/ckrm.h>
-#include <linux/vs_memory.h>
 #include <linux/ckrm_mem.h>
+#include <linux/vs_memory.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
index 5fbe1ca..2aa5850 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/quotaops.h>
 #include <linux/sched.h>
 #include <linux/buffer_head.h>
-#include <linux/vs_base.h>
 #include <linux/vs_dlimit.h>
 
 /*
index 3d9fa57..3272b02 100644 (file)
@@ -18,7 +18,7 @@
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
 #include <linux/random.h>
-#include <linux/vs_base.h>
+
 #include <linux/vs_dlimit.h>
 
 #include "ext2.h"
@@ -470,7 +470,7 @@ struct inode *ext2_new_inode(struct inode *dir, int mode)
                return ERR_PTR(-ENOMEM);
 
        if (sb->s_flags & MS_TAGXID)
-               inode->i_xid = current->xid;
+               inode->i_xid = vx_current_xid();
        else
                inode->i_xid = 0;
 
index ba3cc99..ffd30ed 100644 (file)
@@ -1191,7 +1191,7 @@ static int ext2_update_inode(struct inode * inode, int do_sync)
                raw_inode->i_uid_high = 0;
                raw_inode->i_gid_high = 0;
        }
-#ifdef CONFIG_INOXID_GID32
+#ifdef CONFIG_INOXID_INTERN
        raw_inode->i_raw_xid = cpu_to_le16(inode->i_xid);
 #endif
        raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
index 594c16c..96bfa89 100644 (file)
@@ -50,11 +50,11 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                 *
                 * This test looks nicer. Thanks to Pauline Middelink
                 */
-               if (((oldflags & EXT2_IMMUTABLE_FL) ||
-                       ((flags ^ oldflags) &
-                        (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL)))
-                   && !capable(CAP_LINUX_IMMUTABLE)) {
-                       return -EPERM;          
+               if ((oldflags & EXT2_IMMUTABLE_FL) ||
+                       ((flags ^ oldflags) & (EXT2_APPEND_FL |
+                       EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL))) {
+                       if (!capable(CAP_LINUX_IMMUTABLE))
+                               return -EPERM;
                }
 
                flags = flags & EXT2_FL_USER_MODIFIABLE;
index 4c61667..bb62484 100644 (file)
@@ -31,6 +31,7 @@
  */
 
 #include <linux/pagemap.h>
+#include <linux/vserver/xid.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
@@ -81,6 +82,7 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
                inode = iget(dir->i_sb, ino);
                if (!inode)
                        return ERR_PTR(-EACCES);
+               vx_propagate_xid(nd, inode);
        }
        if (inode)
                return d_splice_alias(inode, dentry);
index 4839138..47fff3b 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/ext3_jbd.h>
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
-#include <linux/vs_base.h>
 #include <linux/vs_dlimit.h>
 
 /*
index 8c6456a..b7d4e57 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/random.h>
 #include <linux/vs_dlimit.h>
 #include <linux/bitops.h>
+#include <linux/vs_dlimit.h>
 
 #include <asm/byteorder.h>
 
@@ -447,7 +448,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
                return ERR_PTR(-ENOMEM);
 
        if (sb->s_flags & MS_TAGXID)
-               inode->i_xid = current->xid;
+               inode->i_xid = vx_current_xid();
        else
                inode->i_xid = 0;
 
index fac1e98..2a45280 100644 (file)
@@ -2582,7 +2582,7 @@ static int ext3_do_update_inode(handle_t *handle,
                raw_inode->i_uid_high = 0;
                raw_inode->i_gid_high = 0;
        }
-#ifdef CONFIG_INOXID_GID32
+#ifdef CONFIG_INOXID_INTERN
        raw_inode->i_raw_xid = cpu_to_le16(inode->i_xid);
 #endif
        raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
index a040edf..aaf679c 100644 (file)
@@ -60,11 +60,11 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                 *
                 * This test looks nicer. Thanks to Pauline Middelink
                 */
-               if (((oldflags & EXT3_IMMUTABLE_FL) ||
-                       ((flags ^ oldflags) &
-                        (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL)))
-                   && !capable(CAP_LINUX_IMMUTABLE)) {
-                       return -EPERM;          
+               if ((oldflags & EXT3_IMMUTABLE_FL) ||
+                       ((flags ^ oldflags) & (EXT3_APPEND_FL |
+                       EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL))) {
+                       if (!capable(CAP_LINUX_IMMUTABLE))
+                               return -EPERM;
                }
 
                /*
@@ -156,38 +156,6 @@ flags_err:
                        remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
                        return ret;
                }
-#endif
-#if defined(CONFIG_VSERVER_LEGACY) && !defined(CONFIG_INOXID_NONE)
-       case EXT3_IOC_SETXID: {
-               handle_t *handle;
-               struct ext3_iloc iloc;
-               int xid;
-               int err;
-
-               /* fixme: if stealth, return -ENOTTY */
-               if (!capable(CAP_CONTEXT))
-                       return -EPERM;
-               if (IS_RDONLY(inode))
-                       return -EROFS;
-               if (!(inode->i_sb->s_flags & MS_TAGXID))
-                       return -ENOSYS;
-               if (get_user(xid, (int *) arg))
-                       return -EFAULT;
-
-               handle = ext3_journal_start(inode, 1);
-               if (IS_ERR(handle))
-                       return PTR_ERR(handle);
-               err = ext3_reserve_inode_write(handle, inode, &iloc);
-               if (err)
-                       return err;
-
-               inode->i_xid = (xid & 0xFFFF);
-               inode->i_ctime = CURRENT_TIME;
-
-               err = ext3_mark_iloc_dirty(handle, inode, &iloc);
-               ext3_journal_stop(handle);
-               return err;
-       }
 #endif
        case EXT3_IOC_GETRSVSZ:
                if (test_opt(inode->i_sb, RESERVATION) && S_ISREG(inode->i_mode)) {
@@ -256,6 +224,46 @@ flags_err:
                return err;
        }
 
+#if defined(CONFIG_VSERVER_LEGACY) && !defined(CONFIG_INOXID_NONE)
+       /*
+        * Legacy vserver ioctl: read an xid from user space and store its
+        * low 16 bits in inode->i_xid under an ext3 journal handle.
+        */
+       case EXT3_IOC_SETXID: {
+               handle_t *handle;
+               struct ext3_iloc iloc;
+               int xid;
+               int err;
+
+               /* fixme: if stealth, return -ENOTTY */
+               if (!capable(CAP_CONTEXT))
+                       return -EPERM;
+               if (IS_RDONLY(inode))
+                       return -EROFS;
+               if (!(inode->i_sb->s_flags & MS_TAGXID))
+                       return -ENOSYS;
+               if (get_user(xid, (int *) arg))
+                       return -EFAULT;
+
+               handle = ext3_journal_start(inode, 1);
+               if (IS_ERR(handle))
+                       return PTR_ERR(handle);
+               err = ext3_reserve_inode_write(handle, inode, &iloc);
+               if (err) {
+                       /* the handle was started above; do not leak it */
+                       ext3_journal_stop(handle);
+                       return err;
+               }
+
+               inode->i_xid = (xid & 0xFFFF);
+               inode->i_ctime = CURRENT_TIME;
+
+               err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+               ext3_journal_stop(handle);
+               return err;
+       }
+#endif
+
        default:
                return -ENOTTY;
        }
index bfaf8a4..b0b8e10 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
 #include <linux/smp_lock.h>
+#include <linux/vserver/xid.h>
 #include "xattr.h"
 #include "acl.h"
 
@@ -989,6 +990,7 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
 
                if (!inode)
                        return ERR_PTR(-EACCES);
+               vx_propagate_xid(nd, inode);
        }
        if (inode)
                return d_splice_alias(inode, dentry);
index 75c94a4..d68ac33 100644 (file)
@@ -88,7 +88,7 @@ static int old_max;
                        /* f->f_version: 0 */
                        INIT_LIST_HEAD(&f->f_list);
                        // set_vx_info(&f->f_vx_info, current->vx_info);
-                       f->f_xid = current->xid;
+                       f->f_xid = vx_current_xid();
                        vx_files_inc(f);
                        return f;
                }
index a93f58c..471010b 100644 (file)
@@ -118,7 +118,7 @@ static struct inode *alloc_inode(struct super_block *sb)
                inode->i_sb = sb;
                // inode->i_dqh = dqhget(sb->s_dqh);
 
-               /* important because of inode slab reuse */
+               /* essential because of inode slab reuse */
                inode->i_xid = 0;
                inode->i_blkbits = sb->s_blocksize_bits;
                inode->i_flags = 0;
index 6af7a74..19e902d 100644 (file)
@@ -174,19 +174,6 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
                                error = vx_proc_ioctl(filp->f_dentry->d_inode, filp, cmd, arg);
                        break;
 #endif
-               case FIOC_SETIATTR:
-               case FIOC_GETIATTR:
-                       /*
-                        * Verify that this filp is a file object,
-                        * not (say) a socket.
-                        */
-                       error = -ENOTTY;
-                       if (S_ISREG(filp->f_dentry->d_inode->i_mode) ||
-                           S_ISDIR(filp->f_dentry->d_inode->i_mode))
-                               error = vc_iattr_ioctl(filp->f_dentry,
-                                                      cmd, arg);
-                       break;
-
                default:
                        error = -ENOTTY;
                        if (S_ISREG(filp->f_dentry->d_inode->i_mode))
index 9c483a6..efba306 100644 (file)
@@ -46,7 +46,6 @@
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
 #include <linux/vserver/xid.h>
-#include <linux/quotaops.h>
 
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
index 6fb8c25..6e74636 100644 (file)
@@ -28,7 +28,9 @@
 #include <linux/syscalls.h>
 #include <linux/mount.h>
 #include <linux/audit.h>
-#include <linux/vs_base.h>
+#include <linux/proc_fs.h>
+#include <linux/vserver/inode.h>
+#include <linux/vserver/debug.h>
 
 #include <asm/namei.h>
 #include <asm/uaccess.h>
@@ -230,6 +232,24 @@ int generic_permission(struct inode *inode, int mask,
        return -EACCES;
 }
 
+/* Return -EACCES for barrier inodes or inodes whose i_xid fails vx_check(). */
+static inline int xid_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+       if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN)) {
+               vxwprintk(1, "xid=%d did hit the barrier.", vx_current_xid());
+               return -EACCES;
+       }
+       if (inode->i_xid == 0)
+               return 0;
+       if (vx_check(inode->i_xid, VX_ADMIN|VX_WATCH|VX_IDENT))
+               return 0;
+       /* permission() can be reached with nd == NULL, so guard the path */
+       vxwprintk(1, "xid=%d denied access to %p[#%d,%lu] »%s«.",
+               vx_current_xid(), inode, inode->i_xid, inode->i_ino,
+               nd ? vxd_path(nd->dentry, nd->mnt) : "<null>");
+       return -EACCES;
+}
+
 int permission(struct inode * inode,int mask, struct nameidata *nd)
 {
        int retval;
@@ -243,6 +263,9 @@ int permission(struct inode * inode,int mask, struct nameidata *nd)
                (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
                return -EROFS;
 
+       if ((retval = xid_permission(inode, mask, nd)))
+               return retval;
+
        if (inode->i_op && inode->i_op->permission)
                retval = inode->i_op->permission(inode, submask, nd);
        else
@@ -645,15 +668,33 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
 {
        struct vfsmount *mnt = nd->mnt;
        struct dentry *dentry = __d_lookup(nd->dentry, name);
+       struct inode *inode;
 
        if (!dentry)
                goto need_lookup;
        if (dentry->d_op && dentry->d_op->d_revalidate)
                goto need_revalidate;
+       inode = dentry->d_inode;
+       if (!inode)
+               goto done;
+       if (!vx_check(inode->i_xid, VX_WATCH|VX_HOSTID|VX_IDENT))
+               goto hidden;
+       if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) {
+               struct proc_dir_entry *de = PDE(inode);
+
+               if (de && !vx_hide_check(0, de->vx_flags))
+                       goto hidden;
+       }
 done:
        path->mnt = mnt;
        path->dentry = dentry;
        return 0;
+hidden:
+       vxwprintk(1, "xid=%d did lookup hidden %p[#%d,%lu] »%s«.",
+               vx_current_xid(), inode, inode->i_xid, inode->i_ino,
+               vxd_path(dentry, mnt));
+       dput(dentry);
+       return -ENOENT;
 
 need_lookup:
        if (atomic)
index ed977eb..da22d93 100644 (file)
@@ -22,8 +22,8 @@
 #include <linux/namei.h>
 #include <linux/security.h>
 #include <linux/mount.h>
-#include <linux/vs_base.h>
 #include <linux/vserver/namespace.h>
+#include <linux/vserver/xid.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -164,6 +164,7 @@ clone_mnt(struct vfsmount *old, struct dentry *root)
                mnt->mnt_mountpoint = mnt->mnt_root;
                mnt->mnt_parent = mnt;
                mnt->mnt_namespace = old->mnt_namespace;
+               mnt->mnt_xid = old->mnt_xid;
 
                /* stick the duplicate mount on the same expiry list
                 * as the original if that was on one */
@@ -244,6 +245,11 @@ static int show_vfsmnt(struct seq_file *m, void *v)
        unsigned long s_flags = mnt->mnt_sb->s_flags;
        int mnt_flags = mnt->mnt_flags;
 
+       if (vx_flags(VXF_HIDE_MOUNT, 0))
+               return 0;
+       if (!vx_check_vfsmount(current->vx_info, mnt))
+               return 0;
+
        if (vx_flags(VXF_HIDE_MOUNT, 0))
                return 0;
        if (!vx_check_vfsmount(current->vx_info, mnt))
@@ -264,6 +270,8 @@ static int show_vfsmnt(struct seq_file *m, void *v)
                                seq_puts(m, p->unset_str);
                }
        }
+       if (mnt->mnt_flags & MNT_XID)
+               seq_printf(m, ",xid=%d", mnt->mnt_xid);
        if (mnt->mnt_sb->s_op->show_options)
                err = mnt->mnt_sb->s_op->show_options(m, mnt);
        seq_puts(m, " 0 0\n");
@@ -349,8 +357,10 @@ int may_umount(struct vfsmount *mnt)
 
 EXPORT_SYMBOL(may_umount);
 
-static inline void __umount_tree(struct vfsmount *mnt, struct list_head *kill)
+static inline void __umount_list(struct list_head *kill)
 {
+       struct vfsmount *mnt;
+
        while (!list_empty(kill)) {
                mnt = list_entry(kill->next, struct vfsmount, mnt_list);
                list_del_init(&mnt->mnt_list);
@@ -377,7 +387,7 @@ void umount_tree(struct vfsmount *mnt)
                list_del(&p->mnt_list);
                list_add(&p->mnt_list, &kill);
        }
-       __umount_tree(mnt, &kill);
+       __umount_list(&kill);
 }
 
 void umount_unused(struct vfsmount *mnt, struct fs_struct *fs)
@@ -391,7 +401,7 @@ void umount_unused(struct vfsmount *mnt, struct fs_struct *fs)
                list_del(&p->mnt_list);
                list_add(&p->mnt_list, &kill);
        }
-       __umount_tree(mnt, &kill);
+       __umount_list(&kill);
 }
 
 static int do_umount(struct vfsmount *mnt, int flags)
@@ -650,7 +660,7 @@ out_unlock:
 /*
  * do loopback mount.
  */
-static int do_loopback(struct nameidata *nd, char *old_name, unsigned long flags, int mnt_flags)
+static int do_loopback(struct nameidata *nd, char *old_name, xid_t xid, unsigned long flags, int mnt_flags)
 {
        struct nameidata old_nd;
        struct vfsmount *mnt = NULL;
@@ -681,6 +691,10 @@ static int do_loopback(struct nameidata *nd, char *old_name, unsigned long flags
                list_del_init(&mnt->mnt_fslink);
                spin_unlock(&vfsmount_lock);
 
+               if (flags & MS_XID) {
+                       mnt->mnt_xid = xid;
+                       mnt->mnt_flags |= MNT_XID;
+               }
                err = graft_tree(mnt, nd);
                if (err) {
                        spin_lock(&vfsmount_lock);
@@ -703,7 +717,7 @@ static int do_loopback(struct nameidata *nd, char *old_name, unsigned long flags
  */
 
 static int do_remount(struct nameidata *nd, int flags, int mnt_flags,
-                     void *data)
+                     void *data, xid_t xid)
 {
        int err;
        struct super_block * sb = nd->mnt->mnt_sb;
@@ -721,8 +735,11 @@ static int do_remount(struct nameidata *nd, int flags, int mnt_flags,
                mnt_flags |= MNT_NODEV;
        down_write(&sb->s_umount);
        err = do_remount_sb(sb, flags, data, 0);
-       if (!err)
+       if (!err) {
                nd->mnt->mnt_flags=mnt_flags;
+               if (flags & MS_XID)
+                       nd->mnt->mnt_xid = xid;
+       }
        up_write(&sb->s_umount);
        if (!err)
                security_sb_post_remount(nd->mnt, flags, data);
@@ -1048,6 +1065,7 @@ long do_mount(char * dev_name, char * dir_name, char *type_page,
        struct nameidata nd;
        int retval = 0;
        int mnt_flags = 0;
+       xid_t xid = 0;
 
        /* Discard magic */
        if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
@@ -1063,6 +1081,14 @@ long do_mount(char * dev_name, char * dir_name, char *type_page,
        if (data_page)
                ((char *)data_page)[PAGE_SIZE - 1] = 0;
 
+       retval = vx_parse_xid(data_page, &xid, 1);
+       if (retval) {
+               mnt_flags |= MNT_XID;
+               /* bind and re-mounts get xid flag */
+               if (flags & (MS_BIND|MS_REMOUNT))
+                       flags |= MS_XID;
+       }
+
        /* Separate the per-mountpoint flags */
        if (flags & MS_RDONLY)
                mnt_flags |= MNT_RDONLY;
@@ -1092,9 +1118,10 @@ long do_mount(char * dev_name, char * dir_name, char *type_page,
 
        if (flags & MS_REMOUNT)
                retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
-                                   data_page);
+                                   data_page, xid);
        else if (flags & MS_BIND)
-               retval = do_loopback(&nd, dev_name, flags, mnt_flags);
+               retval = do_loopback(&nd, dev_name, xid, flags, mnt_flags);
+
        else if (flags & MS_MOVE)
                retval = do_move_mount(&nd, dev_name);
        else
index 34a3c1f..0547efd 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/pagemap.h>
 #include <linux/smp_lock.h>
 #include <linux/namei.h>
+#include <linux/vserver/xid.h>
 
 #include "delegation.h"
 
@@ -759,6 +760,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
        inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
        if (!inode)
                goto out_unlock;
+       vx_propagate_xid(nd, inode);
 no_entry:
        error = 0;
        d_add(dentry, inode);
index 60b3074..38318ce 100644 (file)
@@ -723,7 +723,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 
 out:
        return inode;
-/*
+/*     FIXME
 fail_dlim:
        make_bad_inode(inode);
        iput(inode);
index 39b0d45..f09f648 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -27,6 +27,9 @@
 #include <linux/vs_dlimit.h>
 #include <linux/vserver/xid.h>
 #include <linux/syscalls.h>
+#include <linux/vs_limit.h>
+#include <linux/vs_dlimit.h>
+#include <linux/vserver/xid.h>
 
 #include <asm/unistd.h>
 
index a29937c..909c8ab 100644 (file)
@@ -73,7 +73,6 @@
 #include <linux/highmem.h>
 #include <linux/file.h>
 #include <linux/times.h>
-#include <linux/vs_base.h>
 #include <linux/vs_context.h>
 #include <linux/vs_network.h>
 #include <linux/vs_cvirt.h>
@@ -146,8 +145,8 @@ static inline const char * get_task_state(struct task_struct *tsk)
                                            TASK_INTERRUPTIBLE |
                                            TASK_UNINTERRUPTIBLE |
                                            TASK_STOPPED |
-                                           TASK_TRACED |
-                                           TASK_ONHOLD)) |
+                                          TASK_TRACED |
+                                          TASK_ONHOLD)) |
                        (tsk->exit_state & (EXIT_ZOMBIE |
                                            EXIT_DEAD));
        const char **p = &task_state_array[0];
@@ -163,12 +162,12 @@ static inline char * task_state(struct task_struct *p, char *buffer)
 {
        struct group_info *group_info;
        int g;
-       pid_t pid, ppid, tppid, tgid;
+       pid_t pid, ptgid, tppid, tgid;
 
        read_lock(&tasklist_lock);
        tgid = vx_map_tgid(p->tgid);
        pid = vx_map_pid(p->pid);
-       ppid = vx_map_pid(p->real_parent->pid);
+       ptgid = vx_map_tgid(p->group_leader->real_parent->tgid);
        tppid = vx_map_pid(p->parent->pid);
        buffer += sprintf(buffer,
                "State:\t%s\n"
@@ -181,8 +180,8 @@ static inline char * task_state(struct task_struct *p, char *buffer)
                "Gid:\t%d\t%d\t%d\t%d\n",
                get_task_state(p),
                (p->sleep_avg/1024)*100/(1020000000/1024),
-               tgid, pid, (pid > 1) ? ppid : 0,
-               p->pid && p->ptrace ? tppid : 0,
+               tgid, pid, (pid > 1) ? ptgid : 0,
+               pid_alive(p) && p->ptrace ? tppid : 0,
                p->uid, p->euid, p->suid, p->fsuid,
                p->gid, p->egid, p->sgid, p->fsgid);
        read_unlock(&tasklist_lock);
@@ -418,10 +417,11 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
                        stime += task->signal->stime;
                }
        }
-       if (task_vx_flags(task, VXF_VIRT_UPTIME, 0)) {
-               bias_uptime = task->vx_info->cvirt.bias_uptime.tv_sec * NSEC_PER_SEC
-                       + task->vx_info->cvirt.bias_uptime.tv_nsec;
-       }
+       pid = vx_info_map_pid(task->vx_info, pid_alive(task) ? task->pid : 0);
+       ppid = (!(pid > 1)) ? 0 : vx_info_map_tgid(task->vx_info,
+               task->group_leader->real_parent->tgid);
+       pgid = vx_info_map_pid(task->vx_info, pgid);
+
        read_unlock(&tasklist_lock);
 
        if (!whole || num_threads<2) {
@@ -453,9 +453,21 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
        /* convert timespec -> nsec*/
        start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC
                                + task->start_time.tv_nsec;
+
        /* convert nsec -> ticks */
        start_time = nsec_to_clock_t(start_time - bias_uptime);
 
+       /* fixup start time for virt uptime */
+       if (vx_flags(VXF_VIRT_UPTIME, 0)) {
+               unsigned long long bias =
+                       current->vx_info->cvirt.bias_clock;
+
+               if (start_time > bias)
+                       start_time -= bias;
+               else
+                       start_time = 0;
+       }
+
        res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
 %lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \
 %lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n",
index 0a5916c..a4caaae 100644 (file)
@@ -1268,6 +1268,9 @@ static struct file_operations proc_tgid_attr_operations;
 static struct inode_operations proc_tgid_attr_inode_operations;
 #endif
 
+extern int proc_pid_vx_info(struct task_struct *, char *);
+extern int proc_pid_nx_info(struct task_struct *, char *);
+
 /* SMP-safe */
 static struct dentry *proc_pident_lookup(struct inode *dir, 
                                         struct dentry *dentry,
@@ -1530,14 +1533,14 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
                              int buflen)
 {
        char tmp[30];
-       sprintf(tmp, "%d", vx_map_pid(current->tgid));
+       sprintf(tmp, "%d", vx_map_tgid(current->tgid));
        return vfs_readlink(dentry,buffer,buflen,tmp);
 }
 
 static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
        char tmp[30];
-       sprintf(tmp, "%d", vx_map_pid(current->tgid));
+       sprintf(tmp, "%d", vx_map_tgid(current->tgid));
        return vfs_follow_link(nd,tmp);
 }      
 
index 97e6b98..f42a812 100644 (file)
@@ -388,7 +388,8 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam
 
                                error = -EINVAL;
                                inode = proc_get_inode(dir->i_sb, ino, de);
-                               inode->i_xid = vx_current_xid();
+                               /* generic proc entries belong to the host */
+                               inode->i_xid = 0;
                                break;
                        }
                }
index dbe1fa7..e042c20 100644 (file)
@@ -53,6 +53,8 @@
 #include <asm/tlb.h>
 #include <asm/div64.h>
 
+#include <linux/vs_cvirt.h>
+
 #define LOAD_INT(x) ((x) >> FSHIFT)
 #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
 /*
index 30e19a1..de207dc 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/reiserfs_xattr.h>
 #include <linux/smp_lock.h>
 #include <linux/quotaops.h>
+#include <linux/vserver/xid.h>
 
 #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { i->i_nlink++; if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; }
 #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) i->i_nlink--;
@@ -350,6 +351,7 @@ static struct dentry * reiserfs_lookup (struct inode * dir, struct dentry * dent
            reiserfs_write_unlock(dir->i_sb);
            return ERR_PTR(-EACCES);
         }
+       vx_propagate_xid(nd, inode);
 
        /* Propogate the priv_object flag so we know we're in the priv tree */
        if (is_reiserfs_priv_object (dir))
index 47d461a..035abec 100644 (file)
@@ -39,6 +39,8 @@
 #include <linux/devpts_fs.h>
 #include <linux/proc_fs.h>
 #include <linux/kobject.h>
+#include <linux/devpts_fs.h>
+#include <linux/proc_fs.h>
 #include <asm/uaccess.h>
 
 
index 57b6991..07a29a2 100644 (file)
@@ -11,8 +11,6 @@
 
 #include "sysfs.h"
 
-/* Random magic number */
-#define SYSFS_MAGIC 0x62656572
 
 struct vfsmount *sysfs_mount;
 struct super_block * sysfs_sb = NULL;
@@ -36,7 +34,7 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
 
        sb->s_blocksize = PAGE_CACHE_SIZE;
        sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
-       sb->s_magic = SYSFS_MAGIC;
+       sb->s_magic = SYSFS_SUPER_MAGIC;
        sb->s_op = &sysfs_ops;
        sysfs_sb = sb;
 
index 17debc1..4c9a2fb 100644 (file)
@@ -1013,7 +1013,7 @@ xfs_ioc_fsgeometry(
 #define LINUX_XFLAG_NODUMP     0x00000040 /* do not dump file */
 #define LINUX_XFLAG_NOATIME    0x00000080 /* do not update atime */
 #define LINUX_XFLAG_BARRIER    0x00004000 /* chroot() barrier */
-#define LINUX_XFLAG_IUNLINK    0x00008000 /* Immutable unlink */
+#define LINUX_XFLAG_IUNLINK    0x00008000 /* immutable unlink */
 
 STATIC unsigned int
 xfs_merge_ioc_xflags(
@@ -1056,6 +1056,8 @@ xfs_di2lxflags(
                flags |= LINUX_XFLAG_IMMUTABLE;
        if (di_flags & XFS_DIFLAG_IUNLINK)
                flags |= LINUX_XFLAG_IUNLINK;
+       if (di_flags & XFS_DIFLAG_BARRIER)
+               flags |= LINUX_XFLAG_BARRIER;
        if (di_flags & XFS_DIFLAG_APPEND)
                flags |= LINUX_XFLAG_APPEND;
        if (di_flags & XFS_DIFLAG_SYNC)
index 1a46def..425dafd 100644 (file)
@@ -460,7 +460,7 @@ xfs_dinode_t *xfs_buf_to_dinode(struct xfs_buf *bp);
 #define XFS_DIFLAG_PROJINHERIT_BIT  9  /* create with parents projid */
 #define XFS_DIFLAG_NOSYMLINKS_BIT  10  /* disallow symlink creation */
 #define XFS_DIFLAG_BARRIER_BIT  12     /* chroot() barrier */
-#define XFS_DIFLAG_IUNLINK_BIT  13     /* inode has iunlink */
+#define XFS_DIFLAG_IUNLINK_BIT  13     /* immutable unlink */
 
 #define XFS_DIFLAG_REALTIME      (1 << XFS_DIFLAG_REALTIME_BIT)
 #define XFS_DIFLAG_PREALLOC      (1 << XFS_DIFLAG_PREALLOC_BIT)
@@ -476,6 +476,7 @@ xfs_dinode_t *xfs_buf_to_dinode(struct xfs_buf *bp);
 #define XFS_DIFLAG_BARRIER      (1 << XFS_DIFLAG_BARRIER_BIT)
 #define XFS_DIFLAG_IUNLINK      (1 << XFS_DIFLAG_IUNLINK_BIT)
 
+
 #define XFS_DIFLAG_ANY \
        (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
         XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
index 8290ea7..94a596a 100644 (file)
@@ -80,7 +80,7 @@ struct fsxattr {
 #define XFS_XFLAG_PROJINHERIT  0x00000200      /* create with parents projid */
 #define XFS_XFLAG_NOSYMLINKS   0x00000400      /* disallow symlink creation */
 #define XFS_XFLAG_BARRIER      0x00004000      /* chroot() barrier */
-#define XFS_XFLAG_IUNLINK      0x00008000      /* Immutable unlink */
+#define XFS_XFLAG_IUNLINK      0x00008000      /* immutable unlink */
 #define XFS_XFLAG_HASATTR      0x80000000      /* no DIFLAG for this   */
 
 /*
index 01e616e..209cda1 100644 (file)
@@ -4,7 +4,21 @@
 #include <linux/config.h>
 
 #ifdef __KERNEL__
-# define HZ            (CONFIG_X86_HZ)
+
+#if defined(CONFIG_X86_HZ) && defined(CONFIG_KERNEL_HZ)
+#error MEF: fix up CONFIG to only use one of these
+#endif
+
+#ifdef CONFIG_X86_HZ
+# define HZ            CONFIG_X86_HZ
+#else
+# ifdef CONFIG_KERNEL_HZ
+#  define HZ           CONFIG_KERNEL_HZ
+# else
+#  define HZ           1000            /* Internal kernel timer frequency */
+# endif
+#endif
+
 # define USER_HZ       100             /* .. some user interfaces are in "ticks" */
 # define CLOCKS_PER_SEC                (USER_HZ)       /* like times() */
 #endif
index 80c2db1..9fe32c4 100644 (file)
 #define __NR_get_mempolicy     (__NR_Linux + 261)
 #define __NR_set_mempolicy     (__NR_Linux + 262)
 #define __NR_vserver           (__NR_Linux + 273)
-#define __NR_Linux_syscalls    274
+
 
 #define HPUX_GATEWAY_ADDR       0xC0000004
 #define LINUX_GATEWAY_ADDR      0x100
index d224b21..87eff2f 100644 (file)
@@ -86,7 +86,8 @@ static inline void tlb_finish_mmu(struct mmu_gather *mp, unsigned long start, un
 
        if (rss < freed)
                freed = rss;
-       mm->rss = rss - freed;
+       // mm->rss = rss - freed;
+       vx_rsspages_sub(mm, freed);
 
        tlb_flush_mmu(mp);
 
index 5f82699..907c3c6 100644 (file)
@@ -30,7 +30,6 @@ static inline void devpts_pty_kill(int number) { }
 
 #endif
 
-#define DEVPTS_SUPER_MAGIC 0x1cd1
-
+#define DEVPTS_SUPER_MAGIC     0x1cd1
 
 #endif /* _LINUX_DEVPTS_FS_H */
index c2bd10f..a985802 100644 (file)
@@ -320,7 +320,7 @@ struct ext2_inode {
 #define EXT2_MOUNT_NO_UID32            0x0200  /* Disable 32-bit UIDs */
 #define EXT2_MOUNT_XATTR_USER          0x4000  /* Extended user attributes */
 #define EXT2_MOUNT_POSIX_ACL           0x8000  /* POSIX Access Control Lists */
-#define EXT2_MOUNT_TAG_XID             (1<<16) /* Enable Context Tags */
+#define EXT2_MOUNT_TAG_XID             (1<<24) /* Enable Context Tags */
 
 #define clear_opt(o, opt)              o &= ~EXT2_MOUNT_##opt
 #define set_opt(o, opt)                        o |= EXT2_MOUNT_##opt
index d11f5d1..f2d1cd9 100644 (file)
@@ -196,6 +196,9 @@ struct ext3_group_desc
 #define EXT3_FL_USER_VISIBLE           0x0003DFFF /* User visible flags */
 #define EXT3_FL_USER_MODIFIABLE                0x000380FF /* User modifiable flags */
 #endif
+#ifdef CONFIG_VSERVER_LEGACY
+#define EXT3_IOC_SETXID                        FIOC_SETXIDJ
+#endif
 
 /*
  * Inode dynamic state flags
@@ -366,7 +369,7 @@ struct ext3_inode {
 #define EXT3_MOUNT_POSIX_ACL           0x08000 /* POSIX Access Control Lists */
 #define EXT3_MOUNT_RESERVATION         0x10000 /* Preallocation */
 #define EXT3_MOUNT_BARRIER             0x20000 /* Use block barriers */
-#define EXT3_MOUNT_TAG_XID             0x40000 /* Enable Context Tags */
+#define EXT3_MOUNT_TAG_XID             (1<<24) /* Enable Context Tags */
 
 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H
index 667bf73..93a6a10 100644 (file)
@@ -126,6 +126,7 @@ extern int dir_notify_enable;
 #define MS_POSIXACL    (1<<16) /* VFS does not apply the umask */
 #define MS_ONE_SECOND  (1<<17) /* fs has 1 sec a/m/ctime resolution */
 #define MS_TAGXID      (1<<24) /* tag inodes with context information */
+#define MS_XID         (1<<25) /* use specific xid for this mount */
 #define MS_ACTIVE      (1<<30)
 #define MS_NOUSER      (1<<31)
 
@@ -152,8 +153,8 @@ extern int dir_notify_enable;
 #define S_DIRSYNC      64      /* Directory modifications are synchronous */
 #define S_NOCMTIME     128     /* Do not update file c/mtime */
 #define S_SWAPFILE     256     /* Do not truncate: swapon got its bmaps */
-#define S_BARRIER      512     /* Barrier for chroot() */
-#define S_IUNLINK      1024    /* Immutable unlink */
+#define S_BARRIER      1024    /* Barrier for chroot() */
+#define S_IUNLINK      2048    /* Immutable unlink */
 
 /*
  * Note that nosuid etc flags are inode-specific: setting some file-system
index 8821af0..03c6f6d 100644 (file)
@@ -22,6 +22,7 @@
 #define MNT_RDONLY     8
 #define MNT_NOATIME    16
 #define MNT_NODIRATIME 32
+#define MNT_XID                256
 
 struct vfsmount
 {
@@ -39,6 +40,7 @@ struct vfsmount
        struct list_head mnt_list;
        struct list_head mnt_fslink;    /* link in fs-specific expiry list */
        struct namespace *mnt_namespace; /* containing namespace */
+       xid_t mnt_xid;                  /* xid tagging used for vfsmount */
 };
 
 #define        MNT_IS_RDONLY(m)        ((m) && ((m)->mnt_flags & MNT_RDONLY))
index 553a799..9446bc5 100644 (file)
@@ -458,6 +458,7 @@ enum reiserfs_mount_options {
     REISERFS_BARRIER_NONE,
     REISERFS_BARRIER_FLUSH,
     REISERFS_TAGXID,
+
     /* Actions on error */
     REISERFS_ERROR_PANIC,
     REISERFS_ERROR_RO,
index 96b615c..9cb07d1 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/pid.h>
 #include <linux/percpu.h>
 #include <linux/topology.h>
+#include <linux/vs_base.h>
 
 struct exec_domain;
 extern int exec_shield;
@@ -949,15 +950,28 @@ static inline int sas_ss_flags(unsigned long sp)
 #ifdef CONFIG_SECURITY
 /* code is in security.c */
 extern int capable(int cap);
+extern int vx_capable(int cap, int ccap);
 #else
 static inline int capable(int cap)
 {
+       if (vx_check_bit(VXC_CAP_MASK, cap) && !vx_mcaps(1L << cap))
+               return 0;
        if (cap_raised(current->cap_effective, cap)) {
                current->flags |= PF_SUPERPRIV;
                return 1;
        }
        return 0;
 }
+
+static inline int vx_capable(int cap, int ccap)
+{
+       if (cap_raised(current->cap_effective, cap) &&
+               vx_ccaps(ccap)) {
+               current->flags |= PF_SUPERPRIV;
+               return 1;
+       }
+       return 0;
+}
 #endif
 
 
index a7f776e..cf93d31 100644 (file)
@@ -134,8 +134,8 @@ enum
        KERN_SPARC_SCONS_PWROFF=64, /* int: serial console power-off halt */
        KERN_HZ_TIMER=65,       /* int: hz timer on or off */
        KERN_UNKNOWN_NMI_PANIC=66, /* int: unknown nmi panic flag */
-       KERN_SETUID_DUMPABLE=67, /* int: behaviour of dumps for setuid core */
-       KERN_VSHELPER=68,       /* string: path to vshelper policy agent */
+       KERN_VSHELPER=67,       /* string: path to vshelper policy agent */
+       KERN_SETUID_DUMPABLE=68, /* int: behaviour of dumps for setuid core */
        KERN_DUMP=69,           /* dir: dump parameters */
 };
 
index d12ee2b..acb39e2 100644 (file)
@@ -11,6 +11,8 @@
 
 #include <asm/atomic.h>
 
+#define SYSFS_SUPER_MAGIC      0x62656572
+
 struct kobject;
 struct module;
 
index 4f04513..a1d34b6 100644 (file)
@@ -1,16 +1,8 @@
 #ifndef _VX_VS_BASE_H
 #define _VX_VS_BASE_H
 
-#include "vserver/context.h"
-
-// #define VX_DEBUG
 
-
-#if defined(VX_DEBUG)
-#define vxdprintk(x...) printk("vxd: " x)
-#else
-#define vxdprintk(x...)
-#endif
+#include "vserver/context.h"
 
 
 #define vx_task_xid(t) ((t)->xid)
@@ -26,7 +18,7 @@
  * check current context for ADMIN/WATCH and
  * optionally agains supplied argument
  */
-static __inline__ int __vx_check(xid_t cid, xid_t id, unsigned int mode)
+static inline int __vx_check(xid_t cid, xid_t id, unsigned int mode)
 {
        if (mode & VX_ARG_MASK) {
                if ((mode & VX_IDENT) &&
@@ -43,36 +35,70 @@ static __inline__ int __vx_check(xid_t cid, xid_t id, unsigned int mode)
                        return 1;
        }
        return (((mode & VX_ADMIN) && (cid == 0)) ||
-               ((mode & VX_WATCH) && (cid == 1)));
+               ((mode & VX_WATCH) && (cid == 1)) ||
+               ((mode & VX_HOSTID) && (id == 0)));
 }
 
 
-#define __vx_flags(v,m,f)      (((v) & (m)) ^ (f))
+#define __vx_state(v)  ((v) ? ((v)->vx_state) : 0)
+
+#define vx_info_state(v,m)     (__vx_state(v) & (m))
+
+
+/* generic flag merging */
+
+#define vx_check_flags(v,m,f)  (((v) & (m)) ^ (f))
+
+#define vx_mask_flags(v,f,m)   (((v) & ~(m)) | ((f) & (m)))
+
+#define vx_mask_mask(v,f,m)    (((v) & ~(m)) | ((v) & (f) & (m)))
+
+#define vx_check_bit(v,n)      ((v) & (1LL << (n)))
+
 
-#define        __vx_task_flags(t,m,f) \
-       (((t) && ((t)->vx_info)) ? \
-               __vx_flags((t)->vx_info->vx_flags,(m),(f)) : 0)
+/* context flags */
 
-#define vx_current_flags() \
-       ((current->vx_info) ? current->vx_info->vx_flags : 0)
+#define __vx_flags(v)  ((v) ? (v)->vx_flags : 0)
 
-#define vx_flags(m,f)  __vx_flags(vx_current_flags(),(m),(f))
+#define vx_current_flags()     __vx_flags(current->vx_info)
 
+#define vx_info_flags(v,m,f) \
+       vx_check_flags(__vx_flags(v),(m),(f))
 
-#define vx_current_ccaps() \
-       ((current->vx_info) ? current->vx_info->vx_ccaps : 0)
+#define task_vx_flags(t,m,f) \
+       ((t) && vx_info_flags((t)->vx_info, (m), (f)))
+
+#define vx_flags(m,f)  vx_info_flags(current->vx_info,(m),(f))
+
+
+/* context caps */
+
+#define __vx_ccaps(v)  ((v) ? (v)->vx_ccaps : 0)
+
+#define vx_current_ccaps()     __vx_ccaps(current->vx_info)
+
+#define vx_info_ccaps(v,c)     (__vx_ccaps(v) & (c))
+
+#define vx_ccaps(c)    vx_info_ccaps(current->vx_info,(c))
+
+
+#define __vx_mcaps(v)  ((v) ? (v)->vx_ccaps >> 32UL : ~0 )
+
+#define vx_info_mcaps(v,c)     (__vx_mcaps(v) & (c))
+
+#define vx_mcaps(c)    vx_info_mcaps(current->vx_info,(c))
 
-#define vx_ccaps(c)    (vx_current_ccaps() & (c))
 
 #define vx_current_bcaps() \
        (((current->vx_info) && !vx_flags(VXF_STATE_SETUP, 0)) ? \
        current->vx_info->vx_bcaps : cap_bset)
 
 
-/* generic flag merging */
-
-#define        vx_mask_flags(v,f,m)    (((v) & ~(m)) | ((f) & (m)))
+#define vx_current_initpid(n) \
+       (current->vx_info && \
+       (current->vx_info->vx_initpid == (n)))
 
-#define        vx_mask_mask(v,f,m)     (((v) & ~(m)) | ((v) & (f) & (m)))
 
+#else
+#warning duplicate inclusion
 #endif
index 9d119cd..cc41014 100644 (file)
@@ -3,16 +3,9 @@
 
 
 #include <linux/kernel.h>
-#include <linux/rcupdate.h>
-#include <linux/sched.h>
-
-#include "vserver/context.h"
 #include "vserver/debug.h"
 
 
-extern int proc_pid_vx_info(struct task_struct *, char *);
-
-
 #define get_vx_info(i) __get_vx_info(i,__FILE__,__LINE__)
 
 static inline struct vx_info *__get_vx_info(struct vx_info *vxi,
@@ -20,25 +13,28 @@ static inline struct vx_info *__get_vx_info(struct vx_info *vxi,
 {
        if (!vxi)
                return NULL;
+
        vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])",
                vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0,
                _file, _line);
+       vxh_get_vx_info(vxi);
+
        atomic_inc(&vxi->vx_usecnt);
        return vxi;
 }
 
-
-extern void free_vx_info(struct vx_info *);
-
 #define put_vx_info(i) __put_vx_info(i,__FILE__,__LINE__)
 
 static inline void __put_vx_info(struct vx_info *vxi, const char *_file, int _line)
 {
        if (!vxi)
                return;
+
        vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])",
                vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0,
                _file, _line);
+       vxh_put_vx_info(vxi);
+
        if (atomic_dec_and_test(&vxi->vx_usecnt))
                free_vx_info(vxi);
 }
@@ -58,6 +54,7 @@ static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi,
                vxi?atomic_read(&vxi->vx_usecnt):0,
                vxi?atomic_read(&vxi->vx_refcnt):0,
                _file, _line);
+       vxh_set_vx_info(vxi, vxp);
 
        atomic_inc(&vxi->vx_refcnt);
        vxo = xchg(vxp, __get_vx_info(vxi, _file, _line));
@@ -80,6 +77,7 @@ static inline void __clr_vx_info(struct vx_info **vxp,
                vxo?atomic_read(&vxo->vx_usecnt):0,
                vxo?atomic_read(&vxo->vx_refcnt):0,
                _file, _line);
+       vxh_clr_vx_info(vxo, vxp);
 
        if (atomic_dec_and_test(&vxo->vx_refcnt))
                unhash_vx_info(vxo);
@@ -87,7 +85,7 @@ static inline void __clr_vx_info(struct vx_info **vxp,
 }
 
 
-#define task_get_vx_info(i)    __task_get_vx_info(i,__FILE__,__LINE__)
+#define task_get_vx_info(p)    __task_get_vx_info(p,__FILE__,__LINE__)
 
 static __inline__ struct vx_info *__task_get_vx_info(struct task_struct *p,
        const char *_file, int _line)
index 65f4303..64b38c2 100644 (file)
@@ -2,16 +2,8 @@
 #define _VX_VS_CVIRT_H
 
 
-// #define VX_DEBUG
-
 #include "vserver/cvirt.h"
-#include "vs_base.h"
-
-#if defined(VX_DEBUG)
-#define vxdprintk(x...) printk("vxd: " x)
-#else
-#define vxdprintk(x...)
-#endif
+#include "vserver/debug.h"
 
 
 /* utsname virtualization */
@@ -29,42 +21,88 @@ static inline struct new_utsname *vx_new_utsname(void)
 /* pid faking stuff */
 
 
-#define vx_map_tgid(v,p) \
-       __vx_map_tgid((v), (p), __FILE__, __LINE__)
+#define vx_info_map_pid(v,p) \
+       __vx_info_map_pid((v), (p), __FUNC__, __FILE__, __LINE__)
+#define vx_info_map_tgid(v,p)  vx_info_map_pid(v,p)
+#define vx_map_pid(p)  vx_info_map_pid(current->vx_info, p)
+#define vx_map_tgid(p) vx_map_pid(p)
 
-static inline int __vx_map_tgid(struct vx_info *vxi, int pid,
-       char *file, int line)
+static inline int __vx_info_map_pid(struct vx_info *vxi, int pid,
+       const char *func, const char *file, int line)
 {
-       if (vxi && __vx_flags(vxi->vx_flags, VXF_INFO_INIT, 0)) {
-               vxdprintk("vx_map_tgid: %p/%llx: %d -> %d in %s:%d\n",
-                       vxi, vxi->vx_flags, pid,
-                       (pid == vxi->vx_initpid)?1:pid,
-                       file, line);
+       if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
+               vxfprintk(VXD_CBIT(cvirt, 2),
+                       "vx_map_tgid: %p/%llx: %d -> %d",
+                       vxi, (long long)vxi->vx_flags, pid,
+                       (pid && pid == vxi->vx_initpid)?1:pid,
+                       func, file, line);
+               if (pid == 0)
+                       return 0;
                if (pid == vxi->vx_initpid)
                        return 1;
        }
        return pid;
 }
 
-#define vx_rmap_tgid(v,p) \
-       __vx_rmap_tgid((v), (p), __FILE__, __LINE__)
+#define vx_info_rmap_pid(v,p) \
+       __vx_info_rmap_pid((v), (p), __FUNC__, __FILE__, __LINE__)
+#define vx_rmap_pid(p) vx_info_rmap_pid(current->vx_info, p)
+#define vx_rmap_tgid(p) vx_rmap_pid(p)
 
-static inline int __vx_rmap_tgid(struct vx_info *vxi, int pid,
-       char *file, int line)
+static inline int __vx_info_rmap_pid(struct vx_info *vxi, int pid,
+       const char *func, const char *file, int line)
 {
-       if (vxi && __vx_flags(vxi->vx_flags, VXF_INFO_INIT, 0)) {
-               vxdprintk("vx_rmap_tgid: %p/%llx: %d -> %d in %s:%d\n",
-                       vxi, vxi->vx_flags, pid,
+       if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
+               vxfprintk(VXD_CBIT(cvirt, 2),
+                       "vx_rmap_tgid: %p/%llx: %d -> %d",
+                       vxi, (long long)vxi->vx_flags, pid,
                        (pid == 1)?vxi->vx_initpid:pid,
-                       file, line);
+                       func, file, line);
                if ((pid == 1) && vxi->vx_initpid)
                        return vxi->vx_initpid;
+               if (pid == vxi->vx_initpid)
+                       return ~0U;
        }
        return pid;
 }
 
-#undef vxdprintk
-#define vxdprintk(x...)
+
+static inline void vx_activate_task(struct task_struct *p)
+{
+       struct vx_info *vxi;
+
+       if ((vxi = p->vx_info)) {
+               vx_update_load(vxi);
+               atomic_inc(&vxi->cvirt.nr_running);
+       }
+}
+
+static inline void vx_deactivate_task(struct task_struct *p)
+{
+       struct vx_info *vxi;
+
+       if ((vxi = p->vx_info)) {
+               vx_update_load(vxi);
+               atomic_dec(&vxi->cvirt.nr_running);
+       }
+}
+
+static inline void vx_uninterruptible_inc(struct task_struct *p)
+{
+       struct vx_info *vxi;
+
+       if ((vxi = p->vx_info))
+               atomic_inc(&vxi->cvirt.nr_uninterruptible);
+}
+
+static inline void vx_uninterruptible_dec(struct task_struct *p)
+{
+       struct vx_info *vxi;
+
+       if ((vxi = p->vx_info))
+               atomic_dec(&vxi->cvirt.nr_uninterruptible);
+}
+
 
 #else
 #warning duplicate inclusion
index 805c257..b927687 100644 (file)
@@ -1,11 +1,7 @@
 #ifndef _VX_VS_DLIMIT_H
 #define _VX_VS_DLIMIT_H
 
-#include <linux/kernel.h>
-#include <linux/rcupdate.h>
-#include <linux/sched.h>
 
-#include "vserver/context.h"
 #include "vserver/dlimit.h"
 #include "vserver/debug.h"
 
@@ -112,7 +108,7 @@ static inline int __dl_alloc_inode(struct super_block *sb,
                dli->dl_inodes_used++;
 #if 0
        else
-               printk("VSW: DLIMIT hit (%p,#%d), inode %d>=%d @ %s:%d\n",
+               vxwprintk("DLIMIT hit (%p,#%d), inode %d>=%d @ %s:%d",
                        sb, xid,
                        dli->dl_inodes_used, dli->dl_inodes_total,
                        file, line);
index 82e8de4..561df5a 100644 (file)
@@ -2,97 +2,78 @@
 #define _VX_VS_LIMIT_H
 
 
-// #define VX_DEBUG
-
-#include <linux/kernel.h>
-#include <linux/rcupdate.h>
-#include <linux/sched.h>
-
-#include "vserver/context.h"
 #include "vserver/limit.h"
+#include "vserver/debug.h"
 
 
 /* file limits */
 
-#define VX_DEBUG_ACC_FILE      0
-#define VX_DEBUG_ACC_OPENFD    0
-
-#if    (VX_DEBUG_ACC_FILE) || (VX_DEBUG_ACC_OPENFD)
-#define vxdprintk(x...) printk("vxd: " x)
-#else
-#define vxdprintk(x...)
-#endif
-
-
-#define vx_acc_cres(v,d,r) \
-       __vx_acc_cres((v), (r), (d), __FILE__, __LINE__)
 
 static inline void __vx_acc_cres(struct vx_info *vxi,
-       int res, int dir, char *file, int line)
+       int res, int dir, void *_data, char *_file, int _line)
 {
-        if (vxi) {
-       if ((res == RLIMIT_NOFILE && VX_DEBUG_ACC_FILE) ||
-                       (res == RLIMIT_OPENFD && VX_DEBUG_ACC_OPENFD))
-       printk("vx_acc_cres[%5d,%2d]: %5d%s in %s:%d\n",
-                        (vxi?vxi->vx_id:-1), res,
-                        (vxi?atomic_read(&vxi->limit.rcur[res]):0),
-                       (dir>0)?"++":"--", file, line);
-                if (dir > 0)
-                        atomic_inc(&vxi->limit.rcur[res]);
-                else
-                        atomic_dec(&vxi->limit.rcur[res]);
-        }
+       if (VXD_RLIMIT(res, RLIMIT_NOFILE) ||
+               VXD_RLIMIT(res, RLIMIT_NPROC) ||
+               VXD_RLIMIT(res, VLIMIT_NSOCK))
+               vxlprintk(1, "vx_acc_cres[%5d,%s,%2d]: %5d%s (%p)",
+                       (vxi?vxi->vx_id:-1), vlimit_name[res], res,
+                       (vxi?atomic_read(&vxi->limit.rcur[res]):0),
+                       (dir>0)?"++":"--", _data, _file, _line);
+       if (vxi) {
+               if (dir > 0)
+                       atomic_inc(&vxi->limit.rcur[res]);
+               else
+                       atomic_dec(&vxi->limit.rcur[res]);
+       }
 }
 
-#define vx_nproc_inc(p)        vx_acc_cres(current->vx_info, 1, RLIMIT_NPROC)
-#define vx_nproc_dec(p)        vx_acc_cres(current->vx_info,-1, RLIMIT_NPROC)
+#define vx_acc_cres(v,d,p,r) \
+       __vx_acc_cres((v), (r), (d), (p), __FILE__, __LINE__)
+
+#define vx_acc_cres_cond(x,d,p,r) \
+       __vx_acc_cres(((x) == vx_current_xid()) ? current->vx_info : 0,\
+       (r), (d), (p), __FILE__, __LINE__)
+
+#define vx_nproc_inc(p) \
+       vx_acc_cres((p)->vx_info, 1, (p), RLIMIT_NPROC)
 
-#define vx_files_inc(f)        vx_acc_cres(current->vx_info, 1, RLIMIT_NOFILE)
-#define vx_files_dec(f)        vx_acc_cres(current->vx_info,-1, RLIMIT_NOFILE)
+#define vx_nproc_dec(p) \
+       vx_acc_cres((p)->vx_info,-1, (p), RLIMIT_NPROC)
 
-#define vx_openfd_inc(f) vx_acc_cres(current->vx_info, 1, RLIMIT_OPENFD)
-#define vx_openfd_dec(f) vx_acc_cres(current->vx_info,-1, RLIMIT_OPENFD)
+#define vx_files_inc(f) \
+       vx_acc_cres_cond((f)->f_xid, 1, (f), RLIMIT_NOFILE)
 
-/*
-#define vx_openfd_inc(f) do {                                  \
-       vx_acc_cres(current->vx_info, 1, RLIMIT_OPENFD);        \
-       printk("vx_openfd_inc: %d[#%d] in %s:%d\n",             \
-               f, current->xid, __FILE__, __LINE__);           \
-       } while (0)
+#define vx_files_dec(f) \
+       vx_acc_cres_cond((f)->f_xid,-1, (f), RLIMIT_NOFILE)
 
-#define vx_openfd_dec(f) do {                                  \
-       vx_acc_cres(current->vx_info,-1, RLIMIT_OPENFD);        \
-       printk("vx_openfd_dec: %d[#%d] in %s:%d\n",             \
-               f, current->xid, __FILE__, __LINE__);           \
-       } while (0)
-*/
 
 #define vx_cres_avail(v,n,r) \
-        __vx_cres_avail((v), (r), (n), __FILE__, __LINE__)
+       __vx_cres_avail((v), (r), (n), __FILE__, __LINE__)
 
 static inline int __vx_cres_avail(struct vx_info *vxi,
-                int res, int num, char *file, int line)
+               int res, int num, char *_file, int _line)
 {
        unsigned long value;
 
-       if ((res == RLIMIT_NOFILE && VX_DEBUG_ACC_FILE) ||
-               (res == RLIMIT_OPENFD && VX_DEBUG_ACC_OPENFD))
-                printk("vx_cres_avail[%5d,%2d]: %5ld > %5d + %5d in %s:%d\n",
-                        (vxi?vxi->vx_id:-1), res,
+       if (VXD_RLIMIT(res, RLIMIT_NOFILE) ||
+               VXD_RLIMIT(res, RLIMIT_NPROC) ||
+               VXD_RLIMIT(res, VLIMIT_NSOCK))
+               vxlprintk(1, "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d",
+                       (vxi?vxi->vx_id:-1), vlimit_name[res], res,
                        (vxi?vxi->limit.rlim[res]:1),
-                        (vxi?atomic_read(&vxi->limit.rcur[res]):0),
-                       num, file, line);
-        if (!vxi)
-                return 1;
-       value = atomic_read(&vxi->limit.rcur[res]);     
+                       (vxi?atomic_read(&vxi->limit.rcur[res]):0),
+                       num, _file, _line);
+       if (!vxi)
+               return 1;
+       value = atomic_read(&vxi->limit.rcur[res]);
        if (value > vxi->limit.rmax[res])
                vxi->limit.rmax[res] = value;
-        if (vxi->limit.rlim[res] == RLIM_INFINITY)
-                return 1;
-        if (value + num <= vxi->limit.rlim[res])
-                return 1;
+       if (vxi->limit.rlim[res] == RLIM_INFINITY)
+               return 1;
+       if (value + num <= vxi->limit.rlim[res])
+               return 1;
        atomic_inc(&vxi->limit.lhit[res]);
-        return 0;
+       return 0;
 }
 
 #define vx_nproc_avail(n) \
@@ -101,18 +82,16 @@ static inline int __vx_cres_avail(struct vx_info *vxi,
 #define vx_files_avail(n) \
        vx_cres_avail(current->vx_info, (n), RLIMIT_NOFILE)
 
-#define vx_openfd_avail(n) \
-       vx_cres_avail(current->vx_info, (n), RLIMIT_OPENFD)
-
 
 /* socket limits */
 
-#define vx_sock_inc(f) vx_acc_cres(current->vx_info, 1, VLIMIT_SOCK)
-#define vx_sock_dec(f) vx_acc_cres(current->vx_info,-1, VLIMIT_SOCK)
+#define vx_sock_inc(s) \
+       vx_acc_cres((s)->sk_vx_info, 1, (s), VLIMIT_NSOCK)
+#define vx_sock_dec(s) \
+       vx_acc_cres((s)->sk_vx_info,-1, (s), VLIMIT_NSOCK)
 
 #define vx_sock_avail(n) \
-       vx_cres_avail(current->vx_info, (n), VLIMIT_SOCK)
-
+       vx_cres_avail(current->vx_info, (n), VLIMIT_NSOCK)
 
 #else
 #warning duplicate inclusion
index 2fe9c08..2509432 100644 (file)
@@ -2,44 +2,35 @@
 #define _VX_VS_MEMORY_H
 
 
-// #define VX_DEBUG
-
-#include <linux/kernel.h>
-#include <linux/rcupdate.h>
-#include <linux/sched.h>
-
-#include "vserver/context.h"
 #include "vserver/limit.h"
+#include "vserver/debug.h"
 
 
-#define VX_DEBUG_ACC_RSS   0
-#define VX_DEBUG_ACC_VM    0
-#define VX_DEBUG_ACC_VML   0
-
-#if    (VX_DEBUG_ACC_RSS) || (VX_DEBUG_ACC_VM) || (VX_DEBUG_ACC_VML)
-#define vxdprintk(x...) printk("vxd: " x)
-#else
-#define vxdprintk(x...)
-#endif
-
 #define vx_acc_page(m, d, v, r) \
        __vx_acc_page(&(m->v), m->mm_vx_info, r, d, __FILE__, __LINE__)
 
 static inline void __vx_acc_page(unsigned long *v, struct vx_info *vxi,
-                int res, int dir, char *file, int line)
+               int res, int dir, char *file, int line)
 {
-        if (v) {
-                if (dir > 0)
-                        ++(*v);
-                else
-                        --(*v);
-        }
-        if (vxi) {
-                if (dir > 0)
-                        atomic_inc(&vxi->limit.rcur[res]);
-                else
-                        atomic_dec(&vxi->limit.rcur[res]);
-        }
+       if (VXD_RLIMIT(res, RLIMIT_RSS) ||
+               VXD_RLIMIT(res, RLIMIT_AS) ||
+               VXD_RLIMIT(res, RLIMIT_MEMLOCK))
+               vxlprintk(1, "vx_acc_page[%5d,%s,%2d]: %5d%s",
+                       (vxi?vxi->vx_id:-1), vlimit_name[res], res,
+                       (vxi?atomic_read(&vxi->limit.rcur[res]):0),
+                       ((dir > 0)?"++":"--"), file, line); /* sign test: -1 is truthy too */
+       if (v) {
+               if (dir > 0)
+                       ++(*v);
+               else
+                       --(*v);
+       }
+       if (vxi) {
+               if (dir > 0)
+                       atomic_inc(&vxi->limit.rcur[res]);
+               else
+                       atomic_dec(&vxi->limit.rcur[res]);
+       }
 }
 
 
@@ -47,85 +38,85 @@ static inline void __vx_acc_page(unsigned long *v, struct vx_info *vxi,
        __vx_acc_pages(&(m->v), m->mm_vx_info, r, p, __FILE__, __LINE__)
 
 static inline void __vx_acc_pages(unsigned long *v, struct vx_info *vxi,
-                int res, int pages, char *file, int line)
+               int res, int pages, char *_file, int _line)
 {
-        if ((res == RLIMIT_RSS && VX_DEBUG_ACC_RSS) ||
-               (res == RLIMIT_AS && VX_DEBUG_ACC_VM) ||
-               (res == RLIMIT_MEMLOCK && VX_DEBUG_ACC_VML))
-               vxdprintk("vx_acc_pages  [%5d,%2d]: %5d += %5d in %s:%d\n",
-                       (vxi?vxi->vx_id:-1), res,
-                       (vxi?atomic_read(&vxi->limit.res[res]):0),
-                       pages, file, line);
-        if (pages == 0)
-                return;
-        if (v)
-                *v += pages;
-        if (vxi)
-                atomic_add(pages, &vxi->limit.rcur[res]);
+       if (VXD_RLIMIT(res, RLIMIT_RSS) ||
+               VXD_RLIMIT(res, RLIMIT_AS) ||
+               VXD_RLIMIT(res, RLIMIT_MEMLOCK))
+               vxlprintk(1, "vx_acc_pages[%5d,%s,%2d]: %5d += %5d",
+                       (vxi?vxi->vx_id:-1), vlimit_name[res], res,
+                       (vxi?atomic_read(&vxi->limit.rcur[res]):0),
+                       pages, _file, _line);
+       if (pages == 0)
+               return;
+       if (v)
+               *v += pages;
+       if (vxi)
+               atomic_add(pages, &vxi->limit.rcur[res]);
 }
 
 
 
-#define vx_acc_vmpage(m,d)     vx_acc_page(m, d, total_vm,  RLIMIT_AS)
-#define vx_acc_vmlpage(m,d)    vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK)
-#define vx_acc_rsspage(m,d)    vx_acc_page(m, d, rss,      RLIMIT_RSS)
+#define vx_acc_vmpage(m,d)     vx_acc_page(m, d, total_vm,  RLIMIT_AS)
+#define vx_acc_vmlpage(m,d)    vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK)
+#define vx_acc_rsspage(m,d)    vx_acc_page(m, d, rss,       RLIMIT_RSS)
 
-#define vx_acc_vmpages(m,p)    vx_acc_pages(m, p, total_vm,  RLIMIT_AS)
-#define vx_acc_vmlpages(m,p)   vx_acc_pages(m, p, locked_vm, RLIMIT_MEMLOCK)
-#define vx_acc_rsspages(m,p)   vx_acc_pages(m, p, rss,       RLIMIT_RSS)
+#define vx_acc_vmpages(m,p)    vx_acc_pages(m, p, total_vm,  RLIMIT_AS)
+#define vx_acc_vmlpages(m,p)   vx_acc_pages(m, p, locked_vm, RLIMIT_MEMLOCK)
+#define vx_acc_rsspages(m,p)   vx_acc_pages(m, p, rss,       RLIMIT_RSS)
 
-#define vx_pages_add(s,r,p)    __vx_acc_pages(0, s, r, p, __FILE__, __LINE__)
-#define vx_pages_sub(s,r,p)    __vx_pages_add(s, r, -(p))
+#define vx_pages_add(s,r,p)    __vx_acc_pages(0, s, r, p, __FILE__, __LINE__)
+#define vx_pages_sub(s,r,p)    vx_pages_add(s, r, -(p))
 
-#define vx_vmpages_inc(m)      vx_acc_vmpage(m, 1)
-#define vx_vmpages_dec(m)      vx_acc_vmpage(m,-1)
-#define vx_vmpages_add(m,p)    vx_acc_vmpages(m, p)
-#define vx_vmpages_sub(m,p)    vx_acc_vmpages(m,-(p))
+#define vx_vmpages_inc(m)      vx_acc_vmpage(m, 1)
+#define vx_vmpages_dec(m)      vx_acc_vmpage(m,-1)
+#define vx_vmpages_add(m,p)    vx_acc_vmpages(m, p)
+#define vx_vmpages_sub(m,p)    vx_acc_vmpages(m,-(p))
 
-#define vx_vmlocked_inc(m)     vx_acc_vmlpage(m, 1)
-#define vx_vmlocked_dec(m)     vx_acc_vmlpage(m,-1)
-#define vx_vmlocked_add(m,p)   vx_acc_vmlpages(m, p)
-#define vx_vmlocked_sub(m,p)   vx_acc_vmlpages(m,-(p))
+#define vx_vmlocked_inc(m)     vx_acc_vmlpage(m, 1)
+#define vx_vmlocked_dec(m)     vx_acc_vmlpage(m,-1)
+#define vx_vmlocked_add(m,p)   vx_acc_vmlpages(m, p)
+#define vx_vmlocked_sub(m,p)   vx_acc_vmlpages(m,-(p))
 
-#define vx_rsspages_inc(m)     vx_acc_rsspage(m, 1)
-#define vx_rsspages_dec(m)     vx_acc_rsspage(m,-1)
-#define vx_rsspages_add(m,p)   vx_acc_rsspages(m, p)
-#define vx_rsspages_sub(m,p)   vx_acc_rsspages(m,-(p))
+#define vx_rsspages_inc(m)     vx_acc_rsspage(m, 1)
+#define vx_rsspages_dec(m)     vx_acc_rsspage(m,-1)
+#define vx_rsspages_add(m,p)   vx_acc_rsspages(m, p)
+#define vx_rsspages_sub(m,p)   vx_acc_rsspages(m,-(p))
 
 
 
 #define vx_pages_avail(m, p, r) \
-        __vx_pages_avail((m)->mm_vx_info, (r), (p), __FILE__, __LINE__)
+       __vx_pages_avail((m)->mm_vx_info, (r), (p), __FILE__, __LINE__)
 
 static inline int __vx_pages_avail(struct vx_info *vxi,
-                int res, int pages, char *file, int line)
+               int res, int pages, char *_file, int _line)
 {
        unsigned long value;
 
-        if ((res == RLIMIT_RSS && VX_DEBUG_ACC_RSS) ||
-                (res == RLIMIT_AS && VX_DEBUG_ACC_VM) ||
-                (res == RLIMIT_MEMLOCK && VX_DEBUG_ACC_VML))
-                printk("vx_pages_avail[%5d,%2d]: %5ld > %5d + %5d in %s:%d\n",
-                        (vxi?vxi->vx_id:-1), res,
+       if (VXD_RLIMIT(res, RLIMIT_RSS) ||
+               VXD_RLIMIT(res, RLIMIT_AS) ||
+               VXD_RLIMIT(res, RLIMIT_MEMLOCK))
+               vxlprintk(1, "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d",
+                       (vxi?vxi->vx_id:-1), vlimit_name[res], res,
                        (vxi?vxi->limit.rlim[res]:1),
-                        (vxi?atomic_read(&vxi->limit.rcur[res]):0),
-                       pages, file, line);
-        if (!vxi)
-                return 1;
-       value = atomic_read(&vxi->limit.rcur[res]);     
+                       (vxi?atomic_read(&vxi->limit.rcur[res]):0),
+                       pages, _file, _line);
+       if (!vxi)
+               return 1;
+       value = atomic_read(&vxi->limit.rcur[res]);
        if (value > vxi->limit.rmax[res])
                vxi->limit.rmax[res] = value;
-        if (vxi->limit.rlim[res] == RLIM_INFINITY)
-                return 1;
-        if (value + pages <= vxi->limit.rlim[res])
-                return 1;
+       if (vxi->limit.rlim[res] == RLIM_INFINITY)
+               return 1;
+       if (value + pages <= vxi->limit.rlim[res])
+               return 1;
        atomic_inc(&vxi->limit.lhit[res]);
-        return 0;
+       return 0;
 }
 
-#define vx_vmpages_avail(m,p)  vx_pages_avail(m, p, RLIMIT_AS)
-#define vx_vmlocked_avail(m,p) vx_pages_avail(m, p, RLIMIT_MEMLOCK)
-#define vx_rsspages_avail(m,p) vx_pages_avail(m, p, RLIMIT_RSS)
+#define vx_vmpages_avail(m,p)  vx_pages_avail(m, p, RLIMIT_AS)
+#define vx_vmlocked_avail(m,p) vx_pages_avail(m, p, RLIMIT_MEMLOCK)
+#define vx_rsspages_avail(m,p) vx_pages_avail(m, p, RLIMIT_RSS)
 
 #else
 #warning duplicate inclusion
index 4bbf923..9461b86 100644 (file)
@@ -1,17 +1,11 @@
 #ifndef _NX_VS_NETWORK_H
 #define _NX_VS_NETWORK_H
 
-#include <linux/kernel.h>
-#include <linux/rcupdate.h>
-#include <linux/sched.h>
 
 #include "vserver/network.h"
 #include "vserver/debug.h"
 
 
-extern int proc_pid_nx_info(struct task_struct *, char *);
-
-
 #define get_nx_info(i) __get_nx_info(i,__FILE__,__LINE__)
 
 static inline struct nx_info *__get_nx_info(struct nx_info *nxi,
@@ -26,10 +20,6 @@ static inline struct nx_info *__get_nx_info(struct nx_info *nxi,
        return nxi;
 }
 
-
-#define free_nx_info(i) \
-       call_rcu(&i->nx_rcu, rcu_free_nx_info);
-
 #define put_nx_info(i) __put_nx_info(i,__FILE__,__LINE__)
 
 static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line)
diff --git a/include/linux/vs_sched.h b/include/linux/vs_sched.h
new file mode 100644 (file)
index 0000000..0eb1ee6
--- /dev/null
@@ -0,0 +1,73 @@
+#ifndef _VX_VS_SCHED_H
+#define _VX_VS_SCHED_H
+
+
+#include "vserver/sched.h"
+
+
+#define VAVAVOOM_RATIO          50
+
+#define MAX_PRIO_BIAS           20
+#define MIN_PRIO_BIAS          -20
+
+
+static inline int vx_tokens_avail(struct vx_info *vxi)
+{
+       return atomic_read(&vxi->sched.tokens);
+}
+
+static inline void vx_consume_token(struct vx_info *vxi)
+{
+       atomic_dec(&vxi->sched.tokens);
+}
+
+static inline int vx_need_resched(struct task_struct *p)
+{
+#ifdef CONFIG_VSERVER_HARDCPU
+       struct vx_info *vxi = p->vx_info;
+#endif
+       int slice = --p->time_slice;
+
+#ifdef CONFIG_VSERVER_HARDCPU
+       if (vxi) {
+               int tokens;
+
+               if ((tokens = vx_tokens_avail(vxi)) > 0)
+                       vx_consume_token(vxi);
+               /* for tokens > 0, one token was consumed */
+               if (tokens < 2)
+                       return 1;
+       }
+#endif
+       return (slice == 0);
+}
+
+
+static inline void vx_onhold_inc(struct vx_info *vxi)
+{
+       int onhold = atomic_read(&vxi->cvirt.nr_onhold);
+
+       atomic_inc(&vxi->cvirt.nr_onhold);
+       if (!onhold)
+               vxi->cvirt.onhold_last = jiffies;
+}
+
+static inline void __vx_onhold_update(struct vx_info *vxi)
+{
+       int cpu = smp_processor_id();
+       uint32_t now = jiffies;
+       uint32_t delta = now - vxi->cvirt.onhold_last;
+
+       vxi->cvirt.onhold_last = now;
+       vxi->sched.cpu[cpu].hold_ticks += delta;
+}
+
+static inline void vx_onhold_dec(struct vx_info *vxi)
+{
+       if (atomic_dec_and_test(&vxi->cvirt.nr_onhold))
+               __vx_onhold_update(vxi);
+}
+
+#else
+#warning duplicate inclusion
+#endif
index 4992458..d5505c5 100644 (file)
@@ -1,15 +1,8 @@
-#ifndef _VX_VS_LIMIT_H
-#define _VX_VS_LIMIT_H
+#ifndef _VX_VS_SOCKET_H
+#define _VX_VS_SOCKET_H
 
 
-// #define VX_DEBUG
-
-#include <linux/kernel.h>
-#include <linux/rcupdate.h>
-#include <linux/sched.h>
-
-#include "vserver/context.h"
-#include "vserver/network.h"
+#include "vserver/debug.h"
 
 
 /* socket accounting */
@@ -33,12 +26,12 @@ static inline int vx_sock_type(int family)
 static inline void __vx_acc_sock(struct vx_info *vxi,
        int family, int pos, int size, char *file, int line)
 {
-        if (vxi) {
+       if (vxi) {
                int type = vx_sock_type(family);
 
                atomic_inc(&vxi->cacct.sock[type][pos].count);
                atomic_add(size, &vxi->cacct.sock[type][pos].total);
-        }
+       }
 }
 
 #define vx_sock_recv(sk,s) \
@@ -49,12 +42,12 @@ static inline void __vx_acc_sock(struct vx_info *vxi,
        vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, (s))
 
 
-#define        sock_vx_init(s)  do {           \
+#define sock_vx_init(s) do {           \
        (s)->sk_xid = 0;                \
        (s)->sk_vx_info = NULL;         \
        } while (0)
 
-#define        sock_nx_init(s)  do {           \
+#define sock_nx_init(s) do {           \
        (s)->sk_nid = 0;                \
        (s)->sk_nx_info = NULL;         \
        } while (0)
diff --git a/include/linux/vserver.h b/include/linux/vserver.h
deleted file mode 100644 (file)
index 2c39ebb..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _LINUX_VSERVER_H
-#define _LINUX_VSERVER_H
-
-#include <linux/vserver/context.h>
-#include <linux/vserver/network.h>
-#include <linux/vinline.h>
-#include <linux/ninline.h>
-
-#endif
index 4061e7b..1fe76e7 100644 (file)
@@ -3,22 +3,75 @@
 
 #include <linux/types.h>
 
+
 #define MAX_S_CONTEXT  65535   /* Arbitrary limit */
 #define MIN_D_CONTEXT  49152   /* dynamic contexts start here */
 
 #define VX_DYNAMIC_ID  ((uint32_t)-1)          /* id for dynamic context */
 
+/* context flags */
+
+#define VXF_INFO_LOCK          0x00000001
+#define VXF_INFO_SCHED         0x00000002
+#define VXF_INFO_NPROC         0x00000004
+#define VXF_INFO_PRIVATE       0x00000008
+
+#define VXF_INFO_INIT          0x00000010
+#define VXF_INFO_HIDE          0x00000020
+#define VXF_INFO_ULIMIT                0x00000040
+#define VXF_INFO_NSPACE                0x00000080
+
+#define VXF_SCHED_HARD         0x00000100
+#define VXF_SCHED_PRIO         0x00000200
+#define VXF_SCHED_PAUSE                0x00000400
+
+#define VXF_VIRT_MEM           0x00010000
+#define VXF_VIRT_UPTIME                0x00020000
+#define VXF_VIRT_CPU           0x00040000
+#define VXF_VIRT_LOAD          0x00080000
+
+#define VXF_HIDE_MOUNT         0x01000000
+#define VXF_HIDE_NETIF         0x02000000
+
+#define VXF_STATE_SETUP                (1ULL<<32)
+#define VXF_STATE_INIT         (1ULL<<33)
+
+#define VXF_FORK_RSS           (1ULL<<48)
+#define VXF_PROLIFIC           (1ULL<<49)
+
+#define VXF_IGNEG_NICE         (1ULL<<52)
+
+#define VXF_ONE_TIME           (0x0003ULL<<32)
+
+
+/* context caps */
+
+#define        VXC_CAP_MASK            0x00000000
+
+#define VXC_SET_UTSNAME                0x00000001
+#define VXC_SET_RLIMIT         0x00000002
+
+#define VXC_RAW_ICMP           0x00000100
+
+#define VXC_SECURE_MOUNT       0x00010000
+#define VXC_SECURE_REMOUNT     0x00020000
+
+
+/* vshelper sync commands */
+
+#define        VS_CONTEXT_CREATED      1
+#define        VS_CONTEXT_DESTROY      2
+
+
 #ifdef __KERNEL__
 
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/rcupdate.h>
 
-#define _VX_INFO_DEF_
-#include "cvirt.h"
-#include "limit.h"
-#include "sched.h"
-#undef _VX_INFO_DEF_
+#include "limit_def.h"
+#include "sched_def.h"
+#include "cvirt_def.h"
 
 struct vx_info {
        struct hlist_node vx_hlist;             /* linked list of contexts */
@@ -48,6 +101,7 @@ struct vx_info {
        char vx_name[65];                       /* vserver name */
 };
 
+
 /* status flags */
 
 #define VXS_HASHED     0x0001
@@ -61,7 +115,8 @@ struct vx_info {
 
 #define VX_ADMIN       0x0001
 #define VX_WATCH       0x0002
-#define VX_DUMMY       0x0008
+#define VX_HIDE                0x0004
+#define VX_HOSTID      0x0008
 
 #define VX_IDENT       0x0010
 #define VX_EQUIV       0x0020
@@ -78,9 +133,10 @@ struct vx_info {
 
 struct rcu_head;
 
-// extern void rcu_free_vx_info(struct rcu_head *);
 extern void unhash_vx_info(struct vx_info *);
 
+extern void free_vx_info(struct vx_info *);
+
 extern struct vx_info *locate_vx_info(int);
 extern struct vx_info *locate_or_create_vx_info(int);
 
@@ -89,111 +145,11 @@ extern int vx_info_is_hashed(xid_t);
 
 extern int vx_migrate_task(struct task_struct *, struct vx_info *);
 
-#endif /* __KERNEL__ */
-
-#include "switch.h"
-
-/* vinfo commands */
-
-#define VCMD_task_xid          VC_CMD(VINFO, 1, 0)
-#define VCMD_task_nid          VC_CMD(VINFO, 2, 0)
-
-#ifdef __KERNEL__
-extern int vc_task_xid(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define VCMD_vx_info           VC_CMD(VINFO, 5, 0)
-#define VCMD_nx_info           VC_CMD(VINFO, 6, 0)
-
-struct vcmd_vx_info_v0 {
-       uint32_t xid;
-       uint32_t initpid;
-       /* more to come */
-};
-
-#ifdef __KERNEL__
-extern int vc_vx_info(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define VCMD_ctx_create                VC_CMD(VPROC, 1, 0)
-#define VCMD_ctx_migrate       VC_CMD(PROCMIG, 1, 0)
-
-#ifdef __KERNEL__
-extern int vc_ctx_create(uint32_t, void __user *);
-extern int vc_ctx_migrate(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define VCMD_get_cflags                VC_CMD(FLAGS, 1, 0)
-#define VCMD_set_cflags                VC_CMD(FLAGS, 2, 0)
-
-struct vcmd_ctx_flags_v0 {
-       uint64_t flagword;
-       uint64_t mask;
-};
-
-#ifdef __KERNEL__
-extern int vc_get_cflags(uint32_t, void __user *);
-extern int vc_set_cflags(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define VXF_INFO_LOCK          0x00000001
-#define VXF_INFO_SCHED         0x00000002
-#define VXF_INFO_NPROC         0x00000004
-#define VXF_INFO_PRIVATE       0x00000008
-
-#define VXF_INFO_INIT          0x00000010
-#define VXF_INFO_HIDE          0x00000020
-#define VXF_INFO_ULIMIT                0x00000040
-#define VXF_INFO_NSPACE                0x00000080
-
-#define VXF_SCHED_HARD         0x00000100
-#define VXF_SCHED_PRIO         0x00000200
-#define VXF_SCHED_PAUSE                0x00000400
-
-#define VXF_VIRT_MEM           0x00010000
-#define VXF_VIRT_UPTIME                0x00020000
-#define VXF_VIRT_CPU           0x00040000
-#define VXF_VIRT_LOAD          0x00080000
-
-#define VXF_HIDE_MOUNT         0x01000000
-#define VXF_HIDE_NETIF         0x02000000
-
-#define VXF_STATE_SETUP                (1ULL<<32)
-#define VXF_STATE_INIT         (1ULL<<33)
-
-#define VXF_FORK_RSS           (1ULL<<48)
-#define VXF_PROLIFIC           (1ULL<<49)
-
-#define VXF_IGNEG_NICE         (1ULL<<52)
+// extern int proc_pid_vx_info(struct task_struct *, char *);
 
-#define VXF_ONE_TIME           (0x0003ULL<<32)
-
-#define VCMD_get_ccaps         VC_CMD(FLAGS, 3, 0)
-#define VCMD_set_ccaps         VC_CMD(FLAGS, 4, 0)
-
-struct vcmd_ctx_caps_v0 {
-       uint64_t bcaps;
-       uint64_t ccaps;
-       uint64_t cmask;
-};
-
-#ifdef __KERNEL__
-extern int vc_get_ccaps(uint32_t, void __user *);
-extern int vc_set_ccaps(uint32_t, void __user *);
+extern long vs_context_state(unsigned int);
 
 #endif /* __KERNEL__ */
-
-#define VXC_SET_UTSNAME                0x00000001
-#define VXC_SET_RLIMIT         0x00000002
-
-#define VXC_RAW_ICMP           0x00000100
-
-#define VXC_SECURE_MOUNT       0x00010000
-#define VXC_SECURE_REMOUNT     0x00020000
-
-
+#else  /* _VX_CONTEXT_H */
+#warning duplicate inclusion
 #endif /* _VX_CONTEXT_H */
diff --git a/include/linux/vserver/context_cmd.h b/include/linux/vserver/context_cmd.h
new file mode 100644 (file)
index 0000000..637a0d8
--- /dev/null
@@ -0,0 +1,73 @@
+#ifndef _VX_CONTEXT_CMD_H
+#define _VX_CONTEXT_CMD_H
+
+
+/* vinfo commands */
+
+#define VCMD_task_xid          VC_CMD(VINFO, 1, 0)
+
+#ifdef __KERNEL__
+extern int vc_task_xid(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_vx_info           VC_CMD(VINFO, 5, 0)
+
+struct vcmd_vx_info_v0 {
+       uint32_t xid;
+       uint32_t initpid;
+       /* more to come */
+};
+
+#ifdef __KERNEL__
+extern int vc_vx_info(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+
+/* context commands */
+
+#define VCMD_ctx_create                VC_CMD(VPROC, 1, 0)
+#define VCMD_ctx_migrate       VC_CMD(PROCMIG, 1, 0)
+
+#ifdef __KERNEL__
+extern int vc_ctx_create(uint32_t, void __user *);
+extern int vc_ctx_migrate(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+
+/* flag commands */
+
+#define VCMD_get_cflags                VC_CMD(FLAGS, 1, 0)
+#define VCMD_set_cflags                VC_CMD(FLAGS, 2, 0)
+
+struct vcmd_ctx_flags_v0 {
+       uint64_t flagword;
+       uint64_t mask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_cflags(uint32_t, void __user *);
+extern int vc_set_cflags(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+
+/* context caps commands */
+
+#define VCMD_get_ccaps         VC_CMD(FLAGS, 3, 0)
+#define VCMD_set_ccaps         VC_CMD(FLAGS, 4, 0)
+
+struct vcmd_ctx_caps_v0 {
+       uint64_t bcaps;
+       uint64_t ccaps;
+       uint64_t cmask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_ccaps(uint32_t, void __user *);
+extern int vc_set_ccaps(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_CONTEXT_CMD_H */
index ba3a253..31c47a7 100644 (file)
-#if    defined(__KERNEL__) && defined(_VX_INFO_DEF_)
-
-#include <linux/utsname.h>
-#include <linux/rwsem.h>
-#include <linux/jiffies.h>
-#include <linux/time.h>
-#include <asm/atomic.h>
-
-/* context sub struct */
-
-struct _vx_cvirt {
-       int max_threads;
-
-       unsigned int bias_cswtch;
-       struct timespec bias_idle;
-       struct timespec bias_tp;
-       uint64_t bias_jiffies;
-
-       struct new_utsname utsname;
-};
-
-struct sock_acc {
-       atomic_t count;
-       atomic_t total;
-};
-
-struct _vx_cacct {
-       atomic_t nr_threads;
-       int nr_running;
-
-       unsigned long total_forks;
-
-       struct sock_acc sock[5][3];
-};
-
-
-static inline long vx_sock_count(struct _vx_cacct *cacct, int type, int pos)
-{
-       return atomic_read(&cacct->sock[type][pos].count);
-}
-
-
-static inline long vx_sock_total(struct _vx_cacct *cacct, int type, int pos)
-{
-       return atomic_read(&cacct->sock[type][pos].total);
-}
-
-
-extern uint64_t vx_idle_jiffies(void);
-
-static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt)
-{
-       uint64_t idle_jiffies = vx_idle_jiffies();
-
-       // new->virt.bias_cswtch = kstat.context_swtch;
-       cvirt->bias_jiffies = get_jiffies_64();
-
-       jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle);
-       do_posix_clock_monotonic_gettime(&cvirt->bias_tp);
-
-       down_read(&uts_sem);
-       cvirt->utsname = system_utsname;
-       up_read(&uts_sem);
-}
-
-static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt)
-{
-       return;
-}
-
-static inline void vx_info_init_cacct(struct _vx_cacct *cacct)
-{
-       int i,j;
-
-       atomic_set(&cacct->nr_threads, 1);
-       for (i=0; i<5; i++) {
-               for (j=0; j<3; j++) {
-                       atomic_set(&cacct->sock[i][j].count, 0);
-                       atomic_set(&cacct->sock[i][j].total, 0);
-               }
-       }
-}
-
-static inline void vx_info_exit_cacct(struct _vx_cacct *cacct)
-{
-       return;
-}
-
-static inline int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer)
-{
-       int length = 0;
-       return length;
-}
-
-static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer)
-{
-       int i,j, length = 0;
-       static char *type[] = { "UNSPEC", "UNIX", "INET", "INET6", "OTHER" };
-
-       for (i=0; i<5; i++) {
-               length += sprintf(buffer + length,
-                       "%s:", type[i]);
-               for (j=0; j<3; j++) {
-                       length += sprintf(buffer + length,
-                               "\t%12lu/%-12lu"
-                               ,vx_sock_count(cacct, i, j)
-                               ,vx_sock_total(cacct, i, j)
-                               );
-               }       
-               buffer[length++] = '\n';
-       }
-       return length;
-}
-
-#else  /* _VX_INFO_DEF_ */
 #ifndef _VX_CVIRT_H
 #define _VX_CVIRT_H
 
-#include "switch.h"
+#ifdef __KERNEL__
 
-/*  cvirt vserver commands */
+struct timespec;
 
+void vx_vsi_uptime(struct timespec *, struct timespec *);
 
-#ifdef __KERNEL__
 
-struct timespec;
+struct vx_info;
 
-void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle);
+void vx_update_load(struct vx_info *);
 
 #endif /* __KERNEL__ */
-
+#else  /* _VX_CVIRT_H */
+#warning duplicate inclusion
 #endif /* _VX_CVIRT_H */
-#endif
diff --git a/include/linux/vserver/cvirt_cmd.h b/include/linux/vserver/cvirt_cmd.h
new file mode 100644 (file)
index 0000000..368f527
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _VX_CVIRT_CMD_H
+#define _VX_CVIRT_CMD_H
+
+/*  cvirt vserver commands */
+
+
+#endif /* _VX_CVIRT_CMD_H */
diff --git a/include/linux/vserver/cvirt_def.h b/include/linux/vserver/cvirt_def.h
new file mode 100644 (file)
index 0000000..bf4bd84
--- /dev/null
@@ -0,0 +1,59 @@
+#ifndef _VX_CVIRT_DEF_H
+#define _VX_CVIRT_DEF_H
+
+#include <linux/jiffies.h>
+#include <linux/utsname.h>
+#include <linux/spinlock.h>
+#include <linux/time.h>
+#include <asm/atomic.h>
+
+
+struct _vx_usage_stat {
+       uint64_t user;
+       uint64_t nice;
+       uint64_t system;
+       uint64_t softirq;
+       uint64_t irq;
+       uint64_t idle;
+       uint64_t iowait;
+};
+
+/* context sub struct */
+
+struct _vx_cvirt {
+       int max_threads;                /* maximum allowed threads */
+       atomic_t nr_threads;            /* number of current threads */
+       atomic_t nr_running;            /* number of running threads */
+       atomic_t nr_uninterruptible;    /* number of uninterruptible threads */
+
+       atomic_t nr_onhold;             /* processes on hold */
+       uint32_t onhold_last;           /* jiffies when put on hold */
+
+       struct timespec bias_idle;
+       struct timespec bias_uptime;    /* context creation point */
+       uint64_t bias_clock;            /* offset in clock_t */
+
+       struct new_utsname utsname;
+
+       spinlock_t load_lock;           /* lock for the load averages */
+       atomic_t load_updates;          /* nr of load updates done so far */
+       uint32_t load_last;             /* last time load was calculated */
+       uint32_t load[3];               /* load averages 1,5,15 */
+
+       struct _vx_usage_stat cpustat[NR_CPUS]; /* one usage record per CPU */
+};
+
+struct _vx_sock_acc {
+       atomic_t count;
+       atomic_t total;
+};
+
+/* context sub struct */
+
+struct _vx_cacct {
+       unsigned long total_forks;
+
+       struct _vx_sock_acc sock[5][3];
+};
+
+#endif /* _VX_CVIRT_DEF_H */
index 15b52c9..f6b27cf 100644 (file)
@@ -2,6 +2,19 @@
 #define _VX_DEBUG_H
 
 
+#define VXD_CBIT(n,m)  (vx_debug_ ## n & (1 << (m)))
+#define VXD_CMIN(n,m)  (vx_debug_ ## n > (m))
+#define VXD_MASK(n,m)  (vx_debug_ ## n & (m))
+
+#define VXD_QPOS(v,p)  (((uint32_t)(v) >> ((p)*8)) & 0xFF)
+#define VXD_QUAD(v)    VXD_QPOS(v,0), VXD_QPOS(v,1),           \
+                       VXD_QPOS(v,2), VXD_QPOS(v,3)
+
+#define __FUNC__       __func__
+
+
+#ifdef CONFIG_VSERVER_DEBUG
+
 extern unsigned int vx_debug_switch;
 extern unsigned int vx_debug_xid;
 extern unsigned int vx_debug_nid;
@@ -11,36 +24,245 @@ extern unsigned int vx_debug_dlim;
 extern unsigned int vx_debug_cvirt;
 
 
-#define        VXD_CBIT(n,m)   (vx_debug_ ## n & (1 << (m)))
-#define        VXD_CMIN(n,m)   (vx_debug_ ## n > (m))
-#define        VXD_MASK(n,m)   (vx_debug_ ## n & (m))
-
-// #define     VXD_HERE        __FILE__, __LINE__
-
-
-#ifdef CONFIG_VSERVER_DEBUG
-
-#define        VX_LOGLEVEL     "vxD: "
+#define VX_LOGLEVEL    "vxD: "
+#define VX_WARNLEVEL   KERN_WARNING "vxW: "
 
 #define vxdprintk(c,f,x...)                                    \
        do {                                                    \
                if (c)                                          \
-                       printk(VX_LOGLEVEL f "\n", x);          \
-       } while (0)     
+                       printk(VX_LOGLEVEL f "\n" , ##x);       \
+       } while (0)
 
 #define vxlprintk(c,f,x...)                                    \
        do {                                                    \
                if (c)                                          \
                        printk(VX_LOGLEVEL f " @%s:%d\n", x);   \
-       } while (0)     
+       } while (0)
+
+#define vxfprintk(c,f,x...)                                    \
+       do {                                                    \
+               if (c)                                          \
+                       printk(VX_LOGLEVEL f " %s@%s:%d\n", x); \
+       } while (0)
+
+
+#define vxwprintk(c,f,x...)                                    \
+       do {                                                    \
+               if (c)                                          \
+                       printk(VX_WARNLEVEL f "\n" , ##x);      \
+       } while (0)
+
+
+#define vxd_path(d,m)                                          \
+       ({ static char _buffer[PATH_MAX];                       \
+          d_path((d), (m), _buffer, sizeof(_buffer)); })
+
+#else  /* CONFIG_VSERVER_DEBUG */
+
+#define vx_debug_switch 0
+#define vx_debug_xid   0
+#define vx_debug_nid   0
+#define vx_debug_net   0
+#define vx_debug_limit 0
+#define vx_debug_dlim  0
+#define vx_debug_cvirt 0
+
+#define vxdprintk(x...) do { } while (0)
+#define vxlprintk(x...) do { } while (0)
+#define vxfprintk(x...) do { } while (0)
+#define vxwprintk(x...) do { } while (0)
+
+#define vxd_path(d,m)  "<none>"        /* same arity as the debug variant; args unused */
+
+#endif /* CONFIG_VSERVER_DEBUG */
+
+
+/* history stuff */
+
+#ifdef CONFIG_VSERVER_HISTORY
+
+
+extern unsigned volatile int vxh_active;
+
+struct _vxhe_vxi {
+       struct vx_info *ptr;
+       unsigned xid;
+       unsigned usecnt;
+       unsigned refcnt;
+};
+
+struct _vxhe_set_clr {
+       void *data;
+};
+
+struct _vxhe_loc_lookup {
+       unsigned arg;
+};
+
+enum {
+       VXH_UNUSED=0,
+       VXH_THROW_OOPS=1,
+
+       VXH_GET_VX_INFO,
+       VXH_PUT_VX_INFO,
+       VXH_SET_VX_INFO,
+       VXH_CLR_VX_INFO,
+       VXH_ALLOC_VX_INFO,
+       VXH_DEALLOC_VX_INFO,
+       VXH_HASH_VX_INFO,
+       VXH_UNHASH_VX_INFO,
+       VXH_LOC_VX_INFO,
+       VXH_LOOKUP_VX_INFO,
+};
+
+struct _vx_hist_entry {
+       void *loc;
+       unsigned short seq;
+       unsigned short type;
+       struct _vxhe_vxi vxi;
+       union {
+               struct _vxhe_set_clr sc;
+               struct _vxhe_loc_lookup ll;
+       };
+};
+
+struct _vx_hist_entry *vxh_advance(void *loc);
+
+#define        VXH_HERE()              \
+       ({ __label__ here;      \
+               here:;          \
+               &&here; })
+
+
+
+static inline void __vxh_copy_vxi(struct _vx_hist_entry *entry, struct vx_info *vxi)
+{
+       entry->vxi.ptr = vxi;
+       if (vxi) {
+               entry->vxi.usecnt = atomic_read(&vxi->vx_usecnt);
+               entry->vxi.refcnt = atomic_read(&vxi->vx_refcnt);
+               entry->vxi.xid = vxi->vx_id;
+       }
+}
+
+static inline void vxh_throw_oops(void)
+{
+       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+       entry->type = VXH_THROW_OOPS;
+
+       /* prevent further acquisition */
+       vxh_active = 0;
+}
+
+static inline void vxh_get_vx_info(struct vx_info *vxi)
+{
+       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+       __vxh_copy_vxi(entry, vxi);
+       entry->type = VXH_GET_VX_INFO;
+}
+
+static inline void vxh_put_vx_info(struct vx_info *vxi)
+{
+       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+       __vxh_copy_vxi(entry, vxi);
+       entry->type = VXH_PUT_VX_INFO;
+}
+
+static inline void vxh_set_vx_info(struct vx_info *vxi, void *data)
+{
+       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+       __vxh_copy_vxi(entry, vxi);
+       entry->sc.data = data;
+       entry->type = VXH_SET_VX_INFO;
+}
+
+static inline void vxh_clr_vx_info(struct vx_info *vxi, void *data)
+{
+       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+       __vxh_copy_vxi(entry, vxi);
+       entry->sc.data = data;
+       entry->type = VXH_CLR_VX_INFO;
+}
+
+static inline void vxh_alloc_vx_info(struct vx_info *vxi)
+{
+       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+       __vxh_copy_vxi(entry, vxi);
+       entry->type = VXH_ALLOC_VX_INFO;
+}
+
+static inline void vxh_dealloc_vx_info(struct vx_info *vxi)
+{
+       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+       __vxh_copy_vxi(entry, vxi);
+       entry->type = VXH_DEALLOC_VX_INFO;
+}
+
+static inline void vxh_hash_vx_info(struct vx_info *vxi)
+{
+       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+       __vxh_copy_vxi(entry, vxi);
+       entry->type = VXH_HASH_VX_INFO;
+}
+
+static inline void vxh_unhash_vx_info(struct vx_info *vxi)
+{
+       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+       __vxh_copy_vxi(entry, vxi);
+       entry->type = VXH_UNHASH_VX_INFO;
+}
+
+static inline void vxh_loc_vx_info(unsigned arg, struct vx_info *vxi)
+{
+       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+       __vxh_copy_vxi(entry, vxi);
+       entry->ll.arg = arg;
+       entry->type = VXH_LOC_VX_INFO;
+}
+
+static inline void vxh_lookup_vx_info(unsigned arg, struct vx_info *vxi)
+{
+       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+       __vxh_copy_vxi(entry, vxi);
+       entry->ll.arg = arg;
+       entry->type = VXH_LOOKUP_VX_INFO;
+}
+
+extern void vxh_dump_history(void);
+
+#else  /* CONFIG_VSERVER_HISTORY */
+
+#define        vxh_throw_oops()        do { } while (0)
+
+#define vxh_get_vx_info(v)     do { } while (0)
+#define vxh_put_vx_info(v)     do { } while (0)
+
+#define vxh_set_vx_info(v,d)   do { } while (0)
+#define vxh_clr_vx_info(v,d)   do { } while (0)
+
+#define vxh_alloc_vx_info(v)   do { } while (0)
+#define vxh_dealloc_vx_info(v) do { } while (0)
 
-#else
+#define vxh_hash_vx_info(v)    do { } while (0)
+#define vxh_unhash_vx_info(v)  do { } while (0)
 
-#define vxdprintk(x...)        do { } while (0)
-#define vxlprintk(x...)        do { } while (0)
+#define vxh_loc_vx_info(a,v)   do { } while (0)
+#define vxh_lookup_vx_info(a,v) do { } while (0)
 
-#endif
+#define vxh_dump_history()     do { } while (0)
 
 
+#endif /* CONFIG_VSERVER_HISTORY */
 
 #endif /* _VX_DEBUG_H */
diff --git a/include/linux/vserver/debug_cmd.h b/include/linux/vserver/debug_cmd.h
new file mode 100644 (file)
index 0000000..c0cbd08
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef _VX_DEBUG_CMD_H
+#define _VX_DEBUG_CMD_H
+
+
+/* debug commands */
+
+#define VCMD_dump_history      VC_CMD(DEBUG, 1, 0)
+
+#ifdef __KERNEL__
+
+extern int vc_dump_history(uint32_t);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_DEBUG_CMD_H */
index 14a68fd..0c6587e 100644 (file)
@@ -79,7 +79,7 @@ extern int vc_get_dlimit(uint32_t, void __user *);
 
 typedef uint64_t dlsize_t;
 
-
 #endif /* __KERNEL__ */
-
+#else  /* _VX_DLIMIT_H */
+#warning duplicate inclusion
 #endif /* _VX_DLIMIT_H */
index dac07ea..a1054e8 100644 (file)
@@ -57,10 +57,6 @@ extern int vc_set_iattr_v0(uint32_t, void __user *);
 extern int vc_get_iattr(uint32_t, void __user *);
 extern int vc_set_iattr(uint32_t, void __user *);
 
-extern int vc_iattr_ioctl(struct dentry *de,
-                         unsigned int cmd,
-                         unsigned long arg);
-
 #endif /* __KERNEL__ */
 
 /* inode ioctls */
@@ -68,7 +64,6 @@ extern int vc_iattr_ioctl(struct dentry *de,
 #define FIOC_GETXFLG   _IOR('x', 5, long)
 #define FIOC_SETXFLG   _IOW('x', 6, long)
 
-#define FIOC_GETIATTR   _IOR('x', 7, long)
-#define FIOC_SETIATTR   _IOR('x', 8, long)
-
+#else  /* _VX_INODE_H */
+#warning duplicate inclusion
 #endif /* _VX_INODE_H */
index 1372c0f..d5b8a3f 100644 (file)
@@ -2,7 +2,6 @@
 #define _VX_LEGACY_H
 
 #include "switch.h"
-#include "network.h"
 
 /*  compatibiliy vserver commands */
 
 
 /*  compatibiliy vserver arguments */
 
-struct  vcmd_new_s_context_v1 {
+struct vcmd_new_s_context_v1 {
        uint32_t remove_cap;
        uint32_t flags;
 };
 
-struct  vcmd_set_ipv4root_v3 {
+struct vcmd_set_ipv4root_v3 {
        /* number of pairs in id */
        uint32_t broadcast;
        struct {
@@ -40,7 +39,7 @@ struct  vcmd_set_ipv4root_v3 {
                                        /* of the context */
 #define VX_INFO_NAMESPACE      128     /* save private namespace */
 
-       
+
 #define NB_S_CONTEXT   16
 
 #define NB_IPV4ROOT    16
index 27496c1..0ed0e2b 100644 (file)
-#if    defined(__KERNEL__) && defined(_VX_INFO_DEF_)
-
-#include <asm/atomic.h>
-#include <asm/resource.h>
-
-/* context sub struct */
-
-#define        RLIMIT_OPENFD   12
-
-#define NUM_RLIMITS    16
-
-#define VLIMIT_SOCK    16
-
-
-struct _vx_limit {
-       atomic_t ticks;
-
-       unsigned long rlim[NUM_RLIMITS];        /* Per context limit */
-       atomic_t res[NUM_RLIMITS];              /* Current value */
-};
-
-static inline void vx_info_init_limit(struct _vx_limit *limit)
-{
-       int lim;
-
-       for (lim=0; lim<NUM_RLIMITS; lim++) {
-               limit->rlim[lim] = RLIM_INFINITY;
-               atomic_set(&limit->res[lim], 0);
-       }
-}
-
-extern unsigned int vx_debug_limit;
-
-static inline void vx_info_exit_limit(struct _vx_limit *limit)
-{
-       int lim, value;
-
-       for (lim=0; lim<NUM_RLIMITS; lim++) {
-               value = atomic_read(&limit->res[lim]);
-               if (value && vx_debug_limit)
-                       printk("!!! limit: %p[%d] = %d on exit.\n",
-                               limit, lim, value);
-       }
-}
-
-
-static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer)
-{
-       return sprintf(buffer,
-               "PROC:\t%8d/%ld\n"
-               "VM:\t%8d/%ld\n"
-               "VML:\t%8d/%ld\n"               
-               "RSS:\t%8d/%ld\n"
-               "FILES:\t%8d/%ld\n"
-               "OFD:\t%8d/%ld\n"
-               ,atomic_read(&limit->res[RLIMIT_NPROC])
-               ,limit->rlim[RLIMIT_NPROC]
-               ,atomic_read(&limit->res[RLIMIT_AS])
-               ,limit->rlim[RLIMIT_AS]
-               ,atomic_read(&limit->res[RLIMIT_MEMLOCK])
-               ,limit->rlim[RLIMIT_MEMLOCK]
-               ,atomic_read(&limit->res[RLIMIT_RSS])
-               ,limit->rlim[RLIMIT_RSS]
-               ,atomic_read(&limit->res[RLIMIT_NOFILE])
-               ,limit->rlim[RLIMIT_NOFILE]
-               ,atomic_read(&limit->res[RLIMIT_OPENFD])
-               ,limit->rlim[RLIMIT_OPENFD]
-               );
-}
-
-#else  /* _VX_INFO_DEF_ */
 #ifndef _VX_LIMIT_H
 #define _VX_LIMIT_H
 
-#include "switch.h"
-
-/*  rlimit vserver commands */
-
-#define VCMD_get_rlimit                VC_CMD(RLIMIT, 1, 0)
-#define VCMD_set_rlimit                VC_CMD(RLIMIT, 2, 0)
-#define VCMD_get_rlimit_mask   VC_CMD(RLIMIT, 3, 0)
-
-struct  vcmd_ctx_rlimit_v0 {
-       uint32_t id;
-       uint64_t minimum;
-       uint64_t softlimit;
-       uint64_t maximum;
-};
-
-struct  vcmd_ctx_rlimit_mask_v0 {
-       uint32_t minimum;
-       uint32_t softlimit;
-       uint32_t maximum;
-};
-
-#define CRLIM_UNSET            (0ULL)
-#define CRLIM_INFINITY         (~0ULL)
-#define CRLIM_KEEP             (~1ULL)
-
 #ifdef __KERNEL__
 
-#include <linux/compiler.h>
-
-extern int vc_get_rlimit(uint32_t, void __user *);
-extern int vc_set_rlimit(uint32_t, void __user *);
-extern int vc_get_rlimit_mask(uint32_t, void __user *);
-
 struct sysinfo;
 
 void vx_vsi_meminfo(struct sysinfo *);
 void vx_vsi_swapinfo(struct sysinfo *);
 
+#define VXD_RLIMIT(r,l)                (VXD_CBIT(limit, (l)) && ((r) == (l)))
 
-#endif /* __KERNEL__ */
+#define NUM_LIMITS     20
 
+#define VLIMIT_NSOCK   16
+
+extern const char *vlimit_name[NUM_LIMITS];
+
+#endif /* __KERNEL__ */
 #endif /* _VX_LIMIT_H */
-#endif
diff --git a/include/linux/vserver/limit_cmd.h b/include/linux/vserver/limit_cmd.h
new file mode 100644 (file)
index 0000000..a994d02
--- /dev/null
@@ -0,0 +1,36 @@
+#ifndef _VX_LIMIT_CMD_H
+#define _VX_LIMIT_CMD_H
+
+/*  rlimit vserver commands */
+
+#define VCMD_get_rlimit                VC_CMD(RLIMIT, 1, 0)
+#define VCMD_set_rlimit                VC_CMD(RLIMIT, 2, 0)
+#define VCMD_get_rlimit_mask   VC_CMD(RLIMIT, 3, 0)
+
+struct vcmd_ctx_rlimit_v0 {
+       uint32_t id;
+       uint64_t minimum;
+       uint64_t softlimit;
+       uint64_t maximum;
+};
+
+struct vcmd_ctx_rlimit_mask_v0 {
+       uint32_t minimum;
+       uint32_t softlimit;
+       uint32_t maximum;
+};
+
+#define CRLIM_UNSET            (0ULL)
+#define CRLIM_INFINITY         (~0ULL)
+#define CRLIM_KEEP             (~1ULL)
+
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+
+extern int vc_get_rlimit(uint32_t, void __user *);
+extern int vc_set_rlimit(uint32_t, void __user *);
+extern int vc_get_rlimit_mask(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_LIMIT_CMD_H */
diff --git a/include/linux/vserver/limit_def.h b/include/linux/vserver/limit_def.h
new file mode 100644 (file)
index 0000000..bab1def
--- /dev/null
@@ -0,0 +1,21 @@
+#ifndef _VX_LIMIT_DEF_H
+#define _VX_LIMIT_DEF_H
+
+#include <asm/atomic.h>
+#include <asm/resource.h>
+
+#include "limit.h"
+
+/* context sub struct */
+
+struct _vx_limit {
+       atomic_t ticks;
+
+       unsigned long rlim[NUM_LIMITS];         /* Context limit */
+       unsigned long rmax[NUM_LIMITS];         /* Context maximum */
+       atomic_t rcur[NUM_LIMITS];              /* Current value */
+       atomic_t lhit[NUM_LIMITS];              /* Limit hits */
+};
+
+
+#endif /* _VX_LIMIT_DEF_H */
index 140fc79..72a51f6 100644 (file)
@@ -3,13 +3,13 @@
 
 #include <linux/types.h>
 
-       
+
 /* virtual host info names */
 
 #define VCMD_vx_set_vhi_name   VC_CMD(VHOST, 1, 0)
 #define VCMD_vx_get_vhi_name   VC_CMD(VHOST, 2, 0)
 
-struct  vcmd_vx_vhi_name_v0 {
+struct vcmd_vx_vhi_name_v0 {
        uint32_t field;
        char name[65];
 };
@@ -44,6 +44,9 @@ extern int vc_get_vhi_name(uint32_t, void __user *);
 struct vx_info;
 struct namespace;
 struct fs_struct;
+struct vfsmount;
+
+extern int vx_check_vfsmount(struct vx_info *, struct vfsmount *);
 
 extern int vx_set_namespace(struct vx_info *, struct namespace *, struct fs_struct *);
 
@@ -52,4 +55,6 @@ extern int vc_cleanup_namespace(uint32_t, void __user *);
 extern int vc_set_namespace(uint32_t, void __user *);
 
 #endif /* __KERNEL__ */
+#else  /* _VX_NAMESPACE_H */
+#warning duplicate inclusion
 #endif /* _VX_NAMESPACE_H */
index e77866b..b1ccb9a 100644 (file)
@@ -1,19 +1,21 @@
 #ifndef _VX_NETWORK_H
 #define _VX_NETWORK_H
 
+#include <linux/types.h>
+
+
 #define MAX_N_CONTEXT  65535   /* Arbitrary limit */
 
 #define NX_DYNAMIC_ID  ((uint32_t)-1)          /* id for dynamic context */
 
 #define NB_IPV4ROOT    16
 
+
 #ifdef __KERNEL__
 
 #include <linux/list.h>
 #include <linux/spinlock.h>
-#include <linux/utsname.h>
 #include <linux/rcupdate.h>
-#include <asm/resource.h>
 #include <asm/atomic.h>
 
 
@@ -43,9 +45,10 @@ struct nx_info {
 
 struct rcu_head;
 
-extern void rcu_free_nx_info(struct rcu_head *);
 extern void unhash_nx_info(struct nx_info *);
 
+extern void free_nx_info(struct nx_info *);
+
 extern struct nx_info *locate_nx_info(int);
 extern struct nx_info *locate_or_create_nx_info(int);
 
@@ -64,89 +67,7 @@ struct sock;
 
 int nx_addr_conflict(struct nx_info *, uint32_t, struct sock *);
 
-
 #endif /* __KERNEL__ */
-
-#include "switch.h"
-
-/* vinfo commands */
-
-#define VCMD_task_nid          VC_CMD(VINFO, 2, 0)
-
-#ifdef __KERNEL__
-extern int vc_task_nid(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define VCMD_nx_info           VC_CMD(VINFO, 6, 0)
-
-struct vcmd_nx_info_v0 {
-       uint32_t nid;
-       /* more to come */
-};
-
-#ifdef __KERNEL__
-extern int vc_nx_info(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define VCMD_net_create                VC_CMD(VNET, 1, 0)
-#define VCMD_net_migrate       VC_CMD(NETMIG, 1, 0)
-
-#define VCMD_net_add           VC_CMD(NETALT, 1, 0)
-#define VCMD_net_remove                VC_CMD(NETALT, 2, 0)
-
-struct vcmd_net_nx_v0 {
-       uint16_t type;
-       uint16_t count;
-       uint32_t ip[4];
-       uint32_t mask[4];
-       /* more to come */
-};
-
-//     IPN_TYPE_IPV4
-
-
-#ifdef __KERNEL__
-extern int vc_net_create(uint32_t, void __user *);
-extern int vc_net_migrate(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define VCMD_get_nflags                VC_CMD(FLAGS, 5, 0)
-#define VCMD_set_nflags                VC_CMD(FLAGS, 6, 0)
-
-struct vcmd_net_flags_v0 {
-       uint64_t flagword;
-       uint64_t mask;
-};
-
-#ifdef __KERNEL__
-extern int vc_get_nflags(uint32_t, void __user *);
-extern int vc_set_nflags(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define IPF_STATE_SETUP                (1ULL<<32)
-
-
-#define IPF_ONE_TIME           (0x0001ULL<<32)
-
-#define VCMD_get_ncaps         VC_CMD(FLAGS, 7, 0)
-#define VCMD_set_ncaps         VC_CMD(FLAGS, 8, 0)
-
-struct vcmd_net_caps_v0 {
-       uint64_t ncaps;
-       uint64_t cmask;
-};
-
-#ifdef __KERNEL__
-extern int vc_get_ncaps(uint32_t, void __user *);
-extern int vc_set_ncaps(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define IPC_WOSSNAME           0x00000001
-
-
+#else  /* _VX_NETWORK_H */
+#warning duplicate inclusion
 #endif /* _VX_NETWORK_H */
diff --git a/include/linux/vserver/network_cmd.h b/include/linux/vserver/network_cmd.h
new file mode 100644 (file)
index 0000000..4403f54
--- /dev/null
@@ -0,0 +1,81 @@
+#ifndef _VX_NETWORK_CMD_H
+#define _VX_NETWORK_CMD_H
+
+
+/* vinfo commands */
+
+#define VCMD_task_nid          VC_CMD(VINFO, 2, 0)
+
+#ifdef __KERNEL__
+extern int vc_task_nid(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_nx_info           VC_CMD(VINFO, 6, 0)
+
+struct vcmd_nx_info_v0 {
+       uint32_t nid;
+       /* more to come */
+};
+
+#ifdef __KERNEL__
+extern int vc_nx_info(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_net_create                VC_CMD(VNET, 1, 0)
+#define VCMD_net_migrate       VC_CMD(NETMIG, 1, 0)
+
+#define VCMD_net_add           VC_CMD(NETALT, 1, 0)
+#define VCMD_net_remove                VC_CMD(NETALT, 2, 0)
+
+struct vcmd_net_nx_v0 {
+       uint16_t type;
+       uint16_t count;
+       uint32_t ip[4];
+       uint32_t mask[4];
+       /* more to come */
+};
+
+//     IPN_TYPE_IPV4
+
+
+#ifdef __KERNEL__
+extern int vc_net_create(uint32_t, void __user *);
+extern int vc_net_migrate(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_get_nflags                VC_CMD(FLAGS, 5, 0)
+#define VCMD_set_nflags                VC_CMD(FLAGS, 6, 0)
+
+struct vcmd_net_flags_v0 {
+       uint64_t flagword;
+       uint64_t mask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_nflags(uint32_t, void __user *);
+extern int vc_set_nflags(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define IPF_STATE_SETUP                (1ULL<<32)
+
+
+#define IPF_ONE_TIME           (0x0001ULL<<32)
+
+#define VCMD_get_ncaps         VC_CMD(FLAGS, 7, 0)
+#define VCMD_set_ncaps         VC_CMD(FLAGS, 8, 0)
+
+struct vcmd_net_caps_v0 {
+       uint64_t ncaps;
+       uint64_t cmask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_ncaps(uint32_t, void __user *);
+extern int vc_set_ncaps(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_NETWORK_CMD_H */
index f5982bb..e527b44 100644 (file)
-/* _VX_SCHED_H defined below */
-
-#if    defined(__KERNEL__) && defined(_VX_INFO_DEF_)
-
-#include <linux/spinlock.h>
-#include <linux/jiffies.h>
-#include <linux/cpumask.h>
-#include <asm/atomic.h>
-#include <asm/param.h>
-
-struct _vx_ticks {
-       uint64_t user_ticks;            /* token tick events */
-       uint64_t sys_ticks;             /* token tick events */
-       uint64_t hold_ticks;            /* token ticks paused */
-       uint64_t unused[5];             /* cacheline ? */
-};
-
-/* context sub struct */
-
-struct _vx_sched {
-       atomic_t tokens;                /* number of CPU tokens */
-       spinlock_t tokens_lock;         /* lock for token bucket */
-
-       int fill_rate;                  /* Fill rate: add X tokens... */
-       int interval;                   /* Divisor:   per Y jiffies   */
-       int tokens_min;                 /* Limit:     minimum for unhold */
-       int tokens_max;                 /* Limit:     no more than N tokens */
-       uint32_t jiffies;               /* last time accounted */
-
-       int priority_bias;              /* bias offset for priority */
-       cpumask_t cpus_allowed;         /* cpu mask for context */
-
-       struct _vx_ticks cpu[NR_CPUS];
-};
-
-static inline void vx_info_init_sched(struct _vx_sched *sched)
-{
-       int i;
-
-       /* scheduling; hard code starting values as constants */
-       sched->fill_rate        = 1;
-       sched->interval         = 4;
-       sched->tokens_min       = HZ >> 4;
-       sched->tokens_max       = HZ >> 1;
-       sched->jiffies          = jiffies;
-       sched->tokens_lock      = SPIN_LOCK_UNLOCKED;
-
-       atomic_set(&sched->tokens, HZ >> 2);
-       sched->cpus_allowed     = CPU_MASK_ALL;
-       sched->priority_bias    = 0;
-
-       for_each_cpu(i) {
-               sched->cpu[i].user_ticks        = 0;
-               sched->cpu[i].sys_ticks         = 0;
-               sched->cpu[i].hold_ticks        = 0;
-       }
-}
-
-static inline void vx_info_exit_sched(struct _vx_sched *sched)
-{
-       return;
-}
-
-static inline int vx_info_proc_sched(struct _vx_sched *sched, char *buffer)
-{
-       int length = 0;
-       int i;
-
-       length += sprintf(buffer,
-               "Token:\t\t%8d\n"
-               "FillRate:\t%8d\n"
-               "Interval:\t%8d\n"
-               "TokensMin:\t%8d\n"
-               "TokensMax:\t%8d\n"
-               "PrioBias:\t%8d\n"
-               ,atomic_read(&sched->tokens)
-               ,sched->fill_rate
-               ,sched->interval
-               ,sched->tokens_min
-               ,sched->tokens_max
-               ,sched->priority_bias
-               );
-
-       for_each_online_cpu(i) {
-               length += sprintf(buffer + length,
-                       "cpu %d: %lld %lld %lld\n"
-                       ,i
-                       ,(long long)sched->cpu[i].user_ticks
-                       ,(long long)sched->cpu[i].sys_ticks
-                       ,(long long)sched->cpu[i].hold_ticks
-                       );
-       }
-
-       return length;
-}
-
-
-#else  /* _VX_INFO_DEF_ */
 #ifndef _VX_SCHED_H
 #define _VX_SCHED_H
 
-#include "switch.h"
-
-/*  sched vserver commands */
-
-#define VCMD_set_sched_v2      VC_CMD(SCHED, 1, 2)
-#define VCMD_set_sched         VC_CMD(SCHED, 1, 3)
-
-struct vcmd_set_sched_v2 {
-       int32_t fill_rate;
-       int32_t interval;
-       int32_t tokens;
-       int32_t tokens_min;
-       int32_t tokens_max;
-       uint64_t cpu_mask;
-};
-
-struct vcmd_set_sched_v3 {
-       uint32_t set_mask;
-       int32_t fill_rate;
-       int32_t interval;
-       int32_t tokens;
-       int32_t tokens_min;
-       int32_t tokens_max;
-       int32_t priority_bias;
-};
-
-
-#define VXSM_FILL_RATE         0x0001
-#define VXSM_INTERVAL          0x0002
-#define VXSM_TOKENS            0x0010
-#define VXSM_TOKENS_MIN                0x0020
-#define VXSM_TOKENS_MAX                0x0040
-#define VXSM_PRIO_BIAS         0x0100
-
-#define SCHED_KEEP             (-2)
-
 #ifdef __KERNEL__
 
-extern int vc_set_sched_v1(uint32_t, void __user *);
-extern int vc_set_sched_v2(uint32_t, void __user *);
-extern int vc_set_sched(uint32_t, void __user *);
+struct timespec;
 
+void vx_vsi_uptime(struct timespec *, struct timespec *);
 
-#define VAVAVOOM_RATIO         50
 
-#define MAX_PRIO_BIAS          20
-#define MIN_PRIO_BIAS          -20
+struct vx_info;
 
-#include "context.h"
+void vx_update_load(struct vx_info *);
 
 
-/* scheduling stuff */
+struct task_struct;
 
 int effective_vavavoom(struct task_struct *, int);
 
 int vx_tokens_recalc(struct vx_info *);
 
-/* new stuff ;) */
-
-static inline int vx_tokens_avail(struct vx_info *vxi)
-{
-       return atomic_read(&vxi->sched.tokens);
-}
-
-static inline void vx_consume_token(struct vx_info *vxi)
-{
-       atomic_dec(&vxi->sched.tokens);
-}
-
-static inline int vx_need_resched(struct task_struct *p)
-{
-#ifdef CONFIG_VSERVER_HARDCPU
-       struct vx_info *vxi = p->vx_info;
-#endif
-       int slice = --p->time_slice;
-
-#ifdef CONFIG_VSERVER_HARDCPU
-       if (vxi) {
-               int tokens;
-
-               if ((tokens = vx_tokens_avail(vxi)) > 0)
-                       vx_consume_token(vxi);
-               /* for tokens > 0, one token was consumed */
-               if (tokens < 2)
-                       return 1;
-       }
-#endif
-       return (slice == 0);
-}
-
-
-static inline void vx_onhold_inc(struct vx_info *vxi)
-{
-       int onhold = atomic_read(&vxi->cvirt.nr_onhold);
-
-       atomic_inc(&vxi->cvirt.nr_onhold);
-       if (!onhold)
-               vxi->cvirt.onhold_last = jiffies;
-}
-
-static inline void __vx_onhold_update(struct vx_info *vxi)
-{
-       int cpu = smp_processor_id();
-       uint32_t now = jiffies;
-       uint32_t delta = now - vxi->cvirt.onhold_last;
-
-       vxi->cvirt.onhold_last = now;
-       vxi->sched.cpu[cpu].hold_ticks += delta;
-}
-
-static inline void vx_onhold_dec(struct vx_info *vxi)
-{
-       if (atomic_dec_and_test(&vxi->cvirt.nr_onhold))
-               __vx_onhold_update(vxi);
-}
-
 #endif /* __KERNEL__ */
-
+#else  /* _VX_SCHED_H */
+#warning duplicate inclusion
 #endif /* _VX_SCHED_H */
-#endif
diff --git a/include/linux/vserver/sched_cmd.h b/include/linux/vserver/sched_cmd.h
new file mode 100644 (file)
index 0000000..2a6f55b
--- /dev/null
@@ -0,0 +1,47 @@
+#ifndef _VX_SCHED_CMD_H
+#define _VX_SCHED_CMD_H
+
+/*  sched vserver commands */
+
+#define VCMD_set_sched_v2      VC_CMD(SCHED, 1, 2)
+#define VCMD_set_sched         VC_CMD(SCHED, 1, 3)
+
+struct vcmd_set_sched_v2 {
+       int32_t fill_rate;
+       int32_t interval;
+       int32_t tokens;
+       int32_t tokens_min;
+       int32_t tokens_max;
+       uint64_t cpu_mask;
+};
+
+struct vcmd_set_sched_v3 {
+       uint32_t set_mask;
+       int32_t fill_rate;
+       int32_t interval;
+       int32_t tokens;
+       int32_t tokens_min;
+       int32_t tokens_max;
+       int32_t priority_bias;
+};
+
+
+#define VXSM_FILL_RATE         0x0001
+#define VXSM_INTERVAL          0x0002
+#define VXSM_TOKENS            0x0010
+#define VXSM_TOKENS_MIN                0x0020
+#define VXSM_TOKENS_MAX                0x0040
+#define VXSM_PRIO_BIAS         0x0100
+
+#define SCHED_KEEP             (-2)
+
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+
+extern int vc_set_sched_v1(uint32_t, void __user *);
+extern int vc_set_sched_v2(uint32_t, void __user *);
+extern int vc_set_sched(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_SCHED_CMD_H */
diff --git a/include/linux/vserver/sched_def.h b/include/linux/vserver/sched_def.h
new file mode 100644 (file)
index 0000000..e85c09f
--- /dev/null
@@ -0,0 +1,36 @@
+#ifndef _VX_SCHED_DEF_H
+#define _VX_SCHED_DEF_H
+
+#include <linux/spinlock.h>
+#include <linux/jiffies.h>
+#include <linux/cpumask.h>
+#include <asm/atomic.h>
+#include <asm/param.h>
+
+
+struct _vx_ticks {
+       uint64_t user_ticks;            /* token tick events */
+       uint64_t sys_ticks;             /* token tick events */
+       uint64_t hold_ticks;            /* token ticks paused */
+       uint64_t unused[5];             /* cacheline ? */
+};
+
+/* context sub struct */
+
+struct _vx_sched {
+       atomic_t tokens;                /* number of CPU tokens */
+       spinlock_t tokens_lock;         /* lock for token bucket */
+
+       int fill_rate;                  /* Fill rate: add X tokens... */
+       int interval;                   /* Divisor:   per Y jiffies   */
+       int tokens_min;                 /* Limit:     minimum for unhold */
+       int tokens_max;                 /* Limit:     no more than N tokens */
+       uint32_t jiffies;               /* last time accounted */
+
+       int priority_bias;              /* bias offset for priority */
+       cpumask_t cpus_allowed;         /* cpu mask for context */
+
+       struct _vx_ticks cpu[NR_CPUS];
+};
+
+#endif /* _VX_SCHED_DEF_H */
index 5fef690..81f5c23 100644 (file)
 
 /*
 
-  Syscall Matrix V2.6
+  Syscall Matrix V2.8
 
-         |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL|
-         |STATS  |DESTROY|ALTER  |CHANGE |LIMIT  |TEST   | |       |       |
-         |INFO   |SETUP  |       |MOVE   |       |       | |       |       |
+        |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL|
+        |STATS  |DESTROY|ALTER  |CHANGE |LIMIT  |TEST   | |       |       |
+        |INFO   |SETUP  |       |MOVE   |       |       | |       |       |
   -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
   SYSTEM |VERSION|VSETUP |VHOST  |       |       |       | |DEVICES|       |
   HOST   |     00|     01|     02|     03|     04|     05| |     06|     07|
   PROCESS|     08|     09|     10|     11|     12|     13| |     14|     15|
   -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
   MEMORY |       |       |       |       |       |       | |SWAP   |       |
-         |     16|     17|     18|     19|     20|     21| |     22|     23|
+        |     16|     17|     18|     19|     20|     21| |     22|     23|
   -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
   NETWORK|       |VNET   |NETALT |NETMIG |NETCTL |       | |SERIAL |       |
-         |     24|     25|     26|     27|     28|     29| |     30|     31|
+        |     24|     25|     26|     27|     28|     29| |     30|     31|
   -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
-  DISK   |       |       |       |       |       |       | |INODE  |       |
+  DISK   |       |       |       |       |DLIMIT |       | |INODE  |       |
   VFS    |     32|     33|     34|     35|     36|     37| |     38|     39|
   -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
   OTHER  |       |       |       |       |       |       | |VINFO  |       |
-         |     40|     41|     42|     43|     44|     45| |     46|     47|
+        |     40|     41|     42|     43|     44|     45| |     46|     47|
   =======+=======+=======+=======+=======+=======+=======+ +=======+=======+
-  SPECIAL|       |       |       |       |FLAGS  |       | |       |       |
-         |     48|     49|     50|     51|     52|     53| |     54|     55|
+  SPECIAL|EVENT  |       |       |       |FLAGS  |       | |       |       |
+        |     48|     49|     50|     51|     52|     53| |     54|     55|
   -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
-  SPECIAL|       |       |       |       |RLIMIT |SYSCALL| |       |COMPAT |
-         |     56|     57|     58|     59|     60|TEST 61| |     62|     63|
+  SPECIAL|DEBUG  |       |       |       |RLIMIT |SYSCALL| |       |COMPAT |
+        |     56|     57|     58|     59|     60|TEST 61| |     62|     63|
   -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
 
 */
@@ -49,7 +49,7 @@
 
 #define VC_CAT_VSETUP          1
 #define VC_CAT_VHOST           2
-       
+
 #define VC_CAT_VPROC           9
 #define VC_CAT_PROCALT         10
 #define VC_CAT_PROCMIG         11
 #define VC_CAT_NETMIG          27
 #define VC_CAT_NETCTRL         28
 
+#define VC_CAT_DLIMIT          36
 #define VC_CAT_INODE           38
 
 #define VC_CAT_VINFO           46
+#define VC_CAT_EVENT           48
 
 #define VC_CAT_FLAGS           52
+#define VC_CAT_DEBUG           56
 #define VC_CAT_RLIMIT          60
 
 #define VC_CAT_SYSTEST         61
 #define VC_CAT_COMPAT          63
-       
+
 /*  interface version */
 
-#define VCI_VERSION            0x00010016
+#define VCI_VERSION            0x00010025
 
 
 /*  query version */
@@ -86,7 +89,6 @@
 
 #include <linux/errno.h>
 
-#define ENOTSUP                -EOPNOTSUPP
 
 #else  /* __KERNEL__ */
 #define __user
index ba52c25..91e28de 100644 (file)
@@ -1,15 +1,21 @@
-#ifndef _LINUX_XID_H_
-#define _LINUX_XID_H_
+#ifndef _VX_XID_H
+#define _VX_XID_H
+
+/* true for a NULL inode, or one whose superblock has MS_TAGXID set */
+#define XID_TAG(in)    (!(in) || \
+       (((struct inode *)(in))->i_sb && \
+       (((struct inode *)(in))->i_sb->s_flags & MS_TAGXID)))
+
 
 #ifdef CONFIG_INOXID_NONE
 
 #define MAX_UID                0xFFFFFFFF
 #define MAX_GID                0xFFFFFFFF
 
-#define INOXID_XID(uid, gid, xid)      (0)
+#define INOXID_XID(tag, uid, gid, xid) (0)
 
-#define XIDINO_UID(uid, xid)           (uid)
-#define XIDINO_GID(gid, xid)           (gid)
+#define XIDINO_UID(tag, uid, xid)      (uid)
+#define XIDINO_GID(tag, gid, xid)      (gid)
 
 #endif
 
 #define MAX_UID                0xFFFFFFFF
 #define MAX_GID                0x0000FFFF
 
-#define INOXID_XID(uid, gid, xid)      (((gid) >> 16) & 0xFFFF)
-
-#define XIDINO_UID(uid, xid)           (uid)
-#define XIDINO_GID(gid, xid)           (((gid) & 0xFFFF) | ((xid) << 16))
+#define INOXID_XID(tag, uid, gid, xid) \
+       ((tag) ? (((gid) >> 16) & 0xFFFF) : 0)
 
+#define XIDINO_UID(tag, uid, xid)      (uid)
+#define XIDINO_GID(tag, gid, xid)      \
+       ((tag) ? (((gid) & 0xFFFF) | ((xid) << 16)) : (gid))
 
 #endif
 
 
-#ifdef CONFIG_INOXID_GID24
+#ifdef CONFIG_INOXID_UGID24
 
 #define MAX_UID                0x00FFFFFF
 #define MAX_GID                0x00FFFFFF
 
-#define INOXID_XID(uid, gid, xid)      ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF))
+#define INOXID_XID(tag, uid, gid, xid) \
+       ((tag) ? ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF)) : 0)
+
+#define XIDINO_UID(tag, uid, xid)      \
+       ((tag) ? (((uid) & 0xFFFFFF) | (((xid) & 0xFF00) << 16)) : (uid))
+#define XIDINO_GID(tag, gid, xid)      \
+       ((tag) ? (((gid) & 0xFFFFFF) | (((xid) & 0x00FF) << 24)) : (gid))
+
+#endif
+
+
+#ifdef CONFIG_INOXID_UID16
+
+#define MAX_UID                0x0000FFFF
+#define MAX_GID                0xFFFFFFFF
 
-#define XIDINO_UID(uid, xid)           (((uid) & 0xFFFFFF) | (((xid) & 0xFF00) << 16))
-#define XIDINO_GID(gid, xid)           (((gid) & 0xFFFFFF) | (((xid) & 0x00FF) << 24))
+#define INOXID_XID(tag, uid, gid, xid) \
+       ((tag) ? (((uid) >> 16) & 0xFFFF) : 0)  /* xid is kept in uid bits 16..31 */
+
+#define XIDINO_UID(tag, uid, xid)      \
+       ((tag) ? (((uid) & 0xFFFF) | ((xid) << 16)) : (uid))
+#define XIDINO_GID(tag, gid, xid)      (gid)
 
 #endif
 
 
-#ifdef CONFIG_INOXID_GID32
+#ifdef CONFIG_INOXID_INTERN
 
 #define MAX_UID                0xFFFFFFFF
 #define MAX_GID                0xFFFFFFFF
 
-#define INOXID_XID(uid, gid, xid)      (xid)
+#define INOXID_XID(tag, uid, gid, xid) \
+       ((tag) ? (xid) : 0)
 
-#define XIDINO_UID(uid, xid)           (uid)
-#define XIDINO_GID(gid, xid)           (gid)
+#define XIDINO_UID(tag, uid, xid)      (uid)
+#define XIDINO_GID(tag, gid, xid)      (gid)
 
 #endif
 
 #define MAX_UID                0xFFFFFFFF
 #define MAX_GID                0xFFFFFFFF
 
-#define INOXID_XID(uid, gid, xid)      (0)
+#define INOXID_XID(tag, uid, gid, xid) (0)
 
-#define XIDINO_UID(uid, xid)           (uid)
-#define XIDINO_GID(gid, xid)           (gid)
+#define XIDINO_UID(tag, uid, xid)      (uid)
+#define XIDINO_GID(tag, gid, xid)      (gid)
 
 #endif
 
 
-#define INOXID_UID(uid, gid)           ((uid) & MAX_UID)
-#define INOXID_GID(uid, gid)           ((gid) & MAX_GID)
+#define INOXID_UID(tag, uid, gid)      \
+       ((tag) ? ((uid) & MAX_UID) : (uid))
+#define INOXID_GID(tag, uid, gid)      \
+       ((tag) ? ((gid) & MAX_GID) : (gid))
+
 
 static inline uid_t vx_map_uid(uid_t uid)
 {
@@ -85,10 +114,13 @@ static inline gid_t vx_map_gid(gid_t gid)
 }
 
 
-#ifdef CONFIG_VSERVER_LEGACY           
+#ifdef CONFIG_VSERVER_LEGACY
 #define FIOC_GETXID    _IOR('x', 1, long)
 #define FIOC_SETXID    _IOW('x', 2, long)
 #define FIOC_SETXIDJ   _IOW('x', 3, long)
 #endif
 
-#endif /* _LINUX_XID_H_ */
+int vx_parse_xid(char *string, xid_t *xid, int remove);
+void vx_propagate_xid(struct nameidata *nd, struct inode *inode);
+
+#endif /* _VX_XID_H */
index c5d47b2..9ed04d9 100644 (file)
@@ -33,7 +33,6 @@
 #include <linux/route.h>
 #include <linux/ip.h>
 #include <linux/cache.h>
-#include <linux/vs_base.h>
 #include <linux/vs_context.h>
 #include <linux/vs_network.h>
 
@@ -146,6 +145,59 @@ static inline char rt_tos2priority(u8 tos)
        return ip_tos2prio[IPTOS_TOS(tos)>>1];
 }
 
+#define IPI_LOOPBACK   __constant_htonl(INADDR_LOOPBACK)       /* 127.0.0.1, network byte order on any endianness */
+/* choose or validate fl->fl4_src from nxi's IPv4 list; returns 0, a route lookup error, or -EPERM */
+static inline int ip_find_src(struct nx_info *nxi, struct rtable **rp, struct flowi *fl)
+{
+       int err;
+       int i, n = nxi->nbipv4;
+       u32 ipv4root = nxi->ipv4[0];
+
+       if (ipv4root == 0)      /* first address is unset: leave fl untouched */
+               return 0;
+
+       if (fl->fl4_src == 0) { /* no source address chosen yet */
+               if (n > 1) {
+                       u32 foundsrc;
+
+                       err = __ip_route_output_key(rp, fl);
+                       if (err) {      /* retry once with the first address as source */
+                               fl->fl4_src = ipv4root;
+                               err = __ip_route_output_key(rp, fl);
+                       }
+                       if (err)
+                               return err;
+
+                       foundsrc = (*rp)->rt_src;       /* source the lookup selected */
+                       ip_rt_put(*rp);         /* the route itself is not used further here */
+
+                       for (i=0; i<n; i++){
+                               u32 mask = nxi->mask[i];
+                               u32 ipv4 = nxi->ipv4[i];
+                               u32 net4 = ipv4 & mask;
+
+                               if (foundsrc == ipv4) { /* exact match wins, stop searching */
+                                       fl->fl4_src = ipv4;
+                                       break;
+                               }
+                               if (!fl->fl4_src && (foundsrc & mask) == net4)  /* else first entry on the same masked net */
+                                       fl->fl4_src = ipv4;
+                       }
+               }
+               if (fl->fl4_src == 0)   /* nothing matched: loopback for loopback, else first address */
+                       fl->fl4_src = (fl->fl4_dst == IPI_LOOPBACK)
+                               ? IPI_LOOPBACK : ipv4root;
+       } else {        /* caller supplied a source: it must be one of nxi's addresses */
+               for (i=0; i<n; i++) {
+                       if (nxi->ipv4[i] == fl->fl4_src)
+                               break;
+               }
+               if (i == n)     /* loop ran to the end without a match */
+                       return -EPERM;
+       }
+       return 0;
+}
+
 static inline int ip_route_connect(struct rtable **rp, u32 dst,
                                   u32 src, u32 tos, int oif, u8 protocol,
                                   u16 sport, u16 dport, struct sock *sk)
@@ -160,7 +212,23 @@ static inline int ip_route_connect(struct rtable **rp, u32 dst,
                                         .dport = dport } } };
 
        int err;
-       if (!dst || !src) {
+       struct nx_info *nx_info = current->nx_info;
+
+       if (sk)
+               nx_info = sk->sk_nx_info;
+       vxdprintk(VXD_CBIT(net, 4),
+               "ip_route_connect(%p) %p,%p;%lx",
+               sk, nx_info, sk->sk_socket,
+               (sk->sk_socket?sk->sk_socket->flags:0));
+
+       if (nx_info) {
+               err = ip_find_src(nx_info, rp, &fl);
+               if (err)
+                       return err;
+               if (fl.fl4_dst == IPI_LOOPBACK && !vx_check(0, VX_ADMIN))
+                       fl.fl4_dst = nx_info->ipv4[0];
+       }
+       if (!fl.fl4_dst || !fl.fl4_src) {
                err = __ip_route_output_key(rp, &fl);
                if (err)
                        return err;
index 7964406..62aead4 100644 (file)
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -99,7 +99,7 @@ static int newque (key_t key, int msgflg)
 
        msq->q_perm.mode = (msgflg & S_IRWXUGO);
        msq->q_perm.key = key;
-       msq->q_perm.xid = current->xid;
+       msq->q_perm.xid = vx_current_xid();
 
        msq->q_perm.security = NULL;
        retval = security_msg_queue_alloc(msq);
index 3960ddb..d33f2ad 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -178,7 +178,7 @@ static int newary (key_t key, int nsems, int semflg)
 
        sma->sem_perm.mode = (semflg & S_IRWXUGO);
        sma->sem_perm.key = key;
-       sma->sem_perm.xid = current->xid;
+       sma->sem_perm.xid = vx_current_xid();
 
        sma->sem_perm.security = NULL;
        retval = security_sem_alloc(sma);
index d7bb539..fa14c36 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -195,7 +195,7 @@ static int newseg (key_t key, int shmflg, size_t size)
                return -ENOMEM;
 
        shp->shm_perm.key = key;
-       shp->shm_perm.xid = current->xid;
+       shp->shm_perm.xid = vx_current_xid();
        shp->shm_flags = (shmflg & S_IRWXUGO);
        shp->mlock_user = NULL;
 
index 3d32576..23dc38f 100644 (file)
@@ -14,6 +14,9 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 subdir-y  += vserver
 obj-y    += vserver/vserver.o
 
+subdir-y       += vserver
+obj-y    += vserver/vserver.o
+
 obj-$(CONFIG_FUTEX) += futex.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
 obj-$(CONFIG_SMP) += cpu.o spinlock.o
index a4bf68d..649a9ce 100644 (file)
@@ -12,6 +12,8 @@
 #include <linux/security.h>
 #include <linux/vs_cvirt.h>
 #include <linux/syscalls.h>
+#include <linux/vs_cvirt.h>
+
 #include <asm/uaccess.h>
 
 unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
index 764c1ad..ebcc1b6 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/vs_limit.h>
 #include <linux/ckrm_mem.h>
 #include <linux/syscalls.h>
+#include <linux/vs_limit.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -382,6 +383,7 @@ static inline void close_files(struct files_struct * files)
                                struct file * file = xchg(&files->fd[i], NULL);
                                if (file) 
                                        filp_close(file, files);
+                               // vx_openfd_dec(i);
                        }
                        i++;
                        set >>= 1;
@@ -611,6 +613,7 @@ static inline void forget_original_parent(struct task_struct * father,
        struct task_struct *p, *reaper = father;
        struct list_head *_p, *_n;
 
+       /* FIXME handle vchild_reaper/initpid */
        do {
                reaper = next_thread(reaper);
                if (reaper == father) {
index d19d14e..a44ced0 100644 (file)
 #include <linux/audit.h>
 #include <linux/profile.h>
 #include <linux/rmap.h>
-#include <linux/vs_network.h>
-#include <linux/vs_limit.h>
-#include <linux/vs_memory.h>
 #include <linux/ckrm.h>
 #include <linux/ckrm_tsk.h>
 #include <linux/ckrm_mem_inline.h>
+#include <linux/vs_network.h>
+#include <linux/vs_limit.h>
+#include <linux/vs_memory.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -354,7 +354,6 @@ void fastcall __mmdrop(struct mm_struct *mm)
        BUG_ON(mm == &init_mm);
        mm_free_pgd(mm);
        destroy_context(mm);
-       clr_vx_info(&mm->mm_vx_info);
 #ifdef CONFIG_CKRM_RES_MEM
        /* class can be null and mm's tasklist can be empty here */
        if (mm->memclass) {
@@ -362,6 +361,7 @@ void fastcall __mmdrop(struct mm_struct *mm)
                mm->memclass = NULL;
        }
 #endif
+       clr_vx_info(&mm->mm_vx_info);
        free_mm(mm);
 }
 
@@ -869,6 +869,23 @@ static task_t *copy_process(unsigned long clone_flags,
                        goto bad_fork_cleanup_vm;
        }
 
+       p->vx_info = NULL;
+       set_vx_info(&p->vx_info, current->vx_info);
+       p->nx_info = NULL;
+       set_nx_info(&p->nx_info, current->nx_info);
+
+       /* check vserver memory */
+       if (p->mm && !(clone_flags & CLONE_VM)) {
+               if (vx_vmpages_avail(p->mm, p->mm->total_vm))
+                       vx_pages_add(p->mm->mm_vx_info, RLIMIT_AS, p->mm->total_vm);
+               else
+                       goto bad_fork_free;
+       }
+       if (p->mm && vx_flags(VXF_FORK_RSS, 0)) {
+               if (!vx_rsspages_avail(p->mm, p->mm->rss))
+                       goto bad_fork_cleanup_vm;
+       }
+
        retval = -EAGAIN;
        if (!vx_nproc_avail(1))
                goto bad_fork_cleanup_vm;
index 2d04567..663980a 100644 (file)
@@ -46,6 +46,7 @@
 #include <linux/syscalls.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
+#include <linux/vs_cvirt.h>
 
 #ifndef div_long_long_rem
 #include <asm/div64.h>
index 31e1731..2b80f44 100644 (file)
@@ -539,6 +539,8 @@ asmlinkage int printk(const char *fmt, ...)
        return r;
 }
 
+static volatile int printk_cpu = -1;
+
 asmlinkage int vprintk(const char *fmt, va_list args)
 {
        unsigned long flags;
@@ -547,11 +549,12 @@ asmlinkage int vprintk(const char *fmt, va_list args)
        static char printk_buf[1024];
        static int log_level_unknown = 1;
 
-       if (unlikely(oops_in_progress))
+       if (unlikely(oops_in_progress && printk_cpu == smp_processor_id()))
                zap_locks();
 
        /* This stops the holder of console_sem just where we want him */
        spin_lock_irqsave(&logbuf_lock, flags);
+       printk_cpu = smp_processor_id();
 
        /* Emit the output into the temporary buffer */
        printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);
index f609197..855a39a 100644 (file)
@@ -50,6 +50,9 @@
 #include <asm/tlb.h>
 
 #include <asm/unistd.h>
+#include <linux/vs_context.h>
+#include <linux/vs_cvirt.h>
+#include <linux/vs_sched.h>
 
 #ifdef CONFIG_NUMA
 #define cpu_to_node_mask(cpu) node_to_cpumask(cpu_to_node(cpu))
@@ -262,6 +265,10 @@ struct runqueue {
        task_t *migration_thread;
        struct list_head migration_queue;
 #endif
+#ifdef CONFIG_VSERVER_HARDCPU
+       struct list_head hold_queue;
+       int idle_tokens;
+#endif
 
 #ifdef CONFIG_VSERVER_HARDCPU
        struct list_head hold_queue;
@@ -738,12 +745,10 @@ static int effective_prio(task_t *p)
        bonus = CURRENT_BONUS(p) - MAX_BONUS / 2;
 
        prio = p->static_prio - bonus;
-
 #ifdef CONFIG_VSERVER_HARDCPU
        if (task_vx_flags(p, VXF_SCHED_PRIO, 0))
                prio += effective_vavavoom(p, MAX_USER_PRIO);
 #endif
-
        if (prio < MAX_RT_PRIO)
                prio = MAX_RT_PRIO;
        if (prio > MAX_PRIO-1)
@@ -904,10 +909,11 @@ static void __deactivate_task(struct task_struct *p, runqueue_t *rq)
        p->array = NULL;
 }
 
-static void deactivate_task(struct task_struct *p, runqueue_t *rq)
+static inline
+void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
-       __deactivate_task(p, rq);
        vx_deactivate_task(p);
+       __deactivate_task(p, rq);
 }
 
 /*
@@ -1244,6 +1250,9 @@ out_activate:
         * to be considered on this CPU.)
         */
        activate_task(p, rq, cpu == this_cpu);
+       /* this is to get the accounting behind the load update */
+       if (old_state == TASK_UNINTERRUPTIBLE)
+               vx_uninterruptible_dec(p);
        if (!sync || cpu != this_cpu) {
                if (TASK_PREEMPTS_CURR(p, rq))
                        resched_task(rq->curr);
@@ -2886,7 +2895,6 @@ void scheduler_tick(int user_ticks, int sys_ticks)
        if (rcu_pending(cpu))
                rcu_check_callbacks(cpu, user_ticks);
 
-
        if (vxi) {
                vxi->sched.cpu[cpu].user_ticks += user_ticks;
                vxi->sched.cpu[cpu].sys_ticks += sys_ticks;
@@ -2911,6 +2919,7 @@ void scheduler_tick(int user_ticks, int sys_ticks)
 
                if (wake_priority_sleeper(rq))
                        goto out;
+
                ckrm_sched_tick(jiffies,cpu,rq_ckrm_load(rq));
 
 #ifdef CONFIG_VSERVER_HARDCPU_IDLE
@@ -2955,6 +2964,7 @@ void scheduler_tick(int user_ticks, int sys_ticks)
                }
                goto out_unlock;
        }
+#warning MEF: vx_need_resched incorpates standard kernel code, which it should not.
        if (vx_need_resched(p)) {
 #ifdef CONFIG_CKRM_CPU_SCHEDULE
                /* Hubertus ... we can abstract this out */
@@ -3158,11 +3168,11 @@ asmlinkage void __sched schedule(void)
        prio_array_t *array;
        unsigned long long now;
        unsigned long run_time;
-       int cpu;
 #ifdef CONFIG_VSERVER_HARDCPU
        struct vx_info *vxi;
        int maxidle = -HZ;
 #endif
+       int cpu;
 
        /*
         * If crash dump is in progress, this other cpu's
@@ -3173,7 +3183,6 @@ asmlinkage void __sched schedule(void)
         if (unlikely(dump_oncpu))
                 goto dump_scheduling_disabled;
 
-
        /*
         * Test if we are atomic.  Since do_exit() needs to call into
         * schedule() atomically, we ignore that path for now.
@@ -3249,8 +3258,10 @@ need_resched_nonpreemptible:
                                unlikely(signal_pending(prev))))
                        prev->state = TASK_RUNNING;
                else {
-                       if (prev->state == TASK_UNINTERRUPTIBLE)
+                       if (prev->state == TASK_UNINTERRUPTIBLE) {
                                rq->nr_uninterruptible++;
+                               vx_uninterruptible_inc(prev);
+                       }
                        deactivate_task(prev, rq);
                }
        }
@@ -3330,6 +3341,26 @@ go_idle:
         */
        next = rq_get_next_task(rq);
 
+#ifdef CONFIG_VSERVER_HARDCPU
+       vxi = next->vx_info;
+       if (vx_info_flags(vxi, VXF_SCHED_PAUSE|VXF_SCHED_HARD, 0)) {
+               int ret = vx_tokens_recalc(vxi);
+
+               if (unlikely(ret <= 0)) {
+                       if (ret && (rq->idle_tokens > -ret))
+                               rq->idle_tokens = -ret;
+                       __deactivate_task(next, rq);
+                       recalc_task_prio(next, now);
+                       // a new one on hold
+                       vx_onhold_inc(vxi);
+                       next->state |= TASK_ONHOLD;
+                       list_add_tail(&next->run_list, &rq->hold_queue);
+                       //printk("··· %8lu hold   %p [%d]\n", jiffies, next, next->prio);
+                       goto pick_next;
+               }
+       }
+#endif
+
 #ifdef CONFIG_VSERVER_HARDCPU
        vxi = next->vx_info;
        if (vx_info_flags(vxi, VXF_SCHED_PAUSE|VXF_SCHED_HARD, 0)) {
index a56f3d9..e74c821 100644 (file)
@@ -626,7 +626,6 @@ static int check_kill_permission(int sig, struct siginfo *info,
 
        if (sig < 0 || sig > _NSIG)
                return error;
-
        user = (!info ||
                (info != SEND_SIG_PRIV &&
                 info != SEND_SIG_FORCED &&
index 3792340..fee92cc 100644 (file)
 #include <linux/dcookies.h>
 #include <linux/suspend.h>
 #include <linux/ckrm.h>
-#include <linux/vs_base.h>
-#include <linux/vs_cvirt.h>
 #include <linux/tty.h>
-
+#include <linux/vs_cvirt.h>
 #include <linux/compat.h>
 #include <linux/syscalls.h>
 
index 635d8d4..22f8f23 100644 (file)
@@ -11,7 +11,7 @@ config        VSERVER_LEGACY
          This enables the legacy API used in vs1.xx, which allows
          to use older tools (for migration purposes).
 
-config PROC_SECURE
+config VSERVER_PROC_SECURE
        bool    "Enable Proc Security"
        depends on PROC_FS
        default y
@@ -25,9 +25,19 @@ config       VSERVER_HARDCPU
        help
          Activate the Hard CPU Limits
 
+config VSERVER_HARDCPU_IDLE
+       bool    "Limit the IDLE task"
+       depends on VSERVER_HARDCPU
+       default n
+       help
+         Limit the idle slices, so that the next context
+         will be scheduled as soon as possible.
+         This might improve interactivity/latency but
+         increases scheduling overhead.
+
 choice
        prompt  "Persistent Inode Context Tagging"
-       default INOXID_GID24
+       default INOXID_UGID24
        help
          This adds persistent context information to filesystems
          mounted with the tagxid option. Tagging is a requirement
@@ -39,26 +49,31 @@ config      INOXID_NONE
        help
          no context information is store for inodes
 
+config INOXID_UID16
+       bool    "UID16/GID32"
+       help
+         reduces UID to 16 bit, but leaves GID at 32 bit.
+
 config INOXID_GID16
        bool    "UID32/GID16"
        help
          reduces GID to 16 bit, but leaves UID at 32 bit.
 
-config INOXID_GID24
+config INOXID_UGID24
        bool    "UID24/GID24"
        help
          uses the upper 8bit from UID and GID for XID tagging
          which leaves 24bit for UID/GID each, which should be
          more than sufficient for normal use.
 
-config INOXID_GID32
+config INOXID_INTERN
        bool    "UID32/GID32"
        help
          this uses otherwise reserved inode fields in the on
          disk representation, which limits the use to a few
          filesystems (currently ext2 and ext3)
 
-config INOXID_MAGIC
+config INOXID_RUNTIME
        bool    "Runtime"
        depends on EXPERIMENTAL
        help
@@ -68,5 +83,32 @@ config       INOXID_MAGIC
 
 endchoice
 
+config VSERVER_DEBUG
+       bool    "Compile Debugging Code"
+       default n
+       help
+         Set this to yes if you want to be able to activate
+         debugging output at runtime. It adds a probably small
+         overhead (~ ??%) to all vserver related functions and
+         increases the kernel size by about 20k.
+
+config VSERVER_HISTORY
+       bool    "Compile History Tracing"
+       depends on VSERVER_DEBUG
+       default n
+       help
+         Set this to yes if you want to record the history of
+         linux-vserver activities, so they can be replayed on
+         a kernel panic (oops)
+
+config VSERVER_HISTORY_SIZE
+       int "Per CPU History Size (32-65536)"
+       depends on VSERVER_HISTORY
+       range 32 65536
+       default 64
+       help
+         This allows you to specify the number of entries in
+         the per CPU history buffer.
+
 endmenu
 
index c035a77..1cee3de 100644 (file)
@@ -6,7 +6,9 @@
 obj-y          += vserver.o
 
 vserver-y      := switch.o context.o namespace.o sched.o network.o inode.o \
-                  limit.o cvirt.o signal.o proc.o sysctl.o init.o
+                  limit.o cvirt.o signal.o proc.o helper.o init.o dlimit.o
 
+vserver-$(CONFIG_VSERVER_DEBUG) += sysctl.o
 vserver-$(CONFIG_VSERVER_LEGACY) += legacy.o
+vserver-$(CONFIG_VSERVER_HISTORY) += history.o
 
index d56d362..57481cb 100644 (file)
@@ -3,7 +3,7 @@
  *
  *  Virtual Server: Context Support
  *
- *  Copyright (C) 2003-2004  Herbert Pötzl
+ *  Copyright (C) 2003-2005  Herbert Pötzl
  *
  *  V0.01  context helper
  *  V0.02  vx_ctx_kill syscall command
  *  V0.06  task_xid and info commands
  *  V0.07  context flags and caps
  *  V0.08  switch to RCU based hash
+ *  V0.09  revert to non RCU for now
+ *  V0.10  and back to working RCU hash
  *
  */
 
 #include <linux/config.h>
 #include <linux/slab.h>
-#include <linux/vserver.h>
-#include <linux/vserver/legacy.h>
-#include <linux/vs_base.h>
-#include <linux/vs_context.h>
-#include <linux/kernel_stat.h>
+#include <linux/types.h>
 #include <linux/namespace.h>
-#include <linux/rcupdate.h>
 
-#define CKRM_VSERVER_INTEGRATION
-#ifdef CKRM_VSERVER_INTEGRATION
-#include <linux/ckrm.h>
-#endif //CKRM_VSERVER_INTEGRATION
+#include <linux/sched.h>
+#include <linux/vserver/network.h>
+#include <linux/vserver/legacy.h>
+#include <linux/vserver/limit.h>
+#include <linux/vserver/debug.h>
+#include <linux/vs_context.h>
+#include <linux/vserver/context_cmd.h>
+#include <linux/ckrm.h> /* needed for ckrm_cb_xid() */
 
 #include <asm/errno.h>
 
+#include "cvirt_init.h"
+#include "limit_init.h"
+#include "sched_init.h"
+
 
 /*     __alloc_vx_info()
 
@@ -74,6 +79,7 @@ static struct vx_info *__alloc_vx_info(xid_t xid)
 
        vxdprintk(VXD_CBIT(xid, 0),
                "alloc_vx_info(%d) = %p", xid, new);
+       vxh_alloc_vx_info(new);
        return new;
 }
 
@@ -85,6 +91,7 @@ static void __dealloc_vx_info(struct vx_info *vxi)
 {
        vxdprintk(VXD_CBIT(xid, 0),
                "dealloc_vx_info(%p)", vxi);
+       vxh_dealloc_vx_info(vxi);
 
        vxi->vx_hlist.next = LIST_POISON1;
        vxi->vx_id = -1;
@@ -122,40 +129,47 @@ static inline int __free_vx_info(struct vx_info *vxi)
        return usecnt;
 }
 
-#if 0
-
-static void __rcu_free_vx_info(struct rcu_head *head)
+static void __rcu_put_vx_info(struct rcu_head *head)
 {
        struct vx_info *vxi = container_of(head, struct vx_info, vx_rcu);
 
-       BUG_ON(!head);
        vxdprintk(VXD_CBIT(xid, 3),
-               "rcu_free_vx_info(%p): uc=%d", vxi,
-               atomic_read(&vxi->vx_usecnt));
-
-       __free_vx_info(vxi);
+               "__rcu_put_vx_info(%p[#%d]): %d,%d",
+               vxi, vxi->vx_id,
+               atomic_read(&vxi->vx_usecnt),
+               atomic_read(&vxi->vx_refcnt));
+       put_vx_info(vxi);
 }
 
-#endif
-
-void free_vx_info(struct vx_info *vxi)
+void __shutdown_vx_info(struct vx_info *vxi)
 {
        struct namespace *namespace;
        struct fs_struct *fs;
 
-       /* context shutdown is mandatory */
-       // BUG_ON(vxi->vx_state != VXS_SHUTDOWN);
+       might_sleep();
 
        namespace = xchg(&vxi->vx_namespace, NULL);
-       fs = xchg(&vxi->vx_fs, NULL);
-
        if (namespace)
                put_namespace(namespace);
+
+       fs = xchg(&vxi->vx_fs, NULL);
        if (fs)
                put_fs_struct(fs);
+}
+
+/* exported stuff */
+
+void free_vx_info(struct vx_info *vxi)
+{
+       /* context shutdown is mandatory */
+       // BUG_ON(vxi->vx_state != VXS_SHUTDOWN);
+
+       BUG_ON(vxi->vx_state & VXS_HASHED);
+
+       BUG_ON(vxi->vx_namespace);
+       BUG_ON(vxi->vx_fs);
 
        BUG_ON(__free_vx_info(vxi));
-       // call_rcu(&i->vx_rcu, __rcu_free_vx_info);
 }
 
 
@@ -186,6 +200,8 @@ static inline void __hash_vx_info(struct vx_info *vxi)
 
        vxdprintk(VXD_CBIT(xid, 4),
                "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id);
+       vxh_hash_vx_info(vxi);
+
        get_vx_info(vxi);
        vxi->vx_state |= VXS_HASHED;
        head = &vx_info_hash[__hashval(vxi->vx_id)];
@@ -201,9 +217,12 @@ static inline void __unhash_vx_info(struct vx_info *vxi)
 {
        vxdprintk(VXD_CBIT(xid, 4),
                "__unhash_vx_info: %p[#%d]", vxi, vxi->vx_id);
+       vxh_unhash_vx_info(vxi);
+
        vxi->vx_state &= ~VXS_HASHED;
        hlist_del_rcu(&vxi->vx_hlist);
-       put_vx_info(vxi);
+
+       call_rcu(&vxi->vx_rcu, __rcu_put_vx_info);
 }
 
 
@@ -216,22 +235,29 @@ static inline struct vx_info *__lookup_vx_info(xid_t xid)
 {
        struct hlist_head *head = &vx_info_hash[__hashval(xid)];
        struct hlist_node *pos;
+       struct vx_info *vxi;
 
        hlist_for_each_rcu(pos, head) {
-               struct vx_info *vxi =
-                       hlist_entry(pos, struct vx_info, vx_hlist);
+               vxi = hlist_entry(pos, struct vx_info, vx_hlist);
 
                if ((vxi->vx_id == xid) &&
                        vx_info_state(vxi, VXS_HASHED))
-                       return vxi;
+                       goto found;
        }
-       return NULL;
+       vxi = NULL;
+found:
+       vxdprintk(VXD_CBIT(xid, 0),
+               "__lookup_vx_info(#%u): %p[#%u]",
+               xid, vxi, vxi?vxi->vx_id:0);
+       vxh_lookup_vx_info(xid, vxi);
+       return vxi;
 }
 
 
 /*     __vx_dynamic_id()
 
        * find unused dynamic xid
+       * requires the rcu_read_lock()
        * requires the hash_lock to be held                     */
 
 static inline xid_t __vx_dynamic_id(void)
@@ -267,6 +293,9 @@ static struct vx_info * __loc_vx_info(int id, int *err)
                return NULL;
        }
 
+       /* FIXME is this required at all ? */
+       rcu_read_lock();
+       /* required to make dynamic xids unique */
        spin_lock(&vx_info_hash_lock);
 
        /* dynamic context requested */
@@ -304,6 +333,8 @@ static struct vx_info * __loc_vx_info(int id, int *err)
 
 out_unlock:
        spin_unlock(&vx_info_hash_lock);
+       rcu_read_unlock();
+       vxh_loc_vx_info(id, vxi);
        if (new)
                __dealloc_vx_info(new);
        return vxi;
@@ -316,6 +347,7 @@ out_unlock:
 
 void unhash_vx_info(struct vx_info *vxi)
 {
+       __shutdown_vx_info(vxi);
        spin_lock(&vx_info_hash_lock);
        __unhash_vx_info(vxi);
        spin_unlock(&vx_info_hash_lock);
@@ -534,12 +566,7 @@ int vx_migrate_task(struct task_struct *p, struct vx_info *vxi)
 out:
 
 
-#ifdef CKRM_VSERVER_INTEGRATION
-       do {
-         ckrm_cb_xid(p);
-       } while (0);
-#endif //CKRM_VSERVER_INTEGRATION
-
+       ckrm_cb_xid(p);
 
        put_vx_info(old_vxi);
        return ret;
@@ -584,7 +611,7 @@ int vc_task_xid(uint32_t id, void __user *data)
                read_unlock(&tasklist_lock);
        }
        else
-               xid = current->xid;
+               xid = vx_current_xid();
        return xid;
 }
 
@@ -768,8 +795,6 @@ int vc_set_ccaps(uint32_t id, void __user *data)
 
 #include <linux/module.h>
 
-// EXPORT_SYMBOL_GPL(rcu_free_vx_info);
 EXPORT_SYMBOL_GPL(free_vx_info);
-EXPORT_SYMBOL_GPL(vx_info_hash_lock);
 EXPORT_SYMBOL_GPL(unhash_vx_info);
 
index 2b5c81e..1cb3eda 100644 (file)
  */
 
 #include <linux/config.h>
-#include <linux/vserver/cvirt.h>
-#include <linux/vserver/context.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/vs_context.h>
+#include <linux/vs_cvirt.h>
 #include <linux/vserver/switch.h>
-#include <linux/vinline.h>
 
 #include <asm/errno.h>
 #include <asm/uaccess.h>
@@ -24,8 +25,8 @@ void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle)
        struct vx_info *vxi = current->vx_info;
 
        set_normalized_timespec(uptime,
-               uptime->tv_sec - vxi->cvirt.bias_tp.tv_sec,
-               uptime->tv_nsec - vxi->cvirt.bias_tp.tv_nsec);
+               uptime->tv_sec - vxi->cvirt.bias_uptime.tv_sec,
+               uptime->tv_nsec - vxi->cvirt.bias_uptime.tv_nsec);
        if (!idle)
                return;
        set_normalized_timespec(idle,
@@ -34,8 +35,63 @@ void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle)
        return;
 }
 
-uint64_t vx_idle_jiffies()
+uint64_t vx_idle_jiffies(void)
 {
        return init_task.utime + init_task.stime;
 }
 
+
+
+/*
+ * __update_loadavg - blend a new sample into a fixed-point average
+ *
+ * @load:  previous average, scaled by 2^FSHIFT
+ * @wsize: length of the averaging window
+ * @delta: time since the previous update, in the same unit as @wsize
+ * @n:     current sample (unscaled)
+ *
+ * Returns ((delta * n) << FSHIFT + (wsize - delta) * load) / wsize, or
+ * just n << FSHIFT once @delta covers the whole window.
+ */
+static inline uint32_t __update_loadavg(uint32_t load,
+       int wsize, int delta, int n)
+{
+       unsigned long long calc, prev;
+
+       /* just set it to n */
+       if (unlikely(delta >= wsize))
+               return (n << FSHIFT);
+
+       /* widen first: delta * n can overflow when multiplied as int */
+       calc = (unsigned long long)delta * n;
+       calc <<= FSHIFT;
+       prev = (wsize - delta);
+       prev *= load;
+       calc += prev;
+       do_div(calc, wsize);
+       return calc;
+}
+
+
+/*
+ * vx_update_load - refresh the three load averages of a context
+ *
+ * With cvirt.load_lock held: once at least 5*HZ jiffies have passed since
+ * cvirt.load_last, nr_running + nr_uninterruptible is fed into load[0..2]
+ * (windows of 60*HZ, 5*60*HZ and 15*60*HZ) and the timestamp is advanced.
+ * cvirt.load_updates is incremented on every call, recalculated or not.
+ */
+void vx_update_load(struct vx_info *vxi)
+{
+       uint32_t stamp, elapsed;
+
+       spin_lock(&vxi->cvirt.load_lock);
+
+       stamp = jiffies;
+       elapsed = stamp - vxi->cvirt.load_last;
+
+       if (elapsed >= 5*HZ) {
+               unsigned int active;
+
+               active = atomic_read(&vxi->cvirt.nr_running);
+               active += atomic_read(&vxi->cvirt.nr_uninterruptible);
+
+               vxi->cvirt.load[0] = __update_loadavg(vxi->cvirt.load[0],
+                       60*HZ, elapsed, active);
+               vxi->cvirt.load[1] = __update_loadavg(vxi->cvirt.load[1],
+                       5*60*HZ, elapsed, active);
+               vxi->cvirt.load[2] = __update_loadavg(vxi->cvirt.load[2],
+                       15*60*HZ, elapsed, active);
+
+               vxi->cvirt.load_last = stamp;
+       }
+
+       atomic_inc(&vxi->cvirt.load_updates);
+       spin_unlock(&vxi->cvirt.load_lock);
+}
+
+
+
diff --git a/kernel/vserver/cvirt_init.h b/kernel/vserver/cvirt_init.h
new file mode 100644 (file)
index 0000000..ecc34e1
--- /dev/null
@@ -0,0 +1,66 @@
+
+extern uint64_t vx_idle_jiffies(void);
+
+/*
+ * vx_info_init_cvirt - initialise the virtualisation state of a context
+ *
+ * Stores the current monotonic uptime (as timespec and as clock_t) and the
+ * idle time as biases, zeroes the task counters, copies system_utsname
+ * under uts_sem and resets the load-average bookkeeping.
+ */
+static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt)
+{
+       uint64_t idle = vx_idle_jiffies();
+       uint64_t uptime_ns;
+       int k;
+
+       /* uptime bias, kept both as timespec and as clock_t */
+       do_posix_clock_monotonic_gettime(&cvirt->bias_uptime);
+       uptime_ns = (unsigned long long)cvirt->bias_uptime.tv_sec * NSEC_PER_SEC;
+       uptime_ns += cvirt->bias_uptime.tv_nsec;
+       cvirt->bias_clock = nsec_to_clock_t(uptime_ns);
+
+       /* idle bias and task counters */
+       jiffies_to_timespec(idle, &cvirt->bias_idle);
+       atomic_set(&cvirt->nr_threads, 0);
+       atomic_set(&cvirt->nr_running, 0);
+       atomic_set(&cvirt->nr_uninterruptible, 0);
+       atomic_set(&cvirt->nr_onhold, 0);
+
+       /* start from the system-wide utsname */
+       down_read(&uts_sem);
+       cvirt->utsname = system_utsname;
+       up_read(&uts_sem);
+
+       /* load average state */
+       spin_lock_init(&cvirt->load_lock);
+       cvirt->load_last = jiffies;
+       atomic_set(&cvirt->load_updates, 0);
+       for (k = 0; k < 3; k++)
+               cvirt->load[k] = 0;
+}
+
+/*
+ * vx_info_exit_cvirt - teardown hook for the virtualisation state
+ *
+ * Does nothing unless CONFIG_VSERVER_DEBUG is set. With debugging, the
+ * nr_threads, nr_running and nr_uninterruptible counters are read and
+ * passed to vxwprintk(), presumably to warn when one of them is still
+ * non-zero at exit -- confirm against the vxwprintk() definition.
+ */
+static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt)
+{
+#ifdef CONFIG_VSERVER_DEBUG
+       int value;
+
+       /* value is assigned inside the first argument and reused in the message */
+       vxwprintk((value = atomic_read(&cvirt->nr_threads)),
+               "!!! cvirt: %p[nr_threads] = %d on exit.",
+               cvirt, value);
+       vxwprintk((value = atomic_read(&cvirt->nr_running)),
+               "!!! cvirt: %p[nr_running] = %d on exit.",
+               cvirt, value);
+       vxwprintk((value = atomic_read(&cvirt->nr_uninterruptible)),
+               "!!! cvirt: %p[nr_uninterruptible] = %d on exit.",
+               cvirt, value);
+#endif
+       return;
+}
+
+/* Zero the count and total of every cell in the 5 x 3 socket table. */
+static inline void vx_info_init_cacct(struct _vx_cacct *cacct)
+{
+       int type, pos;
+
+       for (type = 0; type < 5; type++)
+               for (pos = 0; pos < 3; pos++) {
+                       atomic_set(&cacct->sock[type][pos].count, 0);
+                       atomic_set(&cacct->sock[type][pos].total, 0);
+               }
+}
+
+/* Teardown hook for the accounting state; there is nothing to release. */
+static inline void vx_info_exit_cacct(struct _vx_cacct *cacct)
+{
+}
+
diff --git a/kernel/vserver/cvirt_proc.h b/kernel/vserver/cvirt_proc.h
new file mode 100644 (file)
index 0000000..ac67f98
--- /dev/null
@@ -0,0 +1,90 @@
+#ifndef _VX_CVIRT_PROC_H
+#define _VX_CVIRT_PROC_H
+
+#include <linux/sched.h>
+
+
+/*
+ * Helpers for printing a fixed-point load value with FSHIFT fractional
+ * bits: LOAD_INT() yields the integer part, LOAD_FRAC() the fraction
+ * scaled to hundredths (two decimal digits). FSHIFT and FIXED_1 are
+ * expected to come from <linux/sched.h>.
+ */
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
+/*
+ * vx_info_proc_cvirt - format the virtualisation state as text
+ *
+ * Writes the uptime bias, the utsname fields, the task counters and the
+ * three load averages (each rounded by adding FIXED_1/200) into @buffer
+ * with sprintf(). Returns the number of characters written. The caller
+ * must supply a buffer that is large enough; no bound is checked here.
+ */
+static inline int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer)
+{
+       int len = 0;
+       int load1, load5, load15;
+
+       /* uptime bias as seconds.hundredths */
+       len += sprintf(buffer + len,
+               "BiasUptime:\t%lu.%02lu\n",
+               (unsigned long)cvirt->bias_uptime.tv_sec,
+               (cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100)));
+
+       /* utsname fields, each limited to __NEW_UTS_LEN characters */
+       len += sprintf(buffer + len,
+               "SysName:\t%.*s\n"
+               "NodeName:\t%.*s\n"
+               "Release:\t%.*s\n"
+               "Version:\t%.*s\n"
+               "Machine:\t%.*s\n"
+               "DomainName:\t%.*s\n",
+               __NEW_UTS_LEN, cvirt->utsname.sysname,
+               __NEW_UTS_LEN, cvirt->utsname.nodename,
+               __NEW_UTS_LEN, cvirt->utsname.release,
+               __NEW_UTS_LEN, cvirt->utsname.version,
+               __NEW_UTS_LEN, cvirt->utsname.machine,
+               __NEW_UTS_LEN, cvirt->utsname.domainname);
+
+       /* add half of a hundredth so the two printed digits are rounded */
+       load1 = cvirt->load[0] + (FIXED_1/200);
+       load5 = cvirt->load[1] + (FIXED_1/200);
+       load15 = cvirt->load[2] + (FIXED_1/200);
+
+       len += sprintf(buffer + len,
+               "nr_threads:\t%d\n"
+               "nr_running:\t%d\n"
+               "nr_unintr:\t%d\n"
+               "nr_onhold:\t%d\n"
+               "load_updates:\t%d\n"
+               "loadavg:\t%d.%02d %d.%02d %d.%02d\n",
+               atomic_read(&cvirt->nr_threads),
+               atomic_read(&cvirt->nr_running),
+               atomic_read(&cvirt->nr_uninterruptible),
+               atomic_read(&cvirt->nr_onhold),
+               atomic_read(&cvirt->load_updates),
+               LOAD_INT(load1), LOAD_FRAC(load1),
+               LOAD_INT(load5), LOAD_FRAC(load5),
+               LOAD_INT(load15), LOAD_FRAC(load15));
+
+       return len;
+}
+
+
+/* Read the 'count' counter of socket table cell [@type][@pos]. */
+static inline long vx_sock_count(struct _vx_cacct *cacct, int type, int pos)
+{
+       long value = atomic_read(&cacct->sock[type][pos].count);
+
+       return value;
+}
+
+
+/* Read the 'total' counter of socket table cell [@type][@pos]. */
+static inline long vx_sock_total(struct _vx_cacct *cacct, int type, int pos)
+{
+       long value = atomic_read(&cacct->sock[type][pos].total);
+
+       return value;
+}
+
+/*
+ * vx_info_proc_cacct - format the socket accounting table as text
+ *
+ * Emits one line per socket class (UNSPEC, UNIX, INET, INET6, OTHER) with
+ * three "count/total" columns, followed by a "forks:" line, into @buffer
+ * using sprintf(). Returns the number of characters written. The buffer
+ * size is not checked here.
+ */
+static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer)
+{
+       static char *sock_name[] = { "UNSPEC", "UNIX", "INET", "INET6", "OTHER" };
+       int row, col;
+       int len = 0;
+
+       for (row = 0; row < 5; row++) {
+               len += sprintf(buffer + len, "%s:", sock_name[row]);
+
+               for (col = 0; col < 3; col++)
+                       len += sprintf(buffer + len,
+                               "\t%12lu/%-12lu",
+                               vx_sock_count(cacct, row, col),
+                               vx_sock_total(cacct, row, col));
+
+               /* end the row; the string is re-terminated by the next sprintf */
+               buffer[len++] = '\n';
+       }
+
+       len += sprintf(buffer + len, "forks:\t%lu\n", cacct->total_forks);
+       return len;
+}
+
+#endif /* _VX_CVIRT_PROC_H */
index 11da06d..6b14494 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/namei.h>
 #include <linux/statfs.h>
 #include <linux/vserver/switch.h>
-#include <linux/vs_base.h>
 #include <linux/vs_context.h>
 #include <linux/vs_dlimit.h>
 
@@ -389,7 +388,7 @@ void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf)
        __u64 blimit, bfree, bavail;
        __u32 ifree;
 
-       dli = locate_dl_info(sb, current->xid);
+       dli = locate_dl_info(sb, vx_current_xid());
        if (!dli)
                return;
 
index 880b843..ce8f971 100644 (file)
@@ -13,8 +13,7 @@
 #include <linux/errno.h>
 #include <linux/reboot.h>
 #include <linux/kmod.h>
-#include <linux/vserver.h>
-#include <linux/vs_base.h>
+#include <linux/sched.h>
 #include <linux/vs_context.h>
 
 #include <asm/uaccess.h>
@@ -57,19 +56,19 @@ long vs_reboot(unsigned int cmd, void * arg)
        switch (cmd) {
        case LINUX_REBOOT_CMD_RESTART:
                argv[1] = "restart";
-               break;  
+               break;
 
        case LINUX_REBOOT_CMD_HALT:
                argv[1] = "halt";
-               break;  
+               break;
 
        case LINUX_REBOOT_CMD_POWER_OFF:
                argv[1] = "poweroff";
-               break;  
+               break;
 
        case LINUX_REBOOT_CMD_SW_SUSPEND:
                argv[1] = "swsusp";
-               break;  
+               break;
 
        case LINUX_REBOOT_CMD_RESTART2:
                if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0)
@@ -77,7 +76,7 @@ long vs_reboot(unsigned int cmd, void * arg)
                argv[3] = buffer;
        default:
                argv[1] = "restart2";
-               break;  
+               break;
        }
 
        /* maybe we should wait ? */
@@ -90,3 +89,34 @@ long vs_reboot(unsigned int cmd, void * arg)
        return 0;
 }
 
+/* Run vshelper with "startup"/"shutdown" and the current xid for @cmd. */
+long vs_context_state(unsigned int cmd)
+{
+       char id_buf[8], cmd_buf[32];
+       char *argv[] = {vshelper_path, NULL, id_buf, NULL, 0};
+       char *envp[] = {"HOME=/", "TERM=linux",
+                       "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
+
+       /* snprintf() terminates within the given size; no need for "- 1" */
+       snprintf(id_buf, sizeof(id_buf), "%d", vx_current_xid());
+       snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
+
+       switch (cmd) {
+       case VS_CONTEXT_CREATED:
+               argv[1] = "startup";
+               break;
+       case VS_CONTEXT_DESTROY:
+               argv[1] = "shutdown";
+               break;
+       default:
+               return 0;
+       }
+
+       /* a helper failure is only logged; argv[3] is always NULL here */
+       if (call_usermodehelper(*argv, argv, envp, 1))
+               printk( KERN_WARNING
+                       "vs_context_state(): failed to exec (%s %s %s)\n",
+                       vshelper_path, argv[1], argv[2]);
+       return 0;
+}
+
index 8afd1fc..8c44b33 100644 (file)
@@ -11,8 +11,6 @@
 
 #include <linux/config.h>
 #include <linux/errno.h>
-#include <linux/vserver.h>
-// #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
 
@@ -24,7 +22,9 @@ static int __init init_vserver(void)
 {
        int ret = 0;
 
+#ifdef CONFIG_VSERVER_DEBUG
        vserver_register_sysctl();
+#endif
        return ret;
 }
 
@@ -32,7 +32,9 @@ static int __init init_vserver(void)
 static void __exit exit_vserver(void)
 {
 
+#ifdef CONFIG_VSERVER_DEBUG
        vserver_unregister_sysctl();
+#endif
        return;
 }
 
index 60e6fe1..8fdd30c 100644 (file)
  */
 
 #include <linux/config.h>
-#include <linux/vs_base.h>
+#include <linux/sched.h>
 #include <linux/vs_context.h>
-#include <linux/fs.h>
 #include <linux/proc_fs.h>
+#include <linux/devpts_fs.h>
 #include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/parser.h>
 #include <linux/vserver/inode.h>
+#include <linux/vserver/xid.h>
 
 #include <asm/errno.h>
 #include <asm/uaccess.h>
@@ -23,6 +26,8 @@
 
 static int __vc_get_iattr(struct inode *in, uint32_t *xid, uint32_t *flags, uint32_t *mask)
 {
+       struct proc_dir_entry *entry;
+
        if (!in || !in->i_sb)
                return -ESRCH;
 
@@ -40,8 +45,9 @@ static int __vc_get_iattr(struct inode *in, uint32_t *xid, uint32_t *flags, uint
                *mask |= IATTR_XID;
        }
 
-       if (in->i_sb->s_magic == PROC_SUPER_MAGIC) {
-               struct proc_dir_entry *entry = PROC_I(in)->pde;
+       switch (in->i_sb->s_magic) {
+       case PROC_SUPER_MAGIC:
+               entry = PROC_I(in)->pde;
 
                // check for specific inodes ?
                if (entry)
@@ -50,6 +56,15 @@ static int __vc_get_iattr(struct inode *in, uint32_t *xid, uint32_t *flags, uint
                        *flags |= (entry->vx_flags & IATTR_FLAGS);
                else
                        *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS);
+               break;
+
+       case DEVPTS_SUPER_MAGIC:
+               *xid = in->i_xid;
+               *mask |= IATTR_XID;
+               break;
+
+       default:
+               break;
        }
        return 0;
 }
@@ -57,7 +72,7 @@ static int __vc_get_iattr(struct inode *in, uint32_t *xid, uint32_t *flags, uint
 int vc_get_iattr(uint32_t id, void __user *data)
 {
        struct nameidata nd;
-       struct vcmd_ctx_iattr_v1 vc_data;
+       struct vcmd_ctx_iattr_v1 vc_data = { .xid = -1 };
        int ret;
 
        if (!vx_check(0, VX_ADMIN))
@@ -80,7 +95,7 @@ int vc_get_iattr(uint32_t id, void __user *data)
 static int __vc_set_iattr(struct dentry *de, uint32_t *xid, uint32_t *flags, uint32_t *mask)
 {
        struct inode *in = de->d_inode;
-       int error = 0, is_proc = 0;
+       int error = 0, is_proc = 0, has_xid = 0;
 
        if (!in || !in->i_sb)
                return -ESRCH;
@@ -88,7 +103,10 @@ static int __vc_set_iattr(struct dentry *de, uint32_t *xid, uint32_t *flags, uin
        is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC);
        if ((*mask & IATTR_FLAGS) && !is_proc)
                return -EINVAL;
-       if ((*mask & IATTR_XID) && !(in->i_sb->s_flags & MS_TAGXID))
+
+       has_xid = (in->i_sb->s_flags & MS_TAGXID) ||
+               (in->i_sb->s_magic == DEVPTS_SUPER_MAGIC);
+       if ((*mask & IATTR_XID) && !has_xid)
                return -EINVAL;
 
        down(&in->i_sem);
@@ -170,40 +188,8 @@ int vc_set_iattr(uint32_t id, void __user *data)
        return ret;
 }
 
-int vc_iattr_ioctl(struct dentry *de, unsigned int cmd, unsigned long arg)
-{
-       void __user *data = (void __user *)arg;
-       struct vcmd_ctx_iattr_v1 vc_data;
-       int ret;
-
-       /*
-        * I don't think we need any dget/dput pairs in here as long as
-        * this function is always called from sys_ioctl i.e., de is
-         * a field of a struct file that is guaranteed not to be freed.
-        */
-       if (cmd == FIOC_SETIATTR) {
-               if (!capable(CAP_SYS_ADMIN) || !capable(CAP_LINUX_IMMUTABLE))
-                       return -EPERM;
-               if (copy_from_user (&vc_data, data, sizeof(vc_data)))
-                       return -EFAULT;
-               ret = __vc_set_iattr(de,
-                       &vc_data.xid, &vc_data.flags, &vc_data.mask);
-       }
-       else {
-               if (!vx_check(0, VX_ADMIN))
-                       return -ENOSYS;
-               ret = __vc_get_iattr(de->d_inode,
-                       &vc_data.xid, &vc_data.flags, &vc_data.mask);
-       }
-
-       if (!ret && copy_to_user (data, &vc_data, sizeof(vc_data)))
-               ret = -EFAULT;
-       return ret;
-}
-
 
 #ifdef CONFIG_VSERVER_LEGACY
-#include <linux/proc_fs.h>
 
 #define PROC_DYNAMIC_FIRST 0xF0000000UL
 
@@ -252,3 +238,69 @@ int vx_proc_ioctl(struct inode * inode, struct file * filp,
 }
 #endif
 
+
+/* Parse "xid=%u" from @string into *@xid; @remove cuts it out. Returns token. */
+int vx_parse_xid(char *string, xid_t *xid, int remove)
+{
+       static match_table_t tokens = {
+               {1, "xid=%u"},
+               {0, NULL}
+       };
+       substring_t args[MAX_OPT_ARGS];
+       int token, option = 0;
+       char *dst, *src;
+
+       if (!string)
+               return 0;
+
+       token = match_token(string, tokens, args);
+       if (token && xid && !match_int(args, &option))
+               *xid = option;
+
+       vxdprintk(VXD_CBIT(xid, 7),
+               "vx_parse_xid(»%s«): %d:#%d",
+               string, token, option);
+       if (!token || !remove)
+               return token;
+       dst = strstr(string, "xid=");
+       if (!dst)
+               return token;
+       /* skip up to the next ',', pull the tail forward, zero what is left */
+       for (src = dst; *src != '\0' && *src != ','; src++)
+               ;
+       while (*src)
+               *dst++ = *src++;
+       while (*dst)
+               *dst++ = '\0';
+       return token;
+}
+
+/*
+ * When the mount behind @nd carries MNT_XID, stamp its mnt_xid onto
+ * @inode->i_xid; returns early without @nd or without a mount.
+ */
+void vx_propagate_xid(struct nameidata *nd, struct inode *inode)
+{
+       struct vfsmount *mnt = nd ? nd->mnt : NULL;
+       xid_t new_xid;
+       int tagged;
+
+       if (!mnt)
+               return;
+
+       tagged = (mnt->mnt_flags & MNT_XID);
+       new_xid = tagged ? mnt->mnt_xid : 0;
+
+       vxdprintk(VXD_CBIT(xid, 7),
+               "vx_propagate_xid(%p[#%lu.%d]): %d,%d",
+               inode, inode->i_ino, inode->i_xid,
+               new_xid, (tagged)?1:0);
+
+       if (tagged)
+               inode->i_xid = new_xid;
+}
+
+#include <linux/module.h>
+
+EXPORT_SYMBOL_GPL(vx_propagate_xid);
+
index e760653..fe4c66d 100644 (file)
 
 #include <linux/config.h>
 #include <linux/sched.h>
-#include <linux/namespace.h>
-#include <linux/vserver/legacy.h>
-#include <linux/vserver/namespace.h>
-#include <linux/vserver.h>
-#include <linux/vs_base.h>
 #include <linux/vs_context.h>
 #include <linux/vs_network.h>
+#include <linux/vserver/legacy.h>
+#include <linux/vserver/namespace.h>
+#include <linux/namespace.h>
 
 #include <asm/errno.h>
 #include <asm/uaccess.h>
@@ -61,8 +59,9 @@ int vc_new_s_context(uint32_t ctx, void __user *data)
                return ret;
        }
 
-       if (!vx_check(0, VX_ADMIN) ||
-               !capable(CAP_SYS_ADMIN) || vx_flags(VX_INFO_PRIVATE, 0))
+       if (!vx_check(0, VX_ADMIN) || !capable(CAP_SYS_ADMIN)
+               /* might make sense in the future, or not ... */
+               || vx_flags(VX_INFO_LOCK, 0))
                return -EPERM;
 
        /* ugly hack for Spectator */
@@ -82,6 +81,12 @@ int vc_new_s_context(uint32_t ctx, void __user *data)
 
        if (!new_vxi)
                return -EINVAL;
+
+       ret = -EPERM;
+       if (!vx_info_flags(new_vxi, VXF_STATE_SETUP, 0) &&
+               vx_info_flags(new_vxi, VX_INFO_PRIVATE, 0))
+               goto out_put;
+
        new_vxi->vx_flags &= ~(VXF_STATE_SETUP|VXF_STATE_INIT);
 
        ret = vx_migrate_task(current, new_vxi);
@@ -99,6 +104,7 @@ int vc_new_s_context(uint32_t ctx, void __user *data)
                                current->signal->rlim[RLIMIT_NPROC].rlim_max;
                ret = new_vxi->vx_id;
        }
+out_put:
        put_vx_info(new_vxi);
        return ret;
 }
index 5bd2fdc..a1497be 100644 (file)
  */
 
 #include <linux/config.h>
+#include <linux/module.h>
+#include <linux/vs_context.h>
+#include <linux/vs_limit.h>
 #include <linux/vserver/limit.h>
-#include <linux/vserver/context.h>
 #include <linux/vserver/switch.h>
-#include <linux/vinline.h>
+#include <linux/vserver/limit_cmd.h>
 
 #include <asm/errno.h>
 #include <asm/uaccess.h>
 
 
+/*
+ * Printable limit names, indexed by RLIMIT_* / VLIMIT_* id.
+ * Any id below NUM_LIMITS without an entry here is left NULL.
+ */
+const char *vlimit_name[NUM_LIMITS] = {
+       [RLIMIT_CPU]            = "CPU",
+       [RLIMIT_RSS]            = "RSS",
+       [RLIMIT_NPROC]          = "NPROC",
+       [RLIMIT_NOFILE] = "NOFILE",
+       [RLIMIT_MEMLOCK]        = "VML",
+       [RLIMIT_AS]             = "VM",
+       [RLIMIT_LOCKS]          = "LOCKS",
+       [RLIMIT_MSGQUEUE]       = "MSGQ",
+       [VLIMIT_NSOCK]          = "NSOCK",
+};
+
+EXPORT_SYMBOL_GPL(vlimit_name);
+
+
 static int is_valid_rlimit(int id)
 {
        int valid = 0;
@@ -42,7 +59,7 @@ static inline uint64_t vc_get_rlim(struct vx_info *vxi, int id)
        limit = vxi->limit.rlim[id];
        if (limit == RLIM_INFINITY)
                return CRLIM_INFINITY;
-       return limit;   
+       return limit;
 }
 
 int vc_get_rlimit(uint32_t id, void __user *data)
@@ -54,8 +71,8 @@ int vc_get_rlimit(uint32_t id, void __user *data)
                return -EFAULT;
        if (!is_valid_rlimit(vc_data.id))
                return -ENOTSUPP;
-               
-       vxi = find_vx_info(id);
+
+       vxi = locate_vx_info(id);
        if (!vxi)
                return -ESRCH;
 
@@ -81,13 +98,12 @@ int vc_set_rlimit(uint32_t id, void __user *data)
        if (!is_valid_rlimit(vc_data.id))
                return -ENOTSUPP;
 
-       vxi = find_vx_info(id);
+       vxi = locate_vx_info(id);
        if (!vxi)
                return -ESRCH;
 
        if (vc_data.maximum != CRLIM_KEEP)
                vxi->limit.rlim[vc_data.id] = vc_data.maximum;
-       printk("setting [%d] = %d\n", vc_data.id, (int)vc_data.maximum);
        put_vx_info(vxi);
 
        return 0;
@@ -111,7 +127,7 @@ int vc_get_rlimit_mask(uint32_t id, void __user *data)
        if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
                return -EPERM;
        if (copy_to_user(data, &mask, sizeof(mask)))
-                return -EFAULT;
+               return -EFAULT;
        return 0;
 }
 
@@ -124,25 +140,25 @@ void vx_vsi_meminfo(struct sysinfo *val)
        v = vxi->limit.rlim[RLIMIT_RSS];
        if (v != RLIM_INFINITY)
                val->totalram = min(val->totalram, v);
-       v = atomic_read(&vxi->limit.res[RLIMIT_RSS]);
+       v = atomic_read(&vxi->limit.rcur[RLIMIT_RSS]);
        val->freeram = (v < val->totalram) ? val->totalram - v : 0;
        val->bufferram = 0;
-        val->totalhigh = 0;
-        val->freehigh = 0;
+       val->totalhigh = 0;
+       val->freehigh = 0;
        return;
 }
 
 void vx_vsi_swapinfo(struct sysinfo *val)
 {
        struct vx_info *vxi = current->vx_info;
-       unsigned long w,v;
+       unsigned long v, w;
 
        v = vxi->limit.rlim[RLIMIT_RSS];
        w = vxi->limit.rlim[RLIMIT_AS];
        if (w != RLIM_INFINITY)
                val->totalswap = min(val->totalswap, w -
                ((v != RLIM_INFINITY) ? v : 0));
-       w = atomic_read(&vxi->limit.res[RLIMIT_AS]);
+       w = atomic_read(&vxi->limit.rcur[RLIMIT_AS]);
        val->freeswap = (w < val->totalswap) ? val->totalswap - w : 0;
        return;
 }
diff --git a/kernel/vserver/limit_init.h b/kernel/vserver/limit_init.h
new file mode 100644 (file)
index 0000000..0a9dcf4
--- /dev/null
@@ -0,0 +1,28 @@
+
+static inline void vx_info_init_limit(struct _vx_limit *limit)
+{
+       int idx = NUM_LIMITS;
+
+       while (idx-- > 0) {     /* RLIM_INFINITY cap, all counters zeroed */
+               atomic_set(&limit->lhit[idx], 0);
+               atomic_set(&limit->rcur[idx], 0);
+               limit->rmax[idx] = 0;
+               limit->rlim[idx] = RLIM_INFINITY;
+       }
+}
+
+static inline void vx_info_exit_limit(struct _vx_limit *limit)
+{
+#ifdef CONFIG_VSERVER_DEBUG
+       unsigned int lim = 0;
+
+       /* debug builds only: hand each current usage count to vxwprintk() */
+       while (lim < NUM_LIMITS) {
+               unsigned long left = atomic_read(&limit->rcur[lim]);
+               vxwprintk(left, "!!! limit: %p[%s,%d] = %ld on exit.",
+                       limit, vlimit_name[lim], lim, left);
+               lim++;
+       }
+#endif
+}
+
diff --git a/kernel/vserver/limit_proc.h b/kernel/vserver/limit_proc.h
new file mode 100644 (file)
index 0000000..97696e9
--- /dev/null
@@ -0,0 +1,48 @@
+#ifndef _VX_LIMIT_PROC_H
+#define _VX_LIMIT_PROC_H
+
+
+static inline void vx_limit_fixup(struct _vx_limit *limit)
+{
+       unsigned int lim = NUM_LIMITS;
+
+       /* raise rmax to the current usage, then cap it at rlim */
+       while (lim--) {
+               unsigned long cur = atomic_read(&limit->rcur[lim]);
+               if (limit->rmax[lim] < cur)
+                       limit->rmax[lim] = cur;
+               if (limit->rlim[lim] < limit->rmax[lim])
+                       limit->rmax[lim] = limit->rlim[lim];
+       }
+}
+
+#define VX_LIMIT_FMT   ":\t%10d\t%10ld\t%10ld\t%6d\n"
+
+#define VX_LIMIT_ARG(r)                                \
+               ,atomic_read(&limit->rcur[r])   \
+               ,limit->rmax[r]                 \
+               ,limit->rlim[r]                 \
+               ,atomic_read(&limit->lhit[r])
+
+static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer)
+{
+       vx_limit_fixup(limit);  /* refresh rmax from rcur/rlim before printing */
+       return sprintf(buffer,  /* NOTE(review): unbounded; assumes buffer fits six rows */
+               "PROC"  VX_LIMIT_FMT
+               "VM"    VX_LIMIT_FMT
+               "VML"   VX_LIMIT_FMT
+               "RSS"   VX_LIMIT_FMT
+               "FILES" VX_LIMIT_FMT
+               "SOCK"  VX_LIMIT_FMT
+               VX_LIMIT_ARG(RLIMIT_NPROC)
+               VX_LIMIT_ARG(RLIMIT_AS)
+               VX_LIMIT_ARG(RLIMIT_MEMLOCK)
+               VX_LIMIT_ARG(RLIMIT_RSS)
+               VX_LIMIT_ARG(RLIMIT_NOFILE)
+               VX_LIMIT_ARG(VLIMIT_NSOCK)
+               );
+}
+
+#endif /* _VX_LIMIT_PROC_H */
+
+
index 2c76c6f..6685161 100644 (file)
 
 #include <linux/config.h>
 #include <linux/utsname.h>
+#include <linux/sched.h>
+#include <linux/vs_context.h>
 #include <linux/vserver/namespace.h>
-#include <linux/vinline.h>
-#include <linux/namespace.h>
 #include <linux/dcache.h>
+#include <linux/mount.h>
+#include <linux/fs.h>
 
 #include <asm/errno.h>
 #include <asm/uaccess.h>
 
 
+/*
+ * vx_check_vfsmount - check @mnt against the current task's root
+ *
+ * Returns 1 when @vxi or @mnt is NULL.  Otherwise, with dcache_lock held,
+ * walks @mnt up through mnt_parent until it hits the topmost mount, the
+ * context root mount (vxi->vx_fs, falling back to current->fs) or
+ * current->fs->rootmnt, remembering the last mountpoint crossed.  The
+ * result is non-zero only if the walk stopped on current->fs->rootmnt and
+ * is_subdir(point, current->fs->root) is non-zero.
+ *
+ * NOTE(review): r1, s1 and ret (which would also accept the context root)
+ * are computed but never returned; only the commented-out printk uses
+ * them.  Confirm whether "ret" was meant to be the return value.
+ */
+int vx_check_vfsmount(struct vx_info *vxi, struct vfsmount *mnt)
+{
+       struct vfsmount *root_mnt, *altroot_mnt;
+       struct dentry *root, *altroot, *point;
+       int r1, r2, s1, s2, ret = 0;
+
+       if (!vxi || !mnt)
+               return 1;
+
+       spin_lock(&dcache_lock);
+       altroot_mnt = current->fs->rootmnt;
+       altroot = current->fs->root;
+       /* if the walk below never moves, the root itself is tested */
+       point = altroot;
+
+       if (vxi->vx_fs) {
+               root_mnt = vxi->vx_fs->rootmnt;
+               root = vxi->vx_fs->root;
+       } else {
+               root_mnt = altroot_mnt;
+               root = altroot;
+       }
+       /* printk("··· %p:%p/%p:%p ",
+               root_mnt, root, altroot_mnt, altroot);  */
+
+       /* climb towards the top until one of the two root mounts is reached */
+       while ((mnt != mnt->mnt_parent) &&
+               (mnt != root_mnt) && (mnt != altroot_mnt)) {
+               point = mnt->mnt_mountpoint;
+               mnt = mnt->mnt_parent;
+       }
+
+       r1 = (mnt == root_mnt);
+       s1 = is_subdir(point, root);
+       r2 = (mnt == altroot_mnt);
+       s2 = is_subdir(point, altroot);
+
+       ret = (((mnt == root_mnt) && is_subdir(point, root)) ||
+               ((mnt == altroot_mnt) && is_subdir(point, altroot)));
+       /* printk("··· for %p:%p -> %d:%d/%d:%d = %d\n",
+               mnt, point, r1, s1, r2, s2, ret);       */
+       spin_unlock(&dcache_lock);
+
+       /* only the current-task-root test decides the result (see NOTE above) */
+       return (r2 && s2);
+}
+
+
 /* virtual host info names */
 
 static char * vx_vhi_name(struct vx_info *vxi, int id)
@@ -56,11 +103,11 @@ int vc_set_vhi_name(uint32_t id, void __user *data)
                return -EPERM;
        if (copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;
-       
-       vxi = find_vx_info(id);
+
+       vxi = locate_vx_info(id);
        if (!vxi)
                return -ESRCH;
-       
+
        name = vx_vhi_name(vxi, vc_data.field);
        if (name)
                memcpy(name, vc_data.name, 65);
@@ -77,14 +124,14 @@ int vc_get_vhi_name(uint32_t id, void __user *data)
        if (copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;
 
-       vxi = find_vx_info(id);
+       vxi = locate_vx_info(id);
        if (!vxi)
                return -ESRCH;
 
        name = vx_vhi_name(vxi, vc_data.field);
        if (!name)
                goto out_put;
-                       
+
        memcpy(vc_data.name, name, 65);
        if (copy_to_user (data, &vc_data, sizeof(vc_data)))
                return -EFAULT;
@@ -126,7 +173,7 @@ int vc_enter_namespace(uint32_t id, void *data)
        if (!vx_check(0, VX_ADMIN))
                return -ENOSYS;
 
-       vxi = find_vx_info(id);
+       vxi = locate_vx_info(id);
        if (!vxi)
                return -ESRCH;
 
@@ -144,7 +191,7 @@ int vc_enter_namespace(uint32_t id, void *data)
        old_ns = current->namespace;
        old_fs = current->fs;
        get_namespace(vxi->vx_namespace);
-       current->namespace = vxi->vx_namespace; 
+       current->namespace = vxi->vx_namespace;
        current->fs = fs;
        task_unlock(current);
 
@@ -158,11 +205,9 @@ out_put:
 int vc_cleanup_namespace(uint32_t id, void *data)
 {
        down_write(&current->namespace->sem);
-       // spin_lock(&dcache_lock);
        spin_lock(&vfsmount_lock);
        umount_unused(current->namespace->root, current->fs);
        spin_unlock(&vfsmount_lock);
-       // spin_unlock(&dcache_lock);
        up_write(&current->namespace->sem);
        return 0;
 }
index e87c8b6..f1a110b 100644 (file)
@@ -14,8 +14,7 @@
 
 #include <linux/config.h>
 #include <linux/slab.h>
-#include <linux/vserver.h>
-#include <linux/vs_base.h>
+#include <linux/vserver/network_cmd.h>
 #include <linux/rcupdate.h>
 #include <net/tcp.h>
 
@@ -70,6 +69,35 @@ static void __dealloc_nx_info(struct nx_info *nxi)
        kfree(nxi);
 }
 
+/* Deallocate @nxi if its use count is zero; returns the use count read. */
+static inline int __free_nx_info(struct nx_info *nxi)
+{
+       int users, refs;
+
+       BUG_ON(!nxi);
+       users = atomic_read(&nxi->nx_usecnt);
+       refs = atomic_read(&nxi->nx_refcnt);
+       BUG_ON(users < 0);
+       BUG_ON(refs < 0);
+
+       /* a structure still in use is left alone; the caller gets users != 0 */
+       if (users == 0)
+               __dealloc_nx_info(nxi);
+       return users;
+}
+
+/* RCU callback: log, then put_nx_info() the nx_info that embeds @head. */
+static void __rcu_put_nx_info(struct rcu_head *head)
+{
+       struct nx_info *nxi;
+
+       nxi = container_of(head, struct nx_info, nx_rcu);
+       vxdprintk(VXD_CBIT(nid, 3), "__rcu_put_nx_info(%p[#%d]): %d,%d",
+               nxi, nxi->nx_id, atomic_read(&nxi->nx_usecnt),
+               atomic_read(&nxi->nx_refcnt));
+       put_nx_info(nxi);
+}
+
 
 /*     hash table for nx_info hash */
 
@@ -113,7 +141,7 @@ static inline void __unhash_nx_info(struct nx_info *nxi)
        vxdprintk(VXD_CBIT(nid, 4),
                "__unhash_nx_info: %p[#%d]", nxi, nxi->nx_id);
        hlist_del_rcu(&nxi->nx_hlist);
-       put_nx_info(nxi);
+       call_rcu(&nxi->nx_rcu, __rcu_put_nx_info);
 }
 
 
@@ -142,6 +170,7 @@ static inline struct nx_info *__lookup_nx_info(nid_t nid)
 /*     __nx_dynamic_id()
 
        * find unused dynamic nid
+       * requires the rcu_read_lock()
        * requires the hash_lock to be held                     */
 
 static inline nid_t __nx_dynamic_id(void)
@@ -177,6 +206,9 @@ static struct nx_info * __loc_nx_info(int id, int *err)
                return NULL;
        }
 
+       /* FIXME is this required at all ? */
+       rcu_read_lock();
+       /* required to make dynamic xids unique */
        spin_lock(&nx_info_hash_lock);
 
        /* dynamic context requested */
@@ -214,6 +246,7 @@ static struct nx_info * __loc_nx_info(int id, int *err)
 
 out_unlock:
        spin_unlock(&nx_info_hash_lock);
+       rcu_read_unlock();
        if (new)
                __dealloc_nx_info(new);
        return nxi;
@@ -223,28 +256,9 @@ out_unlock:
 
 /*     exported stuff                                          */
 
-
-
-
-void rcu_free_nx_info(struct rcu_head *head)
+void free_nx_info(struct nx_info *nxi)
 {
-       struct nx_info *nxi = container_of(head, struct nx_info, nx_rcu);
-       int usecnt, refcnt;
-
-       BUG_ON(!nxi || !head);
-
-       usecnt = atomic_read(&nxi->nx_usecnt);
-       BUG_ON(usecnt < 0);
-
-       refcnt = atomic_read(&nxi->nx_refcnt);
-       BUG_ON(refcnt < 0);
-
-       vxdprintk(VXD_CBIT(nid, 3),
-               "rcu_free_nx_info(%p): uc=%d", nxi, usecnt);
-       if (!usecnt)
-               __dealloc_nx_info(nxi);
-       else
-               printk("!!! rcu didn't free\n");
+       BUG_ON(__free_nx_info(nxi));
 }
 
 void unhash_nx_info(struct nx_info *nxi)
@@ -696,7 +710,6 @@ int vc_set_ncaps(uint32_t id, void __user *data)
 
 #include <linux/module.h>
 
-EXPORT_SYMBOL_GPL(rcu_free_nx_info);
-EXPORT_SYMBOL_GPL(nx_info_hash_lock);
+EXPORT_SYMBOL_GPL(free_nx_info);
 EXPORT_SYMBOL_GPL(unhash_nx_info);
 
index 42bc182..823226b 100644 (file)
 #include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/proc_fs.h>
-#include <linux/vserver.h>
+#include <linux/sched.h>
+#include <linux/vs_context.h>
+#include <linux/vs_network.h>
+#include <linux/vs_cvirt.h>
+
+#include <linux/vserver/switch.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 
+#include "cvirt_proc.h"
+#include "limit_proc.h"
+#include "sched_proc.h"
 
 static struct proc_dir_entry *proc_virtual;
 
@@ -43,7 +51,7 @@ enum vid_directory_inos {
        PROC_NID_STATUS,
 };
 
-#define        PROC_VID_MASK   0x60
+#define PROC_VID_MASK  0x60
 
 
 /* first the actual feeds */
@@ -66,7 +74,7 @@ int proc_xid_info (int vid, char *buffer)
        struct vx_info *vxi;
        int length;
 
-       vxi = find_vx_info(vid);
+       vxi = locate_vx_info(vid);
        if (!vxi)
                return 0;
        length = sprintf(buffer,
@@ -86,19 +94,21 @@ int proc_xid_status (int vid, char *buffer)
        struct vx_info *vxi;
        int length;
 
-       vxi = find_vx_info(vid);
+       vxi = locate_vx_info(vid);
        if (!vxi)
                return 0;
        length = sprintf(buffer,
-               "RefC:\t%d\n"           
+               "UseCnt:\t%d\n"
+               "RefCnt:\t%d\n"
                "Flags:\t%016llx\n"
                "BCaps:\t%016llx\n"
                "CCaps:\t%016llx\n"
-               "Ticks:\t%d\n"          
-               ,atomic_read(&vxi->vx_refcount)
-               ,vxi->vx_flags
-               ,vxi->vx_bcaps
-               ,vxi->vx_ccaps
+               "Ticks:\t%d\n"
+               ,atomic_read(&vxi->vx_usecnt)
+               ,atomic_read(&vxi->vx_refcnt)
+               ,(unsigned long long)vxi->vx_flags
+               ,(unsigned long long)vxi->vx_bcaps
+               ,(unsigned long long)vxi->vx_ccaps
                ,atomic_read(&vxi->limit.ticks)
                );
        put_vx_info(vxi);
@@ -110,7 +120,7 @@ int proc_xid_limit (int vid, char *buffer)
        struct vx_info *vxi;
        int length;
 
-       vxi = find_vx_info(vid);
+       vxi = locate_vx_info(vid);
        if (!vxi)
                return 0;
        length = vx_info_proc_limit(&vxi->limit, buffer);
@@ -123,7 +133,7 @@ int proc_xid_sched (int vid, char *buffer)
        struct vx_info *vxi;
        int length;
 
-       vxi = find_vx_info(vid);
+       vxi = locate_vx_info(vid);
        if (!vxi)
                return 0;
        length = vx_info_proc_sched(&vxi->sched, buffer);
@@ -136,9 +146,10 @@ int proc_xid_cvirt (int vid, char *buffer)
        struct vx_info *vxi;
        int length;
 
-       vxi = find_vx_info(vid);
+       vxi = locate_vx_info(vid);
        if (!vxi)
                return 0;
+       vx_update_load(vxi);
        length = vx_info_proc_cvirt(&vxi->cvirt, buffer);
        put_vx_info(vxi);
        return length;
@@ -149,7 +160,7 @@ int proc_xid_cacct (int vid, char *buffer)
        struct vx_info *vxi;
        int length;
 
-       vxi = find_vx_info(vid);
+       vxi = locate_vx_info(vid);
        if (!vxi)
                return 0;
        length = vx_info_proc_cacct(&vxi->cacct, buffer);
@@ -169,7 +180,7 @@ static int proc_vnet_info(int vid, char *buffer)
                );
 }
 
-#define        atoquad(a) \
+#define atoquad(a) \
        (((a)>>0) & 0xff), (((a)>>8) & 0xff), \
        (((a)>>16) & 0xff), (((a)>>24) & 0xff)
 
@@ -178,7 +189,7 @@ int proc_nid_info (int vid, char *buffer)
        struct nx_info *nxi;
        int length, i;
 
-       nxi = find_nx_info(vid);
+       nxi = locate_nx_info(vid);
        if (!nxi)
                return 0;
        length = sprintf(buffer,
@@ -202,12 +213,14 @@ int proc_nid_status (int vid, char *buffer)
        struct nx_info *nxi;
        int length;
 
-       nxi = find_nx_info(vid);
+       nxi = locate_nx_info(vid);
        if (!nxi)
                return 0;
        length = sprintf(buffer,
-               "RefC:\t%d\n"           
-               ,atomic_read(&nxi->nx_refcount)
+               "UseCnt:\t%d\n"
+               "RefCnt:\t%d\n"
+               ,atomic_read(&nxi->nx_usecnt)
+               ,atomic_read(&nxi->nx_refcnt)
                );
        put_nx_info(nxi);
        return length;
@@ -216,11 +229,11 @@ int proc_nid_status (int vid, char *buffer)
 /* here the inode helpers */
 
 
+#define fake_ino(id,nr) (((nr) & 0xFFFF) | \
+                       (((id) & 0xFFFF) << 16))
 
-#define fake_ino(id,ino) (((id)<<16)|(ino))
-
-#define        inode_vid(i)    ((i)->i_ino >> 16)
-#define        inode_type(i)   ((i)->i_ino & 0xFFFF)
+#define inode_vid(i)   (((i)->i_ino >> 16) & 0xFFFF)
+#define inode_type(i)  ((i)->i_ino & 0xFFFF)
 
 #define MAX_MULBY10    ((~0U-9)/10)
 
@@ -247,18 +260,18 @@ out:
 static int proc_vid_revalidate(struct dentry * dentry, struct nameidata *nd)
 {
        struct inode * inode = dentry->d_inode;
-       int vid, valid=0;
+       int vid, hashed=0;
 
        vid = inode_vid(inode);
        switch (inode_type(inode) & PROC_VID_MASK) {
                case PROC_XID_INO:
-                       valid = vx_info_id_valid(vid);
+                       hashed = vx_info_is_hashed(vid);
                        break;
                case PROC_NID_INO:
-                       valid = nx_info_id_valid(vid);
+                       hashed = nx_info_is_hashed(vid);
                        break;
-       }       
-       if (valid)
+       }
+       if (hashed)
                return 1;
        d_drop(dentry);
        return 0;
@@ -267,7 +280,7 @@ static int proc_vid_revalidate(struct dentry * dentry, struct nameidata *nd)
 /*
 static int proc_vid_delete_dentry(struct dentry * dentry)
 {
-        return 1;
+       return 1;
 }
 */
 
@@ -320,7 +333,7 @@ static struct file_operations proc_vid_info_file_operations = {
 };
 
 static struct dentry_operations proc_vid_dentry_operations = {
-       d_revalidate:   proc_vid_revalidate,
+       d_revalidate:   proc_vid_revalidate,
 //     d_delete:       proc_vid_delete_dentry,
 };
 
@@ -364,10 +377,10 @@ static struct dentry *proc_vid_lookup(struct inode *dir,
 
        switch (inode_type(dir)) {
                case PROC_XID_INO:
-                       p = vx_base_stuff;      
+                       p = vx_base_stuff;
                        break;
                case PROC_NID_INO:
-                       p = vn_base_stuff;      
+                       p = vn_base_stuff;
                        break;
                default:
                        goto out;
@@ -413,7 +426,7 @@ static struct dentry *proc_vid_lookup(struct inode *dir,
                case PROC_NID_STATUS:
                        PROC_I(inode)->op.proc_vid_read = proc_nid_status;
                        break;
-               
+
                default:
                        printk("procfs: impossible type (%d)",p->type);
                        iput(inode);
@@ -424,7 +437,7 @@ static struct dentry *proc_vid_lookup(struct inode *dir,
        inode->i_fop = &proc_vid_info_file_operations;
        inode->i_nlink = 1;
        inode->i_flags|=S_IMMUTABLE;
-       
+
        dentry->d_op = &proc_vid_dentry_operations;
        d_add(dentry, inode);
        error = 0;
@@ -439,7 +452,7 @@ static int proc_vid_readdir(struct file * filp,
        int i, size;
        struct inode *inode = filp->f_dentry->d_inode;
        struct vid_entry *p;
-       
+
        i = filp->f_pos;
        switch (i) {
                case 0:
@@ -461,11 +474,11 @@ static int proc_vid_readdir(struct file * filp,
                        switch (inode_type(inode)) {
                                case PROC_XID_INO:
                                        size = sizeof(vx_base_stuff);
-                                       p = vx_base_stuff + i;  
+                                       p = vx_base_stuff + i;
                                        break;
                                case PROC_NID_INO:
                                        size = sizeof(vn_base_stuff);
-                                       p = vn_base_stuff + i;  
+                                       p = vn_base_stuff + i;
                                        break;
                                default:
                                        return 1;
@@ -564,7 +577,7 @@ struct dentry *proc_virtual_lookup(struct inode *dir,
        xid = atovid(name, len);
        if (xid < 0)
                goto out;
-       vxi = find_vx_info(xid);
+       vxi = locate_vx_info(xid);
        if (!vxi)
                goto out;
 
@@ -584,7 +597,7 @@ struct dentry *proc_virtual_lookup(struct inode *dir,
        dentry->d_op = &proc_vid_dentry_operations;
        d_add(dentry, inode);
        ret = 0;
-       
+
 out_release:
        put_vx_info(vxi);
 out:
@@ -634,7 +647,7 @@ struct dentry *proc_vnet_lookup(struct inode *dir,
        nid = atovid(name, len);
        if (nid < 0)
                goto out;
-       nxi = find_nx_info(nid);
+       nxi = locate_nx_info(nid);
        if (!nxi)
                goto out;
 
@@ -654,7 +667,7 @@ struct dentry *proc_vnet_lookup(struct inode *dir,
        dentry->d_op = &proc_vid_dentry_operations;
        d_add(dentry, inode);
        ret = 0;
-       
+
 out_release:
        put_nx_info(nxi);
 out:
@@ -667,27 +680,6 @@ out:
 #define PROC_NUMBUF 10
 #define PROC_MAXVIDS 32
 
-
-static int get_xid_list(int index, unsigned int *xids)
-{
-       struct vx_info *p;
-       int nr_xids = 0;
-
-       index--;
-       spin_lock(&vxlist_lock);
-       list_for_each_entry(p, &vx_infos, vx_list) {
-               int xid = p->vx_id;
-
-               if (--index >= 0)
-                       continue;
-               xids[nr_xids] = xid;
-               if (++nr_xids >= PROC_MAXVIDS)
-                       break;
-       }
-       spin_unlock(&vxlist_lock);
-       return nr_xids;
-}
-
 int proc_virtual_readdir(struct file * filp,
        void * dirent, filldir_t filldir)
 {
@@ -720,7 +712,7 @@ int proc_virtual_readdir(struct file * filp,
                        filp->f_pos++;
                        /* fall through */
                case 3:
-                       if (current->xid > 1) {
+                       if (vx_current_xid() > 1) {
                                ino = fake_ino(1, PROC_XID_INO);
                                if (filldir(dirent, "current", 7,
                                        filp->f_pos, ino, DT_LNK) < 0)
@@ -729,12 +721,11 @@ int proc_virtual_readdir(struct file * filp,
                        filp->f_pos++;
        }
 
-       nr_xids = get_xid_list(nr, xid_array);
-
+       nr_xids = get_xid_list(nr, xid_array, PROC_MAXVIDS);
        for (i = 0; i < nr_xids; i++) {
                int xid = xid_array[i];
                ino_t ino = fake_ino(xid, PROC_XID_INO);
-               unsigned long j = PROC_NUMBUF;
+               unsigned int j = PROC_NUMBUF;
 
                do buf[--j] = '0' + (xid % 10); while (xid/=10);
 
@@ -757,27 +748,6 @@ static struct inode_operations proc_virtual_dir_inode_operations = {
 };
 
 
-
-static int get_nid_list(int index, unsigned int *nids)
-{
-       struct nx_info *p;
-       int nr_nids = 0;
-
-       index--;
-       spin_lock(&nxlist_lock);
-       list_for_each_entry(p, &nx_infos, nx_list) {
-               int nid = p->nx_id;
-
-               if (--index >= 0)
-                       continue;
-               nids[nr_nids] = nid;
-               if (++nr_nids >= PROC_MAXVIDS)
-                       break;
-       }
-       spin_unlock(&nxlist_lock);
-       return nr_nids;
-}
-
 int proc_vnet_readdir(struct file * filp,
        void * dirent, filldir_t filldir)
 {
@@ -810,7 +780,7 @@ int proc_vnet_readdir(struct file * filp,
                        filp->f_pos++;
                        /* fall through */
                case 3:
-                       if (current->xid > 1) {
+                       if (vx_current_xid() > 1) {
                                ino = fake_ino(1, PROC_NID_INO);
                                if (filldir(dirent, "current", 7,
                                        filp->f_pos, ino, DT_LNK) < 0)
@@ -819,8 +789,7 @@ int proc_vnet_readdir(struct file * filp,
                        filp->f_pos++;
        }
 
-       nr_nids = get_nid_list(nr, nid_array);
-
+       nr_nids = get_nid_list(nr, nid_array, PROC_MAXVIDS);
        for (i = 0; i < nr_nids; i++) {
                int nid = nid_array[i];
                ino_t ino = fake_ino(nid, PROC_NID_INO);
@@ -859,7 +828,7 @@ void proc_vx_init(void)
        }
        proc_virtual = ent;
 
-       ent = proc_mkdir("vnet", 0);
+       ent = proc_mkdir("virtnet", 0);
        if (ent) {
                ent->proc_fops = &proc_vnet_dir_operations;
                ent->proc_iops = &proc_vnet_dir_inode_operations;
@@ -875,9 +844,22 @@ void proc_vx_init(void)
 
 char *task_vx_info(struct task_struct *p, char *buffer)
 {
-       return buffer + sprintf(buffer,
-               "XID:\t%d\n"
-               ,p->xid);
+       struct vx_info *vxi;
+
+       buffer += sprintf (buffer,"XID:\t%d\n", vx_task_xid(p));
+       vxi = task_get_vx_info(p);
+       if (vxi && !vx_flags(VXF_INFO_HIDE, 0)) {
+               buffer += sprintf (buffer,"BCaps:\t%016llx\n"
+                       ,(unsigned long long)vxi->vx_bcaps);
+               buffer += sprintf (buffer,"CCaps:\t%016llx\n"
+                       ,(unsigned long long)vxi->vx_ccaps);
+               buffer += sprintf (buffer,"CFlags:\t%016llx\n"
+                       ,(unsigned long long)vxi->vx_flags);
+               buffer += sprintf (buffer,"CIPid:\t%d\n"
+                       ,vxi->vx_initpid);
+       }
+       put_vx_info(vxi);
+       return buffer;
 }
 
 int proc_pid_vx_info(struct task_struct *p, char *buffer)
@@ -890,9 +872,25 @@ int proc_pid_vx_info(struct task_struct *p, char *buffer)
 
 char *task_nx_info(struct task_struct *p, char *buffer)
 {
-       return buffer + sprintf(buffer,
-               "NID:\t%d\n"
-               ,p->nid);
+       struct nx_info *nxi;
+
+       buffer += sprintf (buffer,"NID:\t%d\n", nx_task_nid(p));
+       nxi = task_get_nx_info(p);
+       if (nxi && !vx_flags(VXF_INFO_HIDE, 0)) {
+               int i;
+
+               for (i=0; i<nxi->nbipv4; i++){
+                       buffer += sprintf (buffer,
+                               "V4Root[%d]:\t%d.%d.%d.%d/%d.%d.%d.%d\n", i
+                               ,NIPQUAD(nxi->ipv4[i])
+                               ,NIPQUAD(nxi->mask[i]));
+               }
+               buffer += sprintf (buffer,
+                       "V4Root[bcast]:\t%d.%d.%d.%d\n"
+                       ,NIPQUAD(nxi->v4_bcast));
+       }
+       put_nx_info(nxi);
+       return buffer;
 }
 
 int proc_pid_nx_info(struct task_struct *p, char *buffer)
index a75195a..70e964e 100644 (file)
 
 #include <linux/config.h>
 #include <linux/sched.h>
-#include <linux/vinline.h>
-#include <linux/vserver/context.h>
-#include <linux/vserver/sched.h>
+// #include <linux/vs_base.h>
+#include <linux/vs_context.h>
+#include <linux/vs_sched.h>
+#include <linux/vserver/sched_cmd.h>
 
 #include <asm/errno.h>
 #include <asm/uaccess.h>
@@ -32,7 +33,7 @@ int vx_tokens_recalc(struct vx_info *vxi)
 {
        long delta, tokens = 0;
 
-       if (__vx_flags(vxi->vx_flags, VXF_SCHED_PAUSE, 0))
+       if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
                /* we are paused */
                return 0;
 
@@ -51,7 +52,7 @@ int vx_tokens_recalc(struct vx_info *vxi)
                atomic_add(tokens, &vxi->sched.tokens);
                vxi->sched.jiffies += delta;
                tokens = atomic_read(&vxi->sched.tokens);
-       
+
                if (tokens > vxi->sched.tokens_max) {
                        tokens = vxi->sched.tokens_max;
                        atomic_set(&vxi->sched.tokens, tokens);
@@ -59,7 +60,10 @@ int vx_tokens_recalc(struct vx_info *vxi)
                spin_unlock(&vxi->sched.tokens_lock);
        } else {
                /* no new tokens */
-               if ((tokens = vx_tokens_avail(vxi)) < vxi->sched.tokens_min) {
+               tokens = vx_tokens_avail(vxi);
+               if (tokens <= 0)
+                       vxi->vx_state |= VXS_ONHOLD;
+               if (tokens < vxi->sched.tokens_min) {
                        /* enough tokens will be available in */
                        if (vxi->sched.tokens_min == 0)
                                return delta - vxi->sched.interval;
@@ -67,7 +71,14 @@ int vx_tokens_recalc(struct vx_info *vxi)
                                vxi->sched.tokens_min / vxi->sched.fill_rate;
                }
        }
+
        /* we have some tokens left */
+       if (vx_info_state(vxi, VXS_ONHOLD) &&
+               (tokens >= vxi->sched.tokens_min))
+               vxi->vx_state &= ~VXS_ONHOLD;
+       if (vx_info_state(vxi, VXS_ONHOLD))
+               tokens -= vxi->sched.tokens_min;
+
        return tokens;
 }
 
@@ -118,15 +129,15 @@ int effective_vavavoom(task_t *p, int max_prio)
 }
 
 
-int vc_set_sched(uint32_t xid, void __user *data)
+int vc_set_sched_v2(uint32_t xid, void __user *data)
 {
        struct vcmd_set_sched_v2 vc_data;
        struct vx_info *vxi;
 
        if (copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;
-       
-       vxi = find_vx_info(xid);
+
+       vxi = locate_vx_info(xid);
        if (!vxi)
                return -EINVAL;
 
@@ -160,3 +171,55 @@ int vc_set_sched(uint32_t xid, void __user *data)
        return 0;
 }
 
+
+/*
+ * vc_set_sched - apply a struct vcmd_set_sched_v3 request to context @xid
+ *
+ * Only the fields selected by the request's set_mask are taken over; the
+ * resulting settings are then clamped to sane values.  All updates happen
+ * under the context's sched.tokens_lock.
+ *
+ * Returns 0 on success, -EFAULT if the request cannot be copied from user
+ * space, and -EINVAL if locate_vx_info() finds no context for @xid.
+ */
+int vc_set_sched(uint32_t xid, void __user *data)
+{
+       struct vcmd_set_sched_v3 req;
+       struct vx_info *vxi;
+       unsigned int mask;
+
+       if (copy_from_user(&req, data, sizeof(req)))
+               return -EFAULT;
+       mask = req.set_mask;
+
+       vxi = locate_vx_info(xid);
+       if (!vxi)
+               return -EINVAL;
+
+       spin_lock(&vxi->sched.tokens_lock);
+
+       /* take over whichever values the caller selected */
+       if (mask & VXSM_PRIO_BIAS)
+               vxi->sched.priority_bias = req.priority_bias;
+       if (mask & VXSM_TOKENS_MAX)
+               vxi->sched.tokens_max = req.tokens_max;
+       if (mask & VXSM_TOKENS_MIN)
+               vxi->sched.tokens_min = req.tokens_min;
+       if (mask & VXSM_TOKENS)
+               atomic_set(&vxi->sched.tokens, req.tokens);
+       if (mask & VXSM_INTERVAL)
+               vxi->sched.interval = req.interval;
+       if (mask & VXSM_FILL_RATE)
+               vxi->sched.fill_rate = req.fill_rate;
+
+       /* rate and interval must stay positive */
+       if (vxi->sched.fill_rate <= 0)
+               vxi->sched.fill_rate = 1;
+       if (vxi->sched.interval <= 0)
+               vxi->sched.interval = HZ;
+
+       /*
+        * tokens_max is repaired first because both the current token
+        * count and tokens_min are clamped against it below.
+        */
+       if (vxi->sched.tokens_max == 0)
+               vxi->sched.tokens_max = 1;
+       if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
+               atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
+       if (vxi->sched.tokens_min > vxi->sched.tokens_max)
+               vxi->sched.tokens_min = vxi->sched.tokens_max;
+
+       /* keep the bias inside [MIN_PRIO_BIAS, MAX_PRIO_BIAS] */
+       if (vxi->sched.priority_bias > MAX_PRIO_BIAS)
+               vxi->sched.priority_bias = MAX_PRIO_BIAS;
+       if (vxi->sched.priority_bias < MIN_PRIO_BIAS)
+               vxi->sched.priority_bias = MIN_PRIO_BIAS;
+
+       spin_unlock(&vxi->sched.tokens_lock);
+
+       /* balances the locate_vx_info() call above */
+       put_vx_info(vxi);
+       return 0;
+}
+
diff --git a/kernel/vserver/sched_init.h b/kernel/vserver/sched_init.h
new file mode 100644 (file)
index 0000000..3fbab7c
--- /dev/null
@@ -0,0 +1,29 @@
+
+/*
+ * Fill a context's scheduler state with the built-in starting values
+ * and zero the tick counters of every cpu visited by for_each_cpu().
+ */
+static inline void vx_info_init_sched(struct _vx_sched *sched)
+{
+       int n;
+
+       sched->tokens_lock      = SPIN_LOCK_UNLOCKED;
+       sched->jiffies          = jiffies;
+
+       /* token accounting: hard coded constants as starting values */
+       sched->interval         = 4;
+       sched->fill_rate        = 1;
+       sched->tokens_max       = HZ >> 1;
+       sched->tokens_min       = HZ >> 4;
+       atomic_set(&sched->tokens, HZ >> 2);
+
+       sched->priority_bias    = 0;
+       sched->cpus_allowed     = CPU_MASK_ALL;
+
+       for_each_cpu(n) {
+               sched->cpu[n].hold_ticks        = 0;
+               sched->cpu[n].sys_ticks         = 0;
+               sched->cpu[n].user_ticks        = 0;
+       }
+}
+
+/* Scheduler teardown hook; it currently has nothing to do. */
+static inline void vx_info_exit_sched(struct _vx_sched *sched)
+{
+       /* intentionally empty */
+}
+
diff --git a/kernel/vserver/sched_proc.h b/kernel/vserver/sched_proc.h
new file mode 100644 (file)
index 0000000..1da5fa3
--- /dev/null
@@ -0,0 +1,38 @@
+#ifndef _VX_SCHED_PROC_H
+#define _VX_SCHED_PROC_H
+
+
+/*
+ * Format the scheduler state of one context as text.
+ *
+ * Writes the token settings, followed by one "cpu N: user sys hold" line
+ * per online cpu, into @buffer and returns the number of characters
+ * written.  The output is not bounded here, so the caller has to supply
+ * a buffer that is large enough.
+ */
+static inline int vx_info_proc_sched(struct _vx_sched *sched, char *buffer)
+{
+       char *pos = buffer;
+       int cpu_nr;
+
+       pos += sprintf(pos,
+               "Token:\t\t%8d\n"
+               "FillRate:\t%8d\n"
+               "Interval:\t%8d\n"
+               "TokensMin:\t%8d\n"
+               "TokensMax:\t%8d\n"
+               "PrioBias:\t%8d\n",
+               atomic_read(&sched->tokens),
+               sched->fill_rate,
+               sched->interval,
+               sched->tokens_min,
+               sched->tokens_max,
+               sched->priority_bias);
+
+       for_each_online_cpu(cpu_nr) {
+               pos += sprintf(pos,
+                       "cpu %d: %lld %lld %lld\n",
+                       cpu_nr,
+                       (long long)sched->cpu[cpu_nr].user_ticks,
+                       (long long)sched->cpu[cpu_nr].sys_ticks,
+                       (long long)sched->cpu[cpu_nr].hold_ticks);
+       }
+
+       return pos - buffer;
+}
+
+#endif /* _VX_SCHED_PROC_H */
index 464ea1b..bdf3c22 100644 (file)
@@ -15,7 +15,7 @@
 #include <asm/errno.h>
 #include <asm/uaccess.h>
 
-#include <linux/vinline.h>
+#include <linux/vs_context.h>
 #include <linux/vserver/signal.h>
 
 
@@ -31,14 +31,14 @@ int vc_ctx_kill(uint32_t id, void __user *data)
                return -ENOSYS;
        if (copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;
-       
+
        info.si_signo = vc_data.sig;
        info.si_errno = 0;
        info.si_code = SI_USER;
        info.si_pid = current->pid;
        info.si_uid = current->uid;
 
-       vxi = find_vx_info(id);
+       vxi = locate_vx_info(id);
        if (!vxi)
                return -ESRCH;
 
@@ -61,14 +61,14 @@ int vc_ctx_kill(uint32_t id, void __user *data)
                                retval = err;
                }
                break;
-               
+
        default:
-       p = find_task_by_pid(vc_data.pid);
+       p = find_task_by_real_pid(vc_data.pid);
                if (p) {
                        if (!thread_group_leader(p)) {
                                struct task_struct *tg;
-                       
-                               tg = find_task_by_pid(p->tgid);
+
+                               tg = find_task_by_real_pid(p->tgid);
                                if (tg)
                                        p = tg;
                        }
@@ -83,3 +83,44 @@ int vc_ctx_kill(uint32_t id, void __user *data)
 }
 
 
+/*
+ * Sleep interruptibly on vxi->vx_exit until the context is flagged
+ * VXS_DEFUNCT.
+ *
+ * Returns 0 once vx_info_state(vxi, VXS_DEFUNCT) is true, or
+ * -ERESTARTSYS if a signal becomes pending first.
+ */
+static int __wait_exit(struct vx_info *vxi)
+{
+       DECLARE_WAITQUEUE(wait, current);
+       int ret = 0;
+
+       add_wait_queue(&vxi->vx_exit, &wait);
+
+wait:
+       /*
+        * Re-arm the sleep state on every pass: schedule() returns with
+        * the task in TASK_RUNNING, so arming it only once would turn any
+        * wakeup that arrives without VXS_DEFUNCT being set into a busy
+        * loop.  The state is set before the condition is tested so that
+        * a wakeup racing with the test is not lost.
+        */
+       set_current_state(TASK_INTERRUPTIBLE);
+       if (vx_info_state(vxi, VXS_DEFUNCT))
+               goto out;
+       if (signal_pending(current)) {
+               ret = -ERESTARTSYS;
+               goto out;
+       }
+       schedule();
+       goto wait;
+
+out:
+       set_current_state(TASK_RUNNING);
+       remove_wait_queue(&vxi->vx_exit, &wait);
+       return ret;
+}
+
+
+
+/*
+ * vc_wait_exit - block the caller in __wait_exit() on context @id
+ *
+ * @data is not used.  Returns -ESRCH if locate_vx_info() finds no
+ * context for @id, otherwise whatever __wait_exit() returns.
+ */
+int vc_wait_exit(uint32_t id, void __user *data)
+{
+       struct vx_info *ctx = locate_vx_info(id);
+       int err;
+
+       if (!ctx)
+               return -ESRCH;
+
+       err = __wait_exit(ctx);
+
+       /* balances the locate_vx_info() call above */
+       put_vx_info(ctx);
+       return err;
+}
+
index 90fee14..271f630 100644 (file)
@@ -3,21 +3,24 @@
  *
  *  Virtual Server: Syscall Switch
  *
- *  Copyright (C) 2003-2004  Herbert Pötzl
+ *  Copyright (C) 2003-2005  Herbert Pötzl
  *
  *  V0.01  syscall switch
  *  V0.02  added signal to context
  *  V0.03  added rlimit functions
  *  V0.04  added iattr, task/xid functions
+ *  V0.05  added debug/history stuff
  *
  */
 
 #include <linux/config.h>
 #include <linux/linkage.h>
+#include <linux/sched.h>
 #include <asm/errno.h>
 
+#include <linux/vserver/network.h>
 #include <linux/vserver/switch.h>
-#include <linux/vinline.h>
+#include <linux/vserver/debug.h>
 
 
 static inline int
@@ -26,34 +29,50 @@ vc_get_version(uint32_t id)
        return VCI_VERSION;
 }
 
+#include <linux/vserver/context_cmd.h>
+#include <linux/vserver/cvirt_cmd.h>
+#include <linux/vserver/limit_cmd.h>
+#include <linux/vserver/network_cmd.h>
+#include <linux/vserver/sched_cmd.h>
+#include <linux/vserver/debug_cmd.h>
 
 #include <linux/vserver/legacy.h>
-#include <linux/vserver/context.h>
-#include <linux/vserver/network.h>
 #include <linux/vserver/namespace.h>
-#include <linux/vserver/sched.h>
-#include <linux/vserver/limit.h>
 #include <linux/vserver/inode.h>
 #include <linux/vserver/signal.h>
-
-
-extern unsigned int vx_debug_switch;
+#include <linux/vserver/dlimit.h>
 
 
 extern asmlinkage long
 sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
 {
+       vxdprintk(VXD_CBIT(switch, 0),
+               "vc: VCMD_%02d_%d[%d], %d",
+               VC_CATEGORY(cmd), VC_COMMAND(cmd),
+               VC_VERSION(cmd), id);
 
-       if (vx_debug_switch)
-               printk( "vc: VCMD_%02d_%d[%d], %d\n",
-                       VC_CATEGORY(cmd), VC_COMMAND(cmd),
-                       VC_VERSION(cmd), id);
+#ifdef CONFIG_VSERVER_LEGACY
+       if (!capable(CAP_CONTEXT) &&
+               /* dirty hack for capremove */
+               !(cmd==VCMD_new_s_context && id==-2))
+               return -EPERM;
+#else
+       if (!capable(CAP_CONTEXT))
+               return -EPERM;
+#endif
 
        switch (cmd) {
        case VCMD_get_version:
                return vc_get_version(id);
 
-#ifdef CONFIG_VSERVER_LEGACY           
+       case VCMD_dump_history:
+#ifdef CONFIG_VSERVER_HISTORY
+               return vc_dump_history(id);
+#else
+               return -ENOSYS;
+#endif
+
+#ifdef CONFIG_VSERVER_LEGACY
        case VCMD_new_s_context:
                return vc_new_s_context(id, data);
        case VCMD_set_ipv4root:
@@ -97,7 +116,7 @@ sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
                return vc_set_rlimit(id, data);
        case VCMD_get_rlimit_mask:
                return vc_get_rlimit_mask(id, data);
-               
+
        case VCMD_vx_get_vhi_name:
                return vc_get_vhi_name(id, data);
        case VCMD_vx_set_vhi_name:
@@ -123,8 +142,20 @@ sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
        case VCMD_get_ncaps:
                return vc_get_ncaps(id, data);
 
+       case VCMD_set_sched_v2:
+               return vc_set_sched_v2(id, data);
+       /* this is version 3 */
        case VCMD_set_sched:
                return vc_set_sched(id, data);
+
+       case VCMD_add_dlimit:
+               return vc_add_dlimit(id, data);
+       case VCMD_rem_dlimit:
+               return vc_rem_dlimit(id, data);
+       case VCMD_set_dlimit:
+               return vc_set_dlimit(id, data);
+       case VCMD_get_dlimit:
+               return vc_get_dlimit(id, data);
        }
 
        /* below here only with VX_ADMIN */
@@ -135,9 +166,14 @@ sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
        case VCMD_ctx_kill:
                return vc_ctx_kill(id, data);
 
-#ifdef CONFIG_VSERVER_LEGACY           
+       case VCMD_wait_exit:
+               return vc_wait_exit(id, data);
+
        case VCMD_create_context:
+#ifdef CONFIG_VSERVER_LEGACY
                return vc_ctx_create(id, data);
+#else
+               return -ENOSYS;
 #endif
 
        case VCMD_get_iattr:
@@ -149,7 +185,7 @@ sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
                return vc_enter_namespace(id, data);
 
        case VCMD_ctx_create:
-#ifdef CONFIG_VSERVER_LEGACY           
+#ifdef CONFIG_VSERVER_LEGACY
                if (id == 1) {
                        current->xid = 1;
                        return 1;
index fffc0dd..6a90067 100644 (file)
@@ -11,7 +11,6 @@
 
 #include <linux/config.h>
 #include <linux/errno.h>
-#include <linux/vserver.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/ctype.h>
@@ -52,10 +51,6 @@ void vserver_register_sysctl(void)
 {
        if (!vserver_table_header) {
                vserver_table_header = register_sysctl_table(vserver_table, 1);
-#ifdef CONFIG_PROC_FS
-//             if (vserver_table[0].de)
-//                     vserver_table[0].de->owner = THIS_MODULE;
-#endif
        }
 
 }
index 2362ba2..b7f0f91 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/vs_memory.h>
 #include <linux/syscalls.h>
+#include <linux/vs_memory.h>
 
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
index 9a4f695..3a911dd 100644 (file)
@@ -1584,9 +1584,9 @@ retry:
         */
        /* Only go through if we didn't race with anybody else... */
        if (pte_none(*page_table)) {
-               if (!PageReserved(new_page)) 
-                       //++mm->rss;
-                       vx_rsspages_inc(mm);
+               if (!PageReserved(new_page))
+                       // ++mm->rss;
+                       vx_rsspages_inc(mm);
                flush_icache_page(vma, new_page);
                entry = mk_pte(new_page, vma->vm_page_prot);
                if (write_access)
index 3be348d..fb3a1cf 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/mm.h>
 #include <linux/vs_memory.h>
 #include <linux/syscalls.h>
+#include <linux/vs_memory.h>
 
 
 static int mlock_fixup(struct vm_area_struct * vma, 
index c17c39e..5fc8e01 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1469,10 +1469,9 @@ int expand_stack(struct vm_area_struct * vma, unsigned long address)
        vma->vm_end = address;
        // vma->vm_mm->total_vm += grow;
        vx_vmpages_add(vma->vm_mm, grow);
-       if (vma->vm_flags & VM_LOCKED) {
+       if (vma->vm_flags & VM_LOCKED)
                // vma->vm_mm->locked_vm += grow;
                vx_vmlocked_add(vma->vm_mm, grow);
-       }
        __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
        anon_vma_unlock(vma);
        return 0;
@@ -1548,10 +1547,9 @@ int expand_stack(struct vm_area_struct *vma, unsigned long address)
        vma->vm_pgoff -= grow;
        // vma->vm_mm->total_vm += grow;
        vx_vmpages_add(vma->vm_mm, grow);
-       if (vma->vm_flags & VM_LOCKED) {
+       if (vma->vm_flags & VM_LOCKED)
                // vma->vm_mm->locked_vm += grow;
                vx_vmlocked_add(vma->vm_mm, grow);
-       }
        __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
        anon_vma_unlock(vma);
        return 0;
@@ -1657,11 +1655,10 @@ static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
 
        // area->vm_mm->total_vm -= len >> PAGE_SHIFT;
        vx_vmpages_sub(area->vm_mm, len >> PAGE_SHIFT);
-       
-       if (area->vm_flags & VM_LOCKED) {
+
+       if (area->vm_flags & VM_LOCKED)
                // area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
                vx_vmlocked_sub(area->vm_mm, len >> PAGE_SHIFT);
-       }
        vm_stat_unaccount(area);
        area->vm_mm->unmap_area(area);
        remove_vm_struct(area);
@@ -2007,7 +2004,6 @@ void exit_mmap(struct mm_struct *mm)
        vx_vmpages_sub(mm, mm->total_vm);
        // mm->locked_vm = 0;
        vx_vmlocked_sub(mm, mm->locked_vm);
-       arch_flush_exec_range(mm);
 
        spin_unlock(&mm->page_table_lock);
 
index 8ad4f77..b9bc487 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/security.h>
 #include <linux/vs_memory.h>
 #include <linux/syscalls.h>
+#include <linux/vs_memory.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
index 1e780d5..834a364 100644 (file)
@@ -440,7 +440,8 @@ unsigned long do_mmap_pgoff(
 
        tblock->next = current->mm->context.tblock.next;
        current->mm->context.tblock.next = tblock;
-       current->mm->total_vm += len >> PAGE_SHIFT;
+       // current->mm->total_vm += len >> PAGE_SHIFT;
+       vx_vmpages_add(current->mm, len >> PAGE_SHIFT);
 
 #ifdef DEBUG
        printk("do_mmap:\n");
@@ -494,7 +495,8 @@ int do_munmap(struct mm_struct * mm, unsigned long addr, size_t len)
        realalloc -= kobjsize(tblock);
        askedalloc -= sizeof(struct mm_tblock_struct);
        kfree(tblock);
-       mm->total_vm -= len >> PAGE_SHIFT;
+       // mm->total_vm -= len >> PAGE_SHIFT;
+       vx_vmpages_sub(mm, len >> PAGE_SHIFT);
 
 #ifdef DEBUG
        show_process_blocks();
@@ -507,7 +509,8 @@ int do_munmap(struct mm_struct * mm, unsigned long addr, size_t len)
 void exit_mmap(struct mm_struct * mm)
 {
        struct mm_tblock_struct *tmp;
-       mm->total_vm = 0;
+       // mm->total_vm = 0;
+       vx_vmpages_sub(mm, mm->total_vm);
 
        if (!mm)
                return;
index abc73e0..35e1084 100644 (file)
@@ -55,7 +55,7 @@ static unsigned long badness(struct task_struct *p, unsigned long uptime)
         * The memory size of the process is the basis for the badness.
         */
        points = p->mm->total_vm;
-       /* add vserver badness ;) */
+       /* FIXME add vserver badness ;) */
 
        /*
         * CPU time is in tens of seconds and run time is in thousands
index be1d6dc..71e5a7d 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/vs_limit.h>
 #include <linux/ckrm_mem_inline.h>
 #include <linux/nodemask.h>
+#include <linux/vs_limit.h>
 
 #include <asm/tlbflush.h>
 
index 42288bb..1b4dae6 100644 (file)
@@ -30,7 +30,6 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <linux/swapops.h>
-#include <linux/vs_base.h>
 #include <linux/vs_memory.h>
 
 spinlock_t swaplock = SPIN_LOCK_UNLOCKED;
index ba42ce7..4513472 100644 (file)
@@ -41,7 +41,7 @@
 #include <linux/ckrm_mem.h>
 
 #ifndef AT_LIMIT_SUPPORT
-#warning "ckrm_at_limit disabled due to problems with memory hog tests -- seting ckrm_shrink_list_empty to true"
+#warning "ckrm_at_limit disabled due to problems with memory hog tests -- setting ckrm_shrink_list_empty to true"
 #undef ckrm_shrink_list_empty
 #define ckrm_shrink_list_empty()               (1)
 #endif
index 9227745..65aedf8 100644 (file)
 #include <linux/wireless.h>            /* Note : will define WIRELESS_EXT */
 #include <net/iw_handler.h>
 #endif /* CONFIG_NET_RADIO */
+#include <linux/vs_network.h>
 #include <asm/current.h>
 #include <linux/vs_network.h>
 
@@ -1894,6 +1895,9 @@ static int dev_ifconf(char __user *arg)
 
        total = 0;
        for (dev = dev_base; dev; dev = dev->next) {
+               if (vx_flags(VXF_HIDE_NETIF, 0) &&
+                       !dev_in_nx_info(dev, current->nx_info))
+                       continue;
                for (i = 0; i < NPROTO; i++) {
                        if (gifconf_list[i]) {
                                int done;
@@ -1954,6 +1958,10 @@ void dev_seq_stop(struct seq_file *seq, void *v)
 
 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
 {
+       struct nx_info *nxi = current->nx_info;
+
+       if (vx_flags(VXF_HIDE_NETIF, 0) && !dev_in_nx_info(dev, nxi))
+               return;
        if (dev->get_stats) {
                struct net_device_stats *stats = dev->get_stats(dev);
 
index 52641b0..2a8e289 100644 (file)
@@ -251,6 +251,9 @@ int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
        for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
                if (idx < s_idx)
                        continue;
+               if (vx_info_flags(skb->sk->sk_vx_info, VXF_HIDE_NETIF, 0) &&
+                       !dev_in_nx_info(dev, skb->sk->sk_nx_info))
+                       continue;
                if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0)
                        break;
        }
@@ -416,6 +419,9 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
                               sizeof(struct rtnl_link_ifmap) +
                               sizeof(struct rtnl_link_stats) + 128);
 
+       if (vx_flags(VXF_HIDE_NETIF, 0) &&
+               !dev_in_nx_info(dev, current->nx_info))
+               return;
        skb = alloc_skb(size, GFP_KERNEL);
        if (!skb)
                return;
index f030e0f..b5d42a2 100644 (file)
@@ -94,7 +94,6 @@
 
 #include <net/sock.h>
 #include <linux/netfilter.h>
-#include <linux/vs_base.h>
 #include <linux/vs_socket.h>
 
 static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
index 2fd2975..697cdb1 100644 (file)
@@ -262,7 +262,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int taskflags)
        get_group_info(current->group_info);
        acred.uid = current->fsuid;
        acred.gid = current->fsgid;
-       acred.xid = current->xid;
+       acred.xid = vx_current_xid();
        acred.group_info = current->group_info;
 
        dprintk("RPC:     looking up %s cred\n",
@@ -282,7 +282,7 @@ rpcauth_bindcred(struct rpc_task *task)
        get_group_info(current->group_info);
        acred.uid = current->fsuid;
        acred.gid = current->fsgid;
-       acred.xid = current->xid;
+       acred.xid = vx_current_xid();
        acred.group_info = current->group_info;
 
        dprintk("RPC: %4d looking up %s cred\n",
index 294875e..19f17f7 100644 (file)
@@ -83,7 +83,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
        if (flags & RPC_TASK_ROOTCREDS) {
                cred->uc_uid = cred->uc_puid = 0;
                cred->uc_gid = cred->uc_pgid = 0;
-               cred->uc_xid = cred->uc_pxid = current->xid;
+               cred->uc_xid = cred->uc_pxid = vx_current_xid();
                cred->uc_gids[0] = NOGROUP;
        } else {
                int groups = acred->group_info->ngroups;
@@ -95,7 +95,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
                cred->uc_xid = acred->xid;
                cred->uc_puid = current->uid;
                cred->uc_pgid = current->gid;
-               cred->uc_pxid = current->xid;
+               cred->uc_pxid = vx_current_xid();
                for (i = 0; i < groups; i++)
                        cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
                if (i < NFS_NGROUPS)
@@ -131,7 +131,7 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int taskflags)
                 || cred->uc_xid != acred->xid
                 || cred->uc_puid != current->uid
                 || cred->uc_pgid != current->gid
-                || cred->uc_pxid != current->xid)
+                || cred->uc_pxid != vx_current_xid())
                        return 0;
 
                groups = acred->group_info->ngroups;
index e8e79c3..4e9c198 100644 (file)
@@ -185,6 +185,8 @@ int mod_unreg_security(const char *name, struct security_operations *ops)
  */
 int capable(int cap)
 {
+       if (vx_check_bit(VXC_CAP_MASK, cap) && !vx_mcaps(1L << cap))
+               return 0;
        if (security_ops->capable(current, cap)) {
                /* capability denied */
                return 0;
@@ -195,9 +197,24 @@ int capable(int cap)
        return 1;
 }
 
+/*
+ * vx_capable - capability check that additionally requires vx_ccaps()
+ *
+ * Returns 1 and marks the current task PF_SUPERPRIV when
+ * security_ops->capable() does not deny @cap and vx_ccaps(@ccap) is
+ * non-zero; returns 0 otherwise.
+ */
+int vx_capable(int cap, int ccap)
+{
+       /* security_ops->capable() returns non-zero to deny */
+       int granted = !security_ops->capable(current, cap) && vx_ccaps(ccap);
+
+       if (granted)
+               current->flags |= PF_SUPERPRIV;
+       return granted;
+}
+
 EXPORT_SYMBOL_GPL(register_security);
 EXPORT_SYMBOL_GPL(unregister_security);
 EXPORT_SYMBOL_GPL(mod_reg_security);
 EXPORT_SYMBOL_GPL(mod_unreg_security);
 EXPORT_SYMBOL(capable);
+EXPORT_SYMBOL(vx_capable);
 EXPORT_SYMBOL(security_ops);