patch-2.6.6-vs1.9.0
author Mark Huang <mlhuang@cs.princeton.edu>
Wed, 2 Jun 2004 20:45:41 +0000 (20:45 +0000)
committer Mark Huang <mlhuang@cs.princeton.edu>
Wed, 2 Jun 2004 20:45:41 +0000 (20:45 +0000)
184 files changed:
Makefile
arch/alpha/Kconfig
arch/alpha/kernel/ptrace.c
arch/alpha/kernel/systbls.S
arch/arm/Kconfig
arch/arm/kernel/ptrace.c
arch/arm26/Kconfig
arch/arm26/kernel/ptrace.c
arch/cris/Kconfig
arch/h8300/Kconfig
arch/h8300/kernel/ptrace.c
arch/i386/Kconfig
arch/i386/kernel/entry.S
arch/i386/kernel/ptrace.c
arch/i386/kernel/sys_i386.c
arch/i386/mm/hugetlbpage.c
arch/ia64/Kconfig
arch/ia64/ia32/binfmt_elf32.c
arch/ia64/kernel/perfmon.c
arch/ia64/kernel/ptrace.c
arch/ia64/mm/fault.c
arch/ia64/mm/hugetlbpage.c
arch/m68k/Kconfig
arch/m68k/atari/stram.c
arch/m68k/kernel/ptrace.c
arch/m68knommu/Kconfig
arch/m68knommu/kernel/ptrace.c
arch/mips/Kconfig
arch/mips/kernel/irixelf.c
arch/mips/kernel/linux32.c
arch/mips/kernel/ptrace.c
arch/mips/kernel/syscall.c
arch/mips/kernel/sysirix.c
arch/parisc/Kconfig
arch/parisc/kernel/ptrace.c
arch/parisc/kernel/sys_parisc32.c
arch/ppc/Kconfig
arch/ppc/kernel/misc.S
arch/ppc/kernel/ptrace.c
arch/ppc/kernel/syscalls.c
arch/ppc64/Kconfig
arch/ppc64/kernel/misc.S
arch/ppc64/kernel/ptrace.c
arch/ppc64/kernel/sys_ppc32.c
arch/ppc64/mm/hugetlbpage.c
arch/s390/Kconfig
arch/s390/kernel/compat_exec.c
arch/s390/kernel/ptrace.c
arch/s390/kernel/syscalls.S
arch/sh/Kconfig
arch/sh/kernel/ptrace.c
arch/sh/mm/hugetlbpage.c
arch/sparc/Kconfig
arch/sparc/kernel/ptrace.c
arch/sparc/kernel/sys_sparc.c
arch/sparc/kernel/systbls.S
arch/sparc64/Kconfig
arch/sparc64/kernel/binfmt_aout32.c
arch/sparc64/kernel/ptrace.c
arch/sparc64/kernel/sys_sparc.c
arch/sparc64/kernel/systbls.S
arch/sparc64/mm/hugetlbpage.c
arch/um/Kconfig
arch/um/kernel/ptrace.c
arch/um/kernel/syscall_kern.c
arch/v850/Kconfig
arch/v850/kernel/ptrace.c
arch/x86_64/Kconfig
arch/x86_64/ia32/ia32_aout.c
arch/x86_64/ia32/ia32_binfmt.c
arch/x86_64/ia32/sys_ia32.c
arch/x86_64/kernel/ptrace.c
arch/x86_64/kernel/sys_x86_64.c
fs/attr.c
fs/binfmt_aout.c
fs/binfmt_elf.c
fs/binfmt_flat.c
fs/binfmt_som.c
fs/devpts/inode.c
fs/exec.c
fs/ext2/ialloc.c
fs/ext2/inode.c
fs/ext2/super.c
fs/ext3/ialloc.c
fs/ext3/inode.c
fs/ext3/ioctl.c
fs/ext3/super.c
fs/fcntl.c
fs/file_table.c
fs/inode.c
fs/ioctl.c
fs/jfs/jfs_imap.c
fs/namei.c
fs/namespace.c
fs/nfs/nfsroot.c
fs/open.c
fs/proc/array.c
fs/proc/base.c
fs/proc/generic.c
fs/proc/inode.c
fs/proc/proc_misc.c
fs/proc/root.c
fs/reiserfs/file.c
fs/reiserfs/inode.c
fs/reiserfs/ioctl.c
fs/reiserfs/namei.c
fs/xfs/linux/xfs_ioctl.c
fs/xfs/linux/xfs_iops.c
fs/xfs/linux/xfs_super.c
fs/xfs/linux/xfs_vnode.c
fs/xfs/xfs_dinode.h
fs/xfs/xfs_fs.h
fs/xfs/xfs_vnodeops.c
include/asm-alpha/unistd.h
include/asm-arm/tlb.h
include/asm-arm26/tlb.h
include/asm-generic/tlb.h
include/asm-ia64/tlb.h
include/asm-m68k/unistd.h
include/asm-m68knommu/unistd.h
include/asm-mips/unistd.h
include/asm-parisc/unistd.h
include/asm-ppc/unistd.h
include/asm-ppc64/unistd.h
include/asm-s390/unistd.h
include/asm-sparc/unistd.h
include/asm-sparc64/unistd.h
include/linux/capability.h
include/linux/ext2_fs.h
include/linux/ext3_fs.h
include/linux/fs.h
include/linux/init_task.h
include/linux/ip.h
include/linux/ipc.h
include/linux/mm.h
include/linux/namespace.h
include/linux/net.h
include/linux/proc_fs.h
include/linux/reiserfs_fs.h
include/linux/sched.h
include/linux/sysctl.h
include/linux/types.h
include/net/af_unix.h
include/net/route.h
include/net/scm.h
include/net/sock.h
include/net/tcp.h
ipc/msg.c
ipc/sem.c
ipc/shm.c
ipc/util.c
kernel/Makefile
kernel/exit.c
kernel/fork.c
kernel/pid.c
kernel/printk.c
kernel/sched.c
kernel/signal.c
kernel/sys.c
kernel/sysctl.c
kernel/timer.c
kernel/user.c
mm/fremap.c
mm/memory.c
mm/mlock.c
mm/mmap.c
mm/mremap.c
mm/oom_kill.c
mm/page_alloc.c
mm/rmap.c
mm/swapfile.c
net/core/dev.c
net/core/rtnetlink.c
net/core/sock.c
net/ipv4/af_inet.c
net/ipv4/devinet.c
net/ipv4/fib_hash.c
net/ipv4/raw.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_minisocks.c
net/ipv4/udp.c
net/socket.c
net/unix/af_unix.c
security/commoncap.c

index 4d86970..1511e96 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 6
-EXTRAVERSION =
+EXTRAVERSION = -vs1.9.0
 NAME=Zonked Quokka
 
 # *DOCUMENTATION*
index 16328ad..57ae7a4 100644 (file)
@@ -692,6 +692,8 @@ config DEBUG_INFO
 
 endmenu
 
+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
index aa654cc..5d9aae6 100644 (file)
@@ -287,6 +287,8 @@ do_sys_ptrace(long request, long pid, long addr, long data,
        read_unlock(&tasklist_lock);
        if (!child)
                goto out_notsk;
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+               goto out;
 
        if (request == PTRACE_ATTACH) {
                ret = ptrace_attach(child);
index 8e28e03..088cecf 100644 (file)
@@ -291,7 +291,7 @@ sys_call_table:
        .quad alpha_ni_syscall                  /* 270 */
        .quad alpha_ni_syscall
        .quad alpha_ni_syscall
-       .quad alpha_ni_syscall
+       .quad sys_vserver                       /* 273 sys_vserver */
        .quad alpha_ni_syscall
        .quad alpha_ni_syscall                  /* 275 */
        .quad alpha_ni_syscall
index 588fd8b..266427e 100644 (file)
@@ -801,6 +801,8 @@ config DEBUG_S3C2410_UART
 
 endmenu
 
+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
index d4058ed..4dcf99a 100644 (file)
@@ -754,6 +754,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
        read_unlock(&tasklist_lock);
        if (!child)
                goto out;
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+               goto out_tsk;
 
        ret = -EPERM;
        if (pid == 1)           /* you may not mess with init */
index ce96fd3..b40758b 100644 (file)
@@ -327,6 +327,8 @@ config DEBUG_LL
 
 endmenu
 
+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
index 57b9fb1..78cd893 100644 (file)
@@ -691,6 +691,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
        read_unlock(&tasklist_lock);
        if (!child)
                goto out;
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+               goto out_tsk;
 
        ret = -EPERM;
        if (pid == 1)           /* you may not mess with init */
index 86a05db..9b0b719 100644 (file)
@@ -214,6 +214,8 @@ config PROFILE_SHIFT
 
 endmenu
 
+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
index 5568638..3822a60 100644 (file)
@@ -368,6 +368,8 @@ config CONFIG_BLKDEV_RESERVE_ADDRESS
          BLKDEV start address.
 endmenu
 
+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
index 0843013..cdbfe9e 100644 (file)
@@ -80,6 +80,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
        read_unlock(&tasklist_lock);
        if (!child)
                goto out;
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+               goto out_tsk;
 
        ret = -EPERM;
        if (pid == 1)           /* you may not mess with init */
index 9548873..7d4f69c 100644 (file)
@@ -1296,6 +1296,8 @@ config X86_MPPARSE
 
 endmenu
 
+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
index afa02ea..4995528 100644 (file)
@@ -881,7 +881,7 @@ ENTRY(sys_call_table)
        .long sys_tgkill        /* 270 */
        .long sys_utimes
        .long sys_fadvise64_64
-       .long sys_ni_syscall    /* sys_vserver */
+       .long sys_vserver
        .long sys_ni_syscall    /* sys_mbind */
        .long sys_ni_syscall    /* 275 sys_get_mempolicy */
        .long sys_ni_syscall    /* sys_set_mempolicy */
index 9f9b32a..f965495 100644 (file)
@@ -258,6 +258,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
        read_unlock(&tasklist_lock);
        if (!child)
                goto out;
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+               goto out_tsk;
 
        ret = -EPERM;
        if (pid == 1)           /* you may not mess with init */
index 0c36130..fc8001a 100644 (file)
@@ -217,7 +217,7 @@ asmlinkage int sys_uname(struct old_utsname __user * name)
        if (!name)
                return -EFAULT;
        down_read(&uts_sem);
-       err=copy_to_user(name, &system_utsname, sizeof (*name));
+       err=copy_to_user(name, vx_new_utsname(), sizeof (*name));
        up_read(&uts_sem);
        return err?-EFAULT:0;
 }
@@ -225,6 +225,7 @@ asmlinkage int sys_uname(struct old_utsname __user * name)
 asmlinkage int sys_olduname(struct oldold_utsname __user * name)
 {
        int error;
+       struct new_utsname *ptr;
 
        if (!name)
                return -EFAULT;
@@ -233,15 +234,16 @@ asmlinkage int sys_olduname(struct oldold_utsname __user * name)
   
        down_read(&uts_sem);
        
-       error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+       ptr = vx_new_utsname();
+       error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN);
        error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
-       error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+       error |= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN);
        error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
-       error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+       error |= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN);
        error |= __put_user(0,name->release+__OLD_UTS_LEN);
-       error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+       error |= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN);
        error |= __put_user(0,name->version+__OLD_UTS_LEN);
-       error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
+       error |= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN);
        error |= __put_user(0,name->machine+__OLD_UTS_LEN);
        
        up_read(&uts_sem);
index 6e2fa1f..31aba6a 100644 (file)
@@ -43,7 +43,8 @@ static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, struc
 {
        pte_t entry;
 
-       mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+       // mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+       vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE);
        if (write_access) {
                entry =
                    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
@@ -83,7 +84,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                ptepage = pte_page(entry);
                get_page(ptepage);
                set_pte(dst_pte, entry);
-               dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+               // dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+               vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE);
                addr += HPAGE_SIZE;
        }
        return 0;
@@ -222,7 +224,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma,
                page = pte_page(pte);
                put_page(page);
        }
-       mm->rss -= (end - start) >> PAGE_SHIFT;
+       // mm->rss -= (end - start) >> PAGE_SHIFT;
+       vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT);
        flush_tlb_range(vma, start, end);
 }
 
index 991916a..ff8970d 100644 (file)
@@ -491,6 +491,8 @@ config SYSVIPC_COMPAT
        default y
 endmenu
 
+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
index 679e68a..ad67b04 100644 (file)
@@ -151,7 +151,7 @@ ia64_elf32_init (struct pt_regs *regs)
 int
 ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack)
 {
-       unsigned long stack_base;
+       unsigned long stack_base, grow;
        struct vm_area_struct *mpnt;
        struct mm_struct *mm = current->mm;
        int i;
@@ -168,7 +168,10 @@ ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack)
        if (!mpnt)
                return -ENOMEM;
 
-       if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) {
+       grow = (IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))
+               >> PAGE_SHIFT;
+       if (security_vm_enough_memory(grow) ||
+               !vx_vmpages_avail(mm, grow)) {
                kmem_cache_free(vm_area_cachep, mpnt);
                return -ENOMEM;
        }
@@ -191,7 +194,9 @@ ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack)
                mpnt->vm_file = NULL;
                mpnt->vm_private_data = 0;
                insert_vm_struct(current->mm, mpnt);
-               current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+               // current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+               vx_vmpages_sub(current->mm, current->mm->total_vm -
+                       ((mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT));
        }
 
        for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
index ae2eb13..42caeae 100644 (file)
@@ -2350,7 +2350,8 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon
         */
        insert_vm_struct(mm, vma);
 
-       mm->total_vm  += size >> PAGE_SHIFT;
+       // mm->total_vm  += size >> PAGE_SHIFT;
+       vx_vmpages_add(mm, size >> PAGE_SHIFT);
 
        up_write(&task->mm->mmap_sem);
 
index 0432abe..eace1b7 100644 (file)
@@ -1310,6 +1310,9 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data,
        read_unlock(&tasklist_lock);
        if (!child)
                goto out;
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+               goto out_tsk;
+
        ret = -EPERM;
        if (pid == 1)           /* no messing around with init! */
                goto out_tsk;
index d823ff8..20d11f4 100644 (file)
@@ -36,10 +36,14 @@ expand_backing_store (struct vm_area_struct *vma, unsigned long address)
        if (address - vma->vm_start > current->rlim[RLIMIT_STACK].rlim_cur
            || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur))
                return -ENOMEM;
+       if (!vx_vmpages_avail(vma->vm_mm, grow))
+               return -ENOMEM;
        vma->vm_end += PAGE_SIZE;
-       vma->vm_mm->total_vm += grow;
+       // vma->vm_mm->total_vm += grow;
+       vx_vmpages_add(vma->vm_mm, grow);
        if (vma->vm_flags & VM_LOCKED)
-               vma->vm_mm->locked_vm += grow;
+               // vma->vm_mm->locked_vm += grow;
+               vx_vmlocked_add(vma->vm_mm, grow);
        return 0;
 }
 
index 114e3d9..eaae371 100644 (file)
@@ -65,7 +65,8 @@ set_huge_pte (struct mm_struct *mm, struct vm_area_struct *vma,
 {
        pte_t entry;
 
-       mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+       // mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+       vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE);
        if (write_access) {
                entry =
                    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
@@ -108,7 +109,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                ptepage = pte_page(entry);
                get_page(ptepage);
                set_pte(dst_pte, entry);
-               dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+               // dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+               vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE);
                addr += HPAGE_SIZE;
        }
        return 0;
@@ -251,7 +253,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsig
                put_page(page);
                pte_clear(pte);
        }
-       mm->rss -= (end - start) >> PAGE_SHIFT;
+       // mm->rss -= (end - start) >> PAGE_SHIFT;
+       vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT);
        flush_tlb_range(vma, start, end);
 }
 
index 503a32d..5cac841 100644 (file)
@@ -690,6 +690,8 @@ config DEBUG_INFO
          
 endmenu
 
+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
index 45644b8..ebc08fc 100644 (file)
@@ -635,7 +635,8 @@ static inline void unswap_pte(struct vm_area_struct * vma, unsigned long
        set_pte(dir, pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
        swap_free(entry);
        get_page(page);
-       ++vma->vm_mm->rss;
+       // ++vma->vm_mm->rss;
+       vx_rsspages_inc(vma->vm_mm);
 }
 
 static inline void unswap_pmd(struct vm_area_struct * vma, pmd_t *dir,
index 666b3ae..540f49c 100644 (file)
@@ -140,6 +140,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
        read_unlock(&tasklist_lock);
        if (!child)
                goto out;
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+               goto out_tsk;
 
        ret = -EPERM;
        if (pid == 1)           /* you may not mess with init */
index f113177..b8f71e7 100644 (file)
@@ -566,6 +566,8 @@ config BDM_DISABLE
 
 endmenu
 
+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
index 4f3df6d..0c57f87 100644 (file)
@@ -124,6 +124,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
        read_unlock(&tasklist_lock);
        if (!child)
                goto out;
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+               goto out_tsk;
 
        ret = -EPERM;
        if (pid == 1)           /* you may not mess with init */
index 61fa43f..5f2b277 100644 (file)
@@ -1600,6 +1600,8 @@ config DEBUG_HIGHMEM
 
 endmenu
 
+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
index 6a2b362..b6de9b8 100644 (file)
@@ -686,7 +686,8 @@ static int load_irix_binary(struct linux_binprm * bprm, struct pt_regs * regs)
        /* Do this so that we can load the interpreter, if need be.  We will
         * change some of these later.
         */
-       current->mm->rss = 0;
+       // current->mm->rss = 0;
+       vx_rsspages_sub(current->mm, current->mm->rss);
        setup_arg_pages(bprm, EXSTACK_DEFAULT);
        current->mm->start_stack = bprm->p;
 
index 06813fe..c7b19aa 100644 (file)
@@ -1728,7 +1728,7 @@ asmlinkage long sys32_newuname(struct new_utsname * name)
        int ret = 0;
 
        down_read(&uts_sem);
-       if (copy_to_user(name,&system_utsname,sizeof *name))
+       if (copy_to_user(name, vx_new_utsname(), sizeof *name))
                ret = -EFAULT;
        up_read(&uts_sem);
 
index 1036abe..7661128 100644 (file)
@@ -76,6 +76,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
        read_unlock(&tasklist_lock);
        if (!child)
                goto out;
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+               goto out_tsk;
 
        ret = -EPERM;
        if (pid == 1)           /* you may not mess with init */
index 7e1eca9..5785d06 100644 (file)
@@ -209,7 +209,7 @@ out:
  */
 asmlinkage int sys_uname(struct old_utsname * name)
 {
-       if (name && !copy_to_user(name, &system_utsname, sizeof (*name)))
+       if (name && !copy_to_user(name, vx_new_utsname(), sizeof (*name)))
                return 0;
        return -EFAULT;
 }
@@ -220,21 +220,23 @@ asmlinkage int sys_uname(struct old_utsname * name)
 asmlinkage int sys_olduname(struct oldold_utsname * name)
 {
        int error;
+       struct new_utsname *ptr;
 
        if (!name)
                return -EFAULT;
        if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
                return -EFAULT;
 
-       error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+       ptr = vx_new_utsname();
+       error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN);
        error -= __put_user(0,name->sysname+__OLD_UTS_LEN);
-       error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+       error -= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN);
        error -= __put_user(0,name->nodename+__OLD_UTS_LEN);
-       error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+       error -= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN);
        error -= __put_user(0,name->release+__OLD_UTS_LEN);
-       error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+       error -= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN);
        error -= __put_user(0,name->version+__OLD_UTS_LEN);
-       error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
+       error -= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN);
        error = __put_user(0,name->machine+__OLD_UTS_LEN);
        error = error ? -EFAULT : 0;
 
@@ -260,10 +262,10 @@ asmlinkage int _sys_sysmips(int cmd, long arg1, int arg2, int arg3)
                        return -EFAULT;
 
                down_write(&uts_sem);
-               strncpy(system_utsname.nodename, nodename, len);
+               strncpy(vx_new_uts(nodename), nodename, len);
                nodename[__NEW_UTS_LEN] = '\0';
-               strlcpy(system_utsname.nodename, nodename,
-                       sizeof(system_utsname.nodename));
+               strlcpy(vx_new_uts(nodename), nodename,
+                       sizeof(vx_new_uts(nodename)));
                up_write(&uts_sem);
                return 0;
        }
index 1b10357..8aa6e23 100644 (file)
@@ -577,7 +577,8 @@ asmlinkage int irix_brk(unsigned long brk)
        /*
         * Check if we have enough memory..
         */
-       if (security_vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT)) {
+       if (security_vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT) ||
+               !vx_vmpages_avail(mm, (newbrk-oldbrk) >> PAGE_SHIFT)) {
                ret = -ENOMEM;
                goto out;
        }
index 751f3ac..aa723ca 100644 (file)
@@ -224,6 +224,8 @@ config DEBUG_INFO
          
 endmenu
 
+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
index 514e8b5..d45980c 100644 (file)
@@ -110,6 +110,9 @@ long sys_ptrace(long request, pid_t pid, long addr, long data)
        read_unlock(&tasklist_lock);
        if (!child)
                goto out;
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+               goto out_tsk;
+
        ret = -EPERM;
        if (pid == 1)           /* no messing around with init! */
                goto out_tsk;
index 7159953..cd0d1bd 100644 (file)
@@ -1211,6 +1211,7 @@ asmlinkage int sys32_sysinfo(struct sysinfo32 *info)
 
        do {
                seq = read_seqbegin(&xtime_lock);
+               /* requires vx virtualization */
                val.uptime = jiffies / HZ;
 
                val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
index 8de9f39..9326f27 100644 (file)
@@ -1245,6 +1245,8 @@ config OCP
 
 endmenu
 
+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
index 134b74b..7a0b48b 100644 (file)
@@ -1374,7 +1374,7 @@ _GLOBAL(sys_call_table)
        .long ppc_fadvise64_64
        .long sys_ni_syscall            /* 255 - rtas (used on ppc64) */
        .long sys_ni_syscall            /* 256 reserved for sys_debug_setcontext */
-       .long sys_ni_syscall            /* 257 reserved for vserver */
+       .long sys_vserver
        .long sys_ni_syscall            /* 258 reserved for new sys_remap_file_pages */
        .long sys_ni_syscall            /* 259 reserved for new sys_mbind */
        .long sys_ni_syscall            /* 260 reserved for new sys_get_mempolicy */
index 2ddfb1a..f912078 100644 (file)
@@ -197,6 +197,8 @@ int sys_ptrace(long request, long pid, long addr, long data)
        read_unlock(&tasklist_lock);
        if (!child)
                goto out;
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+               goto out_tsk;
 
        ret = -EPERM;
        if (pid == 1)           /* you may not mess with init */
index 7f2531d..d33e635 100644 (file)
@@ -229,7 +229,7 @@ int sys_uname(struct old_utsname __user * name)
        int err = -EFAULT;
 
        down_read(&uts_sem);
-       if (name && !copy_to_user(name, &system_utsname, sizeof (*name)))
+       if (name && !copy_to_user(name, vx_new_utsname(), sizeof (*name)))
                err = 0;
        up_read(&uts_sem);
        return err;
@@ -238,6 +238,7 @@ int sys_uname(struct old_utsname __user * name)
 int sys_olduname(struct oldold_utsname __user * name)
 {
        int error;
+       struct new_utsname *ptr;
 
        if (!name)
                return -EFAULT;
@@ -245,15 +246,16 @@ int sys_olduname(struct oldold_utsname __user * name)
                return -EFAULT;
 
        down_read(&uts_sem);
-       error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+       ptr = vx_new_utsname();
+       error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN);
        error -= __put_user(0,name->sysname+__OLD_UTS_LEN);
-       error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+       error -= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN);
        error -= __put_user(0,name->nodename+__OLD_UTS_LEN);
-       error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+       error -= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN);
        error -= __put_user(0,name->release+__OLD_UTS_LEN);
-       error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+       error -= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN);
        error -= __put_user(0,name->version+__OLD_UTS_LEN);
-       error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
+       error -= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN);
        error = __put_user(0,name->machine+__OLD_UTS_LEN);
        up_read(&uts_sem);
 
index 9b2f319..584c8a1 100644 (file)
@@ -397,6 +397,8 @@ config DEBUG_INFO
          
 endmenu
 
+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
index a59c040..68c167a 100644 (file)
@@ -829,7 +829,7 @@ _GLOBAL(sys_call_table32)
        .llong .ppc32_fadvise64_64      /* 32bit only fadvise64_64 */
        .llong .ppc_rtas                /* 255 */
        .llong .sys_ni_syscall          /* 256 reserved for sys_debug_setcontext */
-       .llong .sys_ni_syscall          /* 257 reserved for vserver */
+       .llong .sys_vserver
        .llong .sys_ni_syscall          /* 258 reserved for new sys_remap_file_pages */
        .llong .sys_ni_syscall          /* 259 reserved for new sys_mbind */
        .llong .sys_ni_syscall          /* 260 reserved for new sys_get_mempolicy */
index 6afe71a..a8fc80f 100644 (file)
@@ -76,6 +76,8 @@ int sys_ptrace(long request, long pid, long addr, long data)
        read_unlock(&tasklist_lock);
        if (!child)
                goto out;
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+               goto out_tsk;
 
        ret = -EPERM;
        if (pid == 1)           /* you may not mess with init */
index 7e4bc83..7b5af0b 100644 (file)
@@ -2456,6 +2456,7 @@ asmlinkage long sys32_time(compat_time_t* tloc)
 int sys32_olduname(struct oldold_utsname * name)
 {
        int error;
+       struct new_utsname *ptr;
        
        if (!name)
                return -EFAULT;
@@ -2463,15 +2464,16 @@ int sys32_olduname(struct oldold_utsname * name)
                return -EFAULT;
   
        down_read(&uts_sem);
-       error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+       ptr = vx_new_utsname();
+       error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN);
        error -= __put_user(0,name->sysname+__OLD_UTS_LEN);
-       error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+       error -= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN);
        error -= __put_user(0,name->nodename+__OLD_UTS_LEN);
-       error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+       error -= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN);
        error -= __put_user(0,name->release+__OLD_UTS_LEN);
-       error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+       error -= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN);
        error -= __put_user(0,name->version+__OLD_UTS_LEN);
-       error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
+       error -= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN);
        error = __put_user(0,name->machine+__OLD_UTS_LEN);
        up_read(&uts_sem);
 
index 3ac622c..d17c903 100644 (file)
@@ -126,7 +126,8 @@ static void setup_huge_pte(struct mm_struct *mm, struct page *page,
        hugepte_t entry;
        int i;
 
-       mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+       // mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+       vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE);
        entry = mk_hugepte(page, write_access);
        for (i = 0; i < HUGEPTE_BATCH_SIZE; i++)
                set_hugepte(ptep+i, entry);
@@ -288,7 +289,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                        /* This is the first hugepte in a batch */
                        ptepage = hugepte_page(entry);
                        get_page(ptepage);
-                       dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+                       // dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+                       vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE);
                }
                set_hugepte(dst_pte, entry);
 
@@ -408,7 +410,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma,
                put_page(page);
        }
 
-       mm->rss -= (end - start) >> PAGE_SHIFT;
+       // mm->rss -= (end - start) >> PAGE_SHIFT;
+       vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT);
 }
 
 int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
index eab5d1e..c0b72b4 100644 (file)
@@ -425,6 +425,8 @@ config DEBUG_SPINLOCK_SLEEP
 
 endmenu
 
+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
index 162deb2..3f43a36 100644 (file)
@@ -39,7 +39,7 @@
 
 int setup_arg_pages32(struct linux_binprm *bprm, int executable_stack)
 {
-       unsigned long stack_base;
+       unsigned long stack_base, grow;
        struct vm_area_struct *mpnt;
        struct mm_struct *mm = current->mm;
        int i;
@@ -56,7 +56,10 @@ int setup_arg_pages32(struct linux_binprm *bprm, int executable_stack)
        if (!mpnt) 
                return -ENOMEM; 
        
-       if (security_vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) {
+       grow = (STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))
+               >> PAGE_SHIFT;
+       if (security_vm_enough_memory(grow) ||
+               !vx_vmpages_avail(mm, grow)) {
                kmem_cache_free(vm_area_cachep, mpnt);
                return -ENOMEM;
        }
@@ -75,7 +78,9 @@ int setup_arg_pages32(struct linux_binprm *bprm, int executable_stack)
                INIT_LIST_HEAD(&mpnt->shared);
                mpnt->vm_private_data = (void *) 0;
                insert_vm_struct(mm, mpnt);
-               mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+               // mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+               vx_vmpages_sub(mm, mm->total_vm -
+                       ((mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT));
        } 
 
        for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
index 1176b4f..fa7875f 100644 (file)
@@ -680,9 +680,11 @@ sys_ptrace(long request, long pid, long addr, long data)
        read_unlock(&tasklist_lock);
        if (!child)
                goto out;
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+               goto out_tsk;
 
        ret = do_ptrace(child, request, addr, data);
-
+out_tsk:
        put_task_struct(child);
 out:
        unlock_kernel();
index da7ba3e..beae3b1 100644 (file)
@@ -271,7 +271,7 @@ SYSCALL(sys_clock_settime,sys_clock_settime,sys32_clock_settime_wrapper)
 SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper)       /* 260 */
 SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper)
 SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper)
-NI_SYSCALL                                                     /* reserved for vserver */
+SYSCALL(sys_vserver,sys_vserver,sys_vserver)
 SYSCALL(s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper)
 SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper)
 SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper)
index 8efb8e0..e774424 100644 (file)
@@ -792,6 +792,8 @@ config FRAME_POINTER
 
 endmenu
 
+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
index 602f6c5..732afae 100644 (file)
@@ -108,6 +108,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
        read_unlock(&tasklist_lock);
        if (!child)
                goto out;
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+               goto out_tsk;
 
        ret = -EPERM;
        if (pid == 1)           /* you may not mess with init */
index 5309f67..9002fb0 100644 (file)
@@ -62,7 +62,8 @@ static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
        unsigned long i;
        pte_t entry;
 
-       mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+       // mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+       vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE);
 
        if (write_access)
                entry = pte_mkwrite(pte_mkdirty(mk_pte(page,
@@ -115,7 +116,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                        pte_val(entry) += PAGE_SIZE;
                        dst_pte++;
                }
-               dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+               // dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+               vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE);
                addr += HPAGE_SIZE;
        }
        return 0;
@@ -206,7 +208,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma,
                        pte++;
                }
        }
-       mm->rss -= (end - start) >> PAGE_SHIFT;
+       // mm->rss -= (end - start) >> PAGE_SHIFT;
+       vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT);
        flush_tlb_range(vma, start, end);
 }
 
index 79d52a1..4b3dd85 100644 (file)
@@ -450,6 +450,8 @@ config DEBUG_BUGVERBOSE
 
 endmenu
 
+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
index b1b6cda..d13bf73 100644 (file)
@@ -319,6 +319,10 @@ asmlinkage void do_ptrace(struct pt_regs *regs)
                pt_error_return(regs, ESRCH);
                goto out;
        }
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) {
+               pt_error_return(regs, ESRCH);
+               goto out_tsk;
+       }
 
        if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH)
            || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) {
index 925efe9..f0720b8 100644 (file)
@@ -467,13 +467,13 @@ asmlinkage int sys_getdomainname(char __user *name, int len)
        
        down_read(&uts_sem);
        
-       nlen = strlen(system_utsname.domainname) + 1;
+       nlen = strlen(vx_new_uts(domainname)) + 1;
 
        if (nlen < len)
                len = nlen;
        if (len > __NEW_UTS_LEN)
                goto done;
-       if (copy_to_user(name, system_utsname.domainname, len))
+       if (copy_to_user(name, vx_new_uts(domainname), len))
                goto done;
        err = 0;
 done:
index fd452a6..56c8c87 100644 (file)
@@ -72,7 +72,7 @@ sys_call_table:
 /*250*/        .long sparc_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
 /*255*/        .long sys_nis_syscall, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
 /*260*/        .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
-/*265*/        .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy
+/*265*/        .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
 /*270*/        .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
 /*275*/        .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_ni_syscall
 /*280*/        .long sys_ni_syscall, sys_ni_syscall, sys_ni_syscall
index ff9718e..d79d5f2 100644 (file)
@@ -707,6 +707,8 @@ config FRAME_POINTER
 
 endmenu
 
+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
index 4ba5d48..609b6ed 100644 (file)
@@ -239,7 +239,8 @@ static int load_aout32_binary(struct linux_binprm * bprm, struct pt_regs * regs)
        current->mm->brk = ex.a_bss +
                (current->mm->start_brk = N_BSSADDR(ex));
 
-       current->mm->rss = 0;
+       // current->mm->rss = 0;
+       vx_rsspages_sub(current->mm, current->mm->rss);
        current->mm->mmap = NULL;
        compute_creds(bprm);
        current->flags &= ~PF_FORKNOEXEC;
index d935eb6..dc47713 100644 (file)
@@ -168,6 +168,10 @@ asmlinkage void do_ptrace(struct pt_regs *regs)
                pt_error_return(regs, ESRCH);
                goto out;
        }
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) {
+               pt_error_return(regs, ESRCH);
+               goto out_tsk;
+       }
 
        if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH)
            || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) {
index 4c75f61..a6b29c9 100644 (file)
@@ -459,13 +459,13 @@ asmlinkage int sys_getdomainname(char __user *name, int len)
 
        down_read(&uts_sem);
        
-       nlen = strlen(system_utsname.domainname) + 1;
+       nlen = strlen(vx_new_uts(domainname)) + 1;
 
         if (nlen < len)
                 len = nlen;
        if (len > __NEW_UTS_LEN)
                goto done;
-       if (copy_to_user(name, system_utsname.domainname, len))
+       if (copy_to_user(name, vx_new_uts(domainname), len))
                goto done;
        err = 0;
 done:
index 3c7ef2d..6148e76 100644 (file)
@@ -72,7 +72,7 @@ sys_call_table32:
 /*250*/        .word sys32_mremap, sys32_sysctl, sys_getsid, sys_fdatasync, sys32_nfsservctl
        .word sys_ni_syscall, compat_clock_settime, compat_clock_gettime, compat_clock_getres, compat_clock_nanosleep
 /*260*/        .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, compat_timer_settime, compat_timer_gettime, sys_timer_getoverrun
-       .word sys_timer_delete, sys32_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
+       .word sys_timer_delete, sys32_timer_create, sys_vserver, compat_sys_io_setup, sys_io_destroy
 /*270*/        .word compat_sys_io_submit, sys_io_cancel, compat_sys_io_getevents, compat_sys_mq_open, sys_mq_unlink
        .word sys32_mq_timedsend, sys32_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, sys_ni_syscall
 /*280*/        .word sys_ni_syscall, sys_ni_syscall, sys_ni_syscall
@@ -136,7 +136,7 @@ sys_call_table:
 /*250*/        .word sys64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
        .word sys_ni_syscall, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
 /*260*/        .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
-       .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy
+       .word sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
 /*270*/        .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
        .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_ni_syscall
 /*280*/        .word sys_ni_syscall, sys_ni_syscall, sys_ni_syscall
index 1df587b..cb6968c 100644 (file)
@@ -59,7 +59,8 @@ static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
        unsigned long i;
        pte_t entry;
 
-       mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+       // mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+       vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE);
 
        if (write_access)
                entry = pte_mkwrite(pte_mkdirty(mk_pte(page,
@@ -112,7 +113,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                        pte_val(entry) += PAGE_SIZE;
                        dst_pte++;
                }
-               dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+               // dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+               vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE);
                addr += HPAGE_SIZE;
        }
        return 0;
@@ -203,7 +205,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma,
                        pte++;
                }
        }
-       mm->rss -= (end - start) >> PAGE_SHIFT;
+       // mm->rss -= (end - start) >> PAGE_SHIFT;
+       vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT);
        flush_tlb_range(vma, start, end);
 }
 
index 47fbaf3..7ef333e 100644 (file)
@@ -188,6 +188,8 @@ source "net/Kconfig"
 
 source "fs/Kconfig"
 
+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
index c68c937..90f8d28 100644 (file)
@@ -58,6 +58,8 @@ int sys_ptrace(long request, long pid, long addr, long data)
        read_unlock(&tasklist_lock);
        if (!child)
                goto out;
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+               goto out_tsk;
 
        ret = -EPERM;
        if (pid == 1)           /* you may not mess with init */
index 2af5fc2..f3a72d2 100644 (file)
@@ -264,7 +264,7 @@ int sys_uname(struct old_utsname * name)
        if (!name)
                return -EFAULT;
        down_read(&uts_sem);
-       err=copy_to_user(name, &system_utsname, sizeof (*name));
+       err=copy_to_user(name, vx_new_utsname(), sizeof (*name));
        up_read(&uts_sem);
        return err?-EFAULT:0;
 }
@@ -272,6 +272,7 @@ int sys_uname(struct old_utsname * name)
 int sys_olduname(struct oldold_utsname * name)
 {
        int error;
+       struct new_utsname *ptr;
 
        if (!name)
                return -EFAULT;
@@ -280,19 +281,20 @@ int sys_olduname(struct oldold_utsname * name)
   
        down_read(&uts_sem);
        
-       error = __copy_to_user(&name->sysname,&system_utsname.sysname,
+       ptr = vx_new_utsname();
+       error = __copy_to_user(&name->sysname,ptr->sysname,
                               __OLD_UTS_LEN);
        error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
-       error |= __copy_to_user(&name->nodename,&system_utsname.nodename,
+       error |= __copy_to_user(&name->nodename,ptr->nodename,
                                __OLD_UTS_LEN);
        error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
-       error |= __copy_to_user(&name->release,&system_utsname.release,
+       error |= __copy_to_user(&name->release,ptr->release,
                                __OLD_UTS_LEN);
        error |= __put_user(0,name->release+__OLD_UTS_LEN);
-       error |= __copy_to_user(&name->version,&system_utsname.version,
+       error |= __copy_to_user(&name->version,ptr->version,
                                __OLD_UTS_LEN);
        error |= __put_user(0,name->version+__OLD_UTS_LEN);
-       error |= __copy_to_user(&name->machine,&system_utsname.machine,
+       error |= __copy_to_user(&name->machine,ptr->machine,
                                __OLD_UTS_LEN);
        error |= __put_user(0,name->machine+__OLD_UTS_LEN);
        
index 8665652..994e4f8 100644 (file)
@@ -334,6 +334,8 @@ config NO_KERNEL_MSG
 
 endmenu
 
+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
index fc06058..7d25547 100644 (file)
@@ -138,6 +138,8 @@ int sys_ptrace(long request, long pid, long addr, long data)
        read_unlock(&tasklist_lock);
        if (!child)
                goto out;
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+               goto out_tsk;
 
        rval = -EPERM;
        if (pid == 1)           /* you may not mess with init */
index cfe5b0f..e763749 100644 (file)
@@ -490,6 +490,8 @@ config IOMMU_LEAK
 
 endmenu
 
+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
index 040adf6..b7b698c 100644 (file)
@@ -308,7 +308,8 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
                (current->mm->start_brk = N_BSSADDR(ex));
        current->mm->free_area_cache = TASK_UNMAPPED_BASE;
 
-       current->mm->rss = 0;
+       // current->mm->rss = 0;
+       vx_rsspages_sub(current->mm, current->mm->rss);
        current->mm->mmap = NULL;
        compute_creds(bprm);
        current->flags &= ~PF_FORKNOEXEC;
index 92817f1..1b70ef9 100644 (file)
@@ -327,7 +327,7 @@ static void elf32_init(struct pt_regs *regs)
 
 int setup_arg_pages(struct linux_binprm *bprm, int executable_stack)
 {
-       unsigned long stack_base;
+       unsigned long stack_base, grow;
        struct vm_area_struct *mpnt;
        struct mm_struct *mm = current->mm;
        int i;
@@ -344,7 +344,10 @@ int setup_arg_pages(struct linux_binprm *bprm, int executable_stack)
        if (!mpnt) 
                return -ENOMEM; 
        
-       if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) {
+       grow = (IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))
+               >> PAGE_SHIFT;
+       if (security_vm_enough_memory(grow) ||
+               !vx_vmpages_avail(mm, grow)) {
                kmem_cache_free(vm_area_cachep, mpnt);
                return -ENOMEM;
        }
@@ -368,7 +371,9 @@ int setup_arg_pages(struct linux_binprm *bprm, int executable_stack)
                INIT_LIST_HEAD(&mpnt->shared);
                mpnt->vm_private_data = (void *) 0;
                insert_vm_struct(mm, mpnt);
-               mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+               // mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+               vx_vmpages_sub(mm, mm->total_vm -
+                       ((mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT));
        } 
 
        for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
index 47c2341..c60544f 100644 (file)
@@ -1258,6 +1258,7 @@ asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len,
 asmlinkage long sys32_olduname(struct oldold_utsname * name)
 {
        int error;
+       struct new_utsname *ptr;
 
        if (!name)
                return -EFAULT;
@@ -1266,13 +1267,14 @@ asmlinkage long sys32_olduname(struct oldold_utsname * name)
   
        down_read(&uts_sem);
        
-       error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+       ptr = vx_new_utsname();
+       error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN);
         __put_user(0,name->sysname+__OLD_UTS_LEN);
-        __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+        __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN);
         __put_user(0,name->nodename+__OLD_UTS_LEN);
-        __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+        __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN);
         __put_user(0,name->release+__OLD_UTS_LEN);
-        __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+        __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN);
         __put_user(0,name->version+__OLD_UTS_LEN);
         { 
                 char *arch = "x86_64";
@@ -1295,7 +1297,7 @@ long sys32_uname(struct old_utsname * name)
        if (!name)
                return -EFAULT;
        down_read(&uts_sem);
-       err=copy_to_user(name, &system_utsname, sizeof (*name));
+       err=copy_to_user(name, vx_new_utsname(), sizeof (*name));
        up_read(&uts_sem);
        if (personality(current->personality) == PER_LINUX32) 
                err |= copy_to_user(&name->machine, "i686", 5);
index a97bee6..87ab1e2 100644 (file)
@@ -208,6 +208,8 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data
        read_unlock(&tasklist_lock);
        if (!child)
                goto out;
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+               goto out_tsk;
 
        ret = -EPERM;
        if (pid == 1)           /* you may not mess with init */
index e518c1f..752d840 100644 (file)
@@ -146,7 +146,7 @@ asmlinkage long sys_uname(struct new_utsname * name)
 {
        int err;
        down_read(&uts_sem);
-       err = copy_to_user(name, &system_utsname, sizeof (*name));
+       err = copy_to_user(name, vx_new_utsname(), sizeof (*name));
        up_read(&uts_sem);
        if (personality(current->personality) == PER_LINUX32) 
                err |= copy_to_user(&name->machine, "i686", 5);                 
index d63350c..5bb63a8 100644 (file)
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -62,6 +62,24 @@ error:
 
 EXPORT_SYMBOL(inode_change_ok);
 
+/* Apply the ATTR_FLAG_* bits in @flags to inode->i_flags; returns 0. */
+int inode_setattr_flags(struct inode *inode, unsigned int flags)
+{
+       const unsigned int managed = S_IMMUTABLE | S_IUNLINK | S_BARRIER;
+       unsigned int cur = inode->i_flags;
+       unsigned int wanted = cur & ~managed;
+
+       if (flags & ATTR_FLAG_IMMUTABLE)
+               wanted |= S_IMMUTABLE;
+       if (flags & ATTR_FLAG_IUNLINK)
+               wanted |= S_IUNLINK;
+       if (flags & ATTR_FLAG_BARRIER)
+               wanted |= S_BARRIER;
+       if (wanted != cur)
+               inode->i_flags = wanted;
+       return 0;
+}
+
 int inode_setattr(struct inode * inode, struct iattr * attr)
 {
        unsigned int ia_valid = attr->ia_valid;
@@ -98,6 +116,8 @@ int inode_setattr(struct inode * inode, struct iattr * attr)
                        mode &= ~S_ISGID;
                inode->i_mode = mode;
        }
+       if (ia_valid & ATTR_ATTR_FLAG)
+               inode_setattr_flags(inode, attr->ia_attr_flags);
        mark_inode_dirty(inode);
 out:
        return error;
index 7827c12..5552d27 100644 (file)
@@ -309,7 +309,8 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
                (current->mm->start_brk = N_BSSADDR(ex));
        current->mm->free_area_cache = TASK_UNMAPPED_BASE;
 
-       current->mm->rss = 0;
+       // current->mm->rss = 0;
+       vx_rsspages_sub(current->mm, current->mm->rss);
        current->mm->mmap = NULL;
        compute_creds(bprm);
        current->flags &= ~PF_FORKNOEXEC;
index a67e6f5..fee8d14 100644 (file)
@@ -695,7 +695,8 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 
        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
-       current->mm->rss = 0;
+       // current->mm->rss = 0;
+       vx_rsspages_sub(current->mm, current->mm->rss);
        current->mm->free_area_cache = TASK_UNMAPPED_BASE;
        retval = setup_arg_pages(bprm, executable_stack);
        if (retval < 0) {
index ddbc390..c8113cc 100644 (file)
@@ -651,7 +651,8 @@ static int load_flat_file(struct linux_binprm * bprm,
                current->mm->start_brk = datapos + data_len + bss_len;
                current->mm->brk = (current->mm->start_brk + 3) & ~3;
                current->mm->context.end_brk = memp + ksize((void *) memp) - stack_len;
-               current->mm->rss = 0;
+               // current->mm->rss = 0;
+               vx_rsspages_sub(current->mm, current->mm->rss);
        }
 
        if (flags & FLAT_FLAG_KTRACE)
index cabf3cc..ac64fa5 100644 (file)
@@ -259,7 +259,8 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
        create_som_tables(bprm);
 
        current->mm->start_stack = bprm->p;
-       current->mm->rss = 0;
+       // current->mm->rss = 0;
+       vx_rsspages_sub(current->mm, current->mm->rss);
 
 #if 0
        printk("(start_brk) %08lx\n" , (unsigned long) current->mm->start_brk);
index 1d49ef4..17b686a 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/mount.h>
 #include <linux/tty.h>
 #include <linux/devpts_fs.h>
+#include <linux/vinline.h>
 #include "xattr.h"
 
 #define DEVPTS_SUPER_MAGIC 0x1cd1
@@ -134,11 +135,21 @@ static struct dentry *get_node(int num)
        return lookup_one_len(s, root, sprintf(s, "%d", num));
 }
 
+/* -EACCES unless vx_check() accepts inode->i_xid; else vfs_permission(). */
+static int devpts_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+       if (!vx_check(inode->i_xid, VX_IDENT))
+               return -EACCES;
+
+       return vfs_permission(inode, mask);
+}
+
 static struct inode_operations devpts_file_inode_operations = {
        .setxattr       = devpts_setxattr,
        .getxattr       = devpts_getxattr,
        .listxattr      = devpts_listxattr,
        .removexattr    = devpts_removexattr,
+       .permission     = devpts_permission,
 };
 
 int devpts_pty_new(struct tty_struct *tty)
@@ -162,6 +173,7 @@ int devpts_pty_new(struct tty_struct *tty)
        inode->i_gid = config.setgid ? config.gid : current->fsgid;
        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
        init_special_inode(inode, S_IFCHR|config.mode, device);
+       inode->i_xid = vx_current_xid();
        inode->i_op = &devpts_file_inode_operations;
        inode->u.generic_ip = tty;
 
index f73d2c4..c10a7d9 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -327,7 +327,8 @@ void put_dirty_page(struct task_struct *tsk, struct page *page,
        set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, prot))));
        pte_chain = page_add_rmap(page, pte, pte_chain);
        pte_unmap(pte);
-       tsk->mm->rss++;
+       // tsk->mm->rss++;
+       vx_rsspages_inc(tsk->mm);
        spin_unlock(&tsk->mm->page_table_lock);
 
        /* no need for flush_tlb */
@@ -409,7 +410,8 @@ int setup_arg_pages(struct linux_binprm *bprm, int executable_stack)
        if (!mpnt)
                return -ENOMEM;
 
-       if (security_vm_enough_memory(arg_size >> PAGE_SHIFT)) {
+       if (security_vm_enough_memory(arg_size >> PAGE_SHIFT) ||
+               !vx_vmpages_avail(mm, arg_size >> PAGE_SHIFT)) {
                kmem_cache_free(vm_area_cachep, mpnt);
                return -ENOMEM;
        }
@@ -441,7 +443,9 @@ int setup_arg_pages(struct linux_binprm *bprm, int executable_stack)
                INIT_LIST_HEAD(&mpnt->shared);
                mpnt->vm_private_data = (void *) 0;
                insert_vm_struct(mm, mpnt);
-               mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+               // mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+               vx_vmpages_sub(mm, mm->total_vm -
+                       ((mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT));
        }
 
        for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
index cbd6ae8..a2ea40c 100644 (file)
@@ -579,7 +579,8 @@ got:
        inode->i_blocks = 0;
        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
        memset(ei->i_data, 0, sizeof(ei->i_data));
-       ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL;
+       ei->i_flags = EXT2_I(dir)->i_flags &
+               ~(EXT2_BTREE_FL|EXT2_IUNLINK_FL|EXT2_BARRIER_FL);
        if (S_ISLNK(mode))
                ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL);
        /* dirsync is only applied to directories */
index e7cc85d..9b8aeac 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/writeback.h>
 #include <linux/buffer_head.h>
 #include <linux/mpage.h>
+#include <linux/vserver/xid.h>
 #include "ext2.h"
 #include "acl.h"
 
@@ -65,6 +66,8 @@ void ext2_put_inode(struct inode *inode)
                ext2_discard_prealloc(inode);
 }
 
+static void ext2_truncate_nocheck (struct inode * inode);
+
 /*
  * Called at the last iput() if i_nlink is zero.
  */
@@ -78,7 +81,7 @@ void ext2_delete_inode (struct inode * inode)
 
        inode->i_size = 0;
        if (inode->i_blocks)
-               ext2_truncate (inode);
+               ext2_truncate_nocheck(inode);
        ext2_free_inode (inode);
 
        return;
@@ -877,7 +880,7 @@ static void ext2_free_branches(struct inode *inode, u32 *p, u32 *q, int depth)
                ext2_free_data(inode, p, q);
 }
 
-void ext2_truncate (struct inode * inode)
+static void ext2_truncate_nocheck(struct inode * inode)
 {
        u32 *i_data = EXT2_I(inode)->i_data;
        int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
@@ -894,8 +897,6 @@ void ext2_truncate (struct inode * inode)
                return;
        if (ext2_inode_is_fast_symlink(inode))
                return;
-       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-               return;
 
        ext2_discard_prealloc(inode);
 
@@ -1017,6 +1018,13 @@ Egdp:
        return ERR_PTR(-EIO);
 }
 
+/* Truncate only if the inode is neither append-only nor immutable. */
+void ext2_truncate (struct inode * inode)
+{
+       if (!IS_APPEND(inode) && !IS_IMMUTABLE(inode))
+               ext2_truncate_nocheck(inode);
+}
+
 void ext2_set_inode_flags(struct inode *inode)
 {
        unsigned int flags = EXT2_I(inode)->i_flags;
@@ -1028,6 +1036,10 @@ void ext2_set_inode_flags(struct inode *inode)
                inode->i_flags |= S_APPEND;
        if (flags & EXT2_IMMUTABLE_FL)
                inode->i_flags |= S_IMMUTABLE;
+       if (flags & EXT2_IUNLINK_FL)
+               inode->i_flags |= S_IUNLINK;
+       if (flags & EXT2_BARRIER_FL)
+               inode->i_flags |= S_BARRIER;
        if (flags & EXT2_NOATIME_FL)
                inode->i_flags |= S_NOATIME;
        if (flags & EXT2_DIRSYNC_FL)
@@ -1040,6 +1052,8 @@ void ext2_read_inode (struct inode * inode)
        ino_t ino = inode->i_ino;
        struct buffer_head * bh;
        struct ext2_inode * raw_inode = ext2_get_inode(inode->i_sb, ino, &bh);
+       uid_t uid;
+       gid_t gid;
        int n;
 
 #ifdef CONFIG_EXT2_FS_POSIX_ACL
@@ -1050,12 +1064,17 @@ void ext2_read_inode (struct inode * inode)
                goto bad_inode;
 
        inode->i_mode = le16_to_cpu(raw_inode->i_mode);
-       inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
-       inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+       uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
+       gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
        if (!(test_opt (inode->i_sb, NO_UID32))) {
-               inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
-               inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
+               uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
+               gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
        }
+       inode->i_uid = INOXID_UID(uid, gid);
+       inode->i_gid = INOXID_GID(uid, gid);
+       if (inode->i_sb->s_flags & MS_TAGXID)
+               inode->i_xid = INOXID_XID(uid, gid, le16_to_cpu(raw_inode->i_raw_xid));
+
        inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
        inode->i_size = le32_to_cpu(raw_inode->i_size);
        inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime);
@@ -1148,8 +1167,8 @@ static int ext2_update_inode(struct inode * inode, int do_sync)
        struct ext2_inode_info *ei = EXT2_I(inode);
        struct super_block *sb = inode->i_sb;
        ino_t ino = inode->i_ino;
-       uid_t uid = inode->i_uid;
-       gid_t gid = inode->i_gid;
+       uid_t uid = XIDINO_UID(inode->i_uid, inode->i_xid);
+       gid_t gid = XIDINO_GID(inode->i_gid, inode->i_xid);
        struct buffer_head * bh;
        struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh);
        int n;
@@ -1184,6 +1203,9 @@ static int ext2_update_inode(struct inode * inode, int do_sync)
                raw_inode->i_uid_high = 0;
                raw_inode->i_gid_high = 0;
        }
+#ifdef CONFIG_INOXID_GID32
+       raw_inode->i_raw_xid = cpu_to_le16(inode->i_xid);
+#endif
        raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
        raw_inode->i_size = cpu_to_le32(inode->i_size);
        raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
@@ -1261,6 +1283,27 @@ int ext2_sync_inode(struct inode *inode)
        return sync_inode(inode, &wbc);
 }
 
+/* Apply the ATTR_FLAG_* bits in @flags to EXT2_I(inode)->i_flags; returns 0. */
+int ext2_setattr_flags(struct inode *inode, unsigned int flags)
+{
+       struct ext2_inode_info *ei = EXT2_I(inode);
+       unsigned int cur = ei->i_flags;
+       unsigned int wanted = cur &
+               ~(EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL | EXT2_BARRIER_FL);
+
+       if (flags & ATTR_FLAG_IMMUTABLE)
+               wanted |= EXT2_IMMUTABLE_FL;
+       if (flags & ATTR_FLAG_IUNLINK)
+               wanted |= EXT2_IUNLINK_FL;
+       if (flags & ATTR_FLAG_BARRIER)
+               wanted |= EXT2_BARRIER_FL;
+       if (wanted == cur)
+               return 0;
+       ei->i_flags = wanted;
+       inode->i_ctime = CURRENT_TIME;  /* ctime moves only on a real change */
+       return 0;
+}
+
 int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
 {
        struct inode *inode = dentry->d_inode;
@@ -1275,6 +1318,9 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
                if (error)
                        return error;
        }
+       if (iattr->ia_valid & ATTR_ATTR_FLAG)
+               ext2_setattr_flags(inode, iattr->ia_attr_flags);
+
        inode_setattr(inode, iattr);
        if (iattr->ia_valid & ATTR_MODE)
                error = ext2_acl_chmod(inode);
index 3a06830..3e54fbe 100644 (file)
@@ -270,7 +270,7 @@ enum {
        Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
        Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
        Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh,
-       Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
+       Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_tagxid,
        Opt_ignore, Opt_err,
 };
 
@@ -299,6 +299,7 @@ static match_table_t tokens = {
        {Opt_nouser_xattr, "nouser_xattr"},
        {Opt_acl, "acl"},
        {Opt_noacl, "noacl"},
+       {Opt_tagxid, "tagxid"},
        {Opt_ignore, "grpquota"},
        {Opt_ignore, "noquota"},
        {Opt_ignore, "quota"},
@@ -362,6 +363,11 @@ static int parse_options (char * options,
                case Opt_nouid32:
                        set_opt (sbi->s_mount_opt, NO_UID32);
                        break;
+#ifndef CONFIG_INOXID_NONE
+               case Opt_tagxid:
+                       set_opt (sbi->s_mount_opt, TAG_XID);
+                       break;
+#endif
                case Opt_check:
 #ifdef CONFIG_EXT2_CHECK
                        set_opt (sbi->s_mount_opt, CHECK);
@@ -646,6 +652,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
        if (!parse_options ((char *) data, sbi))
                goto failed_mount;
 
+       if (EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_TAG_XID)
+               sb->s_flags |= MS_TAGXID;
        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
                ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
                 MS_POSIXACL : 0);
index ac238b2..624f0bb 100644 (file)
@@ -567,7 +567,8 @@ got:
        ei->i_dir_start_lookup = 0;
        ei->i_disksize = 0;
 
-       ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL;
+       ei->i_flags = EXT3_I(dir)->i_flags &
+               ~(EXT3_INDEX_FL|EXT3_IUNLINK_FL|EXT3_BARRIER_FL);
        if (S_ISLNK(mode))
                ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL);
        /* dirsync only applies to directories */
index b76259e..c1508b2 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/writeback.h>
 #include <linux/mpage.h>
 #include <linux/uio.h>
+#include <linux/vserver/xid.h>
 #include "xattr.h"
 #include "acl.h"
 
@@ -189,6 +190,8 @@ void ext3_put_inode(struct inode *inode)
                ext3_discard_prealloc(inode);
 }
 
+static void ext3_truncate_nocheck (struct inode *inode);
+
 /*
  * Called at the last iput() if i_nlink is zero.
  */
@@ -214,7 +217,7 @@ void ext3_delete_inode (struct inode * inode)
                handle->h_sync = 1;
        inode->i_size = 0;
        if (inode->i_blocks)
-               ext3_truncate(inode);
+               ext3_truncate_nocheck(inode);
        /*
         * Kill off the orphan record which ext3_truncate created.
         * AKPM: I think this can be inside the above `if'.
@@ -2112,7 +2115,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
  * ext3_truncate() run will find them and release them.
  */
 
-void ext3_truncate(struct inode * inode)
+void ext3_truncate_nocheck(struct inode * inode)
 {
        handle_t *handle;
        struct ext3_inode_info *ei = EXT3_I(inode);
@@ -2133,8 +2136,6 @@ void ext3_truncate(struct inode * inode)
                return;
        if (ext3_inode_is_fast_symlink(inode))
                return;
-       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-               return;
 
        ext3_discard_prealloc(inode);
 
@@ -2441,6 +2442,13 @@ has_buffer:
        return 0;
 }
 
+/* Checked entry point: append-only and immutable inodes are left alone. */
+void ext3_truncate(struct inode * inode)
+{
+       if (!IS_APPEND(inode) && !IS_IMMUTABLE(inode))
+               ext3_truncate_nocheck(inode);
+}
+
 void ext3_set_inode_flags(struct inode *inode)
 {
        unsigned int flags = EXT3_I(inode)->i_flags;
@@ -2452,6 +2460,10 @@ void ext3_set_inode_flags(struct inode *inode)
                inode->i_flags |= S_APPEND;
        if (flags & EXT3_IMMUTABLE_FL)
                inode->i_flags |= S_IMMUTABLE;
+       if (flags & EXT3_IUNLINK_FL)
+               inode->i_flags |= S_IUNLINK;
+       if (flags & EXT3_BARRIER_FL)
+               inode->i_flags |= S_BARRIER;
        if (flags & EXT3_NOATIME_FL)
                inode->i_flags |= S_NOATIME;
        if (flags & EXT3_DIRSYNC_FL)
@@ -2465,6 +2477,8 @@ void ext3_read_inode(struct inode * inode)
        struct ext3_inode_info *ei = EXT3_I(inode);
        struct buffer_head *bh;
        int block;
+       uid_t uid;
+       gid_t gid;
 
 #ifdef CONFIG_EXT3_FS_POSIX_ACL
        ei->i_acl = EXT3_ACL_NOT_CACHED;
@@ -2475,12 +2489,17 @@ void ext3_read_inode(struct inode * inode)
        bh = iloc.bh;
        raw_inode = ext3_raw_inode(&iloc);
        inode->i_mode = le16_to_cpu(raw_inode->i_mode);
-       inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
-       inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+       uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
+       gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
        if(!(test_opt (inode->i_sb, NO_UID32))) {
-               inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
-               inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
+               uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
+               gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
        }
+       inode->i_uid = INOXID_UID(uid, gid);
+       inode->i_gid = INOXID_GID(uid, gid);
+       if (inode->i_sb->s_flags & MS_TAGXID)
+               inode->i_xid = INOXID_XID(uid, gid, le16_to_cpu(raw_inode->i_raw_xid));
+
        inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
        inode->i_size = le32_to_cpu(raw_inode->i_size);
        inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime);
@@ -2588,6 +2607,8 @@ static int ext3_do_update_inode(handle_t *handle,
        struct ext3_inode *raw_inode = ext3_raw_inode(iloc);
        struct ext3_inode_info *ei = EXT3_I(inode);
        struct buffer_head *bh = iloc->bh;
+       uid_t uid = XIDINO_UID(inode->i_uid, inode->i_xid);
+       gid_t gid = XIDINO_GID(inode->i_gid, inode->i_xid);
        int err = 0, rc, block;
 
        /* For fields not not tracking in the in-memory inode,
@@ -2597,29 +2618,32 @@ static int ext3_do_update_inode(handle_t *handle,
 
        raw_inode->i_mode = cpu_to_le16(inode->i_mode);
        if(!(test_opt(inode->i_sb, NO_UID32))) {
-               raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
-               raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
+               raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid));
+               raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid));
 /*
  * Fix up interoperability with old kernels. Otherwise, old inodes get
  * re-used with the upper 16 bits of the uid/gid intact
  */
                if(!ei->i_dtime) {
                        raw_inode->i_uid_high =
-                               cpu_to_le16(high_16_bits(inode->i_uid));
+                               cpu_to_le16(high_16_bits(uid));
                        raw_inode->i_gid_high =
-                               cpu_to_le16(high_16_bits(inode->i_gid));
+                               cpu_to_le16(high_16_bits(gid));
                } else {
                        raw_inode->i_uid_high = 0;
                        raw_inode->i_gid_high = 0;
                }
        } else {
                raw_inode->i_uid_low =
-                       cpu_to_le16(fs_high2lowuid(inode->i_uid));
+                       cpu_to_le16(fs_high2lowuid(uid));
                raw_inode->i_gid_low =
-                       cpu_to_le16(fs_high2lowgid(inode->i_gid));
+                       cpu_to_le16(fs_high2lowgid(gid));
                raw_inode->i_uid_high = 0;
                raw_inode->i_gid_high = 0;
        }
+#ifdef CONFIG_INOXID_GID32
+       raw_inode->i_raw_xid = cpu_to_le16(inode->i_xid);
+#endif
        raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
        raw_inode->i_size = cpu_to_le32(ei->i_disksize);
        raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
@@ -2741,6 +2765,44 @@ void ext3_write_inode(struct inode *inode, int wait)
        ext3_force_commit(inode->i_sb);
 }
 
+/*
+ * Rebuild the IMMUTABLE/IUNLINK/BARRIER bits of the ext3 inode flags from
+ * the ATTR_FLAG_* request; the inode is journalled only if they changed.
+ */
+int ext3_setattr_flags(struct inode *inode, unsigned int flags)
+{
+       const unsigned int managed =
+               EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL | EXT3_BARRIER_FL;
+       unsigned int oldflags = EXT3_I(inode)->i_flags;
+       unsigned int newflags = oldflags & ~managed;
+       struct ext3_iloc iloc;
+       handle_t *handle;
+       int err;
+
+       if (flags & ATTR_FLAG_IMMUTABLE)
+               newflags |= EXT3_IMMUTABLE_FL;
+       if (flags & ATTR_FLAG_IUNLINK)
+               newflags |= EXT3_IUNLINK_FL;
+       if (flags & ATTR_FLAG_BARRIER)
+               newflags |= EXT3_BARRIER_FL;
+       if (newflags == oldflags)
+               return 0;       /* nothing to write back */
+
+       handle = ext3_journal_start(inode, 1);
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+       if (IS_SYNC(inode))
+               handle->h_sync = 1;
+       err = ext3_reserve_inode_write(handle, inode, &iloc);
+       if (!err) {
+               EXT3_I(inode)->i_flags = newflags;
+               inode->i_ctime = CURRENT_TIME;
+               err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+       }
+       ext3_journal_stop(handle);
+       return err;
+}
+
 /*
  * ext3_setattr()
  *
@@ -2812,6 +2874,12 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
                ext3_journal_stop(handle);
        }
 
+       if (ia_valid & ATTR_ATTR_FLAG) {
+               rc = ext3_setattr_flags(inode, attr->ia_attr_flags);
+               if (!error)
+                       error = rc;
+       }
+
        rc = inode_setattr(inode, attr);
 
        /* If inode_setattr's call to ext3_truncate failed to get a
index 3681474..82c325f 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/ext3_fs.h>
 #include <linux/ext3_jbd.h>
 #include <linux/time.h>
+#include <linux/vserver/xid.h>
 #include <asm/uaccess.h>
 
 
@@ -150,6 +151,38 @@ flags_err:
                        remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
                        return ret;
                }
+#endif
+#if defined(CONFIG_VSERVER_LEGACY) && !defined(CONFIG_INOXID_NONE)
+       case EXT3_IOC_SETXID: {         /* set inode->i_xid from the int at arg */
+               handle_t *handle;
+               struct ext3_iloc iloc;
+               int xid;
+               int err;
+
+               /* fixme: if stealth, return -ENOTTY */
+               if (!capable(CAP_CONTEXT))
+                       return -EPERM;
+               if (IS_RDONLY(inode))
+                       return -EROFS;
+               if (!(inode->i_sb->s_flags & MS_TAGXID))
+                       return -ENOSYS;
+               if (get_user(xid, (int *) arg))
+                       return -EFAULT;
+
+               handle = ext3_journal_start(inode, 1);
+               if (IS_ERR(handle))
+                       return PTR_ERR(handle);
+               err = ext3_reserve_inode_write(handle, inode, &iloc);
+               if (!err) {
+                       /* only the low 16 bits of the xid are kept */
+                       inode->i_xid = (xid & 0xFFFF);
+                       inode->i_ctime = CURRENT_TIME;
+                       err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+               }
+               /* always stop the handle, also when the reserve failed */
+               ext3_journal_stop(handle);
+               return err;
+       }
 #endif
        default:
                return -ENOTTY;
index 4bbb5a0..09d6032 100644 (file)
@@ -583,7 +583,7 @@ enum {
        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
-       Opt_ignore, Opt_err,
+       Opt_tagxid, Opt_ignore, Opt_err,
 };
 
 static match_table_t tokens = {
@@ -624,6 +624,7 @@ static match_table_t tokens = {
        {Opt_grpjquota, "grpjquota=%s"},
        {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
        {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
+       {Opt_tagxid, "tagxid"},
        {Opt_ignore, "grpquota"},
        {Opt_ignore, "noquota"},
        {Opt_ignore, "quota"},
@@ -717,6 +718,11 @@ static int parse_options (char * options, struct super_block *sb,
                case Opt_nouid32:
                        set_opt (sbi->s_mount_opt, NO_UID32);
                        break;
+#ifndef CONFIG_INOXID_NONE
+               case Opt_tagxid:
+                       set_opt (sbi->s_mount_opt, TAG_XID);
+                       break;
+#endif
                case Opt_check:
 #ifdef CONFIG_EXT3_CHECK
                        set_opt (sbi->s_mount_opt, CHECK);
@@ -1287,6 +1293,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
        if (!parse_options ((char *) data, sb, &journal_inum, 0))
                goto failed_mount;
 
+       if (EXT3_SB(sb)->s_mount_opt & EXT3_MOUNT_TAG_XID)
+               sb->s_flags |= MS_TAGXID;
        sb->s_flags |= MS_ONE_SECOND;
        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
                ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
index abad0aa..c8c39b7 100644 (file)
@@ -107,6 +107,8 @@ repeat:
        error = -EMFILE;
        if (newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
                goto out;
+       if (!vx_files_avail(1))
+               goto out;
 
        error = expand_files(files, newfd);
        if (error < 0)
@@ -139,6 +141,7 @@ static int dupfd(struct file *file, unsigned int start)
                FD_SET(fd, files->open_fds);
                FD_CLR(fd, files->close_on_exec);
                spin_unlock(&files->file_lock);
+               vx_openfd_inc(fd);
                fd_install(fd, file);
        } else {
                spin_unlock(&files->file_lock);
@@ -186,6 +189,7 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
        FD_SET(newfd, files->open_fds);
        FD_CLR(newfd, files->close_on_exec);
        spin_unlock(&files->file_lock);
+       vx_openfd_inc(newfd);
 
        if (tofree)
                filp_close(tofree, files);
index 5d56ec5..1894e3b 100644 (file)
@@ -87,6 +87,7 @@ static int old_max;
                        f->f_owner.lock = RW_LOCK_UNLOCKED;
                        /* f->f_version: 0 */
                        INIT_LIST_HEAD(&f->f_list);
+                       vx_files_inc(f);
                        return f;
                }
        }
@@ -184,6 +185,7 @@ void fastcall __fput(struct file *file)
        fops_put(file->f_op);
        if (file->f_mode & FMODE_WRITE)
                put_write_access(inode);
+       vx_files_dec(file);
        file_kill(file);
        file->f_dentry = NULL;
        file->f_vfsmnt = NULL;
index 282d86a..1ad2a71 100644 (file)
@@ -114,6 +114,11 @@ static struct inode *alloc_inode(struct super_block *sb)
                struct address_space * const mapping = &inode->i_data;
 
                inode->i_sb = sb;
+               if (sb->s_flags & MS_TAGXID)
+                       inode->i_xid = current->xid;
+               else
+                       inode->i_xid = 0;       /* maybe xid -1 would be better? */
+               // inode->i_dqh = dqhget(sb->s_dqh);
                inode->i_blkbits = sb->s_blocksize_bits;
                inode->i_flags = 0;
                atomic_set(&inode->i_count, 1);
@@ -133,6 +138,7 @@ static struct inode *alloc_inode(struct super_block *sb)
                inode->i_bdev = NULL;
                inode->i_cdev = NULL;
                inode->i_rdev = 0;
+               // inode->i_xid = 0;    /* maybe not too wise ... */
                inode->i_security = NULL;
                inode->dirtied_when = 0;
                if (security_inode_alloc(inode)) {
index 9737a0f..f84ba41 100644 (file)
@@ -9,10 +9,18 @@
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/security.h>
+#include <linux/proc_fs.h>
+#include <linux/vserver/inode.h>
+#include <linux/vserver/xid.h>
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 
+#ifdef CONFIG_VSERVER_LEGACY           
+extern int vx_proc_ioctl(struct inode *, struct file *,
+       unsigned int, unsigned long);
+#endif
+
 static int file_ioctl(struct file *filp,unsigned int cmd,unsigned long arg)
 {
        int error;
@@ -119,6 +127,48 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
                        else
                                error = -ENOTTY;
                        break;
+#ifdef CONFIG_VSERVER_LEGACY           
+#ifndef CONFIG_INOXID_NONE
+               case FIOC_GETXID: {
+                       struct inode *inode = filp->f_dentry->d_inode;
+
+                       /* fixme: if stealth, return -ENOTTY */
+                       error = -EPERM;
+                       if (capable(CAP_CONTEXT))
+                               error = put_user(inode->i_xid, (int *) arg);
+                       break;
+               }
+               case FIOC_SETXID: {
+                       struct inode *inode = filp->f_dentry->d_inode;
+                       int xid;
+
+                       /* fixme: if stealth, return -ENOTTY */
+                       error = -EPERM;
+                       if (!capable(CAP_CONTEXT))
+                               break;
+                       error = -EROFS;
+                       if (IS_RDONLY(inode))
+                               break;
+                       error = -ENOSYS;
+                       if (!(inode->i_sb->s_flags & MS_TAGXID))
+                               break;
+                       error = -EFAULT;
+                       if (get_user(xid, (int *) arg))
+                               break;
+                       error = 0;
+                       inode->i_xid = (xid & 0xFFFF);
+                       inode->i_ctime = CURRENT_TIME;
+                       mark_inode_dirty(inode);
+                       break;  
+               }
+#endif
+               case FIOC_GETXFLG:
+               case FIOC_SETXFLG:
+                       error = -ENOTTY;
+                       if (filp->f_dentry->d_inode->i_sb->s_magic == PROC_SUPER_MAGIC)
+                               error = vx_proc_ioctl(filp->f_dentry->d_inode, filp, cmd, arg);
+                       break;
+#endif
                default:
                        error = -ENOTTY;
                        if (S_ISREG(filp->f_dentry->d_inode->i_mode))
index 865334f..848fbc8 100644 (file)
@@ -44,6 +44,7 @@
 #include <linux/fs.h>
 #include <linux/buffer_head.h>
 #include <linux/pagemap.h>
+#include <linux/vserver/xid.h>
 
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
@@ -3098,14 +3099,21 @@ static void duplicateIXtree(struct super_block *sb, s64 blkno,
 static int copy_from_dinode(struct dinode * dip, struct inode *ip)
 {
        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
+       uid_t uid;
+       gid_t gid;
 
        jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
        jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
 
        ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff;
        ip->i_nlink = le32_to_cpu(dip->di_nlink);
-       ip->i_uid = le32_to_cpu(dip->di_uid);
-       ip->i_gid = le32_to_cpu(dip->di_gid);
+
+       uid = le32_to_cpu(dip->di_uid);
+       gid = le32_to_cpu(dip->di_gid);
+       ip->i_uid = INOXID_UID(uid, gid);
+       ip->i_gid = INOXID_GID(uid, gid);
+       ip->i_xid = INOXID_XID(uid, gid, 0);
+       
        ip->i_size = le64_to_cpu(dip->di_size);
        ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec);
        ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec);
@@ -3156,6 +3164,8 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
 static void copy_to_dinode(struct dinode * dip, struct inode *ip)
 {
        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
+       uid_t uid;
+       gid_t gid;
 
        dip->di_fileset = cpu_to_le32(jfs_ip->fileset);
        dip->di_inostamp = cpu_to_le32(JFS_SBI(ip->i_sb)->inostamp);
@@ -3164,8 +3174,11 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip)
        dip->di_size = cpu_to_le64(ip->i_size);
        dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
        dip->di_nlink = cpu_to_le32(ip->i_nlink);
-       dip->di_uid = cpu_to_le32(ip->i_uid);
-       dip->di_gid = cpu_to_le32(ip->i_gid);
+
+       uid = XIDINO_UID(ip->i_uid, ip->i_xid);
+       gid = XIDINO_GID(ip->i_gid, ip->i_xid);
+       dip->di_uid = cpu_to_le32(uid);
+       dip->di_gid = cpu_to_le32(gid);
        /*
         * mode2 is only needed for storing the higher order bits.
         * Trust i_mode for the lower order ones
index d2cab64..45b138d 100644 (file)
@@ -163,6 +163,9 @@ int vfs_permission(struct inode * inode, int mask)
 {
        umode_t                 mode = inode->i_mode;
 
+       if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN|VX_WATCH))
+               return -EACCES;
+
        if (mask & MAY_WRITE) {
                /*
                 * Nobody gets write access to a read-only fs.
@@ -208,6 +211,15 @@ int vfs_permission(struct inode * inode, int mask)
        return -EACCES;
 }
 
+/* Allow untagged inodes (xid 0) and any xid that vx_check() accepts. */
+static inline int xid_permission(struct inode *inode)
+{
+       if (inode->i_xid != 0 &&
+           !vx_check(inode->i_xid, VX_ADMIN|VX_WATCH|VX_IDENT))
+               return -EACCES;
+       return 0;
+}
+
 int permission(struct inode * inode,int mask, struct nameidata *nd)
 {
        int retval;
@@ -216,6 +228,8 @@ int permission(struct inode * inode,int mask, struct nameidata *nd)
        /* Ordinary permission routines do not understand MAY_APPEND. */
        submask = mask & ~MAY_APPEND;
 
+       if ((retval = xid_permission(inode)))
+               return retval;
        if (inode->i_op && inode->i_op->permission)
                retval = inode->i_op->permission(inode, submask, nd);
        else
@@ -1039,7 +1053,7 @@ static inline int may_delete(struct inode *dir,struct dentry *victim,int isdir)
        if (IS_APPEND(dir))
                return -EPERM;
        if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
-           IS_IMMUTABLE(victim->d_inode))
+               IS_IXORUNLINK(victim->d_inode))
                return -EPERM;
        if (isdir) {
                if (!S_ISDIR(victim->d_inode->i_mode))
@@ -1833,7 +1847,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
        /*
         * A link to an append-only or immutable file cannot be created.
         */
-       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+       if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
                return -EPERM;
        if (!dir->i_op || !dir->i_op->link)
                return -EPERM;
index fb0a3ab..dfeac21 100644 (file)
@@ -232,6 +232,9 @@ static int show_vfsmnt(struct seq_file *m, void *v)
        };
        struct proc_fs_info *fs_infop;
 
+       if (vx_flags(VXF_HIDE_MOUNT, 0))
+               return 0;
+
        mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
        seq_putc(m, ' ');
        seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
@@ -273,18 +276,10 @@ int may_umount(struct vfsmount *mnt)
 
 EXPORT_SYMBOL(may_umount);
 
-void umount_tree(struct vfsmount *mnt)
+static inline void __umount_tree(struct vfsmount *mnt, struct list_head *kill)
 {
-       struct vfsmount *p;
-       LIST_HEAD(kill);
-
-       for (p = mnt; p; p = next_mnt(p, mnt)) {
-               list_del(&p->mnt_list);
-               list_add(&p->mnt_list, &kill);
-       }
-
-       while (!list_empty(&kill)) {
-               mnt = list_entry(kill.next, struct vfsmount, mnt_list);
+       while (!list_empty(kill)) {
+               mnt = list_entry(kill->next, struct vfsmount, mnt_list);
                list_del_init(&mnt->mnt_list);
                if (mnt->mnt_parent == mnt) {
                        spin_unlock(&vfsmount_lock);
@@ -299,6 +294,32 @@ void umount_tree(struct vfsmount *mnt)
        }
 }
 
+void umount_tree(struct vfsmount *mnt)
+{
+       struct vfsmount *p;
+       LIST_HEAD(kill);        /* mounts handed to __umount_tree() */
+
+       for (p = mnt; p; p = next_mnt(p, mnt)) {
+               list_del(&p->mnt_list);         /* off its current list ... */
+               list_add(&p->mnt_list, &kill);  /* ... and onto the kill list */
+       }
+       __umount_tree(mnt, &kill);      /* drains the kill list */
+}
+
+/* Like umount_tree(), but fs's root and pwd mounts stay off the kill list. */
+void umount_unused(struct vfsmount *mnt, struct fs_struct *fs)
+{
+       struct vfsmount *p;
+       LIST_HEAD(kill);
+
+       for (p = mnt; p; p = next_mnt(p, mnt))
+               if (p != fs->rootmnt && p != fs->pwdmnt) {
+                       list_del(&p->mnt_list);
+                       list_add(&p->mnt_list, &kill);
+               }
+       __umount_tree(mnt, &kill);
+}
+
 static int do_umount(struct vfsmount *mnt, int flags)
 {
        struct super_block * sb = mnt->mnt_sb;
@@ -396,7 +417,7 @@ asmlinkage long sys_umount(char __user * name, int flags)
                goto dput_and_out;
 
        retval = -EPERM;
-       if (!capable(CAP_SYS_ADMIN))
+       if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT))
                goto dput_and_out;
 
        retval = do_umount(nd.mnt, flags);
@@ -419,6 +440,8 @@ static int mount_is_safe(struct nameidata *nd)
 {
        if (capable(CAP_SYS_ADMIN))
                return 0;
+       if (vx_ccaps(VXC_SECURE_MOUNT))
+               return 0;
        return -EPERM;
 #ifdef notyet
        if (S_ISLNK(nd->dentry->d_inode->i_mode))
@@ -779,6 +802,9 @@ long do_mount(char * dev_name, char * dir_name, char *type_page,
                mnt_flags |= MNT_NOEXEC;
        flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_ACTIVE);
 
+       if (vx_ccaps(VXC_SECURE_MOUNT))
+               mnt_flags |= MNT_NODEV;
+
        /* ... and get the mountpoint */
        retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
        if (retval)
index 8646159..a561819 100644 (file)
@@ -306,7 +306,7 @@ static int __init root_nfs_name(char *name)
        /* Override them by options set on kernel command-line */
        root_nfs_parse(name, buf);
 
-       cp = system_utsname.nodename;
+       cp = vx_new_uts(nodename);
        if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
                printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
                return -1;
index 66a77f5..1ded0f7 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -22,6 +22,7 @@
 #include <asm/uaccess.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
+#include <linux/vserver/xid.h>
 
 int vfs_statfs(struct super_block *sb, struct kstatfs *buf)
 {
@@ -605,6 +606,9 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
        dentry = file->f_dentry;
        inode = dentry->d_inode;
 
+       err = -EPERM;
+       if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN))
+               goto out_putf;
        err = -EROFS;
        if (IS_RDONLY(inode))
                goto out_putf;
@@ -637,6 +641,10 @@ asmlinkage long sys_chmod(const char __user * filename, mode_t mode)
                goto out;
        inode = nd.dentry->d_inode;
 
+       error = -EPERM;
+       if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN))
+               goto dput_and_out;
+
        error = -EROFS;
        if (IS_RDONLY(inode))
                goto dput_and_out;
@@ -676,14 +684,15 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
        error = -EPERM;
        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                goto out;
+
        newattrs.ia_valid =  ATTR_CTIME;
        if (user != (uid_t) -1) {
                newattrs.ia_valid |= ATTR_UID;
-               newattrs.ia_uid = user;
+               newattrs.ia_uid = vx_map_uid(user);
        }
        if (group != (gid_t) -1) {
                newattrs.ia_valid |= ATTR_GID;
-               newattrs.ia_gid = group;
+               newattrs.ia_gid = vx_map_gid(group);
        }
        if (!S_ISDIR(inode->i_mode))
                newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
@@ -878,6 +887,7 @@ repeat:
        FD_SET(fd, files->open_fds);
        FD_CLR(fd, files->close_on_exec);
        files->next_fd = fd + 1;
+       vx_openfd_inc(fd);
 #if 1
        /* Sanity check */
        if (files->fd[fd] != NULL) {
@@ -1032,6 +1042,7 @@ asmlinkage long sys_close(unsigned int fd)
        FD_CLR(fd, files->close_on_exec);
        __put_unused_fd(files, fd);
        spin_unlock(&files->file_lock);
+       vx_openfd_dec(fd);
        return filp_close(filp, files);
 
 out_unlock:
index 6bdd15b..4a2cce7 100644 (file)
@@ -73,6 +73,7 @@
 #include <linux/highmem.h>
 #include <linux/file.h>
 #include <linux/times.h>
+#include <linux/ninline.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -128,7 +129,8 @@ static const char *task_state_array[] = {
        "D (disk sleep)",       /*  2 */
        "T (stopped)",          /*  4 */
        "Z (zombie)",           /*  8 */
-       "X (dead)"              /* 16 */
+       "X (dead)",             /* 16 */
+       "H (on hold)"           /* 32 */
 };
 
 static inline const char * get_task_state(struct task_struct *tsk)
@@ -137,7 +139,8 @@ static inline const char * get_task_state(struct task_struct *tsk)
                                           TASK_INTERRUPTIBLE |
                                           TASK_UNINTERRUPTIBLE |
                                           TASK_ZOMBIE |
-                                          TASK_STOPPED);
+                                          TASK_STOPPED |
+                                          TASK_ONHOLD);
        const char **p = &task_state_array[0];
 
        while (state) {
@@ -150,8 +153,10 @@ static inline const char * get_task_state(struct task_struct *tsk)
 static inline char * task_state(struct task_struct *p, char *buffer)
 {
        int g;
+       pid_t ppid;
 
        read_lock(&tasklist_lock);
+       ppid = vx_map_tgid(current->vx_info, p->real_parent->pid);
        buffer += sprintf(buffer,
                "State:\t%s\n"
                "SleepAVG:\t%lu%%\n"
@@ -164,7 +169,7 @@ static inline char * task_state(struct task_struct *p, char *buffer)
                get_task_state(p),
                (p->sleep_avg/1024)*100/(1020000000/1024),
                p->tgid,
-               p->pid, p->pid ? p->real_parent->pid : 0,
+               p->pid, p->pid ? ppid : 0,
                p->pid && p->ptrace ? p->parent->pid : 0,
                p->uid, p->euid, p->suid, p->fsuid,
                p->gid, p->egid, p->sgid, p->fsgid);
@@ -275,6 +280,10 @@ extern char *task_mem(struct mm_struct *, char *);
 int proc_pid_status(struct task_struct *task, char * buffer)
 {
        char * orig = buffer;
+#ifdef CONFIG_VSERVER_LEGACY           
+       struct vx_info *vxi;
+       struct nx_info *nxi;
+#endif
        struct mm_struct *mm = get_task_mm(task);
 
        buffer = task_name(task, buffer);
@@ -286,6 +295,41 @@ int proc_pid_status(struct task_struct *task, char * buffer)
        }
        buffer = task_sig(task, buffer);
        buffer = task_cap(task, buffer);
+
+#ifdef CONFIG_VSERVER_LEGACY           
+       buffer += sprintf (buffer,"s_context: %d\n", vx_task_xid(task));
+       vxi = task_get_vx_info(task);
+       if (vxi) {
+               buffer += sprintf (buffer,"ctxflags: %08llx\n"
+                       ,vxi->vx_flags);
+               buffer += sprintf (buffer,"initpid: %d\n"
+                       ,vxi->vx_initpid);
+       } else {
+               buffer += sprintf (buffer,"ctxflags: none\n");
+               buffer += sprintf (buffer,"initpid: none\n");
+       }
+       put_vx_info(vxi);
+       nxi = task_get_nx_info(task);
+       if (nxi) {
+               int i;
+
+               buffer += sprintf (buffer,"ipv4root:");
+               for (i=0; i<nxi->nbipv4; i++){
+                       buffer += sprintf (buffer," %08x/%08x"
+                               ,nxi->ipv4[i]
+                               ,nxi->mask[i]);
+               }
+               *buffer++ = '\n';
+               buffer += sprintf (buffer,"ipv4root_bcast: %08x\n"
+                       ,nxi->v4_bcast);
+               buffer += sprintf (buffer,"ipv4root_refcnt: %d\n"
+                       ,atomic_read(&nxi->nx_refcount));
+       } else {
+               buffer += sprintf (buffer,"ipv4root: 0\n");
+               buffer += sprintf (buffer,"ipv4root_bcast: 0\n");
+       }
+       put_nx_info(nxi);
+#endif
 #if defined(CONFIG_ARCH_S390)
        buffer = task_show_regs(task, buffer);
 #endif
@@ -297,6 +341,7 @@ int proc_pid_stat(struct task_struct *task, char * buffer)
 {
        unsigned long vsize, eip, esp, wchan;
        long priority, nice;
+       unsigned long long bias_jiffies;
        int tty_pgrp = -1, tty_nr = 0;
        sigset_t sigign, sigcatch;
        char state;
@@ -308,7 +353,16 @@ int proc_pid_stat(struct task_struct *task, char * buffer)
 
        state = *get_task_state(task);
        vsize = eip = esp = 0;
+       bias_jiffies = INITIAL_JIFFIES;
+
        task_lock(task);
+       if (__vx_task_flags(task, VXF_VIRT_UPTIME, 0)) {
+               bias_jiffies = task->vx_info->cvirt.bias_jiffies;
+               /* hmm, do we need that? */
+               if (bias_jiffies > task->start_time)
+                       bias_jiffies = task->start_time;
+       }
+
        mm = task->mm;
        if(mm)
                mm = mmgrab(mm);
@@ -352,7 +406,7 @@ int proc_pid_stat(struct task_struct *task, char * buffer)
        read_unlock(&tasklist_lock);
 
        /* Temporary variable needed for gcc-2.96 */
-       start_time = jiffies_64_to_clock_t(task->start_time - INITIAL_JIFFIES);
+       start_time = jiffies_64_to_clock_t(task->start_time - bias_jiffies);
 
        res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
 %lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \
index 7ff742c..353407b 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <linux/ptrace.h>
+#include <linux/ninline.h>
 
 /*
  * For hysterical raisins we keep the same inumbers as in the old procfs.
@@ -67,6 +68,8 @@ enum pid_directory_inos {
        PROC_TGID_ATTR_EXEC,
        PROC_TGID_ATTR_FSCREATE,
 #endif
+       PROC_TGID_VX_INFO,
+       PROC_TGID_IP_INFO,
        PROC_TGID_FD_DIR,
        PROC_TID_INO,
        PROC_TID_STATUS,
@@ -90,6 +93,8 @@ enum pid_directory_inos {
        PROC_TID_ATTR_EXEC,
        PROC_TID_ATTR_FSCREATE,
 #endif
+       PROC_TID_VX_INFO,
+       PROC_TID_IP_INFO,
        PROC_TID_FD_DIR = 0x8000,       /* 0x8000-0xffff */
 };
 
@@ -123,6 +128,8 @@ static struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_KALLSYMS
        E(PROC_TGID_WCHAN,     "wchan",   S_IFREG|S_IRUGO),
 #endif
+       E(PROC_TGID_VX_INFO,   "vinfo",   S_IFREG|S_IRUGO),
+       E(PROC_TGID_IP_INFO,   "ninfo",   S_IFREG|S_IRUGO),
        {0,0,NULL,0}
 };
 static struct pid_entry tid_base_stuff[] = {
@@ -145,6 +152,8 @@ static struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_KALLSYMS
        E(PROC_TID_WCHAN,      "wchan",   S_IFREG|S_IRUGO),
 #endif
+       E(PROC_TID_VX_INFO,    "vinfo",   S_IFREG|S_IRUGO),
+       E(PROC_TID_IP_INFO,    "ninfo",   S_IFREG|S_IRUGO),
        {0,0,NULL,0}
 };
 
@@ -954,6 +963,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
                inode->i_uid = task->euid;
                inode->i_gid = task->egid;
        }
+       inode->i_xid = vx_task_xid(task);
        security_task_to_inode(task, inode);
 
 out:
@@ -979,6 +989,11 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
        struct inode *inode = dentry->d_inode;
        struct task_struct *task = proc_task(inode);
+
+       if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
+               goto out_drop;
+       /* discard wrong fakeinit */
+
        if (pid_alive(task)) {
                if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) {
                        inode->i_uid = task->euid;
@@ -990,6 +1005,7 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
                security_task_to_inode(task, inode);
                return 1;
        }
+out_drop:
        d_drop(dentry);
        return 0;
 }
@@ -1375,6 +1391,16 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
                        ei->op.proc_read = proc_pid_wchan;
                        break;
 #endif
+               case PROC_TID_VX_INFO:
+               case PROC_TGID_VX_INFO:
+                       inode->i_fop = &proc_info_file_operations;
+                       ei->op.proc_read = proc_pid_vx_info;
+                       break;
+               case PROC_TID_IP_INFO:
+               case PROC_TGID_IP_INFO:
+                       inode->i_fop = &proc_info_file_operations;
+                       ei->op.proc_read = proc_pid_nx_info;
+                       break;
                default:
                        printk("procfs: impossible type (%d)",p->type);
                        iput(inode);
@@ -1555,7 +1581,7 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
                d_add(dentry, inode);
                return NULL;
        }
-       tgid = name_to_int(dentry);
+       tgid = vx_rmap_tgid(current->vx_info, name_to_int(dentry));
        if (tgid == ~0U)
                goto out;
 
@@ -1567,8 +1593,9 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
        if (!task)
                goto out;
 
-       inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
-
+       inode = NULL;
+       if (vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
+               inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
 
        if (!inode) {
                put_task_struct(task);
@@ -1610,10 +1637,12 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
        struct inode *inode;
        unsigned tid;
 
-       tid = name_to_int(dentry);
+       tid = vx_rmap_tgid(current->vx_info, name_to_int(dentry));
        if (tid == ~0U)
                goto out;
 
+/*     handle fakeinit */
+
        read_lock(&tasklist_lock);
        task = find_task_by_pid(tid);
        if (task)
@@ -1624,8 +1653,9 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
        if (leader->tgid != task->tgid)
                goto out_drop_task;
 
-       inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO);
-
+       inode = NULL;
+       if (vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
+               inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO);
 
        if (!inode)
                goto out_drop_task;
@@ -1676,11 +1706,14 @@ static int get_tgid_list(int index, unsigned long version, unsigned int *tgids)
 
        for ( ; p != &init_task; p = next_task(p)) {
                int tgid = p->pid;
+
                if (!pid_alive(p))
                        continue;
+               if (!vx_check(vx_task_xid(p), VX_WATCH|VX_IDENT))
+                       continue;
                if (--index >= 0)
                        continue;
-               tgids[nr_tgids] = tgid;
+               tgids[nr_tgids] = vx_map_tgid(current->vx_info, tgid);
                nr_tgids++;
                if (nr_tgids >= PROC_MAXPIDS)
                        break;
@@ -1710,9 +1743,11 @@ static int get_tid_list(int index, unsigned int *tids, struct inode *dir)
        if (pid_alive(task)) do {
                int tid = task->pid;
 
+               if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
+                       continue;
                if (--index >= 0)
                        continue;
-               tids[nr_tids] = tid;
+               tids[nr_tids] = vx_map_tgid(current->vx_info, tid);
                nr_tids++;
                if (nr_tids >= PROC_MAXPIDS)
                        break;
@@ -1766,11 +1801,14 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
        unsigned int nr_tids, i;
        struct dentry *dentry = filp->f_dentry;
        struct inode *inode = dentry->d_inode;
+       struct task_struct *task = proc_task(inode);
        int retval = -ENOENT;
        ino_t ino;
        unsigned long pos = filp->f_pos;  /* avoiding "long long" filp->f_pos */
 
-       if (!pid_alive(proc_task(inode)))
+       if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
+               goto out;
+       if (!pid_alive(task))
                goto out;
        retval = 0;
 
index d2c88eb..21e06c9 100644 (file)
@@ -17,6 +17,8 @@
 #include <linux/smp_lock.h>
 #include <linux/init.h>
 #include <linux/idr.h>
+#include <linux/vinline.h>
+#include <linux/vserver/inode.h>
 #include <asm/uaccess.h>
 #include <asm/bitops.h>
 
@@ -348,8 +350,15 @@ static int proc_delete_dentry(struct dentry * dentry)
        return 1;
 }
 
+static int proc_revalidate_dentry(struct dentry *de, struct nameidata *nd)
+{
+       /* Always 0 ("not valid"); neither argument is inspected. TODO: maybe add a check if it's really necessary? */
+       return 0;
+}
+
 static struct dentry_operations proc_dentry_operations =
 {
+       .d_revalidate   = proc_revalidate_dentry,
        .d_delete       = proc_delete_dentry,
 };
 
@@ -369,6 +378,8 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam
                for (de = de->subdir; de ; de = de->next) {
                        if (de->namelen != dentry->d_name.len)
                                continue;
+                       if (!vx_hide_check(0, de->vx_flags))
+                               continue;
                        if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
                                unsigned int ino = de->low_ino;
 
@@ -445,9 +456,12 @@ int proc_readdir(struct file * filp,
                        }
 
                        do {
+                               if (!vx_hide_check(0, de->vx_flags))
+                                       goto skip;
                                if (filldir(dirent, de->name, de->namelen, filp->f_pos,
                                            de->low_ino, de->mode >> 12) < 0)
                                        goto out;
+                       skip:
                                filp->f_pos++;
                                de = de->next;
                        } while (de);
@@ -559,6 +573,7 @@ static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent,
        ent->namelen = len;
        ent->mode = mode;
        ent->nlink = nlink;
+       ent->vx_flags = IATTR_PROC_DEFAULT;
  out:
        return ent;
 }
@@ -579,7 +594,8 @@ struct proc_dir_entry *proc_symlink(const char *name,
                                kfree(ent->data);
                                kfree(ent);
                                ent = NULL;
-                       }
+                       } else
+                               ent->vx_flags = IATTR_PROC_SYMLINK;
                } else {
                        kfree(ent);
                        ent = NULL;
index 2d38f02..bf090da 100644 (file)
@@ -211,6 +211,8 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
                        inode->i_uid = de->uid;
                        inode->i_gid = de->gid;
                }
+               if (de->vx_flags)
+                       PROC_I(inode)->vx_flags = de->vx_flags;
                if (de->size)
                        inode->i_size = de->size;
                if (de->nlink)
index d6b65c0..0b4de43 100644 (file)
@@ -44,6 +44,7 @@
 #include <linux/jiffies.h>
 #include <linux/sysrq.h>
 #include <linux/vmalloc.h>
+#include <linux/vinline.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
@@ -144,6 +145,9 @@ static int uptime_read_proc(char *page, char **start, off_t off,
 
        do_posix_clock_monotonic_gettime(&uptime);
        jiffies_to_timespec(idle_jiffies, &idle);
+       if (vx_flags(VXF_VIRT_UPTIME, 0))
+               vx_vsi_uptime(&uptime, &idle);
+
        len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
                        (unsigned long) uptime.tv_sec,
                        (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
index bf4b5d2..c84e88f 100644 (file)
@@ -23,6 +23,9 @@ struct proc_dir_entry *proc_net, *proc_bus, *proc_root_fs, *proc_root_driver;
 #ifdef CONFIG_SYSCTL
 struct proc_dir_entry *proc_sys_root;
 #endif
+struct proc_dir_entry *proc_virtual;
+
+extern void proc_vx_init(void);
 
 static struct super_block *proc_get_sb(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data)
@@ -75,6 +78,7 @@ void __init proc_root_init(void)
        proc_device_tree_init();
 #endif
        proc_bus = proc_mkdir("bus", 0);
+       proc_vx_init();
 }
 
 static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
index 6fa949e..7eba8fd 100644 (file)
@@ -97,10 +97,35 @@ static int reiserfs_sync_file(
   return ( n_err < 0 ) ? -EIO : 0;
 }
 
-static int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) {
+/* Fold ATTR_FLAG_{IMMUTABLE,IUNLINK,BARRIER} from @flags into @inode's reiserfs attrs; returns 0. */
+int reiserfs_setattr_flags(struct inode *inode, unsigned int flags)
+{
+       unsigned int newflags, oldflags = REISERFS_I(inode)->i_attrs;
+
+       newflags = oldflags & ~(REISERFS_IMMUTABLE_FL |
+               REISERFS_IUNLINK_FL | REISERFS_BARRIER_FL);
+       if (flags & ATTR_FLAG_IMMUTABLE)
+               newflags |= REISERFS_IMMUTABLE_FL;
+       if (flags & ATTR_FLAG_IUNLINK)
+               newflags |= REISERFS_IUNLINK_FL;
+       if (flags & ATTR_FLAG_BARRIER)
+               newflags |= REISERFS_BARRIER_FL;
+       /* i_attrs is where init_inode and reiserfs_ioctl keep the REISERFS_*_FL bits; write only on change */
+       if (oldflags ^ newflags) {
+               REISERFS_I(inode)->i_attrs = newflags;
+               inode->i_ctime = CURRENT_TIME;
+       }
+       return 0;
+}
+
+int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) {
     struct inode *inode = dentry->d_inode ;
     int error ;
+
     reiserfs_write_lock(inode->i_sb);
+    if (S_ISDIR(inode->i_mode))
+       goto is_dir;
+
     if (attr->ia_valid & ATTR_SIZE) {
        /* version 2 items will be caught by the s_maxbytes check
        ** done for us in vmtruncate
@@ -133,7 +158,12 @@ static int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) {
            goto out;   
        }
 
+is_dir:
     error = inode_change_ok(inode, attr) ;
+
+    if (!error && attr->ia_valid & ATTR_ATTR_FLAG)
+       reiserfs_setattr_flags(inode, attr->ia_attr_flags);
+
     if (!error)
         inode_setattr(inode, attr) ;
 
index 6aded3b..ce9c3d8 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/buffer_head.h>
 #include <linux/mpage.h>
 #include <linux/writeback.h>
+#include <linux/vserver/xid.h>
 
 extern int reiserfs_default_io_size; /* default io size devuned in super.c */
 
@@ -957,6 +958,8 @@ static void init_inode (struct inode * inode, struct path * path)
     struct buffer_head * bh;
     struct item_head * ih;
     __u32 rdev;
+    uid_t uid;
+    gid_t gid;
     //int version = ITEM_VERSION_1;
 
     bh = PATH_PLAST_BUFFER (path);
@@ -977,12 +980,13 @@ static void init_inode (struct inode * inode, struct path * path)
        struct stat_data_v1 * sd = (struct stat_data_v1 *)B_I_PITEM (bh, ih);
        unsigned long blocks;
 
+       uid = sd_v1_uid(sd);
+       gid = sd_v1_gid(sd);
+
        set_inode_item_key_version (inode, KEY_FORMAT_3_5);
         set_inode_sd_version (inode, STAT_DATA_V1);
        inode->i_mode  = sd_v1_mode(sd);
        inode->i_nlink = sd_v1_nlink(sd);
-       inode->i_uid   = sd_v1_uid(sd);
-       inode->i_gid   = sd_v1_gid(sd);
        inode->i_size  = sd_v1_size(sd);
        inode->i_atime.tv_sec = sd_v1_atime(sd);
        inode->i_mtime.tv_sec = sd_v1_mtime(sd);
@@ -1014,11 +1018,12 @@ static void init_inode (struct inode * inode, struct path * path)
        // (directories and symlinks)
        struct stat_data * sd = (struct stat_data *)B_I_PITEM (bh, ih);
 
+       uid    = sd_v2_uid(sd);
+       gid    = sd_v2_gid(sd);
+
        inode->i_mode   = sd_v2_mode(sd);
        inode->i_nlink  = sd_v2_nlink(sd);
-       inode->i_uid    = sd_v2_uid(sd);
        inode->i_size   = sd_v2_size(sd);
-       inode->i_gid    = sd_v2_gid(sd);
        inode->i_mtime.tv_sec  = sd_v2_mtime(sd);
        inode->i_atime.tv_sec = sd_v2_atime(sd);
        inode->i_ctime.tv_sec  = sd_v2_ctime(sd);
@@ -1043,6 +1048,9 @@ static void init_inode (struct inode * inode, struct path * path)
        REISERFS_I(inode)->i_attrs = sd_v2_attrs( sd );
        sd_attrs_to_i_attrs( sd_v2_attrs( sd ), inode );
     }
+    inode->i_uid = INOXID_UID(uid, gid);
+    inode->i_gid = INOXID_GID(uid, gid);
+    inode->i_xid = INOXID_XID(uid, gid, 0);
 
     pathrelse (path);
     if (S_ISREG (inode->i_mode)) {
@@ -1066,13 +1074,15 @@ static void init_inode (struct inode * inode, struct path * path)
 static void inode2sd (void * sd, struct inode * inode)
 {
     struct stat_data * sd_v2 = (struct stat_data *)sd;
+    uid_t uid = XIDINO_UID(inode->i_uid, inode->i_xid);
+    gid_t gid = XIDINO_GID(inode->i_gid, inode->i_xid);
     __u16 flags;
 
+    set_sd_v2_uid(sd_v2, uid );
+    set_sd_v2_gid(sd_v2, gid );
     set_sd_v2_mode(sd_v2, inode->i_mode );
     set_sd_v2_nlink(sd_v2, inode->i_nlink );
-    set_sd_v2_uid(sd_v2, inode->i_uid );
     set_sd_v2_size(sd_v2, inode->i_size );
-    set_sd_v2_gid(sd_v2, inode->i_gid );
     set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec );
     set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec );
     set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec );
@@ -2326,6 +2336,14 @@ void sd_attrs_to_i_attrs( __u16 sd_attrs, struct inode *inode )
                        inode -> i_flags |= S_IMMUTABLE;
                else
                        inode -> i_flags &= ~S_IMMUTABLE;
+               if( sd_attrs & REISERFS_IUNLINK_FL )
+                       inode -> i_flags |= S_IUNLINK;
+               else
+                       inode -> i_flags &= ~S_IUNLINK;
+               if( sd_attrs & REISERFS_BARRIER_FL )
+                       inode -> i_flags |= S_BARRIER;
+               else
+                       inode -> i_flags &= ~S_BARRIER;
                if( sd_attrs & REISERFS_APPEND_FL )
                        inode -> i_flags |= S_APPEND;
                else
@@ -2348,6 +2366,14 @@ void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs )
                        *sd_attrs |= REISERFS_IMMUTABLE_FL;
                else
                        *sd_attrs &= ~REISERFS_IMMUTABLE_FL;
+               if( inode -> i_flags & S_IUNLINK )
+                       *sd_attrs |= REISERFS_IUNLINK_FL;
+               else
+                       *sd_attrs &= ~REISERFS_IUNLINK_FL;
+               if( inode -> i_flags & S_BARRIER )
+                       *sd_attrs |= REISERFS_BARRIER_FL;
+               else
+                       *sd_attrs &= ~REISERFS_BARRIER_FL;
                if( inode -> i_flags & S_SYNC )
                        *sd_attrs |= REISERFS_SYNC_FL;
                else
index ec59e07..eaddaf9 100644 (file)
@@ -20,7 +20,7 @@
 int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                unsigned long arg)
 {
-       unsigned int flags;
+       unsigned int flags, oldflags;
 
        switch (cmd) {
            case REISERFS_IOC_UNPACK:
@@ -36,6 +36,7 @@ int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
        case REISERFS_IOC_GETFLAGS:
                flags = REISERFS_I(inode) -> i_attrs;
                i_attrs_to_sd_attrs( inode, ( __u16 * ) &flags );
+               flags &= REISERFS_FL_USER_VISIBLE;
                return put_user(flags, (int *) arg);
        case REISERFS_IOC_SETFLAGS: {
                if (IS_RDONLY(inode))
@@ -47,7 +48,9 @@ int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                if (get_user(flags, (int *) arg))
                        return -EFAULT;
 
-               if ( ( ( flags ^ REISERFS_I(inode) -> i_attrs) & ( REISERFS_IMMUTABLE_FL | REISERFS_APPEND_FL)) &&
+               oldflags = REISERFS_I(inode) -> i_attrs;
+               if ( ( ( flags ^ oldflags) &
+                  ( REISERFS_IMMUTABLE_FL | REISERFS_IUNLINK_FL | REISERFS_APPEND_FL)) &&
                     !capable( CAP_LINUX_IMMUTABLE ) )
                        return -EPERM;
                        
@@ -59,6 +62,9 @@ int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                                if( result )
                                        return result;
                }
+               
+               flags = flags & REISERFS_FL_USER_MODIFYABLE;
+               flags |= oldflags & ~REISERFS_FL_USER_MODIFYABLE;
                sd_attrs_to_i_attrs( flags, inode );
                REISERFS_I(inode) -> i_attrs = flags;
                inode->i_ctime = CURRENT_TIME;
index 5dae18f..827b64f 100644 (file)
@@ -1318,5 +1318,6 @@ struct inode_operations reiserfs_dir_inode_operations = {
     .rmdir     = reiserfs_rmdir,
     .mknod     = reiserfs_mknod,
     .rename    = reiserfs_rename,
+    .setattr   = reiserfs_setattr,
 };
 
index d6402d7..df6330c 100644 (file)
@@ -1017,6 +1017,8 @@ xfs_ioc_fsgeometry(
 #define LINUX_XFLAG_APPEND     0x00000020 /* writes to file may only append */
 #define LINUX_XFLAG_NODUMP     0x00000040 /* do not dump file */
 #define LINUX_XFLAG_NOATIME    0x00000080 /* do not update atime */
+#define LINUX_XFLAG_BARRIER    0x00004000 /* chroot() barrier */
+#define LINUX_XFLAG_IUNLINK    0x00008000 /* Immutable unlink */
 
 STATIC unsigned int
 xfs_merge_ioc_xflags(
@@ -1062,6 +1064,7 @@ xfs_ioc_xattr(
        int                     error;
        int                     attr_flags;
        unsigned int            flags;
+       unsigned int            old_flags;
 
        switch (cmd) {
        case XFS_IOC_FSGETXATTR: {
@@ -1086,7 +1089,7 @@ xfs_ioc_xattr(
                attr_flags = 0;
                if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
                        attr_flags |= ATTR_NONBLOCK;
-
+               
                va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE;
                va.va_xflags  = fa.fsx_xflags;
                va.va_extsize = fa.fsx_extsize;
@@ -1114,15 +1117,17 @@ xfs_ioc_xattr(
 
        case XFS_IOC_GETXFLAGS: {
                flags = 0;
-               if (ip->i_d.di_flags & XFS_XFLAG_IMMUTABLE)
+               if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
                        flags |= LINUX_XFLAG_IMMUTABLE;
-               if (ip->i_d.di_flags & XFS_XFLAG_APPEND)
+               if (ip->i_d.di_flags & XFS_DIFLAG_IUNLINK)
+                       flags |= LINUX_XFLAG_IUNLINK;
+               if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
                        flags |= LINUX_XFLAG_APPEND;
-               if (ip->i_d.di_flags & XFS_XFLAG_SYNC)
+               if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
                        flags |= LINUX_XFLAG_SYNC;
-               if (ip->i_d.di_flags & XFS_XFLAG_NOATIME)
+               if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
                        flags |= LINUX_XFLAG_NOATIME;
-               if (ip->i_d.di_flags & XFS_XFLAG_NODUMP)
+               if (ip->i_d.di_flags & XFS_DIFLAG_NODUMP)
                        flags |= LINUX_XFLAG_NODUMP;
                if (copy_to_user((unsigned int *)arg, &flags, sizeof(flags)))
                        return -XFS_ERROR(EFAULT);
@@ -1142,8 +1147,16 @@ xfs_ioc_xattr(
                if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
                        attr_flags |= ATTR_NONBLOCK;
 
+               old_flags = 0;
+               if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
+                       old_flags |= LINUX_XFLAG_IMMUTABLE;
+               if (ip->i_d.di_flags & XFS_DIFLAG_IUNLINK)
+                       old_flags |= LINUX_XFLAG_IUNLINK;
+               if (ip->i_d.di_flags & XFS_DIFLAG_BARRIER)
+                       old_flags |= LINUX_XFLAG_BARRIER;
+
                va.va_mask = XFS_AT_XFLAGS;
-               va.va_xflags = xfs_merge_ioc_xflags(flags, ip->i_d.di_flags);
+               va.va_xflags = xfs_merge_ioc_xflags(flags, old_flags);
 
                VOP_SETATTR(vp, &va, attr_flags, NULL, error);
                if (!error)
index 4b3e61d..dcc8682 100644 (file)
@@ -490,6 +490,28 @@ linvfs_getattr(
        return 0;
 }
 
+STATIC int                     /* maps ATTR_FLAG_* onto XFS_XFLAG_* in vap->va_xflags; always returns 0 */
+linvfs_setattr_flags(
+       vattr_t *vap,           /* in/out: va_xflags is read, then rewritten if it changes */
+       unsigned int flags)     /* ATTR_FLAG_* bits (the caller passes attr->ia_attr_flags) */
+{
+       unsigned int oldflags, newflags;
+
+       oldflags = vap->va_xflags;
+       newflags = oldflags & ~(XFS_XFLAG_IMMUTABLE |
+               XFS_XFLAG_IUNLINK | XFS_XFLAG_BARRIER);
+       if (flags & ATTR_FLAG_IMMUTABLE)
+               newflags |= XFS_XFLAG_IMMUTABLE;
+       if (flags & ATTR_FLAG_IUNLINK)
+               newflags |= XFS_XFLAG_IUNLINK;
+       if (flags & ATTR_FLAG_BARRIER)
+               newflags |= XFS_XFLAG_BARRIER;
+       /* NOTE(review): all other xflags are whatever the caller's vattr already held - confirm it is pre-filled */
+       if (oldflags ^ newflags)
+               vap->va_xflags = newflags;
+       return 0;
+}
+
 STATIC int
 linvfs_setattr(
        struct dentry   *dentry,
@@ -541,6 +563,11 @@ linvfs_setattr(
                flags |= ATTR_NONBLOCK;
 #endif
 
+       if (ia_valid & ATTR_ATTR_FLAG) {
+               vattr.va_mask |= XFS_AT_XFLAGS;
+               linvfs_setattr_flags(&vattr, attr->ia_attr_flags);
+       }
+
        VOP_SETATTR(vp, &vattr, flags, NULL, error);
        if (error)
                return(-error); /* Positive error up from XFS */
index bbaf61b..b0efce8 100644 (file)
@@ -189,6 +189,14 @@ xfs_revalidate_inode(
                inode->i_flags |= S_IMMUTABLE;
        else
                inode->i_flags &= ~S_IMMUTABLE;
+       if (ip->i_d.di_flags & XFS_DIFLAG_IUNLINK)
+               inode->i_flags |= S_IUNLINK;
+       else
+               inode->i_flags &= ~S_IUNLINK;
+       if (ip->i_d.di_flags & XFS_DIFLAG_BARRIER)
+               inode->i_flags |= S_BARRIER;
+       else
+               inode->i_flags &= ~S_BARRIER;
        if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
                inode->i_flags |= S_APPEND;
        else
index 9240efb..44ba5e5 100644 (file)
@@ -217,6 +217,14 @@ vn_revalidate(
                        inode->i_flags |= S_IMMUTABLE;
                else
                        inode->i_flags &= ~S_IMMUTABLE;
+               if (va.va_xflags & XFS_XFLAG_IUNLINK)
+                       inode->i_flags |= S_IUNLINK;
+               else
+                       inode->i_flags &= ~S_IUNLINK;
+               if (va.va_xflags & XFS_XFLAG_BARRIER)
+                       inode->i_flags |= S_BARRIER;
+               else
+                       inode->i_flags &= ~S_BARRIER;
                if (va.va_xflags & XFS_XFLAG_APPEND)
                        inode->i_flags |= S_APPEND;
                else
index e0b529b..f8f6e0f 100644 (file)
@@ -456,6 +456,9 @@ xfs_dinode_t *xfs_buf_to_dinode(struct xfs_buf *bp);
 #define XFS_DIFLAG_SYNC_BIT      5     /* inode is written synchronously */
 #define XFS_DIFLAG_NOATIME_BIT   6     /* do not update atime */
 #define XFS_DIFLAG_NODUMP_BIT    7     /* do not dump */
+#define XFS_DIFLAG_BARRIER_BIT   10    /* chroot() barrier */
+#define XFS_DIFLAG_IUNLINK_BIT   11    /* inode has iunlink */
+
 #define XFS_DIFLAG_REALTIME      (1 << XFS_DIFLAG_REALTIME_BIT)
 #define XFS_DIFLAG_PREALLOC      (1 << XFS_DIFLAG_PREALLOC_BIT)
 #define XFS_DIFLAG_NEWRTBM       (1 << XFS_DIFLAG_NEWRTBM_BIT)
@@ -464,5 +467,8 @@ xfs_dinode_t *xfs_buf_to_dinode(struct xfs_buf *bp);
 #define XFS_DIFLAG_SYNC          (1 << XFS_DIFLAG_SYNC_BIT)
 #define XFS_DIFLAG_NOATIME       (1 << XFS_DIFLAG_NOATIME_BIT)
 #define XFS_DIFLAG_NODUMP        (1 << XFS_DIFLAG_NODUMP_BIT)
+#define XFS_DIFLAG_BARRIER       (1 << XFS_DIFLAG_BARRIER_BIT)
+#define XFS_DIFLAG_IUNLINK       (1 << XFS_DIFLAG_IUNLINK_BIT)
+
 
 #endif /* __XFS_DINODE_H__ */
index 1ed650e..7e1b82c 100644 (file)
@@ -76,6 +76,8 @@ struct fsxattr {
 #define XFS_XFLAG_SYNC         0x00000020      /* all writes synchronous */
 #define XFS_XFLAG_NOATIME      0x00000040      /* do not update access time */
 #define XFS_XFLAG_NODUMP       0x00000080      /* do not include in backups */
+#define XFS_XFLAG_BARRIER      0x00004000      /* chroot() barrier */
+#define XFS_XFLAG_IUNLINK      0x00008000      /* Immutable unlink */
 #define XFS_XFLAG_HASATTR      0x80000000      /* no DIFLAG for this   */
 
 /*
index dd20a0a..550986f 100644 (file)
@@ -255,6 +255,10 @@ xfs_getattr(
                vap->va_xflags |= XFS_XFLAG_PREALLOC;
        if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
                vap->va_xflags |= XFS_XFLAG_IMMUTABLE;
+       if (ip->i_d.di_flags & XFS_DIFLAG_IUNLINK)
+               vap->va_xflags |= XFS_XFLAG_IUNLINK;
+       if (ip->i_d.di_flags & XFS_DIFLAG_BARRIER)
+               vap->va_xflags |= XFS_XFLAG_BARRIER;
        if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
                vap->va_xflags |= XFS_XFLAG_APPEND;
        if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
@@ -850,6 +854,10 @@ xfs_setattr(
                        }
                        if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
                                ip->i_d.di_flags |= XFS_DIFLAG_IMMUTABLE;
+                       if (vap->va_xflags & XFS_XFLAG_IUNLINK)
+                               ip->i_d.di_flags |= XFS_DIFLAG_IUNLINK;
+                       if (vap->va_xflags & XFS_XFLAG_BARRIER)
+                               ip->i_d.di_flags |= XFS_DIFLAG_BARRIER;
                        if (vap->va_xflags & XFS_XFLAG_APPEND)
                                ip->i_d.di_flags |= XFS_DIFLAG_APPEND;
                        if (vap->va_xflags & XFS_XFLAG_SYNC)
index 7e65aa4..a898e27 100644 (file)
 #define __NR_osf_memcntl       260     /* not implemented */
 #define __NR_osf_fdatasync     261     /* not implemented */
 
+#define __NR_vserver           273
 
 /*
  * Linux-specific system calls begin at 300
index ab3cad4..a21e6a0 100644 (file)
@@ -58,7 +58,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 
        if (rss < freed)
                freed = rss;
-       mm->rss = rss - freed;
+       // mm->rss = rss - freed;
+       vx_rsspages_sub(mm, freed);
 
        if (freed) {
                flush_tlb_mm(mm);
index ee6d11d..214185e 100644 (file)
@@ -38,7 +38,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 
         if (rss < freed)
                 freed = rss;
-        mm->rss = rss - freed;
+        // mm->rss = rss - freed;
+       vx_rsspages_sub(mm, freed);
 
         if (freed) {
                 flush_tlb_mm(mm);
index 1358c51..aa7d3c0 100644 (file)
@@ -15,6 +15,7 @@
 
 #include <linux/config.h>
 #include <linux/swap.h>
+#include <linux/vinline.h>
 #include <asm/tlbflush.h>
 
 /*
@@ -91,7 +92,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 
        if (rss < freed)
                freed = rss;
-       mm->rss = rss - freed;
+       // mm->rss = rss - freed;
+       vx_rsspages_sub(mm, freed);
        tlb_flush_mmu(tlb, start, end);
 
        /* keep the page table cache within bounds */
index 5d714b2..f1ff709 100644 (file)
@@ -163,7 +163,8 @@ tlb_finish_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
 
        if (rss < freed)
                freed = rss;
-       mm->rss = rss - freed;
+       // mm->rss = rss - freed;
+       vx_rsspages_sub(mm, freed);
        /*
         * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and
         * tlb->end_addr.
index 0cc0b9f..429e617 100644 (file)
 #define __NR_fremovexattr      234
 #define __NR_futex             235
 
-#define NR_syscalls            236
+#define __NR_vserver           273
+
+#define NR_syscalls            274
 
 /* user-visible error numbers are in the range -1 - -124: see
    <asm-m68k/errno.h> */
index c58c979..c6a5208 100644 (file)
 #define __NR_setfsuid32                215
 #define __NR_setfsgid32                216
 
-#define        NR_syscalls             256
+#define __NR_vserver           273
+
+#define NR_syscalls            274
 
 /* user-visible error numbers are in the range -1 - -122: see
    <asm-m68k/errno.h> */
index 8fe42c7..bdf0c3d 100644 (file)
 #define __NR_tgkill                    (__NR_Linux + 266)
 #define __NR_utimes                    (__NR_Linux + 267)
 
+#define __NR_vserver                   (__NR_Linux + 273)
+
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls            267
+#define __NR_Linux_syscalls            273
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
index d810f2b..c4a36aa 100644 (file)
 #define __NR_remap_file_pages  (__NR_Linux + 227)
 #define __NR_semtimedop                (__NR_Linux + 228)
 
+#define __NR_vserver           (__NR_Linux + 273)
 
-#define __NR_Linux_syscalls     228
+#define __NR_Linux_syscalls     273
 
 #define HPUX_GATEWAY_ADDR       0xC0000004
 #define LINUX_GATEWAY_ADDR      0x100
index 6ef6c95..aa19077 100644 (file)
 #define __NR_fadvise64_64      254
 #define __NR_rtas              255
 /* Number 256 is reserved for sys_debug_setcontext */
-/* Number 257 is reserved for vserver */
+#define __NR_vserver           257
 /* Number 258 is reserved for new sys_remap_file_pages */
 /* Number 259 is reserved for new sys_mbind */
 /* Number 260 is reserved for new sys_get_mempolicy */
index 8b3e5d5..6604ae7 100644 (file)
 #define __NR_fadvise64_64      254
 #define __NR_rtas              255
 /* Number 256 is reserved for sys_debug_setcontext */
-/* Number 257 is reserved for vserver */
+#define __NR_vserver           257
 /* Number 258 is reserved for new sys_remap_file_pages */
 /* Number 259 is reserved for new sys_mbind */
 /* Number 260 is reserved for new sys_get_mempolicy */
index c15e6d7..27b8384 100644 (file)
 #define __NR_clock_gettime     (__NR_timer_create+6)
 #define __NR_clock_getres      (__NR_timer_create+7)
 #define __NR_clock_nanosleep   (__NR_timer_create+8)
-/* Number 263 is reserved for vserver */
+#define __NR_vserver           263
 #define __NR_fadvise64_64      264
 #define __NR_statfs64          265
 #define __NR_fstatfs64         266
index 4c0c9fb..a28b621 100644 (file)
 #define __NR_timer_getoverrun  264
 #define __NR_timer_delete      265
 #define __NR_timer_create      266
-/* #define __NR_vserver                267 Reserved for VSERVER */
+#define __NR_vserver           267
 #define __NR_io_setup          268
 #define __NR_io_destroy                269
 #define __NR_io_submit         270
index 751f7de..fa7c78a 100644 (file)
 #define __NR_timer_getoverrun  264
 #define __NR_timer_delete      265
 #define __NR_timer_create      266
-/* #define __NR_vserver                267 Reserved for VSERVER */
+#define __NR_vserver           267
 #define __NR_io_setup          268
 #define __NR_io_destroy                269
 #define __NR_io_submit         270
index c96e7b6..7798d2c 100644 (file)
@@ -235,6 +235,7 @@ typedef __u32 kernel_cap_t;
 /* Allow enabling/disabling tagged queuing on SCSI controllers and sending
    arbitrary SCSI commands */
 /* Allow setting encryption key on loopback filesystem */
+/* Allow the selection of a security context */
 
 #define CAP_SYS_ADMIN        21
 
@@ -284,6 +285,11 @@ typedef __u32 kernel_cap_t;
 
 #define CAP_LEASE            28
 
+/* Allow context manipulations */
+/* Allow changing context info on files */
+
+#define CAP_CONTEXT          29
+
 #ifdef __KERNEL__
 /* 
  * Bounding set
index d701ba8..7c6f650 100644 (file)
@@ -192,6 +192,8 @@ struct ext2_group_desc
 #define EXT2_NOTAIL_FL                 0x00008000 /* file tail should not be merged */
 #define EXT2_DIRSYNC_FL                        0x00010000 /* dirsync behaviour (directories only) */
 #define EXT2_TOPDIR_FL                 0x00020000 /* Top of directory hierarchies*/
+#define EXT2_BARRIER_FL                        0x04000000 /* Barrier for chroot() */
+#define EXT2_IUNLINK_FL                        0x08000000 /* Immutable unlink */
 #define EXT2_RESERVED_FL               0x80000000 /* reserved for ext2 lib */
 
 #define EXT2_FL_USER_VISIBLE           0x0003DFFF /* User visible flags */
@@ -240,7 +242,7 @@ struct ext2_inode {
                struct {
                        __u8    l_i_frag;       /* Fragment number */
                        __u8    l_i_fsize;      /* Fragment size */
-                       __u16   i_pad1;
+                       __u16   l_i_xid;        /* LRU Context */
                        __u16   l_i_uid_high;   /* these 2 fields    */
                        __u16   l_i_gid_high;   /* were reserved2[0] */
                        __u32   l_i_reserved2;
@@ -272,6 +274,7 @@ struct ext2_inode {
 #define i_gid_low      i_gid
 #define i_uid_high     osd2.linux2.l_i_uid_high
 #define i_gid_high     osd2.linux2.l_i_gid_high
+#define i_raw_xid      osd2.linux2.l_i_xid
 #define i_reserved2    osd2.linux2.l_i_reserved2
 #endif
 
@@ -312,6 +315,7 @@ struct ext2_inode {
 #define EXT2_MOUNT_NO_UID32            0x0200  /* Disable 32-bit UIDs */
 #define EXT2_MOUNT_XATTR_USER          0x4000  /* Extended user attributes */
 #define EXT2_MOUNT_POSIX_ACL           0x8000  /* POSIX Access Control Lists */
+#define EXT2_MOUNT_TAG_XID             (1<<16) /* Enable Context Tags */
 
 #define clear_opt(o, opt)              o &= ~EXT2_MOUNT_##opt
 #define set_opt(o, opt)                        o |= EXT2_MOUNT_##opt
index d90013e..27449a4 100644 (file)
@@ -185,6 +185,8 @@ struct ext3_group_desc
 #define EXT3_NOTAIL_FL                 0x00008000 /* file tail should not be merged */
 #define EXT3_DIRSYNC_FL                        0x00010000 /* dirsync behaviour (directories only) */
 #define EXT3_TOPDIR_FL                 0x00020000 /* Top of directory hierarchies*/
+#define EXT3_BARRIER_FL                        0x04000000 /* Barrier for chroot() */
+#define EXT3_IUNLINK_FL                        0x08000000 /* Immutable unlink */
 #define EXT3_RESERVED_FL               0x80000000 /* reserved for ext3 lib */
 
 #define EXT3_FL_USER_VISIBLE           0x0003DFFF /* User visible flags */
@@ -208,6 +210,9 @@ struct ext3_group_desc
 #ifdef CONFIG_JBD_DEBUG
 #define EXT3_IOC_WAIT_FOR_READONLY     _IOR('f', 99, long)
 #endif
+#ifdef CONFIG_VSERVER_LEGACY           
+#define EXT3_IOC_SETXID                        FIOC_SETXIDJ
+#endif
 
 /*
  * Structure of an inode on the disk
@@ -244,7 +249,7 @@ struct ext3_inode {
                struct {
                        __u8    l_i_frag;       /* Fragment number */
                        __u8    l_i_fsize;      /* Fragment size */
-                       __u16   i_pad1;
+                       __u16   l_i_xid;        /* LRU Context */
                        __u16   l_i_uid_high;   /* these 2 fields    */
                        __u16   l_i_gid_high;   /* were reserved2[0] */
                        __u32   l_i_reserved2;
@@ -276,6 +281,7 @@ struct ext3_inode {
 #define i_gid_low      i_gid
 #define i_uid_high     osd2.linux2.l_i_uid_high
 #define i_gid_high     osd2.linux2.l_i_gid_high
+#define i_raw_xid      osd2.linux2.l_i_xid
 #define i_reserved2    osd2.linux2.l_i_reserved2
 
 #elif defined(__GNU__)
@@ -324,6 +330,7 @@ struct ext3_inode {
 #define EXT3_MOUNT_NO_UID32            0x2000  /* Disable 32-bit UIDs */
 #define EXT3_MOUNT_XATTR_USER          0x4000  /* Extended user attributes */
 #define EXT3_MOUNT_POSIX_ACL           0x8000  /* POSIX Access Control Lists */
+#define EXT3_MOUNT_TAG_XID             (1<<16) /* Enable Context Tags */
 
 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H
index 8971ae3..5b70fb8 100644 (file)
@@ -116,6 +116,7 @@ extern int leases_enable, dir_notify_enable, lease_break_time;
 #define MS_VERBOSE     32768
 #define MS_POSIXACL    (1<<16) /* VFS does not apply the umask */
 #define MS_ONE_SECOND  (1<<17) /* fs has 1 sec a/m/ctime resolution */
+#define MS_TAGXID      (1<<24) /* tag inodes with context information */
 #define MS_ACTIVE      (1<<30)
 #define MS_NOUSER      (1<<31)
 
@@ -142,6 +143,8 @@ extern int leases_enable, dir_notify_enable, lease_break_time;
 #define S_NOQUOTA      64      /* Inode is not counted to quota */
 #define S_DIRSYNC      128     /* Directory modifications are synchronous */
 #define S_NOCMTIME     256     /* Do not update file c/mtime */
+#define S_BARRIER      512     /* Barrier for chroot() */
+#define S_IUNLINK      1024    /* Immutable unlink */
 
 /*
  * Note that nosuid etc flags are inode-specific: setting some file-system
@@ -169,11 +172,14 @@ extern int leases_enable, dir_notify_enable, lease_break_time;
 #define IS_NOQUOTA(inode)      ((inode)->i_flags & S_NOQUOTA)
 #define IS_APPEND(inode)       ((inode)->i_flags & S_APPEND)
 #define IS_IMMUTABLE(inode)    ((inode)->i_flags & S_IMMUTABLE)
+#define IS_IUNLINK(inode)      ((inode)->i_flags & S_IUNLINK)
+#define IS_IXORUNLINK(inode)   ((IS_IUNLINK(inode) ? S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode))
 #define IS_NOATIME(inode)      (__IS_FLG(inode, MS_NOATIME) || ((inode)->i_flags & S_NOATIME))
 #define IS_NODIRATIME(inode)   __IS_FLG(inode, MS_NODIRATIME)
 #define IS_POSIXACL(inode)     __IS_FLG(inode, MS_POSIXACL)
 #define IS_ONE_SECOND(inode)   __IS_FLG(inode, MS_ONE_SECOND)
 
+#define IS_BARRIER(inode)      (S_ISDIR((inode)->i_mode) && ((inode)->i_flags & S_BARRIER))
 #define IS_DEADDIR(inode)      ((inode)->i_flags & S_DEAD)
 #define IS_NOCMTIME(inode)     ((inode)->i_flags & S_NOCMTIME)
 
@@ -280,6 +286,9 @@ struct iattr {
 #define ATTR_FLAG_IMMUTABLE    8       /* Immutable file */
 #define ATTR_FLAG_NODIRATIME   16      /* Don't update atime for directory */
 
+#define ATTR_FLAG_BARRIER      512     /* Barrier for chroot() */
+#define ATTR_FLAG_IUNLINK      1024    /* Immutable unlink */
+
 /*
  * Includes for diskquotas.
  */
@@ -415,6 +424,7 @@ struct inode {
        unsigned int            i_nlink;
        uid_t                   i_uid;
        gid_t                   i_gid;
+       xid_t                   i_xid;
        dev_t                   i_rdev;
        loff_t                  i_size;
        struct timespec         i_atime;
index 2918970..3ba25d6 100644 (file)
@@ -112,6 +112,10 @@ extern struct group_info init_groups;
        .proc_lock      = SPIN_LOCK_UNLOCKED,                           \
        .switch_lock    = SPIN_LOCK_UNLOCKED,                           \
        .journal_info   = NULL,                                         \
+       .xid            = 0,                                            \
+       .nid            = 0,                                            \
+       .vx_info        = NULL,                                         \
+       .nx_info        = NULL,                                         \
 }
 
 
index ab799b4..5ea190d 100644 (file)
@@ -111,6 +111,7 @@ struct inet_opt {
        /* Socket demultiplex comparisons on incoming packets. */
        __u32                   daddr;          /* Foreign IPv4 addr */
        __u32                   rcv_saddr;      /* Bound local IPv4 addr */
+       __u32                   rcv_saddr2;     /* Second bound ipv4 addr, for ipv4root */
        __u16                   dport;          /* Destination port */
        __u16                   num;            /* Local port */
        __u32                   saddr;          /* Sending source */
index b291189..079c2fe 100644 (file)
@@ -66,6 +66,7 @@ struct kern_ipc_perm
        mode_t          mode; 
        unsigned long   seq;
        void            *security;
+       xid_t           xid;
 };
 
 #endif /* __KERNEL__ */
index 46c59a6..1de4294 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/list.h>
 #include <linux/mmzone.h>
 #include <linux/rbtree.h>
+#include <linux/vinline.h>
 #include <linux/fs.h>
 
 #ifndef CONFIG_DISCONTIGMEM          /* Don't use mapnrs, do it properly */
index fdd8abb..ae9c17d 100644 (file)
@@ -13,6 +13,7 @@ struct namespace {
 };
 
 extern void umount_tree(struct vfsmount *);
+extern void umount_unused(struct vfsmount *, struct fs_struct *);
 extern int copy_namespace(int, struct task_struct *);
 void __put_namespace(struct namespace *namespace);
 
index 3d2cd0c..6293dd3 100644 (file)
@@ -60,6 +60,8 @@ typedef enum {
 #define SOCK_ASYNC_NOSPACE     0
 #define SOCK_ASYNC_WAITDATA    1
 #define SOCK_NOSPACE           2
+#define SOCK_PASS_CRED         16
+#define SOCK_USER_SOCKET       17
 
 /**
  *  struct socket - general BSD socket
@@ -82,7 +84,6 @@ struct socket {
        struct sock             *sk;
        wait_queue_head_t       wait;
        short                   type;
-       unsigned char           passcred;
 };
 
 struct vm_area_struct;
index 2d439a8..3c32260 100644 (file)
@@ -55,6 +55,7 @@ struct proc_dir_entry {
        nlink_t nlink;
        uid_t uid;
        gid_t gid;
+       int vx_flags;
        unsigned long size;
        struct inode_operations * proc_iops;
        struct file_operations * proc_fops;
@@ -237,9 +238,11 @@ extern struct kcore_list *kclist_del(void *);
 struct proc_inode {
        struct task_struct *task;
        int type;
+       int vx_flags;
        union {
                int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **);
                int (*proc_read)(struct task_struct *task, char *page);
+               int (*proc_vid_read)(int vid, char *page);
        } op;
        struct proc_dir_entry *pde;
        struct inode vfs_inode;
index dfb46b5..f64e252 100644 (file)
@@ -888,6 +888,13 @@ struct stat_data_v1
 #define REISERFS_COMPR_FL     EXT2_COMPR_FL
 #define REISERFS_NOTAIL_FL    EXT2_NOTAIL_FL
 
+/* unfortunately reiserfs sdattr is only 16 bit */
+#define REISERFS_BARRIER_FL   (EXT2_BARRIER_FL >> 16)
+#define REISERFS_IUNLINK_FL   (EXT2_IUNLINK_FL >> 16)
+
+#define        REISERFS_FL_USER_VISIBLE        0x80FF
+#define        REISERFS_FL_USER_MODIFYABLE     0x80FF
+
 /* persistent flags that file inherits from the parent directory */
 #define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL |        \
                                REISERFS_SYNC_FL |      \
@@ -1957,6 +1964,7 @@ int reiserfs_new_inode (struct reiserfs_transaction_handle *th,
                                   struct dentry *dentry, struct inode *inode);
 int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode);
 void reiserfs_update_sd (struct reiserfs_transaction_handle *th, struct inode * inode);
+int reiserfs_setattr ( struct dentry *dentry, struct iattr *attr);
 
 void sd_attrs_to_i_attrs( __u16 sd_attrs, struct inode *inode );
 void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs );
index 73d7127..27ba7dd 100644 (file)
@@ -102,6 +102,7 @@ extern unsigned long nr_iowait(void);
 #include <linux/timer.h>
 
 #include <asm/processor.h>
+#include <linux/vserver/context.h>
 
 #define TASK_RUNNING           0
 #define TASK_INTERRUPTIBLE     1
@@ -109,6 +110,7 @@ extern unsigned long nr_iowait(void);
 #define TASK_STOPPED           4
 #define TASK_ZOMBIE            8
 #define TASK_DEAD              16
+#define TASK_ONHOLD            32
 
 #define __set_task_state(tsk, state_value)             \
        do { (tsk)->state = (state_value); } while (0)
@@ -221,6 +223,7 @@ struct mm_struct {
 
        /* Architecture-specific MM context */
        mm_context_t context;
+       struct vx_info *mm_vx_info;
 
        /* coredumping support */
        int core_waiters;
@@ -315,9 +318,10 @@ struct user_struct {
        /* Hash table maintenance information */
        struct list_head uidhash_list;
        uid_t uid;
+       xid_t xid;
 };
 
-extern struct user_struct *find_user(uid_t);
+extern struct user_struct *find_user(xid_t, uid_t);
 
 extern struct user_struct root_user;
 #define INIT_USER (&root_user)
@@ -481,6 +485,14 @@ struct task_struct {
        void *security;
        struct audit_context *audit_context;
 
+/* vserver context data */
+       xid_t xid;
+       struct vx_info *vx_info;
+
+/* vserver network data */
+       nid_t nid;
+       struct nx_info *nx_info;
+
 /* Thread group tracking */
        u32 parent_exec_id;
        u32 self_exec_id;
@@ -600,7 +612,7 @@ extern void set_special_pids(pid_t session, pid_t pgrp);
 extern void __set_special_pids(pid_t session, pid_t pgrp);
 
 /* per-UID process charging. */
-extern struct user_struct * alloc_uid(uid_t);
+extern struct user_struct * alloc_uid(xid_t, uid_t);
 extern void free_uid(struct user_struct *);
 extern void switch_uid(struct user_struct *);
 
index d8929c6..64753c5 100644 (file)
@@ -133,6 +133,7 @@ enum
        KERN_NGROUPS_MAX=63,    /* int: NGROUPS_MAX */
        KERN_SPARC_SCONS_PWROFF=64, /* int: serial console power-off halt */
        KERN_HZ_TIMER=65,       /* int: hz timer on or off */
+       KERN_VSHELPER=66,       /* string: path to vshelper policy agent */
 };
 
 
index 23c414f..288ab65 100644 (file)
@@ -36,6 +36,8 @@ typedef __kernel_uid32_t      uid_t;
 typedef __kernel_gid32_t       gid_t;
 typedef __kernel_uid16_t        uid16_t;
 typedef __kernel_gid16_t        gid16_t;
+typedef unsigned int           xid_t;
+typedef unsigned int           nid_t;
 
 #ifdef CONFIG_UID16
 /* This is defined by include/asm-{arch}/posix_types.h */
index be877d7..b47489b 100644 (file)
@@ -11,9 +11,9 @@ extern rwlock_t unix_table_lock;
 
 extern atomic_t unix_tot_inflight;
 
-static inline struct sock *first_unix_socket(int *i)
+static inline struct sock *next_unix_socket_table(int *i)
 {
-       for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
+       for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
                if (!hlist_empty(&unix_socket_table[*i]))
                        return __sk_head(&unix_socket_table[*i]);
        }
@@ -22,16 +22,19 @@ static inline struct sock *first_unix_socket(int *i)
 
 static inline struct sock *next_unix_socket(int *i, struct sock *s)
 {
-       struct sock *next = sk_next(s);
-       /* More in this chain? */
-       if (next)
-               return next;
-       /* Look for next non-empty chain. */
-       for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
-               if (!hlist_empty(&unix_socket_table[*i]))
-                       return __sk_head(&unix_socket_table[*i]);
-       }
-       return NULL;
+       do {
+               if (s)
+                       s = sk_next(s);
+               if (!s)
+                       s = next_unix_socket_table(i);
+       } while (s && !vx_check(s->sk_xid, VX_IDENT|VX_WATCH));
+       return s;
+}
+
+static inline struct sock *first_unix_socket(int *i)
+{
+       *i = -1;        /* next_unix_socket_table() pre-increments *i, so -1 makes the scan start at bucket 0 */
+       return next_unix_socket(i, NULL);
 }
 
 #define forall_unix_sockets(i, s) \
index 7a851de..4cbac7d 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/route.h>
 #include <linux/ip.h>
 #include <linux/cache.h>
+#include <linux/ninline.h>
 
 #ifndef __KERNEL__
 #warning This file is not supposed to be used outside of kernel.
@@ -146,6 +147,59 @@ static inline char rt_tos2priority(u8 tos)
        return ip_tos2prio[IPTOS_TOS(tos)>>1];
 }
 
+#define IPI_LOOPBACK   __constant_htonl(0x7f000001)    /* 127.0.0.1 in network byte order on either endianness */
+
+static inline int ip_find_src(struct nx_info *nxi, struct rtable **rp, struct flowi *fl)
+{      /* pick or validate fl->fl4_src using nxi->ipv4[]; returns 0, -EPERM or the route lookup error */
+       int err;
+       int i, n = nxi->nbipv4;
+       u32 ipv4root = nxi->ipv4[0];    /* first address, used below as the default source */
+
+       if (ipv4root == 0)      /* first address is zero: leave the flow untouched */
+               return 0;
+
+       if (fl->fl4_src == 0) { /* caller gave no source: choose one */
+               if (n > 1) {
+                       u32 foundsrc;
+
+                       err = __ip_route_output_key(rp, fl);
+                       if (err) {      /* retry the lookup with the first address as source */
+                               fl->fl4_src = ipv4root;
+                               err = __ip_route_output_key(rp, fl);
+                       }
+                       if (err)
+                               return err;
+
+                       foundsrc = (*rp)->rt_src;
+                       ip_rt_put(*rp); /* only rt_src is used from this lookup */
+
+                       for (i=0; i<n; i++){
+                               u32 mask = nxi->mask[i];
+                               u32 ipv4 = nxi->ipv4[i];
+                               u32 net4 = ipv4 & mask;
+
+                               if (foundsrc == ipv4) { /* exact match wins and ends the search */
+                                       fl->fl4_src = ipv4;
+                                       break;
+                               }
+                               if (!fl->fl4_src && (foundsrc & mask) == net4)  /* else keep the first address on the same masked network */
+                                       fl->fl4_src = ipv4;
+                       }
+               }
+               if (fl->fl4_src == 0)   /* nothing chosen: loopback for a loopback destination, else the first address */
+                       fl->fl4_src = (fl->fl4_dst == IPI_LOOPBACK)
+                               ? IPI_LOOPBACK : ipv4root;
+       } else {        /* caller gave a source: it must be one of nxi->ipv4[] */
+               for (i=0; i<n; i++) {
+                       if (nxi->ipv4[i] == fl->fl4_src)
+                               break;
+               }
+               if (i == n)     /* loop ran to the end without a match */
+                       return -EPERM;
+       }
+       return 0;
+}
+
 static inline int ip_route_connect(struct rtable **rp, u32 dst,
                                   u32 src, u32 tos, int oif, u8 protocol,
                                   u16 sport, u16 dport, struct sock *sk)
@@ -160,7 +214,22 @@ static inline int ip_route_connect(struct rtable **rp, u32 dst,
                                         .dport = dport } } };
 
        int err;
-       if (!dst || !src) {
+       struct nx_info *nx_info = current->nx_info;
+
+       if (sk)
+               nx_info = sk->sk_nx_info;
+       vxdprintk("ip_route_connect(%p) %p,%p;%lx\n",
+               sk, nx_info, sk ? sk->sk_socket : NULL, /* sk may be NULL (see check above) */
+               (sk && sk->sk_socket) ? sk->sk_socket->flags : 0);
+
+       if (nx_info) {
+               err = ip_find_src(nx_info, rp, &fl);
+               if (err)
+                       return err;
+               if (fl.fl4_dst == IPI_LOOPBACK && !vx_check(0, VX_ADMIN))
+                       fl.fl4_dst = nx_info->ipv4[0];
+       }       
+       if (!fl.fl4_dst || !fl.fl4_src) {
                err = __ip_route_output_key(rp, &fl);
                if (err)
                        return err;
index b7ba74d..30e9fbb 100644 (file)
@@ -51,13 +51,13 @@ static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg,
 {
        if (!msg->msg_control)
        {
-               if (sock->passcred || scm->fp)
+               if (test_bit(SOCK_PASS_CRED, &sock->flags) || scm->fp)
                        msg->msg_flags |= MSG_CTRUNC;
                scm_destroy(scm);
                return;
        }
 
-       if (sock->passcred)
+       if (test_bit(SOCK_PASS_CRED, &sock->flags))
                put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(scm->creds), &scm->creds);
 
        if (!scm->fp)
index e01e617..276d8a6 100644 (file)
@@ -50,6 +50,7 @@
 #include <linux/security.h>
 
 #include <linux/filter.h>
+#include <linux/vinline.h>
 
 #include <asm/atomic.h>
 #include <net/dst.h>
@@ -109,6 +110,10 @@ struct sock_common {
        struct hlist_node       skc_node;
        struct hlist_node       skc_bind_node;
        atomic_t                skc_refcnt;
+       xid_t                   skc_xid;
+       struct vx_info          *skc_vx_info;
+       nid_t                   skc_nid;
+       struct nx_info          *skc_nx_info;
 };
 
 /**
@@ -186,6 +191,10 @@ struct sock {
 #define sk_node                        __sk_common.skc_node
 #define sk_bind_node           __sk_common.skc_bind_node
 #define sk_refcnt              __sk_common.skc_refcnt
+#define sk_xid                 __sk_common.skc_xid
+#define sk_vx_info             __sk_common.skc_vx_info
+#define sk_nid                 __sk_common.skc_nid
+#define sk_nx_info             __sk_common.skc_nx_info
        volatile unsigned char  sk_zapped;
        unsigned char           sk_shutdown;
        unsigned char           sk_use_write_queue;
index cba1f70..946d29e 100644 (file)
@@ -195,6 +195,10 @@ struct tcp_tw_bucket {
 #define tw_node                        __tw_common.skc_node
 #define tw_bind_node           __tw_common.skc_bind_node
 #define tw_refcnt              __tw_common.skc_refcnt
+#define tw_xid                 __tw_common.skc_xid
+#define tw_vx_info             __tw_common.skc_vx_info
+#define tw_nid                 __tw_common.skc_nid
+#define tw_nx_info             __tw_common.skc_nx_info
        volatile unsigned char  tw_substate;
        unsigned char           tw_rcv_wscale;
        __u16                   tw_sport;
index 37e2d3b..12c9367 100644 (file)
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -96,6 +96,7 @@ static int newque (key_t key, int msgflg)
 
        msq->q_perm.mode = (msgflg & S_IRWXUGO);
        msq->q_perm.key = key;
+       msq->q_perm.xid = current->xid;
 
        msq->q_perm.security = NULL;
        retval = security_msg_queue_alloc(msq);
@@ -788,7 +789,11 @@ static int sysvipc_msg_read_proc(char *buffer, char **start, off_t offset, int l
        for(i = 0; i <= msg_ids.max_id; i++) {
                struct msg_queue * msq;
                msq = msg_lock(i);
-               if(msq != NULL) {
+               if (msq) {
+                       if (!vx_check(msq->q_perm.xid, VX_IDENT)) {
+                               msg_unlock(msq);
+                               continue;       
+                       }
                        len += sprintf(buffer + len, "%10d %10d  %4o  %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
                                msq->q_perm.key,
                                msg_buildid(i,msq->q_perm.seq),
index 3725673..11ce967 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -175,6 +175,7 @@ static int newary (key_t key, int nsems, int semflg)
 
        sma->sem_perm.mode = (semflg & S_IRWXUGO);
        sma->sem_perm.key = key;
+       sma->sem_perm.xid = current->xid;
 
        sma->sem_perm.security = NULL;
        retval = security_sem_alloc(sma);
@@ -1296,7 +1297,11 @@ static int sysvipc_sem_read_proc(char *buffer, char **start, off_t offset, int l
        for(i = 0; i <= sem_ids.max_id; i++) {
                struct sem_array *sma;
                sma = sem_lock(i);
-               if(sma) {
+               if (sma) {
+                       if (!vx_check(sma->sem_perm.xid, VX_IDENT)) {
+                               sem_unlock(sma);
+                               continue;
+                       }
                        len += sprintf(buffer + len, "%10d %10d  %4o %10lu %5u %5u %5u %5u %10lu %10lu\n",
                                sma->sem_perm.key,
                                sem_buildid(i,sma->sem_perm.seq),
index 714933b..ebe5fa4 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -185,6 +185,7 @@ static int newseg (key_t key, int shmflg, size_t size)
                return -ENOMEM;
 
        shp->shm_perm.key = key;
+       shp->shm_perm.xid = current->xid;
        shp->shm_flags = (shmflg & S_IRWXUGO);
 
        shp->shm_perm.security = NULL;
@@ -843,11 +844,15 @@ static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int l
                struct shmid_kernel* shp;
 
                shp = shm_lock(i);
-               if(shp!=NULL) {
+               if (shp) {
 #define SMALL_STRING "%10d %10d  %4o %10u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
 #define BIG_STRING   "%10d %10d  %4o %21u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
                        char *format;
 
+                       if (!vx_check(shp->shm_perm.xid, VX_IDENT)) {
+                               shm_unlock(shp);
+                               continue;       
+                       }
                        if (sizeof(size_t) <= sizeof(int))
                                format = SMALL_STRING;
                        else
index f74c5ee..310ccc5 100644 (file)
@@ -103,8 +103,10 @@ int ipc_findkey(struct ipc_ids* ids, key_t key)
         */
        for (id = 0; id <= max_id; id++) {
                p = ids->entries[id].p;
-               if(p==NULL)
+               if (p==NULL)
                        continue;
+               if (!vx_check(p->xid, VX_IDENT))
+                       continue;       
                if (key == p->key)
                        return id;
        }
@@ -367,6 +369,8 @@ int ipcperms (struct kern_ipc_perm *ipcp, short flag)
 {      /* flag will most probably be 0 or S_...UGO from <linux/stat.h> */
        int requested_mode, granted_mode;
 
+       if (!vx_check(ipcp->xid, VX_ADMIN|VX_IDENT)) /* maybe just VX_IDENT? */
+               return -1;
        requested_mode = (flag >> 6) | (flag >> 3) | flag;
        granted_mode = ipcp->mode;
        if (current->euid == ipcp->cuid || current->euid == ipcp->uid)
index 238c65f..8184995 100644 (file)
@@ -9,6 +9,11 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
            rcupdate.o intermodule.o extable.o params.o posix-timers.o \
            kthread.o
 
+# mod-subdirs := vserver
+
+subdir-y  += vserver
+obj-y    += vserver/vserver.o
+
 obj-$(CONFIG_FUTEX) += futex.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
 obj-$(CONFIG_SMP) += cpu.o
index 96d7394..9b652d8 100644 (file)
@@ -35,6 +35,11 @@ int getrusage(struct task_struct *, int, struct rusage *);
 static void __unhash_process(struct task_struct *p)
 {
        nr_threads--;
+       /* tasklist_lock is held, is this sufficient? */
+       if (p->vx_info) {
+               atomic_dec(&p->vx_info->cacct.nr_threads);
+               atomic_dec(&p->vx_info->limit.res[RLIMIT_NPROC]);
+       }
        detach_pid(p, PIDTYPE_PID);
        detach_pid(p, PIDTYPE_TGID);
        if (thread_group_leader(p)) {
@@ -234,6 +239,7 @@ void reparent_to_init(void)
        ptrace_unlink(current);
        /* Reparent to init */
        REMOVE_LINKS(current);
+       /* FIXME handle vchild_reaper/initpid */
        current->parent = child_reaper;
        current->real_parent = child_reaper;
        SET_LINKS(current);
@@ -378,6 +384,7 @@ static inline void close_files(struct files_struct * files)
                                struct file * file = xchg(&files->fd[i], NULL);
                                if (file)
                                        filp_close(file, files);
+                               vx_openfd_dec(fd);
                        }
                        i++;
                        set >>= 1;
@@ -597,6 +604,7 @@ static inline void forget_original_parent(struct task_struct * father)
        struct task_struct *p, *reaper = father;
        struct list_head *_p, *_n;
 
+       /* FIXME handle vchild_reaper/initpid */
        reaper = father->group_leader;
        if (reaper == father)
                reaper = child_reaper;
index 68597bc..4336cf0 100644 (file)
@@ -33,6 +33,8 @@
 #include <linux/ptrace.h>
 #include <linux/mount.h>
 #include <linux/audit.h>
+#include <linux/vinline.h>
+#include <linux/ninline.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -75,6 +77,8 @@ static kmem_cache_t *task_struct_cachep;
 static void free_task(struct task_struct *tsk)
 {
        free_thread_info(tsk->thread_info);
+       clr_vx_info(&tsk->vx_info);
+       clr_nx_info(&tsk->nx_info);
        free_task_struct(tsk);
 }
 
@@ -405,6 +409,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
 
        if (likely(!mm_alloc_pgd(mm))) {
                mm->def_flags = 0;
+               set_vx_info(&mm->mm_vx_info, current->vx_info);
                return mm;
        }
        free_mm(mm);
@@ -436,6 +441,7 @@ void fastcall __mmdrop(struct mm_struct *mm)
        BUG_ON(mm == &init_mm);
        mm_free_pgd(mm);
        destroy_context(mm);
+       clr_vx_info(&mm->mm_vx_info);
        free_mm(mm);
 }
 
@@ -550,6 +556,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
 
        /* Copy the current MM stuff.. */
        memcpy(mm, oldmm, sizeof(*mm));
+       mm->mm_vx_info = NULL;
        if (!mm_init(mm))
                goto fail_nomem;
 
@@ -861,6 +868,8 @@ struct task_struct *copy_process(unsigned long clone_flags,
 {
        int retval;
        struct task_struct *p = NULL;
+       struct vx_info *vxi;
+       struct nx_info *nxi;
 
        if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
                return ERR_PTR(-EINVAL);
@@ -885,11 +894,31 @@ struct task_struct *copy_process(unsigned long clone_flags,
                goto fork_out;
 
        retval = -ENOMEM;
+
        p = dup_task_struct(current);
        if (!p)
                goto fork_out;
 
+       vxi = get_vx_info(current->vx_info);
+       nxi = get_nx_info(current->nx_info);
+
+       /* check vserver memory */
+       if (p->mm && !(clone_flags & CLONE_VM)) {
+               if (vx_vmpages_avail(p->mm, p->mm->total_vm))
+                       vx_pages_add(p->mm->mm_vx_info, RLIMIT_AS, p->mm->total_vm);
+               else
+                       goto bad_fork_free;
+       }
+       if (p->mm && vx_flags(VXF_FORK_RSS, 0)) {
+               if (!vx_rsspages_avail(p->mm, p->mm->rss))
+                       goto bad_fork_free;
+       }
+
        retval = -EAGAIN;
+       if (vxi && (atomic_read(&vxi->limit.res[RLIMIT_NPROC])
+               >= vxi->limit.rlim[RLIMIT_NPROC]))
+               goto bad_fork_free;
+
        if (atomic_read(&p->user->processes) >=
                        p->rlim[RLIMIT_NPROC].rlim_cur) {
                if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
@@ -1074,6 +1103,10 @@ struct task_struct *copy_process(unsigned long clone_flags,
                link_pid(p, p->pids + PIDTYPE_TGID, &p->group_leader->pids[PIDTYPE_TGID].pid);
 
        nr_threads++;
+       if (vxi) {
+               atomic_inc(&vxi->cacct.nr_threads);
+               atomic_inc(&vxi->limit.res[RLIMIT_NPROC]);
+       }
        write_unlock_irq(&tasklist_lock);
        retval = 0;
 
index 6ed44f5..a8b7eb3 100644 (file)
@@ -224,7 +224,8 @@ void fastcall detach_pid(task_t *task, enum pid_type type)
 
 task_t *find_task_by_pid(int nr)
 {
-       struct pid *pid = find_pid(PIDTYPE_PID, nr);
+       struct pid *pid = find_pid(PIDTYPE_PID,
+               vx_rmap_tgid(current->vx_info, nr));
 
        if (!pid)
                return NULL;
index 3b74688..bb5ac82 100644 (file)
@@ -247,7 +247,10 @@ int do_syslog(int type, char __user * buf, int len)
        unsigned long i, j, limit, count;
        int do_clear = 0;
        char c;
-       int error = 0;
+       int error = -EPERM;
+
+       if (!vx_check(0, VX_ADMIN|VX_WATCH))
+               return error;
 
        error = security_syslog(type);
        if (error)
index 1493acf..a9f143b 100644 (file)
@@ -39,6 +39,8 @@
 #include <linux/cpu.h>
 #include <linux/percpu.h>
 #include <linux/kthread.h>
+#include <linux/vserver/sched.h>
+#include <linux/vinline.h>
 
 #ifdef CONFIG_NUMA
 #define cpu_to_node_mask(cpu) node_to_cpumask(cpu_to_node(cpu))
@@ -214,6 +216,8 @@ struct runqueue {
 #endif
        task_t *migration_thread;
        struct list_head migration_queue;
+       struct list_head hold_queue;
+       int idle_tokens;
 
        atomic_t nr_iowait;
 };
@@ -373,6 +377,9 @@ static int effective_prio(task_t *p)
        bonus = CURRENT_BONUS(p) - MAX_BONUS / 2;
 
        prio = p->static_prio - bonus;
+       if (__vx_task_flags(p, VXF_SCHED_PRIO, 0))
+               prio += effective_vavavoom(p, MAX_USER_PRIO);
+
        if (prio < MAX_RT_PRIO)
                prio = MAX_RT_PRIO;
        if (prio > MAX_PRIO-1)
@@ -1503,6 +1510,9 @@ void scheduler_tick(int user_ticks, int sys_ticks)
        }
 
        if (p == rq->idle) {
+               if (!--rq->idle_tokens && !list_empty(&rq->hold_queue))
+                       set_need_resched();     
+
                if (atomic_read(&rq->nr_iowait) > 0)
                        cpustat->iowait += sys_ticks;
                else
@@ -1545,7 +1555,7 @@ void scheduler_tick(int user_ticks, int sys_ticks)
                }
                goto out_unlock;
        }
-       if (!--p->time_slice) {
+       if (vx_need_resched(p)) {
                dequeue_task(p, rq->active);
                set_tsk_need_resched(p);
                p->prio = effective_prio(p);
@@ -1606,6 +1616,10 @@ asmlinkage void __sched schedule(void)
        struct list_head *queue;
        unsigned long long now;
        unsigned long run_time;
+#ifdef CONFIG_VSERVER_HARDCPU          
+       struct vx_info *vxi;
+       int maxidle = -HZ;
+#endif
        int idx;
 
        /*
@@ -1656,6 +1670,37 @@ need_resched:
                        deactivate_task(prev, rq);
        }
 
+#ifdef CONFIG_VSERVER_HARDCPU          
+       if (!list_empty(&rq->hold_queue)) {
+               struct list_head *l, *n;
+               int ret;
+
+               vxi = NULL;
+               list_for_each_safe(l, n, &rq->hold_queue) {
+                       next = list_entry(l, task_t, run_list);
+                       if (vxi == next->vx_info)
+                               continue;
+
+                       vxi = next->vx_info;
+                       ret = vx_tokens_recalc(vxi);
+                       // tokens = vx_tokens_avail(next);
+
+                       if (ret > 0) {
+                               list_del(&next->run_list);
+                               next->state &= ~TASK_ONHOLD;
+                               recalc_task_prio(next, now);
+                               __activate_task(next, rq);
+                               // printk("··· unhold %p\n", next);
+                               break;
+                       }
+                       if ((ret < 0) && (maxidle < ret))
+                               maxidle = ret;
+               }       
+       }
+       rq->idle_tokens = -maxidle;
+
+pick_next:
+#endif
        if (unlikely(!rq->nr_running)) {
 #ifdef CONFIG_SMP
                load_balance(rq, 1, cpu_to_node_mask(smp_processor_id()));
@@ -1683,6 +1728,23 @@ need_resched:
        queue = array->queue + idx;
        next = list_entry(queue->next, task_t, run_list);
 
+#ifdef CONFIG_VSERVER_HARDCPU          
+       vxi = next->vx_info;
+       if (vxi && __vx_flags(vxi->vx_flags,
+               VXF_SCHED_PAUSE|VXF_SCHED_HARD, 0)) {
+               int ret = vx_tokens_recalc(vxi);
+
+               if (unlikely(ret <= 0)) {
+                       if (ret && (rq->idle_tokens > -ret))
+                               rq->idle_tokens = -ret;
+                       deactivate_task(next, rq);
+                       list_add_tail(&next->run_list, &rq->hold_queue);
+                       next->state |= TASK_ONHOLD;                     
+                       goto pick_next;
+               }
+       }
+#endif
+
        if (!rt_task(next) && next->activated > 0) {
                unsigned long long delta = now - next->timestamp;
 
@@ -2954,6 +3016,7 @@ void __init sched_init(void)
 
                spin_lock_init(&rq->lock);
                INIT_LIST_HEAD(&rq->migration_queue);
+               INIT_LIST_HEAD(&rq->hold_queue);
                atomic_set(&rq->nr_iowait, 0);
                nr_running_init(rq);
 
index a918155..7a9912a 100644 (file)
@@ -1050,6 +1050,9 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
        unsigned long flags;
        int ret;
 
+       if (!vx_check(vx_task_xid(p), VX_ADMIN|VX_WATCH|VX_IDENT))
+               return -ESRCH;
+
        ret = check_kill_permission(sig, info, p);
        if (!ret && sig && p->sighand) {
                spin_lock_irqsave(&p->sighand->siglock, flags);
index 4d414d9..9523213 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/mman.h>
 #include <linux/smp_lock.h>
 #include <linux/notifier.h>
+#include <linux/kmod.h>
 #include <linux/reboot.h>
 #include <linux/prctl.h>
 #include <linux/init.h>
@@ -339,7 +340,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
                        if (!who)
                                user = current->user;
                        else
-                               user = find_user(who);
+                               user = find_user(vx_current_xid(), who);
 
                        if (!user)
                                goto out_unlock;
@@ -398,7 +399,7 @@ asmlinkage long sys_getpriority(int which, int who)
                        if (!who)
                                user = current->user;
                        else
-                               user = find_user(who);
+                               user = find_user(vx_current_xid(), who);
 
                        if (!user)
                                goto out_unlock;
@@ -418,6 +419,88 @@ out_unlock:
        return retval;
 }
 
+/*
+ *      vshelper path is set via /proc/sys
+ *      invoked by vserver sys_reboot(), with
+ *      the following arguments
+ *
+ *      argv [0] = vshelper_path;
+ *      argv [1] = action: "restart", "halt", "poweroff", ...
+ *      argv [2] = context identifier
+ *      argv [3] = additional argument (restart2)
+ *
+ *      envp [*] = type-specific parameters
+ *
+ *      the array size has to match the .maxlen (256) of the
+ *      "vshelper" sysctl entry, which writes into this buffer
+ */
+char vshelper_path[256] = "/sbin/vshelper";
+
+/*
+ *      vs_reboot - pass a reboot request on to the vshelper
+ *      @cmd: LINUX_REBOOT_CMD_* value given to sys_reboot()
+ *      @arg: user space pointer, only read for RESTART2
+ *
+ *      returns 0 once the helper was started, -EFAULT if the
+ *      RESTART2 string could not be copied and -EPERM if
+ *      call_usermodehelper() failed
+ */
+long vs_reboot(unsigned int cmd, void * arg)
+{
+       char id_buf[8], cmd_buf[32];
+       char uid_buf[32], pid_buf[32];
+       char buffer[256];
+
+       char *argv[] = {vshelper_path, NULL, id_buf, NULL, 0};
+       char *envp[] = {"HOME=/", "TERM=linux",
+                       "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+                       uid_buf, pid_buf, cmd_buf, 0};
+
+       /* snprintf() accounts for the trailing NUL by itself */
+       snprintf(id_buf, sizeof(id_buf), "%d", vx_current_xid());
+
+       snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
+       snprintf(uid_buf, sizeof(uid_buf), "VS_UID=%d", current->uid);
+       snprintf(pid_buf, sizeof(pid_buf), "VS_PID=%d", current->pid);
+
+       switch (cmd) {
+       case LINUX_REBOOT_CMD_RESTART:
+               argv[1] = "restart";
+               break;
+
+       case LINUX_REBOOT_CMD_HALT:
+               argv[1] = "halt";
+               break;
+
+       case LINUX_REBOOT_CMD_POWER_OFF:
+               argv[1] = "poweroff";
+               break;
+
+       case LINUX_REBOOT_CMD_SW_SUSPEND:
+               argv[1] = "swsusp";
+               break;
+
+       case LINUX_REBOOT_CMD_RESTART2:
+               if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0)
+                       return -EFAULT;
+               /* a truncated copy is not NUL terminated */
+               buffer[sizeof(buffer) - 1] = 0;
+               argv[3] = buffer;
+               /* fall through, unknown commands are reported as restart2 too */
+       default:
+               argv[1] = "restart2";
+               break;
+       }
+
+       /* maybe we should wait ? */
+       if (call_usermodehelper(*argv, argv, envp, 0)) {
+               printk( KERN_WARNING
+                       "vs_reboot(): failed to exec (%s %s %s %s)\n",
+                       vshelper_path, argv[1], argv[2], argv[3]);
+               return -EPERM;
+       }
+       return 0;
+}
 
 /*
  * Reboot system call: for obvious reasons only root may call it,
@@ -443,6 +510,9 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
                        magic2 != LINUX_REBOOT_MAGIC2C))
                return -EINVAL;
 
+       if (!vx_check(0, VX_ADMIN|VX_WATCH))
+               return vs_reboot(cmd, arg);
+
        lock_kernel();
        switch (cmd) {
        case LINUX_REBOOT_CMD_RESTART:
@@ -637,7 +707,7 @@ static int set_user(uid_t new_ruid, int dumpclear)
 {
        struct user_struct *new_user;
 
-       new_user = alloc_uid(new_ruid);
+       new_user = alloc_uid(vx_current_xid(), new_ruid);
        if (!new_user)
                return -EAGAIN;
 
@@ -1376,7 +1446,7 @@ asmlinkage long sys_newuname(struct new_utsname __user * name)
        int errno = 0;
 
        down_read(&uts_sem);
-       if (copy_to_user(name,&system_utsname,sizeof *name))
+       if (copy_to_user(name, vx_new_utsname(), sizeof *name))
                errno = -EFAULT;
        up_read(&uts_sem);
        return errno;
@@ -1387,15 +1457,17 @@ asmlinkage long sys_sethostname(char __user *name, int len)
        int errno;
        char tmp[__NEW_UTS_LEN];
 
-       if (!capable(CAP_SYS_ADMIN))
+       if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SET_UTSNAME))
                return -EPERM;
        if (len < 0 || len > __NEW_UTS_LEN)
                return -EINVAL;
        down_write(&uts_sem);
        errno = -EFAULT;
        if (!copy_from_user(tmp, name, len)) {
-               memcpy(system_utsname.nodename, tmp, len);
-               system_utsname.nodename[len] = 0;
+               char *ptr = vx_new_uts(nodename);
+
+               memcpy(ptr, tmp, len);
+               ptr[len] = 0;
                errno = 0;
        }
        up_write(&uts_sem);
@@ -1405,15 +1477,17 @@ asmlinkage long sys_sethostname(char __user *name, int len)
 asmlinkage long sys_gethostname(char __user *name, int len)
 {
        int i, errno;
+       char *ptr;
 
        if (len < 0)
                return -EINVAL;
        down_read(&uts_sem);
-       i = 1 + strlen(system_utsname.nodename);
+       ptr = vx_new_uts(nodename);
+       i = 1 + strlen(ptr);
        if (i > len)
                i = len;
        errno = 0;
-       if (copy_to_user(name, system_utsname.nodename, i))
+       if (copy_to_user(name, ptr, i))
                errno = -EFAULT;
        up_read(&uts_sem);
        return errno;
@@ -1428,7 +1502,7 @@ asmlinkage long sys_setdomainname(char __user *name, int len)
        int errno;
        char tmp[__NEW_UTS_LEN];
 
-       if (!capable(CAP_SYS_ADMIN))
+       if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SET_UTSNAME))
                return -EPERM;
        if (len < 0 || len > __NEW_UTS_LEN)
                return -EINVAL;
@@ -1436,8 +1510,10 @@ asmlinkage long sys_setdomainname(char __user *name, int len)
        down_write(&uts_sem);
        errno = -EFAULT;
        if (!copy_from_user(tmp, name, len)) {
-               memcpy(system_utsname.domainname, tmp, len);
-               system_utsname.domainname[len] = 0;
+               char *ptr = vx_new_uts(domainname);
+
+               memcpy(ptr, tmp, len);
+               ptr[len] = 0;
                errno = 0;
        }
        up_write(&uts_sem);
@@ -1489,7 +1565,8 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
        old_rlim = current->rlim + resource;
+       /* going beyond the old hard limit needs CAP_SYS_RESOURCE or VXC_SET_RLIMIT */
        if (((new_rlim.rlim_cur > old_rlim->rlim_max) ||
             (new_rlim.rlim_max > old_rlim->rlim_max)) &&
-           !capable(CAP_SYS_RESOURCE))
+           !capable(CAP_SYS_RESOURCE) && !vx_ccaps(VXC_SET_RLIMIT))
                return -EPERM;
        if (resource == RLIMIT_NOFILE) {
                if (new_rlim.rlim_cur > NR_OPEN || new_rlim.rlim_max > NR_OPEN)
index 79e7c09..c26f5f4 100644 (file)
@@ -77,6 +77,7 @@ extern char modprobe_path[];
 #ifdef CONFIG_HOTPLUG
 extern char hotplug_path[];
 #endif
+extern char vshelper_path[];
 #ifdef CONFIG_CHR_DEV_SG
 extern int sg_big_buff;
 #endif
@@ -409,6 +410,15 @@ static ctl_table kern_table[] = {
                .strategy       = &sysctl_string,
        },
 #endif
+       {
+               .ctl_name       = KERN_VSHELPER,
+               .procname       = "vshelper",
+               .data           = &vshelper_path,
+               .maxlen         = 256,
+               .mode           = 0644,
+               .proc_handler   = &proc_dostring,
+               .strategy       = &sysctl_string,
+       },
 #ifdef CONFIG_CHR_DEV_SG
        {
                .ctl_name       = KERN_SG_BIG_BUFF,
index 08cec6a..88636e6 100644 (file)
@@ -31,6 +31,8 @@
 #include <linux/time.h>
 #include <linux/jiffies.h>
 #include <linux/cpu.h>
+#include <linux/vserver/sched.h>
+#include <linux/vserver/cvirt.h>
 
 #include <asm/uaccess.h>
 #include <asm/div64.h>
@@ -961,7 +963,7 @@ asmlinkage unsigned long sys_alarm(unsigned int seconds)
  */
 asmlinkage long sys_getpid(void)
 {
-       return current->tgid;
+       return vx_map_tgid(current->vx_info, current->tgid);
 }
 
 /*
@@ -1005,7 +1007,7 @@ asmlinkage long sys_getppid(void)
 #endif
                break;
        }
-       return pid;
+       return vx_map_tgid(current->vx_info, pid);
 }
 
 asmlinkage long sys_getuid(void)
@@ -1214,6 +1216,8 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
                        tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
                        tp.tv_sec++;
                }
+               if (vx_flags(VXF_VIRT_UPTIME, 0))
+                       vx_vsi_uptime(&tp, NULL);
                val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
 
                val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
@@ -1223,6 +1227,9 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
                val.procs = nr_threads;
        } while (read_seqretry(&xtime_lock, seq));
 
+/*     if (vx_flags(VXF_VIRT_CPU, 0))
+               vx_vsi_cpu(val);
+*/
        si_meminfo(&val);
        si_swapinfo(&val);
 
index f5c9d42..75faf3d 100644 (file)
@@ -20,8 +20,8 @@
 #define UIDHASH_BITS           8
 #define UIDHASH_SZ             (1 << UIDHASH_BITS)
 #define UIDHASH_MASK           (UIDHASH_SZ - 1)
-#define __uidhashfn(uid)       (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
-#define uidhashentry(uid)      (uidhash_table + __uidhashfn((uid)))
+#define __uidhashfn(xid,uid)   ((((uid) >> UIDHASH_BITS) + ((uid)^(xid))) & UIDHASH_MASK)
+#define uidhashentry(xid,uid)  (uidhash_table + __uidhashfn((xid),(uid)))
 
 static kmem_cache_t *uid_cachep;
 static struct list_head uidhash_table[UIDHASH_SZ];
@@ -46,7 +46,7 @@ static inline void uid_hash_remove(struct user_struct *up)
        list_del(&up->uidhash_list);
 }
 
-static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *hashent)
+static inline struct user_struct *uid_hash_find(xid_t xid, uid_t uid, struct list_head *hashent)
 {
        struct list_head *up;
 
@@ -55,7 +55,7 @@ static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *has
 
                user = list_entry(up, struct user_struct, uidhash_list);
 
-               if(user->uid == uid) {
+               if(user->uid == uid && user->xid == xid) {
                        atomic_inc(&user->__count);
                        return user;
                }
@@ -64,9 +64,9 @@ static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *has
        return NULL;
 }
 
-struct user_struct *find_user(uid_t uid)
+struct user_struct *find_user(xid_t xid, uid_t uid)
 {
-       return uid_hash_find(uid, uidhashentry(uid));
+       return uid_hash_find(xid, uid, uidhashentry(xid, uid));
 }
 
 void free_uid(struct user_struct *up)
@@ -78,13 +78,13 @@ void free_uid(struct user_struct *up)
        }
 }
 
-struct user_struct * alloc_uid(uid_t uid)
+struct user_struct * alloc_uid(xid_t xid, uid_t uid)
 {
-       struct list_head *hashent = uidhashentry(uid);
+       struct list_head *hashent = uidhashentry(xid, uid);
        struct user_struct *up;
 
        spin_lock(&uidhash_lock);
-       up = uid_hash_find(uid, hashent);
+       up = uid_hash_find(xid, uid, hashent);
        spin_unlock(&uidhash_lock);
 
        if (!up) {
@@ -94,6 +94,7 @@ struct user_struct * alloc_uid(uid_t uid)
                if (!new)
                        return NULL;
                new->uid = uid;
+               new->xid = xid;
                atomic_set(&new->__count, 1);
                atomic_set(&new->processes, 0);
                atomic_set(&new->files, 0);
@@ -103,7 +104,7 @@ struct user_struct * alloc_uid(uid_t uid)
                 * on adding the same user already..
                 */
                spin_lock(&uidhash_lock);
-               up = uid_hash_find(uid, hashent);
+               up = uid_hash_find(xid, uid, hashent);
                if (up) {
                        kmem_cache_free(uid_cachep, new);
                } else {
@@ -148,7 +149,7 @@ static int __init uid_cache_init(void)
 
        /* Insert the root user immediately (init already runs as root) */
        spin_lock(&uidhash_lock);
-       uid_hash_insert(&root_user, uidhashentry(0));
+       uid_hash_insert(&root_user, uidhashentry(0,0));
        spin_unlock(&uidhash_lock);
 
        return 0;
index 2c8abe6..1b891a1 100644 (file)
@@ -38,7 +38,8 @@ static inline void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
                                        set_page_dirty(page);
                                page_remove_rmap(page, ptep);
                                page_cache_release(page);
-                               mm->rss--;
+                               // mm->rss--;
+                               vx_rsspages_dec(mm);
                        }
                }
        } else {
@@ -68,6 +69,9 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
        pgd = pgd_offset(mm, addr);
        spin_lock(&mm->page_table_lock);
 
+       if (!vx_rsspages_avail(mm, 1))
+               goto err_unlock;
+
        pmd = pmd_alloc(mm, pgd, addr);
        if (!pmd)
                goto err_unlock;
@@ -78,7 +82,8 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
        zap_pte(mm, vma, addr, pte);
 
-       mm->rss++;
+       // mm->rss++;
+       vx_rsspages_inc(mm);
        flush_icache_page(vma, page);
        set_pte(pte, mk_pte(page, prot));
        pte_chain = page_add_rmap(page, pte, pte_chain);
index 5ae7c99..576d4c9 100644 (file)
@@ -285,6 +285,10 @@ skip_copy_pte_range:
                                struct page *page;
                                unsigned long pfn;
 
+                               if (!vx_rsspages_avail(dst, 1)) {
+                                       spin_unlock(&src->page_table_lock);
+                                       goto nomem;
+                               }
                                /* copy_one_pte */
 
                                if (pte_none(pte))
@@ -328,7 +332,8 @@ skip_copy_pte_range:
                                        pte = pte_mkclean(pte);
                                pte = pte_mkold(pte);
                                get_page(page);
-                               dst->rss++;
+                               // dst->rss++;
+                               vx_rsspages_inc(dst);
 
                                set_pte(dst_pte, pte);
                                pte_chain = page_add_rmap(page, dst_pte,
@@ -1124,7 +1129,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
        page_table = pte_offset_map(pmd, address);
        if (pte_same(*page_table, pte)) {
                if (PageReserved(old_page))
-                       ++mm->rss;
+                       // ++mm->rss;
+                       vx_rsspages_inc(mm);
                page_remove_rmap(old_page, page_table);
                break_cow(vma, new_page, address, page_table);
                pte_chain = page_add_rmap(new_page, page_table, pte_chain);
@@ -1343,6 +1349,10 @@ static int do_swap_page(struct mm_struct * mm,
                inc_page_state(pgmajfault);
        }
 
+       if (!vx_rsspages_avail(mm, 1)) {
+               ret = VM_FAULT_OOM;
+               goto out;
+       }
        mark_page_accessed(page);
        pte_chain = pte_chain_alloc(GFP_KERNEL);
        if (!pte_chain) {
@@ -1372,7 +1382,8 @@ static int do_swap_page(struct mm_struct * mm,
        if (vm_swap_full())
                remove_exclusive_swap_page(page);
 
-       mm->rss++;
+       // mm->rss++;
+       vx_rsspages_inc(mm);
        pte = mk_pte(page, vma->vm_page_prot);
        if (write_access && can_share_swap_page(page))
                pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -1406,6 +1417,13 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
        struct pte_chain *pte_chain;
        int ret;
 
+       if (!vx_rsspages_avail(mm, 1)) {
+               /* undo the pte mapping as well, like the exit path below */
+               pte_unmap(page_table);
+               spin_unlock(&mm->page_table_lock);
+               return VM_FAULT_OOM;
+       }
+
        pte_chain = pte_chain_alloc(GFP_ATOMIC | __GFP_NOWARN);
        if (!pte_chain) {
                pte_unmap(page_table);
@@ -1441,7 +1457,8 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        ret = VM_FAULT_MINOR;
                        goto out;
                }
-               mm->rss++;
+               // mm->rss++;
+               vx_rsspages_inc(mm);
                entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
                                                         vma->vm_page_prot)),
                                      vma);
@@ -1509,6 +1526,8 @@ retry:
                return VM_FAULT_SIGBUS;
        if (new_page == NOPAGE_OOM)
                return VM_FAULT_OOM;
+       if (!vx_rsspages_avail(mm, 1))
+               return VM_FAULT_OOM;
 
        pte_chain = pte_chain_alloc(GFP_KERNEL);
        if (!pte_chain)
@@ -1556,7 +1575,8 @@ retry:
        /* Only go through if we didn't race with anybody else... */
        if (pte_none(*page_table)) {
                if (!PageReserved(new_page))
-                       ++mm->rss;
+                       // ++mm->rss;
+                       vx_rsspages_inc(mm);
                flush_icache_page(vma, new_page);
                entry = mk_pte(new_page, vma->vm_page_prot);
                if (write_access)
index 0cf446b..2b4bf4d 100644 (file)
@@ -100,7 +100,7 @@ static int do_mlock(unsigned long start, size_t len, int on)
 
 asmlinkage long sys_mlock(unsigned long start, size_t len)
 {
-       unsigned long locked;
+       unsigned long locked, grow;
        unsigned long lock_limit;
        int error = -ENOMEM;
 
@@ -108,8 +108,10 @@ asmlinkage long sys_mlock(unsigned long start, size_t len)
        len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
        start &= PAGE_MASK;
 
-       locked = len >> PAGE_SHIFT;
-       locked += current->mm->locked_vm;
+       grow = len >> PAGE_SHIFT;
+       if (!vx_vmlocked_avail(current->mm, grow))
+               goto out;
+       locked = current->mm->locked_vm + grow;
 
        lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur;
        lock_limit >>= PAGE_SHIFT;
@@ -117,6 +119,7 @@ asmlinkage long sys_mlock(unsigned long start, size_t len)
        /* check against resource limits */
        if (locked <= lock_limit)
                error = do_mlock(start, len, 1);
+out:
        up_write(&current->mm->mmap_sem);
        return error;
 }
@@ -174,6 +177,9 @@ asmlinkage long sys_mlockall(int flags)
        lock_limit >>= PAGE_SHIFT;
 
        ret = -ENOMEM;
+       if (!vx_vmlocked_avail(current->mm, current->mm->total_vm))
+               goto out;
+       /* check vserver lock limits? */
        if (current->mm->total_vm <= lock_limit)
                ret = do_mlockall(flags);
 out:
index bbdcb91..f7990f9 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -614,6 +614,10 @@ munmap_back:
            > current->rlim[RLIMIT_AS].rlim_cur)
                return -ENOMEM;
 
+       /* check context space, maybe only Private writable mapping? */
+       if (!vx_vmpages_avail(mm, len >> PAGE_SHIFT))
+               return -ENOMEM;
+
        if (accountable && (!(flags & MAP_NORESERVE) ||
                        sysctl_overcommit_memory > 1)) {
                if (vm_flags & VM_SHARED) {
@@ -708,9 +712,11 @@ munmap_back:
                kmem_cache_free(vm_area_cachep, vma);
        }
 out:   
-       mm->total_vm += len >> PAGE_SHIFT;
+       // mm->total_vm += len >> PAGE_SHIFT;
+       vx_vmpages_add(mm, len >> PAGE_SHIFT);
        if (vm_flags & VM_LOCKED) {
-               mm->locked_vm += len >> PAGE_SHIFT;
+               // mm->locked_vm += len >> PAGE_SHIFT;
+               vx_vmlocked_add(mm, len >> PAGE_SHIFT);
                make_pages_present(addr, addr + len);
        }
        if (flags & MAP_POPULATE) {
@@ -935,7 +941,8 @@ int expand_stack(struct vm_area_struct * vma, unsigned long address)
        grow = (address - vma->vm_end) >> PAGE_SHIFT;
 
        /* Overcommit.. */
-       if (security_vm_enough_memory(grow)) {
+       if (security_vm_enough_memory(grow) ||
+               !vx_vmpages_avail(vma->vm_mm, grow)) {
                spin_unlock(&vma->vm_mm->page_table_lock);
                return -ENOMEM;
        }
@@ -947,10 +954,13 @@ int expand_stack(struct vm_area_struct * vma, unsigned long address)
                vm_unacct_memory(grow);
                return -ENOMEM;
        }
+
        vma->vm_end = address;
-       vma->vm_mm->total_vm += grow;
+       // vma->vm_mm->total_vm += grow;
+       vx_vmpages_add(vma->vm_mm, grow);
        if (vma->vm_flags & VM_LOCKED)
-               vma->vm_mm->locked_vm += grow;
+               // vma->vm_mm->locked_vm += grow;
+               vx_vmlocked_add(vma->vm_mm, grow);
        spin_unlock(&vma->vm_mm->page_table_lock);
        return 0;
 }
@@ -989,7 +999,8 @@ int expand_stack(struct vm_area_struct *vma, unsigned long address)
        grow = (vma->vm_start - address) >> PAGE_SHIFT;
 
        /* Overcommit.. */
-       if (security_vm_enough_memory(grow)) {
+       if (security_vm_enough_memory(grow) ||
+               !vx_vmpages_avail(vma->vm_mm, grow)) {
                spin_unlock(&vma->vm_mm->page_table_lock);
                return -ENOMEM;
        }
@@ -1001,11 +1012,14 @@ int expand_stack(struct vm_area_struct *vma, unsigned long address)
                vm_unacct_memory(grow);
                return -ENOMEM;
        }
+
        vma->vm_start = address;
        vma->vm_pgoff -= grow;
-       vma->vm_mm->total_vm += grow;
+       // vma->vm_mm->total_vm += grow;
+       vx_vmpages_add(vma->vm_mm, grow);
        if (vma->vm_flags & VM_LOCKED)
-               vma->vm_mm->locked_vm += grow;
+               // vma->vm_mm->locked_vm += grow;
+               vx_vmlocked_add(vma->vm_mm, grow);
        spin_unlock(&vma->vm_mm->page_table_lock);
        return 0;
 }
@@ -1108,9 +1122,12 @@ static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
 {
        size_t len = area->vm_end - area->vm_start;
 
-       area->vm_mm->total_vm -= len >> PAGE_SHIFT;
+       // area->vm_mm->total_vm -= len >> PAGE_SHIFT;
+       vx_vmpages_sub(area->vm_mm, len >> PAGE_SHIFT);
+       
        if (area->vm_flags & VM_LOCKED)
-               area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
+               // area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
+               vx_vmlocked_sub(area->vm_mm, len >> PAGE_SHIFT);
        /*
         * Is this a new hole at the lowest possible address?
         */
@@ -1365,6 +1382,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
                locked += len;
                if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
                        return -EAGAIN;
+               /* vserver checks ? */
        }
 
        /*
@@ -1386,7 +1404,8 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
        if (mm->map_count > sysctl_max_map_count)
                return -ENOMEM;
 
-       if (security_vm_enough_memory(len >> PAGE_SHIFT))
+       if (security_vm_enough_memory(len >> PAGE_SHIFT) ||
+               !vx_vmpages_avail(mm, len >> PAGE_SHIFT))
                return -ENOMEM;
 
        flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
@@ -1419,9 +1438,11 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
        vma_link(mm, vma, prev, rb_link, rb_parent);
 
 out:
-       mm->total_vm += len >> PAGE_SHIFT;
+       // mm->total_vm += len >> PAGE_SHIFT;
+       vx_vmpages_add(mm, len >> PAGE_SHIFT);
        if (flags & VM_LOCKED) {
-               mm->locked_vm += len >> PAGE_SHIFT;
+               // mm->locked_vm += len >> PAGE_SHIFT;
+               vx_vmlocked_add(mm, len >> PAGE_SHIFT);
                make_pages_present(addr, addr + len);
        }
        return addr;
@@ -1455,9 +1476,12 @@ void exit_mmap(struct mm_struct *mm)
        vma = mm->mmap;
        mm->mmap = mm->mmap_cache = NULL;
        mm->mm_rb = RB_ROOT;
-       mm->rss = 0;
-       mm->total_vm = 0;
-       mm->locked_vm = 0;
+       // mm->rss = 0;
+       vx_rsspages_sub(mm, mm->rss);
+       // mm->total_vm = 0;
+       vx_vmpages_sub(mm, mm->total_vm);
+       // mm->locked_vm = 0;
+       vx_vmlocked_sub(mm, mm->locked_vm);
 
        spin_unlock(&mm->page_table_lock);
 
index 9043076..82ceecd 100644 (file)
@@ -238,9 +238,11 @@ static unsigned long move_vma(struct vm_area_struct *vma,
                        vma->vm_next->vm_flags |= VM_ACCOUNT;
        }
 
-       mm->total_vm += new_len >> PAGE_SHIFT;
+       // mm->total_vm += new_len >> PAGE_SHIFT;
+       vx_vmpages_add(mm, new_len >> PAGE_SHIFT);
        if (vm_flags & VM_LOCKED) {
-               mm->locked_vm += new_len >> PAGE_SHIFT;
+               // mm->locked_vm += new_len >> PAGE_SHIFT;
+               vx_vmlocked_add(mm, new_len >> PAGE_SHIFT);
                if (new_len > old_len)
                        make_pages_present(new_addr + old_len,
                                           new_addr + new_len);
@@ -349,6 +351,9 @@ unsigned long do_mremap(unsigned long addr,
        if ((current->mm->total_vm << PAGE_SHIFT) + (new_len - old_len)
            > current->rlim[RLIMIT_AS].rlim_cur)
                goto out;
+       /* check context space, maybe only Private writable mapping? */
+       if (!vx_vmpages_avail(current->mm, (new_len - old_len) >> PAGE_SHIFT))
+               goto out;
 
        if (vma->vm_flags & VM_ACCOUNT) {
                charged = (new_len - old_len) >> PAGE_SHIFT;
@@ -371,9 +376,11 @@ unsigned long do_mremap(unsigned long addr,
                        spin_lock(&vma->vm_mm->page_table_lock);
                        vma->vm_end = addr + new_len;
                        spin_unlock(&vma->vm_mm->page_table_lock);
-                       current->mm->total_vm += pages;
+                       // current->mm->total_vm += pages;
+                       vx_vmpages_add(current->mm, pages);
                        if (vma->vm_flags & VM_LOCKED) {
-                               current->mm->locked_vm += pages;
+                               // current->mm->locked_vm += pages;
+                               vx_vmlocked_add(current->mm, pages);
                                make_pages_present(addr + old_len,
                                                   addr + new_len);
                        }
index ed0cee4..2f15290 100644 (file)
@@ -54,6 +54,7 @@ static int badness(struct task_struct *p)
         * The memory size of the process is the basis for the badness.
         */
        points = p->mm->total_vm;
+       /* add vserver badness ;) */
 
        /*
         * CPU time is in seconds and run time is in minutes. There is no
index 8d3f6f4..ff2ffbe 100644 (file)
@@ -982,6 +982,8 @@ void si_meminfo(struct sysinfo *val)
        val->freehigh = 0;
 #endif
        val->mem_unit = PAGE_SIZE;
+       if (vx_flags(VXF_VIRT_MEM, 0))
+               vx_vsi_meminfo(val);
 }
 
 EXPORT_SYMBOL(si_meminfo);
index 5577805..f23ff9c 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -374,7 +374,8 @@ static int fastcall try_to_unmap_one(struct page * page, pte_addr_t paddr)
        if (pte_dirty(pte))
                set_page_dirty(page);
 
-       mm->rss--;
+       // mm->rss--;
+       vx_rsspages_dec(mm);
        page_cache_release(page);
        ret = SWAP_SUCCESS;
 
index 9ca222b..321b09c 100644 (file)
@@ -459,7 +459,8 @@ static void
 unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir,
        swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp)
 {
-       vma->vm_mm->rss++;
+       // vma->vm_mm->rss++;
+       vx_rsspages_inc(vma->vm_mm);
        get_page(page);
        set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
        *pte_chainp = page_add_rmap(page, dir, *pte_chainp);
@@ -1595,6 +1596,8 @@ void si_swapinfo(struct sysinfo *val)
        val->freeswap = nr_swap_pages + nr_to_be_unused;
        val->totalswap = total_swap_pages + nr_to_be_unused;
        swap_list_unlock();
+        if (vx_flags(VXF_VIRT_MEM, 0))
+                vx_vsi_swapinfo(val);
 }
 
 /*
index 43c273e..b6dc213 100644 (file)
@@ -1969,6 +1969,8 @@ static int dev_ifconf(char __user *arg)
 
        total = 0;
        for (dev = dev_base; dev; dev = dev->next) {
+               if (!dev_in_nx_info(dev, current->nx_info))
+                       continue;
                for (i = 0; i < NPROTO; i++) {
                        if (gifconf_list[i]) {
                                int done;
@@ -2029,6 +2031,10 @@ void dev_seq_stop(struct seq_file *seq, void *v)
 
 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
 {
+       struct nx_info *nxi = current->nx_info;
+
+       if (!dev_in_nx_info(dev, nxi))
+               return;
        if (dev->get_stats) {
                struct net_device_stats *stats = dev->get_stats(dev);
 
index 26e3577..f577cba 100644 (file)
@@ -224,6 +224,8 @@ int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
        for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
                if (idx < s_idx)
                        continue;
+               if (!dev_in_nx_info(dev, current->nx_info))
+                       continue;
                if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0)
                        break;
        }
@@ -309,6 +311,8 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
        struct sk_buff *skb;
        int size = NLMSG_GOODSIZE;
 
+       if (!dev_in_nx_info(dev, current->nx_info))
+               return;
        skb = alloc_skb(size, GFP_KERNEL);
        if (!skb)
                return;
index 92bc77a..afdc918 100644 (file)
@@ -323,7 +323,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
                        break;
 
                case SO_PASSCRED:
-                       sock->passcred = valbool;
+                       if (valbool)
+                               set_bit(SOCK_PASS_CRED, &sock->flags);
+                       else
+                               clear_bit(SOCK_PASS_CRED, &sock->flags);
                        break;
 
                case SO_TIMESTAMP:
@@ -546,7 +549,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                        break; 
 
                case SO_PASSCRED:
-                       v.val = sock->passcred;
+                       v.val = test_bit(SOCK_PASS_CRED, &sock->flags)?1:0;
                        break;
 
                case SO_PEERCRED:
@@ -621,6 +624,8 @@ struct sock *sk_alloc(int family, int priority, int zero_it, kmem_cache_t *slab)
                        sock_lock_init(sk);
                }
                sk->sk_slab = slab;
+               sock_vx_init(sk);
+               sock_nx_init(sk);
                
                if (security_sk_alloc(sk, family, priority)) {
                        kmem_cache_free(slab, sk);
@@ -651,6 +656,10 @@ void sk_free(struct sock *sk)
                       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
 
        security_sk_free(sk);
+       BUG_ON(sk->sk_vx_info);
+       BUG_ON(sk->sk_nx_info);
+/*     clr_vx_info(&sk->sk_vx_info);
+       clr_nx_info(&sk->sk_nx_info);   */
        kmem_cache_free(sk->sk_slab, sk);
        module_put(owner);
 }
@@ -1142,6 +1151,11 @@ void sock_init_data(struct socket *sock, struct sock *sk)
        sk->sk_stamp.tv_sec     = -1L;
        sk->sk_stamp.tv_usec    = -1L;
 
+       sk->sk_vx_info          =       NULL;
+       sk->sk_xid              =       0;
+       sk->sk_nx_info          =       NULL;
+       sk->sk_nid              =       0;
+
        atomic_set(&sk->sk_refcnt, 1);
 }
 
index edf4570..358b772 100644 (file)
@@ -158,6 +158,9 @@ void inet_sock_destruct(struct sock *sk)
 
        if (inet->opt)
                kfree(inet->opt);
+       
+       BUG_ON(sk->sk_nx_info);
+       BUG_ON(sk->sk_vx_info);
        dst_release(sk->sk_dst_cache);
 #ifdef INET_REFCNT_DEBUG
        atomic_dec(&inet_sock_nr);
@@ -397,6 +400,11 @@ static int inet_create(struct socket *sock, int protocol)
        sk->sk_family      = PF_INET;
        sk->sk_protocol    = protocol;
        sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
+       
+       set_vx_info(&sk->sk_vx_info, current->vx_info);
+       sk->sk_xid = vx_current_xid();
+       set_nx_info(&sk->sk_nx_info, current->nx_info);
+       sk->sk_nid = nx_current_nid();
 
        inet->uc_ttl    = -1;
        inet->mc_loop   = 1;
@@ -421,8 +429,13 @@ static int inet_create(struct socket *sock, int protocol)
 
        if (sk->sk_prot->init) {
                err = sk->sk_prot->init(sk);
-               if (err)
-                       inet_sock_release(sk);
+               if (err) {
+/*                     sk->sk_vx_info = NULL;
+                       put_vx_info(current->vx_info);
+                       sk->sk_nx_info = NULL;
+                       put_nx_info(current->nx_info);
+*/                     inet_sock_release(sk);
+               }
        }
 out:
        return err;
@@ -460,6 +473,8 @@ int inet_release(struct socket *sock)
                    !(current->flags & PF_EXITING))
                        timeout = sk->sk_lingertime;
                sock->sk = NULL;
+               clr_vx_info(&sk->sk_vx_info);
+               clr_nx_info(&sk->sk_nx_info);
                sk->sk_prot->close(sk, timeout);
        }
        return 0;
@@ -476,6 +491,10 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        unsigned short snum;
        int chk_addr_ret;
        int err;
+       __u32 s_addr;   /* Address used for validation */
+       __u32 s_addr1;
+       __u32 s_addr2 = 0xffffffffl;    /* Optional address of the socket */
+       struct nx_info *nxi = sk->sk_nx_info;
 
        /* If the socket has its own bind function then use it. (RAW) */
        if (sk->sk_prot->bind) {
@@ -486,7 +505,36 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        if (addr_len < sizeof(struct sockaddr_in))
                goto out;
 
-       chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
+       s_addr = s_addr1 = addr->sin_addr.s_addr;
+       nxdprintk("inet_bind(%p) %p,%p;%lx\n",
+               sk, nxi, sk->sk_socket,
+               (sk->sk_socket?sk->sk_socket->flags:0));
+       if (nxi) {      /* socket belongs to a network context: remap/validate */
+               __u32 v4_bcast = nxi->v4_bcast;
+               __u32 ipv4root = nxi->ipv4[0];
+               int nbipv4 = nxi->nbipv4;
+               if (s_addr == 0) {      /* bind(any): validate with the first address */
+                       s_addr = ipv4root;
+                       if (nbipv4 > 1)
+                               s_addr1 = 0;
+                       else {
+                               s_addr1 = ipv4root;
+                       }
+                       s_addr2 = v4_bcast;
+               } else if (s_addr == htonl(INADDR_LOOPBACK)) {  /* 127.0.0.1, any endianness */
+                       s_addr = s_addr1 = ipv4root;
+               } else if (s_addr != v4_bcast) {        /* must be one of the listed addresses */
+                       int i;
+                       for (i=0; i<nbipv4; i++) {
+                               if (s_addr == nxi->ipv4[i])
+                                       break;
+                       }
+                       if (i == nbipv4) {
+                               return -EADDRNOTAVAIL;
+                       }
+               }
+       }
+       chk_addr_ret = inet_addr_type(s_addr);
 
        /* Not specified by any standard per-se, however it breaks too
         * many applications when removed.  It is unfortunate since
@@ -498,7 +546,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        err = -EADDRNOTAVAIL;
        if (!sysctl_ip_nonlocal_bind &&
            !inet->freebind &&
-           addr->sin_addr.s_addr != INADDR_ANY &&
+           s_addr != INADDR_ANY &&
            chk_addr_ret != RTN_LOCAL &&
            chk_addr_ret != RTN_MULTICAST &&
            chk_addr_ret != RTN_BROADCAST)
@@ -523,7 +571,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        if (sk->sk_state != TCP_CLOSE || inet->num)
                goto out_release_sock;
 
-       inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;
+       inet->rcv_saddr = inet->saddr = s_addr1;
+       inet->rcv_saddr2 = s_addr2;
        if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
                inet->saddr = 0;  /* Use device */
 
index c9cd619..b97526d 100644 (file)
@@ -488,6 +488,33 @@ static __inline__ int inet_abc_len(u32 addr)
        return rc;
 }
 
+/*
+       Tell whether an interface address is off limits for the current
+       process.  Returns 1 when the process has an nx_info and the local
+       address of ifa is not among its nbipv4 listed IPv4 addresses.
+       Returns 0 otherwise; a process without nx_info is not restricted,
+       so every interface address is acceptable.
+*/
+static inline int devinet_notiproot (struct in_ifaddr *ifa)
+{
+       struct nx_info *nxi = current->nx_info;
+       __u32 local;
+       int idx, count;
+
+       if (!nxi)
+               return 0;
+
+       count = nxi->nbipv4;
+       local = ifa->ifa_local;
+       for (idx = 0; idx < count; idx++) {
+               /* the address belongs to this context */
+               if (nxi->ipv4[idx] == local)
+                       return 0;
+       }
+       /* restricted, and the address is not listed */
+       return 1;
+}
+
 
 int devinet_ioctl(unsigned int cmd, void *arg)
 {
@@ -595,6 +622,8 @@ int devinet_ioctl(unsigned int cmd, void *arg)
        ret = -EADDRNOTAVAIL;
        if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
                goto done;
+       if (!ifa_in_nx_info(ifa, current->nx_info))
+               goto done;
 
        switch(cmd) {
        case SIOCGIFADDR:       /* Get interface address */
@@ -724,6 +753,8 @@ static int inet_gifconf(struct net_device *dev, char *buf, int len)
                goto out;
 
        for (; ifa; ifa = ifa->ifa_next) {
+               if (!ifa_in_nx_info(ifa, current->nx_info))
+                       continue;
                if (!buf) {
                        done += sizeof(ifr);
                        continue;
@@ -1059,6 +1090,8 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
                read_lock(&in_dev->lock);
                for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
                     ifa = ifa->ifa_next, ip_idx++) {
+                       if (!ifa_in_nx_info(ifa, current->nx_info))
+                               continue;
                        if (ip_idx < s_ip_idx)
                                continue;
                        if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
index 2316dfa..3b4f8e3 100644 (file)
@@ -1010,6 +1010,8 @@ static unsigned fib_flag_trans(int type, int dead, u32 mask, struct fib_info *fi
        return flags;
 }
 
+extern int dev_in_nx_info(struct net_device *, struct nx_info *);
+
 /* 
  *     This outputs /proc/net/route.
  *
@@ -1039,7 +1041,7 @@ static int fib_seq_show(struct seq_file *seq, void *v)
        mask    = FZ_MASK(iter->zone);
        flags   = fib_flag_trans(f->fn_type, f->fn_state & FN_S_ZOMBIE,
                                 mask, fi);
-       if (fi)
+       if (fi && dev_in_nx_info(fi->fib_dev, current->nx_info))
                snprintf(bf, sizeof(bf),
                         "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
                         fi->fib_dev ? fi->fib_dev->name : "*", prefix,
index d7b9238..a065088 100644 (file)
@@ -102,6 +102,38 @@ static void raw_v4_unhash(struct sock *sk)
        write_unlock_bh(&raw_v4_lock);
 }
 
+
+/*
+       Decide whether loc_addr matches a raw socket's local addresses.
+       rcv_saddr1/rcv_saddr2 are the two addresses kept for the socket;
+       nx_info, when non-NULL, lists the addresses a socket with
+       rcv_saddr1 == 0 is limited to.  Returns 1 on a match, else 0.
+*/
+static inline int raw_addr_in_list (
+       u32 rcv_saddr1,
+       u32 rcv_saddr2,
+       u32 loc_addr,
+       struct nx_info *nx_info)
+{
+       int idx, count;
+
+       /* a non-zero loc_addr equal to either stored address */
+       if (loc_addr != 0 &&
+               (rcv_saddr1 == loc_addr || rcv_saddr2 == loc_addr))
+               return 1;
+       /* bound to a specific address that did not match */
+       if (rcv_saddr1 != 0)
+               return 0;
+       /* wildcard socket without a network context: accept */
+       if (!nx_info)
+               return 1;
+       count = nx_info->nbipv4;
+       for (idx = 0; idx < count; idx++)
+               if (nx_info->ipv4[idx] == loc_addr)
+                       return 1;
+       return 0;
+}
+
 struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
                             unsigned long raddr, unsigned long laddr,
                             int dif)
@@ -113,7 +145,8 @@ struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
 
                if (inet->num == num                                    &&
                    !(inet->daddr && inet->daddr != raddr)              &&
-                   !(inet->rcv_saddr && inet->rcv_saddr != laddr)      &&
+                   raw_addr_in_list(inet->rcv_saddr, inet->rcv_saddr2,
+                       laddr, sk->sk_nx_info) &&
                    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
                        goto found; /* gotcha */
        }
@@ -687,7 +720,8 @@ static struct sock *raw_get_first(struct seq_file *seq)
                struct hlist_node *node;
 
                sk_for_each(sk, node, &raw_v4_htable[state->bucket])
-                       if (sk->sk_family == PF_INET)
+                       if (sk->sk_family == PF_INET &&
+                               vx_check(sk->sk_xid, VX_WATCH|VX_IDENT))
                                goto found;
        }
        sk = NULL;
@@ -703,7 +737,8 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
                sk = sk_next(sk);
 try_again:
                ;
-       } while (sk && sk->sk_family != PF_INET);
+       } while (sk && (sk->sk_family != PF_INET ||
+               !vx_check(sk->sk_xid, VX_WATCH|VX_IDENT)));
 
        if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) {
                sk = sk_head(&raw_v4_htable[state->bucket]);
index ac92115..208233e 100644 (file)
@@ -179,9 +179,63 @@ void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb,
        tcp_sk(sk)->bind_hash = tb;
 }
 
+/*
+       Return 1 if addr is an address sk may use: with an nx_info, any
+       entry of its IPv4 list; without one, the bound address of sk,
+       where an unbound (INADDR_ANY) socket matches every addr.
+*/
+static inline int tcp_in_list(struct sock *sk, u32 addr)
+{
+       struct nx_info *nxi = sk->sk_nx_info;
+       int idx, count;
+
+       vxdprintk("tcp_in_list(%p) %p,%p;%lx\n",
+               sk, nxi, sk->sk_socket,
+               (sk->sk_socket?sk->sk_socket->flags:0));
+
+       if (!nxi) {
+               u32 bound = tcp_v4_rcv_saddr(sk);
+               return !bound || bound == addr;
+       }
+       count = nxi->nbipv4;
+       for (idx = 0; idx < count; idx++)
+               if (nxi->ipv4[idx] == addr)
+                       return 1;
+       return 0;
+}
+       
+/*
+       Return 1 if the addresses of sk1 clash with those of sk2, else 0
+*/
+int tcp_ipv4_addr_conflict(struct sock *sk1, struct sock *sk2)
+{
+       u32 bound;
+       struct nx_info *nxi;
+       int idx, count;
+
+       if (sk1 && sk2)
+               nxdprintk("inet_bind(%p,%p) %p,%p;%lx %p,%p;%lx\n",
+                       sk1, sk2,
+                       sk1->sk_nx_info, sk1->sk_socket,
+                       (sk1->sk_socket?sk1->sk_socket->flags:0),
+                       sk2->sk_nx_info, sk2->sk_socket,
+                       (sk2->sk_socket?sk2->sk_socket->flags:0));
+
+       bound = tcp_v4_rcv_saddr(sk1);
+       if (bound)      /* sk1 is bound to a single address */
+               return tcp_in_list(sk2, bound);
+       nxi = sk1->sk_nx_info;
+       if (!nxi)       /* unrestricted bind(any) clashes with everything */
+               return 1;
+       count = nxi->nbipv4;
+       for (idx = 0; idx < count; idx++)       /* restricted bind(any) */
+               if (tcp_in_list(sk2, nxi->ipv4[idx]))
+                       return 1;
+       return 0;
+}
+
 static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb)
 {
-       const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk);
        struct sock *sk2;
        struct hlist_node *node;
        int reuse = sk->sk_reuse;
@@ -194,9 +248,7 @@ static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb)
                     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
                        if (!reuse || !sk2->sk_reuse ||
                            sk2->sk_state == TCP_LISTEN) {
-                               const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2);
-                               if (!sk2_rcv_saddr || !sk_rcv_saddr ||
-                                   sk2_rcv_saddr == sk_rcv_saddr)
+                               if (tcp_ipv4_addr_conflict(sk, sk2))
                                        break;
                        }
                }
@@ -405,6 +457,34 @@ void tcp_unhash(struct sock *sk)
                wake_up(&tcp_lhash_wait);
 }
 
+/*
+       Return 1 if rcv_saddr (0 = any, limited by nx_info) covers daddr
+*/
+static inline int tcp_addr_in_list(
+       u32 rcv_saddr,
+       u32 daddr,
+       struct nx_info *nx_info)
+{
+       int idx, count;
+
+       /* exact match on the bound address */
+       if (rcv_saddr == daddr)
+               return 1;
+       /* bound to some other specific address */
+       if (rcv_saddr != 0)
+               return 0;
+       /* wildcard: anything goes without nx_info, else check its list */
+       if (nx_info == NULL)
+               return 1;
+       count = nx_info->nbipv4;
+       for (idx = 0; idx < count; idx++)
+               if (nx_info->ipv4[idx] == daddr)
+                       return 1;
+       return 0;
+}
+
+
+
 /* Don't inline this cruft.  Here are some nice properties to
  * exploit here.  The BSD API does not allow a listening TCP
  * to specify the remote port nor the remote address for the
@@ -426,11 +506,10 @@ static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr,
                        __u32 rcv_saddr = inet->rcv_saddr;
 
                        score = (sk->sk_family == PF_INET ? 1 : 0);
-                       if (rcv_saddr) {
-                               if (rcv_saddr != daddr)
-                                       continue;
+                       if (tcp_addr_in_list(rcv_saddr, daddr, sk->sk_nx_info))
                                score+=2;
-                       }
+                       else
+                               continue;
                        if (sk->sk_bound_dev_if) {
                                if (sk->sk_bound_dev_if != dif)
                                        continue;
@@ -460,8 +539,8 @@ inline struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum,
                struct inet_opt *inet = inet_sk((sk = __sk_head(head)));
 
                if (inet->num == hnum && !sk->sk_node.next &&
-                   (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
                    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
+                   tcp_addr_in_list(inet->rcv_saddr, daddr, sk->sk_nx_info) &&
                    !sk->sk_bound_dev_if)
                        goto sherry_cache;
                sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif);
@@ -2159,6 +2238,8 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
                req = req->dl_next;
                while (1) {
                        while (req) {
+                               if (req->sk && !vx_check(req->sk->sk_xid, VX_IDENT|VX_WATCH)) {
+                                       req = req->dl_next; continue; } /* advance, or while (req) never ends */
                                if (req->class->family == st->family) {
                                        cur = req;
                                        goto out;
@@ -2177,6 +2258,8 @@ get_req:
                sk = sk_next(sk);
 get_sk:
        sk_for_each_from(sk, node) {
+               if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
+                       continue;
                if (sk->sk_family == st->family) {
                        cur = sk;
                        goto out;
@@ -2224,18 +2307,20 @@ static void *established_get_first(struct seq_file *seq)
               
                read_lock(&tcp_ehash[st->bucket].lock);
                sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) {
-                       if (sk->sk_family != st->family) {
+                       if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
+                               continue;
+                       if (sk->sk_family != st->family)
                                continue;
-                       }
                        rc = sk;
                        goto out;
                }
                st->state = TCP_SEQ_STATE_TIME_WAIT;
                tw_for_each(tw, node,
                            &tcp_ehash[st->bucket + tcp_ehash_size].chain) {
-                       if (tw->tw_family != st->family) {
+                       if (!vx_check(tw->tw_xid, VX_IDENT|VX_WATCH))
+                               continue;
+                       if (tw->tw_family != st->family)
                                continue;
-                       }
                        rc = tw;
                        goto out;
                }
@@ -2259,7 +2344,8 @@ static void *established_get_next(struct seq_file *seq, void *cur)
                tw = cur;
                tw = tw_next(tw);
 get_tw:
-               while (tw && tw->tw_family != st->family) {
+               while (tw && (tw->tw_family != st->family ||
+                       !vx_check(tw->tw_xid, VX_IDENT|VX_WATCH))) {
                        tw = tw_next(tw);
                }
                if (tw) {
@@ -2279,6 +2365,8 @@ get_tw:
                sk = sk_next(sk);
 
        sk_for_each_from(sk, node) {
+               if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
+                       continue;
                if (sk->sk_family == st->family)
                        goto found;
        }
index d4c0d84..a0a8422 100644 (file)
@@ -362,6 +362,11 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
                tw->tw_ts_recent_stamp  = tp->ts_recent_stamp;
                tw_dead_node_init(tw);
 
+               tw->tw_xid              = sk->sk_xid;
+               tw->tw_vx_info          = NULL;
+               tw->tw_nid              = sk->sk_nid;
+               tw->tw_nx_info          = NULL;
+               
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
                if (tw->tw_family == PF_INET6) {
                        struct ipv6_pinfo *np = inet6_sk(sk);
@@ -697,6 +702,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
                newsk->sk_state = TCP_SYN_RECV;
 
                /* SANITY */
+               sock_vx_init(newsk);
+               sock_nx_init(newsk);
                sk_node_init(&newsk->sk_node);
                tcp_sk(newsk)->bind_hash = NULL;
 
@@ -798,6 +805,10 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
                newsk->sk_err = 0;
                newsk->sk_priority = 0;
                atomic_set(&newsk->sk_refcnt, 2);
+
+               /* hmm, maybe from socket? */
+               set_vx_info(&newsk->sk_vx_info, current->vx_info);
+               set_nx_info(&newsk->sk_nx_info, current->nx_info);
 #ifdef INET_REFCNT_DEBUG
                atomic_inc(&inet_sock_nr);
 #endif
index 05dcb54..8ddc299 100644 (file)
@@ -120,6 +120,8 @@ rwlock_t udp_hash_lock = RW_LOCK_UNLOCKED;
 /* Shared by v4/v6 udp. */
 int udp_port_rover;
 
+int tcp_ipv4_addr_conflict(struct sock *sk1, struct sock *sk2);
+
 static int udp_v4_get_port(struct sock *sk, unsigned short snum)
 {
        struct hlist_node *node;
@@ -179,9 +181,7 @@ gotit:
                            (!sk2->sk_bound_dev_if ||
                             !sk->sk_bound_dev_if ||
                             sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
-                           (!inet2->rcv_saddr ||
-                            !inet->rcv_saddr ||
-                            inet2->rcv_saddr == inet->rcv_saddr) &&
+                           tcp_ipv4_addr_conflict(sk2, sk) &&
                            (!sk2->sk_reuse || !sk->sk_reuse))
                                goto fail;
                }
@@ -216,6 +216,17 @@ static void udp_v4_unhash(struct sock *sk)
        write_unlock_bh(&udp_hash_lock);
 }
 
+static inline int udp_in_list(struct nx_info *nx_info, u32 addr)
+{
+       /* 1 when addr is among nx_info's IPv4 addresses, 0 when not */
+       int count = nx_info->nbipv4;
+       int idx = 0;
+       while (idx < count)
+               if (nx_info->ipv4[idx++] == addr)
+                       return 1;
+       return 0;
+}
+
 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
  * harder than this. -DaveM
  */
@@ -235,6 +246,11 @@ struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, i
                                if (inet->rcv_saddr != daddr)
                                        continue;
                                score+=2;
+                       } else if (sk->sk_nx_info) {
+                               if (udp_in_list(sk->sk_nx_info, daddr))
+                                       score+=2;
+                               else
+                                       continue;
                        }
                        if (inet->daddr) {
                                if (inet->daddr != saddr)
@@ -290,7 +306,8 @@ static inline struct sock *udp_v4_mcast_next(struct sock *sk,
                if (inet->num != hnum                                   ||
                    (inet->daddr && inet->daddr != rmt_addr)            ||
                    (inet->dport != rmt_port && inet->dport)            ||
-                   (inet->rcv_saddr && inet->rcv_saddr != loc_addr)    ||
+                   (inet->rcv_saddr && inet->rcv_saddr != loc_addr &&
+                    inet->rcv_saddr2 && inet->rcv_saddr2 != loc_addr)  ||
                    ipv6_only_sock(s)                                   ||
                    (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
                        continue;
@@ -599,6 +616,15 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                                    .uli_u = { .ports =
                                               { .sport = inet->sport,
                                                 .dport = dport } } };
+               struct nx_info *nxi = sk->sk_nx_info;
+
+               if (nxi) {
+                       err = ip_find_src(nxi, &rt, &fl);
+                       if (err)
+                               goto out;
+                       if (daddr == IPI_LOOPBACK && !vx_check(0, VX_ADMIN))
+                               daddr = fl.fl4_dst = nxi->ipv4[0];
+               }
                err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
                if (err)
                        goto out;
@@ -1374,8 +1400,10 @@ static struct sock *udp_get_first(struct seq_file *seq)
 
        for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
                struct hlist_node *node;
+
                sk_for_each(sk, node, &udp_hash[state->bucket]) {
-                       if (sk->sk_family == state->family)
+                       if (sk->sk_family == state->family &&
+                               vx_check(sk->sk_xid, VX_WATCH|VX_IDENT))
                                goto found;
                }
        }
@@ -1392,7 +1420,8 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
                sk = sk_next(sk);
 try_again:
                ;
-       } while (sk && sk->sk_family != state->family);
+       } while (sk && (sk->sk_family != state->family ||
+               !vx_check(sk->sk_xid, VX_WATCH|VX_IDENT)));
 
        if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
                sk = sk_head(&udp_hash[state->bucket]);
index bee0949..3f6b90a 100644 (file)
@@ -285,7 +285,7 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
        ei->socket.ops = NULL;
        ei->socket.sk = NULL;
        ei->socket.file = NULL;
-       ei->socket.passcred = 0;
+       ei->socket.flags = 0;
 
        return &ei->vfs_inode;
 }
@@ -529,7 +529,7 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
                                 struct msghdr *msg, size_t size)
 {
        struct sock_iocb *si = kiocb_to_siocb(iocb);
-       int err;
+       int err, len;
 
        si->sock = sock;
        si->scm = NULL;
@@ -540,7 +540,20 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
        if (err)
                return err;
 
-       return sock->ops->sendmsg(iocb, sock, msg, size);
+       len = sock->ops->sendmsg(iocb, sock, msg, size);
+       if (sock->sk) {
+               if (len == size)
+                       vx_sock_send(sock->sk, size);
+               else
+                       vx_sock_fail(sock->sk, size);
+       }
+       vxdprintk("__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d\n",
+               sock, sock->sk,
+               (sock->sk)?sock->sk->sk_nx_info:0,
+               (sock->sk)?sock->sk->sk_vx_info:0,
+               (sock->sk)?sock->sk->sk_xid:0,
+               size, len);
+       return len;
 }
 
 int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
@@ -559,7 +572,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, 
                                 struct msghdr *msg, size_t size, int flags)
 {
-       int err;
+       int err, len;
        struct sock_iocb *si = kiocb_to_siocb(iocb);
 
        si->sock = sock;
@@ -572,7 +585,16 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
        if (err)
                return err;
 
-       return sock->ops->recvmsg(iocb, sock, msg, size, flags);
+       len = sock->ops->recvmsg(iocb, sock, msg, size, flags);
+       if ((len >= 0) && sock->sk)
+               vx_sock_recv(sock->sk, len);
+       vxdprintk("__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d\n",
+               sock, sock->sk,
+               (sock->sk)?sock->sk->sk_nx_info:0,
+               (sock->sk)?sock->sk->sk_vx_info:0,
+               (sock->sk)?sock->sk->sk_xid:0,
+               size, len);
+       return len;
 }
 
 int sock_recvmsg(struct socket *sock, struct msghdr *msg, 
@@ -1018,6 +1040,10 @@ static int __sock_create(int family, int type, int protocol, struct socket **res
        if (type < 0 || type >= SOCK_MAX)
                return -EINVAL;
 
+       /* disable IPv6 inside vservers for now */
+       if (family == PF_INET6 && !vx_check(0, VX_ADMIN))
+               return -EAFNOSUPPORT;
+
        /* Compatibility.
 
           This uglymoron is moved from INET layer to here to avoid
@@ -1126,6 +1152,7 @@ asmlinkage long sys_socket(int family, int type, int protocol)
        if (retval < 0)
                goto out;
 
+       set_bit(SOCK_USER_SOCKET, &sock->flags);
        retval = sock_map_fd(sock);
        if (retval < 0)
                goto out_release;
@@ -1156,10 +1183,12 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *u
        err = sock_create(family, type, protocol, &sock1);
        if (err < 0)
                goto out;
+       set_bit(SOCK_USER_SOCKET, &sock1->flags);
 
        err = sock_create(family, type, protocol, &sock2);
        if (err < 0)
                goto out_release_1;
+       set_bit(SOCK_USER_SOCKET, &sock2->flags);
 
        err = sock1->ops->socketpair(sock1, sock2);
        if (err < 0) 
index 89fb7eb..7bd7423 100644 (file)
@@ -405,6 +405,8 @@ static int unix_release_sock (struct sock *sk, int embrion)
                mntput(mnt);
        }
 
+       clr_vx_info(&sk->sk_vx_info);
+       clr_nx_info(&sk->sk_nx_info);
        sock_put(sk);
 
        /* ---- Socket is dead now and most probably destroyed ---- */
@@ -559,6 +561,10 @@ static struct sock * unix_create1(struct socket *sock)
        sock_init_data(sock,sk);
        sk_set_owner(sk, THIS_MODULE);
 
+       set_vx_info(&sk->sk_vx_info, current->vx_info);
+       set_nx_info(&sk->sk_nx_info, current->nx_info);
+       sk->sk_xid = vx_current_xid();
+
        sk->sk_write_space      = unix_write_space;
        sk->sk_max_ack_backlog  = sysctl_unix_max_dgram_qlen;
        sk->sk_destruct         = unix_sock_destructor;
@@ -870,7 +876,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
                        goto out;
                alen = err;
 
-               if (sock->passcred && !unix_sk(sk)->addr &&
+               if (test_bit(SOCK_PASS_CRED, &sock->flags) && !unix_sk(sk)->addr &&
                    (err = unix_autobind(sock)) != 0)
                        goto out;
 
@@ -961,7 +967,8 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                goto out;
        addr_len = err;
 
-       if (sock->passcred && !u->addr && (err = unix_autobind(sock)) != 0)
+       if (test_bit(SOCK_PASS_CRED, &sock->flags)
+               && !u->addr && (err = unix_autobind(sock)) != 0)
                goto out;
 
        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
@@ -1295,7 +1302,8 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
                        goto out;
        }
 
-       if (sock->passcred && !u->addr && (err = unix_autobind(sock)) != 0)
+       if (test_bit(SOCK_PASS_CRED, &sock->flags)
+               && !u->addr && (err = unix_autobind(sock)) != 0)
                goto out;
 
        err = -EMSGSIZE;
index f40fc73..9b8cd2d 100644 (file)
@@ -120,7 +120,7 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
        /* Derived from fs/exec.c:compute_creds. */
        kernel_cap_t new_permitted, working;
 
-       new_permitted = cap_intersect (bprm->cap_permitted, cap_bset);
+       new_permitted = cap_intersect (bprm->cap_permitted, vx_current_bcaps());
        working = cap_intersect (bprm->cap_inheritable,
                                 current->cap_inheritable);
        new_permitted = cap_combine (new_permitted, working);